diff options
Diffstat (limited to 'sys-utils')
67 files changed, 2495 insertions, 1261 deletions
diff --git a/sys-utils/Makemodule.am b/sys-utils/Makemodule.am index 4139fcf65..93a2c8d92 100644 --- a/sys-utils/Makemodule.am +++ b/sys-utils/Makemodule.am @@ -47,6 +47,14 @@ cytune_SOURCES = sys-utils/cytune.c sys-utils/cyclades.h cytune_LDADD = $(LDADD) libcommon.la endif +if BUILD_FSTRIM +sbin_PROGRAMS += fstrim +dist_man_MANS += sys-utils/fstrim.8 +fstrim_SOURCES = sys-utils/fstrim.c +fstrim_LDADD = $(LDADD) libcommon.la libmount.la +fstrim_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir) +endif + if LINUX # # Linux-only utils with no another dependencies. All another dependencies have @@ -55,7 +63,7 @@ if LINUX bin_PROGRAMS += dmesg dist_man_MANS += sys-utils/dmesg.1 dmesg_SOURCES = sys-utils/dmesg.c -dmesg_LDADD = $(LDADD) libcommon.la +dmesg_LDADD = $(LDADD) libcommon.la -lrt sbin_PROGRAMS += ctrlaltdel dist_man_MANS += sys-utils/ctrlaltdel.8 @@ -65,11 +73,6 @@ sbin_PROGRAMS += fsfreeze dist_man_MANS += sys-utils/fsfreeze.8 fsfreeze_SOURCES = sys-utils/fsfreeze.c -sbin_PROGRAMS += fstrim -dist_man_MANS += sys-utils/fstrim.8 -fstrim_SOURCES = sys-utils/fstrim.c -fstrim_LDADD = $(LDADD) libcommon.la - sbin_PROGRAMS += blkdiscard dist_man_MANS += sys-utils/blkdiscard.8 blkdiscard_SOURCES = sys-utils/blkdiscard.c @@ -239,7 +242,10 @@ endif if BUILD_LSCPU usrbin_exec_PROGRAMS += lscpu -lscpu_SOURCES = sys-utils/lscpu.c +lscpu_SOURCES = \ + sys-utils/lscpu.c \ + sys-utils/lscpu.h \ + sys-utils/lscpu-dmi.c lscpu_LDADD = $(LDADD) libcommon.la dist_man_MANS += sys-utils/lscpu.1 endif @@ -290,6 +296,7 @@ usrbin_exec_PROGRAMS += unshare dist_man_MANS += sys-utils/unshare.1 unshare_SOURCES = sys-utils/unshare.c unshare_LDADD = $(LDADD) libcommon.la +unshare_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir) endif if BUILD_NSENTER diff --git a/sys-utils/blkdiscard.8 b/sys-utils/blkdiscard.8 index 7e8c91e8f..e71a09360 100644 --- a/sys-utils/blkdiscard.8 +++ b/sys-utils/blkdiscard.8 @@ -38,7 +38,7 @@ MiB=1024*1024, and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is 
optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB=1000, MB=1000*1000, and so on for GB, TB, PB, EB, ZB and YB. .IP "\fB\-h, \-\-help\fP" -Print help and exit. +Display help text and exit. .IP "\fB\-o, \-\-offset\fP \fIoffset\fP" Byte offset in the device from which to discard. Provided value will be aligned to the device sector size. Default value is zero. diff --git a/sys-utils/blkdiscard.c b/sys-utils/blkdiscard.c index bdcd06e37..2ddcdb1b4 100644 --- a/sys-utils/blkdiscard.c +++ b/sys-utils/blkdiscard.c @@ -70,8 +70,8 @@ static void __attribute__((__noreturn__)) usage(FILE *out) int main(int argc, char **argv) { char *path; - int c, fd, verbose = 0, secure = 0; - uint64_t end, blksize, secsize, range[2]; + int c, fd, verbose = 0, secure = 0, secsize; + uint64_t end, blksize, range[2]; struct stat sb; static const struct option longopts[] = { @@ -121,7 +121,7 @@ int main(int argc, char **argv) } if (optind == argc) - errx(EXIT_FAILURE, _("no device specified.")); + errx(EXIT_FAILURE, _("no device specified")); path = argv[optind++]; @@ -130,18 +130,17 @@ int main(int argc, char **argv) usage(stderr); } - if (stat(path, &sb) == -1) - err(EXIT_FAILURE, _("stat failed %s"), path); - if (!S_ISBLK(sb.st_mode)) - errx(EXIT_FAILURE, _("%s: not a block device"), path); - fd = open(path, O_WRONLY); if (fd < 0) err(EXIT_FAILURE, _("cannot open %s"), path); + if (fstat(fd, &sb) == -1) + err(EXIT_FAILURE, _("stat failed %s"), path); + if (!S_ISBLK(sb.st_mode)) + errx(EXIT_FAILURE, _("%s: not a block device"), path); + if (ioctl(fd, BLKGETSIZE64, &blksize)) err(EXIT_FAILURE, _("%s: BLKGETSIZE64 ioctl failed"), path); - if (ioctl(fd, BLKSSZGET, &secsize)) err(EXIT_FAILURE, _("%s: BLKSSZGET ioctl failed"), path); diff --git a/sys-utils/chcpu.8 b/sys-utils/chcpu.8 index d016b86f2..c3ce86c3c 100644 --- a/sys-utils/chcpu.8 +++ b/sys-utils/chcpu.8 @@ -76,10 +76,24 @@ instance runs and returns it to the CPU pool. 
A CPU must be offline, see \-d, before it can be deconfigured. .TP .BR \-h , " \-\-help" -Display help information and exit. +Display help text and exit. .TP .BR \-V , " \-\-version" Display version information and exit. + +.SH RETURN CODES +.B chcpu +has the following return codes: +.TP +.BR 0 +success +.TP +.BR 1 +failure +.TP +.BR 64 +partial success +.RE .SH AUTHOR .MT heiko.carstens@de.ibm.com Heiko Carstens diff --git a/sys-utils/chcpu.c b/sys-utils/chcpu.c index 1162888d5..ada0eaacc 100644 --- a/sys-utils/chcpu.c +++ b/sys-utils/chcpu.c @@ -45,6 +45,9 @@ #define EXCL_ERROR "--{configure,deconfigure,disable,dispatch,enable}" +/* partial success, otherwise we return regular EXIT_{SUCCESS,FAILURE} */ +#define CHCPU_EXIT_SOMEOK 64 + #define _PATH_SYS_CPU "/sys/devices/system/cpu" #define _PATH_SYS_CPU_ONLINE _PATH_SYS_CPU "/online" #define _PATH_SYS_CPU_RESCAN _PATH_SYS_CPU "/rescan" @@ -66,21 +69,28 @@ enum { CMD_CPU_DISPATCH_VERTICAL, }; +/* returns: 0 = success + * < 0 = failure + * > 0 = partial success + */ static int cpu_enable(cpu_set_t *cpu_set, size_t setsize, int enable) { unsigned int cpu; int online, rc; int configured = -1; + size_t fails = 0; for (cpu = 0; cpu < setsize; cpu++) { if (!CPU_ISSET(cpu, cpu_set)) continue; if (!path_exist(_PATH_SYS_CPU "/cpu%d", cpu)) { - printf(_("CPU %d does not exist\n"), cpu); + warnx(_("CPU %d does not exist"), cpu); + fails++; continue; } if (!path_exist(_PATH_SYS_CPU "/cpu%d/online", cpu)) { - printf(_("CPU %d is not hot pluggable\n"), cpu); + warnx(_("CPU %d is not hot pluggable"), cpu); + fails++; continue; } online = path_read_s32(_PATH_SYS_CPU "/cpu%d/online", cpu); @@ -96,30 +106,33 @@ static int cpu_enable(cpu_set_t *cpu_set, size_t setsize, int enable) configured = path_read_s32(_PATH_SYS_CPU "/cpu%d/configure", cpu); if (enable) { rc = path_write_str("1", _PATH_SYS_CPU "/cpu%d/online", cpu); - if ((rc == -1) && (configured == 0)) - warnx(_("CPU %d enable failed " - "(CPU is deconfigured)"), cpu); - else 
if (rc == -1) + if ((rc == -1) && (configured == 0)) { + warn(_("CPU %d enable failed (CPU is deconfigured)"), cpu); + fails++; + } else if (rc == -1) { warn(_("CPU %d enable failed"), cpu); - else + fails++; + } else printf(_("CPU %d enabled\n"), cpu); } else { if (onlinecpus && num_online_cpus() == 1) { - printf(_("CPU %d disable failed " - "(last enabled CPU)\n"), cpu); + warnx(_("CPU %d disable failed (last enabled CPU)"), cpu); + fails++; continue; } rc = path_write_str("0", _PATH_SYS_CPU "/cpu%d/online", cpu); - if (rc == -1) + if (rc == -1) { warn(_("CPU %d disable failed"), cpu); - else { + fails++; + } else { printf(_("CPU %d disabled\n"), cpu); if (onlinecpus) CPU_CLR(cpu, onlinecpus); } } } - return EXIT_SUCCESS; + + return fails == 0 ? 0 : fails == setsize ? -1 : 1; } static int cpu_rescan(void) @@ -129,7 +142,7 @@ static int cpu_rescan(void) if (path_write_str("1", _PATH_SYS_CPU_RESCAN) == -1) err(EXIT_FAILURE, _("Failed to trigger rescan of CPUs")); printf(_("Triggered rescan of CPUs\n")); - return EXIT_SUCCESS; + return 0; } static int cpu_set_dispatch(int mode) @@ -146,23 +159,30 @@ static int cpu_set_dispatch(int mode) err(EXIT_FAILURE, _("Failed to set vertical dispatch mode")); printf(_("Successfully set vertical dispatching mode\n")); } - return EXIT_SUCCESS; + return 0; } +/* returns: 0 = success + * < 0 = failure + * > 0 = partial success + */ static int cpu_configure(cpu_set_t *cpu_set, size_t setsize, int configure) { unsigned int cpu; int rc, current; + size_t fails = 0; for (cpu = 0; cpu < setsize; cpu++) { if (!CPU_ISSET(cpu, cpu_set)) continue; if (!path_exist(_PATH_SYS_CPU "/cpu%d", cpu)) { - printf(_("CPU %d does not exist\n"), cpu); + warnx(_("CPU %d does not exist"), cpu); + fails++; continue; } if (!path_exist(_PATH_SYS_CPU "/cpu%d/configure", cpu)) { - printf(_("CPU %d is not configurable\n"), cpu); + warnx(_("CPU %d is not configurable"), cpu); + fails++; continue; } current = path_read_s32(_PATH_SYS_CPU "/cpu%d/configure", cpu); 
@@ -176,25 +196,28 @@ static int cpu_configure(cpu_set_t *cpu_set, size_t setsize, int configure) } if ((current == 1) && (configure == 0) && onlinecpus && is_cpu_online(cpu)) { - printf(_("CPU %d deconfigure failed " - "(CPU is enabled)\n"), cpu); + warnx(_("CPU %d deconfigure failed (CPU is enabled)"), cpu); + fails++; continue; } if (configure) { rc = path_write_str("1", _PATH_SYS_CPU "/cpu%d/configure", cpu); - if (rc == -1) + if (rc == -1) { warn(_("CPU %d configure failed"), cpu); - else + fails++; + } else printf(_("CPU %d configured\n"), cpu); } else { rc = path_write_str("0", _PATH_SYS_CPU "/cpu%d/configure", cpu); - if (rc == -1) + if (rc == -1) { warn(_("CPU %d deconfigure failed"), cpu); - else + fails++; + } else printf(_("CPU %d deconfigured\n"), cpu); } } - return EXIT_SUCCESS; + + return fails == 0 ? 0 : fails == setsize ? -1 : 1; } static void cpu_parse(char *cpu_string, cpu_set_t *cpu_set, size_t setsize) @@ -233,7 +256,7 @@ int main(int argc, char *argv[]) cpu_set_t *cpu_set; size_t setsize; int cmd = -1; - int c; + int c, rc; static const struct option longopts[] = { { "configure", required_argument, 0, 'c' }, @@ -317,19 +340,31 @@ int main(int argc, char *argv[]) switch (cmd) { case CMD_CPU_ENABLE: - return cpu_enable(cpu_set, maxcpus, 1); + rc = cpu_enable(cpu_set, maxcpus, 1); + break; case CMD_CPU_DISABLE: - return cpu_enable(cpu_set, maxcpus, 0); + rc = cpu_enable(cpu_set, maxcpus, 0); + break; case CMD_CPU_CONFIGURE: - return cpu_configure(cpu_set, maxcpus, 1); + rc = cpu_configure(cpu_set, maxcpus, 1); + break; case CMD_CPU_DECONFIGURE: - return cpu_configure(cpu_set, maxcpus, 0); + rc = cpu_configure(cpu_set, maxcpus, 0); + break; case CMD_CPU_RESCAN: - return cpu_rescan(); + rc = cpu_rescan(); + break; case CMD_CPU_DISPATCH_HORIZONTAL: - return cpu_set_dispatch(0); + rc = cpu_set_dispatch(0); + break; case CMD_CPU_DISPATCH_VERTICAL: - return cpu_set_dispatch(1); + rc = cpu_set_dispatch(1); + break; + default: + rc = -EINVAL; + break; } 
- return EXIT_SUCCESS; + + return rc == 0 ? EXIT_SUCCESS : + rc < 0 ? EXIT_FAILURE : CHCPU_EXIT_SOMEOK; } diff --git a/sys-utils/ctrlaltdel.8 b/sys-utils/ctrlaltdel.8 index 14cf3dd16..30bbae458 100644 --- a/sys-utils/ctrlaltdel.8 +++ b/sys-utils/ctrlaltdel.8 @@ -32,10 +32,10 @@ file. .SH OPTIONS .TP \fB\-V\fR, \fB\-\-version\fR -Output version information and exit. +Display version information and exit. .TP \fB\-h\fR, \fB\-\-help\fR -Display help and exit. +Display help text and exit. .SH FILES .I /etc/rc.local .SH "SEE ALSO" diff --git a/sys-utils/cytune.c b/sys-utils/cytune.c index 932fcaac2..8a42f964e 100644 --- a/sys-utils/cytune.c +++ b/sys-utils/cytune.c @@ -95,7 +95,7 @@ static void __attribute__ ((__noreturn__)) usage(FILE * out) fprintf(out, _(" -g, --get-threshold display current threshold value\n")); fprintf(out, _(" -S, --set-default-threshold <num> set default threshold value\n")); fprintf(out, _(" -t, --set-flush <num> set flush timeout to value\n")); - fprintf(out, _(" -G, --get-glush display default flush timeout value\n")); + fprintf(out, _(" -G, --get-flush display default flush timeout value\n")); fprintf(out, _(" -T, --set-default-flush <num> set the default flush timeout to value\n")); fprintf(out, _(" -q, --stats display statistics about the tty\n")); fprintf(out, _(" -i, --interval <seconds> gather statistics every <seconds> interval\n")); diff --git a/sys-utils/dmesg.1 b/sys-utils/dmesg.1 index f2dcc1c24..f5d256cf7 100644 --- a/sys-utils/dmesg.1 +++ b/sys-utils/dmesg.1 @@ -20,7 +20,7 @@ dmesg \-\-console-off .B dmesg is used to examine or control the kernel ring buffer. .PP -The default action is to read all messages from kernel ring buffer. +The default action is to read all messages from the kernel ring buffer. .SH OPTIONS The \-\-clear, \-\-read-clear, \-\-console-on, \-\-console-off and \-\-console-level options are mutually exclusive. 
@@ -28,25 +28,25 @@ The \-\-clear, \-\-read-clear, \-\-console-on, \-\-console-off and .IP "\fB\-C\fR, \fB\-\-clear\fR" Clear the ring buffer. .IP "\fB\-c\fR, \fB\-\-read-clear\fR" -Clear the ring buffer contents after printing. +Clear the ring buffer after first printing its contents. .IP "\fB\-D\fR, \fB\-\-console-off\fR" Disable printing messages to the console. .IP "\fB\-d\fR, \fB\-\-show-delta\fR" -Display the timestamp and time delta spent between messages. If used +Display the timestamp and the time delta spent between messages. If used together with .B \-\-notime then only the time delta without the timestamp is printed. .IP "\fB\-e\fR, \fB\-\-reltime\fR" -Display the local time and delta in human readable format. +Display the local time and the delta in human-readable format. .IP "\fB\-E\fR, \fB\-\-console-on\fR" Enable printing messages to the console. .IP "\fB\-F\fR, \fB\-\-file \fIfile\fR" -Read log from +Read the messages from the given .IR file . .IP "\fB\-f\fR, \fB\-\-facility \fIlist\fR" -Restrict output to defined (comma separated) +Restrict output to the given (comma-separated) .I list -of facilities. For example +of facilities. For example: .PP .RS 14 dmesg \-\-facility=daemon @@ -57,18 +57,20 @@ see .B dmesg \-\-help output. .IP "\fB\-H\fR, \fB\-\-human\fR" -Enable human readable output. See also \fB\-\-color\fR, \fB\-\-reltime\fR +Enable human-readable output. See also \fB\-\-color\fR, \fB\-\-reltime\fR and \fB\-\-nopager\fR. .IP "\fB\-h\fR, \fB\-\-help\fR" -Print a help text and exit. +Display help text and exit. .IP "\fB\-k\fR, \fB\-\-kernel\fR" Print kernel messages. -.IP "\fB\-L\fR, \fB\-\-color\fR" -Colorize important messages. +.IP "\fB\-L\fR, \fB\-\-color\fR[=\fIwhen\fR]" +Colorize important messages. The optional argument \fIwhen\fP can be \fBauto\fR, +\fBnever\fR or \fBalways\fR. If the \fIwhen\fR argument is omitted, +then it defaults to \fBauto\fR. 
.IP "\fB\-l\fR, \fB\-\-level \fIlist\fR" -Restrict output to defined (comma separated) +Restrict output to the given (comma-separated) .I list -of levels. For example +of levels. For example: .PP .RS 14 dmesg \-\-level=err,warn @@ -80,7 +82,7 @@ output. .IP "\fB\-n\fR, \fB\-\-console-level \fIlevel\fR Set the .I level -at which logging of messages is done to the console. The +at which printing of messages is done to the console. The .I level is a level number or abbreviation of the level name. For all supported levels see @@ -104,24 +106,24 @@ will .I not print or clear the kernel ring buffer. .IP "\fB\-P\fR, \fB\-\-nopager\fR" -Do not pipe output into a pager, the pager is enabled for \fB\-\-human\fR output. +Do not pipe output into a pager. A pager is enabled by default for \fB\-\-human\fR output. .IP "\fB\-r\fR, \fB\-\-raw\fR" -Print the raw message buffer, i.e., do not strip the log level prefixes. +Print the raw message buffer, i.e. do not strip the log-level prefixes. -Note that the real raw format depends on method how +Note that the real raw format depends on the method how .BR dmesg (1) -reads kernel messages. The /dev/kmsg uses different format than +reads kernel messages. The /dev/kmsg device uses a different format than .BR syslog (2) . -For backward compatibility +For backward compatibility, .BR dmesg (1) -returns data always in +returns data always in the .BR syslog (2) -format. The real raw data from /dev/kmsg is possible to read for example by -command 'dd if=/dev/kmsg iflag=nonblock'. +format. It is possible to read the real raw data from /dev/kmsg by, for example, +the command 'dd if=/dev/kmsg iflag=nonblock'. .IP "\fB\-S\fR, \fB\-\-syslog\fR" -Force to use +Force \fBdmesg\fR to use the .BR syslog (2) -kernel interface to read kernel messages. The default is to use /dev/kmsg rather +kernel interface to read kernel messages. The default is to use /dev/kmsg rather than .BR syslog (2) since kernel 3.5.0. 
@@ -130,11 +132,12 @@ Use a buffer of .I size to query the kernel ring buffer. This is 16392 by default. (The default kernel syslog buffer size was 4096 at first, 8192 since 1.3.54, 16384 since -2.1.113.) If you have set the kernel buffer to be larger than the default +2.1.113.) If you have set the kernel buffer to be larger than the default, then this option can be used to view the entire buffer. .IP "\fB\-T\fR, \fB\-\-ctime\fR" -Print human readable timestamps. The timestamp could be inaccurate! +Print human-readable timestamps. .IP +Be aware that the timestamp could be inaccurate! The .B time source used for the logs is @@ -146,12 +149,34 @@ Do not print kernel's timestamps. .IP "\fB\-u\fR, \fB\-\-userspace\fR" Print userspace messages. .IP "\fB\-V\fR, \fB\-\-version\fR" -Output version information and exit. +Display version information and exit. .IP "\fB\-w\fR, \fB\-\-follow\fR" -Wait for new messages. This feature is supported on systems with readable -/dev/kmsg only (since kernel 3.5.0). +Wait for new messages. This feature is supported only on systems with +a readable /dev/kmsg (since kernel 3.5.0). .IP "\fB\-x\fR, \fB\-\-decode\fR" -Decode facility and level (priority) number to human readable prefixes. +Decode facility and level (priority) numbers to human-readable prefixes. +.IP "\fB\-\-time\-format\fR \fIformat\fR" +Print timestamps using the given \fIformat\fR, which can be +.BR ctime , +.BR reltime , +.BR delta +or +.BR iso . +The first three formats are aliases of the time-format-specific options. +The +.B iso +format is a +.B dmesg +implementation of the ISO-8601 timestamp format. The purpose of this format is +to make the comparing of timestamps between two systems, and any other parsing, +easy. The definition of the \fBiso\fR timestamp is: +YYYY-MM-DD<T>HH:MM:SS,<microseconds><-+><timezone offset from UTC>. +.IP +The +.B iso +format has the same issue as +.BR ctime : +the time may be inaccurate when a system is suspended and resumed. 
.SH SEE ALSO .BR syslogd (8) .SH AUTHORS diff --git a/sys-utils/dmesg.c b/sys-utils/dmesg.c index a89fb6c75..177577152 100644 --- a/sys-utils/dmesg.c +++ b/sys-utils/dmesg.c @@ -115,7 +115,7 @@ static const struct dmesg_name facility_names[] = [FAC_BASE(LOG_UUCP)] = { "uucp", N_("UUCP subsystem") }, [FAC_BASE(LOG_CRON)] = { "cron", N_("clock daemon") }, [FAC_BASE(LOG_AUTHPRIV)] = { "authpriv", N_("security/authorization messages (private)") }, - [FAC_BASE(LOG_FTP)] = { "ftp", N_("ftp daemon") }, + [FAC_BASE(LOG_FTP)] = { "ftp", N_("FTP daemon") }, }; /* supported methods to read message buffer @@ -126,6 +126,18 @@ enum { DMESG_METHOD_MMAP /* mmap file with records (see --file) */ }; +enum { + DMESG_TIMEFTM_NONE = 0, + DMESG_TIMEFTM_CTIME, /* [ctime] */ + DMESG_TIMEFTM_CTIME_DELTA, /* [ctime <delta>] */ + DMESG_TIMEFTM_DELTA, /* [<delta>] */ + DMESG_TIMEFTM_RELTIME, /* [relative] */ + DMESG_TIMEFTM_TIME, /* [time] */ + DMESG_TIMEFTM_TIME_DELTA, /* [time <delta>] */ + DMESG_TIMEFTM_ISO8601 /* 2013-06-13T22:11:00,123456+0100 */ +}; +#define is_timefmt(c, f) ((c)->time_fmt == (DMESG_TIMEFTM_ ##f)) + struct dmesg_control { /* bit arrays -- see include/bitops.h */ char levels[ARRAY_SIZE(level_names) / NBBY + 1]; @@ -133,7 +145,7 @@ struct dmesg_control { struct timeval lasttime; /* last printed timestamp */ struct tm lasttm; /* last localtime */ - time_t boot_time; /* system boot time */ + struct timeval boot_time; /* system boot time */ int action; /* SYSLOG_ACTION_* */ int method; /* DMESG_METHOD_* */ @@ -152,16 +164,13 @@ struct dmesg_control { char *filename; char *mmap_buff; size_t pagesize; + unsigned int time_fmt; /* time format */ unsigned int follow:1, /* wait for new messages */ raw:1, /* raw mode */ fltr_lev:1, /* filter out by levels[] */ fltr_fac:1, /* filter out by facilities[] */ decode:1, /* use "facility: level: " prefix */ - notime:1, /* don't print timestamp */ - delta:1, /* show time deltas */ - reltime:1, /* show human readable relative times */ - 
ctime:1, /* show human readable time */ pager:1, /* pipe output into a pager */ color:1; /* colorize messages */ }; @@ -229,26 +238,28 @@ static void __attribute__((__noreturn__)) usage(FILE *out) fputs(_(" -C, --clear clear the kernel ring buffer\n"), out); fputs(_(" -c, --read-clear read and clear all messages\n"), out); fputs(_(" -D, --console-off disable printing messages to console\n"), out); - fputs(_(" -d, --show-delta show time delta between printed messages\n"), out); - fputs(_(" -e, --reltime show local time and time delta in readable format\n"), out); fputs(_(" -E, --console-on enable printing messages to console\n"), out); fputs(_(" -F, --file <file> use the file instead of the kernel log buffer\n"), out); fputs(_(" -f, --facility <list> restrict output to defined facilities\n"), out); fputs(_(" -H, --human human readable output\n"), out); fputs(_(" -k, --kernel display kernel messages\n"), out); - fputs(_(" -L, --color colorize messages\n"), out); + fputs(_(" -L, --color[=<when>] colorize messages (auto, always or never)\n"), out); fputs(_(" -l, --level <list> restrict output to defined levels\n"), out); fputs(_(" -n, --console-level <level> set level of messages printed to console\n"), out); fputs(_(" -P, --nopager do not pipe output into a pager\n"), out); fputs(_(" -r, --raw print the raw message buffer\n"), out); fputs(_(" -S, --syslog force to use syslog(2) rather than /dev/kmsg\n"), out); fputs(_(" -s, --buffer-size <size> buffer size to query the kernel ring buffer\n"), out); - fputs(_(" -T, --ctime show human readable timestamp (could be \n" - " inaccurate if you have used SUSPEND/RESUME)\n"), out); - fputs(_(" -t, --notime don't print messages timestamp\n"), out); fputs(_(" -u, --userspace display userspace messages\n"), out); fputs(_(" -w, --follow wait for new messages\n"), out); fputs(_(" -x, --decode decode facility and level to readable string\n"), out); + fputs(_(" -d, --show-delta show time delta between printed messages\n"), out); + 
fputs(_(" -e, --reltime show local time and time delta in readable format\n"), out); + fputs(_(" -T, --ctime show human readable timestamp\n"), out); + fputs(_(" -t, --notime don't print messages timestamp\n"), out); + fputs(_(" --time-format <format> show time stamp using format:\n" + " [delta|reltime|ctime|notime|iso]\n" + "Suspending/resume will make ctime and iso timestamps inaccurate.\n"), out); fputs(USAGE_SEPARATOR, out); fputs(USAGE_HELP, out); fputs(USAGE_VERSION, out); @@ -264,7 +275,7 @@ static void __attribute__((__noreturn__)) usage(FILE *out) level_names[i].name, _(level_names[i].help)); fputs(USAGE_SEPARATOR, out); - fprintf(out, USAGE_MAN_TAIL("dmesg(q)")); + fprintf(out, USAGE_MAN_TAIL("dmesg(1)")); exit(out == stderr ? EXIT_FAILURE : EXIT_SUCCESS); } @@ -461,17 +472,30 @@ static int get_syslog_buffer_size(void) return n > 0 ? n : 0; } -static time_t get_boot_time(void) +static int get_boot_time(struct timeval *boot_time) { + struct timespec hires_uptime; + struct timeval lores_uptime, now; struct sysinfo info; - struct timeval tv; + if (gettimeofday(&now, NULL) != 0) { + warn(_("gettimeofday failed")); + return -errno; + } + +#ifdef CLOCK_BOOTTIME + if (clock_gettime(CLOCK_BOOTTIME, &hires_uptime) == 0) { + TIMESPEC_TO_TIMEVAL(&lores_uptime, &hires_uptime); + timersub(&now, &lores_uptime, boot_time); + return 0; + } +#endif + /* fallback */ if (sysinfo(&info) != 0) warn(_("sysinfo failed")); - else if (gettimeofday(&tv, NULL) != 0) - warn(_("gettimeofday failed")); - else - return tv.tv_sec -= info.uptime; + + boot_time->tv_sec = now.tv_sec - info.uptime; + boot_time->tv_usec = 0; return 0; } @@ -615,8 +639,11 @@ static void safe_fwrite(const char *buf, size_t size, FILE *out) rc = fwrite_hex(p, len, out); else rc = fwrite(p, 1, len, out) != len; - if (rc != 0) - err(EXIT_FAILURE, _("write failed")); + if (rc != 0) { + if (errno != EPIPE) + err(EXIT_FAILURE, _("write failed")); + exit(EXIT_SUCCESS); + } } } @@ -689,9 +716,10 @@ static int 
get_next_syslog_record(struct dmesg_control *ctl, if (*begin == '[' && (*(begin + 1) == ' ' || isdigit(*(begin + 1)))) { - if (ctl->delta || ctl->ctime || ctl->reltime) + + if (!is_timefmt(ctl, NONE)) begin = parse_syslog_timestamp(begin + 1, &rec->tv); - else if (ctl->notime) + else begin = skip_item(begin, end, "]"); if (begin < end && *begin == ' ') @@ -759,7 +787,7 @@ static struct tm *record_localtime(struct dmesg_control *ctl, struct dmesg_record *rec, struct tm *tm) { - time_t t = ctl->boot_time + rec->tv.tv_sec; + time_t t = ctl->boot_time.tv_sec + rec->tv.tv_sec; return localtime_r(&t, tm); } @@ -783,6 +811,23 @@ static char *short_ctime(struct tm *tm, char *buf, size_t bufsiz) return buf; } +static char *iso_8601_time(struct dmesg_control *ctl, struct dmesg_record *rec, + char *buf, size_t bufsiz) +{ + struct tm tm; + size_t len; + record_localtime(ctl, rec, &tm); + if (strftime(buf, bufsiz, "%Y-%m-%dT%H:%M:%S", &tm) == 0) { + *buf = '\0'; + return buf; + } + len = strlen(buf); + snprintf(buf + len, bufsiz - len, ",%06d", (int)rec->tv.tv_usec); + len = strlen(buf); + strftime(buf + len, bufsiz - len, "%z", &tm); + return buf; +} + static double record_count_delta(struct dmesg_control *ctl, struct dmesg_record *rec) { @@ -852,79 +897,56 @@ static void print_record(struct dmesg_control *ctl, printf("%-6s:%-6s: ", facility_names[rec->facility].name, level_names[rec->level].name); - /* - * [sec.usec <delta>] or [ctime <delta>] - */ - if (ctl->delta) { - if (ctl->color) - color_enable(DMESG_COLOR_TIME); - if (ctl->ctime) - printf("[%s ", record_ctime(ctl, rec, buf, sizeof(buf))); - else if (ctl->notime) - putchar('['); - else - printf("[%5d.%06d ", (int) rec->tv.tv_sec, - (int) rec->tv.tv_usec); - printf("<%12.06f>] ", record_count_delta(ctl, rec)); - if (ctl->color) - color_disable(); - - /* - * [ctime] - */ - } else if (ctl->ctime) { - if (ctl->color) - color_enable(DMESG_COLOR_TIME); - printf("[%s] ", record_ctime(ctl, rec, buf, sizeof(buf))); - if 
(ctl->color) - color_disable(); - } + if (ctl->color) + color_enable(DMESG_COLOR_TIME); - /* - * [reltime] - */ - else if (ctl->reltime) { + switch (ctl->time_fmt) { double delta; struct tm cur; - + case DMESG_TIMEFTM_NONE: + break; + case DMESG_TIMEFTM_CTIME: + printf("[%s] ", record_ctime(ctl, rec, buf, sizeof(buf))); + break; + case DMESG_TIMEFTM_CTIME_DELTA: + printf("[%s <%12.06f>] ", + record_ctime(ctl, rec, buf, sizeof(buf)), + record_count_delta(ctl, rec)); + break; + case DMESG_TIMEFTM_DELTA: + printf("[<%12.06f>] ", record_count_delta(ctl, rec)); + break; + case DMESG_TIMEFTM_RELTIME: record_localtime(ctl, rec, &cur); delta = record_count_delta(ctl, rec); - - if (cur.tm_min != ctl->lasttm.tm_min || + if (cur.tm_min != ctl->lasttm.tm_min || cur.tm_hour != ctl->lasttm.tm_hour || cur.tm_yday != ctl->lasttm.tm_yday) { - if (ctl->color) - color_enable(DMESG_COLOR_RELTIME); printf("[%s] ", short_ctime(&cur, buf, sizeof(buf))); } else { - if (ctl->color) - color_enable(DMESG_COLOR_TIME); if (delta < 10) printf("[ %+8.06f] ", delta); else printf("[ %+9.06f] ", delta); } - if (ctl->color) - color_disable(); ctl->lasttm = cur; + break; + case DMESG_TIMEFTM_TIME: + printf("[%5d.%06d] ", (int)rec->tv.tv_sec, (int)rec->tv.tv_usec); + break; + case DMESG_TIMEFTM_TIME_DELTA: + printf("[%5d.%06d <%12.06f>] ", (int)rec->tv.tv_sec, + (int)rec->tv.tv_usec, record_count_delta(ctl, rec)); + break; + case DMESG_TIMEFTM_ISO8601: + printf("%s ", iso_8601_time(ctl, rec, buf, sizeof(buf))); + break; + default: + abort(); } - /* - * In syslog output the timestamp is part of the message and we don't - * parse the timestamp by default. We parse the timestamp only if - * --show-delta or --ctime is specified. - * - * In kmsg output we always parse the timesptamp, so we have to compose - * the [sec.usec] string. 
- */ - if (ctl->method == DMESG_METHOD_KMSG && - !ctl->notime && !ctl->delta && !ctl->ctime && !ctl->reltime) { - if (ctl->color) - color_enable(DMESG_COLOR_TIME); - printf("[%5d.%06d] ", (int) rec->tv.tv_sec, (int) rec->tv.tv_usec); - if (ctl->color) - color_disable(); - } + if (ctl->color) + color_disable(); mesg: mesg = rec->mesg; @@ -991,6 +1013,8 @@ static int init_kmsg(struct dmesg_control *ctl) if (!ctl->follow) mode |= O_NONBLOCK; + else + setlinebuf(stdout); ctl->kmsg = open("/dev/kmsg", mode); if (ctl->kmsg < 0) @@ -1065,7 +1089,7 @@ static int parse_kmsg_record(struct dmesg_control *ctl, goto mesg; /* C) timestamp */ - if (ctl->notime) + if (is_timefmt(ctl, NONE)) p = skip_item(p, end, ",;"); else p = parse_kmsg_timestamp(p, &rec->tv); @@ -1136,24 +1160,45 @@ static int read_kmsg(struct dmesg_control *ctl) return 0; } +static int which_time_format(const char *optarg) +{ + if (!strcmp(optarg, "notime")) + return DMESG_TIMEFTM_NONE; + if (!strcmp(optarg, "ctime")) + return DMESG_TIMEFTM_CTIME; + if (!strcmp(optarg, "delta")) + return DMESG_TIMEFTM_DELTA; + if (!strcmp(optarg, "reltime")) + return DMESG_TIMEFTM_RELTIME; + if (!strcmp(optarg, "iso")) + return DMESG_TIMEFTM_ISO8601; + errx(EXIT_FAILURE, _("unknown time format: %s"), optarg); +} + int main(int argc, char *argv[]) { char *buf = NULL; int c, nopager = 0; int console_level = 0; int klog_rc = 0; + int delta = 0; ssize_t n; static struct dmesg_control ctl = { .filename = NULL, .action = SYSLOG_ACTION_READ_ALL, .method = DMESG_METHOD_KMSG, .kmsg = -1, + .time_fmt = DMESG_TIMEFTM_TIME, + }; + int colormode = UL_COLORMODE_NEVER; + enum { + OPT_TIME_FORMAT = CHAR_MAX + 1, }; static const struct option longopts[] = { { "buffer-size", required_argument, NULL, 's' }, { "clear", no_argument, NULL, 'C' }, - { "color", no_argument, NULL, 'L' }, + { "color", optional_argument, NULL, 'L' }, { "console-level", required_argument, NULL, 'n' }, { "console-off", no_argument, NULL, 'D' }, { "console-on", 
no_argument, NULL, 'E' }, @@ -1175,14 +1220,20 @@ int main(int argc, char *argv[]) { "nopager", no_argument, NULL, 'P' }, { "userspace", no_argument, NULL, 'u' }, { "version", no_argument, NULL, 'V' }, + { "time-format", required_argument, NULL, OPT_TIME_FORMAT }, { NULL, 0, NULL, 0 } }; static const ul_excl_t excl[] = { /* rows and cols in in ASCII order */ - { 'C','D','E','c','n' }, /* clear,off,on,read-clear,level*/ + { 'C','D','E','c','n','r' }, /* clear,off,on,read-clear,level,raw*/ { 'H','r' }, /* human, raw */ { 'L','r' }, /* color, raw */ { 'S','w' }, /* syslog,follow */ + { 'T','r' }, /* ctime, raw */ + { 'd','r' }, /* delta, raw */ + { 'e','r' }, /* reltime, raw */ + { 'r','x' }, /* raw, decode */ + { 'r','t' }, /* notime, raw */ { 0 } }; int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; @@ -1192,7 +1243,7 @@ int main(int argc, char *argv[]) textdomain(PACKAGE); atexit(close_stdout); - while ((c = getopt_long(argc, argv, "CcDdEeF:f:HhkLl:n:iPrSs:TtuVwx", + while ((c = getopt_long(argc, argv, "CcDdEeF:f:HhkL::l:n:iPrSs:TtuVwx", longopts, NULL)) != -1) { err_exclusive_options(c, longopts, excl, excl_st); @@ -1208,13 +1259,13 @@ int main(int argc, char *argv[]) ctl.action = SYSLOG_ACTION_CONSOLE_OFF; break; case 'd': - ctl.delta = 1; + delta = 1; break; case 'E': ctl.action = SYSLOG_ACTION_CONSOLE_ON; break; case 'e': - ctl.reltime = 1; + ctl.time_fmt = DMESG_TIMEFTM_RELTIME; break; case 'F': ctl.filename = optarg; @@ -1227,8 +1278,8 @@ int main(int argc, char *argv[]) return EXIT_FAILURE; break; case 'H': - ctl.reltime = 1; - ctl.color = 1; + ctl.time_fmt = DMESG_TIMEFTM_RELTIME; + colormode = UL_COLORMODE_AUTO; ctl.pager = 1; break; case 'h': @@ -1239,7 +1290,10 @@ int main(int argc, char *argv[]) setbit(ctl.facilities, FAC_BASE(LOG_KERN)); break; case 'L': - ctl.color = 1; + colormode = UL_COLORMODE_AUTO; + if (optarg) + colormode = colormode_or_err(optarg, + _("unsupported color mode")); break; case 'l': ctl.fltr_lev= 1; @@ -1267,12 +1321,11 @@ int 
main(int argc, char *argv[]) ctl.bufsize = 4096; break; case 'T': - ctl.boot_time = get_boot_time(); - if (ctl.boot_time) - ctl.ctime = 1; + ctl.time_fmt = DMESG_TIMEFTM_CTIME; break; case 't': - ctl.notime = 1; + ctl.time_fmt = DMESG_TIMEFTM_NONE; + delta = 0; break; case 'u': ctl.fltr_fac = 1; @@ -1289,6 +1342,9 @@ int main(int argc, char *argv[]) case 'x': ctl.decode = 1; break; + case OPT_TIME_FORMAT: + ctl.time_fmt = which_time_format(optarg); + break; case '?': default: usage(stderr); @@ -1300,24 +1356,33 @@ int main(int argc, char *argv[]) if (argc > 1) usage(stderr); - if (ctl.raw && (ctl.fltr_lev || ctl.fltr_fac || ctl.delta || - ctl.notime || ctl.ctime || ctl.decode)) - errx(EXIT_FAILURE, _("--raw can't be used together with level, " - "facility, decode, delta, ctime or notime options")); + if (is_timefmt(&ctl, RELTIME) || + is_timefmt(&ctl, CTIME) || + is_timefmt(&ctl, ISO8601)) { - if (ctl.notime && (ctl.ctime || ctl.reltime)) - errx(EXIT_FAILURE, _("--notime can't be used together with --ctime or --reltime")); - if (ctl.reltime && ctl.ctime) - errx(EXIT_FAILURE, _("--reltime can't be used together with --ctime ")); - - if (ctl.reltime) { - ctl.boot_time = get_boot_time(); - if (!ctl.boot_time) - ctl.reltime = 0; + if (get_boot_time(&ctl.boot_time) != 0) + ctl.time_fmt = DMESG_TIMEFTM_NONE; } - if (ctl.color) - ctl.color = colors_init() ? 1 : 0; + if (delta) + switch (ctl.time_fmt) { + case DMESG_TIMEFTM_CTIME: + ctl.time_fmt = DMESG_TIMEFTM_CTIME_DELTA; + break; + case DMESG_TIMEFTM_TIME: + ctl.time_fmt = DMESG_TIMEFTM_TIME_DELTA; + break; + case DMESG_TIMEFTM_ISO8601: + warnx(_("--show-delta is ignored when used together with iso8601 time format")); + break; + default: + ctl.time_fmt = DMESG_TIMEFTM_DELTA; + } + + + ctl.color = colors_init(colormode) ? 1 : 0; + if (ctl.follow) + nopager = 1; ctl.pager = nopager ? 
0 : ctl.pager; if (ctl.pager) setup_pager(); @@ -1327,6 +1392,12 @@ int main(int argc, char *argv[]) case SYSLOG_ACTION_READ_CLEAR: if (ctl.method == DMESG_METHOD_KMSG && init_kmsg(&ctl) != 0) ctl.method = DMESG_METHOD_SYSLOG; + + if (ctl.raw + && ctl.method != DMESG_METHOD_KMSG + && (ctl.fltr_lev || ctl.fltr_fac)) + errx(EXIT_FAILURE, _("--raw could be used together with --level or " + "--facility only when read messages from /dev/kmsg")); if (ctl.pager) setup_pager(); n = read_buffer(&ctl, &buf); diff --git a/sys-utils/eject.1 b/sys-utils/eject.1 index 429f0dd0f..1acc3b3cd 100644 --- a/sys-utils/eject.1 +++ b/sys-utils/eject.1 @@ -46,7 +46,7 @@ disk eject command. .IP "\fB\-F, \-\-force\fP" Force eject, don't check device type. .IP "\fB\-h, \-\-help\fP" -Print a help text and exit. +Display help text and exit. .IP "\fB\-i, \-\-manualeject \fIon|off\fP" This option controls locking of the hardware eject button. When enabled, the drive will not be ejected when the button is pressed. This is useful when you @@ -83,7 +83,7 @@ This option specifies that the drive should be ejected using SCSI commands. Run in verbose mode; more information is displayed about what the command is doing. .IP "\fB\-V, \-\-version\fP" -Display program version and exit. +Display version information and exit. .IP "\fB\-x, \-\-cdspeed \fI<speed>\fP" With this option the drive is given a CD-ROM select speed command. The speed argument is a number indicating the desired speed (e.g. 
8 for 8X speed), or 0 diff --git a/sys-utils/eject.c b/sys-utils/eject.c index a5b56565d..03744c7e7 100644 --- a/sys-utils/eject.c +++ b/sys-utils/eject.c @@ -53,6 +53,14 @@ #include "pathnames.h" #include "sysfs.h" +/* + * sg_io_hdr_t driver_status -- see kernel include/scsi/scsi.h + */ +#ifndef DRIVER_SENSE +# define DRIVER_SENSE 0x08 +#endif + + #define EJECT_DEFAULT_DEVICE "/dev/cdrom" @@ -98,7 +106,6 @@ static long int c_arg; static long int x_arg; struct libmnt_table *mtab; -struct libmnt_cache *cache; static void vinfo(const char *fmt, va_list va) { @@ -398,6 +405,9 @@ static void close_tray(int fd) static int eject_cdrom(int fd) { #if defined(CDROMEJECT) + int ret = ioctl(fd, CDROM_LOCKDOOR, 0); + if (ret < 0) + return 0; return ioctl(fd, CDROMEJECT) >= 0; #elif defined(CDIOCEJECT) return ioctl(fd, CDIOCEJECT) >= 0; @@ -601,21 +611,31 @@ static int eject_scsi(int fd) io_hdr.cmdp = allowRmBlk; status = ioctl(fd, SG_IO, (void *)&io_hdr); - if (status < 0) + if (status < 0 || io_hdr.host_status || io_hdr.driver_status) return 0; io_hdr.cmdp = startStop1Blk; status = ioctl(fd, SG_IO, (void *)&io_hdr); - if (status < 0) + if (status < 0 || io_hdr.host_status) + return 0; + + /* Ignore errors when there is not medium -- in this case driver sense + * buffer sets MEDIUM NOT PRESENT (3a) bit. 
For more details see: + * http://www.tldp.org/HOWTO/archived/SCSI-Programming-HOWTO/SCSI-Programming-HOWTO-22.html#sec-sensecodes + * -- kzak Jun 2013 + */ + if (io_hdr.driver_status != 0 && + !(io_hdr.driver_status == DRIVER_SENSE && io_hdr.sbp && + io_hdr.sbp[12] == 0x3a)) return 0; io_hdr.cmdp = startStop2Blk; status = ioctl(fd, SG_IO, (void *)&io_hdr); - if (status < 0) + if (status < 0 || io_hdr.host_status || io_hdr.driver_status) return 0; /* force kernel to reread partition table when new disc inserted */ - status = ioctl(fd, BLKRRPART); + ioctl(fd, BLKRRPART); return 1; } @@ -704,12 +724,15 @@ static int device_get_mountpoint(char **devname, char **mnt) *mnt = NULL; if (!mtab) { + struct libmnt_cache *cache; + mtab = mnt_new_table(); if (!mtab) err(EXIT_FAILURE, _("failed to initialize libmount table")); cache = mnt_new_cache(); mnt_table_set_cache(mtab, cache); + mnt_unref_cache(cache); if (p_option) rc = mnt_table_parse_file(mtab, _PATH_PROC_MOUNTINFO); @@ -1125,8 +1148,7 @@ int main(int argc, char **argv) free(device); free(mountpoint); - mnt_free_table(mtab); - mnt_free_cache(cache); + mnt_unref_table(mtab); return EXIT_SUCCESS; } diff --git a/sys-utils/fallocate.1 b/sys-utils/fallocate.1 index 49d26e44c..efa42c1d2 100644 --- a/sys-utils/fallocate.1 +++ b/sys-utils/fallocate.1 @@ -1,7 +1,7 @@ .\" -*- nroff -*- .TH FALLOCATE 1 "September 2011" "util-linux" "User Commands" .SH NAME -fallocate \- preallocate space to a file +fallocate \- preallocate or deallocate space to a file .SH SYNOPSIS .B fallocate .RB [ \-n ] @@ -13,10 +13,11 @@ fallocate \- preallocate space to a file .I filename .SH DESCRIPTION .B fallocate -is used to preallocate blocks to a file. For filesystems which support the -fallocate system call, this is done quickly by allocating blocks and marking -them as uninitialized, requiring no IO to the data blocks. This is much faster -than creating a file by filling it with zeros. 
+is used to manipulate the allocated disk space for a file, either to deallocate +or preallocate it. For filesystems which support the fallocate system call, +preallocation is done quickly by allocating blocks and marking them as +uninitialized, requiring no IO to the data blocks. This is much faster than +creating a file by filling it with zeros. .PP As of the Linux Kernel v2.6.31, the fallocate system call is supported by the btrfs, ext4, ocfs2, and xfs filesystems. @@ -40,9 +41,9 @@ Specifies the beginning offset of the allocation, in bytes. .IP "\fB\-l, \-\-length\fP \fIlength\fP Specifies the length of the allocation, in bytes. .IP "\fB\-h, \-\-help\fP" -Print help and exit. +Display help text and exit. .IP "\fB-V, \-\-version" -Print version and exit. +Display version information and exit. .SH AUTHORS .UR sandeen@redhat.com Eric Sandeen diff --git a/sys-utils/fallocate.c b/sys-utils/fallocate.c index ff0f9e6e1..0e143166c 100644 --- a/sys-utils/fallocate.c +++ b/sys-utils/fallocate.c @@ -61,8 +61,8 @@ static void __attribute__((__noreturn__)) usage(FILE *out) fputs(USAGE_OPTIONS, out); fputs(_(" -n, --keep-size don't modify the length of the file\n" " -p, --punch-hole punch holes in the file\n" - " -o, --offset <num> offset of the allocation, in bytes\n" - " -l, --length <num> length of the allocation, in bytes\n"), out); + " -o, --offset <num> offset of the (de)allocation, in bytes\n" + " -l, --length <num> length of the (de)allocation, in bytes\n"), out); fputs(USAGE_SEPARATOR, out); fputs(USAGE_HELP, out); fputs(USAGE_VERSION, out); @@ -168,6 +168,7 @@ int main(int argc, char **argv) err(EXIT_FAILURE, _("%s: fallocate failed"), fname); } - close(fd); + if (close_fd(fd) != 0) + err(EXIT_FAILURE, _("write failed: %s"), fname); return EXIT_SUCCESS; } diff --git a/sys-utils/flock.1 b/sys-utils/flock.1 index b28526f69..99ae682d2 100644 --- a/sys-utils/flock.1 +++ b/sys-utils/flock.1 @@ -104,9 +104,9 @@ without arguments, to the shell with .BR -c . 
.TP \fB\-h\fP, \fB\-\-help\fP -Print a help message. +Display help text and exit. .IP "\fB\-V, \-\-version\fP" -Show version number and exit. +Display version information and exit. .SH EXAMPLES .TP shell1> flock /tmp -c cat @@ -152,11 +152,11 @@ also sets the FLOCKER env var to the right value so it doesn't run again. .SH "EXIT STATUS" The command uses .B sysexits.h -return values for everything else but an options +return values for everything, except when using either of the options .I \-n or .I \-w -failures which return either the value given by the +which report a failure to acquire the lock with a return value given by the .I \-E option, or 1 by default. .SH AUTHOR diff --git a/sys-utils/flock.c b/sys-utils/flock.c index 3322e142d..f160eaca5 100644 --- a/sys-utils/flock.c +++ b/sys-utils/flock.c @@ -49,7 +49,7 @@ static void __attribute__((__noreturn__)) usage(int ex) { fprintf(stderr, USAGE_HEADER); fprintf(stderr, - _(" %1$s [options] <file|directory> <command> [command args]\n" + _(" %1$s [options] <file|directory> <command> [<arguments>...]\n" " %1$s [options] <file|directory> -c <command>\n" " %1$s [options] <file descriptor number>\n"), program_invocation_short_name); @@ -250,6 +250,7 @@ int main(int argc, char *argv[]) /* otherwise try again */ continue; case EIO: + case EBADF: /* since Linux 3.4 (commit 55725513) */ /* Probably NFSv4 where flock() is emulated by fcntl(). * Let's try to reopen in read-write mode. */ diff --git a/sys-utils/fsfreeze.8 b/sys-utils/fsfreeze.8 index df161462c..f57bbd7d0 100644 --- a/sys-utils/fsfreeze.8 +++ b/sys-utils/fsfreeze.8 @@ -39,7 +39,7 @@ the traditional atime semantics (mount option strictatime, for more details see .SH OPTIONS .IP "\fB\-h, \-\-help\fP" -Print help and exit. +Display help text and exit. .IP "\fB\-f, \-\-freeze\fP" This option requests the specified a filesystem to be frozen from new modifications. 
When this is selected, all ongoing transactions in the @@ -57,7 +57,7 @@ This option is used to un-freeze the filesystem and allow operations to continue. Any filesystem modifications that were blocked by the freeze are unblocked and allowed to complete. .IP "\fB\-V, \-\-version\fP" -Show version number and exit. +Display version information and exit. .SH AUTHOR .PP Written by Hajime Taira. diff --git a/sys-utils/fstab.5 b/sys-utils/fstab.5 index 91c31576c..487480f06 100644 --- a/sys-utils/fstab.5 +++ b/sys-utils/fstab.5 @@ -88,9 +88,10 @@ writing LABEL=<label> or UUID=<uuid>, e.g., `LABEL=Boot' or `UUID=3e6be9de\%-8139\%-11d1\%-9106\%-a43f08d823a6'. It's also possible to use PARTUUID= and PARTLABEL=. These partitions identifiers -are supported for GUID Partition Table (GPT) and MAC partition table only. +are supported for example for GUID Partition Table (GPT). See +.BR mount (8), .BR blkid (8) or .BR lsblk (8) diff --git a/sys-utils/fstrim.8 b/sys-utils/fstrim.8 index f3708b5ac..ae331e310 100644 --- a/sys-utils/fstrim.8 +++ b/sys-utils/fstrim.8 @@ -4,6 +4,7 @@ fstrim \- discard unused blocks on a mounted filesystem .SH SYNOPSIS .B fstrim +.RB [ \-a ] .RB [ \-o .IR offset ] .RB [ \-l @@ -28,6 +29,15 @@ The .I mountpoint argument is the pathname of the directory where the filesystem is mounted. +.PP +Running +.B fstrim +more frequently or even using +.B mount -o discard +might negatively affect the lifetime of poor-quality SSD devices. For most +desktop and server systems a sufficient trimming frequency is once a week. Note that not all +devices support a queued trim, so each trim command incurs a performance penalty +on whatever else might be trying to use the disk at the time. .SH OPTIONS The \fIoffset\fR, \fIlength\fR, and \fIminimum-free-extent\fR arguments may be @@ -35,8 +45,14 @@ followed by the multiplicative suffixes KiB=1024, MiB=1024*1024, and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g. 
"K" has the same meaning as "KiB") or the suffixes KB=1000, MB=1000*1000, and so on for GB, TB, PB, EB, ZB and YB. +.IP "\fB\-a, \-\-all\fP" +Trim all mounted filesystems on devices that support the discard operation. +The other supplied options, like \fB\-\-offset\fR, \fB\-\-length\fR and +\fB\-\-minimum\fR, are applied to all these devices. +Errors from filesystems that do not support the discard operation are silently +ignored. .IP "\fB\-h, \-\-help\fP" -Print help and exit. +Display help text and exit. .IP "\fB\-o, \-\-offset\fP \fIoffset\fP" Byte offset in filesystem from which to begin searching for free blocks to discard. Default value is zero, starting at the beginning of the @@ -73,6 +89,20 @@ LVM setup, etc. These reductions would not be reflected in fstrim_range.len .B --length option). +.SH RETURN CODES +.IP 0 +success +.IP 1 +failure +.IP 32 +all failed +.IP 64 +some filesystem discards have succeeded, some failed +.PP +The command +.B fstrim --all +returns 0 (all success), 32 (all failed) or 64 (some failed, some success). 
+ .SH AUTHOR .nf Lukas Czerner <lczerner@redhat.com> diff --git a/sys-utils/fstrim.c b/sys-utils/fstrim.c index 332845643..28d0ed74c 100644 --- a/sys-utils/fstrim.c +++ b/sys-utils/fstrim.c @@ -41,6 +41,11 @@ #include "strutils.h" #include "c.h" #include "closestream.h" +#include "pathnames.h" +#include "sysfs.h" +#include "exitcodes.h" + +#include <libmount.h> #ifndef FITRIM struct fstrim_range { @@ -51,16 +56,171 @@ struct fstrim_range { #define FITRIM _IOWR('X', 121, struct fstrim_range) #endif +/* returns: 0 = success, 1 = unsupported, < 0 = error */ +static int fstrim_filesystem(const char *path, struct fstrim_range *rangetpl, + int verbose) +{ + int fd; + struct stat sb; + struct fstrim_range range; + + /* kernel modifies the range */ + memcpy(&range, rangetpl, sizeof(range)); + + if (stat(path, &sb) == -1) { + warn(_("stat failed %s"), path); + return -1; + } + if (!S_ISDIR(sb.st_mode)) { + warnx(_("%s: not a directory"), path); + return -1; + } + + fd = open(path, O_RDONLY); + if (fd < 0) { + warn(_("cannot open %s"), path); + return -1; + } + errno = 0; + if (ioctl(fd, FITRIM, &range)) { + int rc = errno == EOPNOTSUPP || errno == ENOTTY ? 1 : -1; + + if (rc != 1) + warn(_("%s: FITRIM ioctl failed"), path); + close(fd); + return rc; + } + + if (verbose) { + char *str = size_to_human_string( + SIZE_SUFFIX_3LETTER | SIZE_SUFFIX_SPACE, + (uint64_t) range.len); + /* TRANSLATORS: The standard value here is a very large number. */ + printf(_("%s: %s (%" PRIu64 " bytes) trimmed\n"), + path, str, (uint64_t) range.len); + free(str); + } + close(fd); + return 0; +} + +static int has_discard(const char *devname, struct sysfs_cxt *wholedisk) +{ + struct sysfs_cxt cxt, *parent = NULL; + uint64_t dg = 0; + dev_t disk = 0, dev; + int rc; + + dev = sysfs_devname_to_devno(devname, NULL); + if (!dev) + return 1; + /* + * This is tricky to read the info from sys/, because the queue + * atrributes are provided for whole devices (disk) only. 
We're trying + * to reuse the whole-disk sysfs context to optimize this stuff (as + * system usualy have just one disk only). + */ + if (sysfs_devno_to_wholedisk(dev, NULL, 0, &disk) || !disk) + return 1; + if (dev != disk) { + if (wholedisk->devno != disk) { + sysfs_deinit(wholedisk); + if (sysfs_init(wholedisk, disk, NULL)) + return 1; + } + parent = wholedisk; + } + + rc = sysfs_init(&cxt, dev, parent); + if (!rc) + rc = sysfs_read_u64(&cxt, "queue/discard_granularity", &dg); + + sysfs_deinit(&cxt); + return rc == 0 && dg > 0; +} + +/* + * fstrim --all follows "mount -a" return codes: + * + * 0 = all success + * 32 = all failed + * 64 = some failed, some success + */ +static int fstrim_all(struct fstrim_range *rangetpl, int verbose) +{ + struct libmnt_fs *fs; + struct libmnt_iter *itr; + struct libmnt_table *tab; + struct sysfs_cxt wholedisk = UL_SYSFSCXT_EMPTY; + int cnt = 0, cnt_err = 0; + + itr = mnt_new_iter(MNT_ITER_BACKWARD); + if (!itr) + err(MOUNT_EX_FAIL, _("failed to initialize libmount iterator")); + + tab = mnt_new_table_from_file(_PATH_PROC_MOUNTINFO); + if (!tab) + err(MOUNT_EX_FAIL, _("failed to parse %s"), _PATH_PROC_MOUNTINFO); + + while (mnt_table_next_fs(tab, itr, &fs) == 0) { + const char *src = mnt_fs_get_srcpath(fs), + *tgt = mnt_fs_get_target(fs); + char *path; + int rc = 1; + + if (!src || !tgt || *src != '/' || + mnt_fs_is_pseudofs(fs) || + mnt_fs_is_netfs(fs)) + continue; + + /* Is it really accessible mountpoint? Not all mountpoints are + * accessible (maybe over mounted by another fylesystem) */ + path = mnt_get_mountpoint(tgt); + if (path && strcmp(path, tgt) == 0) + rc = 0; + free(path); + if (rc) + continue; /* overlaying mount */ + + if (!has_discard(src, &wholedisk)) + continue; + cnt++; + + /* + * We're able to detect that the device supports discard, but + * things also depend on filesystem or device mapping, for + * example vfat or LUKS (by default) does not support FSTRIM. 
+ * + * This is reason why we ignore EOPNOTSUPP and ENOTTY errors + * from discard ioctl. + */ + if (fstrim_filesystem(tgt, rangetpl, verbose) < 0) + cnt_err++; + } + + sysfs_deinit(&wholedisk); + mnt_free_table(tab); + + if (cnt && cnt == cnt_err) + return MOUNT_EX_FAIL; /* all failed */ + if (cnt && cnt_err) + return MOUNT_EX_SOMEOK; /* some ok */ + + return EXIT_SUCCESS; +} + static void __attribute__((__noreturn__)) usage(FILE *out) { fputs(USAGE_HEADER, out); fprintf(out, _(" %s [options] <mount point>\n"), program_invocation_short_name); fputs(USAGE_OPTIONS, out); - fputs(_(" -o, --offset <num> offset in bytes to discard from\n" - " -l, --length <num> length of bytes to discard from the offset\n" - " -m, --minimum <num> minimum extent length to discard\n" - " -v, --verbose print number of discarded bytes\n"), out); + fputs(_(" -a, --all trim all mounted filesystems that are supported\n"), out); + fputs(_(" -o, --offset <num> the offset in bytes to start discarding from\n"), out); + fputs(_(" -l, --length <num> the number of bytes to discard\n"), out); + fputs(_(" -m, --minimum <num> the minimum extent length to discard\n"), out); + fputs(_(" -v, --verbose print number of discarded bytes\n"), out); + fputs(USAGE_SEPARATOR, out); fputs(USAGE_HELP, out); fputs(USAGE_VERSION, out); @@ -71,11 +231,11 @@ static void __attribute__((__noreturn__)) usage(FILE *out) int main(int argc, char **argv) { char *path; - int c, fd, verbose = 0; + int c, rc, verbose = 0, all = 0; struct fstrim_range range; - struct stat sb; static const struct option longopts[] = { + { "all", 0, 0, 'a' }, { "help", 0, 0, 'h' }, { "version", 0, 0, 'V' }, { "offset", 1, 0, 'o' }, @@ -93,8 +253,11 @@ int main(int argc, char **argv) memset(&range, 0, sizeof(range)); range.len = ULLONG_MAX; - while ((c = getopt_long(argc, argv, "hVo:l:m:v", longopts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "ahVo:l:m:v", longopts, NULL)) != -1) { switch(c) { + case 'a': + all = 1; + break; case 'h': 
usage(stdout); break; @@ -122,38 +285,26 @@ int main(int argc, char **argv) } } - if (optind == argc) - errx(EXIT_FAILURE, _("no mountpoint specified")); - - path = argv[optind++]; + if (!all) { + if (optind == argc) + errx(EXIT_FAILURE, _("no mountpoint specified")); + path = argv[optind++]; + } if (optind != argc) { warnx(_("unexpected number of arguments")); usage(stderr); } - if (stat(path, &sb) == -1) - err(EXIT_FAILURE, _("stat failed %s"), path); - if (!S_ISDIR(sb.st_mode)) - errx(EXIT_FAILURE, _("%s: not a directory"), path); - - fd = open(path, O_RDONLY); - if (fd < 0) - err(EXIT_FAILURE, _("cannot open %s"), path); - - if (ioctl(fd, FITRIM, &range)) - err(EXIT_FAILURE, _("%s: FITRIM ioctl failed"), path); - - if (verbose) { - char *str = size_to_human_string(SIZE_SUFFIX_3LETTER | - SIZE_SUFFIX_SPACE, - (uint64_t) range.len); - /* TRANSLATORS: The standard value here is a very large number. */ - printf(_("%s: %s (%" PRIu64 " bytes) trimmed\n"), - path, str, - (uint64_t) range.len); - free(str); + if (all) + rc = fstrim_all(&range, verbose); + else { + rc = fstrim_filesystem(path, &range, verbose); + if (rc == 1) { + warnx(_("%s: discard operation not supported."), path); + rc = EXIT_FAILURE; + } } - close(fd); - return EXIT_SUCCESS; + + return rc; } diff --git a/sys-utils/hwclock-cmos.c b/sys-utils/hwclock-cmos.c index b56616245..58b3ea5dc 100644 --- a/sys-utils/hwclock-cmos.c +++ b/sys-utils/hwclock-cmos.c @@ -315,13 +315,11 @@ static inline unsigned long cmos_read(unsigned long reg) unsigned char v = reg | 0x80; lseek(dev_port_fd, clock_ctl_addr, 0); if (write(dev_port_fd, &v, 1) == -1 && debug) - printf(_ - ("cmos_read(): write to control address %X failed: %m\n"), + warn(_("cmos_read(): write to control address %X failed"), clock_ctl_addr); lseek(dev_port_fd, clock_data_addr, 0); if (read(dev_port_fd, &v, 1) == -1 && debug) - printf(_ - ("cmos_read(): read data address %X failed: %m\n"), + warn(_("cmos_read(): read data address %X failed"), 
clock_data_addr); return v; } else { @@ -358,14 +356,12 @@ static inline unsigned long cmos_write(unsigned long reg, unsigned long val) unsigned char v = reg | 0x80; lseek(dev_port_fd, clock_ctl_addr, 0); if (write(dev_port_fd, &v, 1) == -1 && debug) - printf(_ - ("cmos_write(): write to control address %X failed: %m\n"), + warn(_("cmos_write(): write to control address %X failed"), clock_ctl_addr); v = (val & 0xff); lseek(dev_port_fd, clock_data_addr, 0); if (write(dev_port_fd, &v, 1) == -1 && debug) - printf(_ - ("cmos_write(): write to data address %X failed: %m\n"), + warn(_("cmos_write(): write to data address %X failed"), clock_data_addr); } else { outb(reg, clock_ctl_addr); diff --git a/sys-utils/hwclock.8 b/sys-utils/hwclock.8 index 462a25891..09f1edc5e 100644 --- a/sys-utils/hwclock.8 +++ b/sys-utils/hwclock.8 @@ -117,12 +117,10 @@ the difference every 10 seconds. This will also print the frequency offset and tick. .TP .BR \-h , \ \-\-help -Display a help text and exit. +Display help text and exit. .TP .BR \-V , \ \-\-version -Display the version of -.B hwclock -and exit. +Display version information and exit. .SH OPTIONS .PP @@ -375,9 +373,7 @@ would happen if you used the program to set it while the system is running. You can, however, do whatever you want to the Hardware Clock while the system is running, and the next time Linux starts up, it will do so with the adjusted time from the Hardware -Clock. You can also use the program -.BR adjtimex (8) -to smoothly adjust the System Time while the system runs. +Clock. .PP A Linux kernel maintains a concept of a local timezone for the system. But don't be misled -- almost nobody cares what timezone the kernel @@ -598,13 +594,6 @@ can turn it off by running anything, including .IR "hwclock \-\-hctosys" , that sets the System Time the old fashioned way. -To see if it is on or -off, use the command -.I adjtimex \-\-print -and look at the value of "status". 
If the "64" bit of this number -(expressed in binary) equal to 0, 11 minute mode is on. Otherwise, it -is off. - If your system runs with 11 minute mode on, don't use .I hwclock \-\-adjust or @@ -650,7 +639,6 @@ on old systems) .I /proc/cpuinfo .SH "SEE ALSO" -.BR adjtimex (8), .BR date (1), .BR gettimeofday (2), .BR settimeofday (2), diff --git a/sys-utils/hwclock.c b/sys-utils/hwclock.c index 5dd7919ec..c7789c8d2 100644 --- a/sys-utils/hwclock.c +++ b/sys-utils/hwclock.c @@ -125,7 +125,7 @@ struct adjtime { * We are running in debug mode, wherein we put a lot of information about * what we're doing to standard output. */ -bool debug; +int debug; /* Workaround for Award 4.50g BIOS bug: keep the year in a file. */ bool badyear; @@ -526,43 +526,141 @@ set_hardware_clock_exact(const time_t sethwtime, const struct timeval refsystime, const bool universal, const bool testing) { - time_t newhwtime = sethwtime; - struct timeval beginsystime, nowsystime; - double tdiff; - int time_resync = 1; - /* - * Now delay some more until Hardware Clock time newhwtime arrives. - * The 0.5 s is because the Hardware Clock always sets to your set - * time plus 500 ms (because it is designed to update to the next - * second precisely 500 ms after you finish the setting). + * The Hardware Clock can only be set to any integer time plus one + * half second. The integer time is required because there is no + * interface to set or get a fractional second. The additional half + * second is because the Hardware Clock updates to the following + * second precisely 500 ms (not 1 second!) after you release the + * divider reset (after setting the new time) - see description of + * DV2, DV1, DV0 in Register A in the MC146818A data sheet (and note + * that although that document doesn't say so, real-world code seems + * to expect that the SET bit in Register B functions the same way). 
+ * That means that, e.g., when you set the clock to 1:02:03, it + * effectively really sets it to 1:02:03.5, because it will update to + * 1:02:04 only half a second later. Our caller passes the desired + * integer Hardware Clock time in sethwtime, and the corresponding + * system time (which may have a fractional part, and which may or may + * not be the same!) in refsystime. In an ideal situation, we would + * then apply sethwtime to the Hardware Clock at refsystime+500ms, so + * that when the Hardware Clock ticks forward to sethwtime+1s half a + * second later at refsystime+1000ms, everything is in sync. So we + * spin, waiting for gettimeofday() to return a time at or after that + * time (refsystime+500ms) up to a tolerance value, initially 1ms. If + * we miss that time due to being preempted for some other process, + * then we increase the margin a little bit (initially 1ms, doubling + * each time), add 1 second (or more, if needed to get a time that is + * in the future) to both the time for which we are waiting and the + * time that we will apply to the Hardware Clock, and start waiting + * again. + * + * For example, the caller requests that we set the Hardware Clock to + * 1:02:03, with reference time (current system time) = 6:07:08.250. + * We want the Hardware Clock to update to 1:02:04 at 6:07:09.250 on + * the system clock, and the first such update will occur 0.500 + * seconds after we write to the Hardware Clock, so we spin until the + * system clock reads 6:07:08.750. If we get there, great, but let's + * imagine the system is so heavily loaded that our process is + * preempted and by the time we get to run again, the system clock + * reads 6:07:11.990. We now want to wait until the next xx:xx:xx.750 + * time, which is 6:07:12.750 (4.5 seconds after the reference time), + * at which point we will set the Hardware Clock to 1:02:07 (4 seconds + * after the originally requested time). 
If we do that successfully, + * then at 6:07:13.250 (5 seconds after the reference time), the + * Hardware Clock will update to 1:02:08 (5 seconds after the + * originally requested time), and all is well thereafter. */ - do { - if (time_resync) { - gettimeofday(&beginsystime, NULL); - tdiff = time_diff(beginsystime, refsystime); - newhwtime = sethwtime + (int)(tdiff + 0.5); - if (debug) - printf(_ - ("Time elapsed since reference time has been %.6f seconds.\n" - "Delaying further to reach the new time.\n"), - tdiff); - time_resync = 0; + + time_t newhwtime = sethwtime; + double target_time_tolerance_secs = 0.001; /* initial value */ + double tolerance_incr_secs = 0.001; /* initial value */ + const double RTC_SET_DELAY_SECS = 0.5; /* 500 ms */ + const struct timeval RTC_SET_DELAY_TV = { 0, RTC_SET_DELAY_SECS * 1E6 }; + + struct timeval targetsystime; + struct timeval nowsystime; + struct timeval prevsystime = refsystime; + double deltavstarget; + + timeradd(&refsystime, &RTC_SET_DELAY_TV, &targetsystime); + + while (1) { + double ticksize; + + /* FOR TESTING ONLY: inject random delays of up to 1000ms */ + if (debug >= 10) { + int usec = random() % 1000000; + printf(_("sleeping ~%d usec\n"), usec); + usleep(usec); } gettimeofday(&nowsystime, NULL); - tdiff = time_diff(nowsystime, beginsystime); - if (tdiff < 0) { - time_resync = 1; /* probably backward time reset */ - continue; - } - if (tdiff > 0.1) { - time_resync = 1; /* probably forward time reset */ - continue; + deltavstarget = time_diff(nowsystime, targetsystime); + ticksize = time_diff(nowsystime, prevsystime); + prevsystime = nowsystime; + + if (ticksize < 0) { + if (debug) + printf(_("time jumped backward %.6f seconds " + "to %ld.%06d - retargeting\n"), + ticksize, (long)nowsystime.tv_sec, + (int)nowsystime.tv_usec); + /* The retarget is handled at the end of the loop. 
*/ + } else if (deltavstarget < 0) { + /* deltavstarget < 0 if current time < target time */ + if (debug >= 2) + printf(_("%ld.%06d < %ld.%06d (%.6f)\n"), + (long)nowsystime.tv_sec, + (int)nowsystime.tv_usec, + (long)targetsystime.tv_sec, + (int)targetsystime.tv_usec, + deltavstarget); + continue; /* not there yet - keep spinning */ + } else if (deltavstarget <= target_time_tolerance_secs) { + /* Close enough to the target time; done waiting. */ + break; + } else /* (deltavstarget > target_time_tolerance_secs) */ { + /* + * We missed our window. Increase the tolerance and + * aim for the next opportunity. + */ + if (debug) + printf(_("missed it - %ld.%06d is too far " + "past %ld.%06d (%.6f > %.6f)\n"), + (long)nowsystime.tv_sec, + (int)nowsystime.tv_usec, + (long)targetsystime.tv_sec, + (int)targetsystime.tv_usec, + deltavstarget, + target_time_tolerance_secs); + target_time_tolerance_secs += tolerance_incr_secs; + tolerance_incr_secs *= 2; } - beginsystime = nowsystime; - tdiff = time_diff(nowsystime, refsystime); - } while (newhwtime == sethwtime + (int)(tdiff + 0.5)); + + /* + * Aim for the same offset (tv_usec) within the second in + * either the current second (if that offset hasn't arrived + * yet), or the next second. 
+ */ + if (nowsystime.tv_usec < targetsystime.tv_usec) + targetsystime.tv_sec = nowsystime.tv_sec; + else + targetsystime.tv_sec = nowsystime.tv_sec + 1; + } + + newhwtime = sethwtime + + (int)(time_diff(nowsystime, refsystime) + - RTC_SET_DELAY_SECS /* don't count this */ + + 0.5 /* for rounding */); + if (debug) + printf(_("%ld.%06d is close enough to %ld.%06d (%.6f < %.6f)\n" + "Set RTC to %ld (%ld + %d; refsystime = %ld.%06d)\n"), + (long)nowsystime.tv_sec, (int)nowsystime.tv_usec, + (long)targetsystime.tv_sec, (int)targetsystime.tv_usec, + deltavstarget, target_time_tolerance_secs, + (long)newhwtime, (long)sethwtime, + (int)(newhwtime - sethwtime), + (long)refsystime.tv_sec, (int)refsystime.tv_usec); set_hardware_clock(newhwtime, universal, testing); } @@ -1636,7 +1734,7 @@ int main(int argc, char **argv) switch (c) { case 'D': - debug = TRUE; + ++debug; break; case 'a': adjust = TRUE; @@ -1953,10 +2051,4 @@ void __attribute__((__noreturn__)) hwaudit_exit(int status) * * hwclock uses this method, and considers the Hardware Clock to have * infinite precision. - * - * TODO: Enhancements needed: - * - * - When waiting for whole second boundary in set_hardware_clock_exact, - * fail if we miss the goal by more than .1 second, as could happen if we - * get pre-empted (by the kernel dispatcher). */ diff --git a/sys-utils/hwclock.h b/sys-utils/hwclock.h index 175a6d1ae..69b0ce200 100644 --- a/sys-utils/hwclock.h +++ b/sys-utils/hwclock.h @@ -24,7 +24,6 @@ extern struct clock_ops *probe_for_kd_clock(void); typedef int bool; /* hwclock.c */ -extern char *progname; extern int debug; extern unsigned long epoch_option; extern double time_diff(struct timeval subtrahend, struct timeval subtractor); diff --git a/sys-utils/ipcmk.1 b/sys-utils/ipcmk.1 index 0900a19b1..0bac9a092 100644 --- a/sys-utils/ipcmk.1 +++ b/sys-utils/ipcmk.1 @@ -29,13 +29,13 @@ Message queue. .SH "ADDITIONAL OPTIONS" .TP \fB\-p\fR, \fB\-\-mode\fR [\fImode\fR] -Permission for the resource. 
Default is 0644. +Access permissions for the resource. Default is 0644. .TP \fB\-h\fR, \fB\-\-help\fR -Display a short help message and exit. +Display help text and exit. .TP \fB\-V\fR, \fB\-\-version\fR -Output version information and exit. +Display version information and exit. .PP .SH "SEE ALSO" .BR ipcrm (1), diff --git a/sys-utils/ipcrm.1 b/sys-utils/ipcrm.1 index 6cf74a510..51ccd48ea 100644 --- a/sys-utils/ipcrm.1 +++ b/sys-utils/ipcrm.1 @@ -5,13 +5,13 @@ ipcrm \- remove a message queue, semaphore set or shared memory id .SH SYNOPSIS .B ipcrm -[options] +.RI [ options ] .br .B ipcrm -.I <shm|msg|sem> <id> -[...] +.RB { shm | msg | sem } +.IR id ... .SH DESCRIPTION -.I ipcrm +.B ipcrm removes System V interprocess communication (IPC) objects and associated data structures from the system. In order to delete such objects, you must be superuser, or @@ -28,65 +28,65 @@ after all currently attached processes have detached the object from their virtual address space. .PP Two syntax styles are supported. The old Linux historical syntax specifies -a three letter keyword indicating which class of object is to be deleted, +a three-letter keyword indicating which class of object is to be deleted, followed by one or more IPC identifiers for objects of this type. .PP The SUS-compliant syntax allows the specification of zero or more objects of all three types in a single command line, -with objects specified either by key or by identifier. (See below.) +with objects specified either by key or by identifier (see below). Both keys and identifiers may be specified in decimal, hexadecimal (specified with an initial '0x' or '0X'), or octal (specified with an initial '0'). .SH OPTIONS .TP \fB\-M\fR, \fB\-\-shmem\-key\fR \fIshmkey\fR -removes the shared memorysegment created with +Remove the shared memory segment created with .I shmkey after the last detach is performed. 
.TP \fB\-m\fR, \fB\-\-shmem\-id\fR \fIshmid\fR -removes the shared memory segment identified by +Remove the shared memory segment identified by .I shmid after the last detach is performed. .TP \fB\-Q\fR, \fB\-\-queue\-key\fR \fImsgkey\fR -removes the message queue created with +Remove the message queue created with .IR msgkey . .TP \fB\-q\fR, \fB\-\-queue\-id\fR \fImsgid\fR -removes the message queue identified by +Remove the message queue identified by .IR msgid . .TP \fB\-S\fR, \fB\-\-semaphore\-key\fR \fIsemkey\fR -removes the semaphore created with +Remove the semaphore created with .IR semkey . .TP \fB\-s\fR, \fB\-\-semaphore\-id\fR \fIsemid\fR -removes the semaphore identified by +Remove the semaphore identified by .IR semid . .TP -\fB-a\fR, \fB\-\-all\fR [\fIshm msg sem\fR] -Remove all resources. When option argument is provided the removal is -performed only to for the specified resource types. Warning! Do not use +\fB-a\fR, \fB\-\-all\fR [\fBshm\fR] [\fBmsg\fR] [\fBsem\fR] +Remove all resources. When an option argument is provided, the removal is +performed only for the specified resource types. \fIWarning!\fR Do not use .B \-a -if you are unsure how the software using resources might react on missing -objects. Some programs create these resources at start up and may not have -any code to deal unexpected disappearance. +if you are unsure how the software using the resources might react to missing +objects. Some programs create these resources at startup and may not have +any code to deal with an unexpected disappearance. .LP The details of the removes are described in -.IR msgctl (2), -.IR shmctl (2), +.BR msgctl (2), +.BR shmctl (2), and -.IR semctl (2). +.BR semctl (2). The identifiers and keys may be found by using -.IR ipcs (1). +.BR ipcs (1). .SH NOTES -In its first Linux implementation, ipcrm used the deprecated syntax +In its first Linux implementation, \fBipcrm\fR used the deprecated syntax shown in the .BR SYNOPSIS . 
-Functionality present in other *nix implementations of ipcrm has since +Functionality present in other *nix implementations of \fBipcrm\fR has since been added, namely the ability to delete resources by key (not just -identifier), and to respect the same command-line syntax. For backward +identifier), and to respect the same command-line syntax. For backward compatibility the previous syntax is still supported. .\" .SH AUTHORS .\" Andre C. Mazzone (linuxdev@karagee.com) diff --git a/sys-utils/ipcs.1 b/sys-utils/ipcs.1 index c070d1a62..453ce5c01 100644 --- a/sys-utils/ipcs.1 +++ b/sys-utils/ipcs.1 @@ -21,7 +21,7 @@ Print details only on the resource identified by .IR id . .TP \fB\-h\fR, \fB\-\-help\fR -Display a help text and exit. +Display help text and exit. .TP \fB\-V\fR, \fB\-\-version\fR Display version information and exit. diff --git a/sys-utils/ipcs.c b/sys-utils/ipcs.c index 14f5f0ba1..16e520649 100644 --- a/sys-utils/ipcs.c +++ b/sys-utils/ipcs.c @@ -278,7 +278,6 @@ static void do_shm (char format, int unit) */ if (ipc_shm_get_info(-1, &shmds) < 1) return; - shmdsp = shmds; for (shmdsp = shmds; shmdsp->next != NULL; shmdsp = shmdsp->next) { if (format == CREATOR) { @@ -396,7 +395,6 @@ static void do_sem (char format) */ if (ipc_sem_get_info(-1, &semds) < 1) return; - semdsp = semds; for (semdsp = semds; semdsp->next != NULL; semdsp = semdsp->next) { if (format == CREATOR) { @@ -501,7 +499,6 @@ static void do_msg (char format, int unit) */ if (ipc_msg_get_info(-1, &msgds) < 1) return; - msgdsp = msgds; for (msgdsp = msgds; msgdsp->next != NULL; msgdsp = msgdsp->next) { if (format == CREATOR) { diff --git a/sys-utils/ipcutils.c b/sys-utils/ipcutils.c index 62d742845..7a349ffe7 100644 --- a/sys-utils/ipcutils.c +++ b/sys-utils/ipcutils.c @@ -105,7 +105,7 @@ int ipc_shm_get_info(int id, struct shm_data **shmds) f = path_fopen("r", 0, _PATH_PROC_SYSV_SHM); if (!f) - goto fallback; + goto shm_fallback; while (fgetc(f) != '\n'); /* skip header */ @@ -153,28 
+153,20 @@ int ipc_shm_get_info(int id, struct shm_data **shmds) return i; /* Fallback; /proc or /sys file(s) missing. */ -fallback: - i = id < 0 ? 0 : id; - +shm_fallback: maxid = shmctl(0, SHM_INFO, (struct shmid_ds *) &dummy); - if (maxid < 0) - return 0; - while (i <= maxid) { + for (int j = 0; j <= maxid; j++) { int shmid; struct shmid_ds shmseg; struct ipc_perm *ipcp = &shmseg.shm_perm; - shmid = shmctl(i, SHM_STAT, &shmseg); - if (shmid < 0) { - if (-1 < id) { - free(*shmds); - return 0; - } - i++; + shmid = shmctl(j, SHM_STAT, &shmseg); + if (shmid < 0 || (id > -1 && shmid != id)) { continue; } + i++; p->shm_perm.key = ipcp->KEY; p->shm_perm.id = shmid; p->shm_perm.mode = ipcp->mode; @@ -196,11 +188,12 @@ fallback: p->next = xcalloc(1, sizeof(struct shm_data)); p = p->next; p->next = NULL; - i++; } else - return 1; + break; } + if (i == 0) + free(*shmds); return i; } @@ -299,30 +292,22 @@ int ipc_sem_get_info(int id, struct sem_data **semds) return i; /* Fallback; /proc or /sys file(s) missing. */ - sem_fallback: - i = id < 0 ? 0 : id; - +sem_fallback: arg.array = (ushort *) (void *)&dummy; maxid = semctl(0, 0, SEM_INFO, arg); - if (maxid < 0) - return 0; - while (i <= maxid) { + for (int j = 0; j <= maxid; j++) { int semid; struct semid_ds semseg; struct ipc_perm *ipcp = &semseg.sem_perm; arg.buf = (struct semid_ds *)&semseg; - semid = semctl(i, 0, SEM_STAT, arg); - if (semid < 0) { - if (-1 < id) { - free(*semds); - return 0; - } - i++; + semid = semctl(j, 0, SEM_STAT, arg); + if (semid < 0 || (id > -1 && semid != id)) { continue; } + i++; p->sem_perm.key = ipcp->KEY; p->sem_perm.id = semid; p->sem_perm.mode = ipcp->mode; @@ -341,10 +326,12 @@ int ipc_sem_get_info(int id, struct sem_data **semds) i++; } else { get_sem_elements(p); - return 1; + break; } } + if (i == 0) + free(*semds); return i; } @@ -422,27 +409,19 @@ int ipc_msg_get_info(int id, struct msg_data **msgds) return i; /* Fallback; /proc or /sys file(s) missing. 
*/ - msg_fallback: - i = id < 0 ? 0 : id; +msg_fallback: + maxid = msgctl(0, MSG_INFO, &dummy); - maxid = msgctl(id, MSG_STAT, &dummy); - if (maxid < 0) - return 0; - - while (i <= maxid) { + for (int j = 0; j <= maxid; j++) { int msgid; struct ipc_perm *ipcp = &msgseg.msg_perm; - msgid = msgctl(i, MSG_STAT, &msgseg); - if (msgid < 0) { - if (-1 < id) { - free(*msgds); - return 0; - } - i++; + msgid = msgctl(j, MSG_STAT, &msgseg); + if (msgid < 0 || (id > -1 && msgid != id)) { continue; } + i++; p->msg_perm.key = ipcp->KEY; p->msg_perm.id = msgid; p->msg_perm.mode = ipcp->mode; @@ -463,11 +442,12 @@ int ipc_msg_get_info(int id, struct msg_data **msgds) p->next = xcalloc(1, sizeof(struct msg_data)); p = p->next; p->next = NULL; - i++; } else - return 1; + break; } + if (i == 0) + free(*msgds); return i; } diff --git a/sys-utils/ldattach.8 b/sys-utils/ldattach.8 index 8ce14f3c1..52bb7874f 100644 --- a/sys-utils/ldattach.8 +++ b/sys-utils/ldattach.8 @@ -96,10 +96,10 @@ to stay in the foreground so that it can be interrupted or debugged, and to print verbose messages about its progress to the standard error output. .TP \fB-h\fP | \fB--help\fP -Prints a usage message and exits. +Display help text and exit. .TP \fB-V\fP | \fB--version\fP -Prints the program version. +Display version information and exit. .TP \fB-s\fP \fIvalue\fP | \fB--speed\fP \fIvalue\fP Set the speed of the serial line to the specified value. 
diff --git a/sys-utils/ldattach.c b/sys-utils/ldattach.c index c06aed204..5c460feef 100644 --- a/sys-utils/ldattach.c +++ b/sys-utils/ldattach.c @@ -42,7 +42,6 @@ # define TIOCSETD 0x5423 #endif -static const char *progname; static int debug = 0; struct ld_table { @@ -238,8 +237,6 @@ int main(int argc, char **argv) atexit(close_stdout); /* parse options */ - progname = program_invocation_short_name; - if (argc == 0) usage(EXIT_SUCCESS); while ((optc = diff --git a/sys-utils/losetup.8 b/sys-utils/losetup.8 index bd735180c..c61a61e1b 100644 --- a/sys-utils/losetup.8 +++ b/sys-utils/losetup.8 @@ -95,8 +95,12 @@ print help show status of all loop devices associated with given .I file .IP "\fB\-l, \-\-list" -if a loop device or the -a option is specified, print default columns for either the specified -loop device or all loop devices, default is to print info about all devices. +if a loop device or the -a option is specified, print default columns for +either the specified loop device or all loop devices; the default is to print +info about all devices. See also \fB\-\-output\fP, \fB\-\-noheadings\fP +and \fB\-\-raw\fP. +.IP "\fB\-n, \-\-noheadings\fP" +don't print headings for \fB\-\-list\fP output format .IP "\fB\-o, \-\-offset \fIoffset\fP" the data start is moved \fIoffset\fP bytes into the specified file or device @@ -106,6 +110,8 @@ specify which columns are to be printed for the \fB\-\-list\fP output the data end is set to no more than \fIsize\fP bytes after the data start .IP "\fB\-P, \-\-partscan\fP" force kernel to scan partition table on newly created loop device +.IP "\fB\-\-raw\fP" +use raw \fB\-\-list\fP output format .IP "\fB\-r, \-\-read-only\fP" setup read-only loop device .IP "\fB\-\-show\fP" @@ -134,7 +140,7 @@ from determining the status of the device. 
.I /dev/loop[0..N] loop block devices .TP -.I /dev/loop-cotrol +.I /dev/loop-control loop control device .SH EXAMPLE diff --git a/sys-utils/losetup.c b/sys-utils/losetup.c index b44c9e540..5be328848 100644 --- a/sys-utils/losetup.c +++ b/sys-utils/losetup.c @@ -160,6 +160,7 @@ static int show_all_loops(struct loopdev_cxt *lc, const char *file, uint64_t offset, int flags) { struct stat sbuf, *st = &sbuf; + char *cn_file = NULL; if (loopcxt_init_iterator(lc, LOOPITER_FL_USED)) return -1; @@ -168,18 +169,23 @@ static int show_all_loops(struct loopdev_cxt *lc, const char *file, st = NULL; while (loopcxt_next(lc) == 0) { - if (file && !loopcxt_is_used(lc, st, file, offset, flags)) { - char *canonized; - int ret; - canonized = canonicalize_path(file); - ret = loopcxt_is_used(lc, st, canonized, offset, flags); - free(canonized); - if (!ret) + if (file) { + int used; + const char *bf = cn_file ? cn_file : file; + + used = loopcxt_is_used(lc, st, bf, offset, flags); + if (!used && !cn_file) { + bf = cn_file = canonicalize_path(file); + used = loopcxt_is_used(lc, st, bf, offset, flags); + } + if (!used) continue; } printf_loopdev(lc); } loopcxt_deinit_iterator(lc); + if (cn_file) + free(cn_file); return 0; } @@ -290,14 +296,18 @@ static int set_tt_data(struct loopdev_cxt *lc, struct tt_line *ln) return 0; } -static int make_table(struct loopdev_cxt *lc, const char *file, - uint64_t offset, int flags) +static int make_table(struct loopdev_cxt *lc, + const char *file, + uint64_t offset, + int flags, + int tt_flags) { struct stat sbuf, *st = &sbuf; struct tt_line *ln; + char *cn_file = NULL; int i; - if (!(tt = tt_new_table(0))) + if (!(tt = tt_new_table(tt_flags | TT_FL_FREEDATA))) errx(EXIT_FAILURE, _("failed to initialize output table")); for (i = 0; i < ncolumns; i++) { @@ -307,6 +317,7 @@ static int make_table(struct loopdev_cxt *lc, const char *file, warn(_("failed to initialize output column")); } + /* only one loopdev requested (already assigned to loopdev_cxt) */ if 
(loopcxt_get_device(lc)) { ln = tt_add_line(tt, NULL); if (set_tt_data(lc, ln)) @@ -314,15 +325,25 @@ static int make_table(struct loopdev_cxt *lc, const char *file, return 0; } + /* list all loopdevs */ if (loopcxt_init_iterator(lc, LOOPITER_FL_USED)) return -1; if (!file || stat(file, st)) st = NULL; while (loopcxt_next(lc) == 0) { - if (file && !loopcxt_is_used(lc, st, file, offset, flags)) - continue; - + if (file) { + int used; + const char *bf = cn_file ? cn_file : file; + + used = loopcxt_is_used(lc, st, bf, offset, flags); + if (!used && !cn_file) { + bf = cn_file = canonicalize_path(file); + used = loopcxt_is_used(lc, st, bf, offset, flags); + } + if (!used) + continue; + } ln = tt_add_line(tt, NULL); if (set_tt_data(lc, ln)) @@ -330,6 +351,8 @@ static int make_table(struct loopdev_cxt *lc, const char *file, } loopcxt_deinit_iterator(lc); + if (cn_file) + free(cn_file); return 0; } @@ -345,17 +368,16 @@ static void usage(FILE *out) program_invocation_short_name); fputs(USAGE_OPTIONS, out); - fputs(_(" -a, --all list all used devices\n" - " -d, --detach <loopdev> [...] detach one or more devices\n" - " -D, --detach-all detach all used devices\n" - " -f, --find find first unused device\n" - " -c, --set-capacity <loopdev> resize device\n" - " -j, --associated <file> list all devices associated with <file>\n"), out); + fputs(_(" -a, --all list all used devices\n"), out); + fputs(_(" -d, --detach <loopdev> [...] 
detach one or more devices\n"), out); + fputs(_(" -D, --detach-all detach all used devices\n"), out); + fputs(_(" -f, --find find first unused device\n"), out); + fputs(_(" -c, --set-capacity <loopdev> resize device\n"), out); + fputs(_(" -j, --associated <file> list all devices associated with <file>\n"), out); + fputs(USAGE_SEPARATOR, out); - fputs(_(" -l, --list list info about all or specified\n"), out); fputs(_(" -o, --offset <num> start at offset <num> into file\n"), out); - fputs(_(" -O, --output <cols> specify columns to output for --list\n"), out); fputs(_(" --sizelimit <num> device limited to <num> bytes of the file\n"), out); fputs(_(" -P, --partscan create partitioned loop device\n"), out); fputs(_(" -r, --read-only setup read-only loop device\n"), out); @@ -363,6 +385,13 @@ static void usage(FILE *out) fputs(_(" -v, --verbose verbose mode\n"), out); fputs(USAGE_SEPARATOR, out); + + fputs(_(" -l, --list list info about all or specified\n"), out); + fputs(_(" -O, --output <cols> specify columns to output for --list\n"), out); + fputs(_(" -n, --noheadings don't print headings for --list output\n"), out); + fputs(_(" --raw use raw --list output format\n"), out); + + fputs(USAGE_SEPARATOR, out); fputs(USAGE_HELP, out); fputs(USAGE_VERSION, out); @@ -386,11 +415,11 @@ static void warn_size(const char *filename, uint64_t size) } if (size < 512) - warnx(_("%s: warning: file smaller than 512 bytes, the loop device " - "maybe be useless or invisible for system tools."), + warnx(_("%s: Warning: file is smaller than 512 bytes; the loop device " + "may be useless or invisible for system tools."), filename); else if (size % 512) - warnx(_("%s: warning: file does not fit into a 512-byte sector " + warnx(_("%s: Warning: file does not fit into a 512-byte sector; " "the end of the file will be ignored."), filename); } @@ -401,13 +430,14 @@ int main(int argc, char **argv) int act = 0, flags = 0, c; char *file = NULL; uint64_t offset = 0, sizelimit = 0; - int res = 0, 
showdev = 0, lo_flags = 0; + int res = 0, showdev = 0, lo_flags = 0, tt_flags = 0; char *outarg = NULL; int list = 0; enum { OPT_SIZELIMIT = CHAR_MAX + 1, - OPT_SHOW + OPT_SHOW, + OPT_RAW }; static const struct option longopts[] = { { "all", 0, 0, 'a' }, @@ -419,12 +449,14 @@ int main(int argc, char **argv) { "help", 0, 0, 'h' }, { "associated", 1, 0, 'j' }, { "list", 0, 0, 'l' }, + { "noheadings", 0, 0, 'n' }, { "offset", 1, 0, 'o' }, { "output", 1, 0, 'O' }, { "sizelimit", 1, 0, OPT_SIZELIMIT }, { "pass-fd", 1, 0, 'p' }, { "partscan", 0, 0, 'P' }, { "read-only", 0, 0, 'r' }, + { "raw", 0, 0, OPT_RAW }, { "show", 0, 0, OPT_SHOW }, { "verbose", 0, 0, 'v' }, { "version", 0, 0, 'V' }, @@ -447,7 +479,7 @@ int main(int argc, char **argv) if (loopcxt_init(&lc, 0)) err(EXIT_FAILURE, _("failed to initialize loopcxt")); - while ((c = getopt_long(argc, argv, "ac:d:De:E:fhj:lo:O:p:PrvV", + while ((c = getopt_long(argc, argv, "ac:d:De:E:fhj:lno:O:p:PrvV", longopts, NULL)) != -1) { err_exclusive_options(c, longopts, excl, excl_st); @@ -458,7 +490,8 @@ int main(int argc, char **argv) break; case 'c': act = A_SET_CAPACITY; - if (loopcxt_set_device(&lc, optarg)) + if (!is_loopdev(optarg) || + loopcxt_set_device(&lc, optarg)) err(EXIT_FAILURE, _("%s: failed to use device"), optarg); break; @@ -467,7 +500,8 @@ int main(int argc, char **argv) break; case 'd': act = A_DELETE; - if (loopcxt_set_device(&lc, optarg)) + if (!is_loopdev(optarg) || + loopcxt_set_device(&lc, optarg)) err(EXIT_FAILURE, _("%s: failed to use device"), optarg); break; @@ -491,6 +525,12 @@ int main(int argc, char **argv) case 'l': list = 1; break; + case 'n': + tt_flags |= TT_FL_NOHEADINGS; + break; + case OPT_RAW: + tt_flags |= TT_FL_RAW; + break; case 'o': offset = strtosize_or_err(optarg, _("failed to parse offset")); flags |= LOOPDEV_FL_OFFSET; @@ -558,7 +598,8 @@ int main(int argc, char **argv) * losetup [--list] <device> */ act = A_SHOW_ONE; - if (loopcxt_set_device(&lc, argv[optind])) + if 
(!is_loopdev(argv[optind]) || + loopcxt_set_device(&lc, argv[optind])) err(EXIT_FAILURE, _("%s: failed to use device"), argv[optind]); optind++; @@ -571,6 +612,7 @@ int main(int argc, char **argv) if (optind >= argc) errx(EXIT_FAILURE, _("no loop device specified")); + /* don't use is_loopdev() here, the device does not have exist yet */ if (loopcxt_set_device(&lc, argv[optind])) err(EXIT_FAILURE, _("%s: failed to use device"), argv[optind]); @@ -584,12 +626,12 @@ int main(int argc, char **argv) if (act != A_CREATE && (sizelimit || lo_flags || showdev)) errx(EXIT_FAILURE, - _("the options %s are allowed to loop device setup only"), + _("the options %s are allowed during loop device setup only"), "--{sizelimit,read-only,show}"); if ((flags & LOOPDEV_FL_OFFSET) && act != A_CREATE && (act != A_SHOW || !file)) - errx(EXIT_FAILURE, _("the option --offset is not allowed in this context.")); + errx(EXIT_FAILURE, _("the option --offset is not allowed in this context")); if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns), &ncolumns, column_name_to_id) < 0) @@ -600,12 +642,16 @@ int main(int argc, char **argv) { int hasdev = loopcxt_has_device(&lc); + if (hasdev && !is_loopdev(loopcxt_get_device(&lc))) + loopcxt_add_device(&lc); do { + const char *errpre; + /* Note that loopcxt_{find_unused,set_device}() resets * loopcxt struct. */ if (!hasdev && (res = loopcxt_find_unused(&lc))) { - warnx(_("not found unused device")); + warnx(_("cannot find an unused loop device")); break; } if (flags & LOOPDEV_FL_OFFSET) @@ -622,12 +668,14 @@ int main(int argc, char **argv) res = loopcxt_setup_device(&lc); if (res == 0) break; /* success */ - if (errno != EBUSY) { - warn(_("%s: failed to setup loop device"), - hasdev && loopcxt_get_fd(&lc) < 0 ? - loopcxt_get_device(&lc) : file); - break; - } + if (errno == EBUSY) + continue; + + /* errors */ + errpre = hasdev && loopcxt_get_fd(&lc) < 0 ? 
+ loopcxt_get_device(&lc) : file; + warn(_("%s: failed to set up loop device"), errpre); + break; } while (hasdev == 0); if (res == 0) { @@ -640,7 +688,8 @@ int main(int argc, char **argv) case A_DELETE: res = delete_loop(&lc); while (optind < argc) { - if (loopcxt_set_device(&lc, argv[optind])) + if (!is_loopdev(argv[optind]) || + loopcxt_set_device(&lc, argv[optind])) warn(_("%s: failed to use device"), argv[optind]); optind++; @@ -652,23 +701,23 @@ int main(int argc, char **argv) break; case A_FIND_FREE: if (loopcxt_find_unused(&lc)) - warn(_("find unused loop device failed")); + warn(_("cannot find an unused loop device")); else printf("%s\n", loopcxt_get_device(&lc)); break; case A_SHOW: if (list) - res = make_table(&lc, file, offset, flags); + res = make_table(&lc, file, offset, flags, tt_flags); else res = show_all_loops(&lc, file, offset, flags); break; case A_SHOW_ONE: if (list) - res = make_table( &lc, NULL, 0, 0); + res = make_table( &lc, NULL, 0, 0, tt_flags); else res = printf_loopdev(&lc); if (res) - warn(_("%s"), loopcxt_get_device(&lc)); + warn("%s", loopcxt_get_device(&lc)); break; case A_SET_CAPACITY: res = loopcxt_set_capacity(&lc); diff --git a/sys-utils/lscpu-dmi.c b/sys-utils/lscpu-dmi.c new file mode 100644 index 000000000..c82bfc134 --- /dev/null +++ b/sys-utils/lscpu-dmi.c @@ -0,0 +1,283 @@ +/* + * lscpu-dmi - Module to parse SMBIOS information + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Code originally taken from the dmidecode utility and slightly rewritten + * to suite the needs of lscpu + */ +#include <errno.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> + +#include "c.h" +#include "pathnames.h" +#include "all-io.h" +#include "lscpu.h" + +#define WORD(x) (uint16_t)(*(const uint16_t *)(x)) +#define DWORD(x) (uint32_t)(*(const uint32_t *)(x)) + +struct dmi_header +{ + uint8_t type; + uint8_t length; + uint16_t handle; + uint8_t *data; +}; + +static int checksum(const uint8_t *buf, size_t len) +{ + uint8_t sum = 0; + size_t a; + + for (a = 0; a < len; a++) + sum += buf[a]; + return (sum == 0); +} + +static void *get_mem_chunk(size_t base, size_t len, const char *devmem) +{ + void *p = NULL; + int fd; + + if ((fd = open(devmem, O_RDONLY)) < 0) + return NULL; + + if (!(p = malloc(len))) + goto nothing; + if (lseek(fd, base, SEEK_SET) == -1) + goto nothing; + if (read_all(fd, p, len) == -1) + goto nothing; + + close(fd); + return p; + +nothing: + free(p); + close(fd); + return NULL; +} + +static void to_dmi_header(struct dmi_header *h, uint8_t *data) +{ + h->type = data[0]; + h->length = data[1]; + h->handle = WORD(data + 2); + h->data = data; +} + +static char *dmi_string(const struct dmi_header *dm, uint8_t s) +{ + char *bp = (char *)dm->data; + + if (s == 0) + return NULL; + + bp += dm->length; + while (s > 1 && *bp) + { + bp += strlen(bp); + bp++; + s--; + } + + if (!*bp) + return NULL; + + return bp; +} + +static int hypervisor_from_dmi_table(uint32_t base, uint16_t len, + uint16_t num, const char *devmem) +{ + uint8_t *buf; + uint8_t *data; + int i = 0; + char *vendor = NULL; + char *product = NULL; + char 
*manufacturer = NULL; + int rc = HYPER_NONE; + + data = buf = get_mem_chunk(base, len, devmem); + if (!buf) + goto done; + + /* 4 is the length of an SMBIOS structure header */ + while (i < num && data + 4 <= buf + len) { + uint8_t *next; + struct dmi_header h; + + to_dmi_header(&h, data); + + /* + * If a short entry is found (less than 4 bytes), not only it + * is invalid, but we cannot reliably locate the next entry. + * Better stop at this point. + */ + if (h.length < 4) + goto done; + + /* look for the next handle */ + next = data + h.length; + while (next - buf + 1 < len && (next[0] != 0 || next[1] != 0)) + next++; + next += 2; + switch (h.type) { + case 0: + vendor = dmi_string(&h, data[0x04]); + break; + case 1: + manufacturer = dmi_string(&h, data[0x04]); + product = dmi_string(&h, data[0x05]); + break; + default: + break; + } + + data = next; + i++; + } + if (manufacturer && !strcmp(manufacturer, "innotek GmbH")) + rc = HYPER_INNOTEK; + else if (manufacturer && strstr(manufacturer, "HITACHI") && + product && strstr(product, "LPAR")) + rc = HYPER_HITACHI; + else if (!vendor && strcmp(vendor, "Parallels")) + rc = HYPER_PARALLELS; +done: + free(buf); + return rc; +} + +static int hypervisor_decode_legacy(uint8_t *buf, const char *devmem) +{ + if (!checksum(buf, 0x0F)) + return HYPER_NONE; + + return hypervisor_from_dmi_table(DWORD(buf + 0x08), WORD(buf + 0x06), + WORD(buf + 0x0C), + devmem); +} + +static int hypervisor_decode_smbios(uint8_t *buf, const char *devmem) +{ + if (!checksum(buf, buf[0x05]) + || memcmp(buf + 0x10, "_DMI_", 5) != 0 + || !checksum(buf + 0x10, 0x0F)) + return -1; + + return hypervisor_from_dmi_table(DWORD(buf + 0x18), WORD(buf + 0x16), + WORD(buf + 0x1C), + devmem); +} + +/* + * Probe for EFI interface + */ +#define EFI_NOT_FOUND (-1) +#define EFI_NO_SMBIOS (-2) +static int address_from_efi(size_t *address) +{ + FILE *tab; + char linebuf[64]; + int ret; + + *address = 0; /* Prevent compiler warning */ + + /* + * Linux up to 2.6.6: 
/proc/efi/systab + * Linux 2.6.7 and up: /sys/firmware/efi/systab + */ + if (!(tab = fopen("/sys/firmware/efi/systab", "r")) && + !(tab = fopen("/proc/efi/systab", "r"))) + return EFI_NOT_FOUND; /* No EFI interface */ + + ret = EFI_NO_SMBIOS; + while ((fgets(linebuf, sizeof(linebuf) - 1, tab)) != NULL) { + char *addrp = strchr(linebuf, '='); + if (!addrp) + continue; + *(addrp++) = '\0'; + if (strcmp(linebuf, "SMBIOS") == 0) { + *address = strtoul(addrp, NULL, 0); + ret = 0; + break; + } + } + + fclose(tab); + return ret; +} + +int read_hypervisor_dmi(void) +{ + int rc = HYPER_NONE; + uint8_t *buf = NULL; + size_t fp = 0; + + if (sizeof(uint8_t) != 1 + || sizeof(uint16_t) != 2 + || sizeof(uint32_t) != 4 + || '\0' != 0) + return rc; + + /* First try EFI (ia64, Intel-based Mac) */ + switch (address_from_efi(&fp)) { + case EFI_NOT_FOUND: + goto memory_scan; + case EFI_NO_SMBIOS: + goto done; + } + + buf = get_mem_chunk(fp, 0x20, _PATH_DEV_MEM); + if (!buf) + goto done; + + rc = hypervisor_decode_smbios(buf, _PATH_DEV_MEM); + if (rc) + goto done; + free(buf); + +memory_scan: +#if defined(__x86_64__) || defined(__i386__) + /* Fallback to memory scan (x86, x86_64) */ + buf = get_mem_chunk(0xF0000, 0x10000, _PATH_DEV_MEM); + if (!buf) + goto done; + + for (fp = 0; fp <= 0xFFF0; fp += 16) { + if (memcmp(buf + fp, "_SM_", 4) == 0 && fp <= 0xFFE0) { + rc = hypervisor_decode_smbios(buf + fp, _PATH_DEV_MEM); + if (rc == -1) + fp += 16; + + } else if (memcmp(buf + fp, "_DMI_", 5) == 0) + rc = hypervisor_decode_legacy(buf + fp, _PATH_DEV_MEM); + + if (rc >= 0) + break; + } +#endif +done: + free(buf); + return rc; +} diff --git a/sys-utils/lscpu.1 b/sys-utils/lscpu.1 index f747a35b7..5e838fb11 100644 --- a/sys-utils/lscpu.1 +++ b/sys-utils/lscpu.1 @@ -29,6 +29,9 @@ Not all columns are supported on all architectures. If an unsupported column is specified, \fBlscpu\fP prints the column but does not provide any data for it. 
.SS COLUMNS +Note that topology elements (core, socket, etc.) use sequential unique IDs +starting from zero, but CPU logical numbers follow the kernel, where there is no +guarantee of sequential numbering. .TP .B CPU The logical CPU number of a CPU as used by the Linux kernel. @@ -76,6 +79,11 @@ The workload is concentrated on few CPUs. For vertical polarization, the column also shows the degree of concentration, high, medium, or low. This column contains data only if your hardware system and hypervisor support CPU polarization. +.TP +.B MAXMHZ +Maximum megahertz value for the CPU. Useful when lscpu is used as a hardware +inventory information gathering tool. Note that the megahertz value is +dynamic, and driven by the CPU governor depending on current resource need. .RE .SH OPTIONS .TP @@ -102,7 +110,7 @@ When specifying the \fIlist\fP argument, the string of option, equal sign (=), a Examples: '\fB-e=cpu,node\fP' or '\fB--extended=cpu,node\fP'. .TP .BR \-h , " \-\-help" -Display help information and exit. +Display help text and exit. .TP .BR \-p , " \-\-parse" [=\fIlist\fP] Optimize the command output for easy parsing. 
diff --git a/sys-utils/lscpu.c b/sys-utils/lscpu.c index 2e08f66a9..a42ee8e37 100644 --- a/sys-utils/lscpu.c +++ b/sys-utils/lscpu.c @@ -43,46 +43,50 @@ #include "path.h" #include "closestream.h" #include "optutils.h" +#include "lscpu.h" #define CACHE_MAX 100 /* /sys paths */ #define _PATH_SYS_SYSTEM "/sys/devices/system" #define _PATH_SYS_CPU _PATH_SYS_SYSTEM "/cpu" +#define _PATH_SYS_NODE _PATH_SYS_SYSTEM "/node" #define _PATH_PROC_XEN "/proc/xen" #define _PATH_PROC_XENCAP _PATH_PROC_XEN "/capabilities" #define _PATH_PROC_CPUINFO "/proc/cpuinfo" #define _PATH_PROC_PCIDEVS "/proc/bus/pci/devices" #define _PATH_PROC_SYSINFO "/proc/sysinfo" +#define _PATH_PROC_STATUS "/proc/self/status" +#define _PATH_PROC_VZ "/proc/vz" +#define _PATH_PROC_BC "/proc/bc" +#define _PATH_DEV_MEM "/dev/mem" /* virtualization types */ enum { VIRT_NONE = 0, VIRT_PARA, - VIRT_FULL + VIRT_FULL, + VIRT_CONT }; const char *virt_types[] = { [VIRT_NONE] = N_("none"), [VIRT_PARA] = N_("para"), - [VIRT_FULL] = N_("full") + [VIRT_FULL] = N_("full"), + [VIRT_CONT] = N_("container"), }; -/* hypervisor vendors */ -enum { - HYPER_NONE = 0, - HYPER_XEN, - HYPER_KVM, - HYPER_MSHV, - HYPER_VMWARE, - HYPER_IBM -}; const char *hv_vendors[] = { [HYPER_NONE] = NULL, [HYPER_XEN] = "Xen", [HYPER_KVM] = "KVM", [HYPER_MSHV] = "Microsoft", [HYPER_VMWARE] = "VMware", - [HYPER_IBM] = "IBM" + [HYPER_IBM] = "IBM", + [HYPER_VSERVER] = "Linux-VServer", + [HYPER_UML] = "User-mode Linux", + [HYPER_INNOTEK] = "Innotek GmbH", + [HYPER_HITACHI] = "Hitachi", + [HYPER_PARALLELS] = "Parallels" }; /* CPU modes */ @@ -145,6 +149,8 @@ struct lscpu_desc { int hyper; /* hypervisor vendor ID */ int virtype; /* VIRT_PARA|FULL|NONE ? 
*/ char *mhz; + char **maxmhz; /* maximum mega hertz */ + char **minmhz; /* minimum mega hertz */ char *stepping; char *bogomips; char *flags; @@ -156,7 +162,23 @@ struct lscpu_desc { cpu_set_t *present; /* mask with present CPUs */ cpu_set_t *online; /* mask with online CPUs */ + int nthreads; /* number of online threads */ + + int ncaches; + struct cpu_cache *caches; + + /* + * All maps are sequentially indexed (0..ncpuspos), the array index + * does not have match with cpuX number as presented by kernel. You + * have to use real_cpu_num() to get the real cpuX number. + * + * For example, the possible system CPUs are: 1,3,5, it means that + * ncpuspos=3, so all arrays are in range 0..3. + */ + int *idx2cpunum; /* mapping index to CPU num */ + int nnodes; /* number of NUMA modes */ + int *idx2nodenum; /* Support for discontinuous nodes */ cpu_set_t **nodemaps; /* array with NUMA nodes */ /* books -- based on book_siblings (internal kernel map of cpuX's @@ -174,11 +196,6 @@ struct lscpu_desc { int ncores; /* number of all online cores */ cpu_set_t **coremaps; /* unique thread_siblings */ - int nthreads; /* number of online threads */ - - int ncaches; - struct cpu_cache *caches; - int *polarization; /* cpu polarization */ int *addresses; /* physical cpu addresses */ int *configured; /* cpu configured */ @@ -213,6 +230,8 @@ static int maxcpus; /* size in bits of kernel cpu mask */ ((_d) && (_d)->present ? 
\ CPU_ISSET_S((_cpu), CPU_ALLOC_SIZE(maxcpus), (_d)->present) : 0) +#define real_cpu_num(_d, _i) ((_d)->idx2cpunum[(_i)]) + /* * IDs */ @@ -227,6 +246,8 @@ enum { COL_ADDRESS, COL_CONFIGURED, COL_ONLINE, + COL_MAXMHZ, + COL_MINMHZ, }; /* column description @@ -249,7 +270,9 @@ static struct lscpu_coldesc coldescs[] = [COL_POLARIZATION] = { "POLARIZATION", N_("CPU dispatching mode on virtual hardware") }, [COL_ADDRESS] = { "ADDRESS", N_("physical address of a CPU") }, [COL_CONFIGURED] = { "CONFIGURED", N_("shows if the hypervisor has allocated the CPU") }, - [COL_ONLINE] = { "ONLINE", N_("shows if Linux currently makes use of the CPU") } + [COL_ONLINE] = { "ONLINE", N_("shows if Linux currently makes use of the CPU") }, + [COL_MAXMHZ] = { "MAXMHZ", N_("shows the maximum MHz of the CPU") }, + [COL_MINMHZ] = { "MINMHZ", N_("shows the minimum MHz of the CPU") } }; static int @@ -391,9 +414,9 @@ read_basicinfo(struct lscpu_desc *desc, struct lscpu_modifier *mod) fclose(fp); - if (path_exist(_PATH_SYS_SYSTEM "/cpu/kernel_max")) + if (path_exist(_PATH_SYS_CPU "/kernel_max")) /* note that kernel_max is maximum index [NR_CPUS-1] */ - maxcpus = path_read_s32(_PATH_SYS_SYSTEM "/cpu/kernel_max") + 1; + maxcpus = path_read_s32(_PATH_SYS_CPU "/kernel_max") + 1; else if (mod->system == SYSTEM_LIVE) /* the root is '/' so we are working with data from the current kernel */ @@ -406,30 +429,38 @@ read_basicinfo(struct lscpu_desc *desc, struct lscpu_modifier *mod) setsize = CPU_ALLOC_SIZE(maxcpus); - if (path_exist(_PATH_SYS_SYSTEM "/cpu/possible")) { - cpu_set_t *tmp = path_read_cpulist(maxcpus, _PATH_SYS_SYSTEM "/cpu/possible"); + if (path_exist(_PATH_SYS_CPU "/possible")) { + cpu_set_t *tmp = path_read_cpulist(maxcpus, _PATH_SYS_CPU "/possible"); + int num, idx; + desc->ncpuspos = CPU_COUNT_S(setsize, tmp); + desc->idx2cpunum = xcalloc(desc->ncpuspos, sizeof(int)); + + for (num = 0, idx = 0; num < maxcpus; num++) { + if (CPU_ISSET(num, tmp)) + desc->idx2cpunum[idx++] = num; + } 
cpuset_free(tmp); } else err(EXIT_FAILURE, _("failed to determine number of CPUs: %s"), - _PATH_SYS_SYSTEM "/cpu/possible"); + _PATH_SYS_CPU "/possible"); /* get mask for present CPUs */ - if (path_exist(_PATH_SYS_SYSTEM "/cpu/present")) { - desc->present = path_read_cpulist(maxcpus, _PATH_SYS_SYSTEM "/cpu/present"); + if (path_exist(_PATH_SYS_CPU "/present")) { + desc->present = path_read_cpulist(maxcpus, _PATH_SYS_CPU "/present"); desc->ncpus = CPU_COUNT_S(setsize, desc->present); } /* get mask for online CPUs */ - if (path_exist(_PATH_SYS_SYSTEM "/cpu/online")) { - desc->online = path_read_cpulist(maxcpus, _PATH_SYS_SYSTEM "/cpu/online"); + if (path_exist(_PATH_SYS_CPU "/online")) { + desc->online = path_read_cpulist(maxcpus, _PATH_SYS_CPU "/online"); desc->nthreads = CPU_COUNT_S(setsize, desc->online); } /* get dispatching mode */ - if (path_exist(_PATH_SYS_SYSTEM "/cpu/dispatching")) - desc->dispatching = path_read_s32(_PATH_SYS_SYSTEM "/cpu/dispatching"); + if (path_exist(_PATH_SYS_CPU "/dispatching")) + desc->dispatching = path_read_s32(_PATH_SYS_CPU "/dispatching"); else desc->dispatching = -1; } @@ -529,17 +560,21 @@ read_hypervisor_cpuid(struct lscpu_desc *desc __attribute__((__unused__))) static void read_hypervisor(struct lscpu_desc *desc, struct lscpu_modifier *mod) { - if (mod->system != SYSTEM_SNAPSHOT) + FILE *fd; + + if (mod->system != SYSTEM_SNAPSHOT) { read_hypervisor_cpuid(desc); + if (!desc->hyper) + desc->hyper = read_hypervisor_dmi(); + } if (desc->hyper) - /* hvm */ desc->virtype = VIRT_FULL; + /* Xen para-virt or dom0 */ else if (path_exist(_PATH_PROC_XEN)) { - /* Xen para-virt or dom0 */ - FILE *fd = path_fopen("r", 0, _PATH_PROC_XENCAP); int dom0 = 0; + fd = path_fopen("r", 0, _PATH_PROC_XENCAP); if (fd) { char buf[256]; @@ -552,20 +587,22 @@ read_hypervisor(struct lscpu_desc *desc, struct lscpu_modifier *mod) desc->virtype = dom0 ? 
VIRT_NONE : VIRT_PARA; desc->hyper = HYPER_XEN; + /* Xen full-virt on non-x86_64 */ } else if (has_pci_device(0x5853, 0x0001)) { - /* Xen full-virt on non-x86_64 */ desc->hyper = HYPER_XEN; desc->virtype = VIRT_FULL; + + /* IBM PR/SM */ } else if (path_exist(_PATH_PROC_SYSINFO)) { - FILE *fd = path_fopen("r", 0, _PATH_PROC_SYSINFO); + FILE *sysinfo_fd = path_fopen("r", 0, _PATH_PROC_SYSINFO); char buf[BUFSIZ]; - if (!fd) + if (!sysinfo_fd) return; desc->hyper = HYPER_IBM; desc->hypervisor = "PR/SM"; desc->virtype = VIRT_FULL; - while (fgets(buf, sizeof(buf), fd) != NULL) { + while (fgets(buf, sizeof(buf), sysinfo_fd) != NULL) { char *str; if (!strstr(buf, "Control Program:")) @@ -589,7 +626,47 @@ read_hypervisor(struct lscpu_desc *desc, struct lscpu_modifier *mod) while ((str = strstr(desc->hypervisor, " "))) memmove(str, str + 1, strlen(str)); } + fclose(sysinfo_fd); + } + + /* OpenVZ/Virtuozzo - /proc/vz dir should exist + * /proc/bc should not */ + else if (path_exist(_PATH_PROC_VZ) && !path_exist(_PATH_PROC_BC)) { + desc->hyper = HYPER_PARALLELS; + desc->virtype = VIRT_CONT; + + /* IBM */ + } else if (desc->vendor && + (strcmp(desc->vendor, "PowerVM Lx86") == 0 || + strcmp(desc->vendor, "IBM/S390") == 0)) { + desc->hyper = HYPER_IBM; + desc->virtype = VIRT_FULL; + + /* User-mode-linux */ + } else if (desc->modelname && strstr(desc->modelname, "UML")) { + desc->hyper = HYPER_UML; + desc->virtype = VIRT_PARA; + + /* Linux-VServer */ + } else if (path_exist(_PATH_PROC_STATUS)) { + char buf[BUFSIZ]; + char *val = NULL; + + fd = path_fopen("r", 1, _PATH_PROC_STATUS); + while (fgets(buf, sizeof(buf), fd) != NULL) { + if (lookup(buf, "VxID", &val)) + break; + } fclose(fd); + + if (val) { + while (isdigit(*val)) + ++val; + if (!*val) { + desc->hyper = HYPER_VSERVER; + desc->virtype = VIRT_CONT; + } + } } } @@ -616,9 +693,10 @@ static int add_cpuset_to_array(cpu_set_t **ary, int *items, cpu_set_t *set) } static void -read_topology(struct lscpu_desc *desc, int num) 
+read_topology(struct lscpu_desc *desc, int idx) { cpu_set_t *thread_siblings, *core_siblings, *book_siblings; + int num = real_cpu_num(desc, idx); if (!path_exist(_PATH_SYS_CPU "/cpu%d/topology/thread_siblings", num)) return; @@ -628,10 +706,9 @@ read_topology(struct lscpu_desc *desc, int num) core_siblings = path_read_cpuset(maxcpus, _PATH_SYS_CPU "/cpu%d/topology/core_siblings", num); book_siblings = NULL; - if (path_exist(_PATH_SYS_CPU "/cpu%d/topology/book_siblings", num)) { + if (path_exist(_PATH_SYS_CPU "/cpu%d/topology/book_siblings", num)) book_siblings = path_read_cpuset(maxcpus, _PATH_SYS_CPU "/cpu%d/topology/book_siblings", num); - } if (!desc->coremaps) { int nbooks, nsockets, ncores, nthreads; @@ -684,10 +761,12 @@ read_topology(struct lscpu_desc *desc, int num) if (book_siblings) add_cpuset_to_array(desc->bookmaps, &desc->nbooks, book_siblings); } + static void -read_polarization(struct lscpu_desc *desc, int num) +read_polarization(struct lscpu_desc *desc, int idx) { char mode[64]; + int num = real_cpu_num(desc, idx); if (desc->dispatching < 0) return; @@ -697,35 +776,67 @@ read_polarization(struct lscpu_desc *desc, int num) desc->polarization = xcalloc(desc->ncpuspos, sizeof(int)); path_read_str(mode, sizeof(mode), _PATH_SYS_CPU "/cpu%d/polarization", num); if (strncmp(mode, "vertical:low", sizeof(mode)) == 0) - desc->polarization[num] = POLAR_VLOW; + desc->polarization[idx] = POLAR_VLOW; else if (strncmp(mode, "vertical:medium", sizeof(mode)) == 0) - desc->polarization[num] = POLAR_VMEDIUM; + desc->polarization[idx] = POLAR_VMEDIUM; else if (strncmp(mode, "vertical:high", sizeof(mode)) == 0) - desc->polarization[num] = POLAR_VHIGH; + desc->polarization[idx] = POLAR_VHIGH; else if (strncmp(mode, "horizontal", sizeof(mode)) == 0) - desc->polarization[num] = POLAR_HORIZONTAL; + desc->polarization[idx] = POLAR_HORIZONTAL; else - desc->polarization[num] = POLAR_UNKNOWN; + desc->polarization[idx] = POLAR_UNKNOWN; } static void -read_address(struct 
lscpu_desc *desc, int num) +read_address(struct lscpu_desc *desc, int idx) { + int num = real_cpu_num(desc, idx); + if (!path_exist(_PATH_SYS_CPU "/cpu%d/address", num)) return; if (!desc->addresses) desc->addresses = xcalloc(desc->ncpuspos, sizeof(int)); - desc->addresses[num] = path_read_s32(_PATH_SYS_CPU "/cpu%d/address", num); + desc->addresses[idx] = path_read_s32(_PATH_SYS_CPU "/cpu%d/address", num); } static void -read_configured(struct lscpu_desc *desc, int num) +read_configured(struct lscpu_desc *desc, int idx) { + int num = real_cpu_num(desc, idx); + if (!path_exist(_PATH_SYS_CPU "/cpu%d/configure", num)) return; if (!desc->configured) desc->configured = xcalloc(desc->ncpuspos, sizeof(int)); - desc->configured[num] = path_read_s32(_PATH_SYS_CPU "/cpu%d/configure", num); + desc->configured[idx] = path_read_s32(_PATH_SYS_CPU "/cpu%d/configure", num); +} + +static void +read_max_mhz(struct lscpu_desc *desc, int idx) +{ + int num = real_cpu_num(desc, idx); + + if (!path_exist(_PATH_SYS_CPU "/cpu%d/cpufreq/cpuinfo_max_freq", num)) + return; + if (!desc->maxmhz) + desc->maxmhz = xcalloc(desc->ncpuspos, sizeof(char *)); + xasprintf(&(desc->maxmhz[idx]), "%.4f", + (float)path_read_s32(_PATH_SYS_CPU + "/cpu%d/cpufreq/cpuinfo_max_freq", num) / 1000); +} + +static void +read_min_mhz(struct lscpu_desc *desc, int idx) +{ + int num = real_cpu_num(desc, idx); + + if (!path_exist(_PATH_SYS_CPU "/cpu%d/cpufreq/cpuinfo_min_freq", num)) + return; + if (!desc->minmhz) + desc->minmhz = xcalloc(desc->ncpuspos, sizeof(char *)); + xasprintf(&(desc->minmhz[idx]), "%.4f", + (float)path_read_s32(_PATH_SYS_CPU + "/cpu%d/cpufreq/cpuinfo_min_freq", num) / 1000); } static int @@ -738,13 +849,14 @@ cachecmp(const void *a, const void *b) } static void -read_cache(struct lscpu_desc *desc, int num) +read_cache(struct lscpu_desc *desc, int idx) { char buf[256]; int i; + int num = real_cpu_num(desc, idx); if (!desc->ncaches) { - while(path_exist(_PATH_SYS_SYSTEM "/cpu/cpu%d/cache/index%d", + 
while(path_exist(_PATH_SYS_CPU "/cpu%d/cache/index%d", num, desc->ncaches)) desc->ncaches++; @@ -757,7 +869,7 @@ read_cache(struct lscpu_desc *desc, int num) struct cpu_cache *ca = &desc->caches[i]; cpu_set_t *map; - if (!path_exist(_PATH_SYS_SYSTEM "/cpu/cpu%d/cache/index%d", + if (!path_exist(_PATH_SYS_CPU "/cpu%d/cache/index%d", num, i)) continue; if (!ca->name) { @@ -785,10 +897,13 @@ read_cache(struct lscpu_desc *desc, int num) ca->name = xstrdup(buf); /* cache size */ - path_read_str(buf, sizeof(buf), - _PATH_SYS_CPU "/cpu%d/cache/index%d/size", - num, i); - ca->size = xstrdup(buf); + if (path_exist(_PATH_SYS_CPU "/cpu%d/cache/index%d/size",num, i)) { + path_read_str(buf, sizeof(buf), + _PATH_SYS_CPU "/cpu%d/cache/index%d/size", num, i); + ca->size = xstrdup(buf); + } else { + ca->size = xstrdup("unknown size"); + } } /* information about how CPUs share different caches */ @@ -802,34 +917,77 @@ read_cache(struct lscpu_desc *desc, int num) } } +static inline int is_node_dirent(struct dirent *d) +{ + return + d && +#ifdef _DIRENT_HAVE_D_TYPE + (d->d_type == DT_DIR || d->d_type == DT_UNKNOWN) && +#endif + strncmp(d->d_name, "node", 4) == 0 && + isdigit_string(d->d_name + 4); +} + +static int +nodecmp(const void *ap, const void *bp) +{ + int *a = (int *) ap, *b = (int *) bp; + return *a - *b; +} + static void read_nodes(struct lscpu_desc *desc) { - int i; + int i = 0; + DIR *dir; + struct dirent *d; + char *path; /* number of NUMA node */ - while (path_exist(_PATH_SYS_SYSTEM "/node/node%d", desc->nnodes)) - desc->nnodes++; + path = path_strdup(_PATH_SYS_NODE); + dir = opendir(path); + free(path); + + while (dir && (d = readdir(dir))) { + if (is_node_dirent(d)) + desc->nnodes++; + } - if (!desc->nnodes) + if (!desc->nnodes) { + if (dir) + closedir(dir); return; + } desc->nodemaps = xcalloc(desc->nnodes, sizeof(cpu_set_t *)); + desc->idx2nodenum = xmalloc(desc->nnodes * sizeof(int)); + + if (dir) { + rewinddir(dir); + while ((d = readdir(dir)) && i < desc->nnodes) 
{ + if (is_node_dirent(d)) + desc->idx2nodenum[i++] = strtol_or_err(((d->d_name) + 4), + _("Failed to extract the node number")); + } + closedir(dir); + qsort(desc->idx2nodenum, desc->nnodes, sizeof(int), nodecmp); + } /* information about how nodes share different CPUs */ for (i = 0; i < desc->nnodes; i++) desc->nodemaps[i] = path_read_cpuset(maxcpus, - _PATH_SYS_SYSTEM "/node/node%d/cpumap", - i); + _PATH_SYS_NODE "/node%d/cpumap", + desc->idx2nodenum[i]); } static char * -get_cell_data(struct lscpu_desc *desc, int cpu, int col, +get_cell_data(struct lscpu_desc *desc, int idx, int col, struct lscpu_modifier *mod, char *buf, size_t bufsz) { size_t setsize = CPU_ALLOC_SIZE(maxcpus); - size_t idx; + size_t i; + int cpu = real_cpu_num(desc, idx); *buf = '\0'; @@ -839,23 +997,23 @@ get_cell_data(struct lscpu_desc *desc, int cpu, int col, break; case COL_CORE: if (cpuset_ary_isset(cpu, desc->coremaps, - desc->ncores, setsize, &idx) == 0) - snprintf(buf, bufsz, "%zd", idx); + desc->ncores, setsize, &i) == 0) + snprintf(buf, bufsz, "%zd", i); break; case COL_SOCKET: if (cpuset_ary_isset(cpu, desc->socketmaps, - desc->nsockets, setsize, &idx) == 0) - snprintf(buf, bufsz, "%zd", idx); + desc->nsockets, setsize, &i) == 0) + snprintf(buf, bufsz, "%zd", i); break; case COL_NODE: if (cpuset_ary_isset(cpu, desc->nodemaps, - desc->nnodes, setsize, &idx) == 0) - snprintf(buf, bufsz, "%zd", idx); + desc->nnodes, setsize, &i) == 0) + snprintf(buf, bufsz, "%d", desc->idx2nodenum[i]); break; case COL_BOOK: if (cpuset_ary_isset(cpu, desc->bookmaps, - desc->nbooks, setsize, &idx) == 0) - snprintf(buf, bufsz, "%zd", idx); + desc->nbooks, setsize, &i) == 0) + snprintf(buf, bufsz, "%zd", i); break; case COL_CACHE: { @@ -867,8 +1025,8 @@ get_cell_data(struct lscpu_desc *desc, int cpu, int col, struct cpu_cache *ca = &desc->caches[j]; if (cpuset_ary_isset(cpu, ca->sharedmaps, - ca->nsharedmaps, setsize, &idx) == 0) { - int x = snprintf(p, sz, "%zd", idx); + ca->nsharedmaps, setsize, &i) == 
0) { + int x = snprintf(p, sz, "%zd", i); if (x <= 0 || (size_t) x + 2 >= sz) return NULL; p += x; @@ -884,7 +1042,7 @@ get_cell_data(struct lscpu_desc *desc, int cpu, int col, } case COL_POLARIZATION: if (desc->polarization) { - int x = desc->polarization[cpu]; + int x = desc->polarization[idx]; snprintf(buf, bufsz, "%s", mod->mode == OUTPUT_PARSABLE ? @@ -894,17 +1052,17 @@ get_cell_data(struct lscpu_desc *desc, int cpu, int col, break; case COL_ADDRESS: if (desc->addresses) - snprintf(buf, bufsz, "%d", desc->addresses[cpu]); + snprintf(buf, bufsz, "%d", desc->addresses[idx]); break; case COL_CONFIGURED: if (!desc->configured) break; if (mod->mode == OUTPUT_PARSABLE) snprintf(buf, bufsz, - desc->configured[cpu] ? _("Y") : _("N")); + desc->configured[idx] ? _("Y") : _("N")); else snprintf(buf, bufsz, - desc->configured[cpu] ? _("yes") : _("no")); + desc->configured[idx] ? _("yes") : _("no")); break; case COL_ONLINE: if (!desc->online) @@ -916,6 +1074,14 @@ get_cell_data(struct lscpu_desc *desc, int cpu, int col, snprintf(buf, bufsz, is_cpu_online(desc, cpu) ? 
_("yes") : _("no")); break; + case COL_MAXMHZ: + if (desc->maxmhz) + xstrncpy(buf, desc->maxmhz[idx], bufsz); + break; + case COL_MINMHZ: + if (desc->minmhz) + xstrncpy(buf, desc->minmhz[idx], bufsz); + break; } return buf; } @@ -1025,12 +1191,13 @@ print_parsable(struct lscpu_desc *desc, int cols[], int ncols, */ for (i = 0; i < desc->ncpuspos; i++) { int c; + int cpu = real_cpu_num(desc, i); - if (!mod->offline && desc->online && !is_cpu_online(desc, i)) + if (!mod->offline && desc->online && !is_cpu_online(desc, cpu)) continue; - if (!mod->online && desc->online && is_cpu_online(desc, i)) + if (!mod->online && desc->online && is_cpu_online(desc, cpu)) continue; - if (desc->present && !is_cpu_present(desc, i)) + if (desc->present && !is_cpu_present(desc, cpu)) continue; for (c = 0; c < ncols; c++) { if (mod->compat && cols[c] == COL_CACHE) { @@ -1059,7 +1226,7 @@ print_readable(struct lscpu_desc *desc, int cols[], int ncols, { int i; char buf[BUFSIZ], *data; - struct tt *tt = tt_new_table(0); + struct tt *tt = tt_new_table(TT_FL_FREEDATA); if (!tt) err(EXIT_FAILURE, _("failed to initialize output table")); @@ -1072,12 +1239,13 @@ print_readable(struct lscpu_desc *desc, int cols[], int ncols, for (i = 0; i < desc->ncpuspos; i++) { int c; struct tt_line *line; + int cpu = real_cpu_num(desc, i); - if (!mod->offline && desc->online && !is_cpu_online(desc, i)) + if (!mod->offline && desc->online && !is_cpu_online(desc, cpu)) continue; - if (!mod->online && desc->online && is_cpu_online(desc, i)) + if (!mod->online && desc->online && is_cpu_online(desc, cpu)) continue; - if (desc->present && !is_cpu_present(desc, i)) + if (desc->present && !is_cpu_present(desc, cpu)) continue; line = tt_add_line(tt, NULL); @@ -1085,11 +1253,13 @@ print_readable(struct lscpu_desc *desc, int cols[], int ncols, for (c = 0; c < ncols; c++) { data = get_cell_data(desc, i, cols[c], mod, buf, sizeof(buf)); - tt_line_set_data(line, c, data && *data ? 
xstrdup(data) : "-"); + tt_line_set_data(line, c, + xstrdup(data && *data ? data : "-")); } } tt_print_table(tt); + tt_free_table(tt); } /* output formats "<key> <value>"*/ @@ -1163,8 +1333,9 @@ print_summary(struct lscpu_desc *desc, struct lscpu_modifier *mod) err(EXIT_FAILURE, _("failed to callocate cpu set")); CPU_ZERO_S(setsize, set); for (i = 0; i < desc->ncpuspos; i++) { - if (!is_cpu_online(desc, i) && is_cpu_present(desc, i)) - CPU_SET_S(i, setsize, set); + int cpu = real_cpu_num(desc, i); + if (!is_cpu_online(desc, cpu) && is_cpu_present(desc, cpu)) + CPU_SET_S(cpu, setsize, set); } print_cpuset(mod->hex ? _("Off-line CPU(s) mask:") : _("Off-line CPU(s) list:"), @@ -1223,6 +1394,10 @@ print_summary(struct lscpu_desc *desc, struct lscpu_modifier *mod) print_s(_("Stepping:"), desc->stepping); if (desc->mhz) print_s(_("CPU MHz:"), desc->mhz); + if (desc->maxmhz) + print_s(_("CPU max MHz:"), desc->maxmhz[0]); + if (desc->minmhz) + print_s(_("CPU min MHz:"), desc->minmhz[0]); if (desc->bogomips) print_s(_("BogoMIPS:"), desc->bogomips); if (desc->virtflag) { @@ -1250,7 +1425,7 @@ print_summary(struct lscpu_desc *desc, struct lscpu_modifier *mod) } for (i = 0; i < desc->nnodes; i++) { - snprintf(buf, sizeof(buf), _("NUMA node%d CPU(s):"), i); + snprintf(buf, sizeof(buf), _("NUMA node%d CPU(s):"), desc->idx2nodenum[i]); print_cpuset(buf, desc->nodemaps[i], mod->hex); } } @@ -1390,6 +1565,8 @@ int main(int argc, char *argv[]) read_polarization(desc, i); read_address(desc, i); read_configured(desc, i); + read_max_mhz(desc, i); + read_min_mhz(desc, i); } if (desc->caches) @@ -1436,6 +1613,10 @@ int main(int argc, char *argv[]) columns[ncolumns++] = COL_POLARIZATION; if (desc->addresses) columns[ncolumns++] = COL_ADDRESS; + if (desc->maxmhz) + columns[ncolumns++] = COL_MAXMHZ; + if (desc->minmhz) + columns[ncolumns++] = COL_MINMHZ; } print_readable(desc, columns, ncolumns, mod); break; diff --git a/sys-utils/lscpu.h b/sys-utils/lscpu.h new file mode 100644 index 
000000000..312038fcc --- /dev/null +++ b/sys-utils/lscpu.h @@ -0,0 +1,21 @@ +#ifndef LSCPU_H +#define LSCPU_H + +/* hypervisor vendors */ +enum { + HYPER_NONE = 0, + HYPER_XEN, + HYPER_KVM, + HYPER_MSHV, + HYPER_VMWARE, + HYPER_IBM, /* sys-z powervm */ + HYPER_VSERVER, + HYPER_UML, + HYPER_INNOTEK, /* VBOX */ + HYPER_HITACHI, + HYPER_PARALLELS /* OpenVZ/Virtuozzo */ +}; + +extern int read_hypervisor_dmi(void); + +#endif /* LSCPU_H */ diff --git a/sys-utils/mount.8 b/sys-utils/mount.8 index a3a658297..073f18a7f 100644 --- a/sys-utils/mount.8 +++ b/sys-utils/mount.8 @@ -1,5 +1,6 @@ +'\" t .\" Copyright (c) 1996-2004 Andries Brouwer -.\" Copyright (C) 2006-2012 Karel Zak <kzak@redhat.com> +.\" Copyright (C) 2006-2012 Karel Zak <kzak@redhat.com> .\" .\" This page is somewhat derived from a page that was .\" (c) 1980, 1989, 1991 The Regents of the University of California @@ -35,7 +36,8 @@ mount \- mount a filesystem .B mount .RB [ \-lhV ] .LP -.BI "mount \-a +.\" Quote used to include space between arguments +.B "mount \-a .RB [ \-fFnrsvw ] .RB [ \-t .IR vfstype ] @@ -100,14 +102,14 @@ If only directory or device is given, for example: .RE then mount looks for a mountpoint and if not found then for a device in the /etc/fstab file. It's possible to use -.B --target +.B \-\-target or -.B --source +.B \-\-source options to avoid ambivalent interpretation of the given argument. For example .RS .br -.BI "mount --target /mountpoint" +.BI "mount \-\-target /mountpoint" .br .RE @@ -123,7 +125,7 @@ For more robust and definable output use mountpoint name are replaced with '?'. .TP -.BR "mount " [ -l "] [" "-t \fItype\fP" ] +.BR "mount " [ \-l "] [" "\-t \fItype\/\fP" ] lists all mounted filesystems (of type .IR type ). The option \-l adds the labels in this listing. @@ -148,8 +150,13 @@ partition .B PARTUUID or .B PARTLABEL -(partition identifiers are supported for GUID Partition Table (GPT) and MAC -partition tables only). 
+(partition identifiers are supported for example for GUID Partition Table (GPT) +partition tables). + +Don't forget that there is no guarantee that UUIDs and labels are really +unique, especially if you move, share or copy the device. Use +.B "lsblk \-o +UUID,PARTUUID" +to verify that the UUIDs are really unique in your system. The recommended setup is to use tags (e.g. LABEL=<label>) rather than .B /dev/disk/by-{label,uuid,partuuid,partlabel} @@ -157,7 +164,7 @@ udev symlinks in the /etc/fstab file. The tags are more readable, robust and portable. The .BR mount (8) command internally uses udev -symlinks, so use the symlinks in /etc/fstab has no advantage over the tags. +symlinks, so the use of symlinks in /etc/fstab has no advantage over the tags. For more details see .BR libblkid (3). @@ -190,7 +197,7 @@ The file may contain lines describing what devices are usually mounted where, using which options. The default location of the .BR fstab (5) -file could be overridden by --fstab <path> command line option (see below for +file could be overridden by \-\-fstab <path> command line option (see below for more details). .LP The command @@ -214,9 +221,9 @@ option will make mount fork, so that the filesystems are mounted simultaneously. .LP When mounting a filesystem mentioned in -.IR fstab +.I fstab or -.IR mtab, +.IR mtab , it suffices to give only the device, or only the mount point. @@ -249,7 +256,7 @@ If you want to override mount options from you have to use: .RS .sp -.B "mount device|dir -o <options>" +.B "mount device|dir \-o <options>" .sp .RE and then the mount options from command line will be appended to @@ -268,7 +275,7 @@ and .I /proc/mounts have very similar contents. The former has somewhat more information, such as the mount options used, -but is not necessarily up-to-date (cf. the +but is not necessarily up-to-date (cf.\& the .B \-n option below). 
It is possible to replace .I /etc/mtab @@ -323,7 +330,7 @@ The option is similar to the .B user option, with the restriction that the user must be the owner -of the special file. This may be useful e.g. for +of the special file. This may be useful e.g.\& for .I /dev/fd if a login script makes the console user owner of this device. The @@ -340,13 +347,13 @@ Since Linux 2.4.0 it is possible to remount part of the file hierarchy somewhere else. The call is .RS .br -.B mount --bind +.B mount \-\-bind .I olddir newdir .RE or shortoption .RS .br -.B mount -B +.B mount \-B .I olddir newdir .RE or fstab entry is: @@ -354,7 +361,7 @@ or fstab entry is: .br .I /olddir .I /newdir -.B none bind +.B none bind .RE After this call the same contents is accessible in two places. @@ -364,7 +371,7 @@ directory, for example: .RS .br -.B mount --bind +.B mount \-\-bind .I foo foo .RE @@ -374,7 +381,7 @@ a second place using .RS .br -.B mount --rbind +.B mount \-\-rbind .I olddir newdir .RE @@ -382,22 +389,22 @@ or shortoption .RS .br -.B mount -R +.B mount \-R .I olddir newdir .RE .\" available since Linux 2.4.11. Note that the filesystem mount options will remain the same as those -on the original mount point, and cannot be changed by passing the -o -option along with --bind/--rbind. The mount options can be +on the original mount point, and cannot be changed by passing the \-o +option along with \-\-bind/\-\-rbind. The mount options can be changed by a separate remount command, for example: .RS .br -.B mount --bind +.B mount \-\-bind .I olddir newdir .br -.B mount -o remount,ro +.B mount \-o remount,ro .I newdir .RE @@ -410,10 +417,10 @@ command (then mount(8) does not read /etc/mtab), then you have to use bind flag .RS .br -.B mount --bind +.B mount \-\-bind .I olddir newdir .br -.B mount -o remount,ro,bind +.B mount \-o remount,ro,bind .I olddir newdir .RE @@ -434,13 +441,13 @@ Since Linux 2.5.1 it is possible to atomically move a to another place. 
The call is .RS .br -.B mount --move +.B mount \-\-move .I olddir newdir .RE or shortoption .RS .br -.B mount -M +.B mount \-M .I olddir newdir .RE This will cause the contents which previously appeared under olddir to be @@ -451,7 +458,7 @@ has to be a mountpoint. Note that moving a mount residing under a shared mount is invalid and unsupported. Use -.B findmnt -o TARGET,PROPAGATION /dir +.B findmnt \-o TARGET,PROPAGATION /dir to see the current propagation flags. .RE @@ -461,7 +468,7 @@ Since Linux 2.6.15 it is possible to mark a mount and its submounts as shared, private, slave or unbindable. A shared mount provides ability to create mirrors of that mount such that mounts and umounts within any of the mirrors propagate to the other mirror. A slave mount receives propagation from its master, but -any not vice-versa. A private mount carries no propagation abilities. A +any not vice-versa. A private mount carries no propagation abilities. An unbindable mount is a private mount which cannot be cloned through a bind operation. Detailed semantics is documented in .B Documentation/filesystems/sharedsubtree.txt @@ -470,10 +477,10 @@ file in the kernel source tree. Supported operations: .RS .nf -.BI "mount --make-shared " mountpoint -.BI "mount --make-slave " mountpoint -.BI "mount --make-private " mountpoint -.BI "mount --make-unbindable " mountpoint +.BI "mount \-\-make-shared " mountpoint +.BI "mount \-\-make-slave " mountpoint +.BI "mount \-\-make-private " mountpoint +.BI "mount \-\-make-unbindable " mountpoint .fi .RE @@ -482,17 +489,17 @@ mounts under a given mountpoint. 
.RS .nf -.BI "mount --make-rshared " mountpoint -.BI "mount --make-rslave " mountpoint -.BI "mount --make-rprivate " mountpoint -.BI "mount --make-runbindable " mountpoint +.BI "mount \-\-make-rshared " mountpoint +.BI "mount \-\-make-rslave " mountpoint +.BI "mount \-\-make-rprivate " mountpoint +.BI "mount \-\-make-runbindable " mountpoint .fi .RE .BR mount (8) .B does not read .BR fstab (5) -when --make-* operation is requested. All necessary information has to be +when \-\-make-* operation is requested. All necessary information has to be specified on command line. Note that Linux kernel does not allow to change more propagation flags by one @@ -518,7 +525,7 @@ as mount options For example .RS .nf -.BI "mount --make-private --make-unbindable /dev/sda1 /A" +.BI "mount \-\-make-private \-\-make-unbindable /dev/sda1 /A" .fi .RE @@ -526,8 +533,8 @@ is the same as .RS .nf .BI "mount /dev/sda1 /A" -.BI "mount --make-private /A" -.BI "mount --make-unbindable /A" +.BI "mount \-\-make-private /A" +.BI "mount \-\-make-unbindable /A" .fi .RE .RE @@ -548,14 +555,17 @@ Command line options available for the .B mount command: .IP "\fB\-V, \-\-version\fP" -Output version. +Display version information and exit. .IP "\fB\-h, \-\-help\fP" -Print a help message. +Display help text and exit. .IP "\fB\-v, \-\-verbose\fP" Verbose mode. .IP "\fB\-a, \-\-all\fP" Mount all filesystems (of the given types) mentioned in -.IR fstab . +.I fstab +(except for those whose line contains the +.B noauto +keyword). .IP "\fB\-F, \-\-fork\fP" (Used in conjunction with .BR \-a .) @@ -576,53 +586,52 @@ conjunction with the flag to determine what the .B mount command is trying to do. It can also be used to add entries for devices -that were mounted earlier with the -n option. The -f option checks for +that were mounted earlier with the \-n option. The \-f option checks for existing record in /etc/mtab and fails when the record already exists (with regular non-fake mount, this check is done by kernel). 
-.IP "\fB\-i, \-\-internal\-only\fP" +.IP "\fB\-i, \-\-internal-only\fP" Don't call the /sbin/mount.<filesystem> helper even if it exists. -.IP "\fB\-l, \-\-show\-labels\fP" +.IP "\fB\-l, \-\-show-labels\fP" Add the labels in the mount output. Mount must have -permission to read the disk device (e.g. be suid root) for this to work. +permission to read the disk device (e.g.\& be suid root) for this to work. One can set such a label for ext2, ext3 or ext4 using the .BR e2label (8) utility, or for XFS using .BR xfs_admin (8), or for reiserfs using .BR reiserfstune (8). -.IP "\fB\-n, \-\-no\-mtab\fP" +.IP "\fB\-n, \-\-no-mtab\fP" Mount without writing in .IR /etc/mtab . This is necessary for example when .I /etc is on a read-only filesystem. -.IP "\fB\-c, \-\-no\-canonicalize\fP" -Don't canonicalize paths. The mount command canonicalizes all paths -(from command line or fstab) and stores canonicalized paths to the -.IR /etc/mtab +.IP "\fB\-c, \-\-no-canonicalize\fP" +Don't canonicalize paths. The mount command canonicalizes all paths +(from command line or fstab) and stores canonicalized paths to the +.I /etc/mtab file. This option can be used together with the .B \-f flag for already canonicalized absolute paths. .IP "\fB\-s\fP" -Tolerate sloppy mount options rather than failing. This will ignore -mount options not supported by a filesystem type. Not all filesystems -support this option. This option exists for support of the Linux -autofs\-based automounter. +Tolerate sloppy mount options rather than failing. This will ignore mount +options not supported by a filesystem type. Not all filesystems support this +option. Currently it's supported by the mount.nfs mount helper only. .IP "\fB\-\-source \fIsrc\fP" If only one argument for the mount command is given then the argument might be interpreted as target (mountpoint) or source (device). This option allows to explicitly define that the argument is mount source. 
-.IP "\fB\-r, \-\-read\-only\fP" +.IP "\fB\-r, \-\-read-only\fP" Mount the filesystem read-only. A synonym is .BR "\-o ro" . Note that, depending on the filesystem type, state and kernel behavior, the -system may still write to the device. For example, Ext3 or ext4 will replay its +system may still write to the device. For example, ext3 or ext4 will replay its journal if the filesystem is dirty. To prevent this kind of write access, you may want to mount ext3 or ext4 filesystem with "ro,noload" mount options or set the block device to read-only mode, see command .BR blockdev (8). -.IP "\fB\-w, \-\-rw, \-\-read\-write\fP" +.IP "\fB\-w, \-\-rw, \-\-read-write\fP" Mount the filesystem read/write. This is the default. A synonym is .BR "\-o rw" . .IP "\fB\-L, \-\-label \fIlabel\fP" @@ -638,7 +647,7 @@ These two options require the file Specifies alternative fstab file. If the \fIpath\fP is directory then the files in the directory are sorted by .BR strverscmp (3), -files that starts with "." or without .fstab extension are ignored. The option +files that start with "."\& or without \&.fstab extension are ignored. The option can be specified more than once. This option is mostly designed for initramfs or chroot scripts where additional configuration is specified outside standard system configuration. @@ -699,7 +708,7 @@ Note that coherent, sysv and xenix are equivalent and that .I xenix and .I coherent -will be removed at some point in the future \(em use +will be removed at some point in the future \(en use .I sysv instead. Since kernel version 2.1.21 the types .I ext @@ -719,7 +728,7 @@ and support filesystem subtypes. The subtype is defined by '.subtype' suffix. For example 'fuse.sshfs'. It's recommended to use subtype notation rather than add any prefix to the mount source (for example 'sshfs#example.com' is -depreacated). +deprecated). For most types all the .B mount @@ -870,7 +879,7 @@ in the system kernel. 
To check the current setting see the options in /proc/mounts. The following options apply to any filesystem that is being -mounted (but not every filesystem actually honors them - e.g., the +mounted (but not every filesystem actually honors them \(en e.g.\&, the .B sync option today has effect only for ext2, ext3, fat, vfat and ufs): @@ -902,14 +911,14 @@ Can only be mounted explicitly (i.e., the .B \-a option will not cause the filesystem to be mounted). .TP -\fBcontext=\fP\fIcontext\fP, \fBfscontext=\fP\fIcontext\fP, \fBdefcontext=\fP\fIcontext\fP and \fBrootcontext=\fP\fIcontext\fP +\fBcontext=\fP\,\fIcontext\fP, \fBfscontext=\fP\,/\fIcontext\fP, \fBdefcontext=\fP\,/\fIcontext\fP and \fBrootcontext=\fP\,\fIcontext\fP The -.BR context= +.B context= option is useful when mounting filesystems that do not support extended attributes, such as a floppy or hard disk formatted with VFAT, or systems that are not normally running under SELinux, such as an ext3 formatted disk from a non-SELinux workstation. You can also use -.BR context= +.B context= on filesystems you do not trust, such as a floppy. It also helps in compatibility with xattr-supporting filesystems on earlier 2.4.<x> kernel versions. Even where xattrs are supported, you can save time not having to label every file by @@ -919,7 +928,7 @@ A commonly used option for removable media is .BR context="system_u:object_r:removable_t" . Two other options are -.BR fscontext= +.B fscontext= and .BR defcontext= , both of which are mutually exclusive of the context option. This means you @@ -927,7 +936,7 @@ can use fscontext and defcontext with each other, but neither can be used with context. The -.BR fscontext= +.B fscontext= option works for all filesystems, regardless of their xattr support. The fscontext option sets the overarching filesystem label to a specific security context. 
This filesystem label is separate from the @@ -939,35 +948,37 @@ fscontext provides, in addition to supplying the same label for individual files. You can set the default security context for unlabeled files using -.BR defcontext= +.B defcontext= option. This overrides the value set for unlabeled files in the policy and requires a filesystem that supports xattr labeling. The -.BR rootcontext= +.B rootcontext= option allows you to explicitly label the root inode of a FS being mounted -before that FS or inode because visible to userspace. This was found to be +before that FS or inode becomes visible to userspace. This was found to be useful for things like stateless linux. -Note that kernel rejects any remount request that includes the context -option even if unchanged from the current context. +Note that the kernel rejects any remount request that includes the context +option, \fBeven\fP when unchanged from the current context. -.B Warning that \fIcontext\fP value might contains comma -and in this case the value has to be properly quoted otherwise +.BR "Warning: the \fIcontext\fP value might contain commas" , +in which case the value has to be properly quoted, otherwise .BR mount (8) -will interpret the comma as separator between mount options. Don't forget that -shell strips off quotes and -.BR "double quoting is required" , -for example: +will interpret the comma as a separator between mount options. Don't forget that +the shell strips off quotes and thus +.BR "double quoting is required" . +For example: .RS .RS .sp -mount -t tmpfs none /mnt \-o 'context="system_u:object_r:tmp_t:s0:c127,c456",noexec' +.nf +.B mount \-t tmpfs none /mnt \-o \e +.B 'context="system_u:object_r:tmp_t:s0:c127,c456",noexec' +.fi .sp .RE - For more details, see -.BR selinux (8) +.BR selinux (8). .RE .TP @@ -1071,8 +1082,7 @@ effect. .TP .B nosuid Do not allow set-user-identifier or set-group-identifier bits to take -effect. 
(This seems safe, but is in fact rather unsafe if you have -suidperl(1) installed.) +effect. .TP .B silent Turn on the silent flag. @@ -1096,22 +1106,22 @@ readonly filesystem writable. It does not change device or mount point. The remount functionality follows the standard way how the mount command works with options from fstab. It means the mount command doesn't read fstab (or mtab) only when a -.IR device +.I device and -.IR dir +.I dir are fully specified. -.BR "mount -o remount,rw /dev/foo /dir" +.B "mount \-o remount,rw /dev/foo /dir" After this call all old mount options are replaced and arbitrary stuff from fstab is ignored, except the loop= option which is internally generated and maintained by the mount command. -.BR "mount -o remount,rw /dir" +.B "mount \-o remount,rw /dir" After this call mount reads fstab (or mtab) and merges these options with -options from command line ( -.B -o +options from command line (\c +.B \-o\c ). .TP .B ro @@ -1122,7 +1132,7 @@ Mount the filesystem read-write. .TP .B sync All I/O to the filesystem should be done synchronously. In case of media with limited number of write cycles -(e.g. some flash drives) "sync" may cause life-cycle shortening. +(e.g.\& some flash drives) "sync" may cause life-cycle shortening. .TP .B user Allow an ordinary user to mount the filesystem. @@ -1148,13 +1158,13 @@ This option implies the options All options prefixed with "x-" are interpreted as comments or userspace applications specific options. These options are not stored to mtab file, send to mount.<type> helpers or -.B mount(2) -system call. The suggested format is x-<appname>.<option> (e.g. x-systemd.automount). +.BR mount (2) +system call. The suggested format is x-<appname>.<option> (e.g.\& x-systemd.automount). .TP .B x-mount.mkdir[=<mode>] Allow to make a target directory (mountpoint). The optional argument <mode> specifies the file system access mode used for -.B mkdir (2) +.BR mkdir (2) in octal notation. The default mode is 0755. 
This functionality is supported only for root users. @@ -1170,17 +1180,17 @@ More info may be found in the kernel source subdirectory .SH "Mount options for adfs" .TP -\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP Set the owner and group of the files in the filesystem (default: uid=gid=0). .TP -\fBownmask=\fP\fIvalue\fP and \fBothmask=\fP\fIvalue\fP +\fBownmask=\fP\,\fIvalue\fP and \fBothmask=\fP\,\fIvalue\fP Set the permission mask for ADFS 'owner' permissions and 'other' permissions, respectively (default: 0700 and 0077, respectively). See also .IR /usr/src/linux/Documentation/filesystems/adfs.txt . .SH "Mount options for affs" .TP -\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP Set the owner and group of the root of the filesystem (default: uid=gid=0, but with option .B uid @@ -1188,7 +1198,7 @@ or .B gid without specified value, the uid and gid of the current process are taken). .TP -\fBsetuid=\fP\fIvalue\fP and \fBsetgid=\fP\fIvalue\fP +\fBsetuid=\fP\,\fIvalue\fP and \fBsetgid=\fP\,\fIvalue\fP Set the owner and group of all files. .TP .BI mode= value @@ -1258,7 +1268,7 @@ the number of the pseudo terminal is then made available to the process and the pseudo terminal slave can be accessed as .IR /dev/pts/ <number>. .TP -\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP This sets the owner or the group of newly created PTYs to the specified values. When nothing is specified, they will be set to the UID and GID of the creating process. @@ -1294,33 +1304,33 @@ only if CONFIG_DEVPTS_MULTIPLE_INSTANCES is enabled in the kernel configuration. To use this option effectively, -.IR /dev/ptmx +.I /dev/ptmx must be a symbolic link to -.IR pts/ptmx. +.I pts/ptmx. See -.IR Documentation/filesystems/devpts.txt +.I Documentation/filesystems/devpts.txt in the linux kernel source tree for details. 
.TP .BI ptmxmode= value Set the mode for the new -.IR ptmx +.I ptmx device node in the devpts filesystem. With the support for multiple instances of devpts (see .B newinstance option above), each instance has a private -.IR ptmx +.I ptmx node in the root of the devpts filesystem (typically -.IR /dev/pts/ptmx). +.IR /dev/pts/ptmx ). For compatibility with older versions of the kernel, the default mode of the new -.IR ptmx +.I ptmx node is 0000. .BI ptmxmode= value specifies a more useful mode for the -.IR ptmx +.I ptmx node and is highly recommended when the .B newinstance option is specified. @@ -1358,16 +1368,25 @@ field the total number of blocks of the filesystem, while the .B bsddf behaviour (which is the default) is to subtract the overhead blocks used by the ext2 filesystem and not available for file storage. Thus -.nf - -% mount /k -o minixdf; df /k; umount /k -Filesystem 1024-blocks Used Available Capacity Mounted on -/dev/sda6 2630655 86954 2412169 3% /k -% mount /k -o bsddf; df /k; umount /k -Filesystem 1024-blocks Used Available Capacity Mounted on -/dev/sda6 2543714 13 2412169 0% /k - -.fi +.sp 1 +% mount /k \-o minixdf; df /k; umount /k +.TS +tab(#); +l2 l2 r2 l2 l2 l +l c r c c l. +Filesystem#1024-blocks#Used#Available#Capacity#Mounted on +/dev/sda6#2630655#86954#2412169#3%#/k +.TE +.sp 1 +% mount /k \-o bsddf; df /k; umount /k +.TS +tab(#); +l2 l2 r2 l2 l2 l +l c r c c l. +Filesystem#1024-blocks#Used#Available#Capacity#Mounted on +/dev/sda6#2543714#13#2412169#0%#/k +.TE +.sp 1 (Note that this example shows that one can add command line options to the options given in .IR /etc/fstab .) @@ -1377,7 +1396,7 @@ to the options given in No checking is done at mount time. This is the default. This is fast. It is wise to invoke .BR e2fsck (8) -every now and then, e.g. at boot time. The non-default behavior is unsupported +every now and then, e.g.\& at boot time. The non-default behavior is unsupported (check=normal and check=strict options have been removed). 
Note that these mount options don't have to be supported if ext4 kernel driver is used for ext2 and ext3 filesystems. .TP @@ -1395,7 +1414,7 @@ changed using .BR grpid | bsdgroups " and " nogrpid | sysvgroups These options define what group id a newly created file gets. When -.BR grpid +.B grpid is set, it takes the group id of the directory in which it is created; otherwise (the default) it takes the fsgid of the current process, unless the directory has the setgid bit set, in which case it takes the gid @@ -1407,14 +1426,14 @@ The usrquota (same as quota) mount option enables user quota support on the filesystem. grpquota enables group quotas support. You need the quota utilities to actually enable and manage the quota system. .TP -.BR nouid32 +.B nouid32 Disables 32-bit UIDs and GIDs. This is for interoperability with older kernels which only store and expect 16-bit values. .TP .BR oldalloc " or " orlov Use old allocator or Orlov allocator for new inodes. Orlov is default. .TP -\fBresgid=\fP\fIn\fP and \fBresuid=\fP\fIn\fP +\fBresgid=\fP\,\fIn\fP and \fBresuid=\fP\,\fIn\fP The ext2 filesystem reserves a certain percentage of the available space (by default 5%, see .BR mke2fs (8) @@ -1428,7 +1447,7 @@ Instead of block 1, use block .I n as superblock. This could be useful when the filesystem has been damaged. (Earlier, copies of the superblock would be made every 8192 blocks: in -block 1, 8193, 16385, ... (and one got thousands of copies on +block 1, 8193, 16385, \&...\& (and one got thousands of copies on a big filesystem). Since version 1.08, .B mke2fs has a \-s (sparse superblock) option to reduce the number of backup @@ -1436,8 +1455,8 @@ superblocks, and since version 1.15 this is the default. Note that this may mean that ext2 filesystems created by a recent .B mke2fs cannot be mounted r/w under Linux 2.0.*.) -The block number here uses 1k units. Thus, if you want to use logical -block 32768 on a filesystem with 4k blocks, use "sb=131072". 
+The block number here uses 1\ k units. Thus, if you want to use logical +block 32768 on a filesystem with 4\ k blocks, use "sb=131072". .TP .BR user_xattr | nouser_xattr Support "user." extended attributes (or not). @@ -1452,22 +1471,22 @@ well as the following additions: .\" .BR abort .\" Mount the filesystem in abort mode, as if a fatal error has occurred. .TP -.BR journal=update +.B journal=update Update the ext3 filesystem's journal to the current format. .TP -.BR journal=inum +.B journal=inum When a journal already exists, this option is ignored. Otherwise, it specifies the number of the inode which will represent the ext3 filesystem's -journal file; ext3 will create a new journal, overwriting the old contents +journal file; ext3 will create a new journal, overwriting the old contents of the file whose inode number is .IR inum . .TP -.BR journal_dev=devnum +.BR journal_dev=devnum / journal_path=path When the external journal device's major/minor numbers -have changed, this option allows the user to specify +have changed, these options allow the user to specify the new journal location. The journal device is -identified through its new major/minor numbers encoded -in devnum. +identified either through its new major/minor numbers encoded +in devnum, or via a path to the device. .TP .BR norecovery / noload Don't load the journal on mounting. Note that @@ -1480,7 +1499,7 @@ lead to any number of problems. Specifies the journaling mode for file data. Metadata is always journaled. To use modes other than .B ordered -on the root filesystem, pass the mode to the kernel as boot parameter, e.g. +on the root filesystem, pass the mode to the kernel as boot parameter, e.g.\& .IR rootflags=data=journal . .RS .TP @@ -1493,32 +1512,40 @@ This is the default mode. All data is forced directly out to the main file system prior to its metadata being committed to the journal. 
.TP .B writeback -Data ordering is not preserved - data may be written into the main +Data ordering is not preserved \(en data may be written into the main filesystem after its metadata has been committed to the journal. This is rumoured to be the highest-throughput option. It guarantees internal filesystem integrity, however it can allow old data to appear in files after a crash and journal recovery. .RE .TP -.BR barrier=0 " / " barrier=1 " -This enables/disables barriers. barrier=0 disables it, barrier=1 enables it. -Write barriers enforce proper on-disk ordering of journal commits, making -volatile disk write caches safe to use, at some performance penalty. The ext3 -filesystem does not enable write barriers by default. Be sure to enable -barriers unless your disks are battery-backed one way or another. Otherwise -you risk filesystem corruption in case of power failure. +.B data_err=ignore +Just print an error message if an error occurs in a file data buffer in +ordered mode. +.TP +.B data_err=abort +Abort the journal if an error occurs in a file data buffer in ordered mode. +.TP +.BR barrier=0 " / " barrier=1 " +This disables / enables the use of write barriers in the jbd code. barrier=0 +disables, barrier=1 enables (default). This also requires an IO stack which can +support barriers, and if jbd gets an error on a barrier write, it will disable +barriers again with a warning. Write barriers enforce proper on-disk ordering +of journal commits, making volatile disk write caches safe to use, at some +performance penalty. If your disks are battery-backed in one way or another, +disabling barriers may safely improve performance. .TP .BI commit= nrsec Sync all data and metadata every .I nrsec seconds. The default value is 5 seconds. Zero means default. .TP -.BR user_xattr +.B user_xattr Enable Extended User Attributes. See the .BR attr (5) manual page. .TP -.BR acl +.B acl Enable POSIX Access Control Lists. See the .BR acl (5) manual page. 
@@ -1537,32 +1564,26 @@ incorporates scalability and reliability enhancements for supporting large filesystem. The options -.B journal_dev, noload, data, commit, orlov, oldalloc, [no]user_xattr +.B journal_dev, norecovery, noload, data, commit, orlov, oldalloc, [no]user_xattr .B [no]acl, bsddf, minixdf, debug, errors, data_err, grpid, bsdgroups, nogrpid .B sysvgroups, resgid, resuid, sb, quota, noquota, grpquota, usrquota .B usrjquota, grpjquota and jqfmt are backwardly compatible with ext3 or ext2. .TP -.BR journal_checksum +.B journal_checksum Enable checksumming of the journal transactions. This will allow the recovery code in e2fsck and the kernel to detect corruption in the kernel. It is a compatible change and will be ignored by older kernels. .TP -.BR journal_async_commit +.B journal_async_commit Commit block can be written to disk without waiting for descriptor blocks. If enabled older kernels cannot mount the device. This will enable 'journal_checksum' internally. .TP -.BR barrier=0 " / " barrier=1 " / " barrier " / " nobarrier -This enables/disables the use of write barriers in the jbd code. barrier=0 -disables, barrier=1 enables. This also requires an IO stack which can support -barriers, and if jbd gets an error on a barrier write, it will disable again -with a warning. Write barriers enforce proper on-disk ordering of journal -commits, making volatile disk write caches safe to use, at some performance -penalty. If your disks are battery-backed in one way or another, disabling -barriers may safely improve performance. The mount options "barrier" and -"nobarrier" can also be used to enable or disable barriers, for consistency -with other ext4 mount options. +.BR barrier=0 " / " barrier=1 " / " barrier " / " nobarrier +These mount options have the same effect as in ext3. The mount options +"barrier" and "nobarrier" are added for consistency with other ext4 mount +options. The ext4 filesystem enables write barriers by default. 
.TP @@ -1576,10 +1597,10 @@ Number of filesystem blocks that mballoc will try to use for allocation size and alignment. For RAID5/6 systems this should be the number of data disks * RAID chunk size in filesystem blocks. .TP -.BR delalloc +.B delalloc Deferring block allocation until write-out time. .TP -.BR nodelalloc +.B nodelalloc Disable delayed allocation. Blocks are allocated when data is copied from user to page cache. .TP @@ -1595,7 +1616,7 @@ takes to finish committing a transaction. Call this time the "commit time". If the time that the transaction has been running is less than the commit time, ext4 will try sleeping for the commit time to see if other operations will join the transaction. The commit time is capped by the max_batch_time, which -defaults to 15000us (15ms). This optimization can be turned off entirely by +defaults to 15000\ \[mc]s (15\ ms). This optimization can be turned off entirely by setting max_batch_time to 0. .TP .BI min_batch_time= usec @@ -1610,7 +1631,7 @@ used for I/O operations submitted by kjournald2 during a commit operation. This defaults to 3, which is a slightly higher priority than the default I/O priority. .TP -.BR abort +.B abort Simulate the effects of calling ext4_abort() for debugging purposes. This is normally used while remounting a filesystem which is already mounted. @@ -1619,11 +1640,11 @@ remounting a filesystem which is already mounted. Many broken applications don't use fsync() when replacing existing files via patterns such as -fd = open("foo.new")/write(fd,..)/close(fd)/ rename("foo.new", "foo") +fd = open("foo.new")/write(fd,...)/close(fd)/ rename("foo.new", "foo") or worse yet -fd = open("foo", O_TRUNC)/write(fd,..)/close(fd). +fd = open("foo", O_TRUNC)/write(fd,...)/close(fd). If auto_da_alloc is enabled, ext4 will detect the replace-via-rename and replace-via-truncate patterns and force that any delayed allocation blocks are @@ -1633,26 +1654,31 @@ operation is committed. 
This provides roughly the same level of guarantees as ext3, and avoids the "zero-length" problem that can happen when a system crashes before the delayed allocation blocks are forced to disk. .TP +.B noinit_itable +Do not initialize any uninitialized inode table blocks in the background. This +feature may be used by installation CD's so that the install process can +complete as quickly as possible; the inode table initialization process would +then be deferred until the next time the filesystem is mounted. +.TP +.B init_itable=n +The lazy itable init code will wait n times the number of milliseconds it took +to zero out the previous block group's inode table. This minimizes the impact on +system performance while the filesystem's inode table is being initialized. +.TP .BR discard / nodiscard Controls whether ext4 should issue discard/TRIM commands to the underlying block device when blocks are freed. This is useful for SSD devices and sparse/thinly-provisioned LUNs, but it is off by default until sufficient testing has been done. .TP -.BR nouid32 +.B nouid32 Disables 32-bit UIDs and GIDs. This is for interoperability with older kernels which only store and expect 16-bit values. .TP -.BR resize -Allows to resize filesystem to the end of the last -existing block group, further resize has to be done -with resize2fs either online, or offline. It can be -used only with conjunction with remount. -.TP .BR block_validity / noblock_validity This options allows to enables/disables the in-kernel facility for tracking -filesystem metadata blocks within internal data structures. This allows multi- +filesystem metadata blocks within internal data structures. This allows multi-\c block allocator and other routines to quickly locate extents which might overlap with filesystem metadata blocks. This option is intended for debugging purposes and since it negatively affects the performance, it is off by default. @@ -1666,9 +1692,17 @@ scalability on high speed storages. 
However this does not work with data journaling and dioread_nolock option will be ignored with kernel warning. Note that dioread_nolock code path is only used for extent-based files. Because of the restrictions this options comprises it is off by default -(e.g. dioread_lock). +(e.g.\& dioread_lock). .TP -.BR i_version +.B max_dir_size_kb=n +This limits the size of the directories so that any attempt to expand them +beyond the specified limit in kilobytes will cause an ENOSPC error. This is +useful in memory-constrained environments, where a very large directory can +cause severe performance problems or even provoke the Out Of Memory killer. (For +example, if there is only 512\ MB memory available, a 176\ MB directory may +seriously cramp the system's style.) +.TP +.B i_version Enable 64-bit inode version support. This option is off by default. .SH "Mount options for fat" @@ -1684,7 +1718,7 @@ filesystems.) .BR blocksize= { 512 | 1024 | 2048 } Set blocksize (default 512). This option is obsolete. .TP -\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP Set the owner and group of all files. (Default: the uid and gid of the current process.) .TP @@ -1718,7 +1752,7 @@ Other users can change timestamp. .PP The default is set from `dmask' option. (If the directory is writable, .BR utime (2) -is also allowed. I.e. ~dmask & 022) +is also allowed. I.e.\& \s+3~\s0dmask & 022) Normally .BR utime (2) @@ -1728,12 +1762,12 @@ normal check is too inflexible. With this option you can relax it. .RE .TP .BI check= value -Three different levels of pickyness can be chosen: +Three different levels of pickiness can be chosen: .RS .TP .BR r [ elaxed ] Upper and lower case are accepted and equivalent, long name parts are -truncated (e.g. +truncated (e.g.\& .I verylongname.foobar becomes .IR verylong.foo ), @@ -1835,7 +1869,7 @@ although they fail. Use with caution! 
.TP .B showexec If set, the execute permission bits of the file will be allowed only if -the extension part of the name is .EXE, .COM, or .BAT. Not set by default. +the extension part of the name is \&.EXE, \&.COM, or \&.BAT. Not set by default. .TP .B sys_immutable If set, ATTR_SYS attribute on FAT is handled as IMMUTABLE flag on Linux. @@ -1886,7 +1920,7 @@ Don't complain about invalid mount options. .SH "Mount options for hpfs" .TP -\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP Set the owner and group of all files. (Default: the uid and gid of the current process.) .TP @@ -1955,7 +1989,7 @@ and (Default: .BR check=strict .) .TP -\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP Give all files in the filesystem the indicated user or group id, possibly overriding the information found in the Rock Ridge extensions. (Default: @@ -1972,12 +2006,12 @@ no name translation is done. See .BR map=normal .) .B map=acorn is like -.BR map=normal +.B map=normal but also apply Acorn extensions if present. .TP .BI mode= value For non-Rock Ridge volumes, give all files the indicated mode. -(Default: read permission for everybody.) +(Default: read and execute permission for everybody.) Since Linux 2.1.37 one no longer needs to specify the mode in decimal. (Octal is indicated by a leading 0.) .TP @@ -2001,7 +2035,7 @@ possibly leading to silent data corruption.) .B cruft If the high byte of the file length contains other garbage, set this mount option to ignore the high order bits of the file length. -This implies that a file cannot be larger than 16MB. +This implies that a file cannot be larger than 16\ MB. .TP .BI session= x Select number of session on multisession CD. (Since 2.3.4.) @@ -2107,7 +2141,7 @@ New name for the option earlier called .IR iocharset . .\" since 2.5.11 .TP -.BR utf8 +.B utf8 Use UTF-8 for converting file names. 
.TP .BR uni_xlate= { 0 | 1 | 2 } @@ -2122,14 +2156,14 @@ If enabled (posix=1), the filesystem distinguishes between upper and lower case. The 8.3 alias names are presented as hard links instead of being suppressed. This option is obsolete. .TP -\fBuid=\fP\fIvalue\fP, \fBgid=\fP\fIvalue\fP and \fBumask=\fP\fIvalue\fP +\fBuid=\fP\,\fIvalue\fP, \fBgid=\fP\,\fIvalue\fP and \fBumask=\fP\,\fIvalue\fP Set the file permission on the filesystem. The umask value is given in octal. By default, the files are owned by root and not readable by somebody else. .SH "Mount options for proc" .TP -\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP These options are recognized, but have no effect as far as I can see. .SH "Mount options for ramfs" @@ -2140,7 +2174,7 @@ There are no mount options. .SH "Mount options for reiserfs" Reiserfs is a journaling filesystem. .TP -.BR conv +.B conv Instructs version 3.6 reiserfs software to mount a version 3.5 filesystem, using the 3.6 format for newly created objects. This filesystem will no longer be compatible with reiserfs 3.5 tools. @@ -2168,41 +2202,41 @@ unusual file-name patterns. .TP .B detect Instructs -.IR mount +.I mount to detect which hash function is in use by examining -the filesystem being mounted, and to write this information into +the filesystem being mounted, and to write this information into the reiserfs superblock. This is only useful on the first mount of an old format filesystem. .RE .TP -.BR hashed_relocation +.B hashed_relocation Tunes the block allocator. This may provide performance improvements in some situations. .TP -.BR no_unhashed_relocation +.B no_unhashed_relocation Tunes the block allocator. This may provide performance improvements in some situations. .TP -.BR noborder +.B noborder Disable the border allocator algorithm invented by Yury Yu. Rupasov. This may provide performance improvements in some situations. .TP -.BR nolog +.B nolog Disable journaling. 
This will provide slight performance improvements in some situations at the cost of losing reiserfs's fast recovery from crashes. Even with this option turned on, reiserfs still performs all journaling operations, save for actual writes into its journaling area. Implementation of -.IR nolog +.I nolog is a work in progress. .TP -.BR notail +.B notail By default, reiserfs stores small files and `file tails' directly into its tree. This confuses some utilities such as .BR LILO (8). This option is used to disable packing of files into the tree. .TP -.BR replayonly +.B replayonly Replay the transactions which are in the journal, but do not actually mount the filesystem. Mainly used by .IR reiserfsck . @@ -2219,24 +2253,24 @@ There is a special utility which can be obtained from .IR ftp://ftp.namesys.com/pub/reiserfsprogs . .TP -.BR user_xattr +.B user_xattr Enable Extended User Attributes. See the .BR attr (5) manual page. .TP -.BR acl +.B acl Enable POSIX Access Control Lists. See the .BR acl (5) manual page. .TP -.BR barrier=none " / " barrier=flush " -This enables/disables the use of write barriers in the journaling code. -barrier=none disables it, barrier=flush enables it. Write barriers enforce +.BR barrier=none " / " barrier=flush " +This disables / enables the use of write barriers in the journaling code. +barrier=none disables, barrier=flush enables (default). This also requires an +IO stack which can support barriers, and if reiserfs gets an error on a barrier +write, it will disable barriers again with a warning. Write barriers enforce proper on-disk ordering of journal commits, making volatile disk write caches -safe to use, at some performance penalty. The reiserfs filesystem does not -enable write barriers by default. Be sure to enable barriers unless your disks -are battery-backed one way or another. Otherwise you risk filesystem -corruption in case of power failure. +safe to use, at some performance penalty. 
If your disks are battery-backed in +one way or another, disabling barriers may safely improve performance. .SH "Mount options for romfs" None. @@ -2276,7 +2310,7 @@ is half of the number of your physical RAM pages, or (on a machine with highmem) the number of lowmem RAM pages, whichever is the lower. .PP -The tmpfs mount options for sizing ( +The tmpfs mount options for sizing (\c .BR size , .BR nr_blocks , and @@ -2286,7 +2320,7 @@ accept a suffix .B m or .B g -for Ki, Mi, Gi (binary kilo, mega and giga) and can be changed on remount. +for Ki, Mi, Gi (binary kilo (kibi), binary mega (mebi) and binary giga (gibi)) and can be changed on remount. .TP .B mode= @@ -2300,8 +2334,8 @@ The group id. .TP .B mpol=[default|prefer:Node|bind:NodeList|interleave|interleave:NodeList] Set the NUMA memory allocation policy for all files in that -instance (if the kernel CONFIG_NUMA is enabled) - which can be adjusted on the -fly via 'mount -o remount ...' +instance (if the kernel CONFIG_NUMA is enabled) \(en which can be adjusted on the +fly via 'mount \-o remount \&...' .RS .TP .B default @@ -2320,8 +2354,8 @@ prefers to allocate from each node in turn allocates from each node of NodeList in turn. .PP The NodeList format is a comma-separated list of decimal numbers and ranges, a -range being two hyphen-separated decimal numbers, the smallest and largest node -numbers in the range. For example, mpol=bind:0-3,5,7,9-15 +range being two "hyphen-minus"-separated decimal numbers, the smallest and largest node +numbers in the range. For example, mpol=bind:0\(en3,5,7,9\(en15 Note that trying to mount a tmpfs with an mpol option will fail if the running kernel does not support NUMA; and will fail if its nodelist @@ -2330,7 +2364,7 @@ tmpfs being mounted, but from time to time runs a kernel built without NUMA capability (perhaps a safe recovery kernel), or with fewer nodes online, then it is advisable to omit the mpol option from automatic mount options. 
It can be added later, when the tmpfs is already mounted -on MountPoint, by 'mount -o remount,mpol=Policy:NodeList MountPoint'. +on MountPoint, by 'mount \-o remount,mpol=Policy:NodeList MountPoint'. .SH "Mount options for ubifs" UBIFS is a flash file system which works on top of UBI volumes. Note that @@ -2371,19 +2405,19 @@ separator may be used instead of .TP The following mount options are available: .TP -.BR bulk_read +.B bulk_read Enable bulk-read. VFS read-ahead is disabled because it slows down the file system. Bulk-Read is an internal optimization. Some flashes may read faster if the data are read at one go, rather than at several read requests. For example, OneNAND can do "read-while-load" if it reads more than one NAND page. .TP -.BR no_bulk_read +.B no_bulk_read Do not bulk-read. This is the default. .TP -.BR chk_data_crc +.B chk_data_crc Check data CRC-32 checksums. This is the default. .TP -.BR no_chk_data_crc. +.BR no_chk_data_crc . Do not check data CRC-32 checksums. With this option, the filesystem does not check CRC-32 checksum for data, but it does check it for the internal indexing information. This option only affects reading, not writing. CRC-32 is always @@ -2469,7 +2503,7 @@ Old format of ufs, this is the default, read only. (Don't forget to give the \-r option.) .TP .B 44bsd -For filesystems created by a BSD-like system (NetBSD,FreeBSD,OpenBSD). +For filesystems created by a BSD-like system (NetBSD, FreeBSD, OpenBSD). .TP .B ufs2 Used in FreeBSD 5.x supported as read-write. @@ -2532,7 +2566,7 @@ Translate unhandled Unicode characters to special escaped sequences. This lets you backup and restore filenames that are created with any Unicode characters. Without this option, a '?' is used when no translation is possible. The escape character is ':' because it is -otherwise illegal on the vfat filesystem. The escape sequence +otherwise invalid on the vfat filesystem. 
The escape sequence that gets used, where u is the unicode character, is: ':', (u & 0x3f), ((u>>6) & 0x3f), (u>>12). .TP @@ -2543,7 +2577,7 @@ This option is obsolete. .B nonumtail First try to make a short name without sequence number, before trying -.IR name~num.ext . +.IR name\s+3~\s0num.ext . .TP .B utf8 UTF8 is the filesystem safe 8-bit encoding of Unicode that is used by the @@ -2579,15 +2613,15 @@ all upper case. This mode is the default since Linux 2.6.32. .SH "Mount options for usbfs" .TP -\fBdevuid=\fP\fIuid\fP and \fBdevgid=\fP\fIgid\fP and \fBdevmode=\fP\fImode\fP +\fBdevuid=\fP\,\fIuid\fP and \fBdevgid=\fP\,\fIgid\fP and \fBdevmode=\fP\,\fImode\fP Set the owner and group and mode of the device files in the usbfs filesystem (default: uid=gid=0, mode=0644). The mode is given in octal. .TP -\fBbusuid=\fP\fIuid\fP and \fBbusgid=\fP\fIgid\fP and \fBbusmode=\fP\fImode\fP +\fBbusuid=\fP\,\fIuid\fP and \fBbusgid=\fP\,\fIgid\fP and \fBbusmode=\fP\,\fImode\fP Set the owner and group and mode of the bus directories in the usbfs filesystem (default: uid=gid=0, mode=0555). The mode is given in octal. .TP -\fBlistuid=\fP\fIuid\fP and \fBlistgid=\fP\fIgid\fP and \fBlistmode=\fP\fImode\fP +\fBlistuid=\fP\,\fIuid\fP and \fBlistgid=\fP\,\fIgid\fP and \fBlistmode=\fP\,\fImode\fP Set the owner and group and mode of the file .I devices (default: uid=gid=0, mode=0444). The mode is given in octal. @@ -2597,198 +2631,200 @@ None. .SH "Mount options for xfs" .TP -.BI allocsize= size +.B allocsize=size Sets the buffered I/O end-of-file preallocation size when -doing delayed allocation writeout (default size is 64KiB). -Valid values for this option are page size (typically 4KiB) -through to 1GiB, inclusive, in power-of-2 increments. +doing delayed allocation writeout (default size is 64\ KiB). +Valid values for this option are page size (typically 4\ KiB) +through to 1\ GiB, inclusive, in power-of-2 increments. 
+.sp +The default behaviour is for dynamic end-of-file +preallocation size, which uses a set of heuristics to +optimise the preallocation size based on the current +allocation patterns within the file and the access patterns +to the file. Specifying a fixed allocsize value turns off +the dynamic behaviour. .TP .BR attr2 | noattr2 -The options enable/disable (default is enabled) an "opportunistic" -improvement to be made in the way inline extended attributes are -stored on-disk. -When the new form is used for the first time (by setting or -removing extended attributes) the on-disk superblock feature -bit field will be updated to reflect this format being in use. -.TP -.B barrier -Enables the use of block layer write barriers for writes into -the journal and unwritten extent conversion. This allows for -drive level write caching to be enabled, for devices that -support write barriers. -.TP -.B dmapi -Enable the DMAPI (Data Management API) event callouts. -Use with the -.B mtpt -option. -.TP -.BR grpid | bsdgroups " and " nogrpid | sysvgroups -These options define what group ID a newly created file gets. -When grpid is set, it takes the group ID of the directory in -which it is created; otherwise (the default) it takes the fsgid -of the current process, unless the directory has the setgid bit -set, in which case it takes the gid from the parent directory, -and also gets the setgid bit set if it is a directory itself. -.TP -.BI ihashsize= value -Sets the number of hash buckets available for hashing the -in-memory inodes of the specified mount point. If a value -of zero is used, the value selected by the default algorithm -will be displayed in -.IR /proc/mounts . +The options enable/disable an "opportunistic" improvement to +be made in the way inline extended attributes are stored +on-disk. 
When the new form is used for the first time when +attr2 is selected (either when setting or removing extended +attributes) the on-disk superblock feature bit field will be +updated to reflect this format being in use. +.sp +The default behaviour is determined by the on-disk feature +bit indicating that attr2 behaviour is active. If either +mount option is set, then that becomes the new default used +by the filesystem. +.sp +CRC enabled filesystems always use the attr2 format, and so +will reject the noattr2 mount option if it is set. +.TP +.BR barrier | nobarrier +Enables/disables the use of block layer write barriers for +writes into the journal and for data integrity operations. +This allows for drive level write caching to be enabled, for +devices that support write barriers. +.TP +.BR discard | nodiscard +Enable/disable the issuing of commands to let the block +device reclaim space freed by the filesystem. This is +useful for SSD devices, thinly provisioned LUNs and virtual +machine images, but may have a performance impact. +.sp +Note: It is currently recommended that you use the fstrim +application to discard unused blocks rather than the discard +mount option because the performance impact of this option +is quite severe. +.TP +.BR grpid | bsdgroups | nogrpid | sysvgroups +These options define what group ID a newly created file +gets. When grpid is set, it takes the group ID of the +directory in which it is created; otherwise it takes the +fsgid of the current process, unless the directory has the +setgid bit set, in which case it takes the gid from the +parent directory, and also gets the setgid bit set if it is +a directory itself. +.TP +.B filestreams +Make the data allocator use the filestreams allocation mode +across the entire filesystem rather than just on directories +configured to use it. 
.TP .BR ikeep | noikeep -When inode clusters are emptied of inodes, keep them around -on the disk (ikeep) - this is the traditional XFS behaviour -and is still the default for now. Using the noikeep option, -inode clusters are returned to the free space pool. -.TP -.B inode64 -Indicates that XFS is allowed to create inodes at any location -in the filesystem, including those which will result in inode -numbers occupying more than 32 bits of significance. This is -provided for backwards compatibility, but causes problems for -backup applications that cannot handle large inode numbers. -.TP -.BR largeio | nolargeio -If -.B nolargeio -is specified, the optimal I/O reported in -st_blksize by -.BR stat (2) -will be as small as possible to allow user -applications to avoid inefficient read/modify/write I/O. -If -.B largeio -is specified, a filesystem that has a -.B swidth -specified -will return the -.B swidth -value (in bytes) in st_blksize. If the -filesystem does not have a -.B swidth -specified but does specify -an -.B allocsize -then -.B allocsize -(in bytes) will be returned -instead. -If neither of these two options are specified, then filesystem -will behave as if -.B nolargeio -was specified. -.TP -.BI logbufs= value -Set the number of in-memory log buffers. Valid numbers range -from 2-8 inclusive. -The default value is 8 buffers for any recent kernel. -.TP -.BI logbsize= value -Set the size of each in-memory log buffer. -Size may be specified in bytes, or in kilobytes with a "k" suffix. -Valid sizes for version 1 and version 2 logs are 16384 (16k) and -32768 (32k). Valid sizes for version 2 logs also include -65536 (64k), 131072 (128k) and 262144 (256k). -The default value for any recent kernel is 32768. -.TP -\fBlogdev=\fP\fIdevice\fP and \fBrtdev=\fP\fIdevice\fP +When ikeep is specified, XFS does not delete empty inode +clusters and keeps them around on disk. When noikeep is +specified, empty inode clusters are returned to the free +space pool. 
+.TP +.BR inode32 | inode64 +When inode32 is specified, it indicates that XFS limits +inode creation to locations which will not result in inode +numbers with more than 32 bits of significance. +.sp +When inode64 is specified, it indicates that XFS is allowed +to create inodes at any location in the filesystem, +including those which will result in inode numbers occupying +more than 32 bits of significance. +.sp +inode32 is provided for backwards compatibility with older +systems and applications, since 64-bit inode numbers might +cause problems for some applications that cannot handle +large inode numbers. If applications are in use which do +not handle inode numbers bigger than 32 bits, the inode32 +option should be specified. +.TP +.BR largeio | nolargeio +If "nolargeio" is specified, the optimal I/O reported in +st_blksize by stat(2) will be as small as possible to allow +user applications to avoid inefficient read/modify/write +I/O. This is typically the page size of the machine, as +this is the granularity of the page cache. +.sp +If "largeio" is specified, a filesystem that was created with a +"swidth" specified will return the "swidth" value (in bytes) +in st_blksize. If the filesystem does not have a "swidth" +specified but does specify an "allocsize" then "allocsize" +(in bytes) will be returned instead. Otherwise the behaviour +is the same as if "nolargeio" was specified. +.TP +.B logbufs=value +Set the number of in-memory log buffers. Valid numbers +range from 2\(en8 inclusive. +.sp +The default value is 8 buffers. +.sp +If the memory cost of 8 log buffers is too high on small +systems, then it may be reduced at some cost to performance +on metadata intensive workloads. The logbsize option below +controls the size of each buffer and so is also relevant to +this case. +.TP +.B logbsize=value +Set the size of each in-memory log buffer. The size may be +specified in bytes, or in kibibytes (KiB) with a "k" suffix. 
+Valid sizes for version 1 and version 2 logs are 16384 (value=16k) +and 32768 (value=32k). Valid sizes for version 2 logs also +include 65536 (value=64k), 131072 (value=128k) and 262144 (value=256k). The +logbsize must be an integer multiple of the log +stripe unit configured at mkfs time. +.sp +The default value for version 1 logs is 32768, while the +default value for version 2 logs is MAX(32768, log_sunit). +.TP +.BR logdev=device " and " rtdev=device Use an external log (metadata journal) and/or real-time device. -An XFS filesystem has up to three parts: a data section, a log section, -and a real-time section. -The real-time section is optional, and the log section can be separate -from the data section or contained within it. -Refer to -.BR xfs (5). -.TP -.BI mtpt= mountpoint -Use with the -.B dmapi -option. The value specified here will be -included in the DMAPI mount event, and should be the path of -the actual mountpoint that is used. +An XFS filesystem has up to three parts: a data section, a log +section, and a real-time section. The real-time section is +optional, and the log section can be separate from the data +section or contained within it. .TP .B noalign -Data allocations will not be aligned at stripe unit boundaries. -.TP -.B noatime -Access timestamps are not updated when a file is read. +Data allocations will not be aligned at stripe unit +boundaries. This is only relevant to filesystems created +with non-zero data alignment parameters (sunit, swidth) by +mkfs. .TP .B norecovery The filesystem will be mounted without running log recovery. If the filesystem was not cleanly unmounted, it is likely to -be inconsistent when mounted in -.B norecovery -mode. +be inconsistent when mounted in "norecovery" mode. Some files or directories may not be accessible because of this. -Filesystems mounted -.B norecovery -must be mounted read-only or the mount will fail. +Filesystems mounted "norecovery" must be mounted read-only or +the mount will fail. 
.TP .B nouuid -Don't check for double mounted filesystems using the filesystem uuid. -This is useful to mount LVM snapshot volumes. -.TP -.B osyncisosync -Make O_SYNC writes implement true O_SYNC. WITHOUT this option, -Linux XFS behaves as if an -.B osyncisdsync -option is used, -which will make writes to files opened with the O_SYNC flag set -behave as if the O_DSYNC flag had been used instead. -This can result in better performance without compromising -data safety. -However if this option is not in effect, timestamp updates from -O_SYNC writes can be lost if the system crashes. -If timestamp updates are critical, use the -.B osyncisosync -option. +Don't check for double mounted file systems using the file +system uuid. This is useful to mount LVM snapshot volumes, +and often used in combination with "norecovery" for mounting +read-only snapshots. .TP -.BR uquota | usrquota | uqnoenforce | quota +.B noquota +Forcibly turns off all quota accounting and enforcement +within the filesystem. +.TP +.B uquota/usrquota/uqnoenforce/quota User disk quota accounting enabled, and limits (optionally) -enforced. Refer to -.BR xfs_quota (8) -for further details. +enforced. Refer to xfs_quota(8) for further details. .TP -.BR gquota | grpquota | gqnoenforce +.B gquota/grpquota/gqnoenforce Group disk quota accounting enabled and limits (optionally) -enforced. Refer to -.BR xfs_quota (8) -for further details. +enforced. Refer to xfs_quota(8) for further details. .TP -.BR pquota | prjquota | pqnoenforce +.B pquota/prjquota/pqnoenforce Project disk quota accounting enabled and limits (optionally) -enforced. Refer to -.BR xfs_quota (8) -for further details. +enforced. Refer to xfs_quota(8) for further details. .TP -\fBsunit=\fP\fIvalue\fP and \fBswidth=\fP\fIvalue\fP -Used to specify the stripe unit and width for a RAID device or a stripe -volume. -.I value -must be specified in 512-byte block units. 
-If this option is not specified and the filesystem was made on a stripe -volume or the stripe width or unit were specified for the RAID device at -mkfs time, then the mount system call will restore the value from the -superblock. -For filesystems that are made directly on RAID devices, these options can be -used to override the information in the superblock if the underlying disk -layout changes after the filesystem has been created. -The -.B swidth -option is required if the -.B sunit -option has been specified, -and must be a multiple of the -.B sunit -value. +.BR sunit=value " and " swidth=value +Used to specify the stripe unit and width for a RAID device +or a stripe volume. "value" must be specified in 512-byte +block units. These options are only relevant to filesystems +that were created with non-zero data alignment parameters. +.sp +The sunit and swidth parameters specified must be compatible +with the existing filesystem alignment characteristics. In +general, that means the only valid changes to sunit are +increasing it by a power-of-2 multiple. Valid swidth values +are any integer multiple of a valid sunit value. +.sp +Typically the only time these mount options are necessary is +after an underlying RAID device has had its geometry +modified, such as adding a new disk to a RAID5 LUN and +reshaping it. .TP .B swalloc Data allocations will be rounded up to stripe width boundaries when the current end of file is being extended and the file size is larger than the stripe width size. +.TP +.B wsync +When specified, all filesystem namespace operations are +executed synchronously. This ensures that when the namespace +operation (create, unlink, etc) completes, the change to the +namespace is on stable storage. This is useful in HA setups +where failover must not result in clients seeing +inconsistent namespace presentation during or after a +failover event. .SH "Mount options for xiafs" None. 
Although nothing is wrong with xiafs, it is not used much, @@ -2800,7 +2836,7 @@ One further possible type is a mount via the loop device. For example, the command .RS .sp -.B "mount /tmp/disk.img /mnt -t vfat -o loop=/dev/loop" +.B "mount /tmp/disk.img /mnt \-t vfat \-o loop=/dev/loop" .sp .RE will set up the loop device @@ -2816,7 +2852,7 @@ If no explicit loop device is mentioned will try to find some unused loop device and use that, for example .RS .sp -.B "mount /tmp/disk.img /mnt -o loop" +.B "mount /tmp/disk.img /mnt \-o loop" .sp .RE The mount command @@ -2827,7 +2863,7 @@ not specified or the filesystem is known for libblkid, for example: .sp .B "mount /tmp/disk.img /mnt" .sp -.B "mount -t ext3 /tmp/disk.img /mnt" +.B "mount \-t ext3 /tmp/disk.img /mnt" .sp .RE This type of mount knows about four options, namely @@ -2845,43 +2881,43 @@ will be freed by independently on .IR /etc/mtab . -You can also free a loop device by hand, using `losetup -d' or `umount -d`. +You can also free a loop device by hand, using `losetup \-d' or `umount \-d`. .SH RETURN CODES .B mount has the following return codes (the bits can be ORed): .TP -.BR 0 +.B 0 success .TP -.BR 1 +.B 1 incorrect invocation or permissions .TP -.BR 2 +.B 2 system error (out of memory, cannot fork, no more loop devices) .TP -.BR 4 +.B 4 internal .B mount bug .TP -.BR 8 +.B 8 user interrupt .TP -.BR 16 +.B 16 problems writing or locking /etc/mtab .TP -.BR 32 +.B 32 mount failure .TP -.BR 64 +.B 64 some mount succeeded .RE -The command mount -a returns 0 (all success), 32 (all failed) or 64 (some +The command mount \-a returns 0 (all success), 32 (all failed) or 64 (some failed, some success). -.SH NOTES +.SH "EXTERNAL HELPERS" The syntax of external mount helpers is: .RS @@ -2896,7 +2932,27 @@ The syntax of external mount helpers is: where the <type> is filesystem type and \-sfnvo options have same meaning like standard mount options. 
The \-t option is used for filesystems with subtypes -support (for example /sbin/mount.fuse -t fuse.sshfs). +support (for example /sbin/mount.fuse \-t fuse.sshfs). + +The command mount does not pass mount options +.BR unbindable , +.BR runbindable , +.BR private , +.BR rprivate , +.BR slave , +.BR rslave , +.BR shared , +.BR rshared , +.BR auto , +.BR noauto , +.BR comment , +.BR x-* , +.BR loop , +.B offset +and +.B sizelimit +to mount.<suffix> helpers. All other options are +passed in a comma-delimited list as the argument of the \-o option. .SH FILES .TP 18n @@ -2906,7 +2962,7 @@ filesystem table .I /etc/mtab table of mounted filesystems .TP -.I /etc/mtab~ +.I /etc/mtab\s+3~\s0 lock file .TP .I /etc/mtab.tmp @@ -2953,7 +3009,7 @@ The may not be able to change mount parameters (all .IR ext2fs -specific parameters, except -.BR sb , +.BR sb , are changeable with a remount, for example, but you can't change .B gid or @@ -2962,19 +3018,19 @@ for the .IR fatfs ). .PP It is possible that files -.IR /etc/mtab +.I /etc/mtab and -.IR /proc/mounts +.I /proc/mounts don't match. The first file is based only on the mount command options, but the -content of the second file also depends on the kernel and others settings (e.g. +content of the second file also depends on the kernel and others settings (e.g.\& remote NFS server. In particular case the mount command may reports unreliable information about a NFS mount point and the /proc/mounts file usually contains more reliable information.) .PP -Checking files on NFS filesystem referenced by file descriptors (i.e. the -.BR fcntl +Checking files on NFS filesystem referenced by file descriptors (i.e.\& the +.B fcntl and -.BR ioctl +.B ioctl families of functions) may lead to inconsistent result due to the lack of consistency check in kernel even if noac is used. 
.PP diff --git a/sys-utils/mount.c b/sys-utils/mount.c index 8ff94611c..97365b168 100644 --- a/sys-utils/mount.c +++ b/sys-utils/mount.c @@ -163,7 +163,7 @@ static void print_all(struct libmnt_context *cxt, char *pattern, int show_label) free(xsrc); } - mnt_free_cache(cache); + mnt_unref_cache(cache); mnt_free_iter(itr); } @@ -197,12 +197,14 @@ static int mount_all(struct libmnt_context *cxt) if (mnt_context_is_verbose(cxt)) printf("%-25s: mount successfully forked\n", tgt); } else { - mk_exit_code(cxt, mntrc); /* to print warnings */ - - if (mnt_context_get_status(cxt)) { + if (mk_exit_code(cxt, mntrc) == MOUNT_EX_SUCCESS) { nsucc++; - if (mnt_context_is_verbose(cxt)) + /* Note that MOUNT_EX_SUCCESS return code does + * not mean that FS has been really mounted + * (e.g. nofail option) */ + if (mnt_context_get_status(cxt) + && mnt_context_is_verbose(cxt)) printf("%-25s: successfully mounted\n", tgt); } else nerrs++; @@ -429,6 +431,8 @@ try_readonly: warnx(_("you must specify the filesystem type")); return MOUNT_EX_USAGE; case -MNT_ERR_NOSOURCE: + if (uflags & MNT_MS_NOFAIL) + return MOUNT_EX_SUCCESS; if (src) warnx(_("can't find %s"), src); else @@ -641,6 +645,8 @@ static struct libmnt_table *append_fstab(struct libmnt_context *cxt, mnt_table_set_parser_errcb(fstab, table_parser_errcb); mnt_context_set_fstab(cxt, fstab); + + mnt_unref_table(fstab); /* reference is handled by @cxt now */ } if (mnt_table_parse_fstab(fstab, path)) @@ -682,6 +688,8 @@ static void sanitize_paths(struct libmnt_context *cxt) static void append_option(struct libmnt_context *cxt, const char *opt) { + if (opt && (*opt == '=' || *opt == '\'' || *opt == '\"' || isblank(*opt))) + errx(MOUNT_EX_USAGE, _("unsupported option format: %s"), opt); if (mnt_context_append_options(cxt, opt)) err(MOUNT_EX_SYSERR, _("failed to append option '%s'"), opt); } @@ -988,8 +996,7 @@ int main(int argc, char **argv) * make a connection between the fstab and the canonicalization * cache. 
*/ - struct libmnt_cache *cache = mnt_context_get_cache(cxt); - mnt_table_set_cache(fstab, cache); + mnt_table_set_cache(fstab, mnt_context_get_cache(cxt)); } if (!mnt_context_get_source(cxt) && @@ -1027,25 +1034,42 @@ int main(int argc, char **argv) mnt_context_get_target(cxt))) { /* * B) mount -L|-U|--source|--target + * + * non-root may specify source *or* target, but not both */ if (mnt_context_is_restricted(cxt) && mnt_context_get_source(cxt) && mnt_context_get_target(cxt)) exit_non_root(NULL); - } else if (argc == 1) { + } else if (argc == 1 && (!mnt_context_get_source(cxt) || + !mnt_context_get_target(cxt))) { /* * C) mount [-L|-U|--source] <target> + * mount [--target <dir>] <source> * mount <source|target> * * non-root may specify source *or* target, but not both + * + * It does not matter for libmount if we set source or target + * here (the library is able to swap it), but it matters for + * sanitize_paths(). */ + int istag = mnt_tag_is_valid(argv[0]); + + if (istag && mnt_context_get_source(cxt)) + /* -L, -U or --source together with LABEL= or UUID= */ + errx(MOUNT_EX_USAGE, _("source specified more than once")); + else if (istag || mnt_context_get_target(cxt)) + mnt_context_set_source(cxt, argv[0]); + else + mnt_context_set_target(cxt, argv[0]); + if (mnt_context_is_restricted(cxt) && - mnt_context_get_source(cxt)) + mnt_context_get_source(cxt) && + mnt_context_get_target(cxt)) exit_non_root(NULL); - mnt_context_set_target(cxt, argv[0]); - } else if (argc == 2 && !mnt_context_get_source(cxt) && !mnt_context_get_target(cxt)) { /* @@ -1053,6 +1077,7 @@ int main(int argc, char **argv) */ if (mnt_context_is_restricted(cxt)) exit_non_root(NULL); + mnt_context_set_source(cxt, argv[0]); mnt_context_set_target(cxt, argv[1]); @@ -1077,7 +1102,6 @@ int main(int argc, char **argv) success_message(cxt); done: mnt_free_context(cxt); - mnt_free_table(fstab); return rc; } diff --git a/sys-utils/mountpoint.1 b/sys-utils/mountpoint.1 index 5eb415692..687462485 100644 
--- a/sys-utils/mountpoint.1 +++ b/sys-utils/mountpoint.1 @@ -17,7 +17,7 @@ mountpoint \- see if a directory is a mountpoint checks if the directory is mentioned in the /proc/self/mountinfo file. .SH OPTIONS .IP "\fB\-h, \-\-help\fP" -Print help and exit. +Display help text and exit. .IP "\fB\-q, \-\-quiet\fP" Be quiet - don't print anything. .IP "\fB\-d, \-\-fs\-devno\fP" diff --git a/sys-utils/mountpoint.c b/sys-utils/mountpoint.c index c86e94b12..cba5f0125 100644 --- a/sys-utils/mountpoint.c +++ b/sys-utils/mountpoint.c @@ -83,6 +83,7 @@ static int dir_to_device(const char *spec, dev_t *dev) /* to canonicalize all necessary paths */ cache = mnt_new_cache(); mnt_table_set_cache(tb, cache); + mnt_unref_cache(cache); fs = mnt_table_find_target(tb, spec, MNT_ITER_BACKWARD); if (fs && mnt_fs_get_target(fs)) { @@ -90,8 +91,7 @@ static int dir_to_device(const char *spec, dev_t *dev) rc = 0; } - mnt_free_table(tb); - mnt_free_cache(cache); + mnt_unref_table(tb); return rc; } diff --git a/sys-utils/nsenter.1 b/sys-utils/nsenter.1 index 47895753b..3964ee5d7 100644 --- a/sys-utils/nsenter.1 +++ b/sys-utils/nsenter.1 @@ -1,44 +1,45 @@ -.TH NSENTER 1 "January 2013" "util-linux" "User Commands" +.TH NSENTER 1 "June 2013" "util-linux" "User Commands" .SH NAME nsenter \- run program with namespaces of other processes .SH SYNOPSIS .B nsenter .RI [ options ] -.RI [ program ] -.RI [ arguments ] +.RI [ program +.RI [ arguments ]] .SH DESCRIPTION Enters the namespaces of one or more other processes and then executes the specified program. Enterable namespaces are: .TP .B mount namespace -mounting and unmounting filesystems will not affect rest of the system +Mounting and unmounting filesystems will not affect the rest of the system .RB ( CLONE_\:NEWNS -flag), except for filesystems which are explicitly marked as shared (by mount ---make-\:shared). See /proc\:/self\:/mountinfo for the shared flag. 
+flag), except for filesystems which are explicitly marked as shared (with +\fBmount --make-\:shared\fP; see \fI/proc\:/self\:/mountinfo\fP for the +\fBshared\fP flag). .TP .B UTS namespace -setting hostname, domainname will not affect rest of the system +Setting hostname or domainname will not affect the rest of the system. .RB ( CLONE_\:NEWUTS -flag). +flag) .TP .B IPC namespace -process will have independent namespace for System V message queues, semaphore -sets and shared memory segments +The process will have an independent namespace for System V message queues, +semaphore sets and shared memory segments. .RB ( CLONE_\:NEWIPC -flag). +flag) .TP .B network namespace -process will have independent IPv4 and IPv6 stacks, IP routing tables, firewall -rules, the +The process will have independent IPv4 and IPv6 stacks, IP routing tables, +firewall rules, the .I /proc\:/net and .I /sys\:/class\:/net -directory trees, sockets etc. +directory trees, sockets, etc. .RB ( CLONE_\:NEWNET -flag). +flag) .TP .B PID namespace -children will have a set of PID to process mappings separate from the +Children will have a set of PID to process mappings separate from the .B nsenter process .RB ( CLONE_\:NEWPID @@ -46,23 +47,18 @@ flag). .B nsenter will fork by default if changing the PID namespace, so that the new program and its children share the same PID namespace and are visible to each other. -If \-\-no\-fork is used, the new program will be exec'ed without forking. +If \fB\-\-no\-fork\fP is used, the new program will be exec'ed without forking. .TP .B user namespace -process will have distinct set of UIDs, GIDs and capabilities +The process will have a distinct set of UIDs, GIDs and capabilities. .RB ( CLONE_\:NEWUSER -flag). +flag) .TP -See the -.BR clone (2) -for exact semantics of the flags. +See \fBclone\fP(2) for the exact semantics of the flags. .TP -If program is not given, run ``${SHELL}'' (default: /bin\:/sh). 
+If \fIprogram\fP is not given, then ``${SHELL}'' is run (default: /bin\:/sh). .SH OPTIONS -Argument with square brakets, such as [\fIfile\fR], means optional argument. -Command line syntax to specify optional argument \-\-mount=/path\:/to\:/file. -Please notice the equals sign. .TP \fB\-t\fR, \fB\-\-target\fR \fIpid\fP Specify a target process to get contexts from. The paths to the contexts @@ -99,56 +95,62 @@ the working directory respectively .PD .RE .TP -\fB\-m\fR, \fB\-\-mount\fR [\fIfile\fR] -Enter the mount namespace. If no file is specified enter the mount namespace -of the target process. If file is specified enter the mount namespace +\fB\-m\fR, \fB\-\-mount\fR[=\fIfile\fR] +Enter the mount namespace. If no file is specified, enter the mount namespace +of the target process. If file is specified, enter the mount namespace specified by file. .TP -\fB\-u\fR, \fB\-\-uts\fR [\fIfile\fR] -Enter the UTS namespace. If no file is specified enter the UTS namespace of -the target process. If file is specified enter the UTS namespace specified by +\fB\-u\fR, \fB\-\-uts\fR[=\fIfile\fR] +Enter the UTS namespace. If no file is specified, enter the UTS namespace of +the target process. If file is specified, enter the UTS namespace specified by file. .TP -\fB\-i\fR, \fB\-\-ipc\fR [\fIfile\fR] -Enter the IPC namespace. If no file is specified enter the IPC namespace of -the target process. If file is specified enter the IPC namespace specified by +\fB\-i\fR, \fB\-\-ipc\fR[=\fIfile\fR] +Enter the IPC namespace. If no file is specified, enter the IPC namespace of +the target process. If file is specified, enter the IPC namespace specified by file. .TP -\fB\-n\fR, \fB\-\-net\fR [\fIfile\fR] -Enter the network namespace. If no file is specified enter the network -namespace of the target process. If file is specified enter the network +\fB\-n\fR, \fB\-\-net\fR[=\fIfile\fR] +Enter the network namespace. 
If no file is specified, enter the network +namespace of the target process. If file is specified, enter the network namespace specified by file. .TP -\fB\-p\fR, \fB\-\-pid\fR [\fIfile\fR] -Enter the PID namespace. If no file is specified enter the PID namespace of -the target process. If file is specified enter the PID namespace specified by +\fB\-p\fR, \fB\-\-pid\fR[=\fIfile\fR] +Enter the PID namespace. If no file is specified, enter the PID namespace of +the target process. If file is specified, enter the PID namespace specified by file. .TP -\fB\-U\fR, \fB\-\-user\fR [\fIfile\fR] -Enter the user namespace. If no file is specified enter the user namespace of -the target process. If file is specified enter the user namespace specified by -file. +\fB\-U\fR, \fB\-\-user\fR[=\fIfile\fR] +Enter the user namespace. If no file is specified, enter the user namespace of +the target process. If file is specified, enter the user namespace specified by +file. See also the \fB\-\-setuid\fR and \fB\-\-setgid\fR options. +.TP +\fB\-G\fR, \fB\-\-setgid\fR \fIgid\fR +Set the group ID which will be used in the entered user namespace. +.TP +\fB\-S\fR, \fB\-\-setuid\fR \fIuid\fR +Set the user ID which will be used in the entered user namespace. .TP -\fB\-r\fR, \fB\-\-root\fR [\fIdirectory\fR] -Set the root directory. If no directory is specified set the root directory to -the root directory of the target process. If directory is specified set the +\fB\-r\fR, \fB\-\-root\fR[=\fIdirectory\fR] +Set the root directory. If no directory is specified, set the root directory to +the root directory of the target process. If directory is specified, set the root directory to the specified directory. .TP -\fB\-w\fR, \fB\-\-wd\fR [\fIdirectory\fR] -Set the working directory. If no directory is specified set the working +\fB\-w\fR, \fB\-\-wd\fR[=\fIdirectory\fR] +Set the working directory. If no directory is specified, set the working directory to the working directory of the target process. 
If directory is -specified set the working directory to the specified directory. +specified, set the working directory to the specified directory. .TP \fB\-F\fR, \fB\-\-no-fork\fR -Do not fork before exec'ing the specified program. By default when entering a -pid namespace enter calls fork before calling exec so that the children will be -in the newly entered pid namespace. +Do not fork before exec'ing the specified program. By default, when entering a +PID namespace, \fBnsenter\fP calls \fBfork\fP before calling \fBexec\fP so that +any children will also be in the newly entered PID namespace. .TP \fB\-V\fR, \fB\-\-version\fR Display version information and exit. .TP \fB\-h\fR, \fB\-\-help\fR -Print a help message. +Display help text and exit. .SH SEE ALSO .BR setns (2), .BR clone (2) diff --git a/sys-utils/nsenter.c b/sys-utils/nsenter.c index 106349c7e..dfb1a3b51 100644 --- a/sys-utils/nsenter.c +++ b/sys-utils/nsenter.c @@ -28,6 +28,7 @@ #include <assert.h> #include <sys/types.h> #include <sys/wait.h> +#include <grp.h> #include "strutils.h" #include "nls.h" @@ -72,6 +73,8 @@ static void usage(int status) fputs(_(" -n, --net [=<file>] enter network namespace\n"), out); fputs(_(" -p, --pid [=<file>] enter pid namespace\n"), out); fputs(_(" -U, --user [=<file>] enter user namespace\n"), out); + fputs(_(" -S, --setuid <uid> set uid in user namespace\n"), out); + fputs(_(" -G, --setgid <gid> set gid in user namespace\n"), out); fputs(_(" -r, --root [=<dir>] set the root directory\n"), out); fputs(_(" -w, --wd [=<dir>] set the working directory\n"), out); fputs(_(" -F, --no-fork do not fork before exec'ing <program>\n"), out); @@ -169,6 +172,8 @@ int main(int argc, char *argv[]) { "net", optional_argument, NULL, 'n' }, { "pid", optional_argument, NULL, 'p' }, { "user", optional_argument, NULL, 'U' }, + { "setuid", required_argument, NULL, 'S' }, + { "setgid", required_argument, NULL, 'G' }, { "root", optional_argument, NULL, 'r' }, { "wd", optional_argument, NULL, 'w' 
}, { "no-fork", no_argument, NULL, 'F' }, @@ -179,14 +184,16 @@ int main(int argc, char *argv[]) int c, namespaces = 0; bool do_rd = false, do_wd = false; int do_fork = -1; /* unknown yet */ + uid_t uid = 0; + gid_t gid = 0; - setlocale(LC_MESSAGES, ""); + setlocale(LC_ALL, ""); bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); atexit(close_stdout); while ((c = - getopt_long(argc, argv, "hVt:m::u::i::n::p::U::r::w::F", + getopt_long(argc, argv, "hVt:m::u::i::n::p::U::S:G:r::w::F", longopts, NULL)) != -1) { switch (c) { case 'h': @@ -234,6 +241,12 @@ int main(int argc, char *argv[]) else namespaces |= CLONE_NEWUSER; break; + case 'S': + uid = strtoul_or_err(optarg, _("failed to parse uid")); + break; + case 'G': + gid = strtoul_or_err(optarg, _("failed to parse gid")); + break; case 'F': do_fork = 0; break; @@ -315,6 +328,15 @@ int main(int argc, char *argv[]) if (do_fork == 1) continue_as_child(); + if (namespaces & CLONE_NEWUSER) { + if (setgroups(0, NULL)) /* drop supplementary groups */ + err(EXIT_FAILURE, _("setgroups failed")); + if (setgid(gid) < 0) + err(EXIT_FAILURE, _("setgid failed")); + if (setuid(uid) < 0) + err(EXIT_FAILURE, _("setuid failed")); + } + if (optind < argc) { execvp(argv[optind], argv + optind); err(EXIT_FAILURE, _("failed to execute %s"), argv[optind]); diff --git a/sys-utils/pivot_root.8 b/sys-utils/pivot_root.8 index e2609728a..525eb438e 100644 --- a/sys-utils/pivot_root.8 +++ b/sys-utils/pivot_root.8 @@ -35,10 +35,10 @@ shell's root or not). .SH OPTIONS .TP \fB\-V\fR, \fB\-\-version\fR -Output version information and exit. +Display version information and exit. .TP \fB\-h\fR, \fB\-\-help\fR -Display help and exit. +Display help text and exit. 
.SH EXAMPLES Change the root file system to /dev/hda1 from an interactive shell: .sp diff --git a/sys-utils/prlimit.1 b/sys-utils/prlimit.1 index 262319953..95bc87607 100644 --- a/sys-utils/prlimit.1 +++ b/sys-utils/prlimit.1 @@ -47,7 +47,7 @@ Specify the process id, if none is given, it will use the running process. Define the output columns to use. If no output arrangement is specified, then a default set is used. Use \fB\-\-help\fP to get list of all supported columns. .IP "\fB\-V, \-\-version\fP" -Output version information and exit. +Display version information and exit. .IP "\fB\-\-verbose\fP" Verbose mode. .IP "\fB\-\-raw\fP" @@ -55,7 +55,7 @@ Use the raw output format. .IP "\fB\-\-noheadings\fP" Do not print a header line. .IP "\fB\-h, \-\-help\fP" -Print a help text and exit. +Display help text and exit. .SH RESOURCE OPTIONS .IP "\fB\-c, \-\-core\fP[=limits]" diff --git a/sys-utils/prlimit.c b/sys-utils/prlimit.c index 9098e4976..5bb99d1ec 100644 --- a/sys-utils/prlimit.c +++ b/sys-utils/prlimit.c @@ -144,8 +144,6 @@ static int prlimit(pid_t p, int resource, } #endif -static void rem_prlim(struct prlimit *lim); - static void __attribute__ ((__noreturn__)) usage(FILE * out) { size_t i; @@ -228,20 +226,20 @@ static void add_tt_line(struct tt *tt, struct prlimit *l) switch (get_column_id(i)) { case COL_RES: - xasprintf(&str, "%s", l->desc->name); + str = xstrdup(l->desc->name); break; case COL_HELP: - xasprintf(&str, "%s", l->desc->help); + str = xstrdup(l->desc->help); break; case COL_SOFT: if (l->rlim.rlim_cur == RLIM_INFINITY) - xasprintf(&str, "%s", "unlimited"); + str = xstrdup(_("unlimited")); else xasprintf(&str, "%llu", (unsigned long long) l->rlim.rlim_cur); break; case COL_HARD: if (l->rlim.rlim_max == RLIM_INFINITY) - xasprintf(&str, "%s", "unlimited"); + str = xstrdup(_("unlimited")); else xasprintf(&str, "%llu", (unsigned long long) l->rlim.rlim_max); break; @@ -273,13 +271,21 @@ static int column_name_to_id(const char *name, size_t namesz) 
return -1; } +static void rem_prlim(struct prlimit *lim) +{ + if (!lim) + return; + list_del(&lim->lims); + free(lim); +} + static int show_limits(struct list_head *lims, int tt_flags) { int i; struct list_head *p, *pnext; struct tt *tt; - tt = tt_new_table(tt_flags); + tt = tt_new_table(tt_flags | TT_FL_FREEDATA); if (!tt) { warn(_("failed to initialize output table")); return -1; @@ -372,8 +378,6 @@ static void do_prlimit(struct list_head *lims) } } - - static int get_range(char *str, rlim_t *soft, rlim_t *hard, int *found) { char *end = NULL; @@ -466,14 +470,6 @@ static int add_prlim(char *ops, struct list_head *lims, size_t id) return 0; } -static void rem_prlim(struct prlimit *lim) -{ - if (!lim) - return; - list_del(&lim->lims); - free(lim); -} - int main(int argc, char **argv) { int opt, tt_flags = 0; diff --git a/sys-utils/readprofile.8 b/sys-utils/readprofile.8 index ec1c22634..f4e4dd53c 100644 --- a/sys-utils/readprofile.8 +++ b/sys-utils/readprofile.8 @@ -86,12 +86,10 @@ the second is the name of the function, the third is the number of clock ticks and the last is the normalized load. .TP \fB\-V\fR, \fB\-\-version\fR -This makes -.B readprofile -print its version number and exit. +Display version information and exit. .TP \fB\-h\fR, \fB\-\-help\fR -Display help and exit. +Display help text and exit. .SH EXAMPLES Browse the profiling buffer ordering by clock ticks: .nf diff --git a/sys-utils/renice.1 b/sys-utils/renice.1 index 705138cf6..8a332e140 100644 --- a/sys-utils/renice.1 +++ b/sys-utils/renice.1 @@ -73,27 +73,23 @@ Force the succeeding arguments to be interpreted as process IDs (the default). .TP .BR \-h , " \-\-help" -Display a help text. +Display help text and exit. .TP .BR \-V , " \-\-version" -Display version information. +Display version information and exit. 
.SH EXAMPLES The following command would change the priority of the processes with PIDs 987 and 32, plus all processes owned by the users daemon and root: .TP .B " renice" +1 987 -u daemon root -p 32 .SH NOTES -Users other than the super-user may only alter the priority of processes they +Users other than the superuser may only alter the priority of processes they own, and can only monotonically increase their ``nice value'' (for security -reasons) within the range 0 to -.BR PRIO_MAX \ (20), +reasons) within the range 0 to 19, unless a nice resource limit is set (Linux 2.6.12 and higher). The -super-user may alter the priority of any process and set the priority to any -value in the range -.BR PRIO_MIN \ (\-20) -to -.BR PRIO_MAX . -Useful priorities are: 20 (the affected processes will run only when nothing +superuser may alter the priority of any process and set the priority to any +value in the range \-20 to 19. +Useful priorities are: 19 (the affected processes will run only when nothing else in the system wants to), 0 (the ``base'' scheduling priority), anything negative (to make things go very fast). .SH FILES @@ -104,7 +100,7 @@ to map user names to user IDs .BR getpriority (2), .BR setpriority (2) .SH BUGS -Non super-users can not increase scheduling priorities of their own processes, +Non-superusers cannot increase scheduling priorities of their own processes, even if they were the ones that decreased the priorities in the first place. 
.PP The Linux kernel (at least version 2.0.0) and linux libc (at least version diff --git a/sys-utils/renice.c b/sys-utils/renice.c index 50b16422b..c0378e1a5 100644 --- a/sys-utils/renice.c +++ b/sys-utils/renice.c @@ -138,6 +138,7 @@ main(int argc, char **argv) if (pwd == NULL) { warnx(_("unknown user %s"), *argv); + errs = 1; continue; } who = pwd->pw_uid; @@ -145,10 +146,11 @@ main(int argc, char **argv) who = strtol(*argv, &endptr, 10); if (who < 0 || *endptr) { warnx(_("bad value %s"), *argv); + errs = 1; continue; } } - errs += donice(which, who, prio); + errs |= donice(which, who, prio); } return errs != 0 ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/sys-utils/rtcwake.8 b/sys-utils/rtcwake.8 index def3faaef..02ab5cc15 100644 --- a/sys-utils/rtcwake.8 +++ b/sys-utils/rtcwake.8 @@ -47,14 +47,14 @@ appropriate for such suspend modes. Be verbose. .TP \fB-h\fP | \fB--help\fP -Display a short help message that shows how to use the program. +Display help text and exit. .TP \fB-V\fP | \fB--version\fP -Displays version information and exists. +Display version information and exit. .TP \fB-n\fP | \fB--dry-run\fP -This option does everything but actually setup alarm, suspend system or wait -for the alarm. +This option does everything apart from actually setting up the alarm, +suspending the system, or waiting for the alarm. .TP \fB-A\fP | \fB--adjfile\fP \fIfile\fP Specifies an alternative path to the adjust file. diff --git a/sys-utils/setarch.8 b/sys-utils/setarch.8 index ea885c4ba..f8c748fbe 100644 --- a/sys-utils/setarch.8 +++ b/sys-utils/setarch.8 @@ -27,10 +27,11 @@ is /bin/sh. Be verbose. .TP \fB\-h\fR, \fB\-\-help\fR -Display help (it is also displayed when setarch takes no arguments). +Display help text and exit. +(The help text is also displayed when \fBsetarch\fR receives no arguments.) .TP \fB\-V\fR, \fB\-\-version\fR -Output version information and exit. +Display version information and exit. 
.TP \fB\-\-uname\-2.6\fR Causes the program to see a kernel version number beginning with 2.6. diff --git a/sys-utils/setarch.c b/sys-utils/setarch.c index 051cbefcd..957bb2b03 100644 --- a/sys-utils/setarch.c +++ b/sys-utils/setarch.c @@ -53,37 +53,37 @@ enum { } while(0) -#if !HAVE_DECL_UNAME26 +#ifndef UNAME26 # define UNAME26 0x0020000 #endif -#if !HAVE_DECL_ADDR_NO_RANDOMIZE +#ifndef ADDR_NO_RANDOMIZE # define ADDR_NO_RANDOMIZE 0x0040000 #endif -#if !HAVE_DECL_FDPIC_FUNCPTRS +#ifndef FDPIC_FUNCPTRS # define FDPIC_FUNCPTRS 0x0080000 #endif -#if !HAVE_DECL_MMAP_PAGE_ZERO +#ifndef MMAP_PAGE_ZERO # define MMAP_PAGE_ZERO 0x0100000 #endif -#if !HAVE_DECL_ADDR_COMPAT_LAYOUT +#ifndef ADDR_COMPAT_LAYOUT # define ADDR_COMPAT_LAYOUT 0x0200000 #endif -#if !HAVE_DECL_READ_IMPLIES_EXEC +#ifndef READ_IMPLIES_EXEC # define READ_IMPLIES_EXEC 0x0400000 #endif -#if !HAVE_DECL_ADDR_LIMIT_32BIT +#ifndef ADDR_LIMIT_32BIT # define ADDR_LIMIT_32BIT 0x0800000 #endif -#if !HAVE_DECL_SHORT_INODE +#ifndef SHORT_INODE # define SHORT_INODE 0x1000000 #endif -#if !HAVE_DECL_WHOLE_SECONDS +#ifndef WHOLE_SECONDS # define WHOLE_SECONDS 0x2000000 #endif -#if !HAVE_DECL_STICKY_TIMEOUTS +#ifndef STICKY_TIMEOUTS # define STICKY_TIMEOUTS 0x4000000 #endif -#if !HAVE_DECL_ADDR_LIMIT_3GB +#ifndef ADDR_LIMIT_3GB # define ADDR_LIMIT_3GB 0x8000000 #endif @@ -149,11 +149,17 @@ set_arch(const char *pers, unsigned long options, int list) {PER_LINUX32, "linux32", NULL}, {PER_LINUX, "linux64", NULL}, #if defined(__powerpc__) || defined(__powerpc64__) +#ifdef __BIG_ENDIAN__ {PER_LINUX32, "ppc32", "ppc"}, {PER_LINUX32, "ppc", "ppc"}, {PER_LINUX, "ppc64", "ppc64"}, {PER_LINUX, "ppc64pseries", "ppc64"}, {PER_LINUX, "ppc64iseries", "ppc64"}, +#else + {PER_LINUX32, "ppc32le", "ppcle"}, + {PER_LINUX32, "ppcle", "ppcle"}, + {PER_LINUX, "ppc64le", "ppc64le"}, +#endif #endif #if defined(__x86_64__) || defined(__i386__) || defined(__ia64__) {PER_LINUX32, "i386", "i386"}, diff --git a/sys-utils/setpriv.1 
b/sys-utils/setpriv.1 index b992b4170..62467ad6b 100644 --- a/sys-utils/setpriv.1 +++ b/sys-utils/setpriv.1 @@ -55,7 +55,8 @@ inheritable set, you are likely to become confused. Do not do that. Lists all known capabilities. Must be specified alone. .TP \fB\-\-ruid\fR \fIuid\fR, \fB\-\-euid\fR \fIuid\fR, \fB\-\-reuid\fR \fIuid\fR -Sets the real, effective, or both \fIuid\fRs. +Sets the real, effective, or both \fIuid\fRs. The uid argument can be +given as textual login name. .IP Setting .I uid @@ -68,7 +69,8 @@ something like: \-\-reuid=1000 \-\-\:regid=1000 \-\-\:caps=\-\:all .TP \fB\-\-rgid\fR \fIgid\fR, \fB\-\-egid\fR \fIgid\fR, \fB\-\-regid\fR \fIgid\fR -Sets the real, effective, or both \fIgid\fRs. +Sets the real, effective, or both \fIgid\fRs. The gid argument can be +given as textual group name. .IP For safety, you must specify one of \-\-\:keep\-\:groups, \-\-\:clear\-\:groups, or \-\-\:groups if you set any primary @@ -121,7 +123,7 @@ to fail at AppArmor's whim. Display version information and exit. .TP \fB\-h\fR, \fB\-\-help\fR -Display help and exit. +Display help text and exit. 
.SH NOTES If applying any specified option fails, .I program diff --git a/sys-utils/setpriv.c b/sys-utils/setpriv.c index a547fd7ca..7bea62649 100644 --- a/sys-utils/setpriv.c +++ b/sys-utils/setpriv.c @@ -23,10 +23,12 @@ #include <getopt.h> #include <grp.h> #include <linux/securebits.h> +#include <pwd.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <sys/prctl.h> +#include <sys/types.h> #include <unistd.h> #include "c.h" @@ -104,8 +106,8 @@ static void __attribute__((__noreturn__)) usage(FILE *out) fputs(_(" --keep-groups keep supplementary groups\n"), out); fputs(_(" --groups <group,...> set supplementary groups\n"), out); fputs(_(" --securebits <bits> set securebits\n"), out); - fputs(_(" --selinux-label <label> set SELinux label (requires process:transition)\n"), out); - fputs(_(" --apparmor-profile <pr> set AppArmor profile (requires onexec permission)\n"), out); + fputs(_(" --selinux-label <label> set SELinux label\n"), out); + fputs(_(" --apparmor-profile <pr> set AppArmor profile\n"), out); fputs(USAGE_SEPARATOR, out); fputs(USAGE_HELP, out); fputs(USAGE_VERSION, out); @@ -228,7 +230,7 @@ static void dump_label(const char *name) close(fd); if (len < 0) { errno = e; - warn(_("read failed: %s"), name); + warn(_("cannot read %s"), name); return; } if (sizeof(buf) - 1 <= (size_t)len) { @@ -524,7 +526,9 @@ static void do_selinux_label(const char *label) err(SETPRIV_EXIT_PRIVERR, _("write failed: %s"), _PATH_PROC_ATTR_EXEC); - close(fd); + if (close_fd(fd) != 0) + err(SETPRIV_EXIT_PRIVERR, + _("write failed: %s"), _PATH_PROC_ATTR_EXEC); } static void do_apparmor_profile(const char *label) @@ -534,17 +538,40 @@ static void do_apparmor_profile(const char *label) if (access(_PATH_SYS_APPARMOR, F_OK) != 0) errx(SETPRIV_EXIT_PRIVERR, _("AppArmor is not running")); - f = fopen(_PATH_PROC_ATTR_EXEC, "wx"); + f = fopen(_PATH_PROC_ATTR_EXEC, "r+"); if (!f) err(SETPRIV_EXIT_PRIVERR, _("cannot open %s"), _PATH_PROC_ATTR_EXEC); - if (fprintf(f, 
"changeprofile %s", label) < 0 || fflush(f) != 0 - || fclose(f) != 0) + fprintf(f, "exec %s", label); + + if (close_stream(f) != 0) err(SETPRIV_EXIT_PRIVERR, _("write failed: %s"), _PATH_PROC_ATTR_EXEC); } +static uid_t get_user(const char *s, const char *err) +{ + struct passwd *pw; + long tmp; + pw = getpwnam(s); + if (pw) + return pw->pw_uid; + tmp = strtol_or_err(s, err); + return tmp; +} + +static gid_t get_group(const char *s, const char *err) +{ + struct group *gr; + long tmp; + gr = getgrnam(s); + if (gr) + return gr->gr_gid; + tmp = strtol_or_err(s, err); + return tmp; +} + int main(int argc, char **argv) { enum { @@ -603,7 +630,7 @@ int main(int argc, char **argv) int total_opts = 0; int list_caps = 0; - setlocale(LC_MESSAGES, ""); + setlocale(LC_ALL, ""); bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); atexit(close_stdout); @@ -627,43 +654,37 @@ int main(int argc, char **argv) if (opts.have_ruid) errx(EXIT_FAILURE, _("duplicate ruid")); opts.have_ruid = 1; - opts.ruid = strtol_or_err(optarg, - _("failed to parse ruid")); + opts.ruid = get_user(optarg, _("failed to parse ruid")); break; case EUID: if (opts.have_euid) errx(EXIT_FAILURE, _("duplicate euid")); opts.have_euid = 1; - opts.euid = strtol_or_err(optarg, - _("failed to parse euid")); + opts.euid = get_user(optarg, _("failed to parse euid")); break; case REUID: if (opts.have_ruid || opts.have_euid) errx(EXIT_FAILURE, _("duplicate ruid or euid")); opts.have_ruid = opts.have_euid = 1; - opts.ruid = opts.euid = strtol_or_err(optarg, - _("failed to parse reuid")); + opts.ruid = opts.euid = get_user(optarg, _("failed to parse reuid")); break; case RGID: if (opts.have_rgid) errx(EXIT_FAILURE, _("duplicate rgid")); opts.have_rgid = 1; - opts.rgid = strtol_or_err(optarg, - _("failed to parse rgid")); + opts.rgid = get_group(optarg, _("failed to parse rgid")); break; case EGID: if (opts.have_egid) errx(EXIT_FAILURE, _("duplicate egid")); opts.have_egid = 1; - opts.egid = strtol_or_err(optarg, - 
_("failed to parse egid")); + opts.egid = get_group(optarg, _("failed to parse egid")); break; case REGID: if (opts.have_rgid || opts.have_egid) errx(EXIT_FAILURE, _("duplicate rgid or egid")); opts.have_rgid = opts.have_egid = 1; - opts.rgid = opts.egid = strtol_or_err(optarg, - _("failed to parse regid")); + opts.rgid = opts.egid = get_group(optarg, _("failed to parse regid")); break; case CLEAR_GROUPS: if (opts.clear_groups) @@ -793,7 +814,7 @@ int main(int argc, char **argv) if (opts.have_securebits) if (prctl(PR_SET_SECUREBITS, opts.securebits, 0, 0, 0) != 0) - err(SETPRIV_EXIT_PRIVERR, _("set procecess securebits failed")); + err(SETPRIV_EXIT_PRIVERR, _("set process securebits failed")); if (opts.bounding_set) { do_caps(CAPNG_BOUNDING_SET, opts.bounding_set); diff --git a/sys-utils/setsid.1 b/sys-utils/setsid.1 index eff794858..da8d648e7 100644 --- a/sys-utils/setsid.1 +++ b/sys-utils/setsid.1 @@ -15,6 +15,11 @@ runs a program in a new session. .TP \fB\-c\fP, \fB\-\-ctty\fP Set the controlling terminal to the current one. +.TP +\fB\-w\fP, \fB\-\-wait\fP +Wait the execution of the program to end, and return the exit value of +the child as return value of the +.BR setsid . .SH "SEE ALSO" .BR setsid (2) .SH AUTHOR diff --git a/sys-utils/setsid.c b/sys-utils/setsid.c index b3e6ebfaf..782de82b5 100644 --- a/sys-utils/setsid.c +++ b/sys-utils/setsid.c @@ -9,6 +9,8 @@ * 2001-01-18 John Fremlin <vii@penguinpowered.com> * - fork in case we are process group leader * + * 2008-08-20 Daniel Kahn Gillmor <dkg@fifthhorseman.net> + * - if forked, wait on child process and emit its return code. 
*/ #include <getopt.h> @@ -16,6 +18,8 @@ #include <stdlib.h> #include <unistd.h> #include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/wait.h> #include "c.h" #include "nls.h" @@ -29,8 +33,8 @@ static void __attribute__ ((__noreturn__)) usage(FILE * out) program_invocation_short_name); fputs(USAGE_OPTIONS, out); - fputs(_(" -c, --ctty set the controlling terminal to the current one\n"), - out); + fputs(_(" -c, --ctty set the controlling terminal to the current one\n"), out); + fputs(_(" -w, --wait wait program to exit, and use the same return\n"), out); fputs(USAGE_HELP, out); fputs(USAGE_VERSION, out); @@ -43,9 +47,12 @@ int main(int argc, char **argv) { int ch; int ctty = 0; + pid_t pid; + int status = 0; static const struct option longopts[] = { {"ctty", no_argument, NULL, 'c'}, + {"wait", no_argument, NULL, 'w'}, {"version", no_argument, NULL, 'V'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0} @@ -56,7 +63,7 @@ int main(int argc, char **argv) textdomain(PACKAGE); atexit(close_stdout); - while ((ch = getopt_long(argc, argv, "+Vhc", longopts, NULL)) != -1) + while ((ch = getopt_long(argc, argv, "+Vhcw", longopts, NULL)) != -1) switch (ch) { case 'V': printf(UTIL_LINUX_VERSION); @@ -64,6 +71,9 @@ int main(int argc, char **argv) case 'c': ctty=1; break; + case 'w': + status = 1; + break; case 'h': usage(stdout); default: @@ -74,7 +84,8 @@ int main(int argc, char **argv) usage(stderr); if (getpgrp() == getpid()) { - switch (fork()) { + pid = fork(); + switch (pid) { case -1: err(EXIT_FAILURE, _("fork")); case 0: @@ -82,7 +93,13 @@ int main(int argc, char **argv) break; default: /* parent */ - return 0; + if (!status) + return EXIT_SUCCESS; + if (wait(&status) != pid) + err(EXIT_FAILURE, "wait"); + if (WIFEXITED(status)) + return WEXITSTATUS(status); + err(status, _("child %d did not exit normally"), pid); } } if (setsid() < 0) @@ -91,7 +108,7 @@ int main(int argc, char **argv) if (ctty) { if (ioctl(STDIN_FILENO, TIOCSCTTY, 1)) - warn(_("failed to set 
the controlling terminal")); + err(EXIT_FAILURE, _("failed to set the controlling terminal")); } execvp(argv[optind], argv + optind); err(EXIT_FAILURE, _("failed to execute %s"), argv[optind]); diff --git a/sys-utils/swapoff.c b/sys-utils/swapoff.c index 0bd85ac9c..182ce9592 100644 --- a/sys-utils/swapoff.c +++ b/sys-utils/swapoff.c @@ -190,7 +190,7 @@ int main(int argc, char *argv[]) status |= swapoff_all(); free_tables(); - mnt_free_cache(mntcache); + mnt_unref_cache(mntcache); return status; } diff --git a/sys-utils/swapon-common.c b/sys-utils/swapon-common.c index 5c95ef342..6dd7bacb1 100644 --- a/sys-utils/swapon-common.c +++ b/sys-utils/swapon-common.c @@ -41,8 +41,8 @@ struct libmnt_table *get_swaps(void) void free_tables(void) { - mnt_free_table(swaps); - mnt_free_table(fstab); + mnt_unref_table(swaps); + mnt_unref_table(fstab); } int match_swap(struct libmnt_fs *fs, void *data __attribute__((unused))) diff --git a/sys-utils/swapon.8 b/sys-utils/swapon.8 index 385bf5aea..f30dd9bdb 100644 --- a/sys-utils/swapon.8 +++ b/sys-utils/swapon.8 @@ -42,41 +42,13 @@ .SH NAME swapon, swapoff \- enable/disable devices and files for paging and swapping .SH SYNOPSIS -Get info: -.br -.in +5 -.B swapon \-s -.RB [ \-h ] -.RB [ \-V ] -.sp -.in -5 -Enable/disable: -.br -.in +5 .B swapon -.RB [ \-d ] -.RB [ \-f ] -.RB [ \-p -.IR priority ] -.RB [ \-v ] -.IR specialfile ... +[ options ] +.RI [ specialfile... ] .br .B swapoff -.RB [ \-v ] -.IR specialfile ... -.sp -.in -5 -Enable/disable all: -.br -.in +5 -.B swapon \-a -.RB [ \-e ] -.RB [ \-f ] -.RB [ \-v ] -.br -.B swapoff \-a -.RB [ \-v ] -.in -5 +.RB [ \-va ] +.RI [ specialfile... ] .SH DESCRIPTION .B swapon is used to specify devices on which paging and swapping are to take place. @@ -112,15 +84,25 @@ All devices marked as ``swap'' in are made available, except for those with the ``noauto'' option. Devices that are already being used as swap are silently skipped. 
.TP -.B "\-d, \-\-discard" -Discard freed swap pages before they are reused, if the swap -device supports the discard or trim operation. This may improve -performance on some Solid State Devices, but often it does not. +.B "\-d, \-\-discard\fR [=\fIpolicy\fR]" +Enable swap discards, if the swap backing device supports the discard or +trim operation. This may improve performance on some Solid State Devices, +but often it does not. The option allows one to select between two +available swap discard policies: +.BI \-\-discard=once +to perform a single-time discard operation for the whole swap area at swapon; +or +.BI \-\-discard=pages +to discard freed swap pages before they are reused, while swapping. +If no policy is selected, the default behavior is to enable both discard types. The .I /etc/fstab -mount option -.BI discard -may be also used to enable discard flag. +mount options +.BI discard, +.BI discard=once, +or +.BI discard=pages +may be also used to enable discard flags. .TP .B "\-e, \-\-ifexists" Silently skip devices that do not exist. @@ -138,7 +120,7 @@ match that of the current running kernel. initializes the whole device and does not check for bad blocks. .TP .B \-h, \-\-help -Provide help. +Display help text and exit. .TP .B "\-L \fIlabel\fP" Use the partition that has the specified @@ -165,7 +147,7 @@ When priority is not defined it defaults to \-1. Display swap usage summary by device. Equivalent to "cat /proc/swaps". Not available before Linux 2.1.25. .TP -\fB\-\-show\fR [\fIcolumn,column\fR] +\fB\-\-show\fR [\fIcolumn\fR, ...] Display definable device table similar to .B \-\-summary output. See \-\-help output for @@ -194,7 +176,7 @@ Use the partition that has the specified Be verbose. .TP .B "\-V, \-\-version" -Display version. +Display version information and exit. 
.SH NOTES You should not use .B swapon diff --git a/sys-utils/swapon.c b/sys-utils/swapon.c index d5b7e3789..1691df6d0 100644 --- a/sys-utils/swapon.c +++ b/sys-utils/swapon.c @@ -34,9 +34,20 @@ #endif #ifndef SWAP_FLAG_DISCARD -# define SWAP_FLAG_DISCARD 0x10000 /* discard swap cluster after use */ +# define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */ #endif +#ifndef SWAP_FLAG_DISCARD_ONCE +# define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */ +#endif + +#ifndef SWAP_FLAG_DISCARD_PAGES +# define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */ +#endif + +#define SWAP_FLAGS_DISCARD_VALID (SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \ + SWAP_FLAG_DISCARD_PAGES) + #ifndef SWAP_FLAG_PREFER # define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #endif @@ -70,7 +81,7 @@ enum { static int all; static int priority = -1; /* non-prioritized swap by default */ -static int discard; +static int discard; /* don't send swap discards by default */ /* If true, don't complain if the device/file doesn't exist */ static int ifexists; @@ -188,12 +199,14 @@ static int display_summary(void) if (!st) return -1; + if (mnt_table_is_empty(st)) + return 0; + itr = mnt_new_iter(MNT_ITER_FORWARD); if (!itr) err(EXIT_FAILURE, _("failed to initialize libmount iterator")); - if (mnt_table_get_nents(st) > 0) - printf(_("%s\t\t\t\tType\t\tSize\tUsed\tPriority\n"), _("Filename")); + printf(_("%s\t\t\t\tType\t\tSize\tUsed\tPriority\n"), _("Filename")); while (mnt_table_next_fs(st, itr, &fs) == 0) { printf("%-39s\t%s\t%jd\t%jd\t%d\n", @@ -224,7 +237,7 @@ static int show_table(int tt_flags, int bytes) if (!itr) err(EXIT_FAILURE, _("failed to initialize libmount iterator")); - tt = tt_new_table(tt_flags); + tt = tt_new_table(tt_flags | TT_FL_FREEDATA); if (!tt) { warn(_("failed to initialize output table")); goto done; @@ -323,7 +336,10 @@ static int swap_rewrite_signature(const char *devname, unsigned int pagesize) rc = 0; err: - 
close(fd); + if (close_fd(fd) != 0) { + warn(_("write failed: %s"), devname); + rc = -1; + } return rc; } @@ -567,8 +583,22 @@ static int do_swapon(const char *orig_special, int prio, << SWAP_FLAG_PRIO_SHIFT); } #endif - if (fl_discard) - flags |= SWAP_FLAG_DISCARD; + /* + * Validate the discard flags passed and set them + * accordingly before calling sys_swapon. + */ + if (fl_discard && !(fl_discard & ~SWAP_FLAGS_DISCARD_VALID)) { + /* + * If we get here with both discard policy flags set, + * we just need to tell the kernel to enable discards + * and it will do correctly, just as we expect. + */ + if ((fl_discard & SWAP_FLAG_DISCARD_ONCE) && + (fl_discard & SWAP_FLAG_DISCARD_PAGES)) + flags |= SWAP_FLAG_DISCARD; + else + flags |= fl_discard; + } status = swapon(special, flags); if (status < 0) @@ -608,12 +638,22 @@ static int swapon_all(void) while (mnt_table_find_next_fs(tb, itr, match_swap, NULL, &fs) == 0) { /* defaults */ int pri = priority, dsc = discard, nofail = ifexists; - char *p, *src; + char *p, *src, *dscarg; if (mnt_fs_get_option(fs, "noauto", NULL, NULL) == 0) continue; - if (mnt_fs_get_option(fs, "discard", NULL, NULL) == 0) - dsc = 1; + if (mnt_fs_get_option(fs, "discard", &dscarg, NULL) == 0) { + dsc |= SWAP_FLAG_DISCARD; + if (dscarg) { + /* only single-time discards are wanted */ + if (strcmp(dscarg, "once") == 0) + dsc |= SWAP_FLAG_DISCARD_ONCE; + + /* do discard for every released swap page */ + if (strcmp(dscarg, "pages") == 0) + dsc |= SWAP_FLAG_DISCARD_PAGES; + } + } if (mnt_fs_get_option(fs, "nofail", NULL, NULL) == 0) nofail = 1; if (mnt_fs_get_option(fs, "pri", &p, NULL) == 0 && p) @@ -643,17 +683,17 @@ static void __attribute__ ((__noreturn__)) usage(FILE * out) fprintf(out, _(" %s [options] [<spec>]\n"), program_invocation_short_name); fputs(USAGE_OPTIONS, out); - fputs(_(" -a, --all enable all swaps from /etc/fstab\n" - " -d, --discard discard freed pages before they are reused\n" - " -e, --ifexists silently skip devices that do not 
exist\n" - " -f, --fixpgsz reinitialize the swap space if necessary\n" - " -p, --priority <prio> specify the priority of the swap device\n" - " -s, --summary display summary about used swap devices\n" - " --show[=<columns>] display summary in definable table\n" - " --noheadings don't print headings, use with --show\n" - " --raw use the raw output format, use with --show\n" - " --bytes display swap size in bytes in --show output\n" - " -v, --verbose verbose mode\n"), out); + fputs(_(" -a, --all enable all swaps from /etc/fstab\n" + " -d, --discard[=<policy>] enable swap discards, if supported by device\n" + " -e, --ifexists silently skip devices that do not exist\n" + " -f, --fixpgsz reinitialize the swap space if necessary\n" + " -p, --priority <prio> specify the priority of the swap device\n" + " -s, --summary display summary about used swap devices\n" + " --show[=<columns>] display summary in definable table\n" + " --noheadings don't print headings, use with --show\n" + " --raw use the raw output format, use with --show\n" + " --bytes display swap size in bytes in --show output\n" + " -v, --verbose verbose mode\n"), out); fputs(USAGE_SEPARATOR, out); fputs(USAGE_HELP, out); @@ -669,6 +709,11 @@ static void __attribute__ ((__noreturn__)) usage(FILE * out) " <device> name of device to be used\n" " <file> name of file to be used\n"), out); + fputs(_("\nAvailable discard policy types (for --discard):\n" + " once : only single-time area discards are issued. (swapon)\n" + " pages : discard freed pages before they are reused.\n" + " * if no policy is selected both discard types are enabled. 
(default)\n"), out); + fputs(_("\nAvailable columns (for --show):\n"), out); for (i = 0; i < NCOLS; i++) fprintf(out, " %4s %s\n", infos[i].name, _(infos[i].help)); @@ -693,7 +738,7 @@ int main(int argc, char *argv[]) static const struct option long_opts[] = { { "priority", 1, 0, 'p' }, - { "discard", 0, 0, 'd' }, + { "discard", 2, 0, 'd' }, { "ifexists", 0, 0, 'e' }, { "summary", 0, 0, 's' }, { "fixpgsz", 0, 0, 'f' }, @@ -716,7 +761,7 @@ int main(int argc, char *argv[]) mnt_init_debug(0); mntcache = mnt_new_cache(); - while ((c = getopt_long(argc, argv, "ahdefp:svVL:U:", + while ((c = getopt_long(argc, argv, "ahd::efp:svVL:U:", long_opts, NULL)) != -1) { switch (c) { case 'a': /* all */ @@ -736,7 +781,18 @@ int main(int argc, char *argv[]) add_uuid(optarg); break; case 'd': - discard = 1; + discard |= SWAP_FLAG_DISCARD; + if (optarg) { + if (*optarg == '=') + optarg++; + + if (strcmp(optarg, "once") == 0) + discard |= SWAP_FLAG_DISCARD_ONCE; + else if (strcmp(optarg, "pages") == 0) + discard |= SWAP_FLAG_DISCARD_PAGES; + else + errx(EXIT_FAILURE, _("unsupported discard policy: %s"), optarg); + } break; case 'e': /* ifexists */ ifexists = 1; @@ -811,7 +867,7 @@ int main(int argc, char *argv[]) status |= do_swapon(*argv++, priority, discard, !CANONIC); free_tables(); - mnt_free_cache(mntcache); + mnt_unref_cache(mntcache); return status; } diff --git a/sys-utils/switch_root.8 b/sys-utils/switch_root.8 index debf0c87d..e61dba627 100644 --- a/sys-utils/switch_root.8 +++ b/sys-utils/switch_root.8 @@ -24,9 +24,9 @@ process. .SH OPTIONS .IP "\fB\-h, \-\-help\fP" -show help and exit +Display help text and exit. .IP "\fB\-V, \-\-version\fP" -show version number and exit +Display version information and exit. 
.SH RETURN VALUE .B switch_root diff --git a/sys-utils/switch_root.c b/sys-utils/switch_root.c index f26f7dae4..975360f01 100644 --- a/sys-utils/switch_root.c +++ b/sys-utils/switch_root.c @@ -23,6 +23,7 @@ #include <sys/mount.h> #include <sys/types.h> #include <sys/stat.h> +#include <sys/statfs.h> #include <sys/param.h> #include <fcntl.h> #include <stdio.h> @@ -45,6 +46,10 @@ #define MNT_DETACH 0x00000002 /* Just detach from the tree */ #endif +#define STATFS_RAMFS_MAGIC 0x858458f6 +#define STATFS_TMPFS_MAGIC 0x01021994 + + /* remove all files/directories below dirName -- don't cross mountpoints */ static int recursiveRemove(int fd) { @@ -68,6 +73,7 @@ static int recursiveRemove(int fd) while(1) { struct dirent *d; + int isdir = 0; errno = 0; if (!(d = readdir(dir))) { @@ -80,8 +86,10 @@ static int recursiveRemove(int fd) if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) continue; - - if (d->d_type == DT_DIR) { +#ifdef _DIRENT_HAVE_D_TYPE + if (d->d_type == DT_DIR || d->d_type == DT_UNKNOWN) +#endif + { struct stat sb; if (fstatat(dfd, d->d_name, &sb, AT_SYMLINK_NOFOLLOW)) { @@ -90,7 +98,7 @@ static int recursiveRemove(int fd) } /* remove subdirectories if device is same as dir */ - if (sb.st_dev == rb.st_dev) { + if (S_ISDIR(sb.st_mode) && sb.st_dev == rb.st_dev) { int cfd; cfd = openat(dfd, d->d_name, O_RDONLY); @@ -98,12 +106,12 @@ static int recursiveRemove(int fd) recursiveRemove(cfd); close(cfd); } + isdir = 1; } else continue; } - if (unlinkat(dfd, d->d_name, - d->d_type == DT_DIR ? AT_REMOVEDIR : 0)) + if (unlinkat(dfd, d->d_name, isdir ? 
AT_REMOVEDIR : 0)) warn(_("failed to unlink %s"), d->d_name); } @@ -174,12 +182,13 @@ static int switchroot(const char *newroot) if (cfd >= 0) { pid = fork(); if (pid <= 0) { - if (fstat(cfd, &sb) == 0) { - if (sb.st_dev == makedev(0, 1)) - recursiveRemove(cfd); - else - warn(_("old root filesystem is not an initramfs")); - } + struct statfs stfs; + if (fstatfs(cfd, &stfs) == 0 && + (stfs.f_type == STATFS_RAMFS_MAGIC || + stfs.f_type == STATFS_TMPFS_MAGIC)) + recursiveRemove(cfd); + else + warn(_("old root filesystem is not an initramfs")); if (pid == 0) exit(EXIT_SUCCESS); diff --git a/sys-utils/umount.8 b/sys-utils/umount.8 index 1fa653c23..2f3679db2 100644 --- a/sys-utils/umount.8 +++ b/sys-utils/umount.8 @@ -138,10 +138,10 @@ to specify the file system types on which no action should be taken. Verbose mode. .TP \fB\-h\fR, \fB\-\-help\fR -Print help message and exit. +Display help text and exit. .TP \fB\-V\fR, \fB\-\-version\fR -Print version and exit. +Display version information and exit. .SH "THE LOOP DEVICE" The .B umount @@ -151,7 +151,7 @@ case it finds the option 'loop=...' in or when the \-d option was given. Any pending loop devices can be freed using 'losetup -d', see .BR losetup (8). -.SH NOTES +.SH "EXTERNAL HELPERS" The syntax of external umount helpers is: .PP .BI /sbin/umount. 
<suffix> diff --git a/sys-utils/umount.c b/sys-utils/umount.c index 17764b534..0d576595b 100644 --- a/sys-utils/umount.c +++ b/sys-utils/umount.c @@ -80,18 +80,18 @@ static void __attribute__((__noreturn__)) usage(FILE *out) fputs(USAGE_OPTIONS, out); fputs(_(" -a, --all unmount all filesystems\n"), out); - fputs(_(" -A, --all-targets unmount all mountpoins for the given device\n" - " in the current namespace\n"), out); + fputs(_(" -A, --all-targets unmount all mountpoints for the given device in the\n" + " current namespace\n"), out); fputs(_(" -c, --no-canonicalize don't canonicalize paths\n"), out); fputs(_(" -d, --detach-loop if mounted loop device, also free this loop device\n"), out); fputs(_(" --fake dry run; skip the umount(2) syscall\n"), out); fputs(_(" -f, --force force unmount (in case of an unreachable NFS system)\n"), out); fputs(_(" -i, --internal-only don't call the umount.<type> helpers\n"), out); fputs(_(" -n, --no-mtab don't write to /etc/mtab\n"), out); - fputs(_(" -l, --lazy detach the filesystem now, and cleanup all later\n"), out); + fputs(_(" -l, --lazy detach the filesystem now, clean up things later\n"), out); fputs(_(" -O, --test-opts <list> limit the set of filesystems (use with -a)\n"), out); fputs(_(" -R, --recursive recursively unmount a target with all its children\n"), out); - fputs(_(" -r, --read-only In case unmounting fails, try to remount read-only\n"), out); + fputs(_(" -r, --read-only in case unmounting fails, try to remount read-only\n"), out); fputs(_(" -t, --types <list> limit the set of filesystem types\n"), out); fputs(_(" -v, --verbose say what is being done\n"), out); @@ -198,6 +198,12 @@ static int mk_exit_code(struct libmnt_context *cxt, int rc) /* * libmount errors (extra library checks) */ + if (rc == -EPERM && !mnt_context_tab_applied(cxt)) { + /* failed to evaluate permissions because not found + * relevant entry in mtab */ + warnx(_("%s: not mounted"), tgt); + return MOUNT_EX_USAGE; + } return 
handle_generic_errors(rc, _("%s: umount failed"), tgt); } else if (mnt_context_get_syscall_errno(cxt) == 0) { @@ -207,7 +213,7 @@ static int mk_exit_code(struct libmnt_context *cxt, int rc) */ if (rc < 0) return handle_generic_errors(rc, - _("%s: filesystem umounted, but mount(8) failed"), + _("%s: filesystem was unmounted, but mount(8) failed"), tgt); return MOUNT_EX_SOFTWARE; /* internal error */ @@ -230,23 +236,26 @@ static int mk_exit_code(struct libmnt_context *cxt, int rc) warnx(_("%s: can't write superblock"), tgt); break; case EBUSY: - warnx(_("%s: target is busy.\n" - " (In some cases useful info about processes that use\n" - " the device is found by lsof(8) or fuser(1))"), + warnx(_("%s: target is busy\n" + " (In some cases useful info about processes that\n" + " use the device is found by lsof(8) or fuser(1).)"), tgt); break; case ENOENT: - warnx(_("%s: not found"), tgt); + if (tgt && *tgt) + warnx(_("%s: mountpoint not found"), tgt); + else + warnx(_("undefined mountpoint")); break; case EPERM: - warnx(_("%s: must be superuser to umount"), tgt); + warnx(_("%s: must be superuser to unmount"), tgt); break; case EACCES: - warnx(_("%s: block devices not permitted on fs"), tgt); + warnx(_("%s: block devices are not permitted on filesystem"), tgt); break; default: errno = syserr; - warn(_("%s"), tgt); + warn("%s", tgt); break; } return MOUNT_EX_FAIL; @@ -272,10 +281,12 @@ static int umount_all(struct libmnt_context *cxt) if (mnt_context_is_verbose(cxt)) printf(_("%-25s: ignored\n"), tgt); } else { - rc |= mk_exit_code(cxt, mntrc); + int xrc = mk_exit_code(cxt, mntrc); - if (mnt_context_is_verbose(cxt)) - printf("%-25s: successfully umounted\n", tgt); + if (xrc == MOUNT_EX_SUCCESS + && mnt_context_is_verbose(cxt)) + printf("%-25s: successfully unmounted\n", tgt); + rc |= xrc; } } @@ -314,7 +325,7 @@ static struct libmnt_table *new_mountinfo(struct libmnt_context *cxt) if (mnt_table_parse_file(tb, _PATH_PROC_MOUNTINFO)) { warn(_("failed to parse %s"), 
_PATH_PROC_MOUNTINFO); - mnt_free_table(tb); + mnt_unref_table(tb); tb = NULL; } @@ -397,7 +408,7 @@ static int umount_recursive(struct libmnt_context *cxt, const char *spec) _("%s: not found"), spec); } - mnt_free_table(tb); + mnt_unref_table(tb); return rc; } @@ -456,7 +467,7 @@ static int umount_alltargets(struct libmnt_context *cxt, const char *spec, int r } mnt_free_iter(itr); - mnt_free_table(tb); + mnt_unref_table(tb); return rc; } @@ -618,15 +629,17 @@ int main(int argc, char **argv) rc += umount_recursive(cxt, *argv++); } else { while (argc--) { - char *path = *argv++; + char *path = *argv; - if (mnt_context_is_restricted(cxt)) + if (mnt_context_is_restricted(cxt) + && !mnt_tag_is_valid(path)) path = sanitize_path(path); rc += umount_one(cxt, path); - if (mnt_context_is_restricted(cxt)) + if (path != *argv) free(path); + argv++; } } diff --git a/sys-utils/unshare.1 b/sys-utils/unshare.1 index 58f2bb228..1f5273eba 100644 --- a/sys-utils/unshare.1 +++ b/sys-utils/unshare.1 @@ -1,69 +1,89 @@ .\" Process this file with .\" groff -man -Tascii lscpu.1 .\" -.TH UNSHARE 1 "January 2013" "util-linux" "User Commands" +.TH UNSHARE 1 "July 2013" "util-linux" "User Commands" .SH NAME unshare \- run program with some namespaces unshared from parent .SH SYNOPSIS .B unshare .RI [ options ] -program +.I program .RI [ arguments ] .SH DESCRIPTION -Unshares specified namespaces from parent process and then executes specified -program. Unshareable namespaces are: +Unshares the indicated namespaces from the parent process and then executes +the specified program. The namespaces to be unshared are indicated via +options. Unshareable namespaces are: .TP .BR "mount namespace" -mounting and unmounting filesystems will not affect rest of the system +Mounting and unmounting filesystems will not affect the rest of the system (\fBCLONE_NEWNS\fP flag), except for filesystems which are explicitly marked as -shared (by mount --make-shared). See /proc/self/mountinfo for the shared flags. 
+shared (with \fBmount --make-shared\fP; see \fI/proc/self/mountinfo\fP for the +\fBshared\fP flags). + +It's recommended to use \fBmount --make-rprivate\fP or \fBmount --make-rslave\fP +after \fBunshare --mount\fP to make sure that mountpoints in the new namespace +are really unshared from parental namespace. .TP .BR "UTS namespace" -setting hostname, domainname will not affect rest of the system -(\fBCLONE_NEWUTS\fP flag). +Setting hostname or domainname will not affect the rest of the system. +(\fBCLONE_NEWUTS\fP flag) .TP .BR "IPC namespace" -process will have independent namespace for System V message queues, semaphore -sets and shared memory segments (\fBCLONE_NEWIPC\fP flag). +The process will have an independent namespace for System V message queues, +semaphore sets and shared memory segments. (\fBCLONE_NEWIPC\fP flag) .TP .BR "network namespace" -process will have independent IPv4 and IPv6 stacks, IP routing tables, firewall -rules, the \fI/proc/net\fP and \fI/sys/class/net\fP directory trees, sockets -etc. (\fBCLONE_NEWNET\fP flag). +The process will have independent IPv4 and IPv6 stacks, IP routing tables, +firewall rules, the \fI/proc/net\fP and \fI/sys/class/net\fP directory trees, +sockets, etc. (\fBCLONE_NEWNET\fP flag) .TP .BR "pid namespace" -children will have a distinct set of pid to process mappings than their parent. -(\fBCLONE_NEWPID\fP flag). +Children will have a distinct set of PID to process mappings from their parent. +(\fBCLONE_NEWPID\fP flag) .TP .BR "user namespace" -process will have distinct set of uids, gids and capabilities. (\fBCLONE_NEWUSER\fP flag). -.TP -See the \fBclone\fR(2) for exact semantics of the flags. +The process will have a distinct set of UIDs, GIDs and capabilities. +(\fBCLONE_NEWUSER\fP flag) +.PP +See \fBclone\fR(2) for the exact semantics of the flags. 
.SH OPTIONS .TP .BR \-h , " \-\-help" -Print a help message, -.TP -.BR \-m , " \-\-mount" -Unshare the mount namespace, -.TP -.BR \-u , " \-\-uts" -Unshare the UTS namespace, +Display help text and exit. .TP .BR \-i , " \-\-ipc" -Unshare the IPC namespace, +Unshare the IPC namespace. +.TP +.BR \-m , " \-\-mount" +Unshare the mount namespace. .TP .BR \-n , " \-\-net" Unshare the network namespace. .TP .BR \-p , " \-\-pid" Unshare the pid namespace. +See also the \fB--fork\fP and \fB--mount-proc\fP options. +.TP +.BR \-u , " \-\-uts" +Unshare the UTS namespace. .TP .BR \-U , " \-\-user" Unshare the user namespace. +.TP +.BR \-f , " \-\-fork" +Fork the specified \fIprogram\fR as a child process of \fBunshare\fR rather than +running it directly. This is useful when creating a new pid namespace. +.TP +.BR \-\-mount-proc "[=\fImountpoint\fP]" +Just before running the program, mount the proc filesystem at the \fImountpoint\fP +(default is /proc). This is useful when creating a new pid namespace. It also +implies creating a new mount namespace since the /proc mount would otherwise +mess up existing programs on the system. The new proc filesystem is explicitly +mounted as private (by MS_PRIVATE|MS_REC). .SH SEE ALSO .BR unshare (2), -.BR clone (2) +.BR clone (2), +.BR mount (8) .SH BUGS None known so far. 
.SH AUTHOR diff --git a/sys-utils/unshare.c b/sys-utils/unshare.c index 8cc9c46c8..1299d184e 100644 --- a/sys-utils/unshare.c +++ b/sys-utils/unshare.c @@ -24,6 +24,11 @@ #include <stdio.h> #include <stdlib.h> #include <unistd.h> +#include <sys/wait.h> +#include <sys/mount.h> + +/* we only need some defines missing in sys/mount.h, no libmount linkage */ +#include <libmount.h> #include "nls.h" #include "c.h" @@ -40,12 +45,14 @@ static void usage(int status) _(" %s [options] <program> [args...]\n"), program_invocation_short_name); fputs(USAGE_OPTIONS, out); - fputs(_(" -m, --mount unshare mounts namespace\n"), out); - fputs(_(" -u, --uts unshare UTS namespace (hostname etc)\n"), out); - fputs(_(" -i, --ipc unshare System V IPC namespace\n"), out); - fputs(_(" -n, --net unshare network namespace\n"), out); - fputs(_(" -p, --pid unshare pid namespace\n"), out); - fputs(_(" -U, --user unshare user namespace\n"), out); + fputs(_(" -m, --mount unshare mounts namespace\n"), out); + fputs(_(" -u, --uts unshare UTS namespace (hostname etc)\n"), out); + fputs(_(" -i, --ipc unshare System V IPC namespace\n"), out); + fputs(_(" -n, --net unshare network namespace\n"), out); + fputs(_(" -p, --pid unshare pid namespace\n"), out); + fputs(_(" -U, --user unshare user namespace\n"), out); + fputs(_(" -f, --fork fork before launching <program>\n"), out); + fputs(_(" --mount-proc[=<dir>] mount proc filesystem first (implies --mount)\n"), out); fputs(USAGE_SEPARATOR, out); fputs(USAGE_HELP, out); @@ -57,6 +64,9 @@ static void usage(int status) int main(int argc, char *argv[]) { + enum { + OPT_MOUNTPROC = CHAR_MAX + 1 + }; static const struct option longopts[] = { { "help", no_argument, 0, 'h' }, { "version", no_argument, 0, 'V'}, @@ -66,20 +76,25 @@ int main(int argc, char *argv[]) { "net", no_argument, 0, 'n' }, { "pid", no_argument, 0, 'p' }, { "user", no_argument, 0, 'U' }, + { "fork", no_argument, 0, 'f' }, + { "mount-proc", optional_argument, 0, OPT_MOUNTPROC }, { NULL, 0, 0, 0 } 
}; int unshare_flags = 0; + int c, forkit = 0; + const char *procmnt = NULL; - int c; - - setlocale(LC_MESSAGES, ""); + setlocale(LC_ALL, ""); bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); atexit(close_stdout); - while ((c = getopt_long(argc, argv, "hVmuinpU", longopts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "fhVmuinpU", longopts, NULL)) != -1) { switch (c) { + case 'f': + forkit = 1; + break; case 'h': usage(EXIT_SUCCESS); case 'V': @@ -103,6 +118,10 @@ int main(int argc, char *argv[]) case 'U': unshare_flags |= CLONE_NEWUSER; break; + case OPT_MOUNTPROC: + unshare_flags |= CLONE_NEWNS; + procmnt = optarg ? optarg : "/proc"; + break; default: usage(EXIT_FAILURE); } @@ -111,6 +130,31 @@ int main(int argc, char *argv[]) if (-1 == unshare(unshare_flags)) err(EXIT_FAILURE, _("unshare failed")); + if (forkit) { + int status; + pid_t pid = fork(); + + switch(pid) { + case -1: + err(EXIT_FAILURE, _("fork failed")); + case 0: /* child */ + break; + default: /* parent */ + if (waitpid(pid, &status, 0) == -1) + err(EXIT_FAILURE, _("waitpid failed")); + if (WIFEXITED(status)) + return WEXITSTATUS(status); + else if (WIFSIGNALED(status)) + kill(getpid(), WTERMSIG(status)); + err(EXIT_FAILURE, _("child exit failed")); + } + } + + if (procmnt && + (mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL) != 0 || + mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0)) + err(EXIT_FAILURE, _("mount %s failed"), procmnt); + if (optind < argc) { execvp(argv[optind], argv + optind); err(EXIT_FAILURE, _("failed to execute %s"), argv[optind]); diff --git a/sys-utils/wdctl.8 b/sys-utils/wdctl.8 index 0c9a3acdb..61ba5c30d 100644 --- a/sys-utils/wdctl.8 +++ b/sys-utils/wdctl.8 @@ -36,14 +36,14 @@ output arrangement is specified, then a default set is used. Use to get list of all supported columns. .IP "\fB\-O\fR, \fB\-\-oneline\fP" Print all wanted information on one line in key="value" output format. 
-.IP "\fB\-V\fR, \fB\-\-version\fP" -Output version information and exit. .IP "\fB\-r\fR, \fB\-\-raw\fP" Use the raw output format. .IP "\fB\-x\fR, \fB\-\-flags-only\fP" Same as \fB\-I \-T\fP. .IP "\fB\-h\fR, \fB\-\-help\fP" -Print a help text and exit. +Display help text and exit. +.IP "\fB\-V\fR, \fB\-\-version\fP" +Display version information and exit. .SH AUTHORS .MT kzak@\:redhat\:.com Karel Zak diff --git a/sys-utils/wdctl.c b/sys-utils/wdctl.c index 24ec770fb..c0953c9c8 100644 --- a/sys-utils/wdctl.c +++ b/sys-utils/wdctl.c @@ -28,6 +28,7 @@ #include "nls.h" #include "c.h" +#include "xalloc.h" #include "closestream.h" #include "optutils.h" #include "pathnames.h" @@ -232,7 +233,7 @@ static void add_flag_line(struct tt *tt, struct wdinfo *wd, const struct wdflag } if (str) - tt_line_set_data(line, i, str); + tt_line_set_data(line, i, xstrdup(str)); } } @@ -244,7 +245,7 @@ static int show_flags(struct wdinfo *wd, int tt_flags, uint32_t wanted) uint32_t flags; /* create output table */ - tt = tt_new_table(tt_flags); + tt = tt_new_table(tt_flags | TT_FL_FREEDATA); if (!tt) { warn(_("failed to initialize output table")); return -1; @@ -330,9 +331,11 @@ static int set_watchdog(struct wdinfo *wd, int timeout) warn(_("cannot set timeout for %s"), wd->device); } - close(fd); + if (close_fd(fd)) + warn(_("write failed")); sigprocmask(SIG_SETMASK, &oldsigs, NULL); - printf("Set timeout to %d seconds\n", timeout); + printf(P_("Timeout has been set to %d second.\n", + "Timeout has been set to %d seconds.\n", timeout), timeout); return rc; } @@ -393,7 +396,8 @@ static int read_watchdog(struct wdinfo *wd) * the machine might end up rebooting. 
*/ } - close(fd); + if (close_fd(fd)) + warn(_("write failed")); sigprocmask(SIG_SETMASK, &oldsigs, NULL); return 0; @@ -446,11 +450,14 @@ static void print_oneline(struct wdinfo *wd, uint32_t wanted, static void show_timeouts(struct wdinfo *wd) { if (wd->has_timeout) - printf(_("%-15s%2i seconds\n"), _("Timeout:"), wd->timeout); + printf(P_("%-14s %2i second\n", "%-14s %2i seconds\n", wd->timeout), + _("Timeout:"), wd->timeout); if (wd->has_pretimeout) - printf(_("%-15s%2i seconds\n"), _("Pre-timeout:"), wd->pretimeout); + printf(P_("%-14s %2i second\n", "%-14s %2i seconds\n", wd->pretimeout), + _("Pre-timeout:"), wd->pretimeout); if (wd->has_timeleft) - printf(_("%-15s%2i seconds\n"), _("Timeleft:"), wd->timeleft); + printf(P_("%-14s %2i second\n", "%-14s %2i seconds\n", wd->timeleft), + _("Timeleft:"), wd->timeleft); } int main(int argc, char *argv[]) |
