101 files changed, 39192 insertions, 0 deletions
diff --git a/sys-utils/Makemodule.am b/sys-utils/Makemodule.am
new file mode 100644
index 0000000..825a733
--- /dev/null
+++ b/sys-utils/Makemodule.am
@@ -0,0 +1,470 @@
+if BUILD_LSMEM
+usrbin_exec_PROGRAMS += lsmem
+dist_man_MANS += sys-utils/lsmem.1
+lsmem_SOURCES = sys-utils/lsmem.c
+lsmem_LDADD = $(LDADD) libcommon.la libsmartcols.la
+lsmem_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+
+if BUILD_CHMEM
+usrbin_exec_PROGRAMS += chmem
+dist_man_MANS += sys-utils/chmem.8
+chmem_SOURCES = sys-utils/chmem.c
+chmem_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_FLOCK
+usrbin_exec_PROGRAMS += flock
+dist_man_MANS += sys-utils/flock.1
+flock_SOURCES = sys-utils/flock.c lib/monotonic.c lib/timer.c
+flock_LDADD = $(LDADD) libcommon.la $(REALTIME_LIBS)
+endif
+
+if BUILD_CHOOM
+usrbin_exec_PROGRAMS += choom
+dist_man_MANS += sys-utils/choom.1
+choom_SOURCES = sys-utils/choom.c
+choom_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_IPCMK
+usrbin_exec_PROGRAMS += ipcmk
+dist_man_MANS += sys-utils/ipcmk.1
+ipcmk_SOURCES = sys-utils/ipcmk.c
+ipcmk_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_IPCRM
+usrbin_exec_PROGRAMS += ipcrm
+dist_man_MANS += sys-utils/ipcrm.1
+ipcrm_SOURCES = sys-utils/ipcrm.c
+ipcrm_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_IPCS
+usrbin_exec_PROGRAMS += ipcs
+dist_man_MANS += sys-utils/ipcs.1
+ipcs_SOURCES =	sys-utils/ipcs.c \
+		sys-utils/ipcutils.c \
+		sys-utils/ipcutils.h
+ipcs_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_LSIPC
+usrbin_exec_PROGRAMS += lsipc
+dist_man_MANS += sys-utils/lsipc.1
+lsipc_SOURCES =	sys-utils/lsipc.c \
+		sys-utils/ipcutils.c \
+		sys-utils/ipcutils.h
+lsipc_LDADD = $(LDADD) libcommon.la libsmartcols.la
+lsipc_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+
+if BUILD_RENICE
+usrbin_exec_PROGRAMS += renice
+dist_man_MANS += sys-utils/renice.1
+renice_SOURCES = sys-utils/renice.c
+endif
+
+if BUILD_RFKILL
+usrsbin_exec_PROGRAMS += rfkill
+dist_man_MANS += sys-utils/rfkill.8
+rfkill_SOURCES = sys-utils/rfkill.c
+rfkill_LDADD = $(LDADD) libcommon.la libsmartcols.la
+rfkill_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+
+if BUILD_SETSID
+usrbin_exec_PROGRAMS += setsid
+dist_man_MANS += sys-utils/setsid.1
+setsid_SOURCES = sys-utils/setsid.c
+endif
+
+if BUILD_READPROFILE
+usrsbin_exec_PROGRAMS += readprofile
+dist_man_MANS += sys-utils/readprofile.8
+readprofile_SOURCES = sys-utils/readprofile.c
+endif
+
+if BUILD_TUNELP
+usrsbin_exec_PROGRAMS += tunelp
+dist_man_MANS += sys-utils/tunelp.8
+tunelp_SOURCES = sys-utils/tunelp.c
+tunelp_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_FSTRIM
+sbin_PROGRAMS += fstrim
+dist_man_MANS += sys-utils/fstrim.8
+fstrim_SOURCES = sys-utils/fstrim.c
+fstrim_LDADD = $(LDADD) libcommon.la libmount.la
+fstrim_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir)
+if HAVE_SYSTEMD
+systemdsystemunit_DATA += \
+		sys-utils/fstrim.service \
+		sys-utils/fstrim.timer
+endif
+endif # BUILD_FSTRIM
+
+PATHFILES += sys-utils/fstrim.service
+EXTRA_DIST += sys-utils/fstrim.timer
+
+if BUILD_DMESG
+bin_PROGRAMS += dmesg
+dist_man_MANS += sys-utils/dmesg.1
+dmesg_SOURCES = sys-utils/dmesg.c lib/monotonic.c
+dmesg_LDADD = $(LDADD) libcommon.la libtcolors.la $(REALTIME_LIBS)
+dmesg_CFLAGS = $(AM_CFLAGS)
+check_PROGRAMS += test_dmesg
+test_dmesg_SOURCES = $(dmesg_SOURCES)
+test_dmesg_LDADD = $(dmesg_LDADD)
+test_dmesg_CFLAGS = -DTEST_DMESG $(dmesg_CFLAGS)
+endif
+
+if BUILD_CTRLALTDEL
+sbin_PROGRAMS += ctrlaltdel
+dist_man_MANS += sys-utils/ctrlaltdel.8
+ctrlaltdel_SOURCES = sys-utils/ctrlaltdel.c
+ctrlaltdel_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_FSFREEZE
+sbin_PROGRAMS += fsfreeze
+dist_man_MANS += sys-utils/fsfreeze.8
+fsfreeze_SOURCES = sys-utils/fsfreeze.c
+endif
+
+if BUILD_BLKDISCARD
+sbin_PROGRAMS += blkdiscard
+dist_man_MANS += sys-utils/blkdiscard.8
+blkdiscard_SOURCES = sys-utils/blkdiscard.c lib/monotonic.c
+blkdiscard_LDADD = $(LDADD) libcommon.la $(REALTIME_LIBS)
+endif
+
+if BUILD_BLKZONE
+sbin_PROGRAMS += blkzone
+dist_man_MANS += sys-utils/blkzone.8
+blkzone_SOURCES = sys-utils/blkzone.c
+blkzone_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_LDATTACH
+usrsbin_exec_PROGRAMS += ldattach
+dist_man_MANS += sys-utils/ldattach.8
+ldattach_SOURCES = sys-utils/ldattach.c
+ldattach_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_RTCWAKE
+usrsbin_exec_PROGRAMS += rtcwake
+dist_man_MANS += sys-utils/rtcwake.8
+PATHFILES += sys-utils/rtcwake.8
+rtcwake_SOURCES = sys-utils/rtcwake.c
+rtcwake_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_SETARCH
+usrbin_exec_PROGRAMS += setarch
+dist_man_MANS += sys-utils/setarch.8
+setarch_SOURCES = sys-utils/setarch.c
+
+SETARCH_LINKS = uname26 linux32 linux64
+
+if ARCH_S390
+SETARCH_LINKS += s390 s390x
+endif
+if ARCH_I86
+SETARCH_LINKS += i386
+endif
+if ARCH_86_64
+SETARCH_LINKS += i386 x86_64
+endif
+if ARCH_PPC
+SETARCH_LINKS += ppc ppc64 ppc32
+endif
+if ARCH_SPARC
+SETARCH_LINKS += sparc sparc64 sparc32 sparc32bash
+endif
+if ARCH_MIPS
+SETARCH_LINKS += mips mips64 mips32
+endif
+if ARCH_IA64
+SETARCH_LINKS += i386 ia64
+endif
+if ARCH_HPPA
+SETARCH_LINKS += parisc parisc64 parisc32
+endif
+
+SETARCH_MAN_LINKS = $(addprefix sys-utils/,$(SETARCH_LINKS:=.8))
+man_MANS += $(SETARCH_MAN_LINKS)
+CLEANFILES += $(SETARCH_MAN_LINKS)
+
+$(SETARCH_MAN_LINKS):
+	$(AM_V_at) $(MKDIR_P) sys-utils
+	$(AM_V_GEN)echo ".so man8/setarch.8" > $@
+# Create the per-arch setarch aliases: enter the (quoted) bindir once and stop at the first ln failure.
+install-exec-hook-setarch:
+	cd "$(DESTDIR)$(usrbin_execdir)" && for I in $(SETARCH_LINKS); do \
+		ln -sf setarch $$I || exit 1 ; \
+	done
+# Remove the setarch alias links; the path is quoted so a DESTDIR containing whitespace still works.
+uninstall-hook-setarch:
+	for I in $(SETARCH_LINKS); do \
+		rm -f "$(DESTDIR)$(usrbin_execdir)/$$I" ; \
+	done
+
+INSTALL_EXEC_HOOKS += install-exec-hook-setarch
+UNINSTALL_HOOKS += uninstall-hook-setarch
+
+endif # BUILD_SETARCH
+
+
+if BUILD_EJECT
+usrbin_exec_PROGRAMS += eject
+eject_SOURCES =  sys-utils/eject.c lib/monotonic.c
+eject_LDADD = $(LDADD) libmount.la libcommon.la $(REALTIME_LIBS)
+eject_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir)
+dist_man_MANS += sys-utils/eject.1
+endif
+
+
+if BUILD_LOSETUP
+sbin_PROGRAMS += losetup
+dist_man_MANS += sys-utils/losetup.8
+losetup_SOURCES = sys-utils/losetup.c
+losetup_LDADD = $(LDADD) libcommon.la libsmartcols.la
+losetup_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+
+if HAVE_STATIC_LOSETUP
+bin_PROGRAMS += losetup.static
+losetup_static_SOURCES = $(losetup_SOURCES)
+losetup_static_LDFLAGS = -all-static
+losetup_static_LDADD = $(losetup_LDADD)
+losetup_static_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+endif # BUILD_LOSETUP
+
+
+if BUILD_ZRAMCTL
+sbin_PROGRAMS += zramctl
+dist_man_MANS += sys-utils/zramctl.8
+zramctl_SOURCES = sys-utils/zramctl.c
+zramctl_LDADD = $(LDADD) libcommon.la libsmartcols.la
+zramctl_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+
+
+if BUILD_PRLIMIT
+usrbin_exec_PROGRAMS += prlimit
+dist_man_MANS += sys-utils/prlimit.1
+prlimit_SOURCES = sys-utils/prlimit.c
+prlimit_LDADD = $(LDADD) libcommon.la libsmartcols.la
+prlimit_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+
+
+if BUILD_LSNS
+usrbin_exec_PROGRAMS += lsns
+dist_man_MANS += sys-utils/lsns.8
+lsns_SOURCES =	sys-utils/lsns.c
+lsns_LDADD = $(LDADD) libcommon.la libsmartcols.la libmount.la
+lsns_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) -I$(ul_libmount_incdir)
+endif
+
+
+if BUILD_MOUNT
+bin_PROGRAMS += mount umount
+dist_man_MANS += \
+	sys-utils/mount.8 \
+	sys-utils/fstab.5 \
+	sys-utils/umount.8
+mount_SOURCES = sys-utils/mount.c
+mount_LDADD = $(LDADD) libcommon.la libmount.la $(SELINUX_LIBS)
+mount_CFLAGS = $(SUID_CFLAGS) $(AM_CFLAGS) -I$(ul_libmount_incdir)
+mount_LDFLAGS = $(SUID_LDFLAGS) $(AM_LDFLAGS)
+
+umount_SOURCES = sys-utils/umount.c
+umount_LDADD = $(LDADD) libcommon.la libmount.la
+umount_CFLAGS = $(AM_CFLAGS) $(SUID_CFLAGS) -I$(ul_libmount_incdir)
+umount_LDFLAGS = $(SUID_LDFLAGS) $(AM_LDFLAGS)
+
+if HAVE_STATIC_MOUNT
+bin_PROGRAMS += mount.static
+mount_static_SOURCES = $(mount_SOURCES)
+mount_static_CFLAGS = $(mount_CFLAGS)
+mount_static_LDFLAGS = $(mount_LDFLAGS) -all-static
+mount_static_LDADD = $(mount_LDADD) $(SELINUX_LIBS_STATIC)
+endif
+
+if HAVE_STATIC_UMOUNT
+bin_PROGRAMS += umount.static
+umount_static_SOURCES = $(umount_SOURCES)
+umount_static_CFLAGS = $(umount_CFLAGS)
+umount_static_LDFLAGS = $(umount_LDFLAGS) -all-static
+umount_static_LDADD = $(umount_LDADD)
+endif
+# For mount and umount: fix ownership first, then apply mode 4755; paths are quoted for DESTDIRs with whitespace.
+install-exec-hook-mount:
+if MAKEINSTALL_DO_CHOWN
+	chown root:root "$(DESTDIR)$(bindir)/mount"
+endif
+if MAKEINSTALL_DO_SETUID
+	chmod 4755 "$(DESTDIR)$(bindir)/mount"
+endif
+if MAKEINSTALL_DO_CHOWN
+	chown root:root "$(DESTDIR)$(bindir)/umount"
+endif
+if MAKEINSTALL_DO_SETUID
+	chmod 4755 "$(DESTDIR)$(bindir)/umount"
+endif
+
+INSTALL_EXEC_HOOKS += install-exec-hook-mount
+endif # BUILD_MOUNT
+
+
+if BUILD_SWAPON
+sbin_PROGRAMS += swapon swapoff
+dist_man_MANS += \
+	sys-utils/swapoff.8 \
+	sys-utils/swapon.8
+
+swapon_SOURCES = \
+	sys-utils/swapon.c \
+	sys-utils/swapon-common.c \
+	sys-utils/swapon-common.h \
+	lib/swapprober.c \
+	include/swapprober.h
+swapon_CFLAGS = $(AM_CFLAGS) \
+	-I$(ul_libblkid_incdir) \
+	-I$(ul_libmount_incdir) \
+	-I$(ul_libsmartcols_incdir)
+swapon_LDADD = $(LDADD) \
+	libblkid.la \
+	libcommon.la \
+	libmount.la \
+	libsmartcols.la
+
+swapoff_SOURCES = \
+	sys-utils/swapoff.c \
+	sys-utils/swapon-common.c \
+	sys-utils/swapon-common.h \
+	lib/swapprober.c \
+	include/swapprober.h
+swapoff_CFLAGS = $(AM_CFLAGS) \
+	-I$(ul_libblkid_incdir) \
+	-I$(ul_libmount_incdir)
+swapoff_LDADD = $(LDADD) \
+	libmount.la \
+	libblkid.la \
+	libcommon.la
+endif
+
+if BUILD_LSCPU
+usrbin_exec_PROGRAMS += lscpu
+lscpu_SOURCES = \
+	sys-utils/lscpu.c \
+	sys-utils/lscpu.h \
+	sys-utils/lscpu-arm.c \
+	sys-utils/lscpu-dmi.c
+lscpu_LDADD = $(LDADD) libcommon.la libsmartcols.la $(RTAS_LIBS)
+lscpu_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+dist_man_MANS += sys-utils/lscpu.1
+endif
+
+if BUILD_CHCPU
+sbin_PROGRAMS += chcpu
+chcpu_SOURCES = sys-utils/chcpu.c
+chcpu_LDADD = $(LDADD) libcommon.la
+dist_man_MANS += sys-utils/chcpu.8
+endif
+
+if BUILD_WDCTL
+bin_PROGRAMS += wdctl
+dist_man_MANS += sys-utils/wdctl.8
+wdctl_SOURCES = sys-utils/wdctl.c
+wdctl_LDADD = $(LDADD) libcommon.la libsmartcols.la
+wdctl_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+
+if BUILD_MOUNTPOINT
+bin_PROGRAMS += mountpoint
+mountpoint_LDADD = $(LDADD) libmount.la
+mountpoint_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir)
+dist_man_MANS += sys-utils/mountpoint.1
+mountpoint_SOURCES = sys-utils/mountpoint.c
+endif
+
+if BUILD_FALLOCATE
+usrbin_exec_PROGRAMS += fallocate
+fallocate_SOURCES = sys-utils/fallocate.c
+fallocate_LDADD = $(LDADD) libcommon.la
+dist_man_MANS += sys-utils/fallocate.1
+endif
+
+if BUILD_PIVOT_ROOT
+sbin_PROGRAMS += pivot_root
+dist_man_MANS += sys-utils/pivot_root.8
+pivot_root_SOURCES = sys-utils/pivot_root.c
+endif
+
+if BUILD_SWITCH_ROOT
+sbin_PROGRAMS += switch_root
+dist_man_MANS += sys-utils/switch_root.8
+switch_root_SOURCES = sys-utils/switch_root.c
+endif
+
+if BUILD_UNSHARE
+usrbin_exec_PROGRAMS += unshare
+dist_man_MANS += sys-utils/unshare.1
+unshare_SOURCES = sys-utils/unshare.c
+unshare_LDADD = $(LDADD) libcommon.la
+unshare_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir)
+
+if HAVE_STATIC_UNSHARE
+usrbin_exec_PROGRAMS += unshare.static
+unshare_static_SOURCES = $(unshare_SOURCES)
+unshare_static_LDFLAGS = -all-static
+unshare_static_LDADD = $(unshare_LDADD)
+unshare_static_CFLAGS = $(unshare_CFLAGS)
+endif
+endif
+
+if BUILD_NSENTER
+usrbin_exec_PROGRAMS += nsenter
+dist_man_MANS += sys-utils/nsenter.1
+nsenter_SOURCES = sys-utils/nsenter.c
+nsenter_LDADD = $(LDADD) libcommon.la $(SELINUX_LIBS)
+
+if HAVE_STATIC_NSENTER
+usrbin_exec_PROGRAMS += nsenter.static
+nsenter_static_SOURCES = $(nsenter_SOURCES)
+nsenter_static_LDFLAGS = -all-static
+nsenter_static_LDADD = $(nsenter_LDADD)
+endif
+endif
+
+if BUILD_HWCLOCK
+sbin_PROGRAMS += hwclock
+dist_man_MANS += \
+	sys-utils/hwclock.8 \
+	sys-utils/adjtime_config.5
+PATHFILES += sys-utils/hwclock.8
+hwclock_SOURCES = \
+	sys-utils/hwclock.c \
+	sys-utils/hwclock.h \
+	sys-utils/hwclock-cmos.c
+if LINUX
+hwclock_SOURCES += sys-utils/hwclock-rtc.c
+endif
+hwclock_LDADD = $(LDADD) libcommon.la -lm
+if HAVE_AUDIT
+hwclock_LDADD += -laudit
+endif
+endif # BUILD_HWCLOCK
+
+if BUILD_SETPRIV
+usrbin_exec_PROGRAMS += setpriv
+dist_man_MANS += sys-utils/setpriv.1
+setpriv_SOURCES = sys-utils/setpriv.c
+setpriv_LDADD = $(LDADD) -lcap-ng libcommon.la
+endif
diff --git a/sys-utils/adjtime_config.5 b/sys-utils/adjtime_config.5
new file mode 100644
index 0000000..6f03ca7
--- /dev/null
+++ b/sys-utils/adjtime_config.5
@@ -0,0 +1,64 @@
+.TH ADJTIME_CONFIG 5 "August 2018" "util-linux" "File Formats"
+.SH NAME
+adjtime \- information about hardware clock setting and drift factor
+.SH SYNOPSIS
+.I /etc/adjtime
+.SH DESCRIPTION
+The file
+.B /etc/adjtime
+contains descriptive information about the hardware mode clock setting and clock drift factor.
+The file is read and written by hwclock, and read by programs like rtcwake to get the RTC time mode.
+.PP
+The file is usually located in /etc, but tools like
+.BR hwclock (8)
+or
+.BR rtcwake (8)
+allow an alternative location to be specified by command-line options if write access to
+/etc is unwanted.  The default clock mode is "UTC" if the file is missing.
+.PP
+The Hardware Clock is usually not very accurate.  However, much of its inaccuracy is completely predictable - it gains
+or loses the same amount of time every day.  This is called systematic drift.  The hwclock utility maintains the file /etc/adjtime,
+which keeps some historical information.
+For more details see "\fBThe Adjust Function\fR" and  "\fBThe Adjtime File\fR" sections from
+.BR hwclock (8)
+man page.
+.PP
+
+The adjtime file is a plain ASCII text file with the following format.
+.sp
+.SS First line
+Three numbers, separated by blanks:
+.TP
+.B "drift factor"
+the systematic drift rate in seconds per day (floating point decimal)
+.TP
+.B last adjust time
+the resulting number of seconds since 1969 UTC of the most recent adjustment or calibration (decimal integer)
+.TP
+.B "adjustment status"
+zero (for compatibility with clock(8)) as a decimal integer.
+
+.SS Second line
+.TP
+.B "last calibration time"
+The resulting number of seconds since 1969 UTC of most recent calibration.
+Zero if there has been no calibration yet or it is known that any previous
+calibration is moot (for example, because the Hardware Clock has been found,
+since that calibration, not to contain a valid time).  This is a decimal
+integer.
+
+.SS Third line
+.TP
+.B "clock mode"
+Supported values are "UTC" or "LOCAL".  Tells whether the Hardware Clock is set
+to Coordinated Universal Time or local time.  You can always override this
+value with options on the hwclock command line.
+
+.SH FILES
+.IR /etc/adjtime
+.SH "SEE ALSO"
+.BR hwclock (8),
+.BR rtcwake (8)
+.SH AVAILABILITY
+This man page is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/blkdiscard.8 b/sys-utils/blkdiscard.8
new file mode 100644
index 0000000..1f3a32b
--- /dev/null
+++ b/sys-utils/blkdiscard.8
@@ -0,0 +1,85 @@
+.TH BLKDISCARD 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+blkdiscard \- discard sectors on a device
+.SH SYNOPSIS
+.B blkdiscard
+[options]
+.RB [ \-o
+.IR offset ]
+.RB [ \-l
+.IR length ]
+.I device
+.SH DESCRIPTION
+.B blkdiscard
+is used to discard device sectors.  This is useful for solid-state
+drives (SSDs) and thinly-provisioned storage.  Unlike
+.BR fstrim (8),
+this command is used directly on the block device.
+.PP
+By default,
+.B blkdiscard
+will discard all blocks on the device.  Options may be used to modify
+this behavior based on range or size, as explained below.
+.PP
+The
+.I device
+argument is the pathname of the block device.
+.PP
+.B WARNING: All data in the discarded region on the device will be lost!
+.SH OPTIONS
+The
+.I offset
+and
+.I length
+arguments may be followed by the multiplicative suffixes KiB (=1024),
+MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is
+optional, e.g., "K" has the same meaning as "KiB") or the suffixes
+KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB.
+.TP
+.BR \-o , " \-\-offset \fIoffset"
+Byte offset into the device from which to start discarding.  The provided value
+will be aligned to the device sector size.  The default value is zero.
+.TP
+.BR \-l , " \-\-length \fIlength"
+The number of bytes to discard (counting from the starting point).  The provided value
+will be aligned to the device sector size.  If the specified value extends past
+the end of the device,
+.B blkdiscard
+will stop at the device size boundary.  The default value extends to the end
+of the device.
+.TP
+.BR \-p , " \-\-step \fIlength"
+The number of bytes to discard within one iteration. The default is to discard
+everything with a single ioctl call.
+.TP
+.BR \-s , " \-\-secure"
+Perform a secure discard.  A secure discard is the same as a regular discard
+except that all copies of the discarded blocks that were possibly created by
+garbage collection must also be erased.  This requires support from the device.
+.TP
+.BR \-z , " \-\-zeroout"
+Zero-fill rather than discard.
+.TP
+.BR \-v , " \-\-verbose"
+Display the aligned values of
+.I offset
+and
+.IR length .
+If the \fB\-\-step\fR option is specified, it prints the discard progress every second.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH AUTHOR
+.MT lczerner@redhat.com
+Lukas Czerner
+.ME
+.SH SEE ALSO
+.BR fstrim (8)
+.SH AVAILABILITY
+The blkdiscard command is part of the util-linux package and is available
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/blkdiscard.c b/sys-utils/blkdiscard.c
new file mode 100644
index 0000000..c19b67b
--- /dev/null
+++ b/sys-utils/blkdiscard.c
@@ -0,0 +1,254 @@
+/*
+ * blkdiscard.c -- discard the part (or whole) of the block device.
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
+ * Written by Lukas Czerner <lczerner@redhat.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This program uses BLKDISCARD ioctl to discard part or the whole block
+ * device if the device supports it. You can specify range (start and
+ * length) to be discarded, or simply discard the whole device.
+ */
+
+
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <getopt.h>
+#include <time.h>
+
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <linux/fs.h>
+
+#include "nls.h"
+#include "strutils.h"
+#include "c.h"
+#include "closestream.h"
+#include "monotonic.h"
+
+#ifndef BLKDISCARD
+# define BLKDISCARD	_IO(0x12,119)
+#endif
+
+#ifndef BLKSECDISCARD
+# define BLKSECDISCARD	_IO(0x12,125)
+#endif
+
+#ifndef BLKZEROOUT
+# define BLKZEROOUT	_IO(0x12,127)
+#endif
+/* Operation chosen on the command line; main() uses it to pick the ioctl and print_stats() the message. */
+enum {
+	ACT_DISCARD = 0,	/* default: BLKDISCARD */
+	ACT_ZEROOUT,		/* -z: BLKZEROOUT */
+	ACT_SECURE		/* -s: BLKSECDISCARD */
+};
+/* Print one progress/summary line for @path: stats[0] is the start offset, stats[1] the byte count. */
+static void print_stats(int act, char *path, uint64_t stats[])
+{
+	const uint64_t offset = stats[0];
+	const uint64_t bytes = stats[1];
+
+	if (act == ACT_ZEROOUT)
+		printf(_("%s: Zero-filled %" PRIu64 " bytes from the offset %" PRIu64"\n"),
+			path, bytes, offset);
+	else if (act == ACT_SECURE || act == ACT_DISCARD)
+		/* secure and plain discards share one message */
+		printf(_("%s: Discarded %" PRIu64 " bytes from the offset %" PRIu64"\n"),
+			path, bytes, offset);
+	/* any other action value prints nothing */
+}
+/* Print the help text to stdout and terminate with EXIT_SUCCESS; never returns. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	/* synopsis */
+	fputs(USAGE_HEADER, stdout);
+	printf(_(" %s [options] <device>\n"),
+	       program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Discard the content of sectors on a device.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	fputs(_(" -o, --offset <num>  offset in bytes to discard from\n"), stdout);
+	fputs(_(" -l, --length <num>  length of bytes to discard from the offset\n"), stdout);
+	fputs(_(" -p, --step <num>    size of the discard iterations within the offset\n"), stdout);
+	fputs(_(" -s, --secure        perform secure discard\n"), stdout);
+	fputs(_(" -z, --zeroout       zero-fill rather than discard\n"), stdout);
+	fputs(_(" -v, --verbose       print aligned length and offset\n"), stdout);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	printf(USAGE_HELP_OPTIONS(21));
+
+	printf(USAGE_MAN_TAIL("blkdiscard(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+/* Parse options, clamp the requested range to the device, then run the selected ioctl over it (in --step sized chunks if given). */
+int main(int argc, char **argv)
+{
+	char *path;
+	int c, fd, verbose = 0, secsize;
+	uint64_t end, blksize, step, range[2], stats[2];	/* range/stats: [0] = offset, [1] = byte count */
+	struct stat sb;
+	struct timeval now, last;
+	int act = ACT_DISCARD;
+
+	static const struct option longopts[] = {
+	    { "help",      no_argument,       NULL, 'h' },
+	    { "version",   no_argument,       NULL, 'V' },
+	    { "offset",    required_argument, NULL, 'o' },
+	    { "length",    required_argument, NULL, 'l' },
+	    { "step",      required_argument, NULL, 'p' },
+	    { "secure",    no_argument,       NULL, 's' },
+	    { "verbose",   no_argument,       NULL, 'v' },
+	    { "zeroout",   no_argument,       NULL, 'z' },
+	    { NULL, 0, NULL, 0 }
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	range[0] = 0;
+	range[1] = ULLONG_MAX;	/* "everything"; clamped to the device size below */
+	step = 0;		/* 0: cover the whole range with a single ioctl */
+
+	while ((c = getopt_long(argc, argv, "hVsvo:l:p:z", longopts, NULL)) != -1) {
+		switch(c) {
+		case 'h':
+			usage();
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'l':
+			range[1] = strtosize_or_err(optarg,
+					_("failed to parse length"));
+			break;
+		case 'o':
+			range[0] = strtosize_or_err(optarg,
+					_("failed to parse offset"));
+			break;
+		case 'p':
+			step = strtosize_or_err(optarg,
+					_("failed to parse step"));
+			break;
+		case 's':
+			act = ACT_SECURE;
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		case 'z':
+			act = ACT_ZEROOUT;
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (optind == argc)
+		errx(EXIT_FAILURE, _("no device specified"));
+
+	path = argv[optind++];
+
+	if (optind != argc) {
+		warnx(_("unexpected number of arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), path);
+
+	if (fstat(fd, &sb) == -1)
+		err(EXIT_FAILURE, _("stat of %s failed"), path);
+	if (!S_ISBLK(sb.st_mode))
+		errx(EXIT_FAILURE, _("%s: not a block device"), path);
+
+	if (ioctl(fd, BLKGETSIZE64, &blksize))
+		err(EXIT_FAILURE, _("%s: BLKGETSIZE64 ioctl failed"), path);
+	if (ioctl(fd, BLKSSZGET, &secsize))
+		err(EXIT_FAILURE, _("%s: BLKSSZGET ioctl failed"), path);
+
+	/* check offset alignment to the sector size */
+	if (range[0] % secsize)
+		errx(EXIT_FAILURE, _("%s: offset %" PRIu64 " is not aligned "
+			 "to sector size %i"), path, range[0], secsize);
+
+	/* clamp the range end to the device size; end < range[0] means offset + length wrapped around */
+	if (range[0] > blksize)
+		errx(EXIT_FAILURE, _("%s: offset is greater than device size"), path);
+	end = range[0] + range[1];
+	if (end < range[0] || end > blksize)
+		end = blksize;
+
+	range[1] = (step > 0) ? step : end - range[0];	/* length handed to each ioctl */
+
+	/* check length alignment to the sector size */
+	if (range[1] % secsize)
+		errx(EXIT_FAILURE, _("%s: length %" PRIu64 " is not aligned "
+			 "to sector size %i"), path, range[1], secsize);
+
+	stats[0] = range[0], stats[1] = 0;	/* nothing processed yet */
+	gettime_monotonic(&last);
+
+	for (/* nothing */; range[0] < end; range[0] += range[1]) {
+		if (range[0] + range[1] > end)	/* the final chunk may be shorter */
+			range[1] = end - range[0];
+
+		switch (act) {
+		case ACT_ZEROOUT:
+			if (ioctl(fd, BLKZEROOUT, &range))
+				 err(EXIT_FAILURE, _("%s: BLKZEROOUT ioctl failed"), path);
+			break;
+		case ACT_SECURE:
+			if (ioctl(fd, BLKSECDISCARD, &range))
+				err(EXIT_FAILURE, _("%s: BLKSECDISCARD ioctl failed"), path);
+			break;
+		case ACT_DISCARD:
+			if (ioctl(fd, BLKDISCARD, &range))
+				err(EXIT_FAILURE, _("%s: BLKDISCARD ioctl failed"), path);
+			break;
+		}
+
+		stats[1] += range[1];
+
+		/* report progress only once at least one full second has elapsed since the last report */
+		if (verbose && step) {
+			gettime_monotonic(&now);
+			if (now.tv_sec > last.tv_sec &&
+			    (now.tv_usec >= last.tv_usec || now.tv_sec > last.tv_sec + 1)) {
+				print_stats(act, path, stats);
+				stats[0] += stats[1], stats[1] = 0;	/* next report starts where this one ended */
+				last = now;
+			}
+		}
+	}
+
+	if (verbose && stats[1])
+		print_stats(act, path, stats);
+
+	close(fd);
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/blkzone.8 b/sys-utils/blkzone.8
new file mode 100644
index 0000000..bf7f15f
--- /dev/null
+++ b/sys-utils/blkzone.8
@@ -0,0 +1,109 @@
+.TH BLKZONE 8 "February 2017" "util-linux" "System Administration"
+.SH NAME
+blkzone \- run zone command on a device
+.SH SYNOPSIS
+.B blkzone
+.I command
+[options]
+.I device
+.SH DESCRIPTION
+.B blkzone
+is used to run a zone command on a device that supports the Zoned Block Commands
+(ZBC) or Zoned-device ATA Commands (ZAC). The zones to operate on can be
+specified using the offset, count and length options.
+.PP
+The
+.I device
+argument is the pathname of the block device.
+.SH COMMANDS
+.SS report
+The command \fBblkzone report\fP is used to report device zone information.
+.PP
+By default, the command will report all zones from the start of the
+block device. Options may be used to modify this behavior, changing the
+starting zone or the size of the report, as explained below.
+
+.B Report output
+.TS
+tab(:);
+l l.
+start:Zone start sector
+len:Zone length in number of sectors
+wptr:Zone write pointer position
+reset:Reset write pointer recommended
+non-seq:Non-sequential write resources active
+cond:Zone condition
+type:Zone type
+.TE
+
+.B Zone conditions
+.TS
+tab(:);
+l l.
+cl:Closed
+nw:Not write pointer
+em:Empty
+fu:Full
+oe:Explicitly opened
+oi:Implicitly opened
+ol:Offline
+ro:Read only
+x?:Reserved conditions (should not be reported)
+.TE
+
+.SS reset
+The command \fBblkzone reset\fP is used to reset one or more zones. Unlike
+.BR sg_reset_wp (8),
+this command operates from the block layer and can reset a range of zones.
+.PP
+By default, the command will operate from the zone at device
+sector 0 and reset all zones. Options may be used to modify this behavior
+as well as specify the operation to be performed on the zone, as explained below.
+
+.SH OPTIONS
+The
+.I offset
+and
+.I length
+option arguments may be followed by the multiplicative suffixes KiB (=1024),
+MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is
+optional, e.g., "K" has the same meaning as "KiB") or the suffixes
+KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB.
+Additionally, the 0x prefix can be used to specify \fIoffset\fR and
+\fIlength\fR in hex.
+.TP
+.BR \-o , " \-\-offset "\fIsector\fP
+The starting zone specified as a sector offset. The provided offset in sector
+units (512 bytes) should match the start of a zone. The default value is zero.
+.TP
+.BR \-l , " \-\-length "\fIsectors\fP
+The maximum number of sectors the command should operate on. The default value
+is the number of sectors remaining after \fIoffset\fR. This option cannot be
+used together with the option \fB\-\-count\fP.
+.TP
+.BR \-c , " \-\-count "\fIcount\fP
+The maximum number of zones the command should operate on. The default value
+is the number of zones starting from \fIoffset\fR. This option cannot be
+used together with the option \fB\-\-length\fP.
+.TP
+.BR \-v , " \-\-verbose"
+Display the number of zones returned in the report or the range of sectors
+reset.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH AUTHORS
+.nf
+Shaun Tancheff <shaun@tancheff.com>
+Karel Zak <kzak@redhat.com>
+.fi
+.SH SEE ALSO
+.BR sg_rep_zones (8)
+.SH AVAILABILITY
+The blkzone command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/blkzone.c b/sys-utils/blkzone.c
new file mode 100644
index 0000000..1dcbdf5
--- /dev/null
+++ b/sys-utils/blkzone.c
@@ -0,0 +1,416 @@
+/*
+ * blkzone.c -- the block device zone commands
+ *
+ * Copyright (C) 2015,2016 Seagate Technology PLC
+ * Written by Shaun Tancheff <shaun.tancheff@seagate.com>
+ *
+ * Copyright (C) 2017 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <getopt.h>
+#include <time.h>
+
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <linux/fs.h>
+#include <linux/blkzoned.h>
+
+#include "nls.h"
+#include "strutils.h"
+#include "xalloc.h"
+#include "c.h"
+#include "closestream.h"
+#include "blkdev.h"
+#include "sysfs.h"
+#include "optutils.h"
+
+struct blkzone_control;
+
+static int blkzone_report(struct blkzone_control *ctl);
+static int blkzone_reset(struct blkzone_control *ctl);
+
+/*
+ * One blkzone sub-command: the name accepted on the command line, the
+ * function implementing it and a translatable one-line description.
+ */
+struct blkzone_command {
+	const char *name;	/* compared with argv[1] by name_to_command() */
+	int (*handler)(struct blkzone_control *);	/* main() treats a negative return as failure */
+	const char *help;	/* printed by usage(); marked with N_() in commands[] */
+};
+
+/* Run-time state filled in by main() and handed to the sub-command handler. */
+struct blkzone_control {
+	const char *devname;	/* device path taken from the command line */
+	const struct blkzone_command *command;	/* selected sub-command */
+
+	uint64_t total_sectors;	/* set by init_device() from blkdev_get_sectors() */
+	int secsize;		/* set by init_device() from blkdev_get_sector_size() */
+
+	uint64_t offset;	/* --offset argument; 0 when not given */
+	uint64_t length;	/* --length argument; 0 when not given */
+	uint32_t count;		/* --count argument; 0 when not given */
+
+	unsigned int verbose : 1;	/* set by --verbose */
+};
+
+/*
+ * Supported sub-commands. The table is searched by name_to_command() and
+ * listed by usage(); the help strings are translated at print time.
+ */
+static const struct blkzone_command commands[] = {
+	{ "report",	blkzone_report, N_("Report zone information about the given device") },
+	{ "reset",	blkzone_reset,  N_("Reset a range of zones.") }
+};
+
+/*
+ * Find the sub-command whose name is exactly @name (case-sensitive).
+ *
+ * Returns a pointer to the matching commands[] entry, or NULL when no
+ * entry matches.
+ */
+static const struct blkzone_command *name_to_command(const char *name)
+{
+	const struct blkzone_command *cmd = commands;
+	const struct blkzone_command *end = commands + ARRAY_SIZE(commands);
+
+	while (cmd < end) {
+		if (!strcmp(cmd->name, name))
+			return cmd;
+		cmd++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Open the device named in @ctl with the given open(2) @mode, verify that
+ * it is a block device and store its size (in sectors) and its sector size
+ * in @ctl.
+ *
+ * Returns the open file descriptor. Every failure terminates the program
+ * with EXIT_FAILURE, so the function never returns an error.
+ */
+static int init_device(struct blkzone_control *ctl, int mode)
+{
+	struct stat sb;
+	/*
+	 * blkdev_get_sectors() takes an "unsigned long long *"; use a
+	 * correctly typed temporary rather than casting the address of the
+	 * uint64_t member, which is a different type on LP64 platforms.
+	 */
+	unsigned long long nsectors = 0;
+	int fd;
+
+	fd = open(ctl->devname, mode);
+	if (fd < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), ctl->devname);
+
+	if (fstat(fd, &sb) == -1)
+		err(EXIT_FAILURE, _("stat of %s failed"), ctl->devname);
+	if (!S_ISBLK(sb.st_mode))
+		errx(EXIT_FAILURE, _("%s: not a block device"), ctl->devname);
+
+	if (blkdev_get_sectors(fd, &nsectors))
+		err(EXIT_FAILURE, _("%s: blkdev_get_sectors ioctl failed"), ctl->devname);
+	ctl->total_sectors = nsectors;
+
+	if (blkdev_get_sector_size(fd, &ctl->secsize))
+		err(EXIT_FAILURE, _("%s: BLKSSZGET ioctl failed"), ctl->devname);
+
+	return fd;
+}
+
+/*
+ * Get the device zone size (as indicated by the queue/chunk_sectors sysfs
+ * attribute of the whole-disk device).
+ *
+ * Returns the value read, or 0 when it cannot be determined.
+ */
+static unsigned long blkdev_chunk_sectors(const char *dname)
+{
+	dev_t devno = sysfs_devname_to_devno(dname);
+	dev_t wholedisk;
+	uint64_t sectors = 0;
+	struct path_cxt *pc;
+	int rc;
+
+	/*
+	 * Mapping /dev/sdXn -> /sys/block/sdX to read the chunk_size entry.
+	 * This method masks off the partition specified by the minor device
+	 * component.
+	 */
+	pc = ul_new_sysfs_path(devno, NULL, NULL);
+	if (pc == NULL)
+		return 0;
+
+	rc = sysfs_blkdev_get_wholedisk(pc, NULL, 0, &wholedisk);
+
+	/* if @pc is not the whole-disk device, switch to the disk */
+	if (rc == 0 && devno != wholedisk)
+		rc = sysfs_blkdev_init_path(pc, wholedisk, NULL);
+
+	if (rc == 0)
+		rc = ul_path_read_u64(pc, &sectors, "queue/chunk_sectors");
+
+	ul_unref_path(pc);
+
+	if (rc != 0)
+		return 0;
+	return sectors;
+}
+
+/*
+ * blkzone report
+ */
+
+/*
+ * Maximum number of zone descriptors requested from one BLKREPORTZONE
+ * ioctl; blkzone_report() sizes its reply buffer with this value and asks
+ * for the zones in batches no larger than this.
+ */
+#define DEF_REPORT_LEN		(1U << 12) /* 4k zones per report (256k kzalloc) */
+
+/*
+ * Human-readable zone type names, indexed by the "type" field of the
+ * struct blk_zone entries printed by blkzone_report().
+ */
+static const char *type_text[] = {
+	"RESERVED",
+	"CONVENTIONAL",
+	"SEQ_WRITE_REQUIRED",
+	"SEQ_WRITE_PREFERRED",
+};
+
+/*
+ * Two-letter abbreviations for the zone condition. blkzone_report() indexes
+ * this table with "cond & (ARRAY_SIZE(condition_str) - 1)", so the number of
+ * entries (currently 16) has to stay a power of two for that mask to work.
+ */
+static const char *condition_str[] = {
+	"nw", /* Not write pointer */
+	"em", /* Empty */
+	"oi", /* Implicitly opened */
+	"oe", /* Explicitly opened */
+	"cl", /* Closed */
+	"x5", "x6", "x7", "x8", "x9", "xA", "xB", "xC", /* xN: reserved */
+	"ro", /* Read only */
+	"fu", /* Full */
+	"of"  /* Offline */
+};
+
+/*
+ * Implementation of "blkzone report".
+ *
+ * Opens the device read-only and prints one line for every zone returned
+ * by the BLKREPORTZONE ioctl, starting at ctl->offset. The number of zones
+ * is limited by ctl->count, or derived from ctl->length, or (when neither
+ * is set) covers the rest of the device. ctl->offset is advanced while the
+ * zones are printed.
+ *
+ * Returns 0; all errors terminate the program with EXIT_FAILURE.
+ */
+static int blkzone_report(struct blkzone_control *ctl)
+{
+	struct blk_zone_report *zi;
+	unsigned long zonesize;
+	uint32_t i, nr_zones;
+	int fd;
+
+	fd = init_device(ctl, O_RDONLY);
+
+	if (ctl->offset >= ctl->total_sectors)
+		errx(EXIT_FAILURE,
+		     _("%s: offset is greater than or equal to device size"), ctl->devname);
+
+	zonesize = blkdev_chunk_sectors(ctl->devname);
+	if (!zonesize)
+		errx(EXIT_FAILURE, _("%s: unable to determine zone size"), ctl->devname);
+
+	/* number of zones that still have to be printed */
+	if (ctl->count)
+		nr_zones = ctl->count;
+	else if (ctl->length)
+		nr_zones = (ctl->length + zonesize - 1) / zonesize;
+	else
+		nr_zones = 1 + (ctl->total_sectors - ctl->offset) / zonesize;
+
+	/* one buffer, reused for every batch of at most DEF_REPORT_LEN zones */
+	zi = xmalloc(sizeof(struct blk_zone_report) +
+		     (DEF_REPORT_LEN * sizeof(struct blk_zone)));
+
+	while (nr_zones && ctl->offset < ctl->total_sectors) {
+
+		zi->nr_zones = min(nr_zones, DEF_REPORT_LEN);
+		zi->sector = ctl->offset;
+
+		if (ioctl(fd, BLKREPORTZONE, zi) == -1)
+			err(EXIT_FAILURE, _("%s: BLKREPORTZONE ioctl failed"), ctl->devname);
+
+		if (ctl->verbose)
+			printf(_("Found %d zones from 0x%"PRIx64"\n"),
+				zi->nr_zones, ctl->offset);
+
+		/* nothing returned: no more zones at or after this offset */
+		if (!zi->nr_zones) {
+			nr_zones = 0;
+			break;
+		}
+
+		for (i = 0; i < zi->nr_zones; i++) {
+			const struct blk_zone *entry = &zi->zones[i];
+			unsigned int type = entry->type;
+			uint64_t start = entry->start;
+			uint64_t wp = entry->wp;
+			uint8_t cond = entry->cond;
+			uint64_t len = entry->len;
+			/*
+			 * The type comes straight from the ioctl reply; never
+			 * index type_text[] with a value it has no entry for.
+			 */
+			const char *type_name = type < ARRAY_SIZE(type_text) ?
+						type_text[type] : "UNKNOWN";
+
+			/* a zero-length zone would never advance the offset */
+			if (!len) {
+				nr_zones = 0;
+				break;
+			}
+
+			printf(_("  start: 0x%09"PRIx64", len 0x%06"PRIx64", wptr 0x%06"PRIx64
+				" reset:%u non-seq:%u, zcond:%2u(%s) [type: %u(%s)]\n"),
+				start, len, (type == 0x1) ? 0 : wp - start,
+				entry->reset, entry->non_seq,
+				cond, condition_str[cond & (ARRAY_SIZE(condition_str) - 1)],
+				type, type_name);
+
+			nr_zones--;
+			/* the next batch starts right after the last printed zone */
+			ctl->offset = start + len;
+
+		}
+
+	}
+
+	free(zi);
+	close(fd);
+
+	return 0;
+}
+
+/*
+ * blkzone reset
+ *
+ * Resets the zones in the sector range that starts at ctl->offset. The size
+ * of the range comes from ctl->count (zones), or ctl->length (sectors), or
+ * defaults to the rest of the device; it is always clamped to the end of
+ * the device.
+ *
+ * Returns 0; all errors terminate the program with EXIT_FAILURE.
+ */
+static int blkzone_reset(struct blkzone_control *ctl)
+{
+	struct blk_zone_range za = { .sector = 0 };
+	unsigned long zonesize;
+	uint64_t zlen;
+	int fd;
+
+	zonesize = blkdev_chunk_sectors(ctl->devname);
+	if (!zonesize)
+		errx(EXIT_FAILURE, _("%s: unable to determine zone size"), ctl->devname);
+
+	fd = init_device(ctl, O_WRONLY);
+
+	/* the mask test relies on the zone size being a power of two */
+	if (ctl->offset & (zonesize - 1))
+		errx(EXIT_FAILURE, _("%s: offset %" PRIu64 " is not aligned "
+			"to zone size %lu"),
+			ctl->devname, ctl->offset, zonesize);
+
+	if (ctl->offset > ctl->total_sectors)
+		errx(EXIT_FAILURE, _("%s: offset is greater than device size"), ctl->devname);
+
+	if (ctl->count)
+		/* widen first: count * zonesize overflows a 32-bit unsigned long */
+		zlen = (uint64_t) ctl->count * zonesize;
+	else if (ctl->length)
+		zlen = ctl->length;
+	else
+		zlen = ctl->total_sectors;
+
+	/*
+	 * Clamp to the end of the device. offset <= total_sectors is
+	 * guaranteed by the check above, so the subtraction cannot wrap
+	 * (unlike "offset + zlen", which can for huge --length values).
+	 */
+	if (zlen > ctl->total_sectors - ctl->offset)
+		zlen = ctl->total_sectors - ctl->offset;
+
+	/* an unaligned --length is accepted only when it ends at the device end */
+	if (ctl->length &&
+	    (zlen & (zonesize - 1)) &&
+	    ctl->offset + zlen != ctl->total_sectors)
+		errx(EXIT_FAILURE, _("%s: number of sectors %" PRIu64 " is not aligned "
+			"to zone size %lu"),
+			ctl->devname, ctl->length, zonesize);
+
+	za.sector = ctl->offset;
+	za.nr_sectors = zlen;
+
+	if (ioctl(fd, BLKRESETZONE, &za) == -1)
+		err(EXIT_FAILURE, _("%s: BLKRESETZONE ioctl failed"), ctl->devname);
+	else if (ctl->verbose)
+		/* terminate the line so following output starts on a new one */
+		printf(_("%s: successfully reset in range from %" PRIu64 ", to %" PRIu64 "\n"),
+			ctl->devname,
+			ctl->offset,
+			ctl->offset + zlen);
+	close(fd);
+	return 0;
+}
+
+/* Print the help text (synopsis, commands, options) to stdout and exit successfully. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *fp = stdout;
+	const struct blkzone_command *cmd;
+
+	fputs(USAGE_HEADER, fp);
+	fprintf(fp, _(" %s <command> [options] <device>\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, fp);
+	fputs(_("Run zone command on the given block device.\n"), fp);
+
+	/* one line per entry of the commands[] table */
+	fputs(USAGE_COMMANDS, fp);
+	for (cmd = commands; cmd < commands + ARRAY_SIZE(commands); cmd++)
+		fprintf(fp, " %-11s  %s\n", cmd->name, _(cmd->help));
+
+	fputs(USAGE_OPTIONS, fp);
+	fputs(_(" -o, --offset <sector>  start sector of zone to act (in 512-byte sectors)\n"), fp);
+	fputs(_(" -l, --length <sectors> maximum sectors to act (in 512-byte sectors)\n"), fp);
+	fputs(_(" -c, --count <number>   maximum number of zones\n"), fp);
+	fputs(_(" -v, --verbose          display more details\n"), fp);
+	fputs(USAGE_SEPARATOR, fp);
+	printf(USAGE_HELP_OPTIONS(24));
+
+	printf(USAGE_MAN_TAIL("blkzone(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Entry point: blkzone <command> [options] <device>
+ *
+ * The sub-command is expected as the first argument (when it does not start
+ * with '-'); the remaining arguments are parsed with getopt_long() and the
+ * selected handler is called with the collected settings.
+ */
+int main(int argc, char **argv)
+{
+	static const struct option longopts[] = {
+	    { "help",    no_argument,       NULL, 'h' },
+	    { "count",   required_argument, NULL, 'c' }, /* max #of zones to operate on */
+	    { "length",  required_argument, NULL, 'l' }, /* max of sectors to operate on */
+	    { "offset",  required_argument, NULL, 'o' }, /* starting LBA */
+	    { "verbose", no_argument,       NULL, 'v' },
+	    { "version", no_argument,       NULL, 'V' },
+	    { NULL, 0, NULL, 0 }
+	};
+	static const ul_excl_t excl[] = {       /* rows and cols in ASCII order */
+		{ 'c', 'l' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+	/* members not named here (command, sizes, flags) start out as zero */
+	struct blkzone_control ctl = {
+		.devname = NULL
+	};
+	int opt;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	/* consume the sub-command so that getopt only sees options and device */
+	if (argc >= 2 && argv[1][0] != '-') {
+		ctl.command = name_to_command(argv[1]);
+		if (ctl.command == NULL)
+			errx(EXIT_FAILURE, _("%s is not valid command name"), argv[1]);
+		argc--;
+		argv++;
+	}
+
+	for (;;) {
+		opt = getopt_long(argc, argv, "hc:l:o:vV", longopts, NULL);
+		if (opt == -1)
+			break;
+
+		/* --count and --length cannot be combined */
+		err_exclusive_options(opt, longopts, excl, excl_st);
+
+		switch (opt) {
+		case 'h':
+			usage();
+			break;
+		case 'c':
+			ctl.count = strtou32_or_err(optarg,
+					_("failed to parse number of zones"));
+			break;
+		case 'l':
+			ctl.length = strtosize_or_err(optarg,
+					_("failed to parse number of sectors"));
+			break;
+		case 'o':
+			ctl.offset = strtosize_or_err(optarg,
+					_("failed to parse zone offset"));
+			break;
+		case 'v':
+			ctl.verbose = 1;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (ctl.command == NULL)
+		errx(EXIT_FAILURE, _("no command specified"));
+
+	/* exactly one non-option argument, the device, has to remain */
+	if (optind == argc)
+		errx(EXIT_FAILURE, _("no device specified"));
+	ctl.devname = argv[optind++];
+
+	if (optind != argc)
+		errx(EXIT_FAILURE,_("unexpected number of arguments"));
+
+	return ctl.command->handler(&ctl) < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/sys-utils/chcpu.8 b/sys-utils/chcpu.8
new file mode 100644
index 0000000..2fb7111
--- /dev/null
+++ b/sys-utils/chcpu.8
@@ -0,0 +1,106 @@
+.TH CHCPU 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+chcpu \- configure CPUs
+.SH SYNOPSIS
+.B chcpu
+.BR \-c | \-d | \-e | \-g
+.I cpu-list
+.br
+.B chcpu \-p
+.I mode
+.br
+.B chcpu
+.BR \-r | \-h | \-V
+.SH DESCRIPTION
+.B chcpu
+can modify the state of CPUs.  It can enable or disable CPUs, scan for new
+CPUs, change the CPU dispatching
+.I mode
+of the underlying hypervisor, and request CPUs from the hypervisor
+(configure) or return CPUs to the hypervisor (deconfigure).
+.PP
+Some options have a
+.I cpu-list
+argument.  Use this argument to specify a comma-separated list of CPUs.  The
+list can contain individual CPU addresses or ranges of addresses.  For
+example,
+.B 0,5,7,9-11
+makes the command applicable to the CPUs with the addresses 0, 5, 7, 9, 10,
+and 11.
+.SH OPTIONS
+.TP
+.BR \-c , " \-\-configure " \fIcpu-list\fP
+Configure the specified CPUs.  Configuring a CPU means that the hypervisor
+takes a CPU from the CPU pool and assigns it to the virtual hardware on which
+your kernel runs.
+.TP
+.BR \-d , " \-\-disable " \fIcpu-list\fP
+Disable the specified CPUs.  Disabling a CPU means that the kernel sets it
+offline.
+.TP
+.BR \-e , " \-\-enable " \fIcpu-list\fP
+Enable the specified CPUs.  Enabling a CPU means that the kernel sets it
+online.  A CPU must be configured, see \fB\-c\fR, before it can be enabled.
+.TP
+.BR \-g , " \-\-deconfigure " \fIcpu-list\fP
+Deconfigure the specified CPUs.  Deconfiguring a CPU means that the
+hypervisor removes the CPU from the virtual hardware on which the Linux
+instance runs and returns it to the CPU pool.  A CPU must be offline, see
+\fB\-d\fR, before it can be deconfigured.
+.TP
+.BR \-p , " \-\-dispatch " \fImode\fP
+Set the CPU dispatching
+.I mode
+(polarization).  This option has an effect only if your hardware architecture
+and hypervisor support CPU polarization.  Available
+.I modes
+are:
+.RS 14
+.TP 12
+.PD 0
+.B horizontal
+The workload is spread across all available CPUs.
+.TP 12
+.B vertical
+The workload is concentrated on few CPUs.
+.RE
+.PD 1
+.TP
+.BR \-r , " \-\-rescan"
+Trigger a rescan of CPUs.  After a rescan, the Linux kernel recognizes
+the new CPUs.  Use this option on systems that do not
+automatically detect newly attached CPUs.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+
+.SH RETURN CODES
+.B chcpu
+has the following return codes:
+.TP
+.B 0
+success
+.TP
+.B 1
+failure
+.TP
+.B 64
+partial success
+.SH AUTHOR
+.MT heiko.carstens@de.ibm.com
+Heiko Carstens
+.ME
+.SH COPYRIGHT
+Copyright IBM Corp. 2011
+.br
+.SH "SEE ALSO"
+.BR lscpu (1)
+.SH AVAILABILITY
+The chcpu command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/chcpu.c b/sys-utils/chcpu.c
new file mode 100644
index 0000000..36c47af
--- /dev/null
+++ b/sys-utils/chcpu.c
@@ -0,0 +1,389 @@
+/*
+ * chcpu - CPU configuration tool
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "cpuset.h"
+#include "nls.h"
+#include "xalloc.h"
+#include "c.h"
+#include "strutils.h"
+#include "bitops.h"
+#include "path.h"
+#include "closestream.h"
+#include "optutils.h"
+
+/* NOTE(review): EXCL_ERROR is not referenced in the visible part of this file. */
+#define EXCL_ERROR "--{configure,deconfigure,disable,dispatch,enable}"
+
+/* partial success, otherwise we return regular EXIT_{SUCCESS,FAILURE} */
+#define CHCPU_EXIT_SOMEOK	64
+
+/* sysfs directory all chcpu operations are relative to */
+#define _PATH_SYS_CPU		"/sys/devices/system/cpu"
+
+/* Set of online CPUs; every user tests it for NULL before dereferencing. */
+static cpu_set_t *onlinecpus;
+/* Upper bound for CPU numbers; set from get_max_number_of_cpus() in main(). */
+static int maxcpus;
+
+/* Both helpers assume onlinecpus was sized for maxcpus CPUs. */
+#define is_cpu_online(cpu) (CPU_ISSET_S((cpu), CPU_ALLOC_SIZE(maxcpus), onlinecpus))
+#define num_online_cpus()  (CPU_COUNT_S(CPU_ALLOC_SIZE(maxcpus), onlinecpus))
+
+/* Operation selected on the command line; main() dispatches on this value. */
+enum {
+	CMD_CPU_ENABLE	= 0,
+	CMD_CPU_DISABLE,
+	CMD_CPU_CONFIGURE,
+	CMD_CPU_DECONFIGURE,
+	CMD_CPU_RESCAN,
+	CMD_CPU_DISPATCH_HORIZONTAL,
+	CMD_CPU_DISPATCH_VERTICAL,
+};
+
+/*
+ * Set every CPU selected in @cpu_set online (@enable != 0) or offline
+ * (@enable == 0) by writing to cpu<N>/online below @sys.
+ *
+ * @setsize is the size argument handed to CPU_ISSET_S() for @cpu_set.
+ *
+ * CPUs that are already in the requested state are reported and skipped;
+ * they do not count as failures.
+ *
+ * returns:   0 = success
+ *          < 0 = failure (every requested CPU failed)
+ *          > 0 = partial success
+ */
+static int cpu_enable(struct path_cxt *sys, cpu_set_t *cpu_set, size_t setsize, int enable)
+{
+	int cpu;
+	int online, rc;
+	int configured;
+	int fails = 0;
+	int requested = 0;	/* number of CPUs selected in @cpu_set */
+
+	for (cpu = 0; cpu < maxcpus; cpu++) {
+		if (!CPU_ISSET_S(cpu, setsize, cpu_set))
+			continue;
+		requested++;
+
+		/* per-CPU state; -1 means "unknown", never reuse old values */
+		online = -1;
+		configured = -1;
+
+		if (ul_path_accessf(sys, F_OK, "cpu%d", cpu) != 0) {
+			warnx(_("CPU %u does not exist"), cpu);
+			fails++;
+			continue;
+		}
+		if (ul_path_accessf(sys, F_OK, "cpu%d/online", cpu) != 0) {
+			warnx(_("CPU %u is not hot pluggable"), cpu);
+			fails++;
+			continue;
+		}
+		/* a failed read leaves the state unknown and we just try the write */
+		if (ul_path_readf_s32(sys, &online, "cpu%d/online", cpu) != 0)
+			online = -1;
+		if (online == 1 && enable == 1) {
+			printf(_("CPU %u is already enabled\n"), cpu);
+			continue;
+		}
+		if (online == 0 && enable == 0) {
+			printf(_("CPU %u is already disabled\n"), cpu);
+			continue;
+		}
+		/* only used to give a better hint when enabling fails */
+		if (ul_path_accessf(sys, F_OK, "cpu%d/configure", cpu) == 0
+		    && ul_path_readf_s32(sys, &configured, "cpu%d/configure", cpu) != 0)
+			configured = -1;
+		if (enable) {
+			rc = ul_path_writef_string(sys, "1", "cpu%d/online", cpu);
+			if (rc != 0 && configured == 0) {
+				warn(_("CPU %u enable failed (CPU is deconfigured)"), cpu);
+				fails++;
+			} else if (rc != 0) {
+				warn(_("CPU %u enable failed"), cpu);
+				fails++;
+			} else
+				printf(_("CPU %u enabled\n"), cpu);
+		} else {
+			/* refuse to switch off the last online CPU */
+			if (onlinecpus && num_online_cpus() == 1) {
+				warnx(_("CPU %u disable failed (last enabled CPU)"), cpu);
+				fails++;
+				continue;
+			}
+			rc = ul_path_writef_string(sys, "0", "cpu%d/online", cpu);
+			if (rc != 0) {
+				warn(_("CPU %u disable failed"), cpu);
+				fails++;
+			} else {
+				printf(_("CPU %u disabled\n"), cpu);
+				/* keep the online set current for the last-CPU check;
+				 * same size expression as the is_cpu_online() macro */
+				if (onlinecpus)
+					CPU_CLR_S(cpu, CPU_ALLOC_SIZE(maxcpus), onlinecpus);
+			}
+		}
+	}
+
+	/* compare with the number of requested CPUs, not with maxcpus */
+	return fails == 0 ? 0 : fails == requested ? -1 : 1;
+}
+
+/*
+ * Ask the kernel to rescan for CPUs by writing "1" to the "rescan" file
+ * below @sys. Exits with EXIT_FAILURE when the file is missing or the
+ * write fails; returns 0 otherwise.
+ */
+static int cpu_rescan(struct path_cxt *sys)
+{
+	int supported = ul_path_access(sys, F_OK, "rescan") == 0;
+
+	if (!supported)
+		errx(EXIT_FAILURE, _("This system does not support rescanning of CPUs"));
+
+	if (ul_path_write_string(sys, "1", "rescan"))
+		err(EXIT_FAILURE, _("Failed to trigger rescan of CPUs"));
+
+	printf(_("Triggered rescan of CPUs\n"));
+
+	return 0;
+}
+
+/*
+ * Select the CPU dispatching mode through the "dispatching" file below
+ * @sys: @mode 0 writes "0" (horizontal), any other value writes "1"
+ * (vertical). Exits with EXIT_FAILURE when the file is missing or the
+ * write fails; returns 0 otherwise.
+ */
+static int cpu_set_dispatch(struct path_cxt *sys, int mode)
+{
+	if (ul_path_access(sys, F_OK, "dispatching") != 0)
+		errx(EXIT_FAILURE, _("This system does not support setting "
+				     "the dispatching mode of CPUs"));
+
+	if (mode != 0) {
+		if (ul_path_write_string(sys, "1", "dispatching") != 0)
+			err(EXIT_FAILURE, _("Failed to set vertical dispatch mode"));
+
+		printf(_("Successfully set vertical dispatching mode\n"));
+		return 0;
+	}
+
+	if (ul_path_write_string(sys, "0", "dispatching") != 0)
+		err(EXIT_FAILURE, _("Failed to set horizontal dispatch mode"));
+
+	printf(_("Successfully set horizontal dispatching mode\n"));
+	return 0;
+}
+
+/*
+ * Configure (@configure != 0) or deconfigure (@configure == 0) every CPU
+ * selected in @cpu_set by writing to cpu<N>/configure below @sys.
+ *
+ * @setsize is the size argument handed to CPU_ISSET_S() for @cpu_set.
+ *
+ * CPUs that are already in the requested state are reported and skipped;
+ * they do not count as failures.
+ *
+ * returns:   0 = success
+ *          < 0 = failure (every requested CPU failed)
+ *          > 0 = partial success
+ */
+static int cpu_configure(struct path_cxt *sys, cpu_set_t *cpu_set, size_t setsize, int configure)
+{
+	int cpu;
+	int rc, current;
+	int fails = 0;
+	int requested = 0;	/* number of CPUs selected in @cpu_set */
+
+	for (cpu = 0; cpu < maxcpus; cpu++) {
+		if (!CPU_ISSET_S(cpu, setsize, cpu_set))
+			continue;
+		requested++;
+
+		if (ul_path_accessf(sys, F_OK, "cpu%d", cpu) != 0) {
+			warnx(_("CPU %u does not exist"), cpu);
+			fails++;
+			continue;
+		}
+		if (ul_path_accessf(sys, F_OK, "cpu%d/configure", cpu) != 0) {
+			warnx(_("CPU %u is not configurable"), cpu);
+			fails++;
+			continue;
+		}
+		/* -1 = state unknown; do not test an uninitialized value below */
+		if (ul_path_readf_s32(sys, &current, "cpu%d/configure", cpu) != 0)
+			current = -1;
+		if (current == 1 && configure == 1) {
+			printf(_("CPU %u is already configured\n"), cpu);
+			continue;
+		}
+		if (current == 0 && configure == 0) {
+			printf(_("CPU %u is already deconfigured\n"), cpu);
+			continue;
+		}
+		/* an online CPU has to be disabled before it can be deconfigured */
+		if (current == 1 && configure == 0 && onlinecpus &&
+		    is_cpu_online(cpu)) {
+			warnx(_("CPU %u deconfigure failed (CPU is enabled)"), cpu);
+			fails++;
+			continue;
+		}
+		if (configure) {
+			rc = ul_path_writef_string(sys, "1", "cpu%d/configure", cpu);
+			if (rc != 0) {
+				warn(_("CPU %u configure failed"), cpu);
+				fails++;
+			} else
+				printf(_("CPU %u configured\n"), cpu);
+		} else {
+			rc = ul_path_writef_string(sys, "0", "cpu%d/configure", cpu);
+			if (rc != 0) {
+				warn(_("CPU %u deconfigure failed"), cpu);
+				fails++;
+			} else
+				printf(_("CPU %u deconfigured\n"), cpu);
+		}
+	}
+
+	/* compare with the number of requested CPUs, not with maxcpus */
+	return fails == 0 ? 0 : fails == requested ? -1 : 1;
+}
+
+/*
+ * Parse the CPU list @cpu_string into @cpu_set with cpulist_parse().
+ * Returns only on success; any parse error terminates the program, with a
+ * dedicated message for return code 2 (invalid CPU number).
+ */
+static void cpu_parse(char *cpu_string, cpu_set_t *cpu_set, size_t setsize)
+{
+	switch (cpulist_parse(cpu_string, cpu_set, setsize, 1)) {
+	case 0:
+		return;
+	case 2:
+		errx(EXIT_FAILURE, _("invalid CPU number in CPU list: %s"), cpu_string);
+	default:
+		errx(EXIT_FAILURE, _("failed to parse CPU list: %s"), cpu_string);
+	}
+}
+
+/* Print the help text to stdout and exit successfully. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *fp = stdout;
+
+	fprintf(fp, _(
+		"\nUsage:\n"
+		" %s [options]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, fp);
+	fputs(_("Configure CPUs in a multi-processor system.\n"), fp);
+
+	fputs(USAGE_OPTIONS, fp);
+	fputs(_(
+		" -e, --enable <cpu-list>       enable cpus\n"
+		" -d, --disable <cpu-list>      disable cpus\n"
+		" -c, --configure <cpu-list>    configure cpus\n"
+		" -g, --deconfigure <cpu-list>  deconfigure cpus\n"
+		" -p, --dispatch <mode>         set dispatching mode\n"
+		" -r, --rescan                  trigger rescan of cpus\n"
+		), fp);
+	fprintf(fp, USAGE_HELP_OPTIONS(31));
+
+	fprintf(fp, USAGE_MAN_TAIL("chcpu(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Entry point: parse the command line, then run the single selected
+ * operation on the CPUs below _PATH_SYS_CPU.
+ *
+ * Exit status: EXIT_SUCCESS, EXIT_FAILURE, or CHCPU_EXIT_SOMEOK when only
+ * some of the requested CPUs could be changed.
+ */
+int main(int argc, char *argv[])
+{
+	struct path_cxt *sys = NULL;	/* _PATH_SYS_CPU handler */
+	cpu_set_t *cpu_set;
+	size_t setsize;
+	int cmd = -1;
+	int c, rc;
+
+	static const struct option longopts[] = {
+		{ "configure",	required_argument, NULL, 'c' },
+		{ "deconfigure",required_argument, NULL, 'g' },
+		{ "disable",	required_argument, NULL, 'd' },
+		{ "dispatch",	required_argument, NULL, 'p' },
+		{ "enable",	required_argument, NULL, 'e' },
+		{ "help",	no_argument,       NULL, 'h' },
+		{ "rescan",	no_argument,       NULL, 'r' },
+		{ "version",	no_argument,       NULL, 'V' },
+		{ NULL,		0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {       /* rows and cols in ASCII order */
+		{ 'c','d','e','g','p' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	ul_path_init_debug();
+	sys = ul_new_path(_PATH_SYS_CPU);
+	if (!sys)
+		err(EXIT_FAILURE, _("failed to initialize sysfs handler"));
+
+	maxcpus = get_max_number_of_cpus();
+	if (maxcpus < 1)
+		errx(EXIT_FAILURE, _("cannot determine NR_CPUS; aborting"));
+
+	/*
+	 * Best effort: remember which CPUs are online. The list has to go
+	 * into onlinecpus (not cpu_set, which is allocated below); without
+	 * it the "last enabled CPU" and "CPU is enabled" checks are skipped.
+	 */
+	if (ul_path_access(sys, F_OK, "online") == 0)
+		ul_path_readf_cpulist(sys, &onlinecpus, maxcpus, "online");
+
+	setsize = CPU_ALLOC_SIZE(maxcpus);
+	cpu_set = CPU_ALLOC(maxcpus);
+	if (!cpu_set)
+		err(EXIT_FAILURE, _("cpuset_alloc failed"));
+
+	while ((c = getopt_long(argc, argv, "c:d:e:g:hp:rV", longopts, NULL)) != -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		/*
+		 * Use optarg for option arguments: argv[optind - 1] is the
+		 * whole word for the "--enable=0" and "-e0" spellings.
+		 */
+		switch (c) {
+		case 'c':
+			cmd = CMD_CPU_CONFIGURE;
+			cpu_parse(optarg, cpu_set, setsize);
+			break;
+		case 'd':
+			cmd = CMD_CPU_DISABLE;
+			cpu_parse(optarg, cpu_set, setsize);
+			break;
+		case 'e':
+			cmd = CMD_CPU_ENABLE;
+			cpu_parse(optarg, cpu_set, setsize);
+			break;
+		case 'g':
+			cmd = CMD_CPU_DECONFIGURE;
+			cpu_parse(optarg, cpu_set, setsize);
+			break;
+		case 'h':
+			usage();
+		case 'p':
+			if (strcmp("horizontal", optarg) == 0)
+				cmd = CMD_CPU_DISPATCH_HORIZONTAL;
+			else if (strcmp("vertical", optarg) == 0)
+				cmd = CMD_CPU_DISPATCH_VERTICAL;
+			else
+				errx(EXIT_FAILURE, _("unsupported argument: %s"),
+				     optarg);
+			break;
+		case 'r':
+			cmd = CMD_CPU_RESCAN;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* at least one option is required and no operands are accepted */
+	if ((argc == 1) || (argc != optind)) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	/* pass the byte size of cpu_set (setsize), not the number of CPUs */
+	switch (cmd) {
+	case CMD_CPU_ENABLE:
+		rc = cpu_enable(sys, cpu_set, setsize, 1);
+		break;
+	case CMD_CPU_DISABLE:
+		rc = cpu_enable(sys, cpu_set, setsize, 0);
+		break;
+	case CMD_CPU_CONFIGURE:
+		rc = cpu_configure(sys, cpu_set, setsize, 1);
+		break;
+	case CMD_CPU_DECONFIGURE:
+		rc = cpu_configure(sys, cpu_set, setsize, 0);
+		break;
+	case CMD_CPU_RESCAN:
+		rc = cpu_rescan(sys);
+		break;
+	case CMD_CPU_DISPATCH_HORIZONTAL:
+		rc = cpu_set_dispatch(sys, 0);
+		break;
+	case CMD_CPU_DISPATCH_VERTICAL:
+		rc = cpu_set_dispatch(sys, 1);
+		break;
+	default:
+		rc = -EINVAL;
+		break;
+	}
+
+	ul_unref_path(sys);
+
+	return rc == 0 ? EXIT_SUCCESS :
+	        rc < 0 ? EXIT_FAILURE : CHCPU_EXIT_SOMEOK;
+}
diff --git a/sys-utils/chmem.8 b/sys-utils/chmem.8
new file mode 100644
index 0000000..8a3b34d
--- /dev/null
+++ b/sys-utils/chmem.8
@@ -0,0 +1,114 @@
+.TH CHMEM 8 "October 2016" "util-linux" "System Administration"
+.SH NAME
+chmem \- configure memory
+.SH SYNOPSIS
+.B chmem
+.RB [ \-h "] [" \-V "] [" \-v "] [" \-e | \-d "]"
+[\fISIZE\fP|\fIRANGE\fP|\fB\-b\fP \fIBLOCKRANGE\fP]
+[\fB\-z\fP \fIZONE\fP]
+.SH DESCRIPTION
+The chmem command sets a particular size or range of memory online or offline.
+.
+.IP "\(hy" 2
+Specify \fISIZE\fP as <size>[m|M|g|G]. With m or M, <size> specifies the memory
+size in MiB (1024 x 1024 bytes). With g or G, <size> specifies the memory size
+in GiB (1024 x 1024 x 1024 bytes). The default unit is MiB.
+.
+.IP "\(hy" 2
+Specify \fIRANGE\fP in the form 0x<start>-0x<end> as shown in the output of the
+\fBlsmem\fP command. <start> is the hexadecimal address of the first byte and <end>
+is the hexadecimal address of the last byte in the memory range.
+.
+.IP "\(hy" 2
+Specify \fIBLOCKRANGE\fP in the form <first>-<last> or <block> as shown in the
+output of the \fBlsmem\fP command. <first> is the number of the first memory block
+and <last> is the number of the last memory block in the memory
+range. Alternatively a single block can be specified. \fIBLOCKRANGE\fP requires
+the \fB--blocks\fP option.
+.
+.IP "\(hy" 2
+Specify \fIZONE\fP as the name of a memory zone, as shown in the output of the
+\fBlsmem -o +ZONES\fP command. The output shows one or more valid memory zones
+for each memory range. If multiple zones are shown, then the memory range
+currently belongs to the first zone. By default, chmem will set memory online
+to the zone Movable, if this is among the valid zones. This default can be
+changed by specifying the \fB--zone\fP option with another valid zone.
+For memory ballooning, it is recommended to select the zone Movable for memory
+online and offline, if possible. Memory in this zone is much more likely to be
+able to be offlined again, but it cannot be used for arbitrary kernel
+allocations, only for migratable pages (e.g. anonymous and page cache pages).
+Use the \fB\-\-help\fR option to see all available zones.
+.
+.PP
+\fISIZE\fP and \fIRANGE\fP must be aligned to the Linux memory block size, as
+shown in the output of the \fBlsmem\fP command.
+
+Setting memory online can fail for various reasons. On virtualized systems it
+can fail if the hypervisor does not have enough memory left, for example
+because memory was overcommitted. Setting memory offline can fail if Linux
+cannot free the memory. If only part of the requested memory can be set online
+or offline, a message tells you how much memory was set online or offline
+instead of the requested amount.
+
+When setting memory online \fBchmem\fP starts with the lowest memory block
+numbers. When setting memory offline \fBchmem\fP starts with the highest memory
+block numbers.
+.SH OPTIONS
+.TP
+.BR \-b ", " \-\-blocks
+Use a \fIBLOCKRANGE\fP parameter instead of \fIRANGE\fP or \fISIZE\fP for the
+\fB--enable\fP and \fB--disable\fP options.
+.TP
+.BR \-d ", " \-\-disable
+Set the specified \fIRANGE\fP, \fISIZE\fP, or \fIBLOCKRANGE\fP of memory offline.
+.TP
+.BR \-e ", " \-\-enable
+Set the specified \fIRANGE\fP, \fISIZE\fP, or \fIBLOCKRANGE\fP of memory online.
+.TP
+.BR \-z ", " \-\-zone
+Select the memory \fIZONE\fP where to set the specified \fIRANGE\fP, \fISIZE\fP,
+or \fIBLOCKRANGE\fP of memory online or offline. By default, memory will be set
+online to the zone Movable, if possible.
+.TP
+.BR \-h ", " \-\-help
+Print a short help text, then exit.
+.TP
+.BR \-v ", " \-\-verbose
+Verbose mode. Causes \fBchmem\fP to print debugging messages about its
+progress.
+.TP
+.BR \-V ", " \-\-version
+Print the version number, then exit.
+.SH RETURN CODES
+.B chmem
+has the following return codes:
+.TP
+.B 0
+success
+.TP
+.B 1
+failure
+.TP
+.B 64
+partial success
+.SH EXAMPLES
+.TP
+.B chmem --enable 1024
+This command requests 1024 MiB of memory to be set online.
+.TP
+.B chmem -e 2g
+This command requests 2 GiB of memory to be set online.
+.TP
+.B chmem --disable 0x00000000e4000000-0x00000000f3ffffff
+This command requests the memory range starting with 0x00000000e4000000
+and ending with 0x00000000f3ffffff to be set offline.
+.TP
+.B chmem -b -d 10
+This command requests the memory block number 10 to be set offline.
+.SH SEE ALSO
+.BR lsmem (1)
+.SH AVAILABILITY
+The \fBchmem\fP command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/chmem.c b/sys-utils/chmem.c
new file mode 100644
index 0000000..861f6cf
--- /dev/null
+++ b/sys-utils/chmem.c
@@ -0,0 +1,453 @@
+/*
+ * chmem - Memory configuration tool
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <assert.h>
+#include <dirent.h>
+
+#include "c.h"
+#include "nls.h"
+#include "path.h"
+#include "strutils.h"
+#include "strv.h"
+#include "optutils.h"
+#include "closestream.h"
+#include "xalloc.h"
+
+/* partial success, otherwise we return regular EXIT_{SUCCESS,FAILURE} */
+#define CHMEM_EXIT_SOMEOK		64
+
+#define _PATH_SYS_MEMORY		"/sys/devices/system/memory"
+
+/*
+ * Run-time state shared by all chmem operations.
+ */
+struct chmem_desc {
+	struct path_cxt	*sysmem;	/* _PATH_SYS_MEMORY handler */
+	struct dirent	**dirs;		/* memoryN entries returned by scandir() */
+	int		ndirs;		/* number of entries in dirs */
+	uint64_t	block_size;	/* parsed from block_size_bytes (hex) */
+	uint64_t	start;		/* first block index of the requested range */
+	uint64_t	end;		/* last block index of the requested range */
+	uint64_t	size;		/* requested size, counted in memory blocks */
+	unsigned int	use_blocks : 1;	/* -b: parameter is a block number/range */
+	unsigned int	is_size	   : 1;	/* parameter was a SIZE, not a range */
+	unsigned int	verbose	   : 1;	/* -v: report each block on stdout */
+	unsigned int	have_zones : 1;	/* memory0/valid_zones is accessible */
+};
+
+/* Operation selected on the command line */
+enum {
+	CMD_MEMORY_ENABLE = 0,	/* -e, --enable */
+	CMD_MEMORY_DISABLE,	/* -d, --disable */
+	CMD_NONE		/* neither -e nor -d has been given */
+};
+
+/* Memory zone identifiers; the values are used as indexes into zone_names[] */
+enum zone_id {
+	ZONE_DMA = 0,
+	ZONE_DMA32,
+	ZONE_NORMAL,
+	ZONE_HIGHMEM,
+	ZONE_MOVABLE,
+	ZONE_DEVICE,
+};
+
+/*
+ * Zone names indexed by enum zone_id; accepted (case-insensitively) by
+ * --zone and searched for in the valid_zones sysfs attribute.
+ */
+static char *zone_names[] = {
+	[ZONE_DMA]	= "DMA",
+	[ZONE_DMA32]	= "DMA32",
+	[ZONE_NORMAL]	= "Normal",
+	[ZONE_HIGHMEM]	= "Highmem",
+	[ZONE_MOVABLE]	= "Movable",
+	[ZONE_DEVICE]	= "Device",
+};
+
+/*
+ * Translate a NUL-terminated zone name (compared case-insensitively) to its
+ * index in zone_names[]; returns -1 when the name is not known.
+ */
+static int zone_name_to_id(const char *name)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(zone_names)) {
+		if (strcasecmp(name, zone_names[idx]) == 0)
+			return idx;
+		idx++;
+	}
+
+	return -1;
+}
+
+/*
+ * Format a description of memory block @idx (its number and the address
+ * range it covers) into @buf, writing at most @bufsz bytes.
+ */
+static void idxtostr(struct chmem_desc *desc, uint64_t idx, char *buf, size_t bufsz)
+{
+	const uint64_t first = idx * desc->block_size;
+	const uint64_t last = first + desc->block_size - 1;
+
+	snprintf(buf, bufsz,
+		 _("Memory Block %"PRIu64" (0x%016"PRIx64"-0x%016"PRIx64")"),
+		 idx, first, last);
+}
+
+/*
+ * Set desc->size memory blocks online (@enable != 0) or offline (@enable == 0).
+ *
+ * The directory entries are walked from the first one upwards when enabling
+ * and from the last one downwards when disabling.  Blocks which already are in
+ * the requested state are skipped and do not count.  @zone_id is an index into
+ * zone_names[], or a negative number if no zone has been requested.
+ *
+ * Returns 0 if all requested blocks have been changed, 1 if only some of them
+ * have been changed, and -1 if none has been changed.
+ */
+static int chmem_size(struct chmem_desc *desc, int enable, int zone_id)
+{
+	char *name, *onoff, line[BUFSIZ], str[BUFSIZ];
+	uint64_t size, index;
+	const char *zn;
+	int i, rc;
+
+	size = desc->size;
+	onoff = enable ? "online" : "offline";
+	i = enable ? 0 : desc->ndirs - 1;
+
+	if (enable && zone_id >= 0) {
+		if (zone_id == ZONE_MOVABLE)
+			onoff = "online_movable";
+		else
+			onoff = "online_kernel";
+	}
+
+	for (; i >= 0 && i < desc->ndirs && size; i += enable ? 1 : -1) {
+		name = desc->dirs[i]->d_name;
+		/* skip the "memory" prefix of the directory name */
+		index = strtou64_or_err(name + 6, _("Failed to parse index"));
+
+		/* only 6 characters are compared, "online_*" is treated as "online" */
+		if (ul_path_readf_buffer(desc->sysmem, line, sizeof(line), "%s/state", name) > 0
+		    && strncmp(onoff, line, 6) == 0)
+			continue;
+
+		if (desc->have_zones) {
+			/*
+			 * Never match zone names against stale (the state read
+			 * above) or uninitialized buffer content.
+			 */
+			if (ul_path_readf_buffer(desc->sysmem, line, sizeof(line), "%s/valid_zones", name) <= 0)
+				line[0] = '\0';
+			if (zone_id >= 0) {
+				zn = zone_names[zone_id];
+				/* enable: the zone has to be listed anywhere in valid_zones */
+				if (enable && !strcasestr(line, zn))
+					continue;
+				/* disable: valid_zones has to start with the zone name */
+				if (!enable && strncasecmp(line, zn, strlen(zn)))
+					continue;
+			} else if (enable) {
+				/* By default, use zone Movable for online, if valid */
+				if (strcasestr(line, zone_names[ZONE_MOVABLE]))
+					onoff = "online_movable";
+				else
+					onoff = "online";
+			}
+		}
+
+		idxtostr(desc, index, str, sizeof(str));
+		rc = ul_path_writef_string(desc->sysmem, onoff, "%s/state", name);
+		if (rc != 0 && desc->verbose) {
+			if (enable)
+				fprintf(stdout, _("%s enable failed\n"), str);
+			else
+				fprintf(stdout, _("%s disable failed\n"), str);
+		} else if (rc == 0 && desc->verbose) {
+			if (enable)
+				fprintf(stdout, _("%s enabled\n"), str);
+			else
+				fprintf(stdout, _("%s disabled\n"), str);
+		}
+		/* only successfully changed blocks count */
+		if (rc == 0)
+			size--;
+	}
+	if (size) {
+		uint64_t bytes;
+		char *sizestr;
+
+		/* report how much has been changed before the blocks ran out */
+		bytes = (desc->size - size) * desc->block_size;
+		sizestr = size_to_human_string(SIZE_SUFFIX_1LETTER, bytes);
+		if (enable)
+			warnx(_("Could only enable %s of memory"), sizestr);
+		else
+			warnx(_("Could only disable %s of memory"), sizestr);
+		free(sizestr);
+	}
+	return size == 0 ? 0 : size == desc->size ? -1 : 1;
+}
+
+/*
+ * Set the memory blocks desc->start..desc->end (block indexes, inclusive)
+ * online (@enable != 0) or offline (@enable == 0).
+ *
+ * Blocks which already are in the requested state count as done.  @zone_id is
+ * an index into zone_names[], or a negative number if no zone has been
+ * requested.
+ *
+ * Returns 0 if the whole range is in the requested state, 1 if only a part of
+ * the range is, and -1 if no block of the range is.
+ */
+static int chmem_range(struct chmem_desc *desc, int enable, int zone_id)
+{
+	char *name, *onoff, line[BUFSIZ], str[BUFSIZ];
+	uint64_t index, todo;
+	const char *zn;
+	int i, rc;
+
+	todo = desc->end - desc->start + 1;
+	onoff = enable ? "online" : "offline";
+
+	if (enable && zone_id >= 0) {
+		if (zone_id == ZONE_MOVABLE)
+			onoff = "online_movable";
+		else
+			onoff = "online_kernel";
+	}
+
+	for (i = 0; i < desc->ndirs; i++) {
+		name = desc->dirs[i]->d_name;
+		/* skip the "memory" prefix of the directory name */
+		index = strtou64_or_err(name + 6, _("Failed to parse index"));
+		if (index < desc->start)
+			continue;
+		if (index > desc->end)
+			break;
+		idxtostr(desc, index, str, sizeof(str));
+		/* only 6 characters are compared, "online_*" is treated as "online" */
+		if (ul_path_readf_buffer(desc->sysmem, line, sizeof(line), "%s/state", name) > 0
+		    && strncmp(onoff, line, 6) == 0) {
+			if (desc->verbose && enable)
+				fprintf(stdout, _("%s already enabled\n"), str);
+			else if (desc->verbose && !enable)
+				fprintf(stdout, _("%s already disabled\n"), str);
+			todo--;
+			continue;
+		}
+
+		if (desc->have_zones) {
+			/*
+			 * Never match zone names against stale (the state read
+			 * above) or uninitialized buffer content.
+			 */
+			if (ul_path_readf_buffer(desc->sysmem, line, sizeof(line), "%s/valid_zones", name) <= 0)
+				line[0] = '\0';
+			if (zone_id >= 0) {
+				zn = zone_names[zone_id];
+				/* enable: the zone has to be listed anywhere in valid_zones */
+				if (enable && !strcasestr(line, zn)) {
+					warnx(_("%s enable failed: Zone mismatch"), str);
+					continue;
+				}
+				/* disable: valid_zones has to start with the zone name */
+				if (!enable && strncasecmp(line, zn, strlen(zn))) {
+					warnx(_("%s disable failed: Zone mismatch"), str);
+					continue;
+				}
+			} else if (enable) {
+				/* By default, use zone Movable for online, if valid */
+				if (strcasestr(line, zone_names[ZONE_MOVABLE]))
+					onoff = "online_movable";
+				else
+					onoff = "online";
+			}
+		}
+
+		rc = ul_path_writef_string(desc->sysmem, onoff, "%s/state", name);
+		if (rc != 0) {
+			if (enable)
+				warn(_("%s enable failed"), str);
+			else
+				warn(_("%s disable failed"), str);
+		} else if (desc->verbose) {
+			if (enable)
+				fprintf(stdout, _("%s enabled\n"), str);
+			else
+				fprintf(stdout, _("%s disabled\n"), str);
+		}
+		if (rc == 0)
+			todo--;
+	}
+	return todo == 0 ? 0 : todo == desc->end - desc->start + 1 ? -1 : 1;
+}
+
+/*
+ * scandir() filter: accept only entries whose name starts with "memory" and
+ * whose remainder passes isdigit_string().
+ */
+static int filter(const struct dirent *de)
+{
+	const char *entry = de->d_name;
+
+	if (strncmp("memory", entry, 6) != 0)
+		return 0;
+
+	return isdigit_string(entry + 6);
+}
+
+/*
+ * Collect the memory block directories below _PATH_SYS_MEMORY (sorted by
+ * versionsort()) and read the memory block size.  Exits on failure.
+ */
+static void read_info(struct chmem_desc *desc)
+{
+	char line[128];
+
+	desc->ndirs = scandir(_PATH_SYS_MEMORY, &desc->dirs, filter, versionsort);
+	if (desc->ndirs <= 0)
+		err(EXIT_FAILURE, _("Failed to read %s"), _PATH_SYS_MEMORY);
+
+	/* do not parse an uninitialized buffer if the attribute is unreadable */
+	if (ul_path_read_buffer(desc->sysmem, line, sizeof(line), "block_size_bytes") <= 0)
+		err(EXIT_FAILURE, _("Failed to read %s"),
+		    _PATH_SYS_MEMORY "/block_size_bytes");
+
+	/* the attribute is parsed as a hexadecimal number */
+	desc->block_size = strtoumax(line, NULL, 16);
+
+	/* the block size is used as a divisor later, it must not be zero */
+	if (desc->block_size == 0)
+		errx(EXIT_FAILURE, _("Failed to parse %s"),
+		     _PATH_SYS_MEMORY "/block_size_bytes");
+}
+
+/*
+ * Parse a parameter which is not a range.
+ *
+ * With desc->use_blocks set, @str is a single block number and is stored to
+ * both desc->start and desc->end.  Otherwise @str is a size: a number without
+ * a unit suffix is taken as MiB, the size has to be a multiple of the memory
+ * block size, and desc->size is set to the number of blocks.  Exits on error.
+ */
+static void parse_single_param(struct chmem_desc *desc, char *str)
+{
+	size_t len;
+
+	if (desc->use_blocks) {
+		desc->start = strtou64_or_err(str, _("Failed to parse block number"));
+		desc->end = desc->start;
+		return;
+	}
+	desc->is_size = 1;
+	desc->size = strtosize_or_err(str, _("Failed to parse size"));
+
+	/* a trailing digit means that no unit suffix has been specified */
+	len = strlen(str);
+	if (len && isdigit((unsigned char) str[len - 1])) {
+		/* refuse sizes which do not fit into 64 bits after the conversion */
+		if (desc->size > UINT64_MAX / (1024 * 1024))
+			errx(EXIT_FAILURE, "%s: '%s'", _("Failed to parse size"), str);
+		desc->size *= 1024*1024;
+	}
+	if (desc->size % desc->block_size) {
+		errx(EXIT_FAILURE, _("Size must be aligned to memory block size (%s)"),
+		     size_to_human_string(SIZE_SUFFIX_1LETTER, desc->block_size));
+	}
+	desc->size /= desc->block_size;
+}
+
+/*
+ * Parse the two halves of a "START-END" parameter into desc->start and
+ * desc->end.
+ *
+ * With desc->use_blocks set, both halves are block numbers.  Otherwise they
+ * are addresses whose second character has to be 'x'; the start address and
+ * (end address + 1) have to be multiples of the memory block size, and both
+ * values are converted to block indexes.  Exits on error.
+ */
+static void parse_range_param(struct chmem_desc *desc, char *start, char *end)
+{
+	uint64_t bs;
+
+	if (desc->use_blocks) {
+		desc->start = strtou64_or_err(start, _("Failed to parse start"));
+		desc->end = strtou64_or_err(end, _("Failed to parse end"));
+		return;
+	}
+
+	if (!(strlen(start) >= 2 && start[1] == 'x'))
+		errx(EXIT_FAILURE, _("Invalid start address format: %s"), start);
+	if (!(strlen(end) >= 2 && end[1] == 'x'))
+		errx(EXIT_FAILURE, _("Invalid end address format: %s"), end);
+
+	desc->start = strtox64_or_err(start, _("Failed to parse start address"));
+	desc->end = strtox64_or_err(end, _("Failed to parse end address"));
+
+	bs = desc->block_size;
+	if (desc->start % bs != 0 || (desc->end + 1) % bs != 0) {
+		errx(EXIT_FAILURE,
+		     _("Start address and (end address + 1) must be aligned to "
+		       "memory block size (%s)"),
+		     size_to_human_string(SIZE_SUFFIX_1LETTER, bs));
+	}
+
+	desc->start /= bs;
+	desc->end /= bs;
+}
+
+/*
+ * Parse the SIZE, RANGE or BLOCKRANGE command line parameter into @desc.
+ *
+ * @param is split on '-'; one part is handled by parse_single_param(), two
+ * parts by parse_range_param().  Any other number of parts, including an
+ * empty split result, is rejected.  Exits on error.
+ */
+static void parse_parameter(struct chmem_desc *desc, char *param)
+{
+	char **split;
+
+	split = strv_split(param, "-");
+
+	switch (split ? strv_length(split) : 0) {
+	case 1:
+		parse_single_param(desc, split[0]);
+		break;
+	case 2:
+		parse_range_param(desc, split[0], split[1]);
+		break;
+	default:
+		/* split[0] and split[1] must not be touched in this case */
+		errx(EXIT_FAILURE, _("Invalid parameter: %s"), param);
+	}
+	strv_free(split);
+	if (desc->start > desc->end)
+		errx(EXIT_FAILURE, _("Invalid range: %s"), param);
+}
+
+/*
+ * Print the help text, including the list of supported zones, to stdout
+ * and exit successfully.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	size_t zone = 0;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options] [SIZE|RANGE|BLOCKRANGE]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Set a particular size or range of memory online or offline.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -e, --enable       enable memory\n"), out);
+	fputs(_(" -d, --disable      disable memory\n"), out);
+	fputs(_(" -b, --blocks       use memory blocks\n"), out);
+	fputs(_(" -z, --zone <name>  select memory zone (see below)\n"), out);
+	fputs(_(" -v, --verbose      verbose output\n"), out);
+	printf(USAGE_HELP_OPTIONS(20));
+
+	fputs(_("\nSupported zones:\n"), out);
+	while (zone < ARRAY_SIZE(zone_names)) {
+		fprintf(out, " %s\n", zone_names[zone]);
+		zone++;
+	}
+
+	printf(USAGE_MAN_TAIL("chmem(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * chmem entry point: parse the options, read the memory block layout from
+ * sysfs and set the requested size or range of memory online or offline.
+ *
+ * Returns EXIT_SUCCESS if the whole request has been fulfilled,
+ * CHMEM_EXIT_SOMEOK if it has been fulfilled partially and EXIT_FAILURE
+ * otherwise.
+ */
+int main(int argc, char **argv)
+{
+	struct chmem_desc _desc = { 0 }, *desc = &_desc;
+	int cmd = CMD_NONE, zone_id = -1;
+	char *zone = NULL;
+	int c, rc;
+
+	/*
+	 * The option is documented as --blocks; getopt_long() still accepts
+	 * the unambiguous abbreviation --block.
+	 */
+	static const struct option longopts[] = {
+		{"blocks",	no_argument,		NULL, 'b'},
+		{"disable",	no_argument,		NULL, 'd'},
+		{"enable",	no_argument,		NULL, 'e'},
+		{"help",	no_argument,		NULL, 'h'},
+		{"verbose",	no_argument,		NULL, 'v'},
+		{"version",	no_argument,		NULL, 'V'},
+		{"zone",	required_argument,	NULL, 'z'},
+		{NULL,		0,			NULL, 0}
+	};
+
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'd','e' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	ul_path_init_debug();
+	desc->sysmem = ul_new_path(_PATH_SYS_MEMORY);
+	if (!desc->sysmem)
+		err(EXIT_FAILURE, _("failed to initialize %s handler"), _PATH_SYS_MEMORY);
+
+	while ((c = getopt_long(argc, argv, "bdehvVz:", longopts, NULL)) != -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch (c) {
+		case 'd':
+			cmd = CMD_MEMORY_DISABLE;
+			break;
+		case 'e':
+			cmd = CMD_MEMORY_ENABLE;
+			break;
+		case 'b':
+			desc->use_blocks = 1;
+			break;
+		case 'h':
+			usage();
+			break;
+		case 'v':
+			desc->verbose = 1;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'z':
+			zone = xstrdup(optarg);
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* exactly one parameter and one of -e or -d are required */
+	if ((argc == 1) || (argc != optind + 1) || (cmd == CMD_NONE)) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	/*
+	 * sysfs is read after the option parsing, so --help and --version work
+	 * on systems without memory block directories too.  The block size is
+	 * required by parse_parameter().
+	 */
+	read_info(desc);
+
+	parse_parameter(desc, argv[optind]);
+
+
+	/* The valid_zones sysfs attribute was introduced with kernel 3.18 */
+	if (ul_path_access(desc->sysmem, F_OK, "memory0/valid_zones") == 0)
+		desc->have_zones = 1;
+	else if (zone)
+		warnx(_("zone ignored, no valid_zones sysfs attribute present"));
+
+	if (zone && desc->have_zones) {
+		zone_id = zone_name_to_id(zone);
+		if (zone_id == -1) {
+			warnx(_("unknown memory zone: %s"), zone);
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (desc->is_size)
+		rc = chmem_size(desc, cmd == CMD_MEMORY_ENABLE ? 1 : 0, zone_id);
+	else
+		rc = chmem_range(desc, cmd == CMD_MEMORY_ENABLE ? 1 : 0, zone_id);
+
+	ul_unref_path(desc->sysmem);
+
+	return rc == 0 ? EXIT_SUCCESS :
+		rc < 0 ? EXIT_FAILURE : CHMEM_EXIT_SOMEOK;
+}
diff --git a/sys-utils/choom.1 b/sys-utils/choom.1
new file mode 100644
index 0000000..2b844cb
--- /dev/null
+++ b/sys-utils/choom.1
@@ -0,0 +1,82 @@
+.TH CHOOM 1 "April 2018" "util-linux" "User Commands"
+.SH NAME
+choom \- display and adjust OOM-killer score
+.SH SYNOPSIS
+.B choom
+.B \-p
+.I pid
+.sp
+.B choom
+.B \-p
+.I pid
+.B \-n
+.I number
+.sp
+.B choom
+.B \-n
+.I number
+.IR command\  [ argument ...]
+
+.SH DESCRIPTION
+The \fBchoom\fP command displays and adjusts the Out-Of-Memory killer score setting.
+
+.SH OPTIONS
+.TP
+.BR \-p ", " \-\-pid " \fIpid\fP"
+Specifies process ID.
+.TP
+.BR \-n , " \-\-adjust " \fIvalue\fP
+Specify the adjust score value.
+.TP
+.BR \-h ", " \-\-help
+Display help text and exit.
+.TP
+.BR \-V ", " \-\-version
+Display version information and exit.
+.SH NOTES
+The Linux kernel uses the badness heuristic to select which process gets killed in
+out of memory conditions.
+
+The badness heuristic assigns a value to each candidate task ranging from 0
+(never kill) to 1000 (always kill) to determine which process is targeted.  The
+units are roughly a proportion along that range of allowed memory the process
+may allocate from based on an estimation of its current memory and swap use.
+For example, if a task is using all allowed memory, its badness score will be
+1000.  If it is using half of its allowed memory, its score will be 500.
+
+There is an additional factor included in the badness score: the current memory
+and swap usage is discounted by 3% for root processes.
+
+The amount of "allowed" memory depends on the context in which the oom killer
+was called.  If it is due to the memory assigned to the allocating task's cpuset
+being exhausted, the allowed memory represents the set of mems assigned to that
+cpuset.  If it is due to a mempolicy's node(s) being exhausted, the allowed
+memory represents the set of mempolicy nodes.  If it is due to a memory
+limit (or swap limit) being reached, the allowed memory is that configured
+limit.  Finally, if it is due to the entire system being out of memory, the
+allowed memory represents all allocatable resources.
+
+The adjust score value is added to the badness score before it is used to
+determine which task to kill.  Acceptable values range from -1000 to +1000.
+This allows userspace to polarize the preference for oom killing either by
+always preferring a certain task or completely disabling it.  The lowest
+possible value, -1000, is equivalent to disabling oom killing entirely for that
+task since it will always report a badness score of 0.
+
+Setting an adjust score value of +500, for example, is roughly equivalent to
+allowing the remainder of tasks sharing the same system, cpuset, mempolicy, or
+memory controller resources to use at least 50% more memory.  A value of -500,
+on the other hand, would be roughly equivalent to discounting 50% of the task's
+allowed memory from being considered as scoring against the task.
+
+.SH AUTHORS
+.nf
+Karel Zak <kzak@redhat.com>
+.fi
+.SH SEE ALSO
+.BR proc (5)
+.SH AVAILABILITY
+The \fBchoom\fP command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/choom.c b/sys-utils/choom.c
new file mode 100644
index 0000000..eff95b6
--- /dev/null
+++ b/sys-utils/choom.c
@@ -0,0 +1,159 @@
+/*
+ * choom - Change OOM score setting
+ *
+ * Copyright (C) 2018 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <errno.h>
+
+#include "nls.h"
+#include "c.h"
+#include "path.h"
+#include "strutils.h"
+#include "closestream.h"
+
+/*
+ * Print the help text to stdout and exit successfully.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	fputs(USAGE_HEADER, out);
+	/* %1$s: the program name is used in all three synopsis lines */
+	fprintf(out,
+	      _(" %1$s [options] -p pid\n"
+		" %1$s [options] -n number -p pid\n"
+		" %1$s [options] -n number command [args...]\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Display and adjust OOM-killer score.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -n, --adjust <num>     specify the adjust score value\n"), out);
+	fputs(_(" -p, --pid <num>        process ID\n"), out);
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(24));
+	printf(USAGE_MAN_TAIL("choom(1)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Read "oom_score" relative to @pc and return it; exits on failure.
+ */
+static int get_score(struct path_cxt *pc)
+{
+	int score;
+
+	if (ul_path_read_s32(pc, &score, "oom_score") == 0)
+		return score;
+
+	err(EXIT_FAILURE, _("failed to read OOM score value"));
+}
+
+/*
+ * Read "oom_score_adj" relative to @pc and return it; exits on failure.
+ */
+static int get_score_adj(struct path_cxt *pc)
+{
+	int adjust;
+
+	if (ul_path_read_s32(pc, &adjust, "oom_score_adj") == 0)
+		return adjust;
+
+	err(EXIT_FAILURE, _("failed to read OOM score adjust value"));
+}
+
+/*
+ * Write @adj to "oom_score_adj" relative to @pc; the result of
+ * ul_path_write_s64() is passed to the caller (who treats non-zero as error).
+ */
+static int set_score_adj(struct path_cxt *pc, int adj)
+{
+	const int rc = ul_path_write_s64(pc, adj, "oom_score_adj");
+
+	return rc;
+}
+
+/*
+ * choom entry point.  Three modes are supported:
+ *
+ *   -p pid            print the OOM score and the adjust value of the process
+ *   -n number -p pid  change the adjust value of the process
+ *   -n number command change the adjust value of choom itself and then
+ *                     execute the command
+ */
+int main(int argc, char **argv)
+{
+	pid_t pid = 0;
+	int c, adj = 0, has_adj = 0;
+	struct path_cxt *pc = NULL;
+
+	static const struct option longopts[] = {
+		{ "adjust",  required_argument, NULL, 'n' },
+		{ "pid",     required_argument, NULL, 'p' },
+		{ "help",    no_argument,       NULL, 'h' },
+		{ "version", no_argument,       NULL, 'V' },
+		{ NULL,      0,                 NULL,  0  }
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv, "hn:p:V", longopts, NULL)) != -1) {
+		switch (c) {
+		case 'p':
+			pid = strtos32_or_err(optarg, _("invalid PID argument"));
+			break;
+		case 'n':
+			adj = strtos32_or_err(optarg, _("invalid adjust argument"));
+			has_adj = 1;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();	/* does not return */
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* a command and --pid are mutually exclusive */
+	if (optind < argc && pid) {
+		warnx(_("invalid argument: %s"), argv[optind]);
+		errtryhelp(EXIT_FAILURE);
+	}
+	if (!pid && argc - optind < 1) {
+		warnx(_("no PID or COMMAND specified"));
+		errtryhelp(EXIT_FAILURE);
+	}
+	/* a command makes sense only together with a new adjust value */
+	if (optind < argc && !has_adj) {
+		warnx(_("no OOM score adjust value specified"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	/* without --pid the process itself is modified before the exec */
+	pc = ul_new_path("/proc/%d", (int) (pid ? pid : getpid()));
+	if (!pc)
+		err(EXIT_FAILURE, _("failed to initialize %s handler"), "/proc");
+
+	/* Show */
+	if (!has_adj) {
+		printf(_("pid %d's current OOM score: %d\n"), pid, get_score(pc));
+		printf(_("pid %d's current OOM score adjust value: %d\n"), pid, get_score_adj(pc));
+
+	/* Change */
+	} else if (pid) {
+		int old = get_score_adj(pc);
+
+		if (set_score_adj(pc, adj))
+			err(EXIT_FAILURE, _("failed to set score adjust value"));
+
+		printf(_("pid %d's OOM score adjust value changed from %d to %d\n"), pid, old, adj);
+
+	/* Start new process */
+	} else {
+		if (set_score_adj(pc, adj))
+			err(EXIT_FAILURE, _("failed to set score adjust value"));
+		ul_unref_path(pc);
+		argv += optind;
+		execvp(argv[0], argv);
+		errexec(argv[0]);
+	}
+
+	ul_unref_path(pc);
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/ctrlaltdel.8 b/sys-utils/ctrlaltdel.8
new file mode 100644
index 0000000..a44ad19
--- /dev/null
+++ b/sys-utils/ctrlaltdel.8
@@ -0,0 +1,58 @@
+.\" Copyright 1992, 1993 Rickard E. Faith (faith@cs.unc.edu)
+.\" May be distributed under the GNU General Public License
+.TH CTRLALTDEL 8 "October 2015" "util-linux" "System Administration"
+.SH NAME
+ctrlaltdel \- set the function of the Ctrl-Alt-Del combination
+.SH SYNOPSIS
+.BR "ctrlaltdel hard" | soft
+.SH DESCRIPTION
+Based on examination of the
+.I linux/kernel/reboot.c
+code, it is clear that there are two supported functions that the
+Ctrl-Alt-Del sequence can perform.
+.TP
+.B hard
+Immediately reboot the computer without calling
+.BR sync (2)
+and without any other preparation.  This is the default.
+.TP
+.B soft
+Make the kernel send the SIGINT (interrupt) signal to the
+.B init
+process (this is always the process with PID 1).  If this option is used,
+the
+.BR init (8)
+program must support this feature.  Since there are now several
+.BR init (8)
+programs in the Linux community, please consult the documentation for the
+version that you are currently using.
+.PP
+When the command is run without any argument, it will display the current
+setting.
+.PP
+The function of
+.B ctrlaltdel
+is usually set in the
+.I /etc/rc.local
+file.
+.SH OPTIONS
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Display version information and exit.
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Display help text and exit.
+.SH FILES
+.I /etc/rc.local
+.SH "SEE ALSO"
+.BR init (8),
+.BR systemd (1)
+.SH AUTHOR
+.UR poe@daimi.aau.dk
+Peter Orbaek
+.UE
+.SH AVAILABILITY
+The ctrlaltdel command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/ctrlaltdel.c b/sys-utils/ctrlaltdel.c
new file mode 100644
index 0000000..ea662c4
--- /dev/null
+++ b/sys-utils/ctrlaltdel.c
@@ -0,0 +1,114 @@
+/*
+ * ctrlaltdel.c - Set the function of the Ctrl-Alt-Del combination
+ * Created 4-Jul-92 by Peter Orbaek <poe@daimi.aau.dk>
+ * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/reboot.h>
+#include "nls.h"
+#include "c.h"
+#include "closestream.h"
+#include "pathnames.h"
+#include "path.h"
+
+#define LINUX_REBOOT_CMD_CAD_ON 0x89ABCDEF
+#define LINUX_REBOOT_CMD_CAD_OFF 0x00000000
+
+/*
+ * Print the help text to stdout and exit successfully.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *fp = stdout;
+
+	fputs(USAGE_HEADER, fp);
+	fprintf(fp, _(" %s hard|soft\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, fp);
+	fprintf(fp, _("Set the function of the Ctrl-Alt-Del combination.\n"));
+
+	fputs(USAGE_OPTIONS, fp);
+	printf(USAGE_HELP_OPTIONS(16));
+	printf(USAGE_MAN_TAIL("ctrlaltdel(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Print the current Ctrl-Alt-Del setting as read from _PATH_PROC_CTRL_ALT_DEL:
+ * 0 is reported as "soft" and 1 as "hard".
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if the file contains an unexpected
+ * value; exits if the file cannot be read.
+ */
+static int get_cad(void)
+{
+	uint64_t val;
+
+	if (ul_path_read_u64(NULL, &val, _PATH_PROC_CTRL_ALT_DEL) != 0)
+		err(EXIT_FAILURE, _("cannot read %s"), _PATH_PROC_CTRL_ALT_DEL);
+
+	switch (val) {
+	case 0:
+		fputs("soft\n", stdout);
+		break;
+	case 1:
+		fputs("hard\n", stdout);
+		break;
+	default:
+		printf("%s hard\n", _("implicit"));
+		/* %ju expects uintmax_t, which is not necessarily uint64_t */
+		warnx(_("unexpected value in %s: %ju"), _PATH_PROC_CTRL_ALT_DEL,
+		      (uintmax_t) val);
+		return EXIT_FAILURE;
+	}
+	return EXIT_SUCCESS;
+}
+
+/*
+ * Set the Ctrl-Alt-Del behavior according to @arg ("hard" or "soft") by
+ * reboot(); requires effective UID 0.
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+static int set_cad(const char *arg)
+{
+	unsigned int cmd;
+
+	if (geteuid()) {
+		warnx(_("You must be root to set the Ctrl-Alt-Del behavior"));
+		return EXIT_FAILURE;
+	}
+	if (!strcmp("hard", arg))
+		cmd = LINUX_REBOOT_CMD_CAD_ON;
+	else if (!strcmp("soft", arg))
+		cmd = LINUX_REBOOT_CMD_CAD_OFF;
+	else {
+		warnx(_("unknown argument: %s"), arg);
+		return EXIT_FAILURE;
+	}
+	if (reboot(cmd) < 0) {
+		/* warn() rather than warnx(): report why the call has failed */
+		warn("reboot");
+		return EXIT_FAILURE;
+	}
+	return EXIT_SUCCESS;
+}
+
+/*
+ * ctrlaltdel entry point: without an argument the current setting is
+ * printed, otherwise the first non-option argument ("hard" or "soft") is
+ * applied.
+ */
+int main(int argc, char **argv)
+{
+	int ch, ret;
+	static const struct option longopts[] = {
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((ch = getopt_long(argc, argv, "Vh", longopts, NULL)) != -1)
+		switch (ch) {
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();	/* does not return */
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+
+	/*
+	 * Use optind rather than argv[1], so that the argument is found
+	 * after a "--" separator too.
+	 */
+	if (optind >= argc)
+		ret = get_cad();
+	else
+		ret = set_cad(argv[optind]);
+	return ret;
+}
diff --git a/sys-utils/dmesg.1 b/sys-utils/dmesg.1
new file mode 100644
index 0000000..a93821a
--- /dev/null
+++ b/sys-utils/dmesg.1
@@ -0,0 +1,256 @@
+.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
+.\" May be distributed under the GNU General Public License
+.TH DMESG "1" "July 2012" "util-linux" "User Commands"
+.SH NAME
+dmesg \- print or control the kernel ring buffer
+.SH SYNOPSIS
+.B dmesg
+[options]
+.sp
+.B dmesg \-\-clear
+.br
+.BR "dmesg \-\-read\-clear " [options]
+.br
+.BI "dmesg \-\-console\-level " level
+.br
+.B dmesg \-\-console\-on
+.br
+.B dmesg \-\-console\-off
+.SH DESCRIPTION
+.B dmesg
+is used to examine or control the kernel ring buffer.
+.PP
+The default action is to display all messages from the kernel ring buffer.
+.SH OPTIONS
+The
+.BR \-\-clear ,
+.BR \-\-read\-clear ,
+.BR \-\-console\-on ,
+.BR \-\-console\-off ,
+and
+.B \-\-console\-level
+options are mutually exclusive.
+.PP
+.IP "\fB\-C\fR, \fB\-\-clear\fR"
+Clear the ring buffer.
+.IP "\fB\-c\fR, \fB\-\-read\-clear\fR"
+Clear the ring buffer after first printing its contents.
+.IP "\fB\-D\fR, \fB\-\-console\-off\fR"
+Disable the printing of messages to the console.
+.IP "\fB\-d\fR, \fB\-\-show\-delta\fR"
+Display the timestamp and the time delta spent between messages.  If used
+together with
+.B \-\-notime
+then only the time delta without the timestamp is printed.
+.IP "\fB\-E\fR, \fB\-\-console\-on\fR"
+Enable printing messages to the console.
+.IP "\fB\-e\fR, \fB\-\-reltime\fR"
+Display the local time and the delta in human-readable format.  Be aware that
+conversion to the local time could be inaccurate (see \fB\-T\fR for more
+details).
+.IP "\fB\-F\fR, \fB\-\-file \fIfile\fR"
+Read the syslog messages from the given
+.IR file .
+Note that \fB\-F\fR does not support messages in kmsg format.  Only the old syslog format is supported.
+.IP "\fB\-f\fR, \fB\-\-facility \fIlist\fR"
+Restrict output to the given (comma-separated)
+.I list
+of facilities.  For example:
+.PP
+.RS 14
+.B dmesg \-\-facility=daemon
+.RE
+.IP
+will print messages from system daemons only.  For all supported facilities
+see the
+.B \-\-help
+output.
+.IP "\fB\-H\fR, \fB\-\-human\fR"
+Enable human-readable output.  See also \fB\-\-color\fR, \fB\-\-reltime\fR
+and \fB\-\-nopager\fR.
+.IP "\fB\-k\fR, \fB\-\-kernel\fR"
+Print kernel messages.
+.IP "\fB\-L\fR, \fB\-\-color\fR[=\fIwhen\fR]"
+Colorize the output.  The optional argument \fIwhen\fP
+can be \fBauto\fR, \fBnever\fR or \fBalways\fR.  If the \fIwhen\fR argument is omitted,
+it defaults to \fBauto\fR.  The colors can be disabled; for the current built-in default
+see the \fB\-\-help\fR output.  See also the \fBCOLORS\fR section below.
+.IP  "\fB\-l\fR, \fB\-\-level \fIlist\fR"
+Restrict output to the given (comma-separated)
+.I list
+of levels.  For example:
+.PP
+.RS 14
+.B dmesg \-\-level=err,warn
+.RE
+.IP
+will print error and warning messages only.  For all supported levels see the
+.B \-\-help
+output.
+.IP "\fB\-n\fR, \fB\-\-console\-level \fIlevel\fR"
+Set the
+.I level
+at which printing of messages is done to the console.  The
+.I level
+is a level number or abbreviation of the level name.  For all supported
+levels see the
+.B \-\-help
+output.
+.sp
+For example,
+.B \-n 1
+or
+.B \-n emerg
+prevents all messages, except emergency (panic) messages, from appearing on
+the console.  All levels of messages are still written to
+.IR /proc/kmsg ,
+so
+.BR syslogd (8)
+can still be used to control exactly where kernel messages appear.  When the
+.B \-n
+option is used,
+.B dmesg
+will
+.I not
+print or clear the kernel ring buffer.
+.IP "\fB\-P\fR, \fB\-\-nopager\fR"
+Do not pipe output into a pager.  A pager is enabled by default for \fB\-\-human\fR output.
+.IP "\fB\-p\fR, \fB\-\-force\-prefix\fR"
+Add facility, level or timestamp information to each line of a multi-line message.
+.IP "\fB\-r\fR, \fB\-\-raw\fR"
+Print the raw message buffer, i.e. do not strip the log-level prefixes.
+
+Note that the real raw format depends on the method by which
+.BR dmesg (1)
+reads kernel messages.  The /dev/kmsg device uses a different format than
+.BR syslog (2).
+For backward compatibility,
+.BR dmesg (1)
+returns data always in the
+.BR syslog (2)
+format.  It is possible to read the real raw data from /dev/kmsg by, for example,
+the command 'dd if=/dev/kmsg iflag=nonblock'.
+.IP "\fB\-S\fR, \fB\-\-syslog\fR"
+Force \fBdmesg\fR to use the
+.BR syslog (2)
+kernel interface to read kernel messages.  The default is to use /dev/kmsg rather
+than
+.BR syslog (2)
+since kernel 3.5.0.
+.IP "\fB\-s\fR, \fB\-\-buffer\-size \fIsize\fR"
+Use a buffer of
+.I size
+to query the kernel ring buffer.  This is 16392 by default.  (The default
+kernel syslog buffer size was 4096 at first, 8192 since 1.3.54, 16384 since
+2.1.113.)  If you have set the kernel buffer to be larger than the default,
+then this option can be used to view the entire buffer.
+.IP "\fB\-T\fR, \fB\-\-ctime\fR"
+Print human-readable timestamps.
+.IP
+.B Be aware that the timestamp could be inaccurate!
+The
+.B time
+source used for the logs is
+.B not updated after
+system
+.BR SUSPEND / RESUME .
+.IP "\fB\-t\fR, \fB\-\-notime\fR"
+Do not print kernel's timestamps.
+.IP "\fB\-\-time\-format\fR \fIformat\fR"
+Print timestamps using the given \fIformat\fR, which can be
+.BR ctime ,
+.BR reltime ,
+.B delta
+or
+.BR iso .
+The first three formats are aliases of the time-format-specific options.
+The
+.B iso
+format is a
+.B dmesg
+implementation of the ISO-8601 timestamp format.  The purpose of this format is
+to make the comparing of timestamps between two systems, and any other parsing,
+easy.  The definition of the \fBiso\fR timestamp is:
+YYYY-MM-DD<T>HH:MM:SS,<microseconds><-+><timezone offset from UTC>.
+.IP
+The
+.B iso
+format has the same issue as
+.BR ctime :
+the time may be inaccurate when a system is suspended and resumed.
+.TP
+.BR \-u , " \-\-userspace"
+Print userspace messages.
+.TP
+.BR \-w , " \-\-follow"
+Wait for new messages.  This feature is supported only on systems with
+a readable /dev/kmsg (since kernel 3.5.0).
+.TP
+.BR \-x , " \-\-decode"
+Decode facility and level (priority) numbers to human-readable prefixes.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH COLORS
+Implicit coloring can be disabled by an empty file \fI/etc/terminal-colors.d/dmesg.disable\fR.
+See
+.BR terminal-colors.d (5)
+for more details about colorization configuration.
+.PP
+The logical color names supported by
+.B dmesg
+are:
+.TP
+.B subsys
+The message sub-system prefix (e.g. "ACPI:").
+.TP
+.B time
+The message timestamp.
+.TP
+.B timebreak
+The message timestamp in short ctime format in \fB\-\-reltime\fR 
+or \fB\-\-human\fR output.
+.TP
+.B alert
+The text of the message with the alert log priority.
+.TP
+.B crit
+The text of the message with the critical log priority.
+.TP
+.B err
+The text of the message with the error log priority.
+.TP
+.B warn
+The text of the message with the warning log priority.
+.TP
+.B segfault
+The text of the message that informs about a segmentation fault.
+.SH EXIT STATUS
+.B dmesg
+can fail with a permission denied error.  This is usually caused by the
+.B dmesg_restrict
+kernel setting; please see
+.BR syslog (2)
+for more details.
+.SH SEE ALSO
+.BR terminal-colors.d (5),
+.BR syslogd (8)
+.SH AUTHORS
+.MT kzak@redhat.com
+Karel Zak
+.ME
+
+.br
+.B dmesg
+was originally written by
+.MT tytso@athena.mit.edu
+Theodore Ts'o
+.ME
+.SH AVAILABILITY
+The dmesg command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/dmesg.c b/sys-utils/dmesg.c
new file mode 100644
index 0000000..ba4e225
--- /dev/null
+++ b/sys-utils/dmesg.c
@@ -0,0 +1,1547 @@
+/*
+ * dmesg.c -- Print out the contents of the kernel ring buffer
+ *
+ * Copyright (C) 1993 Theodore Ts'o <tytso@athena.mit.edu>
+ * Copyright (C) 2011 Karel Zak <kzak@redhat.com>
+ *
+ * This program comes with ABSOLUTELY NO WARRANTY.
+ */
+#include <stdio.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <sys/klog.h>
+#include <sys/syslog.h>
+#include <sys/time.h>
+#include <sys/sysinfo.h>
+#include <ctype.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "c.h"
+#include "colors.h"
+#include "nls.h"
+#include "strutils.h"
+#include "xalloc.h"
+#include "widechar.h"
+#include "all-io.h"
+#include "bitops.h"
+#include "closestream.h"
+#include "optutils.h"
+#include "timeutils.h"
+#include "monotonic.h"
+#include "mangle.h"
+#include "pager.h"
+
+/*
+ * Action codes used as the first argument of klogctl() in this file.
+ * NOTE(review): the values presumably mirror the kernel's SYSLOG_ACTION_*
+ * constants described in syslog(2) -- confirm before changing any of them.
+ */
+/* Close the log.  Currently a NOP. */
+#define SYSLOG_ACTION_CLOSE          0
+/* Open the log. Currently a NOP. */
+#define SYSLOG_ACTION_OPEN           1
+/* Read from the log. */
+#define SYSLOG_ACTION_READ           2
+/* Read all messages remaining in the ring buffer. (allowed for non-root) */
+#define SYSLOG_ACTION_READ_ALL       3
+/* Read and clear all messages remaining in the ring buffer */
+#define SYSLOG_ACTION_READ_CLEAR     4
+/* Clear ring buffer. */
+#define SYSLOG_ACTION_CLEAR          5
+/* Disable printk's to console */
+#define SYSLOG_ACTION_CONSOLE_OFF    6
+/* Enable printk's to console */
+#define SYSLOG_ACTION_CONSOLE_ON     7
+/* Set level of messages printed to console */
+#define SYSLOG_ACTION_CONSOLE_LEVEL  8
+/* Return number of unread characters in the log buffer */
+#define SYSLOG_ACTION_SIZE_UNREAD    9
+/* Return size of the log buffer */
+#define SYSLOG_ACTION_SIZE_BUFFER   10
+
+/*
+ * Color scheme
+ *
+ * One entry of the colors[] table: a logical color name together with the
+ * escape sequence that is used when the scheme does not override it.
+ */
+struct dmesg_color {
+	const char *scheme;	/* name used in terminal-colors.d/dmesg.scheme */
+	const char *dflt;	/* default color ESC sequence */
+};
+
+/* Indexes into the colors[] table, one per logical color name. */
+enum {
+	DMESG_COLOR_SUBSYS,
+	DMESG_COLOR_TIME,
+	DMESG_COLOR_TIMEBREAK,
+	DMESG_COLOR_ALERT,
+	DMESG_COLOR_CRIT,
+	DMESG_COLOR_ERR,
+	DMESG_COLOR_WARN,
+	DMESG_COLOR_SEGFAULT
+};
+
+/*
+ * Logical color names and their default escape sequences, indexed by the
+ * DMESG_COLOR_* constants.
+ */
+static const struct dmesg_color colors[] =
+{
+	[DMESG_COLOR_SUBSYS]    = { "subsys",	UL_COLOR_BROWN },
+	[DMESG_COLOR_TIME]	= { "time",     UL_COLOR_GREEN },
+	[DMESG_COLOR_TIMEBREAK]	= { "timebreak",UL_COLOR_GREEN UL_COLOR_BOLD },
+	[DMESG_COLOR_ALERT]	= { "alert",    UL_COLOR_REVERSE UL_COLOR_RED },
+	[DMESG_COLOR_CRIT]	= { "crit",     UL_COLOR_BOLD UL_COLOR_RED },
+	[DMESG_COLOR_ERR]       = { "err",      UL_COLOR_RED },
+	[DMESG_COLOR_WARN]	= { "warn",     UL_COLOR_BOLD },
+	[DMESG_COLOR_SEGFAULT]	= { "segfault", UL_COLOR_HALFBRIGHT UL_COLOR_RED }
+};
+
+/*
+ * Enables the color selected by a DMESG_COLOR_* index by handing the scheme
+ * name and the default sequence from colors[] to color_scheme_enable().
+ *
+ * Note that the expansion already ends with ';', so "dmesg_enable_color(x);"
+ * yields an additional empty statement -- do not use the macro as the
+ * unbraced body of an "if" that is followed by "else".
+ */
+#define dmesg_enable_color(_id) \
+		color_scheme_enable(colors[_id].scheme, colors[_id].dflt);
+
+/*
+ * Priority and facility names
+ *
+ * Shared element type of the level_names[] and facility_names[] tables.
+ */
+struct dmesg_name {
+	const char *name;	/* short name accepted on the command line */
+	const char *help;	/* description printed by usage(), marked with N_() */
+};
+
+/*
+ * Priority names -- based on sys/syslog.h
+ *
+ * The table is indexed directly by the LOG_* priority constants.
+ */
+static const struct dmesg_name level_names[] =
+{
+	[LOG_EMERG]   = { "emerg", N_("system is unusable") },
+	[LOG_ALERT]   = { "alert", N_("action must be taken immediately") },
+	[LOG_CRIT]    = { "crit",  N_("critical conditions") },
+	[LOG_ERR]     = { "err",   N_("error conditions") },
+	[LOG_WARNING] = { "warn",  N_("warning conditions") },
+	[LOG_NOTICE]  = { "notice",N_("normal but significant condition") },
+	[LOG_INFO]    = { "info",  N_("informational") },
+	[LOG_DEBUG]   = { "debug", N_("debug-level messages") }
+};
+
+/*
+ * sys/syslog.h uses (f << 3) for all facility codes.
+ * We want to use the codes as array indexes, so shift back...
+ *
+ * Note that libc LOG_FAC() macro returns the base codes, not the
+ * shifted code :-)
+ */
+#define FAC_BASE(f)	((f) >> 3)
+
+/* Facility names, indexed by the unshifted (base) facility code. */
+static const struct dmesg_name facility_names[] =
+{
+	[FAC_BASE(LOG_KERN)]     = { "kern",     N_("kernel messages") },
+	[FAC_BASE(LOG_USER)]     = { "user",     N_("random user-level messages") },
+	[FAC_BASE(LOG_MAIL)]     = { "mail",     N_("mail system") },
+	[FAC_BASE(LOG_DAEMON)]   = { "daemon",   N_("system daemons") },
+	[FAC_BASE(LOG_AUTH)]     = { "auth",     N_("security/authorization messages") },
+	[FAC_BASE(LOG_SYSLOG)]   = { "syslog",   N_("messages generated internally by syslogd") },
+	[FAC_BASE(LOG_LPR)]      = { "lpr",      N_("line printer subsystem") },
+	[FAC_BASE(LOG_NEWS)]     = { "news",     N_("network news subsystem") },
+	[FAC_BASE(LOG_UUCP)]     = { "uucp",     N_("UUCP subsystem") },
+	[FAC_BASE(LOG_CRON)]     = { "cron",     N_("clock daemon") },
+	[FAC_BASE(LOG_AUTHPRIV)] = { "authpriv", N_("security/authorization messages (private)") },
+	[FAC_BASE(LOG_FTP)]      = { "ftp",      N_("FTP daemon") },
+};
+
+/*
+ * Supported methods to read the message buffer; the selected one is stored
+ * in dmesg_control.method and dispatched on in read_buffer().
+ */
+enum {
+	DMESG_METHOD_KMSG,	/* read messages from /dev/kmsg (default) */
+	DMESG_METHOD_SYSLOG,	/* klogctl() buffer */
+	DMESG_METHOD_MMAP	/* mmap file with records (see --file) */
+};
+
+/* Timestamp output formats, stored in dmesg_control.time_fmt. */
+enum {
+	DMESG_TIMEFTM_NONE = 0,
+	DMESG_TIMEFTM_CTIME,		/* [ctime] */
+	DMESG_TIMEFTM_CTIME_DELTA,	/* [ctime <delta>] */
+	DMESG_TIMEFTM_DELTA,		/* [<delta>] */
+	DMESG_TIMEFTM_RELTIME,		/* [relative] */
+	DMESG_TIMEFTM_TIME,		/* [time] */
+	DMESG_TIMEFTM_TIME_DELTA,	/* [time <delta>] */
+	DMESG_TIMEFTM_ISO8601		/* 2013-06-13T22:11:00,123456+0100 */
+};
+/* True when control struct 'c' uses format 'f' (name without the DMESG_TIMEFTM_ prefix) */
+#define is_timefmt(c, f) ((c)->time_fmt == (DMESG_TIMEFTM_ ##f))
+
+/*
+ * Run-time settings and state shared by the reading, parsing and printing
+ * functions.
+ */
+struct dmesg_control {
+	/* bit arrays -- see include/bitops.h */
+	char levels[ARRAY_SIZE(level_names) / NBBY + 1];
+	char facilities[ARRAY_SIZE(facility_names) / NBBY + 1];
+
+	struct timeval	lasttime;	/* last printed timestamp */
+	struct tm	lasttm;		/* last localtime */
+	struct timeval	boot_time;	/* system boot time */
+
+	int		action;		/* SYSLOG_ACTION_* */
+	int		method;		/* DMESG_METHOD_* */
+
+	size_t		bufsize;	/* size of syslog buffer */
+
+	int		kmsg;		/* /dev/kmsg file descriptor */
+	ssize_t		kmsg_first_read;/* initial read() return code */
+	char		kmsg_buf[BUFSIZ];/* buffer to read kmsg data */
+
+	/*
+	 * For the --file option we mmap whole file. The unnecessary (already
+	 * printed) pages are always unmapped. The result is that we have in
+	 * memory only the currently used page(s).
+	 */
+	char		*filename;
+	char		*mmap_buff;	/* start of the still mapped part of the file */
+	size_t		pagesize;	/* getpagesize() result, unmap granularity */
+	unsigned int	time_fmt;	/* time format */
+
+	unsigned int	follow:1,	/* wait for new messages */
+			raw:1,		/* raw mode */
+			fltr_lev:1,	/* filter out by levels[] */
+			fltr_fac:1,	/* filter out by facilities[] */
+			decode:1,	/* use "facility: level: " prefix */
+			pager:1,	/* pipe output into a pager */
+			color:1,	/* colorize messages */
+			force_prefix:1;	/* force timestamp and decode prefix
+					   on each line */
+	int		indent;		/* due to timestamps if newline */
+};
+
+/*
+ * One parsed message. 'mesg' points into the buffer the record was parsed
+ * from (it is not a private copy).
+ */
+struct dmesg_record {
+	const char	*mesg;		/* begin of the message text */
+	size_t		mesg_size;	/* size of the message text in bytes */
+
+	int		level;		/* index to level_names[], or -1 */
+	int		facility;	/* index to facility_names[], or -1 */
+	struct timeval  tv;		/* timestamp parsed from the record */
+
+	const char	*next;		/* buffer with next unparsed record */
+	size_t		next_size;	/* size of the next buffer */
+};
+
+/*
+ * Resets the per-message fields of a record: no message, unknown facility
+ * and level (-1) and a zero timestamp. The 'next' and 'next_size' members are
+ * left untouched so the parsing position is preserved.
+ */
+#define INIT_DMESG_RECORD(_r)  do { \
+		(_r)->mesg = NULL; \
+		(_r)->mesg_size = 0; \
+		(_r)->facility = -1; \
+		(_r)->level = -1; \
+		(_r)->tv.tv_sec = 0; \
+		(_r)->tv.tv_usec = 0; \
+	} while (0)
+
+static int read_kmsg(struct dmesg_control *ctl);
+
+/*
+ * Enables the color that belongs to @log_level. Messages of any other level
+ * are colorized as "segfault" when the text contains "segfault at".
+ *
+ * Returns 0 if a color has been enabled, otherwise -1.
+ */
+static int set_level_color(int log_level, const char *mesg, size_t mesgsz)
+{
+	int color;
+
+	if (log_level == LOG_ALERT)
+		color = DMESG_COLOR_ALERT;
+	else if (log_level == LOG_CRIT)
+		color = DMESG_COLOR_CRIT;
+	else if (log_level == LOG_ERR)
+		color = DMESG_COLOR_ERR;
+	else if (log_level == LOG_WARNING)
+		color = DMESG_COLOR_WARN;
+	else if (memmem(mesg, mesgsz, "segfault at", 11))
+		/* an important keyword in a non-warning/error message */
+		color = DMESG_COLOR_SEGFAULT;
+	else
+		return -1;
+
+	dmesg_enable_color(color);
+	return 0;
+}
+
+/*
+ * Prints the help text, including the lists of supported facilities and
+ * levels, to stdout and exits with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	size_t i;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Display or control the kernel ring buffer.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -C, --clear                 clear the kernel ring buffer\n"), out);
+	fputs(_(" -c, --read-clear            read and clear all messages\n"), out);
+	fputs(_(" -D, --console-off           disable printing messages to console\n"), out);
+	fputs(_(" -E, --console-on            enable printing messages to console\n"), out);
+	fputs(_(" -F, --file <file>           use the file instead of the kernel log buffer\n"), out);
+	fputs(_(" -f, --facility <list>       restrict output to defined facilities\n"), out);
+	fputs(_(" -H, --human                 human readable output\n"), out);
+	fputs(_(" -k, --kernel                display kernel messages\n"), out);
+	fputs(_(" -L, --color[=<when>]        colorize messages (auto, always or never)\n"), out);
+	fprintf(out,
+	        "                               %s\n", USAGE_COLORS_DEFAULT);
+	fputs(_(" -l, --level <list>          restrict output to defined levels\n"), out);
+	fputs(_(" -n, --console-level <level> set level of messages printed to console\n"), out);
+	fputs(_(" -P, --nopager               do not pipe output into a pager\n"), out);
+	fputs(_(" -p, --force-prefix          force timestamp output on each line of multi-line messages\n"), out);
+	fputs(_(" -r, --raw                   print the raw message buffer\n"), out);
+	fputs(_(" -S, --syslog                force to use syslog(2) rather than /dev/kmsg\n"), out);
+	fputs(_(" -s, --buffer-size <size>    buffer size to query the kernel ring buffer\n"), out);
+	fputs(_(" -u, --userspace             display userspace messages\n"), out);
+	fputs(_(" -w, --follow                wait for new messages\n"), out);
+	fputs(_(" -x, --decode                decode facility and level to readable string\n"), out);
+	fputs(_(" -d, --show-delta            show time delta between printed messages\n"), out);
+	fputs(_(" -e, --reltime               show local time and time delta in readable format\n"), out);
+	fputs(_(" -T, --ctime                 show human-readable timestamp (may be inaccurate!)\n"), out);
+	fputs(_(" -t, --notime                don't show any timestamp with messages\n"), out);
+	fputs(_("     --time-format <format>  show timestamp using the given format:\n"
+		"                               [delta|reltime|ctime|notime|iso]\n"
+		"Suspending/resume will make ctime and iso timestamps inaccurate.\n"), out);
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(29));
+
+	/*
+	 * The facility table has to be walked with its own size; the two
+	 * tables do not have the same number of entries.
+	 */
+	fputs(_("\nSupported log facilities:\n"), out);
+	for (i = 0; i < ARRAY_SIZE(facility_names); i++)
+		fprintf(out, " %7s - %s\n",
+			facility_names[i].name,
+			_(facility_names[i].help));
+
+	fputs(_("\nSupported log levels (priorities):\n"), out);
+	for (i = 0; i < ARRAY_SIZE(level_names); i++)
+		fprintf(out, " %7s - %s\n",
+			level_names[i].name,
+			_(level_names[i].help));
+
+	printf(USAGE_MAN_TAIL("dmesg(1)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * LEVEL     ::= <number> | <name>
+ *  <number> ::= @len is set:  number in range <0..N>, where N < ARRAY_SIZE(level_names)
+ *           ::= @len not set: number in range <1..N>, where N <= ARRAY_SIZE(level_names)
+ *  <name>   ::= case-insensitive text
+ *
+ *  Note that @len argument is not set when parsing "-n <level>" command line
+ *  option. The console_level is interpreted as "log level less than the value".
+ *
+ *  For example "dmesg -n 8" or "dmesg -n debug" enables debug console log
+ *  level by klogctl(SYSLOG_ACTION_CONSOLE_LEVEL, NULL, 8). The @str argument
+ *  has to be parsed to number in range <1..8>.
+ *
+ *  Returns the level (shifted by one if @len is not set), or -1 if @str is
+ *  NULL. Anything that cannot be parsed terminates the program by
+ *  err()/errx().
+ */
+static int parse_level(const char *str, size_t len)
+{
+	int offset = 0;
+
+	if (!str)
+		return -1;
+	if (!len) {
+		len = strlen(str);
+		offset = 1;
+	}
+	errno = 0;
+
+	/* <ctype.h> functions are defined for unsigned char values only */
+	if (isdigit((unsigned char) *str)) {
+		char *end = NULL;
+		long x = strtol(str, &end, 10) - offset;
+
+		if (!errno && end && end > str && (size_t) (end - str) == len &&
+		    x >= 0 && (size_t) x < ARRAY_SIZE(level_names))
+			return x + offset;
+	} else {
+		size_t i;
+
+		for (i = 0; i < ARRAY_SIZE(level_names); i++) {
+			const char *n = level_names[i].name;
+
+			/* compare lengths first, n[len] is never read
+			 * behind the end of the name */
+			if (strlen(n) == len && strncasecmp(str, n, len) == 0)
+				return i + offset;
+		}
+	}
+
+	if (errno)
+		err(EXIT_FAILURE, _("failed to parse level '%s'"), str);
+
+	errx(EXIT_FAILURE, _("unknown level '%s'"), str);
+	return -1;
+}
+
+/*
+ * FACILITY  ::= <number> | <name>
+ *  <number> ::= number in range <0..N>, where N < ARRAY_SIZE(facility_names)
+ *  <name>   ::= case-insensitive text
+ *
+ * If @len is zero then the whole @str is parsed.
+ *
+ * Returns index to facility_names[], or -1 if @str is NULL. Anything that
+ * cannot be parsed terminates the program by err()/errx().
+ */
+static int parse_facility(const char *str, size_t len)
+{
+	if (!str)
+		return -1;
+	if (!len)
+		len = strlen(str);
+	errno = 0;
+
+	/* <ctype.h> functions are defined for unsigned char values only */
+	if (isdigit((unsigned char) *str)) {
+		char *end = NULL;
+		long x = strtol(str, &end, 10);
+
+		if (!errno && end && end > str && (size_t) (end - str) == len &&
+		    x >= 0 && (size_t) x < ARRAY_SIZE(facility_names))
+			return x;
+	} else {
+		size_t i;
+
+		for (i = 0; i < ARRAY_SIZE(facility_names); i++) {
+			const char *n = facility_names[i].name;
+
+			/* compare lengths first, n[len] is never read
+			 * behind the end of the name */
+			if (strlen(n) == len && strncasecmp(str, n, len) == 0)
+				return i;
+		}
+	}
+
+	if (errno)
+		err(EXIT_FAILURE, _("failed to parse facility '%s'"), str);
+
+	errx(EXIT_FAILURE, _("unknown facility '%s'"), str);
+	return -1;
+}
+
+/*
+ * Parses numerical prefix used for all messages in kernel ring buffer.
+ *
+ * Priorities/facilities are encoded into a single 32-bit quantity, where the
+ * bottom 3 bits are the priority (0-7) and the top 28 bits are the facility
+ * (0-big number).
+ *
+ * Note that the number has to end with '>' or ',' char.
+ *
+ * On success @fac and @lev are set to indexes usable for facility_names[]
+ * and level_names[], or to -1 if the value has no entry in the table. The
+ * function then returns pointer behind the terminating char. If no number
+ * has been found then @str is returned and @fac and @lev are not modified.
+ */
+static const char *parse_faclev(const char *str, int *fac, int *lev)
+{
+	long num;
+	char *end = NULL;
+
+	if (!str)
+		return str;
+
+	errno = 0;
+	num = strtol(str, &end, 10);
+
+	if (!errno && end && end > str) {
+		*fac = LOG_FAC(num);
+		*lev = LOG_PRI(num);
+
+		/* valid indexes are 0 .. ARRAY_SIZE() - 1 */
+		if (*lev < 0 || (size_t) *lev >= ARRAY_SIZE(level_names))
+			*lev = -1;
+		if (*fac < 0 || (size_t) *fac >= ARRAY_SIZE(facility_names))
+			*fac = -1;
+		return end + 1;		/* skip '>' or ',' */
+	}
+
+	return str;
+}
+
+/*
+ * Parses timestamp from syslog message prefix, expected format:
+ *
+ *	seconds.microseconds]
+ *
+ * the ']' is the timestamp field terminator.
+ *
+ * Returns pointer behind the ']' on success, otherwise @str0. Note that @tv
+ * may be partially modified even if the parsing fails.
+ */
+static const char *parse_syslog_timestamp(const char *str0, struct timeval *tv)
+{
+	const char *num;
+	char *stop = NULL;
+
+	if (str0 == NULL)
+		return NULL;
+
+	num = str0;
+	errno = 0;
+	tv->tv_sec = strtol(num, &stop, 10);
+
+	/* the microseconds are expected only behind "seconds." */
+	if (errno == 0 && stop != NULL && stop[0] == '.' && stop[1] != '\0') {
+		num = stop + 1;
+		stop = NULL;
+		tv->tv_usec = strtol(num, &stop, 10);
+	}
+
+	if (errno == 0 && stop != NULL && stop != num && *stop == ']')
+		return stop + 1;	/* skip ']' */
+
+	return str0;
+}
+
+/*
+ * Parses timestamp from /dev/kmsg, expected formats:
+ *
+ *	microseconds,
+ *	microseconds;
+ *
+ * the ',' is fields separators and ';' items terminator (for the last item)
+ *
+ * Returns pointer behind the separator on success; otherwise @str0 and @tv
+ * is not modified.
+ */
+static const char *parse_kmsg_timestamp(const char *str0, struct timeval *tv)
+{
+	char *stop = NULL;
+	uint64_t usec;
+
+	if (str0 == NULL)
+		return NULL;
+
+	errno = 0;
+	usec = strtoumax(str0, &stop, 10);
+
+	if (errno != 0 || stop == NULL || (*stop != ',' && *stop != ';'))
+		return str0;
+
+	tv->tv_sec = usec / 1000000;
+	tv->tv_usec = usec % 1000000;
+
+	return stop + 1;	/* skip separator */
+}
+
+
+/* Returns @a - @b in seconds (with the microseconds as the fractional part) */
+static double time_diff(struct timeval *a, struct timeval *b)
+{
+	double secs = a->tv_sec - b->tv_sec;
+	double usecs = a->tv_usec - b->tv_usec;
+
+	return secs + usecs / 1E6;
+}
+
+/*
+ * Asks klogctl() for the size of the log buffer. Returns the size, or zero
+ * if the call fails or reports a non-positive value.
+ */
+static int get_syslog_buffer_size(void)
+{
+	int size = klogctl(SYSLOG_ACTION_SIZE_BUFFER, NULL, 0);
+
+	if (size <= 0)
+		return 0;
+	return size;
+}
+
+/*
+ * Reads messages from regular file by mmap
+ *
+ * The whole ctl->filename is mapped read-only; the mapping is stored to @buf
+ * and ctl->mmap_buff, and ctl->pagesize is initialized.
+ *
+ * Returns size of the file, or -1 if no filename is defined. Failed open(),
+ * fstat() or mmap() terminates the program.
+ */
+static ssize_t mmap_file_buffer(struct dmesg_control *ctl, char **buf)
+{
+	struct stat sb;
+	int fd;
+
+	if (ctl->filename == NULL)
+		return -1;
+
+	if ((fd = open(ctl->filename, O_RDONLY)) < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), ctl->filename);
+	if (fstat(fd, &sb) != 0)
+		err(EXIT_FAILURE, _("stat of %s failed"), ctl->filename);
+
+	*buf = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
+	if (*buf == MAP_FAILED)
+		err(EXIT_FAILURE, _("cannot mmap: %s"), ctl->filename);
+
+	/* the descriptor is no longer necessary for the mapping */
+	close(fd);
+
+	ctl->pagesize = getpagesize();
+	ctl->mmap_buff = *buf;
+
+	return sb.st_size;
+}
+
+/*
+ * Reads messages from kernel ring buffer by klogctl()
+ *
+ * If ctl->bufsize is set then one klogctl(ctl->action) call with buffer
+ * of that size (plus 8 bytes) is used. Otherwise the buffer is read by
+ * SYSLOG_ACTION_READ_ALL and enlarged until the result does not fill it
+ * completely.
+ *
+ * The allocated buffer is returned by @buf. Returns the last klogctl()
+ * return code.
+ */
+static ssize_t read_syslog_buffer(struct dmesg_control *ctl, char **buf)
+{
+	size_t sz;
+	int rc = -1;
+
+	if (ctl->bufsize) {
+		sz = ctl->bufsize + 8;
+		*buf = xmalloc(sz * sizeof(char));
+		return klogctl(ctl->action, *buf, sz);
+	}
+
+	for (sz = 16392; ; sz *= 4) {
+		*buf = xmalloc(sz * sizeof(char));
+		rc = klogctl(SYSLOG_ACTION_READ_ALL, *buf, sz);
+
+		/*
+		 * Stop on error, if the data did not fill the whole buffer,
+		 * or if the buffer is already too large.
+		 */
+		if (rc < 0 || (size_t) rc != sz || sz > (1 << 28))
+			break;
+
+		/* the buffer has been probably too small; try a bigger one */
+		free(*buf);
+		*buf = NULL;
+	}
+
+	if (rc > 0 && ctl->action == SYSLOG_ACTION_READ_CLEAR)
+		rc = klogctl(SYSLOG_ACTION_READ_CLEAR, *buf, sz);
+
+	return rc;
+}
+
+/*
+ * Top level function to read messages
+ *
+ * Calls the reader selected by ctl->method and returns its result. Note that
+ * read_kmsg() also prints the messages; no buffer is returned by @buf for
+ * DMESG_METHOD_KMSG.
+ */
+static ssize_t read_buffer(struct dmesg_control *ctl, char **buf)
+{
+	ssize_t n;
+
+	if (ctl->method == DMESG_METHOD_MMAP)
+		return mmap_file_buffer(ctl, buf);
+
+	if (ctl->method == DMESG_METHOD_SYSLOG) {
+		if (!ctl->bufsize)
+			ctl->bufsize = get_syslog_buffer_size();
+
+		return read_syslog_buffer(ctl, buf);
+	}
+
+	if (ctl->method != DMESG_METHOD_KMSG)
+		abort();	/* impossible method -> drop core */
+
+	/*
+	 * Since kernel 3.5.0
+	 */
+	n = read_kmsg(ctl);
+	if (n == 0 && ctl->action == SYSLOG_ACTION_READ_CLEAR)
+		n = klogctl(SYSLOG_ACTION_CLEAR, NULL, 0);
+
+	return n;
+}
+
+/*
+ * Writes @size bytes from @buf to @out, each as \x<hex> sequence.
+ *
+ * Returns 0 on success, or the negative fprintf() return code.
+ */
+static int fwrite_hex(const char *buf, size_t size, FILE *out)
+{
+	const char *p = buf;
+	const char *stop = buf + size;
+
+	while (p < stop) {
+		int rc = fprintf(out, "\\x%02hhx", *p++);
+
+		if (rc < 0)
+			return rc;
+	}
+	return 0;
+}
+
+/*
+ * Prints to 'out' and non-printable chars are replaced with \x<hex> sequences.
+ *
+ * If @indent is non-zero then every '\n' that is followed by another data
+ * within the buffer is followed by @indent spaces. The output is terminated
+ * by the first L'\0' if compiled with widechar support.
+ *
+ * A failed write terminates the program; EPIPE is not reported as an error.
+ */
+static void safe_fwrite(const char *buf, size_t size, int indent, FILE *out)
+{
+	size_t i;
+#ifdef HAVE_WIDECHAR
+	mbstate_t s;
+	memset(&s, 0, sizeof (s));
+#endif
+	for (i = 0; i < size; i++) {
+		const char *p = buf + i;
+		int rc, hex = 0;
+		size_t len;
+
+#ifdef HAVE_WIDECHAR
+		wchar_t wc;
+		len = mbrtowc(&wc, p, size - i, &s);
+
+		if (len == 0)				/* L'\0' */
+			return;
+
+		if (len == (size_t)-1 || len == (size_t)-2) {		/* invalid sequence */
+			memset(&s, 0, sizeof (s));
+			len = hex = 1;
+		} else if (len > 1 && !iswprint(wc)) {	/* non-printable multibyte */
+			hex = 1;
+		}
+		i += len - 1;
+#else
+		len = 1;
+		if (!isprint((unsigned char) *p) &&
+		    !isspace((unsigned char) *p))        /* non-printable */
+			hex = 1;
+#endif
+		if (hex)
+			rc = fwrite_hex(p, len, out);
+		/*
+		 * Look at the next byte only if the indentation is requested
+		 * and the byte is still within the buffer; the buffer does
+		 * not have to be zero terminated (e.g. mmap'ed file).
+		 */
+		else if (indent && *p == '\n' &&
+			 p + 1 < buf + size && *(p + 1)) {
+			rc = fwrite(p, 1, len, out) != len;
+			if (fprintf(out, "%*s", indent, "") != indent)
+				rc |= 1;
+		}
+		else
+			rc = fwrite(p, 1, len, out) != len;
+		if (rc != 0) {
+			if (errno != EPIPE)
+				err(EXIT_FAILURE, _("write failed"));
+			exit(EXIT_SUCCESS);
+		}
+	}
+}
+
+/*
+ * Skips one item in the <@begin, @end) range. The item is terminated by any
+ * char from @sep or by '\0'.
+ *
+ * Returns pointer behind the terminator, or @end if no terminator has been
+ * found.
+ */
+static const char *skip_item(const char *begin, const char *end, const char *sep)
+{
+	const char *p = begin;
+
+	while (p < end) {
+		int c = *p;
+
+		p++;
+		if (c == '\0')
+			break;
+		if (strchr(sep, c) != NULL)
+			break;
+	}
+
+	return p;
+}
+
+/*
+ * Parses one record from syslog(2) buffer
+ *
+ * The unparsed data are defined by rec->next and rec->next_size; both are
+ * updated to point behind the parsed record. The message itself is not
+ * copied, rec->mesg points to the buffer.
+ *
+ * Returns 0 if a record has been parsed, 1 if there is nothing more to
+ * parse, and -1 if ctl->method is neither DMESG_METHOD_MMAP nor
+ * DMESG_METHOD_SYSLOG.
+ */
+static int get_next_syslog_record(struct dmesg_control *ctl,
+				  struct dmesg_record *rec)
+{
+	size_t i;
+	const char *begin = NULL;
+
+	if (ctl->method != DMESG_METHOD_MMAP &&
+	    ctl->method != DMESG_METHOD_SYSLOG)
+		return -1;
+
+	if (!rec->next || !rec->next_size)
+		return 1;
+
+	/* resets message, facility, level and time; keeps next/next_size */
+	INIT_DMESG_RECORD(rec);
+
+	/*
+	 * Unmap already printed file data from memory
+	 *
+	 * (at most one page per call, and only if the parsing position is
+	 * more than one page behind the begin of the mapping)
+	 */
+	if (ctl->mmap_buff && (size_t) (rec->next - ctl->mmap_buff) > ctl->pagesize) {
+		void *x = ctl->mmap_buff;
+
+		ctl->mmap_buff += ctl->pagesize;
+		munmap(x, ctl->pagesize);
+	}
+
+	for (i = 0; i < rec->next_size; i++) {
+		const char *p = rec->next + i;
+		const char *end = NULL;
+
+		if (!begin)
+			begin = p;
+		/*
+		 * The record ends either at the end of the buffer ('end'
+		 * points behind the last byte), or at '\n' followed by '<'
+		 * of the next record ('end' points to the '\n').
+		 */
+		if (i + 1 == rec->next_size) {
+			end = p + 1;
+			i++;
+		} else if (*p == '\n' && *(p + 1) == '<')
+			end = p;
+
+		if (begin && !*begin)
+			begin = NULL;	/* zero(s) at the end of the buffer? */
+		if (!begin || !end)
+			continue;
+		if (end <= begin)
+			continue;	/* error or empty line? */
+
+		/* "<faclev>" prefix; parsed only if the result is used */
+		if (*begin == '<') {
+			if (ctl->fltr_lev || ctl->fltr_fac || ctl->decode || ctl->color)
+				begin = parse_faclev(begin + 1, &rec->facility,
+						     &rec->level);
+			else
+				begin = skip_item(begin, end, ">");
+		}
+
+		/* "[timestamp]" prefix; parsed only if timestamps are printed */
+		if (*begin == '[' && (*(begin + 1) == ' ' ||
+				      isdigit(*(begin + 1)))) {
+
+			if (!is_timefmt(ctl, NONE))
+				begin = parse_syslog_timestamp(begin + 1, &rec->tv);
+			else
+				begin = skip_item(begin, end, "]");
+
+			/* one space between the timestamp and the message */
+			if (begin < end && *begin == ' ')
+				begin++;
+		}
+
+		rec->mesg = begin;
+		rec->mesg_size = end - begin;
+
+		/* Don't count \n from the last message to the message size */
+		/* NOTE(review): for the last record 'end' points behind the
+		 * parsed data and *end is read here -- confirm that the
+		 * buffer always has a readable byte at this position */
+		if (*end != '\n' && *(end - 1) == '\n')
+			rec->mesg_size--;
+
+		/* move behind the record and its '\n' separator (if any) */
+		rec->next_size -= end - rec->next;
+		rec->next = rec->next_size > 0 ? end + 1 : NULL;
+		if (rec->next_size > 0)
+			rec->next_size--;
+
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Applies the level and facility filters.
+ *
+ * Returns 1 if the record should be printed, 0 if it is filtered out. A
+ * record with unknown (negative) level or facility never matches the
+ * corresponding filter.
+ */
+static int accept_record(struct dmesg_control *ctl, struct dmesg_record *rec)
+{
+	/* test the level here, a negative number must not be used as an
+	 * index to the levels[] bit array */
+	if (ctl->fltr_lev && (rec->level < 0 ||
+			      !isset(ctl->levels, rec->level)))
+		return 0;
+
+	if (ctl->fltr_fac && (rec->facility < 0 ||
+			      !isset(ctl->facilities, rec->facility)))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Prints the buffer without any parsing (non-printable chars are still
+ * escaped by safe_fwrite()). A final '\n' is added if the printed data does
+ * not end with it.
+ *
+ * If the data comes from mmap'ed file (ctl->mmap_buff) then the file is
+ * printed and unmapped chunk by chunk and @buf is not used.
+ */
+static void raw_print(struct dmesg_control *ctl, const char *buf, size_t size)
+{
+	int lastc = '\n';
+
+	if (!ctl->mmap_buff) {
+		/*
+		 * Print whole ring buffer
+		 */
+		safe_fwrite(buf, size, 0, stdout);
+		if (size)	/* nothing to look at in an empty buffer */
+			lastc = buf[size - 1];
+	} else {
+		/*
+		 * Print file in small chunks to save memory
+		 */
+		while (size) {
+			size_t sz = size > ctl->pagesize ? ctl->pagesize : size;
+			char *x = ctl->mmap_buff;
+
+			safe_fwrite(x, sz, 0, stdout);
+			lastc = x[sz - 1];
+			size -= sz;
+			ctl->mmap_buff += sz;
+			munmap(x, sz);
+		}
+	}
+
+	if (lastc != '\n')
+		putchar('\n');
+}
+
+/*
+ * Converts the record timestamp (added to the boot time) to broken-down
+ * local time. Returns the localtime_r() result.
+ */
+static struct tm *record_localtime(struct dmesg_control *ctl,
+				   struct dmesg_record *rec,
+				   struct tm *tm)
+{
+	time_t when = ctl->boot_time.tv_sec;
+
+	when += rec->tv.tv_sec;
+	return localtime_r(&when, tm);
+}
+
+/*
+ * Formats the record time as "%a %b %e %H:%M:%S %Y" to @buf. The result is
+ * an empty string if strftime() returns zero. Returns @buf.
+ */
+static char *record_ctime(struct dmesg_control *ctl,
+			  struct dmesg_record *rec,
+			  char *buf, size_t bufsiz)
+{
+	struct tm tm;
+	size_t n;
+
+	record_localtime(ctl, rec, &tm);
+
+	n = strftime(buf, bufsiz, "%a %b %e %H:%M:%S %Y", &tm);
+	if (n == 0)
+		buf[0] = '\0';
+	return buf;
+}
+
+/*
+ * Formats @tm as "%b%e %H:%M" to @buf. The result is an empty string if
+ * strftime() returns zero. Returns @buf.
+ */
+static char *short_ctime(struct tm *tm, char *buf, size_t bufsiz)
+{
+	size_t n = strftime(buf, bufsiz, "%b%e %H:%M", tm);
+
+	if (n == 0)
+		buf[0] = '\0';
+	return buf;
+}
+
+/*
+ * Formats the record time (added to the boot time) by strtimeval_iso() with
+ * ISO_TIMESTAMP_COMMA_T. Returns @buf, or NULL if strtimeval_iso() fails.
+ */
+static char *iso_8601_time(struct dmesg_control *ctl, struct dmesg_record *rec,
+			   char *buf, size_t bufsz)
+{
+	struct timeval when;
+
+	when.tv_sec = ctl->boot_time.tv_sec + rec->tv.tv_sec;
+	when.tv_usec = rec->tv.tv_usec;
+
+	return strtimeval_iso(&when, ISO_TIMESTAMP_COMMA_T, buf, bufsz) == 0 ?
+			buf : NULL;
+}
+
+/*
+ * Returns the time (in seconds) between the record and the previously
+ * printed record, or zero if no previous timestamp is set. The record time
+ * is remembered in ctl->lasttime for the next call.
+ */
+static double record_count_delta(struct dmesg_control *ctl,
+				 struct dmesg_record *rec)
+{
+	double delta = timerisset(&ctl->lasttime) ?
+			time_diff(&rec->tv, &ctl->lasttime) : 0;
+
+	ctl->lasttime = rec->tv;
+	return delta;
+}
+
+/*
+ * Searches for the end of the sub-system prefix; it's the first ':' that is
+ * followed by a blank char within the message.
+ *
+ * Returns pointer to the ':', or NULL if not found.
+ */
+static const char *get_subsys_delimiter(const char *mesg, size_t mesg_size)
+{
+	const char *cur = mesg;
+	size_t left = mesg_size;
+
+	while (left > 0) {
+		const char *colon = strnchr(cur, left, ':');
+
+		if (colon == NULL)
+			break;
+
+		left -= colon - cur + 1;
+		if (left == 0)
+			break;		/* ':' is the last char of the message */
+
+		if (isblank(*(colon + 1)))
+			return colon;
+		cur = colon + 1;
+	}
+	return NULL;
+}
+
+/*
+ * Prints one record to stdout.
+ *
+ * The record is ignored if it does not pass accept_record(). The message is
+ * prefixed by the decoded facility and level (--decode) and by the
+ * timestamp in the ctl->time_fmt format. In the raw mode the prefix is the
+ * syslog(2) compatible "<pri>[time] ".
+ *
+ * The length of the prefix is stored in ctl->indent and used by
+ * safe_fwrite() to indent multi-line messages. If ctl->force_prefix is
+ * enabled then the prefix is printed on each line of the message.
+ */
+static void print_record(struct dmesg_control *ctl,
+			 struct dmesg_record *rec)
+{
+	char buf[128];
+	char fpbuf[32] = "\0";
+	char tsbuf[64] = "\0";
+	size_t mesg_size = rec->mesg_size;
+	int timebreak = 0;
+	char *mesg_copy = NULL;
+	const char *line = NULL;
+
+	if (!accept_record(ctl, rec))
+		return;
+
+	if (!rec->mesg_size) {
+		putchar('\n');
+		return;
+	}
+
+	/*
+	 * Compose syslog(2) compatible raw output -- used for /dev/kmsg for
+	 * backward compatibility with syslog(2) buffers only
+	 */
+	if (ctl->raw) {
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf),
+				       "<%d>[%5ld.%06ld] ",
+				       LOG_MAKEPRI(rec->facility, rec->level),
+				       (long) rec->tv.tv_sec,
+				       (long) rec->tv.tv_usec);
+		goto full_output;
+	}
+
+	/* Store decode information (facility & priority level) in a buffer */
+	if (ctl->decode &&
+	    (rec->level > -1) && (rec->level < (int) ARRAY_SIZE(level_names)) &&
+	    (rec->facility > -1) &&
+	    (rec->facility < (int) ARRAY_SIZE(facility_names)))
+		snprintf(fpbuf, sizeof(fpbuf), "%-6s:%-6s: ",
+			 facility_names[rec->facility].name,
+			 level_names[rec->level].name);
+
+	/* Store the timestamp in a buffer */
+	switch (ctl->time_fmt) {
+		double delta;
+		struct tm cur;
+	case DMESG_TIMEFTM_NONE:
+		ctl->indent = 0;
+		break;
+	case DMESG_TIMEFTM_CTIME:
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%s] ",
+				      record_ctime(ctl, rec, buf, sizeof(buf)));
+		break;
+	case DMESG_TIMEFTM_CTIME_DELTA:
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%s <%12.06f>] ",
+				      record_ctime(ctl, rec, buf, sizeof(buf)),
+				      record_count_delta(ctl, rec));
+		break;
+	case DMESG_TIMEFTM_DELTA:
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[<%12.06f>] ",
+				      record_count_delta(ctl, rec));
+		break;
+	case DMESG_TIMEFTM_RELTIME:
+		record_localtime(ctl, rec, &cur);
+		delta = record_count_delta(ctl, rec);
+		/* print the short ctime whenever the minute changes */
+		if (cur.tm_min != ctl->lasttm.tm_min ||
+		    cur.tm_hour != ctl->lasttm.tm_hour ||
+		    cur.tm_yday != ctl->lasttm.tm_yday) {
+			timebreak = 1;
+			ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%s] ",
+					      short_ctime(&cur, buf,
+							  sizeof(buf)));
+		} else {
+			if (delta < 10)
+				ctl->indent = snprintf(tsbuf, sizeof(tsbuf),
+						"[  %+8.06f] ",  delta);
+			else
+				ctl->indent = snprintf(tsbuf, sizeof(tsbuf),
+						"[ %+9.06f] ", delta);
+		}
+		ctl->lasttm = cur;
+		break;
+	case DMESG_TIMEFTM_TIME:
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%5ld.%06ld] ",
+				      (long)rec->tv.tv_sec,
+				      (long)rec->tv.tv_usec);
+		break;
+	case DMESG_TIMEFTM_TIME_DELTA:
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%5ld.%06ld <%12.06f>] ",
+				      (long)rec->tv.tv_sec,
+				      (long)rec->tv.tv_usec,
+				      record_count_delta(ctl, rec));
+		break;
+	case DMESG_TIMEFTM_ISO8601:
+		/* iso_8601_time() returns NULL on error; never pass NULL
+		 * to the %s conversion, print no timestamp in that case */
+		if (iso_8601_time(ctl, rec, buf, sizeof(buf)))
+			ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "%s ",
+					       buf);
+		else
+			ctl->indent = 0;
+		break;
+	default:
+		abort();
+	}
+
+	ctl->indent += strlen(fpbuf);
+
+full_output:
+	/* Output the decode information */
+	if (*fpbuf)
+		fputs(fpbuf, stdout);
+
+	/* Output the timestamp buffer */
+	if (*tsbuf) {
+		/* Colorize the timestamp */
+		if (ctl->color)
+			dmesg_enable_color(timebreak ? DMESG_COLOR_TIMEBREAK :
+						       DMESG_COLOR_TIME);
+		if (ctl->time_fmt != DMESG_TIMEFTM_RELTIME) {
+			fputs(tsbuf, stdout);
+		} else {
+			/*
+			 * For relative timestamping, the first line's
+			 * timestamp is the offset and all other lines will
+			 * report an offset of 0.000000.
+			 */
+			if (!line)
+				fputs(tsbuf, stdout);
+			else
+				printf("[  +0.000000] ");
+		}
+		if (ctl->color)
+			color_disable();
+	}
+
+	/*
+	 * A kernel message may contain several lines of output, separated
+	 * by '\n'.  If the timestamp and decode outputs are forced then each
+	 * line of the message must be displayed with that information.
+	 */
+	if (ctl->force_prefix) {
+		if (!line) {
+			/*
+			 * Copy exactly mesg_size bytes. The message points
+			 * to the input buffer and it is not necessarily
+			 * zero terminated (syslog(2) buffer, file), so
+			 * strdup() could copy also the following records.
+			 */
+			mesg_copy = xmalloc(rec->mesg_size + 1);
+			memcpy(mesg_copy, rec->mesg, rec->mesg_size);
+			mesg_copy[rec->mesg_size] = '\0';
+
+			line = strtok(mesg_copy, "\n");
+			if (!line)
+				goto done;	/* nothing but line breaks */
+			mesg_size = strlen(line);
+		}
+	} else {
+		line = rec->mesg;
+		mesg_size = rec->mesg_size;
+	}
+
+	/* Colorize kernel message output */
+	if (ctl->color) {
+		/* Subsystem prefix */
+		const char *subsys = get_subsys_delimiter(line, mesg_size);
+		int has_color = 0;
+
+		if (subsys) {
+			dmesg_enable_color(DMESG_COLOR_SUBSYS);
+			safe_fwrite(line, subsys - line, ctl->indent, stdout);
+			color_disable();
+
+			mesg_size -= subsys - line;
+			line = subsys;
+		}
+		/* Error, alert .. etc. colors */
+		has_color = set_level_color(rec->level, line, mesg_size) == 0;
+		safe_fwrite(line, mesg_size, ctl->indent, stdout);
+		if (has_color)
+			color_disable();
+	} else
+		safe_fwrite(line, mesg_size, ctl->indent, stdout);
+
+	/* Get the next line */
+	if (ctl->force_prefix) {
+		line = strtok(NULL, "\n");
+		if (line && *line) {
+			putchar('\n');
+			mesg_size = strlen(line);
+			goto full_output;
+		}
+	}
+
+done:
+	free(mesg_copy);	/* NULL if force_prefix is disabled */
+	putchar('\n');
+}
+
+/*
+ * Prints the 'buf' kernel ring buffer; the messages are filtered out according
+ * to 'levels' and 'facilities' bitarrays.
+ *
+ * In the raw mode the buffer is passed to raw_print() without any parsing.
+ */
+static void print_buffer(struct dmesg_control *ctl,
+			const char *buf, size_t size)
+{
+	struct dmesg_record rec = { .next = buf, .next_size = size };
+
+	if (ctl->raw) {
+		raw_print(ctl, buf, size);
+		return;
+	}
+
+	for (;;) {
+		if (get_next_syslog_record(ctl, &rec) != 0)
+			break;
+		print_record(ctl, &rec);
+	}
+}
+
+/*
+ * Reads from ctl->kmsg to ctl->kmsg_buf; one byte of the buffer is always
+ * left unused. Returns the read() return code.
+ */
+static ssize_t read_kmsg_one(struct dmesg_control *ctl)
+{
+	ssize_t n;
+
+	for (;;) {
+		n = read(ctl->kmsg, ctl->kmsg_buf,
+			 sizeof(ctl->kmsg_buf) - 1);
+
+		/* kmsg returns EPIPE if record was modified while reading;
+		 * try it again in this case */
+		if (n >= 0 || errno != EPIPE)
+			return n;
+	}
+}
+
+/*
+ * Opens /dev/kmsg and reads the first record to be sure the device is
+ * usable. The descriptor is stored in ctl->kmsg and the result of the first
+ * read in ctl->kmsg_first_read.
+ *
+ * Returns 0 on success, -1 on error (ctl->kmsg is -1 if the read failed).
+ */
+static int init_kmsg(struct dmesg_control *ctl)
+{
+	int flags = O_RDONLY;
+
+	if (ctl->follow)
+		setlinebuf(stdout);
+	else
+		flags |= O_NONBLOCK;
+
+	if ((ctl->kmsg = open("/dev/kmsg", flags)) < 0)
+		return -1;
+
+	/*
+	 * Seek after the last record available at the time
+	 * the last SYSLOG_ACTION_CLEAR was issued.
+	 *
+	 * ... otherwise SYSLOG_ACTION_CLEAR will have no effect for kmsg.
+	 */
+	lseek(ctl->kmsg, 0, SEEK_DATA);
+
+	/*
+	 * Old kernels (<3.5) allow to successfully open /dev/kmsg for
+	 * read-only, but read() returns -EINVAL :-(((
+	 *
+	 * Let's try to read the first record. The record is later processed in
+	 * read_kmsg().
+	 */
+	ctl->kmsg_first_read = read_kmsg_one(ctl);
+	if (ctl->kmsg_first_read >= 0)
+		return 0;
+
+	close(ctl->kmsg);
+	ctl->kmsg = -1;
+	return -1;
+}
+
+/*
+ * /dev/kmsg record format:
+ *
+ *     faclev,seqnum,timestamp[optional, ...];message\n
+ *      TAGNAME=value
+ *      ...
+ *
+ * - fields are separated by ','
+ * - last field is terminated by ';'
+ *
+ * LAST_KMSG_FIELD() is true if there is nothing more to parse in the record
+ * header; it means that 's' is NULL, points to '\0', or the char before 's'
+ * is the ';' terminator. The argument is evaluated more than once.
+ */
+#define LAST_KMSG_FIELD(s)	(!(s) || !*(s) || *((s) - 1) == ';')
+
+/*
+ * Parses one /dev/kmsg record from @buf (@sz bytes) to @rec.
+ *
+ * The message text is decoded in place, so @buf is modified and rec->mesg
+ * points to @buf.
+ *
+ * Returns 0 on success, -1 if the buffer is empty.
+ */
+static int parse_kmsg_record(struct dmesg_control *ctl,
+			     struct dmesg_record *rec,
+			     char *buf,
+			     size_t sz)
+{
+	const char *p = buf, *end;
+
+	if (sz == 0 || !buf || !*buf)
+		return -1;
+
+	end = buf + (sz - 1);		/* the last byte of the record */
+	INIT_DMESG_RECORD(rec);
+
+	while (p < end && isspace(*p))
+		p++;
+
+	/* A) priority and facility */
+	/* (parsed only if the result is used, otherwise skipped) */
+	if (ctl->fltr_lev || ctl->fltr_fac || ctl->decode ||
+	    ctl->raw || ctl->color)
+		p = parse_faclev(p, &rec->facility, &rec->level);
+	else
+		p = skip_item(p, end, ",");
+	if (LAST_KMSG_FIELD(p))
+		goto mesg;
+
+	/* B) sequence number */
+	p = skip_item(p, end, ",;");
+	if (LAST_KMSG_FIELD(p))
+		goto mesg;
+
+	/* C) timestamp */
+	if (is_timefmt(ctl, NONE))
+		p = skip_item(p, end, ",;");
+	else
+		p = parse_kmsg_timestamp(p, &rec->tv);
+	if (LAST_KMSG_FIELD(p))
+		goto mesg;
+
+	/* D) optional fields (ignore) */
+	p = skip_item(p, end, ";");
+
+mesg:
+	/* E) message text */
+	rec->mesg = p;
+	p = skip_item(p, end, "\n");
+	/* defensive check only; skip_item() returns the position it reached */
+	if (!p)
+		return -1;
+
+	/* The message text is terminated by \n, but it's possible that the
+	 * message contains another stuff behind this linebreak; in this case
+	 * the previous skip_item() returns pointer to the stuff behind \n.
+	 * Let's normalize all these situations and make sure we always point to
+	 * the \n.
+	 *
+	 * Note that the next unhexmangle_to_buffer() will replace \n by \0.
+	 */
+	if (*p && *p != '\n')
+		p--;
+
+	/*
+	 * Kernel escapes non-printable characters, unfortunately kernel
+	 * definition of "non-printable" is too strict. On UTF8 console we can
+	 * print many chars, so let's decode from kernel.
+	 */
+	/* NOTE(review): the source and destination are the same buffer;
+	 * presumably the function supports in-place decoding and returns the
+	 * size including the terminator -- confirm in mangle.h */
+	rec->mesg_size = unhexmangle_to_buffer(rec->mesg,
+				(char *) rec->mesg, p - rec->mesg + 1);
+
+	rec->mesg_size--;	/* don't count \0 */
+
+	/* F) message tags (ignore) */
+
+	return 0;
+}
+
+/*
+ * Every read() on /dev/kmsg returns exactly one record, so records can be
+ * parsed and printed one by one as they arrive. Unlike read_syslog_buffer(),
+ * no huge buffer is composed and print_buffer() is not needed; everything is
+ * done here.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int read_kmsg(struct dmesg_control *ctl)
+{
+	struct dmesg_record rec;
+	ssize_t len;
+
+	if (ctl->kmsg < 0 || ctl->method != DMESG_METHOD_KMSG)
+		return -1;
+
+	/*
+	 * The very first read() was already done while /dev/kmsg usability
+	 * was tested and its return code is kept in ctl->kmsg_first_read, so
+	 * the loop starts with that record and reads the next ones itself.
+	 */
+	for (len = ctl->kmsg_first_read; len > 0; len = read_kmsg_one(ctl)) {
+		ctl->kmsg_buf[len] = '\0';	/* for debug messages */
+
+		if (parse_kmsg_record(ctl, &rec,
+				      ctl->kmsg_buf, (size_t) len) != 0)
+			continue;	/* unparsable records are dropped */
+
+		print_record(ctl, &rec);
+	}
+
+	return 0;
+}
+
+/*
+ * Translates the argument of --time-format to the matching DMESG_TIMEFTM_*
+ * constant. An unknown name terminates the program with an error message.
+ */
+static int which_time_format(const char *s)
+{
+	static const struct {
+		const char *name;
+		int fmt;
+	} formats[] = {
+		{ "notime",	DMESG_TIMEFTM_NONE },
+		{ "ctime",	DMESG_TIMEFTM_CTIME },
+		{ "delta",	DMESG_TIMEFTM_DELTA },
+		{ "reltime",	DMESG_TIMEFTM_RELTIME },
+		{ "iso",	DMESG_TIMEFTM_ISO8601 }
+	};
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(formats); i++) {
+		if (strcmp(s, formats[i].name) == 0)
+			return formats[i].fmt;
+	}
+
+	errx(EXIT_FAILURE, _("unknown time format: %s"), s);
+}
+
+#ifdef TEST_DMESG
+/*
+ * Test build only: the boot time may be overridden by the environment
+ * variable DMESG_TEST_BOOTIME in the form "<sec>.<usec>". Without a usable
+ * value the real get_boot_time() is called.
+ *
+ * Returns 0 on success, -EINVAL if the overridden value ends up negative in
+ * @tv, or whatever get_boot_time() returns.
+ */
+static inline int dmesg_get_boot_time(struct timeval *tv)
+{
+	const char *env = getenv("DMESG_TEST_BOOTIME");
+	uintmax_t sec, usec;
+
+	if (!env || sscanf(env, "%ju.%ju", &sec, &usec) != 2)
+		return get_boot_time(tv);
+
+	tv->tv_sec = sec;
+	tv->tv_usec = usec;
+
+	if (tv->tv_sec < 0 || tv->tv_usec < 0)
+		return -EINVAL;
+	return 0;
+}
+#else
+# define dmesg_get_boot_time	get_boot_time
+#endif
+
+/*
+ * dmesg entry point: parses the command line into the dmesg_control struct,
+ * resolves the time format, sets up colors and the pager, and then either
+ * reads and prints the kernel messages or performs the requested klogctl()
+ * action.
+ *
+ * Returns EXIT_SUCCESS; every failure terminates through err()/errx() or
+ * returns EXIT_FAILURE.
+ */
+int main(int argc, char *argv[])
+{
+	char *buf = NULL;
+	int  c, nopager = 0;
+	int  console_level = 0;
+	int  klog_rc = 0;
+	int  delta = 0;
+	ssize_t n;
+	static struct dmesg_control ctl = {
+		.filename = NULL,
+		.action = SYSLOG_ACTION_READ_ALL,
+		.method = DMESG_METHOD_KMSG,
+		.kmsg = -1,
+		.time_fmt = DMESG_TIMEFTM_TIME,
+		.indent = 0,
+	};
+	int colormode = UL_COLORMODE_UNDEF;
+	enum {
+		OPT_TIME_FORMAT = CHAR_MAX + 1,
+	};
+
+	static const struct option longopts[] = {
+		{ "buffer-size",   required_argument, NULL, 's' },
+		{ "clear",         no_argument,	      NULL, 'C' },
+		{ "color",         optional_argument, NULL, 'L' },
+		{ "console-level", required_argument, NULL, 'n' },
+		{ "console-off",   no_argument,       NULL, 'D' },
+		{ "console-on",    no_argument,       NULL, 'E' },
+		{ "decode",        no_argument,	      NULL, 'x' },
+		{ "file",          required_argument, NULL, 'F' },
+		{ "facility",      required_argument, NULL, 'f' },
+		{ "follow",        no_argument,       NULL, 'w' },
+		{ "human",         no_argument,       NULL, 'H' },
+		{ "help",          no_argument,	      NULL, 'h' },
+		{ "kernel",        no_argument,       NULL, 'k' },
+		{ "level",         required_argument, NULL, 'l' },
+		{ "syslog",        no_argument,       NULL, 'S' },
+		{ "raw",           no_argument,       NULL, 'r' },
+		{ "read-clear",    no_argument,	      NULL, 'c' },
+		{ "reltime",       no_argument,       NULL, 'e' },
+		{ "show-delta",    no_argument,	      NULL, 'd' },
+		{ "ctime",         no_argument,       NULL, 'T' },
+		{ "notime",        no_argument,       NULL, 't' },
+		{ "nopager",       no_argument,       NULL, 'P' },
+		{ "userspace",     no_argument,       NULL, 'u' },
+		{ "version",       no_argument,	      NULL, 'V' },
+		{ "time-format",   required_argument, NULL, OPT_TIME_FORMAT },
+		{ "force-prefix",  no_argument,       NULL, 'p' },
+		{ NULL,	           0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'C','D','E','c','n','r' },	/* clear,off,on,read-clear,level,raw*/
+		{ 'H','r' },			/* human, raw */
+		{ 'L','r' },			/* color, raw */
+		{ 'S','w' },			/* syslog,follow */
+		{ 'T','r' },			/* ctime, raw */
+		{ 'd','r' },			/* delta, raw */
+		{ 'e','r' },			/* reltime, raw */
+		{ 'r','x' },			/* raw, decode */
+		{ 'r','t' },			/* notime, raw */
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	/*
+	 * Only options that are handled below are listed here, so getopt
+	 * reports anything else as an invalid option.
+	 */
+	while ((c = getopt_long(argc, argv, "CcDdEeF:f:HhkL::l:n:PprSs:TtuVwx",
+				longopts, NULL)) != -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch (c) {
+		case 'C':
+			ctl.action = SYSLOG_ACTION_CLEAR;
+			break;
+		case 'c':
+			ctl.action = SYSLOG_ACTION_READ_CLEAR;
+			break;
+		case 'D':
+			ctl.action = SYSLOG_ACTION_CONSOLE_OFF;
+			break;
+		case 'd':
+			delta = 1;
+			break;
+		case 'E':
+			ctl.action = SYSLOG_ACTION_CONSOLE_ON;
+			break;
+		case 'e':
+			ctl.time_fmt = DMESG_TIMEFTM_RELTIME;
+			break;
+		case 'F':
+			ctl.filename = optarg;
+			ctl.method = DMESG_METHOD_MMAP;
+			break;
+		case 'f':
+			ctl.fltr_fac = 1;
+			if (string_to_bitarray(optarg,
+					     ctl.facilities, parse_facility) < 0)
+				return EXIT_FAILURE;
+			break;
+		case 'H':
+			ctl.time_fmt = DMESG_TIMEFTM_RELTIME;
+			colormode = UL_COLORMODE_AUTO;
+			ctl.pager = 1;
+			break;
+		case 'h':
+			usage();
+			break;
+		case 'k':
+			ctl.fltr_fac = 1;
+			setbit(ctl.facilities, FAC_BASE(LOG_KERN));
+			break;
+		case 'L':
+			colormode = UL_COLORMODE_AUTO;
+			if (optarg)
+				colormode = colormode_or_err(optarg,
+						_("unsupported color mode"));
+			break;
+		case 'l':
+			ctl.fltr_lev= 1;
+			if (string_to_bitarray(optarg,
+					     ctl.levels, parse_level) < 0)
+				return EXIT_FAILURE;
+			break;
+		case 'n':
+			ctl.action = SYSLOG_ACTION_CONSOLE_LEVEL;
+			console_level = parse_level(optarg, 0);
+			break;
+		case 'P':
+			nopager = 1;
+			break;
+		case 'p':
+			ctl.force_prefix = 1;
+			break;
+		case 'r':
+			ctl.raw = 1;
+			break;
+		case 'S':
+			ctl.method = DMESG_METHOD_SYSLOG;
+			break;
+		case 's':
+			ctl.bufsize = strtou32_or_err(optarg,
+					_("invalid buffer size argument"));
+			/* never use a buffer smaller than 4096 bytes */
+			if (ctl.bufsize < 4096)
+				ctl.bufsize = 4096;
+			break;
+		case 'T':
+			ctl.time_fmt = DMESG_TIMEFTM_CTIME;
+			break;
+		case 't':
+			ctl.time_fmt = DMESG_TIMEFTM_NONE;
+			break;
+		case 'u':
+			/* everything except the first facility_names[] entry */
+			ctl.fltr_fac = 1;
+			for (n = 1; (size_t) n < ARRAY_SIZE(facility_names); n++)
+				setbit(ctl.facilities, n);
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'w':
+			ctl.follow = 1;
+			break;
+		case 'x':
+			ctl.decode = 1;
+			break;
+		case OPT_TIME_FORMAT:
+			ctl.time_fmt = which_time_format(optarg);
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* dmesg does not accept any positional argument */
+	if (argc != optind) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	/* formats based on the boot time fall back to "notime" without it */
+	if ((is_timefmt(&ctl, RELTIME) ||
+	     is_timefmt(&ctl, CTIME)   ||
+	     is_timefmt(&ctl, ISO8601))
+	    && dmesg_get_boot_time(&ctl.boot_time) != 0)
+		ctl.time_fmt = DMESG_TIMEFTM_NONE;
+
+	/* --show-delta modifies the time format selected above */
+	if (delta)
+		switch (ctl.time_fmt) {
+		case DMESG_TIMEFTM_CTIME:
+			ctl.time_fmt = DMESG_TIMEFTM_CTIME_DELTA;
+			break;
+		case DMESG_TIMEFTM_TIME:
+			ctl.time_fmt = DMESG_TIMEFTM_TIME_DELTA;
+			break;
+		case DMESG_TIMEFTM_ISO8601:
+			warnx(_("--show-delta is ignored when used together with iso8601 time format"));
+			break;
+		default:
+			ctl.time_fmt = DMESG_TIMEFTM_DELTA;
+		}
+
+
+	ctl.color = colors_init(colormode, "dmesg") ? 1 : 0;
+	/* --follow and --nopager both disable the pager */
+	if (ctl.follow)
+		nopager = 1;
+	ctl.pager = nopager ? 0 : ctl.pager;
+	if (ctl.pager)
+		pager_redirect();
+
+	switch (ctl.action) {
+	case SYSLOG_ACTION_READ_ALL:
+	case SYSLOG_ACTION_READ_CLEAR:
+		/* fall back to the syslog interface if /dev/kmsg is unusable */
+		if (ctl.method == DMESG_METHOD_KMSG && init_kmsg(&ctl) != 0)
+			ctl.method = DMESG_METHOD_SYSLOG;
+
+		if (ctl.raw
+		    && ctl.method != DMESG_METHOD_KMSG
+		    && (ctl.fltr_lev || ctl.fltr_fac))
+			    errx(EXIT_FAILURE, _("--raw can be used together with --level or "
+				 "--facility only when reading messages from /dev/kmsg"));
+
+		/* only kmsg supports multi-line messages */
+		if (ctl.force_prefix && ctl.method != DMESG_METHOD_KMSG)
+			ctl.force_prefix = 0;
+
+		/* the pager (if requested) has already been started above */
+		n = read_buffer(&ctl, &buf);
+		if (n < 0)
+			/* report the failure before free() can clobber errno */
+			err(EXIT_FAILURE, _("read kernel buffer failed"));
+		if (n > 0)
+			print_buffer(&ctl, buf, n);
+		if (!ctl.mmap_buff)
+			free(buf);
+		if (ctl.kmsg >= 0)
+			close(ctl.kmsg);
+		break;
+	case SYSLOG_ACTION_CLEAR:
+	case SYSLOG_ACTION_CONSOLE_OFF:
+	case SYSLOG_ACTION_CONSOLE_ON:
+		klog_rc = klogctl(ctl.action, NULL, 0);
+		break;
+	case SYSLOG_ACTION_CONSOLE_LEVEL:
+		klog_rc = klogctl(ctl.action, NULL, console_level);
+		break;
+	default:
+		errx(EXIT_FAILURE, _("unsupported command"));
+		break;
+	}
+
+
+	if (klog_rc)
+		err(EXIT_FAILURE, _("klogctl failed"));
+
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/eject.1 b/sys-utils/eject.1
new file mode 100644
index 0000000..f901b23
--- /dev/null
+++ b/sys-utils/eject.1
@@ -0,0 +1,187 @@
+.\" Copyright (C) 1994-2005 Jeff Tranter (tranter@pobox.com)
+.\" Copyright (C) 2012 Karel Zak <kzak@redhat.com>
+.\"
+.\" It may be distributed under the GNU Public License, version 2, or
+.\" any higher version. See section COPYING of the GNU Public license
+.\" for conditions under which this file may be redistributed.
+.TH EJECT 1 "April 2012" "Linux" "User Commands"
+.SH NAME
+eject \- eject removable media
+.SH SYNOPSIS
+.B eject
+[options]
+.IR device | mountpoint
+.SH DESCRIPTION
+.B eject
+allows removable media (typically a CD-ROM, floppy disk, tape, JAZ, ZIP or USB
+disk) to be ejected under software control.  The command can also control some
+multi-disc CD-ROM changers, the auto-eject feature supported by some devices,
+and close the disc tray of some CD-ROM drives.
+.PP
+The device corresponding to \fIdevice\fP or \fImountpoint\fP is ejected.  If no
+name is specified, the default name \fB/dev/cdrom\fR is used.  The device may be
+addressed by device name (e.g. 'sda'), device path (e.g. '/dev/sda'),
+UUID=\fIuuid\fR or LABEL=\fIlabel\fR tags.
+.PP
+There are four different methods of ejecting, depending on whether the device
+is a CD-ROM, SCSI device, removable floppy, or tape.  By default \fBeject\fR tries
+all four methods in order until it succeeds.
+.PP
+If a device partition is specified, the whole-disk device is used.  If the device
+or a device partition is currently mounted, it is unmounted before ejecting.
+.SH OPTIONS
+.TP
+.BR \-a , " \-\-auto on" | off
+This option controls the auto-eject mode, supported by some devices.  When
+enabled, the drive automatically ejects when the device is closed.
+.TP
+.BR \-c , " \-\-changerslot " \fIslot
+With this option a CD slot can be selected from an ATAPI/IDE CD-ROM changer.
+The CD-ROM drive cannot be in use (mounted data CD or playing a music CD) for
+a change request to work. Please also note that the first slot of the changer
+is referred to as 0, not 1.
+.TP
+.BR \-d , " \-\-default"
+List the default device name.
+.TP
+.BR \-F , " \-\-force"
+Force eject, don't check device type.
+.TP
+.BR \-f , " \-\-floppy"
+This option specifies that the drive should be ejected using a removable floppy
+disk eject command.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.TP
+.BR \-i , " \-\-manualeject on" | off
+This option controls locking of the hardware eject button.  When enabled, the
+drive will not be ejected when the button is pressed.  This is useful when you
+are carrying a laptop in a bag or case and don't want it to eject if the button
+is inadvertently pressed.
+.TP
+.BR \-M , " \-\-no\-partitions\-unmount"
+The option tells eject to not try to unmount other partitions on partitioned
+devices.  If another partition is still mounted, the program will not attempt
+to eject the media.  It will attempt to unmount only the device or mountpoint
+given on the command line.
+.TP
+.BR \-m , " \-\-no\-unmount"
+The option tells eject to not try to unmount at all.
+.TP
+.BR \-n , " \-\-noop"
+With this option the selected device is displayed but no action is performed.
+.TP
+.BR \-p , " \-\-proc"
+This option allows you to use /proc/mounts instead of /etc/mtab.  It also passes the
+\fB\-n\fR option to \fBumount\fR(8).
+.TP
+.BR \-q , " \-\-tape"
+This option specifies that the drive should be ejected using a tape drive
+offline command.
+.TP
+.BR \-r , " \-\-cdrom"
+This option specifies that the drive should be ejected using a CDROM eject
+command.
+.TP
+.BR \-s , " \-\-scsi"
+This option specifies that the drive should be ejected using SCSI commands.
+.TP
+.BR \-T , " \-\-traytoggle"
+With this option the drive is given a CD-ROM tray close command if it's opened,
+and a CD-ROM tray eject command if it's closed.  Not all devices support this
+command, because it uses the CD-ROM tray close command (see \fB\-\-trayclose\fR).
+.TP
+.BR \-t , " \-\-trayclose"
+With this option the drive is given a CD-ROM tray close command.  Not all
+devices support this command.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-v , " \-\-verbose"
+Run in verbose mode; more information is displayed about what the command is
+doing.
+.TP
+.BR \-X , " \-\-listspeed"
+With this option the CD-ROM drive will be probed to detect the available
+speeds.  The output is a list of speeds which can be used as an argument of the
+\fB\-x\fR option.  This only works with Linux 2.6.13 or higher; on previous versions
+only the maximum speed will be reported.  Also note that some drives may not
+correctly report the speed and therefore this option does not work with them.
+.TP
+.BR \-x , " \-\-cdspeed " \fIspeed
+With this option the drive is given a CD-ROM select speed command.  The
+.I speed
+argument is a number indicating the desired speed (e.g. 8 for 8X speed), or 0
+for maximum data rate.  Not all devices support this command and you can only
+specify speeds that the drive is capable of.  Every time the media is changed
+this option is cleared.  This option can be used alone, or with the
+\fB\-t\fR and \fB\-c\fR options.
+.SH EXIT STATUS
+Returns 0 if operation was successful, 1 if operation failed or command syntax
+was not valid.
+.SH NOTES
+.B eject
+only works with devices that support one or more of the four methods of
+ejecting.  This includes most CD-ROM drives (IDE, SCSI, and proprietary), some
+SCSI tape drives, JAZ drives, ZIP drives (parallel port, SCSI, and IDE
+versions), and LS120 removable floppies.  Users have also reported success with
+floppy drives on Sun SPARC and Apple Macintosh systems.  If
+.B eject
+does not work, it is most likely a limitation of the kernel driver for the
+device and not the
+.B eject
+program itself.
+.PP
+The \fB\-r\fR, \fB\-s\fR, \fB\-f\fR, and \fB\-q\fR options allow controlling
+which methods are used to
+eject.  More than one method can be specified.  If none of these options are
+specified, it tries all four (this works fine in most cases).
+.PP
+.B eject
+may not always be able to determine if the device is mounted (e.g. if it has
+several names).  If the device name is a symbolic link,
+.B eject
+will follow the link and use the device that it points to.
+.PP
+If
+.B eject
+determines that the device can have multiple partitions, it will attempt to
+unmount all mounted partitions of the device before ejecting (see also
+\fB\-\-no\-partitions\-unmount\fR).  If an unmount fails, the program will not
+attempt to eject the media.
+.PP
+You can eject an audio CD.  Some CD-ROM drives will refuse to open the tray if
+the drive is empty.  Some devices do not support the tray close command.
+.PP
+If the auto-eject feature is enabled, then the drive will always be ejected
+after running this command.  Not all Linux kernel CD-ROM drivers support the
+auto-eject mode.  There is no way to find out the state of the auto-eject mode.
+.PP
+You need appropriate privileges to access the device files.  Running as root is
+required to eject some devices (e.g. SCSI devices).
+.SH AUTHORS
+.MT tranter@\:pobox.com
+Jeff Tranter
+.ME
+- original author.
+.br
+.MT kzak@\:redhat.com
+Karel Zak
+.ME
+and
+.MT mluscon@\:redhat.com
+Michal Luscon
+.ME
+- util-linux version.
+.SH SEE ALSO
+.BR findmnt (8),
+.BR lsblk (8),
+.BR mount (8),
+.BR umount (8)
+.SH AVAILABILITY
+The eject command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/eject.c b/sys-utils/eject.c
new file mode 100644
index 0000000..8196b60
--- /dev/null
+++ b/sys-utils/eject.c
@@ -0,0 +1,1044 @@
+/*
+ * Copyright (C) 1994-2005 Jeff Tranter (tranter@pobox.com)
+ * Copyright (C) 2012 Karel Zak <kzak@redhat.com>
+ * Copyright (C) Michal Luscon <mluscon@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <err.h>
+#include <stdarg.h>
+
+#include <getopt.h>
+#include <errno.h>
+#include <regex.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/mtio.h>
+#include <linux/cdrom.h>
+#include <linux/fd.h>
+#include <sys/mount.h>
+#include <scsi/scsi.h>
+#include <scsi/sg.h>
+#include <scsi/scsi_ioctl.h>
+#include <sys/time.h>
+
+#include <libmount.h>
+
+#include "c.h"
+#include "closestream.h"
+#include "nls.h"
+#include "strutils.h"
+#include "xalloc.h"
+#include "pathnames.h"
+#include "sysfs.h"
+#include "monotonic.h"
+
+/*
+ * sg_io_hdr_t driver_status -- see kernel include/scsi/scsi.h
+ */
+#ifndef DRIVER_SENSE
+# define DRIVER_SENSE	0x08
+#endif
+
+
+#define EJECT_DEFAULT_DEVICE "/dev/cdrom"
+
+
+/* Used by the toggle_tray() function. If ejecting the tray takes this
+ * time or less, the tray was probably already ejected, so we close it
+ * again.
+ */
+#define TRAY_WAS_ALREADY_OPEN_USECS  200000	/* about 0.2 seconds */
+
+/* Settings collected from the command line plus the device being handled. */
+struct eject_control {
+	struct libmnt_table *mtab;
+	char *device;		/* device or mount point to be ejected */
+	int fd;			/* file descriptor for device */
+
+	/* command flags; each one is named after its short option */
+	unsigned int a_option:1;
+	unsigned int c_option:1;
+	unsigned int d_option:1;
+	unsigned int F_option:1;
+	unsigned int f_option:1;
+	unsigned int i_option:1;
+	unsigned int M_option:1;
+	unsigned int m_option:1;
+	unsigned int n_option:1;
+	unsigned int p_option:1;
+	unsigned int q_option:1;
+	unsigned int r_option:1;
+	unsigned int s_option:1;
+	unsigned int T_option:1;
+	unsigned int t_option:1;
+	unsigned int v_option:1;
+	unsigned int X_option:1;
+	unsigned int x_option:1;
+
+	/* on/off arguments of -a and -i */
+	unsigned int a_arg:1;
+	unsigned int i_arg:1;
+
+	long int c_arg;		/* changer slot number */
+	long int x_arg;		/* cd speed */
+};
+
+/* Prints "<program name>: <formatted message>" and a newline on stdout. */
+static void vinfo(const char *fmt, va_list va)
+{
+	printf("%s: ", program_invocation_short_name);
+	vfprintf(stdout, fmt, va);
+	putchar('\n');
+}
+
+/* Prints a formatted message through vinfo(), but only in verbose mode (-v). */
+static inline void verbose(const struct eject_control *ctl, const char *fmt, ...)
+{
+	if (ctl->v_option) {
+		va_list ap;
+
+		va_start(ap, fmt);
+		vinfo(fmt, ap);
+		va_end(ap);
+	}
+}
+
+/* Unconditionally prints a formatted message through vinfo(). */
+static inline void info(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vinfo(fmt, ap);
+	va_end(ap);
+}
+
+/* Prints the help text on standard output and exits with EXIT_SUCCESS. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	fputs(USAGE_HEADER, stdout);
+	printf(_(" %s [options] [<device>|<mountpoint>]\n"),
+	       program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Eject removable media.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	/* one translatable string, keep the descriptions aligned */
+	fputs(_(" -a, --auto <on|off>         turn auto-eject feature on or off\n"
+		" -c, --changerslot <slot>    switch discs on a CD-ROM changer\n"
+		" -d, --default               display default device\n"
+		" -f, --floppy                eject floppy\n"
+		" -F, --force                 don't care about device type\n"
+		" -i, --manualeject <on|off>  toggle manual eject protection on/off\n"
+		" -m, --no-unmount            do not unmount device even if it is mounted\n"
+		" -M, --no-partitions-unmount do not unmount another partitions\n"
+		" -n, --noop                  don't eject, just show device found\n"
+		" -p, --proc                  use /proc/mounts instead of /etc/mtab\n"
+		" -q, --tape                  eject tape\n"
+		" -r, --cdrom                 eject CD-ROM\n"
+		" -s, --scsi                  eject SCSI device\n"
+		" -t, --trayclose             close tray\n"
+		" -T, --traytoggle            toggle tray\n"
+		" -v, --verbose               enable verbose output\n"
+		" -x, --cdspeed <speed>       set CD-ROM max speed\n"
+		" -X, --listspeed             list CD-ROM available speeds\n"),
+		stdout);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	printf(USAGE_HELP_OPTIONS(29));
+
+	fputs(_("\nBy default tries -r, -s, -f, and -q in order until success.\n"), stdout);
+	printf(USAGE_MAN_TAIL("eject(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+
+/*
+ * Handle command line options.
+ *
+ * Every option sets its <letter>_option flag in @ctl; options with an argument
+ * also store the parsed value (a_arg, i_arg, c_arg, x_arg). The optional
+ * positional argument is duplicated into ctl->device. -h and -V print their
+ * text and exit; invalid usage terminates the program with an error.
+ */
+static void parse_args(struct eject_control *ctl, int argc, char **argv)
+{
+	static const struct option long_opts[] =
+	{
+		{"auto",	required_argument, NULL, 'a'},
+		{"cdrom",	no_argument,	   NULL, 'r'},
+		{"cdspeed",	required_argument, NULL, 'x'},
+		{"changerslot", required_argument, NULL, 'c'},
+		{"default",	no_argument,	   NULL, 'd'},
+		{"floppy",	no_argument,	   NULL, 'f'},
+		{"force",       no_argument,       NULL, 'F'},
+		{"help",	no_argument,	   NULL, 'h'},
+		{"listspeed",   no_argument,       NULL, 'X'},
+		{"manualeject", required_argument, NULL, 'i'},
+		{"noop",	no_argument,	   NULL, 'n'},
+		{"no-unmount",	no_argument,	   NULL, 'm'},
+		{"no-partitions-unmount", no_argument, NULL, 'M' },
+		{"proc",	no_argument,	   NULL, 'p'},
+		{"scsi",	no_argument,	   NULL, 's'},
+		{"tape",	no_argument,	   NULL, 'q'},
+		{"trayclose",	no_argument,	   NULL, 't'},
+		{"traytoggle",	no_argument,	   NULL, 'T'},
+		{"verbose",	no_argument,	   NULL, 'v'},
+		{"version",	no_argument,	   NULL, 'V'},
+		{NULL, 0, NULL, 0}
+	};
+	int opt, remaining;
+
+	for (;;) {
+		opt = getopt_long(argc, argv,
+				  "a:c:i:x:dfFhnqrstTXvVpmM", long_opts, NULL);
+		if (opt == -1)
+			break;
+
+		switch (opt) {
+		/* options with an argument */
+		case 'a':
+			ctl->a_option = 1;
+			ctl->a_arg = parse_switch(optarg, _("argument error"),
+						"on", "off",  "1", "0",  NULL);
+			break;
+		case 'i':
+			ctl->i_option = 1;
+			ctl->i_arg = parse_switch(optarg, _("argument error"),
+						"on", "off",  "1", "0",  NULL);
+			break;
+		case 'c':
+			ctl->c_option = 1;
+			ctl->c_arg = strtoul_or_err(optarg, _("invalid argument to --changerslot/-c option"));
+			break;
+		case 'x':
+			ctl->x_option = 1;
+			ctl->x_arg = strtoul_or_err(optarg, _("invalid argument to --cdspeed/-x option"));
+			break;
+
+		/* plain flags */
+		case 'd':
+			ctl->d_option = 1;
+			break;
+		case 'F':
+			ctl->F_option = 1;
+			break;
+		case 'f':
+			ctl->f_option = 1;
+			break;
+		case 'M':
+			ctl->M_option = 1;
+			break;
+		case 'm':
+			ctl->m_option = 1;
+			break;
+		case 'n':
+			ctl->n_option = 1;
+			break;
+		case 'p':
+			ctl->p_option = 1;
+			break;
+		case 'q':
+			ctl->q_option = 1;
+			break;
+		case 'r':
+			ctl->r_option = 1;
+			break;
+		case 's':
+			ctl->s_option = 1;
+			break;
+		case 'T':
+			ctl->T_option = 1;
+			break;
+		case 't':
+			ctl->t_option = 1;
+			break;
+		case 'v':
+			ctl->v_option = 1;
+			break;
+		case 'X':
+			ctl->X_option = 1;
+			break;
+
+		/* informational options and errors */
+		case 'h':
+			usage();
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			exit(EXIT_SUCCESS);
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+			break;
+		}
+	}
+
+	/* check for a single additional argument */
+	remaining = argc - optind;
+	if (remaining > 1)
+		errx(EXIT_FAILURE, _("too many arguments"));
+
+	if (remaining == 1)
+		ctl->device = xstrdup(argv[optind]);
+}
+
+/*
+ * Given name, such as foo, see if any of the following exist:
+ *
+ * foo (if foo starts with '.' or '/')
+ * /dev/foo
+ *
+ * If found, return the full path as a newly allocated string. Return NULL if
+ * @name is NULL, if nothing was found, or if "/dev/<name>" does not fit into
+ * PATH_MAX (a truncated path must never be probed because it could name a
+ * different file).
+ */
+static char *find_device(const char *name)
+{
+	char buf[PATH_MAX];
+	int len;
+
+	if (!name)
+		return NULL;
+
+	if ((*name == '.' || *name == '/') && access(name, F_OK) == 0)
+		return xstrdup(name);
+
+	len = snprintf(buf, sizeof(buf), "/dev/%s", name);
+	if (len < 0 || (size_t) len >= sizeof(buf))
+		return NULL;
+
+	if (access(buf, F_OK) == 0)
+		return xstrdup(buf);
+
+	return NULL;
+}
+
+/*
+ * Set (ctl->a_arg != 0) or clear (ctl->a_arg == 0) the auto-eject mode of the
+ * device open on ctl->fd. Terminates the program if the ioctl fails or is not
+ * available at build time.
+ */
+static void auto_eject(const struct eject_control *ctl)
+{
+	int rc = -1;
+
+#if defined(CDROM_SET_OPTIONS) && defined(CDROM_CLEAR_OPTIONS)
+	rc = ioctl(ctl->fd,
+		   ctl->a_arg ? CDROM_SET_OPTIONS : CDROM_CLEAR_OPTIONS,
+		   CDO_AUTO_EJECT);
+#else
+	errno = ENOSYS;
+#endif
+	if (rc < 0)
+		err(EXIT_FAILURE,_("CD-ROM auto-eject command failed"));
+}
+
+/*
+ * Locks (ctl->i_arg != 0) or unlocks (ctl->i_arg == 0) the drive door, i.e.
+ * stops the CDROM from opening when the manual eject button is pressed.
+ * This can be useful when you carry your laptop in your bag while it's on
+ * and no CD is inserted in its drive.
+ * Implemented as found in Documentation/ioctl/cdrom.txt
+ *
+ * Terminates the program if the ioctl fails; otherwise reports the new state.
+ */
+static void manual_eject(const struct eject_control *ctl)
+{
+	if (ioctl(ctl->fd, CDROM_LOCKDOOR, ctl->i_arg) < 0) {
+		/* errx() and err() do not return */
+		if (errno == EDRIVE_CANT_DO_THIS)
+			errx(EXIT_FAILURE, _("CD-ROM door lock is not supported"));
+		if (errno == EBUSY)
+			errx(EXIT_FAILURE, _("other users have the drive open and not CAP_SYS_ADMIN"));
+		err(EXIT_FAILURE, _("CD-ROM lock door command failed"));
+	}
+
+	if (!ctl->i_arg)
+		info(_("CD-Drive may be ejected with device button"));
+	else
+		info(_("CD-Drive may NOT be ejected with device button"));
+}
+
+/*
+ * Changer select. CDROM_SELECT_DISC is preferred, older kernels used
+ * CDROMLOADFROMSLOT.
+ *
+ * Selects the slot ctl->c_arg on the device open on ctl->fd and terminates the
+ * program if the ioctl fails. Without any of the ioctls at build time only a
+ * warning is printed.
+ */
+static void changer_select(const struct eject_control *ctl)
+{
+#ifdef CDROM_SELECT_DISC
+	if (ioctl(ctl->fd, CDROM_SELECT_DISC, ctl->c_arg) < 0)
+		err(EXIT_FAILURE, _("CD-ROM select disc command failed"));
+
+#elif defined CDROMLOADFROMSLOT
+	if (ioctl(ctl->fd, CDROMLOADFROMSLOT, ctl->c_arg) != 0)
+		err(EXIT_FAILURE, _("CD-ROM load from slot command failed"));
+#else
+	/* warnx() terminates the line itself, no "\n" in the message */
+	warnx(_("IDE/ATAPI CD-ROM changer not supported by this kernel"));
+#endif
+}
+
+/*
+ * Close tray. Not supported by older kernels.
+ *
+ * Terminates the program if the ioctl on @fd fails. Without CDROMCLOSETRAY
+ * and CDIOCCLOSE at build time only a warning is printed.
+ */
+static void close_tray(int fd)
+{
+#if defined(CDROMCLOSETRAY) || defined(CDIOCCLOSE)
+	int status;
+
+# if defined(CDROMCLOSETRAY)
+	status = ioctl(fd, CDROMCLOSETRAY);
+# else
+	status = ioctl(fd, CDIOCCLOSE);
+# endif
+	if (status != 0)
+		err(EXIT_FAILURE, _("CD-ROM tray close command failed"));
+#else
+	(void) fd;	/* unused in this configuration */
+
+	/* warnx() terminates the line itself, no "\n" in the message */
+	warnx(_("CD-ROM tray close command not supported by this kernel"));
+#endif
+}
+
+/*
+ * Eject using the CDROMEJECT ioctl (or CDIOCEJECT where only that exists).
+ * With CDROMEJECT the door is unlocked first and a failed unlock already
+ * counts as a failure.
+ *
+ * Returns 1 if successful, 0 otherwise.
+ */
+static int eject_cdrom(int fd)
+{
+#if defined(CDROMEJECT)
+	if (ioctl(fd, CDROM_LOCKDOOR, 0) < 0)
+		return 0;
+
+	return ioctl(fd, CDROMEJECT) < 0 ? 0 : 1;
+#elif defined(CDIOCEJECT)
+	return ioctl(fd, CDIOCEJECT) < 0 ? 0 : 1;
+#else
+	warnx(_("CD-ROM eject unsupported"));
+	errno = ENOSYS;
+	return 0;
+#endif
+}
+
+/*
+ * Toggle tray.
+ *
+ * Written by Benjamin Schwenk <benjaminschwenk@yahoo.de> and
+ * Sybren Stuvel <sybren@thirdtower.com>
+ *
+ * Not supported by older kernels because it might use
+ * close_tray().
+ *
+ * With CDROM_DRIVE_STATUS available the drive is asked for its state and the
+ * tray is closed or ejected accordingly. Otherwise the tray is ejected and the
+ * time this takes is used to guess whether it was already open.
+ */
+static void toggle_tray(int fd)
+{
+#ifdef CDROM_DRIVE_STATUS
+	/*
+	 * First ask the CDROM for info, otherwise fall back to manual. The
+	 * ioctl takes a slot argument; CDSL_CURRENT is passed explicitly so
+	 * the kernel never sees an indeterminate value.
+	 */
+	switch (ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT)) {
+	case CDS_TRAY_OPEN:
+		close_tray(fd);
+		return;
+
+	case CDS_NO_DISC:
+	case CDS_DISC_OK:
+		if (!eject_cdrom(fd))
+			err(EXIT_FAILURE, _("CD-ROM eject command failed"));
+		return;
+	case CDS_NO_INFO:
+		warnx(_("no CD-ROM information available"));
+		return;
+	case CDS_DRIVE_NOT_READY:
+		warnx(_("CD-ROM drive is not ready"));
+		return;
+	default:
+		err(EXIT_FAILURE, _("CD-ROM status command failed"));
+	}
+#else
+	struct timeval time_start, time_stop;
+	long time_elapsed;
+
+	/* Try to open the CDROM tray and measure the time needed for it.
+	 * In my experience the function needs less than 0.05
+	 * seconds if the tray was already open, and at least 1.5 seconds
+	 * if it was closed.  */
+	gettime_monotonic(&time_start);
+
+	/* Send the CDROMEJECT command to the device. */
+	if (!eject_cdrom(fd))
+		err(EXIT_FAILURE, _("CD-ROM eject command failed"));
+
+	/* Get the second timestamp, to measure the time needed to open
+	 * the tray.  */
+	gettime_monotonic(&time_stop);
+
+	/* subtract first, so only the small difference is scaled to usecs */
+	time_elapsed = (long) (time_stop.tv_sec - time_start.tv_sec) * 1000000L +
+		       (long) (time_stop.tv_usec - time_start.tv_usec);
+
+	/* If the tray "opened" too fast, we can be nearly sure, that it
+	 * was already open. In this case, close it now. Else the tray was
+	 * closed before. This would mean that we are done.  */
+	if (time_elapsed < TRAY_WAS_ALREADY_OPEN_USECS)
+		close_tray(fd);
+#endif
+}
+
+/*
+ * Select Speed of CD-ROM drive.
+ * Thanks to Roland Krivanek (krivanek@fmph.uniba.sk)
+ * http://dmpc.dbp.fmph.uniba.sk/~krivanek/cdrom_speed/
+ *
+ * Requests the speed ctl->x_arg for the device open on ctl->fd and terminates
+ * the program if the ioctl fails. Without CDROM_SELECT_SPEED at build time
+ * only a warning is printed.
+ */
+static void select_speed(const struct eject_control *ctl)
+{
+#ifdef CDROM_SELECT_SPEED
+	int rc = ioctl(ctl->fd, CDROM_SELECT_SPEED, ctl->x_arg);
+
+	if (rc != 0)
+		err(EXIT_FAILURE, _("CD-ROM select speed command failed"));
+#else
+	warnx(_("CD-ROM select speed command not supported by this kernel"));
+#endif
+}
+
+/*
+ * Read Speed of CD-ROM drive. From Linux 2.6.13, the current speed
+ * is correctly reported
+ *
+ * The file _PATH_PROC_CDROMINFO is scanned for the "drive name:" line to find
+ * the (0-based) column of the drive named by the last path component of
+ * @devname, and then for the "drive speed:" line, where the value in the same
+ * column is returned.
+ *
+ * Never returns on failure; the program is terminated instead.
+ */
+static int read_speed(const char *devname)
+{
+	static const char delim[] = "\t \n";
+	int drive_number = -1;
+	const char *name;
+	char line[512];
+	FILE *f;
+
+	f = fopen(_PATH_PROC_CDROMINFO, "r");
+	if (!f)
+		err(EXIT_FAILURE, _("cannot open %s"), _PATH_PROC_CDROMINFO);
+
+	/* use the last path component; a name without '/' is used as is */
+	name = strrchr(devname, '/');
+	name = name ? name + 1 : devname;
+
+	while (fgets(line, sizeof(line), f)) {
+		char *str;
+
+		if (drive_number == -1) {
+			/* find drive number in line "drive name" */
+			if (strncmp(line, "drive name:", 11) != 0)
+				continue;
+
+			/* exact match, so "sr1" is not mistaken for "sr10" */
+			drive_number = 0;
+			for (str = strtok(&line[11], delim);
+			     str && strcmp(name, str) != 0;
+			     str = strtok(NULL, delim))
+				drive_number++;
+
+			if (!str)
+				errx(EXIT_FAILURE,
+				     _("%s: failed to finding CD-ROM name"),
+				     _PATH_PROC_CDROMINFO);
+
+		} else if (strncmp(line, "drive speed:", 12) == 0) {
+			/* find line "drive speed" and read the correct speed */
+			int i, speed;
+
+			/* skip drive_number columns to reach our drive */
+			str = strtok(&line[12], delim);
+			for (i = 0; str && i < drive_number; i++)
+				str = strtok(NULL, delim);
+
+			if (!str)
+				errx(EXIT_FAILURE,
+					_("%s: failed to read speed"),
+					_PATH_PROC_CDROMINFO);
+
+			speed = atoi(str);
+			fclose(f);
+			return speed;
+		}
+	}
+
+	errx(EXIT_FAILURE, _("failed to read speed"));
+}
+
+/*
+ * List Speed of CD-ROM drive.
+ *
+ * The speed currently stored in ctl->x_arg is selected first and the speed
+ * reported afterwards is taken as the maximum. Then increasing speeds are
+ * requested and every reported speed that is greater than the requested one
+ * is printed. ctl->x_arg is overwritten in the process.
+ */
+static void list_speeds(struct eject_control *ctl)
+{
+	int top, cur;
+
+	select_speed(ctl);
+	top = read_speed(ctl->device);
+
+	for (cur = 0; cur < top; ) {
+		ctl->x_arg = cur + 1;
+		select_speed(ctl);
+		cur = read_speed(ctl->device);
+
+		/* NOTE(review): a reported speed equal to the requested one
+		 * is skipped too -- confirm that this is intended */
+		if (ctl->x_arg >= cur) {
+			cur = ctl->x_arg + 1;
+			continue;
+		}
+		printf("%d ", cur);
+	}
+
+	printf("\n");
+}
+
+/*
+ * Eject using SCSI SG_IO commands. Return 1 if successful, 0 otherwise.
+ *
+ * Three 6-byte commands are sent to the device open on ctl->fd, one after
+ * another: ALLOW_MEDIUM_REMOVAL, START_STOP with byte 4 set to 1 and
+ * START_STOP with byte 4 set to 2. The sequence is aborted with 0 as soon as
+ * one of them fails.
+ */
+static int eject_scsi(const struct eject_control *ctl)
+{
+	int status, k;
+	sg_io_hdr_t io_hdr;
+	/* NOTE(review): presumably byte 4 of START_STOP selects start (1) and
+	 * load/eject (2) -- confirm against the SCSI specification */
+	unsigned char allowRmBlk[6] = {ALLOW_MEDIUM_REMOVAL, 0, 0, 0, 0, 0};
+	unsigned char startStop1Blk[6] = {START_STOP, 0, 0, 0, 1, 0};
+	unsigned char startStop2Blk[6] = {START_STOP, 0, 0, 0, 2, 0};
+	unsigned char inqBuff[2];
+	unsigned char sense_buffer[32];
+
+	/* the SG_IO interface used below needs sg driver version >= 30000 */
+	if ((ioctl(ctl->fd, SG_GET_VERSION_NUM, &k) < 0) || (k < 30000)) {
+		verbose(ctl, _("not an sg device, or old sg driver"));
+		return 0;
+	}
+
+	/* one header is shared by all commands; only cmdp changes */
+	memset(&io_hdr, 0, sizeof(sg_io_hdr_t));
+	io_hdr.interface_id = 'S';
+	io_hdr.cmd_len = 6;
+	io_hdr.mx_sb_len = sizeof(sense_buffer);
+	io_hdr.dxfer_direction = SG_DXFER_NONE;	/* no data is transferred */
+	io_hdr.dxfer_len = 0;
+	io_hdr.dxferp = inqBuff;
+	io_hdr.sbp = sense_buffer;
+	io_hdr.timeout = 10000;	/* presumably milliseconds -- TODO confirm */
+
+	io_hdr.cmdp = allowRmBlk;
+	status = ioctl(ctl->fd, SG_IO, (void *)&io_hdr);
+	if (status < 0 || io_hdr.host_status || io_hdr.driver_status)
+		return 0;
+
+	/* driver_status of this command is evaluated separately below */
+	io_hdr.cmdp = startStop1Blk;
+	status = ioctl(ctl->fd, SG_IO, (void *)&io_hdr);
+	if (status < 0 || io_hdr.host_status)
+		return 0;
+
+	/* Ignore errors when there is not medium -- in this case driver sense
+	 * buffer sets MEDIUM NOT PRESENT (3a) bit. For more details see:
+	 * http://www.tldp.org/HOWTO/archived/SCSI-Programming-HOWTO/SCSI-Programming-HOWTO-22.html#sec-sensecodes
+	 * -- kzak Jun 2013
+	 */
+	if (io_hdr.driver_status != 0 &&
+	    !(io_hdr.driver_status == DRIVER_SENSE && io_hdr.sbp &&
+		                                      io_hdr.sbp[12] == 0x3a))
+		return 0;
+
+	io_hdr.cmdp = startStop2Blk;
+	status = ioctl(ctl->fd, SG_IO, (void *)&io_hdr);
+	if (status < 0 || io_hdr.host_status || io_hdr.driver_status)
+		return 0;
+
+	/* force kernel to reread partition table when new disc inserted */
+	/* (best effort: the result of this ioctl is not checked) */
+	ioctl(ctl->fd, BLKRRPART);
+	return 1;
+}
+
+/*
+ * Eject a floppy by issuing the FDEJECT ioctl on @fd.
+ * Returns 1 when the ioctl succeeded and 0 when it failed.
+ */
+static int eject_floppy(int fd)
+{
+	if (ioctl(fd, FDEJECT) < 0)
+		return 0;
+
+	return 1;
+}
+
+
+/*
+ * Eject a tape by sending the MTOFFL operation (count 0) through the
+ * MTIOCTOP ioctl on @fd. Returns 1 when the ioctl succeeded, 0 otherwise.
+ */
+static int eject_tape(int fd)
+{
+	struct mtop offline = { .mt_op = MTOFFL, .mt_count = 0 };
+	int rc = ioctl(fd, MTIOCTOP, &offline);
+
+	return rc < 0 ? 0 : 1;
+}
+
+
+/*
+ * Unmount @name by running /bin/umount in a child process.
+ *
+ * The child resets its group and user id to the real ids before exec and
+ * passes "-n" to umount when ctl->p_option is set. The parent waits for the
+ * child and terminates the program with EXIT_FAILURE if the child cannot be
+ * waited for, did not exit normally, or exited with a non-zero status.
+ * A failed fork() only produces a warning. A NULL @name is ignored.
+ */
+static void umount_one(const struct eject_control *ctl, const char *name)
+{
+	int status = 0;
+	pid_t pid;
+
+	if (!name)
+		return;
+
+	verbose(ctl, _("%s: unmounting"), name);
+
+	pid = fork();
+	switch (pid) {
+	case 0: /* child */
+		if (setgid(getgid()) < 0)
+			err(EXIT_FAILURE, _("cannot set group id"));
+
+		if (setuid(getuid()) < 0)
+			err(EXIT_FAILURE, _("cannot set user id"));
+
+		if (ctl->p_option)
+			execl("/bin/umount", "/bin/umount", name, "-n", NULL);
+		else
+			execl("/bin/umount", "/bin/umount", name, NULL);
+
+		/* reached only when exec failed */
+		errexec("/bin/umount");
+
+	case -1:
+		warn( _("unable to fork"));
+		break;
+
+	default: /* parent */
+		/*
+		 * Wait for exactly the child forked above and do not look at
+		 * status unless the wait really succeeded.
+		 */
+		if (waitpid(pid, &status, 0) < 0 || WIFEXITED(status) == 0)
+			errx(EXIT_FAILURE,
+			     _("unmount of `%s' did not exit normally"), name);
+
+		if (WEXITSTATUS(status) != 0)
+			errx(EXIT_FAILURE, _("unmount of `%s' failed\n"), name);
+		break;
+	}
+}
+
+/*
+ * Open ctl->device non-blocking and store the descriptor in ctl->fd.
+ * Read-write access is tried first, read-only is the fallback. The program
+ * is terminated with EXIT_FAILURE when both attempts fail.
+ */
+static void open_device(struct eject_control *ctl)
+{
+	int fd = open(ctl->device, O_RDWR | O_NONBLOCK);
+
+	if (fd < 0)
+		fd = open(ctl->device, O_RDONLY | O_NONBLOCK);
+
+	ctl->fd = fd;
+	if (fd == -1)
+		err(EXIT_FAILURE, _("cannot open %s"), ctl->device);
+}
+
+/*
+ * Look up *devname in the mount table, first as a source (device) and then
+ * as a target (mountpoint).
+ *
+ * The table is parsed on first use and kept in ctl->mtab; with ctl->p_option
+ * it is read from _PATH_PROC_MOUNTINFO, otherwise the default mtab is used.
+ *
+ * On a match *mnt is set to a newly allocated copy of the mountpoint; if the
+ * match was found by target, *devname is freed and replaced by a copy of the
+ * source of that entry. Returns 0 when *mnt has been set, -1 otherwise
+ * (*mnt is NULL in that case).
+ */
+static int device_get_mountpoint(struct eject_control *ctl, char **devname, char **mnt)
+{
+	struct libmnt_fs *fs;
+
+	*mnt = NULL;
+
+	/* lazy initialization of the mount table */
+	if (!ctl->mtab) {
+		struct libmnt_cache *cache;
+		int rc;
+
+		ctl->mtab = mnt_new_table();
+		if (!ctl->mtab)
+			err(EXIT_FAILURE, _("failed to initialize libmount table"));
+
+		cache = mnt_new_cache();
+		mnt_table_set_cache(ctl->mtab, cache);
+		mnt_unref_cache(cache);
+
+		rc = ctl->p_option ?
+			mnt_table_parse_file(ctl->mtab, _PATH_PROC_MOUNTINFO) :
+			mnt_table_parse_mtab(ctl->mtab, NULL);
+		if (rc)
+			err(EXIT_FAILURE, _("failed to parse mount table"));
+	}
+
+	fs = mnt_table_find_source(ctl->mtab, *devname, MNT_ITER_BACKWARD);
+	if (!fs) {
+		/* maybe 'devname' is mountpoint rather than a real device */
+		fs = mnt_table_find_target(ctl->mtab, *devname, MNT_ITER_BACKWARD);
+		if (!fs)
+			return -1;
+
+		free(*devname);
+		*devname = xstrdup(mnt_fs_get_source(fs));
+	}
+
+	*mnt = xstrdup(mnt_fs_get_target(fs));
+	return *mnt ? 0 : -1;
+}
+
+/*
+ * Returns the result of find_device() for the whole-disk that @device
+ * belongs to, or NULL when @device cannot be stat()ed, the whole-disk
+ * lookup fails, or @device already is the whole-disk.
+ */
+static char *get_disk_devname(const char *device)
+{
+	struct stat sb;
+	dev_t whole = 0;
+	char name[128];
+
+	/* get whole-disk devno */
+	if (stat(device, &sb) != 0 ||
+	    sysfs_devno_to_wholedisk(sb.st_rdev, name, sizeof(name), &whole) != 0)
+		return NULL;
+
+	if (sb.st_rdev == whole)
+		return NULL;
+
+	return find_device(name);
+}
+
+/*
+ * Walk the sysfs directory of ctl->device and handle every entry that is
+ * a partition of the device: when the partition is found in the mount table
+ * it is counted and, unless ctl->M_option is set, unmounted.
+ *
+ * Returns the number of mounted partitions found; 0 is also returned when
+ * the sysfs path of the device cannot be set up or opened.
+ */
+static int umount_partitions(struct eject_control *ctl)
+{
+	dev_t devno = sysfs_devname_to_devno(ctl->device);
+	struct path_cxt *pc = devno ? ul_new_sysfs_path(devno, NULL, NULL) : NULL;
+	struct dirent *d;
+	DIR *dir;
+	int count = 0;
+
+	if (!pc)
+		return 0;
+
+	/* open /sys/block/<wholedisk> */
+	dir = ul_path_opendir(pc, NULL);
+	if (dir) {
+		/* scan for partition subdirs */
+		while ((d = readdir(dir))) {
+			char *mnt = NULL;
+			char *dev;
+
+			if (strcmp(d->d_name, ".") == 0 || strcmp(d->d_name, "..") == 0)
+				continue;
+			if (!sysfs_blkdev_is_partition_dirent(dir, d, ctl->device))
+				continue;
+
+			dev = find_device(d->d_name);
+			if (dev && device_get_mountpoint(ctl, &dev, &mnt) == 0) {
+				verbose(ctl, _("%s: mounted on %s"), dev, mnt);
+				if (!ctl->M_option)
+					umount_one(ctl, mnt);
+				count++;
+			}
+			free(dev);
+			free(mnt);
+		}
+		closedir(dir);
+	}
+
+	ul_unref_path(pc);
+	return count;
+}
+
+/*
+ * Returns the result of sysfs_blkdev_is_hotpluggable() for ctl->device,
+ * or 0 when the device number or the sysfs path of the device is not
+ * available.
+ */
+static int is_hotpluggable(const struct eject_control *ctl)
+{
+	dev_t devno = sysfs_devname_to_devno(ctl->device);
+	struct path_cxt *pc;
+	int rc;
+
+	if (!devno)
+		return 0;
+
+	pc = ul_new_sysfs_path(devno, NULL, NULL);
+	if (!pc)
+		return 0;
+
+	rc = sysfs_blkdev_is_hotpluggable(pc);
+	ul_unref_path(pc);
+
+	return rc;
+}
+
+
+/*
+ * Handle the -x option: does nothing unless ctl->x_option is set; otherwise
+ * the device is opened, the speed from ctl->x_arg is selected (0 is reported
+ * as "auto") and the program exits with EXIT_SUCCESS.
+ */
+static void set_device_speed(struct eject_control *ctl)
+{
+	if (!ctl->x_option)
+		return;
+
+	if (ctl->x_arg != 0)
+		verbose(ctl, _("setting CD-ROM speed to %ldX"), ctl->x_arg);
+	else
+		verbose(ctl, _("setting CD-ROM speed to auto"));
+
+	open_device(ctl);
+	select_speed(ctl);
+
+	exit(EXIT_SUCCESS);
+}
+
+
+/*
+ * main program
+ *
+ * Parses the command line, resolves the device (or the default device) to
+ * a whole-disk device name, handles the options that do not eject anything
+ * (-d, -n, -i, -a, -t, -T, -X, -x, -c), unmounts the device and finally
+ * tries the enabled eject methods one after another until one succeeds.
+ *
+ * Returns EXIT_SUCCESS; all failures terminate via err()/errx() with
+ * EXIT_FAILURE.
+ */
+int main(int argc, char **argv)
+{
+	char *disk = NULL;
+	char *mountpoint = NULL;
+	int worked = 0;    /* set to 1 when successfully ejected */
+	struct eject_control ctl = { NULL };
+
+	setlocale(LC_ALL,"");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	/* parse the command line arguments */
+	parse_args(&ctl, argc, argv);
+
+	/* handle -d option */
+	if (ctl.d_option) {
+		info(_("default device: `%s'"), EJECT_DEFAULT_DEVICE);
+		return EXIT_SUCCESS;
+	}
+
+	if (!ctl.device) {
+		ctl.device = mnt_resolve_path(EJECT_DEFAULT_DEVICE, NULL);
+		/* report the name we tried, never pass NULL to "%s" */
+		if (!ctl.device)
+			errx(EXIT_FAILURE, _("%s: unable to find device"),
+					EJECT_DEFAULT_DEVICE);
+		verbose(&ctl, _("using default device `%s'"), ctl.device);
+	} else {
+		char *p;
+		size_t len = strlen(ctl.device);
+
+		/* strip one trailing slash; an empty name has nothing to strip */
+		if (len > 0 && ctl.device[len - 1] == '/')
+			ctl.device[len - 1] = '\0';
+
+		/* figure out full device or mount point name */
+		p = find_device(ctl.device);
+		if (p)
+			free(ctl.device);
+		else
+			p = ctl.device;
+
+		ctl.device = mnt_resolve_spec(p, NULL);
+		/* report the name we tried, never pass NULL to "%s" */
+		if (!ctl.device)
+			errx(EXIT_FAILURE, _("%s: unable to find device"), p);
+		free(p);
+	}
+
+	verbose(&ctl, _("device name is `%s'"), ctl.device);
+
+	device_get_mountpoint(&ctl, &ctl.device, &mountpoint);
+	if (mountpoint)
+		verbose(&ctl, _("%s: mounted on %s"), ctl.device, mountpoint);
+	else
+		verbose(&ctl, _("%s: not mounted"), ctl.device);
+
+	/* a partition is replaced by the whole-disk device it belongs to */
+	disk = get_disk_devname(ctl.device);
+	if (disk) {
+		verbose(&ctl, _("%s: disc device: %s (disk device will be used for eject)"), ctl.device, disk);
+		free(ctl.device);
+		ctl.device = disk;
+		disk = NULL;
+	} else {
+		struct stat st;
+
+		if (stat(ctl.device, &st) != 0 || !S_ISBLK(st.st_mode))
+			errx(EXIT_FAILURE, _("%s: not found mountpoint or device "
+					"with the given name"), ctl.device);
+
+		verbose(&ctl, _("%s: is whole-disk device"), ctl.device);
+	}
+
+	if (ctl.F_option == 0 && is_hotpluggable(&ctl) == 0)
+		errx(EXIT_FAILURE, _("%s: is not hot-pluggable device"), ctl.device);
+
+	/* handle -n option */
+	if (ctl.n_option) {
+		info(_("device is `%s'"), ctl.device);
+		verbose(&ctl, _("exiting due to -n/--noop option"));
+		return EXIT_SUCCESS;
+	}
+
+	/* handle -i option */
+	if (ctl.i_option) {
+		open_device(&ctl);
+		manual_eject(&ctl);
+		return EXIT_SUCCESS;
+	}
+
+	/* handle -a option */
+	if (ctl.a_option) {
+		if (ctl.a_arg)
+			verbose(&ctl, _("%s: enabling auto-eject mode"), ctl.device);
+		else
+			verbose(&ctl, _("%s: disabling auto-eject mode"), ctl.device);
+		open_device(&ctl);
+		auto_eject(&ctl);
+		return EXIT_SUCCESS;
+	}
+
+	/* handle -t option */
+	if (ctl.t_option) {
+		verbose(&ctl, _("%s: closing tray"), ctl.device);
+		open_device(&ctl);
+		close_tray(ctl.fd);
+		set_device_speed(&ctl);
+		return EXIT_SUCCESS;
+	}
+
+	/* handle -T option */
+	if (ctl.T_option) {
+		verbose(&ctl, _("%s: toggling tray"), ctl.device);
+		open_device(&ctl);
+		toggle_tray(ctl.fd);
+		set_device_speed(&ctl);
+		return EXIT_SUCCESS;
+	}
+
+	/* handle -X option */
+	if (ctl.X_option) {
+		verbose(&ctl, _("%s: listing CD-ROM speed"), ctl.device);
+		open_device(&ctl);
+		list_speeds(&ctl);
+		return EXIT_SUCCESS;
+	}
+
+	/* handle -x option only */
+	if (!ctl.c_option)
+		set_device_speed(&ctl);
+
+
+	/*
+	 * Unmount all partitions if -m is not specified; or umount given
+	 * mountpoint if -M is specified, otherwise print error of another
+	 * partition is mounted.
+	 */
+	if (!ctl.m_option) {
+		int ct = umount_partitions(&ctl);
+
+		if (ct == 0 && mountpoint)
+			umount_one(&ctl, mountpoint); /* probably whole-device */
+
+		if (ctl.M_option) {
+			if (ct == 1 && mountpoint)
+				umount_one(&ctl, mountpoint);
+			else if (ct)
+				errx(EXIT_FAILURE, _("error: %s: device in use"), ctl.device);
+		}
+	}
+
+	/* handle -c option */
+	if (ctl.c_option) {
+		verbose(&ctl, _("%s: selecting CD-ROM disc #%ld"), ctl.device, ctl.c_arg);
+		open_device(&ctl);
+		changer_select(&ctl);
+		set_device_speed(&ctl);
+		return EXIT_SUCCESS;
+	}
+
+	/* if user did not specify type of eject, try all four methods */
+	if (ctl.r_option + ctl.s_option + ctl.f_option + ctl.q_option == 0)
+		ctl.r_option = ctl.s_option = ctl.f_option = ctl.q_option = 1;
+
+	/* open device */
+	open_device(&ctl);
+
+	/* try various methods of ejecting until it works */
+	if (ctl.r_option) {
+		verbose(&ctl, _("%s: trying to eject using CD-ROM eject command"), ctl.device);
+		worked = eject_cdrom(ctl.fd);
+		verbose(&ctl, worked ? _("CD-ROM eject command succeeded") :
+				 _("CD-ROM eject command failed"));
+	}
+
+	if (ctl.s_option && !worked) {
+		verbose(&ctl, _("%s: trying to eject using SCSI commands"), ctl.device);
+		worked = eject_scsi(&ctl);
+		verbose(&ctl, worked ? _("SCSI eject succeeded") :
+				 _("SCSI eject failed"));
+	}
+
+	if (ctl.f_option && !worked) {
+		verbose(&ctl, _("%s: trying to eject using floppy eject command"), ctl.device);
+		worked = eject_floppy(ctl.fd);
+		verbose(&ctl, worked ? _("floppy eject command succeeded") :
+				 _("floppy eject command failed"));
+	}
+
+	if (ctl.q_option && !worked) {
+		verbose(&ctl, _("%s: trying to eject using tape offline command"), ctl.device);
+		worked = eject_tape(ctl.fd);
+		verbose(&ctl, worked ? _("tape offline command succeeded") :
+				 _("tape offline command failed"));
+	}
+
+	if (!worked)
+		errx(EXIT_FAILURE, _("unable to eject"));
+
+	/* cleanup */
+	close(ctl.fd);
+	free(ctl.device);
+	free(mountpoint);
+
+	mnt_unref_table(ctl.mtab);
+
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/fallocate.1 b/sys-utils/fallocate.1
new file mode 100644
index 0000000..fe5072a
--- /dev/null
+++ b/sys-utils/fallocate.1
@@ -0,0 +1,191 @@
+.TH FALLOCATE 1 "April 2014" "util-linux" "User Commands"
+.SH NAME
+fallocate \- preallocate or deallocate space to a file
+.SH SYNOPSIS
+.B fallocate
+.RB [ \-c | \-p | \-z ]
+.RB [ \-o
+.IR offset ]
+.B \-l
+.I length
+.RB [ \-n ]
+.I filename
+.PP
+.B fallocate \-d
+.RB [ \-o
+.IR offset ]
+.RB [ \-l
+.IR length ]
+.I filename
+.PP
+.B fallocate \-x
+.RB [ \-o
+.IR offset ]
+.B \-l
+.I length
+.I filename
+.SH DESCRIPTION
+.B fallocate
+is used to manipulate the allocated disk space for a file,
+either to deallocate or preallocate it.
+For filesystems which support the fallocate system call,
+preallocation is done quickly by allocating blocks and marking them as
+uninitialized, requiring no IO to the data blocks.
+This is much faster than creating a file by filling it with zeroes.
+.PP
+The exit code returned by
+.B fallocate
+is 0 on success and 1 on failure.
+.SH OPTIONS
+The
+.I length
+and
+.I offset
+arguments may be followed by the multiplicative suffixes KiB (=1024),
+MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB, and YiB (the "iB" is
+optional, e.g., "K" has the same meaning as "KiB") or the suffixes
+KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB, and YB.
+.PP
+The options
+.BR \-\-collapse\-range ", " \-\-dig\-holes ", " \-\-punch\-hole ,
+and
+.B \-\-zero\-range
+are mutually exclusive.
+.TP
+.BR \-c ", " \-\-collapse\-range
+Removes a byte range from a file, without leaving a hole.
+The byte range to be collapsed starts at
+.I offset
+and continues for
+.I length
+bytes.
+At the completion of the operation,
+the contents of the file starting at the location
+.IR offset + length
+will be appended at the location
+.IR offset ,
+and the file will be
+.I length
+bytes smaller.
+The option
+.B \-\-keep\-size
+may not be specified for the collapse-range operation.
+.sp
+Available since Linux 3.15 for ext4 (only for extent-based files) and XFS.
+.sp
+A filesystem may place limitations on the granularity of the operation, in
+order to ensure efficient implementation.  Typically, offset and len must be a
+multiple of the filesystem logical block size, which varies according to the
+filesystem type and configuration.  If a filesystem has such a requirement,
+the operation will fail with the error EINVAL if this requirement is violated.
+.TP
+.BR \-d ", " \-\-dig\-holes
+Detect and dig holes.
+This makes the file sparse in-place, without using extra disk space.
+The minimum size of the hole depends on filesystem I/O block size
+(usually 4096 bytes).
+Also, when using this option,
+.B \-\-keep\-size
+is implied.  If no range is specified by
+.B \-\-offset
+and
+.BR \-\-length ,
+then the entire file is analyzed for holes.
+.sp
+You can think of this option as doing a
+.RB """" "cp \-\-sparse" """"
+and then renaming the destination file to the original,
+without the need for extra disk space.
+.sp
+See \fB\-\-punch\-hole\fP for a list of supported filesystems.
+.TP
+.BR \-i ", " \-\-insert\-range
+Insert a hole of
+.I length
+bytes from
+.IR offset ,
+shifting existing data.
+.TP
+.BR \-l ", " "\-\-length " \fIlength
+Specifies the length of the range, in bytes.
+.TP
+.BR \-n ", " \-\-keep\-size
+Do not modify the apparent length of the file.  This may effectively allocate
+blocks past EOF, which can be removed with a truncate.
+.TP
+.BR \-o ", " "\-\-offset " \fIoffset
+Specifies the beginning offset of the range, in bytes.
+.TP
+.BR \-p ", " \-\-punch\-hole
+Deallocates space (i.e., creates a hole) in the byte range starting at
+.I offset
+and continuing for
+.I length
+bytes.
+Within the specified range, partial filesystem blocks are zeroed,
+and whole filesystem blocks are removed from the file.
+After a successful call,
+subsequent reads from this range will return zeroes.
+This option may not be specified at the same time as the
+.B \-\-zero\-range
+option.
+Also, when using this option,
+.B \-\-keep\-size
+is implied.
+.sp
+Supported for XFS (since Linux 2.6.38), ext4 (since Linux 3.0),
+Btrfs (since Linux 3.7) and tmpfs (since Linux 3.5).
+.TP
+.BR \-v ", " \-\-verbose
+Enable verbose mode.
+.TP
+.BR \-x ", " \-\-posix
+Enable POSIX operation mode.
+In that mode the allocation operation always completes,
+but it may take longer when fast allocation is not supported by
+the underlying filesystem.
+.TP
+.BR \-z ", " \-\-zero\-range
+Zeroes space in the byte range starting at
+.I offset
+and continuing for
+.I length
+bytes.
+Within the specified range, blocks are preallocated for the regions
+that span the holes in the file.
+After a successful call,
+subsequent reads from this range will return zeroes.
+.sp
+Zeroing is done within the filesystem preferably by converting the
+range into unwritten extents.  This approach means that the specified
+range will not be physically zeroed out on the device (except for
+partial blocks at either end of the range), and I/O is
+(otherwise) required only to update metadata.
+.sp
+Option \fB\-\-keep\-size\fP can be specified to prevent file length
+modification.
+.sp
+Available since Linux 3.14 for ext4 (only for extent-based files) and XFS.
+.TP
+.BR \-V ", " \-\-version
+Display version information and exit.
+.TP
+.BR \-h ", " \-\-help
+Display help text and exit.
+.SH AUTHORS
+.MT sandeen@redhat.com
+Eric Sandeen
+.ME
+.br
+.MT kzak@redhat.com
+Karel Zak
+.ME
+.SH SEE ALSO
+.BR truncate (1),
+.BR fallocate (2),
+.BR posix_fallocate (3)
+.SH AVAILABILITY
+The fallocate command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/fallocate.c b/sys-utils/fallocate.c
new file mode 100644
index 0000000..ba3867c
--- /dev/null
+++ b/sys-utils/fallocate.c
@@ -0,0 +1,412 @@
+/*
+ * fallocate - utility to use the fallocate system call
+ *
+ * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
+ * Written by Eric Sandeen <sandeen@redhat.com>
+ *            Karel Zak <kzak@redhat.com>
+ *
+ * cvtnum routine taken from xfsprogs,
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <limits.h>
+#include <string.h>
+
+#ifndef HAVE_FALLOCATE
+# include <sys/syscall.h>
+#endif
+
+#if defined(HAVE_LINUX_FALLOC_H) && \
+    (!defined(FALLOC_FL_KEEP_SIZE) || !defined(FALLOC_FL_PUNCH_HOLE) || \
+     !defined(FALLOC_FL_COLLAPSE_RANGE) || !defined(FALLOC_FL_ZERO_RANGE) || \
+     !defined(FALLOC_FL_INSERT_RANGE))
+# include <linux/falloc.h>	/* non-libc fallback for FALLOC_FL_* flags */
+#endif
+
+
+#ifndef FALLOC_FL_KEEP_SIZE
+# define FALLOC_FL_KEEP_SIZE		0x1
+#endif
+
+#ifndef FALLOC_FL_PUNCH_HOLE
+# define FALLOC_FL_PUNCH_HOLE		0x2
+#endif
+
+#ifndef FALLOC_FL_COLLAPSE_RANGE
+# define FALLOC_FL_COLLAPSE_RANGE	0x8
+#endif
+
+#ifndef FALLOC_FL_ZERO_RANGE
+# define FALLOC_FL_ZERO_RANGE		0x10
+#endif
+
+#ifndef FALLOC_FL_INSERT_RANGE
+# define FALLOC_FL_INSERT_RANGE		0x20
+#endif
+
+#include "nls.h"
+#include "strutils.h"
+#include "c.h"
+#include "closestream.h"
+#include "xalloc.h"
+#include "optutils.h"
+
+static int verbose;
+static char *filename;
+
+/* Print the help text on stdout and terminate with EXIT_SUCCESS. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	fputs(USAGE_HEADER, stdout);
+	printf(_(" %s [options] <filename>\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Preallocate space to, or deallocate space from a file.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	fputs(_(" -c, --collapse-range remove a range from the file\n"), stdout);
+	fputs(_(" -d, --dig-holes      detect zeroes and replace with holes\n"), stdout);
+	fputs(_(" -i, --insert-range   insert a hole at range, shifting existing data\n"), stdout);
+	fputs(_(" -l, --length <num>   length for range operations, in bytes\n"), stdout);
+	fputs(_(" -n, --keep-size      maintain the apparent size of the file\n"), stdout);
+	fputs(_(" -o, --offset <num>   offset for range operations, in bytes\n"), stdout);
+	fputs(_(" -p, --punch-hole     replace a range with a hole (implies -n)\n"), stdout);
+	fputs(_(" -z, --zero-range     zero and ensure allocation of a range\n"), stdout);
+#ifdef HAVE_POSIX_FALLOCATE
+	/* listed only when the posix_fallocate() code path is compiled in */
+	fputs(_(" -x, --posix          use posix_fallocate(3) instead of fallocate(2)\n"), stdout);
+#endif
+	fputs(_(" -v, --verbose        verbose mode\n"), stdout);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	printf(USAGE_HELP_OPTIONS(22));
+
+	printf(USAGE_MAN_TAIL("fallocate(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Convert a size string (as understood by strtosize()) to loff_t.
+ * Returns -1 when the string cannot be parsed or the value does not fit
+ * into a signed 64-bit offset.
+ */
+static loff_t cvtnum(char *s)
+{
+	uintmax_t x;
+
+	if (strtosize(s, &x))
+		return -1LL;
+
+	/*
+	 * Without this check a huge value wraps to a negative number and may
+	 * collide with the -2 "not specified" marker used by main().
+	 */
+	if (x > (uintmax_t) LLONG_MAX)
+		return -1LL;
+
+	return x;
+}
+
+/*
+ * Call fallocate(2) (directly via syscall() when the libc wrapper is not
+ * available) and terminate the program with EXIT_FAILURE on error.
+ *
+ * EOPNOTSUPP together with FALLOC_FL_KEEP_SIZE in @mode is reported as
+ * unsupported keep-size mode; any other failure is reported with the
+ * errno text.
+ */
+static void xfallocate(int fd, int mode, off_t offset, off_t length)
+{
+#ifdef HAVE_FALLOCATE
+	int rc = fallocate(fd, mode, offset, length);
+#else
+	int rc = syscall(SYS_fallocate, fd, mode, offset, length);
+#endif
+
+	if (rc >= 0)
+		return;
+
+	if (errno == EOPNOTSUPP && (mode & FALLOC_FL_KEEP_SIZE))
+		errx(EXIT_FAILURE, _("fallocate failed: keep size mode is unsupported"));
+
+	err(EXIT_FAILURE, _("fallocate failed"));
+}
+
+#ifdef HAVE_POSIX_FALLOCATE
+/*
+ * Call posix_fallocate(3) and terminate the program with EXIT_FAILURE on
+ * error.
+ *
+ * posix_fallocate() does not use the -1/errno convention: it returns 0 on
+ * success and the error number itself on failure, so the result is copied
+ * to errno to let err() print the right message.
+ */
+static void xposix_fallocate(int fd, off_t offset, off_t length)
+{
+	int error = posix_fallocate(fd, offset, length);
+
+	if (error != 0) {
+		errno = error;
+		err(EXIT_FAILURE, _("fallocate failed"));
+	}
+}
+#endif
+
+/*
+ * Returns 1 when the first @bufsize bytes of @buf are all zero, 0 otherwise.
+ *
+ * The real buffer size has to be bufsize + sizeof(uintptr_t): a non-zero
+ * sentinel word is written right behind the data so that both scans below
+ * stop without an explicit length check. The buffer is read word-wise from
+ * its start.
+ */
+static int is_nul(void *buf, size_t bufsize)
+{
+	char *start = buf;
+	const uintptr_t *w = buf;
+	const char *b;
+
+	/* set sentinel */
+	memset(start + bufsize, '\1', sizeof(uintptr_t));
+
+	/* Find first nonzero *word*, or the word with the sentinel.  */
+	while (*w == 0)
+		w++;
+
+	/* Find the first nonzero *byte*, or the sentinel.  */
+	b = (const char *) w;
+	while (*b == 0)
+		b++;
+
+	/* all data bytes are zero if the scan stopped in the sentinel */
+	return b >= start + bufsize;
+}
+
+/*
+ * Detect runs of zero bytes in the data extents of @fd and punch holes for
+ * them with FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE.
+ *
+ * @file_off is the start of the analyzed range, @len its length; @len == 0
+ * means "up to the end of the file". The file is read in st_blksize chunks.
+ * In verbose mode the number of converted bytes is printed to stdout.
+ * Any I/O error terminates the program with EXIT_FAILURE.
+ */
+static void dig_holes(int fd, off_t file_off, off_t len)
+{
+	off_t file_end = len ? file_off + len : 0;
+	off_t hole_start = 0, hole_sz = 0;
+	uintmax_t ct = 0;
+	size_t  bufsz;
+	char *buf;
+	struct stat st;
+#if defined(POSIX_FADV_SEQUENTIAL) && defined(HAVE_POSIX_FADVISE)
+	off_t cache_start = file_off;
+	/*
+	 * We don't want to call POSIX_FADV_DONTNEED to discard cached
+	 * data in PAGE_SIZE steps. IMHO it's overkill (too many syscalls).
+	 *
+	 * Let's assume that 1MiB (on system with 4K page size) is just
+	 * a good compromise.
+	 *					    -- kzak Feb-2014
+	 */
+	const size_t cachesz = getpagesize() * 256;
+#endif
+
+	if (fstat(fd, &st) != 0)
+		err(EXIT_FAILURE, _("stat of %s failed"), filename);
+
+	bufsz = st.st_blksize;
+
+	if (lseek(fd, file_off, SEEK_SET) < 0)
+		err(EXIT_FAILURE, _("seek on %s failed"), filename);
+
+	/* buffer + extra space for is_nul() sentinel */
+	buf = xmalloc(bufsz + sizeof(uintptr_t));
+	while (file_end == 0 || file_off < file_end) {
+		/*
+		 * Detect data area (skip holes)
+		 */
+		off_t end, off;
+
+		off = lseek(fd, file_off, SEEK_DATA);
+		if (off < 0) {
+			if (errno == ENXIO)
+				break;		/* no more data */
+			/* any other error would repeat forever, give up */
+			err(EXIT_FAILURE, _("seek on %s failed"), filename);
+		}
+		if (file_end && off >= file_end)
+			break;
+
+		end = lseek(fd, off, SEEK_HOLE);
+		if (end < 0)
+			err(EXIT_FAILURE, _("seek on %s failed"), filename);
+		if (file_end && end > file_end)
+			end = file_end;
+
+#if defined(POSIX_FADV_SEQUENTIAL) && defined(HAVE_POSIX_FADVISE)
+		/* the third argument is a length, not an end offset */
+		posix_fadvise(fd, off, end - off, POSIX_FADV_SEQUENTIAL);
+#endif
+		/*
+		 * Dig holes in the area
+		 */
+		while (off < end) {
+			ssize_t rsz = pread(fd, buf, bufsz, off);
+			if (rsz < 0 && errno)
+				err(EXIT_FAILURE, _("%s: read failed"), filename);
+			/* do not look behind the end of the data area */
+			if (end && rsz > 0 && off > end - rsz)
+				rsz = end - off;
+			if (rsz <= 0)
+				break;
+
+			if (is_nul(buf, rsz)) {
+				if (!hole_sz)				/* new hole detected */
+					hole_start = off;
+				hole_sz += rsz;
+			 } else if (hole_sz) {
+				xfallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE,
+					   hole_start, hole_sz);
+				ct += hole_sz;
+				hole_sz = hole_start = 0;
+			}
+
+#if defined(POSIX_FADV_DONTNEED) && defined(HAVE_POSIX_FADVISE)
+			/* discard cached data */
+			if (off - cache_start > (off_t) cachesz) {
+				size_t clen = off - cache_start;
+
+				clen = (clen / cachesz) * cachesz;
+				posix_fadvise(fd, cache_start, clen, POSIX_FADV_DONTNEED);
+				cache_start = cache_start + clen;
+			}
+#endif
+			off += rsz;
+		}
+		if (hole_sz) {
+			xfallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE,
+					hole_start, hole_sz);
+			ct += hole_sz;
+			/*
+			 * Start from scratch in the next data area, otherwise
+			 * a stale start and size would be reused there.
+			 */
+			hole_sz = hole_start = 0;
+		}
+		file_off = off;
+	}
+
+	free(buf);
+
+	if (verbose) {
+		char *str = size_to_human_string(SIZE_SUFFIX_3LETTER | SIZE_SUFFIX_SPACE, ct);
+		fprintf(stdout, _("%s: %s (%ju bytes) converted to sparse holes.\n"),
+				filename, str, ct);
+		free(str);
+	}
+}
+
+/*
+ * Program entry point: parses the options, validates the range, opens the
+ * file and runs exactly one of dig_holes(), xposix_fallocate() or
+ * xfallocate() on it.
+ *
+ * Returns EXIT_SUCCESS; all failures terminate with EXIT_FAILURE.
+ */
+int main(int argc, char **argv)
+{
+	int	c;
+	int	fd;
+	int	mode = 0;
+	int	dig = 0;
+	int posix = 0;
+	loff_t	length = -2LL;	/* -2 means "not specified" */
+	loff_t	offset = 0;
+
+	static const struct option longopts[] = {
+	    { "help",           no_argument,       NULL, 'h' },
+	    { "version",        no_argument,       NULL, 'V' },
+	    { "keep-size",      no_argument,       NULL, 'n' },
+	    { "punch-hole",     no_argument,       NULL, 'p' },
+	    { "collapse-range", no_argument,       NULL, 'c' },
+	    { "dig-holes",      no_argument,       NULL, 'd' },
+	    { "insert-range",   no_argument,       NULL, 'i' },
+	    { "zero-range",     no_argument,       NULL, 'z' },
+	    { "offset",         required_argument, NULL, 'o' },
+	    { "length",         required_argument, NULL, 'l' },
+	    { "posix",          no_argument,       NULL, 'x' },
+	    { "verbose",        no_argument,       NULL, 'v' },
+	    { NULL, 0, NULL, 0 }
+	};
+
+	/*
+	 * -x is exclusive with each of c, d, i, n, p and z, but those are not
+	 * all exclusive with each other (e.g. -n -z is valid), so -x is
+	 * spread over several rows that are kept in ASCII order.
+	 */
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'c', 'd', 'p', 'x', 'z' },
+		{ 'c', 'n' },
+		{ 'i', 'x' },
+		{ 'n', 'x' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv, "hvVncpdizxl:o:", longopts, NULL))
+			!= -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch(c) {
+		case 'h':
+			usage();
+			break;
+		case 'c':
+			mode |= FALLOC_FL_COLLAPSE_RANGE;
+			break;
+		case 'd':
+			dig = 1;
+			break;
+		case 'i':
+			mode |= FALLOC_FL_INSERT_RANGE;
+			break;
+		case 'l':
+			length = cvtnum(optarg);
+			break;
+		case 'n':
+			mode |= FALLOC_FL_KEEP_SIZE;
+			break;
+		case 'o':
+			offset = cvtnum(optarg);
+			break;
+		case 'p':
+			mode |= FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+			break;
+		case 'z':
+			mode |= FALLOC_FL_ZERO_RANGE;
+			break;
+		case 'x':
+#ifdef HAVE_POSIX_FALLOCATE
+			posix = 1;
+			break;
+#else
+			errx(EXIT_FAILURE, _("posix_fallocate support is not compiled"));
+#endif
+		case 'v':
+			verbose++;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (optind == argc)
+		errx(EXIT_FAILURE, _("no filename specified"));
+
+	filename = argv[optind++];
+
+	if (optind != argc)
+		errx(EXIT_FAILURE, _("unexpected number of arguments"));
+
+	if (dig) {
+		/* for --dig-holes the default is analyze all file */
+		if (length == -2LL)
+			length = 0;
+		if (length < 0)
+			errx(EXIT_FAILURE, _("invalid length value specified"));
+	} else {
+		/* it's safer to require the range specification (--length --offset) */
+		if (length == -2LL)
+			errx(EXIT_FAILURE, _("no length argument specified"));
+		if (length <= 0)
+			errx(EXIT_FAILURE, _("invalid length value specified"));
+	}
+	if (offset < 0)
+		errx(EXIT_FAILURE, _("invalid offset value specified"));
+
+	/* O_CREAT makes sense only for the default fallocate(2) behavior
+	 * when mode is no specified and new space is allocated */
+	fd = open(filename, O_RDWR | (!dig && !mode ? O_CREAT : 0),
+		  S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
+	if (fd < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), filename);
+
+	if (dig)
+		dig_holes(fd, offset, length);
+#ifdef HAVE_POSIX_FALLOCATE
+	else if (posix)
+		xposix_fallocate(fd, offset, length);
+#endif
+	else
+		xfallocate(fd, mode, offset, length);
+
+	if (close_fd(fd) != 0)
+		err(EXIT_FAILURE, _("write failed: %s"), filename);
+
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/flock.1 b/sys-utils/flock.1
new file mode 100644
index 0000000..5b1d635
--- /dev/null
+++ b/sys-utils/flock.1
@@ -0,0 +1,197 @@
+.\" -----------------------------------------------------------------------
+.\"
+.\"   Copyright 2003-2006 H. Peter Anvin - All Rights Reserved
+.\"
+.\"   Permission is hereby granted, free of charge, to any person
+.\"   obtaining a copy of this software and associated documentation
+.\"   files (the "Software"), to deal in the Software without
+.\"   restriction, including without limitation the rights to use,
+.\"   copy, modify, merge, publish, distribute, sublicense, and/or
+.\"   sell copies of the Software, and to permit persons to whom
+.\"   the Software is furnished to do so, subject to the following
+.\"   conditions:
+.\"
+.\"   The above copyright notice and this permission notice shall
+.\"   be included in all copies or substantial portions of the Software.
+.\"
+.\"   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+.\"   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+.\"   OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+.\"   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+.\"   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+.\"   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+.\"   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+.\"   OTHER DEALINGS IN THE SOFTWARE.
+.\"
+.\" -----------------------------------------------------------------------
+.TH FLOCK 1 "July 2014" "util-linux" "User Commands"
+.SH NAME
+flock \- manage locks from shell scripts
+.SH SYNOPSIS
+.B flock
+[options]
+.IR file | "directory command " [ arguments ]
+.br
+.B flock
+[options]
+.IR file | directory
+.BI \-c " command"
+.br
+.B flock
+.RI [options] " number"
+.SH DESCRIPTION
+.PP
+This utility manages
+.BR flock (2)
+locks from within shell scripts or from the command line.
+.PP
+The first and second of the above forms wrap the lock around the execution of a
+.IR command ,
+in a manner similar to
+.BR su (1)
+or
+.BR newgrp (1).
+They lock a specified \fIfile\fR or \fIdirectory\fR, which is created (assuming
+appropriate permissions) if it does not already exist.  By default, if the
+lock cannot be immediately acquired,
+.B flock
+waits until the lock is available.
+.PP
+The third form uses an open file by its file descriptor \fInumber\fR.
+See the examples below for how that can be used.
+.SH OPTIONS
+.TP
+.BR \-c , " \-\-command " \fIcommand
+Pass a single \fIcommand\fR, without arguments, to the shell with
+.BR \-c .
+.TP
+.BR \-E , " \-\-conflict\-exit\-code " \fInumber
+The exit code used when the \fB\-n\fP option is in use, and the
+conflicting lock exists, or the \fB\-w\fP option is in use,
+and the timeout is reached.  The default value is \fB1\fR.
+.TP
+.BR \-F , " \-\-no\-fork"
+Do not fork before executing
+.IR command .
+Upon execution the flock process is replaced by
+.I command
+which continues to hold the lock. This option is incompatible with
+\fB\-\-close\fR as there would otherwise be nothing left to hold the lock.
+.TP
+.BR \-e , " \-x" , " \-\-exclusive"
+Obtain an exclusive lock, sometimes called a write lock.  This is the
+default.
+.TP
+.BR \-n , " \-\-nb" , " \-\-nonblock"
+Fail rather than wait if the lock cannot be
+immediately acquired.
+See the
+.B \-E
+option for the exit code used.
+.TP
+.BR \-o , " \-\-close"
+Close the file descriptor on which the lock is held before executing
+.IR command .
+This is useful if
+.I command
+spawns a child process which should not be holding the lock.
+.TP
+.BR \-s , " \-\-shared"
+Obtain a shared lock, sometimes called a read lock.
+.TP
+.BR \-u , " \-\-unlock"
+Drop a lock.  This is usually not required, since a lock is automatically
+dropped when the file is closed.  However, it may be required in special
+cases, for example if the enclosed command group may have forked a background
+process which should not be holding the lock.
+.TP
+.BR \-w , " \-\-wait" , " \-\-timeout " \fIseconds
+Fail if the lock cannot be acquired within
+.IR seconds .
+Decimal fractional values are allowed.
+See the
+.B \-E
+option for the exit code used.  A value of zero for
+.I seconds
+is interpreted as \fB\-\-nonblock\fR.
+.TP
+.B \-\-verbose
+Report how long it took to acquire the lock, or why the lock could not be
+obtained.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH EXAMPLES
+.TP
+shell1> flock /tmp -c cat
+.TQ
+shell2> flock -w .007 /tmp -c echo; /bin/echo $?
+Sets an exclusive lock on the directory /tmp; the second command will fail.
+.TP
+shell1> flock -s /tmp -c cat
+.TQ
+shell2> flock -s -w .007 /tmp -c echo; /bin/echo $?
+Sets a shared lock on the directory /tmp; the second command will not fail.
+Note that attempting to get an exclusive lock with the second command would fail.
+.TP
+shell> flock -x local-lock-file echo 'a b c'
+Grab the exclusive lock "local-lock-file" before running echo with 'a b c'.
+.TP
+(
+.TQ
+  flock -n 9 || exit 1
+.TQ
+  # ... commands executed under lock ...
+.TQ
+) 9>/var/lock/mylockfile
+This form is convenient inside shell scripts.  The mode used to open the file
+doesn't matter to
+.BR flock ;
+using
+.I >
+or
+.I >>
+allows the lockfile to be created if it does not already exist, however,
+write permission is required.  Using
+.I <
+requires that the file already exists but only read permission is required.
+.TP
+[ "${FLOCKER}" != "$0" ] && exec env FLOCKER="$0" flock -en "$0" "$0" "$@" || :
+This is useful boilerplate code for shell scripts.  Put it at the top of the
+shell script you want to lock and it'll automatically lock itself on the first
+run.  If the env var $FLOCKER is not set to the shell script that is being run,
+then execute flock and grab an exclusive non-blocking lock (using the script
+itself as the lock file) before re-execing itself with the right arguments.  It
+also sets the FLOCKER env var to the right value so it doesn't run again.
+.SH "EXIT STATUS"
+The command uses
+.B sysexits.h
+return values for everything, except when using either of the options
+.B \-n
+or
+.B \-w
+which report a failure to acquire the lock with a return value given by the
+.B \-E
+option, or 1 by default.
+.PP
+When using the \fIcommand\fR variant, and executing the child worked, then
+the exit status is that of the child command.
+.SH AUTHOR
+.UR hpa@zytor.com
+H. Peter Anvin
+.UE
+.SH COPYRIGHT
+Copyright \(co 2003\-2006 H. Peter Anvin.
+.br
+This is free software; see the source for copying conditions.  There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+.SH "SEE ALSO"
+.BR flock (2)
+.SH AVAILABILITY
+The flock command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/flock.c b/sys-utils/flock.c
new file mode 100644
index 0000000..ed25230
--- /dev/null
+++ b/sys-utils/flock.c
@@ -0,0 +1,380 @@
+/*   Copyright 2003-2005 H. Peter Anvin - All Rights Reserved
+ *
+ *   Permission is hereby granted, free of charge, to any person
+ *   obtaining a copy of this software and associated documentation
+ *   files (the "Software"), to deal in the Software without
+ *   restriction, including without limitation the rights to use,
+ *   copy, modify, merge, publish, distribute, sublicense, and/or
+ *   sell copies of the Software, and to permit persons to whom
+ *   the Software is furnished to do so, subject to the following
+ *   conditions:
+ *
+ *   The above copyright notice and this permission notice shall
+ *   be included in all copies or substantial portions of the Software.
+ *
+ *   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ *   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ *   OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ *   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ *   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ *   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ *   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ *   OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <paths.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "nls.h"
+#include "strutils.h"
+#include "closestream.h"
+#include "monotonic.h"
+#include "timer.h"
+
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;	/* all help text is written to stdout */
+	fputs(USAGE_HEADER, out);
+	fprintf(out,
+		_(" %1$s [options] <file>|<directory> <command> [<argument>...]\n"
+		  " %1$s [options] <file>|<directory> -c <command>\n"
+		  " %1$s [options] <file descriptor number>\n"),
+		program_invocation_short_name);
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Manage file locks from shell scripts.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(  " -s, --shared             get a shared lock\n"), out);
+	fputs(_(  " -x, --exclusive          get an exclusive lock (default)\n"), out);
+	fputs(_(  " -u, --unlock             remove a lock\n"), out);
+	fputs(_(  " -n, --nonblock           fail rather than wait\n"), out);
+	fputs(_(  " -w, --timeout <secs>     wait for a limited amount of time\n"), out);
+	fputs(_(  " -E, --conflict-exit-code <number>  exit code after conflict or timeout\n"), out);
+	fputs(_(  " -o, --close              close file descriptor before running command\n"), out);
+	fputs(_(  " -c, --command <command>  run a single command string through the shell\n"), out);
+	fputs(_(  " -F, --no-fork            execute command without forking\n"), out);
+	fputs(_(  "     --verbose            increase verbosity\n"), out);
+	fputs(USAGE_SEPARATOR, out);
+	fprintf(out, USAGE_HELP_OPTIONS(26));
+	fprintf(out, USAGE_MAN_TAIL("flock(1)"));
+	exit(EXIT_SUCCESS);	/* help was asked for, so this is a success exit */
+}
+
+static volatile sig_atomic_t timeout_expired = 0;	/* set in signal context, polled by main() */
+
+static void timeout_handler(int sig __attribute__((__unused__)),
+			    siginfo_t *info,
+			    void *context __attribute__((__unused__)))
+{
+	if (info->si_code == SI_TIMER)	/* react only to signals raised by a timer */
+		timeout_expired = 1;
+}
+
+/* Open (creating if missing) the lock file; store the flags used in *flags. */
+static int open_file(const char *filename, int *flags)
+{
+	int fd;
+	int fl = *flags == 0 ? O_RDONLY : *flags;
+
+	errno = 0;
+	fl |= O_NOCTTY | O_CREAT;
+	fd = open(filename, fl, 0666);
+
+	/* Linux doesn't like O_CREAT on a directory, even though it should
+	 * be a no-op; POSIX doesn't allow O_RDWR or O_WRONLY */
+	if (fd < 0 && errno == EISDIR) {
+		fl = O_RDONLY | O_NOCTTY;
+		fd = open(filename, fl);
+	}
+	if (fd < 0) {
+		const int saved = errno;	/* open()'s errno; warn() may clobber it */
+		warn(_("cannot open lock file %s"), filename);
+		if (saved == ENOMEM || saved == EMFILE || saved == ENFILE)
+			exit(EX_OSERR);
+		if (saved == EROFS || saved == ENOSPC)
+			exit(EX_CANTCREAT);
+		exit(EX_NOINPUT);	/* every other open failure */
+	}
+	*flags = fl;
+	return fd;
+}
+
+static void __attribute__((__noreturn__)) run_program(char **cmd_argv)
+{
+	execvp(cmd_argv[0], cmd_argv);	/* returns only if the exec failed */
+	const int saved = errno;	/* keep execvp()'s errno; warn() may clobber it */
+	warn(_("failed to execute %s"), cmd_argv[0]);
+	_exit((saved == ENOMEM) ? EX_OSERR : EX_UNAVAILABLE);
+}
+
+/* Parse options, take (or with -u drop) a flock(2) lock on the given file,
+ * directory or descriptor number, then optionally run a command while
+ * holding it; the command's exit status becomes ours. */
+int main(int argc, char *argv[])
+{
+	static timer_t t_id;
+	struct itimerval timeout;
+	int have_timeout = 0;
+	int type = LOCK_EX;
+	int block = 0;		/* becomes LOCK_NB for -n and for -w 0 */
+	int open_flags = 0;	/* flags the lock file was opened with */
+	int fd = -1;
+	int opt, ix;
+	int do_close = 0;
+	int no_fork = 0;
+	int status;
+	int verbose = 0;
+	struct timeval time_start, time_done;
+	/* The default exit code for lock conflict or timeout (see flock.1) */
+	int conflict_exit_code = 1;
+	char **cmd_argv = NULL, *sh_c_argv[4];
+	const char *filename = NULL;
+	enum {
+		OPT_VERBOSE = CHAR_MAX + 1
+	};
+	static const struct option long_options[] = {
+		{"shared", no_argument, NULL, 's'},
+		{"exclusive", no_argument, NULL, 'x'},
+		{"unlock", no_argument, NULL, 'u'},
+		{"nonblocking", no_argument, NULL, 'n'},
+		{"nb", no_argument, NULL, 'n'},
+		{"timeout", required_argument, NULL, 'w'},
+		{"wait", required_argument, NULL, 'w'},
+		{"conflict-exit-code", required_argument, NULL, 'E'},
+		{"close", no_argument, NULL, 'o'},
+		{"no-fork", no_argument, NULL, 'F'},
+		{"verbose", no_argument, NULL, OPT_VERBOSE},
+		{"help", no_argument, NULL, 'h'},
+		{"version", no_argument, NULL, 'V'},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	strutils_set_exitcode(EX_USAGE);
+
+	if (argc < 2) {
+		warnx(_("not enough arguments"));
+		errtryhelp(EX_USAGE);
+	}
+
+	memset(&timeout, 0, sizeof timeout);
+
+	optopt = 0;
+	while ((opt =
+		getopt_long(argc, argv, "+sexnoFuw:E:hV?", long_options,
+			    &ix)) != EOF) {
+		switch (opt) {
+		case 's':
+			type = LOCK_SH;
+			break;
+		case 'e':
+		case 'x':
+			type = LOCK_EX;
+			break;
+		case 'u':
+			type = LOCK_UN;
+			break;
+		case 'o':
+			do_close = 1;
+			break;
+		case 'F':
+			no_fork = 1;
+			break;
+		case 'n':
+			block = LOCK_NB;
+			break;
+		case 'w':
+			have_timeout = 1;
+			strtotimeval_or_err(optarg, &timeout.it_value,
+				_("invalid timeout value"));
+			break;
+		case 'E':
+			conflict_exit_code = strtos32_or_err(optarg,
+				_("invalid exit code"));
+			break;
+		case OPT_VERBOSE:
+			verbose = 1;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			exit(EX_OK);
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EX_USAGE);
+		}
+	}
+
+	if (no_fork && do_close)
+		errx(EX_USAGE,
+			_("the --no-fork and --close options are incompatible"));
+
+	if (argc > optind + 1) {
+		/* Run command */
+		if (!strcmp(argv[optind + 1], "-c") ||
+		    !strcmp(argv[optind + 1], "--command")) {
+			if (argc != optind + 3)
+				errx(EX_USAGE,
+				     _("%s requires exactly one command argument"),
+				     argv[optind + 1]);
+			cmd_argv = sh_c_argv;
+			cmd_argv[0] = getenv("SHELL");
+			if (!cmd_argv[0] || !*cmd_argv[0])
+				cmd_argv[0] = _PATH_BSHELL;
+			cmd_argv[1] = "-c";
+			cmd_argv[2] = argv[optind + 2];
+			cmd_argv[3] = NULL;
+		} else {
+			cmd_argv = &argv[optind + 1];
+		}
+
+		filename = argv[optind];
+		fd = open_file(filename, &open_flags);
+
+	} else if (optind < argc) {
+		/* Use provided file descriptor */
+		fd = strtos32_or_err(argv[optind], _("bad file descriptor"));
+	} else {
+		/* Bad options */
+		errx(EX_USAGE, _("requires file descriptor, file or directory"));
+	}
+
+	if (have_timeout) {
+		if (timeout.it_value.tv_sec == 0 &&
+		    timeout.it_value.tv_usec == 0) {
+			/* -w 0 is equivalent to -n; this has to be
+			 * special-cased because setting an itimer to zero
+			 * means disabled!
+			 */
+			have_timeout = 0;
+			block = LOCK_NB;
+		} else
+			if (setup_timer(&t_id, &timeout, &timeout_handler))
+				err(EX_OSERR, _("cannot set up timer"));
+	}
+
+	if (verbose)
+		gettime_monotonic(&time_start);
+	while (flock(fd, type | block)) {
+		switch (errno) {
+		case EWOULDBLOCK:
+			/* -n option set and failed to lock. */
+			if (verbose)
+				warnx(_("failed to get lock"));
+			exit(conflict_exit_code);
+		case EINTR:
+			/* Signal received */
+			if (timeout_expired) {
+				/* -w option set and failed to lock. */
+				if (verbose)
+					warnx(_("timeout while waiting to get lock"));
+				exit(conflict_exit_code);
+			}
+			/* otherwise try again */
+			continue;
+		case EIO:
+		case EBADF:		/* since Linux 3.4 (commit 55725513) */
+			/* Probably NFSv4 where flock() is emulated by fcntl().
+			 * Let's try to reopen in read-write mode.
+			 */
+			if (!(open_flags & O_RDWR) &&
+			    type != LOCK_SH &&
+			    filename &&
+			    access(filename, R_OK | W_OK) == 0) {
+
+				close(fd);
+				open_flags = O_RDWR;
+				fd = open_file(filename, &open_flags);
+
+				if (open_flags & O_RDWR)
+					break;	/* leaves the switch: retry flock() */
+			}
+			/* fallthrough */
+		default:
+			/* Other errors */
+			if (filename)
+				warn("%s", filename);
+			else
+				warn("%d", fd);
+			exit((errno == ENOLCK
+			      || errno == ENOMEM) ? EX_OSERR : EX_DATAERR);
+		}
+	}
+
+	if (have_timeout)
+		cancel_timer(&t_id);
+	if (verbose) {
+		struct timeval delta;
+
+		gettime_monotonic(&time_done);
+		timersub(&time_done, &time_start, &delta);
+		printf(_("%s: getting lock took %ld.%06ld seconds\n"),
+		       program_invocation_short_name, (long) delta.tv_sec,
+		       (long) delta.tv_usec);	/* casts match %ld on every ABI */
+	}
+	status = EX_OK;
+
+	if (cmd_argv) {
+		pid_t w, f;
+		/* Clear any inherited settings */
+		signal(SIGCHLD, SIG_DFL);
+		if (verbose)
+			printf(_("%s: executing %s\n"), program_invocation_short_name, cmd_argv[0]);
+		fflush(stdout);	/* exec discards unflushed stdio buffers (--no-fork) */
+		if (!no_fork) {
+			f = fork();
+			if (f < 0)
+				err(EX_OSERR, _("fork failed"));
+
+			/* child */
+			else if (f == 0) {
+				if (do_close)
+					close(fd);
+				run_program(cmd_argv);
+
+			/* parent */
+			} else {
+				do {
+					w = waitpid(f, &status, 0);
+					if (w == -1 && errno != EINTR)
+						break;
+				} while (w != f);
+
+				if (w == -1) {
+					status = EXIT_FAILURE;
+					warn(_("waitpid failed"));
+				} else if (WIFEXITED(status))
+					status = WEXITSTATUS(status);
+				else if (WIFSIGNALED(status))
+					status = WTERMSIG(status) + 128;
+				else
+					/* WTF? */
+					status = EX_OSERR;
+			}
+
+		} else
+			/* no-fork execution */
+			run_program(cmd_argv);
+	}
+
+	return status;
+}
diff --git a/sys-utils/fsfreeze.8 b/sys-utils/fsfreeze.8
new file mode 100644
index 0000000..3cd6738
--- /dev/null
+++ b/sys-utils/fsfreeze.8
@@ -0,0 +1,89 @@
+.TH FSFREEZE 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+fsfreeze \- suspend access to a filesystem (Ext3/4, ReiserFS, JFS, XFS)
+.SH SYNOPSIS
+.B fsfreeze
+.BR \-\-freeze | \-\-unfreeze
+.I mountpoint
+
+.SH DESCRIPTION
+.B fsfreeze
+suspends or resumes access to a filesystem.
+.PP
+.B fsfreeze
+halts any new access to the filesystem and creates a stable image on disk.
+.B fsfreeze
+is intended to be used with hardware RAID devices that support the creation
+of snapshots.
+.PP
+.B fsfreeze
+is unnecessary for
+.B device-mapper
+devices.  The device-mapper (and LVM) automatically freezes a filesystem
+on the device when a snapshot creation is requested.
+For more details see the
+.BR dmsetup (8)
+man page.
+.PP
+The
+.I mountpoint
+argument is the pathname of the directory where the filesystem
+is mounted.
+The filesystem must be mounted to be frozen (see
+.BR mount (8)).
+.PP
+Note that access-time updates are also suspended if the filesystem is mounted with
+the traditional atime behavior (mount option \fBstrictatime\fR, for more details see
+.BR mount (8)).
+
+.SH OPTIONS
+.TP
+.BR \-f , " \-\-freeze"
+This option requests that the specified filesystem be frozen against new
+modifications.  When this is selected, all ongoing transactions in the
+filesystem are allowed to complete, new write system calls are halted, other
+calls which modify the filesystem are halted, and all dirty data, metadata, and
+log information are written to disk.  Any process attempting to write to the
+frozen filesystem will block waiting for the filesystem to be unfrozen.
+.sp
+Note that even after freezing, the on-disk filesystem can contain
+information on files that are still in the process of unlinking.
+These files will not be unlinked until the filesystem is unfrozen
+or a clean mount of the snapshot is complete.
+.TP
+.BR \-u , " \-\-unfreeze"
+This option is used to un-freeze the filesystem and allow operations to
+continue.  Any filesystem modifications that were blocked by the freeze are
+unblocked and allowed to complete.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH FILESYSTEM SUPPORT
+This command will work only if the filesystem has support for freezing.
+The list of such filesystems includes (as of 2016-12-18)
+.BR btrfs ,
+.BR ext2/3/4 ,
+.BR f2fs ,
+.BR jfs ,
+.BR nilfs2 ,
+.BR reiserfs ,
+and
+.BR xfs .
+The list above may be incomplete, as more filesystems gain support.  If in
+doubt, the easiest way to find out whether a filesystem has support is to create
+a small loopback mount and test freezing it.
+.SH AUTHOR
+.PP
+Written by Hajime Taira.
+.SH NOTES
+.PP
+This man page is based on
+.BR xfs_freeze (8).
+.SH SEE ALSO
+.BR mount (8)
+.SH AVAILABILITY
+The fsfreeze command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/fsfreeze.c b/sys-utils/fsfreeze.c
new file mode 100644
index 0000000..401ab5c
--- /dev/null
+++ b/sys-utils/fsfreeze.c
@@ -0,0 +1,152 @@
+/*
+ * fsfreeze.c -- Filesystem freeze/unfreeze IO for Linux
+ *
+ * Copyright (C) 2010 Hajime Taira <htaira@redhat.com>
+ *                    Masatake Yamato <yamato@redhat.com>
+ *
+ * This program is free software.  You can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation: either version 1 or
+ * (at your option) any later version.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <getopt.h>
+
+#include "c.h"
+#include "blkdev.h"
+#include "nls.h"
+#include "closestream.h"
+#include "optutils.h"
+
+enum fs_operation {
+	NOOP,		/* initial value: neither -f nor -u was given */
+	FREEZE,		/* -f, --freeze: main() issues the FIFREEZE ioctl */
+	UNFREEZE	/* -u, --unfreeze: main() issues the FITHAW ioctl */
+};
+
+static void __attribute__((__noreturn__)) usage(void)
+{
+	/* Print the help text on stdout, then terminate successfully. */
+	fputs(USAGE_HEADER, stdout);
+	printf(_(" %s [options] <mountpoint>\n"),
+	       program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Suspend access to a filesystem.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	fputs(_(" -f, --freeze      freeze the filesystem\n"), stdout);
+	fputs(_(" -u, --unfreeze    unfreeze the filesystem\n"), stdout);
+	fputs(USAGE_SEPARATOR, stdout);
+	printf(USAGE_HELP_OPTIONS(19));
+	printf(USAGE_MAN_TAIL("fsfreeze(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Parse the command line, open the given path (which must be a directory)
+ * and issue the FIFREEZE or FITHAW ioctl on it.  The exit status is
+ * EXIT_SUCCESS only when that ioctl succeeded.
+ */
+int main(int argc, char **argv)
+{
+	int fd = -1, opt;
+	int action = NOOP, status = EXIT_FAILURE;
+	char *path;
+	struct stat st;
+
+	static const struct option longopts[] = {
+	    { "help",      no_argument, NULL, 'h' },
+	    { "freeze",    no_argument, NULL, 'f' },
+	    { "unfreeze",  no_argument, NULL, 'u' },
+	    { "version",   no_argument, NULL, 'V' },
+	    { NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {       /* rows and cols in ASCII order */
+		{ 'f','u' },			/* freeze, unfreeze */
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	for (;;) {
+		opt = getopt_long(argc, argv, "hfuV", longopts, NULL);
+		if (opt == -1)
+			break;
+
+		/* checked against excl[] above, which pairs -f with -u */
+		err_exclusive_options(opt, longopts, excl, excl_st);
+
+		if (opt == 'h') {
+			/* usage() is declared noreturn */
+			usage();
+		} else if (opt == 'f') {
+			action = FREEZE;
+		} else if (opt == 'u') {
+			action = UNFREEZE;
+		} else if (opt == 'V') {
+			printf(UTIL_LINUX_VERSION);
+			exit(EXIT_SUCCESS);
+		} else {
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (action == NOOP)
+		errx(EXIT_FAILURE, _("neither --freeze or --unfreeze specified"));
+	if (optind == argc)
+		errx(EXIT_FAILURE, _("no filename specified"));
+
+	/* exactly one positional argument (the mountpoint path) is accepted */
+	path = argv[optind++];
+	if (optind != argc) {
+		warnx(_("unexpected number of arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), path);
+
+	/* From here on failures only warn, so that fd gets closed before
+	 * the (initially EXIT_FAILURE) status is returned. */
+	if (fstat(fd, &st) == -1) {
+		warn(_("stat of %s failed"), path);
+	} else if (!S_ISDIR(st.st_mode)) {
+		warnx(_("%s: is not a directory"), path);
+	} else if (action == FREEZE) {
+		/* -f, --freeze */
+		if (ioctl(fd, FIFREEZE, 0))
+			warn(_("%s: freeze failed"), path);
+		else
+			status = EXIT_SUCCESS;
+	} else if (action == UNFREEZE) {
+		/* -u, --unfreeze */
+		if (ioctl(fd, FITHAW, 0))
+			warn(_("%s: unfreeze failed"), path);
+		else
+			status = EXIT_SUCCESS;
+	} else {
+		/* not reached: NOOP was rejected right after option parsing */
+		abort();
+	}
+
+	close(fd);
+	return status;
+}
+
diff --git a/sys-utils/fstab.5 b/sys-utils/fstab.5
new file mode 100644
index 0000000..a9e9f8c
--- /dev/null
+++ b/sys-utils/fstab.5
@@ -0,0 +1,248 @@
+.\" Copyright (c) 1980, 1989, 1991 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\"    must display the following acknowledgement:
+.\"	This product includes software developed by the University of
+.\"	California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"     @(#)fstab.5	6.5 (Berkeley) 5/10/91
+.\"
+.TH FSTAB 5 "February 2015" "util-linux" "File Formats"
+.SH NAME
+fstab \- static information about the filesystems
+.SH SYNOPSIS
+.I /etc/fstab
+.SH DESCRIPTION
+The file
+.B fstab
+contains descriptive information about the filesystems the system can mount.
+.B fstab
+is only read by programs, and not written; it is the duty of the system
+administrator to properly create and maintain this file.  The order of records in
+.B fstab
+is important because
+.BR fsck (8),
+.BR mount (8),
+and
+.BR umount (8)
+sequentially iterate through
+.B fstab
+doing their thing.
+
+Each filesystem is described on a separate line.
+Fields on each line are separated by tabs or spaces.
+Lines starting with '#' are comments.  Blank lines are ignored.
+.PP
+The following is a typical example of an
+.B fstab
+entry:
+.sp
+.RS 7
+LABEL=t-home2   /home      ext4    defaults,auto_da_alloc      0  2
+.RE
+
+.B The first field
+.RI ( fs_spec ).
+.RS
+This field describes the block special device or
+remote filesystem to be mounted.
+.LP
+For ordinary mounts, it will hold (a link to) a block special
+device node (as created by
+.BR mknod (8))
+for the device to be mounted, like `/dev/cdrom' or `/dev/sdb7'.
+For NFS mounts, this field is <host>:<dir>, e.g., `knuth.aeb.nl:/'.
+For filesystems with no storage, any string can be used, and will show up in
+.BR df (1)
+output, for example.  Typical usage is `proc' for procfs; `mem', `none',
+or `tmpfs' for tmpfs.  Other special filesystems, like udev and sysfs,
+are typically not listed in
+.BR fstab .
+.LP
+LABEL=<label> or UUID=<uuid> may be given instead of a device name.
+This is the recommended method, as device names are often a coincidence
+of hardware detection order, and can change when other disks are added or removed.
+For example, `LABEL=Boot' or `UUID=3e6be9de\%-8139\%-11d1\%-9106\%-a43f08d823a6'.
+(Use a filesystem-specific tool like
+.BR e2label (8),
+.BR xfs_admin (8),
+or
+.BR fatlabel (8)
+to set LABELs on filesystems).
+
+It's also possible to use PARTUUID= and PARTLABEL=.  These partition identifiers
+are supported, for example, by GUID Partition Tables (GPT).
+
+See
+.BR mount (8),
+.BR blkid (8)
+or
+.BR lsblk (8)
+for more details about device identifiers.
+
+.LP
+Note that
+.BR mount (8)
+uses UUIDs as strings. The string representation of the UUID should be based on
+lower case characters.
+.RE
+
+.B The second field
+.RI ( fs_file ).
+.RS
+This field describes the mount point (target) for the filesystem.  For swap partitions, this
+field should be specified as `none'. If the name of the mount point
+contains spaces or tabs, these can be escaped as `\\040' and `\\011'
+respectively.
+.RE
+
+.B The third field
+.RI ( fs_vfstype ).
+.RS
+This field describes the type of the filesystem.  Linux supports many
+filesystem types: ext4, xfs, btrfs, f2fs, vfat, ntfs, hfsplus,
+tmpfs, sysfs, proc, iso9660, udf, squashfs, nfs, cifs, and many more.
+For more details, see
+.BR mount (8).
+
+An entry
+.I swap
+denotes a file or partition to be used
+for swapping, cf.\&
+.BR swapon (8).
+An entry
+.I none
+is useful for bind or move mounts.
+
+More than one type may be specified in a comma-separated list.
+
+.BR mount (8)
+and
+.BR umount (8)
+support filesystem
+.IR subtypes .
+The subtype is defined by a '.subtype' suffix.  For
+example 'fuse.sshfs'. It's recommended to use subtype notation rather than add
+any prefix to the first fstab field (for example 'sshfs#example.com' is
+deprecated).
+.RE
+
+.B The fourth field
+.RI ( fs_mntops ).
+.RS
+This field describes the mount options associated with the filesystem.
+
+It is formatted as a comma-separated list of options.
+It contains at least the type of mount
+.RB ( ro
+or
+.BR rw ),
+plus any additional options appropriate to the filesystem
+type (including performance-tuning options).
+For details, see
+.BR mount (8)
+or
+.BR swapon (8).
+
+Basic filesystem-independent options are:
+.TP
+.B defaults
+use default options: rw, suid, dev, exec, auto, nouser, and async.
+.TP
+.B noauto
+do not mount when "mount -a" is given (e.g., at boot time)
+.TP
+.B user
+allow a user to mount
+.TP
+.B owner
+allow device owner to mount
+.TP
+.B comment
+or
+.B x-<name>
+for use by fstab-maintaining programs
+.TP
+.B nofail
+do not report errors for this device if it does not exist.
+.RE
+
+.B The fifth field
+.RI ( fs_freq ).
+.RS
+This field is used by
+.BR dump (8)
+to determine which filesystems need to be dumped.
+Defaults to zero (don't dump) if not present.
+.RE
+
+.B The sixth field
+.RI ( fs_passno ).
+.RS
+This field is used by
+.BR fsck (8)
+to determine the order in which filesystem checks are done at
+boot time.  The root filesystem should be specified with a
+.I fs_passno
+of 1.  Other filesystems should have a
+.I fs_passno
+of 2.  Filesystems within a drive will be checked sequentially, but
+filesystems on different drives will be checked at the same time to utilize
+parallelism available in the hardware.
+Defaults to zero (don't fsck) if not present.
+
+.SH NOTES
+The proper way to read records from
+.B fstab
+is to use the routines
+.BR getmntent (3)
+or
+.BR libmount .
+
+The keyword
+.B ignore
+as a filesystem type (3rd field) is no longer supported by the pure
+libmount based mount utility (since util-linux v2.22).
+
+.SH FILES
+.IR /etc/fstab ,
+.I <fstab.h>
+.SH "SEE ALSO"
+.BR getmntent (3),
+.BR fs (5),
+.BR findmnt (8),
+.BR mount (8),
+.BR swapon (8)
+.SH HISTORY
+The ancestor of this
+.B fstab
+file format appeared in 4.0BSD.
+.\" But without comment convention, and options and vfs_type.
+.\" Instead there was a type rw/ro/rq/sw/xx, where xx is the present 'ignore'.
+.SH AVAILABILITY
+This man page is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/fstrim.8 b/sys-utils/fstrim.8
new file mode 100644
index 0000000..ff572a4
--- /dev/null
+++ b/sys-utils/fstrim.8
@@ -0,0 +1,131 @@
+.TH FSTRIM 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+fstrim \- discard unused blocks on a mounted filesystem
+.SH SYNOPSIS
+.B fstrim
+.RB [ \-Aa ]
+.RB [ \-o
+.IR offset ]
+.RB [ \-l
+.IR length ]
+.RB [ \-m
+.IR minimum-size ]
+.RB [ \-v ]
+.I mountpoint
+
+.SH DESCRIPTION
+.B fstrim
+is used on a mounted filesystem to discard (or "trim") blocks which are not in
+use by the filesystem.  This is useful for solid-state drives (SSDs) and
+thinly-provisioned storage.
+.PP
+By default,
+.B fstrim
+will discard all unused blocks in the filesystem.  Options may be used to
+modify this behavior based on range or size, as explained below.
+.PP
+The
+.I mountpoint
+argument is the pathname of the directory where the filesystem
+is mounted.
+.PP
+Running
+.B fstrim
+frequently, or even using
+.BR "mount \-o discard" ,
+might negatively affect the lifetime of poor-quality SSD devices.  For most
+desktop and server systems a sufficient trimming frequency is once a week.
+Note that not all
+devices support a queued trim, so each trim command incurs a performance penalty
+on whatever else might be trying to use the disk at the time.
+
+.SH OPTIONS
+The \fIoffset\fR, \fIlength\fR, and \fIminimum-size\fR arguments may be
+followed by the multiplicative suffixes KiB (=1024),
+MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB"
+is optional, e.g., "K" has the same meaning as "KiB") or the suffixes
+KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB.
+
+.IP "\fB\-A, \-\-fstab\fP"
+Trim all mounted filesystems mentioned in \fI/etc/fstab\fR on devices that support the
+discard operation.
+The other supplied options, like \fB\-\-offset\fR, \fB\-\-length\fR and
+\fB\-\-minimum\fR, are applied to all these devices.
+Errors from filesystems that do not support the discard operation are silently
+ignored.
+
+.IP "\fB\-a, \-\-all\fP"
+Trim all mounted filesystems on devices that support the discard operation.
+The other supplied options, like \fB\-\-offset\fR, \fB\-\-length\fR and
+\fB\-\-minimum\fR, are applied to all these devices.
+Errors from filesystems that do not support the discard operation are silently
+ignored.
+.IP "\fB\-n, \-\-dry\-run\fP"
+This option does everything apart from actually calling the FITRIM ioctl.
+.IP "\fB\-o, \-\-offset\fP \fIoffset\fP"
+Byte offset in the filesystem from which to begin searching for free blocks
+to discard.  The default value is zero, starting at the beginning of the
+filesystem.
+.IP "\fB\-l, \-\-length\fP \fIlength\fP"
+The number of bytes (after the starting point) to search for free blocks
+to discard.  If the specified value extends past the end of the filesystem,
+.B fstrim
+will stop at the filesystem size boundary.  The default value extends to
+the end of the filesystem.
+.IP "\fB\-m, \-\-minimum\fP \fIminimum-size\fP"
+Minimum contiguous free range to discard, in bytes. (This value is internally
+rounded up to a multiple of the filesystem block size.)  Free ranges smaller
+than this will be ignored.  By increasing this value, the fstrim operation
+will complete more quickly for filesystems with badly fragmented freespace,
+although not all blocks will be discarded.  The default value is zero,
+discarding every free block.
+.IP "\fB\-v, \-\-verbose\fP"
+Verbose execution.  With this option
+.B fstrim
+will output the number of bytes passed from the filesystem
+down the block stack to the device for potential discard.  This number is a
+maximum discard amount from the storage device's perspective, because
+.I FITRIM
+ioctl called repeatedly will keep sending the same sectors for discard.
+.sp
+.B fstrim
+will report the same potential discard bytes each time, but only sectors which
+had been written to between the discards would actually be discarded by the
+storage device.  Further, the kernel block layer reserves the right to adjust
+the discard ranges to fit raid stripe geometry, non-trim capable devices in a
+LVM setup, etc.  These reductions would not be reflected in fstrim_range.len
+(the
+.B \-\-length
+option).
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+
+.SH RETURN CODES
+.IP 0
+success
+.IP 1
+failure
+.IP 32
+all failed
+.IP 64
+some filesystem discards have succeeded, some failed
+.PP
+The command
+.B fstrim \-\-all
+returns 0 (all succeeded), 32 (all failed) or 64 (some failed, some succeeded).
+
+.SH AUTHOR
+.nf
+Lukas Czerner <lczerner@redhat.com>
+Karel Zak <kzak@redhat.com>
+.fi
+.SH SEE ALSO
+.BR blkdiscard (8),
+.BR mount (8)
+.SH AVAILABILITY
+The fstrim command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/fstrim.c b/sys-utils/fstrim.c
new file mode 100644
index 0000000..2a67892
--- /dev/null
+++ b/sys-utils/fstrim.c
@@ -0,0 +1,417 @@
+/*
+ * fstrim.c -- discard the part (or whole) of mounted filesystem.
+ *
+ * Copyright (C) 2010 Red Hat, Inc. All rights reserved.
+ * Written by Lukas Czerner <lczerner@redhat.com>
+ *            Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This program uses FITRIM ioctl to discard parts or the whole filesystem
+ * online (mounted). You can specify range (start and length) to be
+ * discarded, or simply discard whole filesystem.
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <getopt.h>
+
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <linux/fs.h>
+
+#include "nls.h"
+#include "strutils.h"
+#include "c.h"
+#include "closestream.h"
+#include "pathnames.h"
+#include "sysfs.h"
+
+#include <libmount.h>
+
+
+#ifndef FITRIM
+/*
+ * Fallback used when none of the headers included above defines FITRIM.
+ * A pointer to this structure is the argument of the FITRIM ioctl.
+ */
+struct fstrim_range {
+	uint64_t start;		/* set by --offset; zero when not given */
+	uint64_t len;		/* set by --length; ULLONG_MAX when not given */
+	uint64_t minlen;	/* set by --minimum; zero when not given */
+};
+#define FITRIM		_IOWR('X', 121, struct fstrim_range)
+#endif
+
+/* Run-time settings collected from the command line. */
+struct fstrim_control {
+	struct fstrim_range range;	/* template; copied before every FITRIM call */
+
+	unsigned int verbose : 1,	/* -v: report the number of trimmed bytes */
+		     fstab   : 1,	/* -A: read filesystems from fstab instead of mountinfo */
+		     dryrun : 1;	/* -n: do everything but the FITRIM ioctl */
+};
+
+/*
+ * Discard unused blocks on the filesystem mounted at @path.
+ *
+ * @ctl:     settings; ctl->range is copied to a local variable because the
+ *           kernel modifies the range passed to FITRIM
+ * @path:    mountpoint, has to be a directory
+ * @devname: device name, used in messages only; may be NULL
+ *
+ * In dry-run mode the ioctl is skipped and a "0 B (dry run)" line is printed
+ * (independently of the verbose setting).
+ *
+ * returns: 0 = success, 1 = unsupported, < 0 = error (negative errno)
+ */
+static int fstrim_filesystem(struct fstrim_control *ctl, const char *path, const char *devname)
+{
+	int fd, rc;
+	struct stat sb;
+	struct fstrim_range range;
+
+	/* kernel modifies the range */
+	memcpy(&range, &ctl->range, sizeof(range));
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0) {
+		/* save errno first, warn() does I/O and may change it */
+		rc = -errno;
+		warn(_("cannot open %s"), path);
+		goto done;
+	}
+	if (fstat(fd, &sb) == -1) {
+		/* save errno first, warn() does I/O and may change it */
+		rc = -errno;
+		warn(_("stat of %s failed"), path);
+		goto done;
+	}
+	if (!S_ISDIR(sb.st_mode)) {
+		warnx(_("%s: not a directory"), path);
+		rc = -EINVAL;
+		goto done;
+	}
+
+	if (ctl->dryrun) {
+		if (devname)
+			printf(_("%s: 0 B (dry run) trimmed on %s\n"), path, devname);
+		else
+			printf(_("%s: 0 B (dry run) trimmed\n"), path);
+		rc = 0;
+		goto done;
+	}
+
+	errno = 0;
+	if (ioctl(fd, FITRIM, &range)) {
+		/* "not supported" is reported as 1 and left to the caller */
+		rc = errno == EOPNOTSUPP || errno == ENOTTY ? 1 : -errno;
+
+		if (rc != 1)
+			warn(_("%s: FITRIM ioctl failed"), path);
+		goto done;
+	}
+
+	if (ctl->verbose) {
+		/* range.len now holds the value written back by the ioctl */
+		char *str = size_to_human_string(
+				SIZE_SUFFIX_3LETTER | SIZE_SUFFIX_SPACE,
+				(uint64_t) range.len);
+		if (devname)
+			/* TRANSLATORS: The standard value here is a very large number. */
+			printf(_("%s: %s (%" PRIu64 " bytes) trimmed on %s\n"),
+				path, str, (uint64_t) range.len, devname);
+		else
+			/* TRANSLATORS: The standard value here is a very large number. */
+			printf(_("%s: %s (%" PRIu64 " bytes) trimmed\n"),
+				path, str, (uint64_t) range.len);
+
+		free(str);
+	}
+
+	rc = 0;
+done:
+	if (fd >= 0)
+		close(fd);
+	return rc;
+}
+
+/*
+ * Check in sysfs whether the block device @devname advertises discard.
+ *
+ * @wholedisk caches the sysfs context of the most recently used whole disk
+ * between calls; it may be replaced here and the caller has to unref it
+ * when done.
+ *
+ * Returns 1 when queue/discard_granularity was read and is greater than
+ * zero, and also when the sysfs lookup itself failed (the "fail" label), so
+ * such a device is not filtered out by the caller.  Returns 0 when the
+ * attribute cannot be read or is zero.
+ */
+static int has_discard(const char *devname, struct path_cxt **wholedisk)
+{
+	struct path_cxt *pc = NULL;
+	uint64_t dg = 0;
+	dev_t disk = 0, dev;
+	int rc = -1;
+
+	dev = sysfs_devname_to_devno(devname);
+	if (!dev)
+		goto fail;
+
+	pc = ul_new_sysfs_path(dev, NULL, NULL);
+	if (!pc)
+		goto fail;
+
+	/*
+	 * Reading the info from sys/ is tricky, because the queue
+	 * attributes are provided for whole devices (disks) only. We try
+	 * to reuse the whole-disk sysfs context to optimize this (as
+	 * systems usually have just one disk).
+	 */
+	rc = sysfs_blkdev_get_wholedisk(pc, NULL, 0, &disk);
+	if (rc != 0 || !disk)
+		goto fail;
+
+	if (dev != disk) {
+		/* Partition, try reuse whole-disk context if valid for the
+		 * current device, otherwise create new context for the
+		 * whole-disk.
+		 */
+		if (*wholedisk && sysfs_blkdev_get_devno(*wholedisk) != disk) {
+			ul_unref_path(*wholedisk);
+			*wholedisk = NULL;
+		}
+		if (!*wholedisk) {
+			*wholedisk = ul_new_sysfs_path(disk, NULL, NULL);
+			if (!*wholedisk)
+				goto fail;
+		}
+		sysfs_blkdev_set_parent(pc, *wholedisk);
+	}
+
+	rc = ul_path_read_u64(pc, &dg, "queue/discard_granularity");
+
+	ul_unref_path(pc);
+	return rc == 0 && dg > 0;
+fail:
+	/* pc is still NULL when we get here before it was allocated */
+	ul_unref_path(pc);
+	return 1;
+}
+
+
+/*
+ * Comparator for mnt_table_uniq_fs(): returns 0 when @a and @b have the
+ * same target (mountpoint), 1 otherwise.
+ */
+static int uniq_fs_target_cmp(
+		struct libmnt_table *tb __attribute__((__unused__)),
+		struct libmnt_fs *a,
+		struct libmnt_fs *b)
+{
+	const char *other_target = mnt_fs_get_target(b);
+
+	if (mnt_fs_streq_target(a, other_target))
+		return 0;
+	return 1;
+}
+
+/*
+ * Comparator for mnt_table_uniq_fs(): returns 0 when @a and @b have the
+ * same source path, 1 otherwise.  Pseudo and network filesystems are
+ * always reported as different.
+ */
+static int uniq_fs_source_cmp(
+		struct libmnt_table *tb __attribute__((__unused__)),
+		struct libmnt_fs *a,
+		struct libmnt_fs *b)
+{
+	const char *other_src;
+
+	if (mnt_fs_is_pseudofs(a) || mnt_fs_is_netfs(a))
+		return 1;
+	if (mnt_fs_is_pseudofs(b) || mnt_fs_is_netfs(b))
+		return 1;
+
+	other_src = mnt_fs_get_srcpath(b);
+	if (mnt_fs_streq_srcpath(a, other_src))
+		return 0;
+	return 1;
+}
+
+/*
+ * Trim every suitable filesystem listed in /proc/self/mountinfo or, when
+ * ctl->fstab is set, in the file returned by mnt_get_fstab_path().
+ *
+ * fstrim --all follows "mount -a" return codes:
+ *
+ * 0  = all success
+ * 32 = all failed
+ * 64 = some failed, some success
+ *
+ * Only entries that reach the FITRIM attempt are counted; an "unsupported"
+ * result (1) from fstrim_filesystem() is not counted as a failure.
+ */
+static int fstrim_all(struct fstrim_control *ctl)
+{
+	struct libmnt_fs *fs;
+	struct libmnt_iter *itr;
+	struct libmnt_table *tab;
+	struct libmnt_cache *cache = NULL;
+	struct path_cxt *wholedisk = NULL;
+	int cnt = 0, cnt_err = 0;
+	const char *filename = _PATH_PROC_MOUNTINFO;
+
+	mnt_init_debug(0);
+	ul_path_init_debug();
+
+	itr = mnt_new_iter(MNT_ITER_BACKWARD);
+	if (!itr)
+		err(MNT_EX_FAIL, _("failed to initialize libmount iterator"));
+
+	if (ctl->fstab)
+		filename = mnt_get_fstab_path();
+
+	tab = mnt_new_table_from_file(filename);
+	if (!tab)
+		err(MNT_EX_FAIL, _("failed to parse %s"), filename);
+
+	/* de-duplicate by mountpoints */
+	mnt_table_uniq_fs(tab, 0, uniq_fs_target_cmp);
+
+	/* de-duplicate by source */
+	mnt_table_uniq_fs(tab, MNT_UNIQ_FORWARD, uniq_fs_source_cmp);
+
+	/* the cache is needed only to resolve fstab tags (LABEL=, ...) */
+	if (ctl->fstab) {
+		cache = mnt_new_cache();
+		if (!cache)
+			err(MNT_EX_FAIL, _("failed to initialize libmount cache"));
+	}
+
+	while (mnt_table_next_fs(tab, itr, &fs) == 0) {
+		const char *src = mnt_fs_get_srcpath(fs),
+			   *tgt = mnt_fs_get_target(fs);
+		char *path;
+		int rc = 1;
+
+		if (!tgt || mnt_fs_is_pseudofs(fs) || mnt_fs_is_netfs(fs))
+			continue;
+
+		if (!src && cache) {
+			/* convert LABEL= (etc.) from fstab to paths */
+			const char *spec = mnt_fs_get_source(fs);
+
+			if (!spec)
+				continue;
+			src = mnt_resolve_spec(spec, cache);
+		}
+
+		/* only sources given by an absolute path are considered */
+		if (!src || *src != '/')
+			continue;
+
+		/* Is it really accessible mountpoint? Not all mountpoints are
+		 * accessible (maybe over mounted by another filesystem) */
+		path = mnt_get_mountpoint(tgt);
+		if (path && strcmp(path, tgt) == 0)
+			rc = 0;
+		free(path);
+		if (rc)
+			continue;	/* overlaying mount */
+
+		if (!has_discard(src, &wholedisk))
+			continue;
+		cnt++;
+
+		/*
+		 * We're able to detect that the device supports discard, but
+		 * things also depend on filesystem or device mapping, for
+		 * example vfat or LUKS (by default) does not support FITRIM.
+		 *
+		 * This is reason why we ignore EOPNOTSUPP and ENOTTY errors
+		 * from discard ioctl.
+		 */
+		if (fstrim_filesystem(ctl, tgt, src) < 0)
+		       cnt_err++;
+	}
+
+	ul_unref_path(wholedisk);
+	mnt_unref_table(tab);
+	mnt_free_iter(itr);
+	mnt_unref_cache(cache);
+
+	if (cnt && cnt == cnt_err)
+		return MNT_EX_FAIL;		/* all failed */
+	if (cnt && cnt_err)
+		return MNT_EX_SOMEOK;		/* some ok */
+
+	return MNT_EX_SUCCESS;
+}
+
+/* Print the help text to stdout and exit with EXIT_SUCCESS. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	fputs(USAGE_HEADER, out);
+	fprintf(out,
+	      _(" %s [options] <mount point>\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Discard unused blocks on a mounted filesystem.\n"), out);
+
+	/* one line per option accepted by getopt_long() in main() */
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -a, --all           trim all supported mounted filesystems\n"), out);
+	fputs(_(" -A, --fstab         trim all supported mounted filesystems from /etc/fstab\n"), out);
+	fputs(_(" -o, --offset <num>  the offset in bytes to start discarding from\n"), out);
+	fputs(_(" -l, --length <num>  the number of bytes to discard\n"), out);
+	fputs(_(" -m, --minimum <num> the minimum extent length to discard\n"), out);
+	fputs(_(" -v, --verbose       print number of discarded bytes\n"), out);
+	fputs(_(" -n, --dry-run       does everything, but trim\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(21));
+	printf(USAGE_MAN_TAIL("fstrim(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Parse the command line and trim either one mountpoint or, with -a/-A,
+ * all suitable filesystems.
+ *
+ * Exit status: the MNT_EX_* code from fstrim_all() for -a/-A, otherwise
+ * EXIT_SUCCESS when the filesystem was trimmed and EXIT_FAILURE on any
+ * error, including an unsupported discard operation.
+ */
+int main(int argc, char **argv)
+{
+	char *path = NULL;
+	int c, rc, all = 0;
+	/* by default trim the whole filesystem: start 0, maximal length */
+	struct fstrim_control ctl = {
+			.range = { .len = ULLONG_MAX }
+	};
+
+	static const struct option longopts[] = {
+	    { "all",       no_argument,       NULL, 'a' },
+	    { "fstab",     no_argument,       NULL, 'A' },
+	    { "help",      no_argument,       NULL, 'h' },
+	    { "version",   no_argument,       NULL, 'V' },
+	    { "offset",    required_argument, NULL, 'o' },
+	    { "length",    required_argument, NULL, 'l' },
+	    { "minimum",   required_argument, NULL, 'm' },
+	    { "verbose",   no_argument,       NULL, 'v' },
+	    { "dry-run",   no_argument,       NULL, 'n' },
+	    { NULL, 0, NULL, 0 }
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv, "Aahl:m:no:Vv", longopts, NULL)) != -1) {
+		switch(c) {
+		case 'A':
+			/* --fstab implies --all, only the list of filesystems differs */
+			ctl.fstab = 1;
+			/* fallthrough */
+		case 'a':
+			all = 1;
+			break;
+		case 'n':
+			ctl.dryrun = 1;
+			break;
+		case 'h':
+			usage();
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'l':
+			ctl.range.len = strtosize_or_err(optarg,
+					_("failed to parse length"));
+			break;
+		case 'o':
+			ctl.range.start = strtosize_or_err(optarg,
+					_("failed to parse offset"));
+			break;
+		case 'm':
+			ctl.range.minlen = strtosize_or_err(optarg,
+					_("failed to parse minimum extent length"));
+			break;
+		case 'v':
+			ctl.verbose = 1;
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+			break;
+		}
+	}
+
+	/* a mountpoint is mandatory unless -a or -A has been given */
+	if (!all) {
+		if (optind == argc)
+			errx(EXIT_FAILURE, _("no mountpoint specified"));
+		path = argv[optind++];
+	}
+
+	/* anything left on the command line is an error */
+	if (optind != argc) {
+		warnx(_("unexpected number of arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	if (all)
+		return fstrim_all(&ctl);	/* MNT_EX_* codes */
+
+	rc = fstrim_filesystem(&ctl, path, NULL);
+	if (rc == 1)
+		warnx(_("%s: the discard operation is not supported"), path);
+
+	return rc == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/sys-utils/fstrim.service.in b/sys-utils/fstrim.service.in
new file mode 100644
index 0000000..2d5daf9
--- /dev/null
+++ b/sys-utils/fstrim.service.in
@@ -0,0 +1,7 @@
+# Runs "fstrim -Av", i.e. fstrim --fstab --verbose.
+[Unit]
+Description=Discard unused blocks on filesystems from /etc/fstab
+Documentation=man:fstrim(8)
+
+[Service]
+Type=oneshot
+ExecStart=@sbindir@/fstrim -Av
diff --git a/sys-utils/fstrim.timer b/sys-utils/fstrim.timer
new file mode 100644
index 0000000..3a3762d
--- /dev/null
+++ b/sys-utils/fstrim.timer
@@ -0,0 +1,11 @@
+# Weekly timer.
+# NOTE(review): no Unit= is set, so this presumably activates the unit of
+# the same name (fstrim.service) - confirm against systemd.timer(5).
+[Unit]
+Description=Discard unused blocks once a week
+Documentation=man:fstrim
+
+[Timer]
+OnCalendar=weekly
+AccuracySec=1h
+Persistent=true
+
+[Install]
+WantedBy=timers.target
diff --git a/sys-utils/hwclock-cmos.c b/sys-utils/hwclock-cmos.c
new file mode 100644
index 0000000..a11f676
--- /dev/null
+++ b/sys-utils/hwclock-cmos.c
@@ -0,0 +1,420 @@
+/*
+ * i386 CMOS starts out with 14 bytes of clock data; alpha has something
+ * similar, but with details depending on the machine type.
+ *
+ * byte 0: seconds		0-59
+ * byte 2: minutes		0-59
+ * byte 4: hours		0-23 in 24hr mode,
+ *				1-12 in 12hr mode, with high bit unset/set
+ *					if am/pm.
+ * byte 6: weekday		1-7, Sunday=1
+ * byte 7: day of the month	1-31
+ * byte 8: month		1-12
+ * byte 9: year			0-99
+ *
+ * Numbers are stored in BCD/binary if bit 2 of byte 11 is unset/set. The
+ * clock is in 12hr/24hr mode if bit 1 of byte 11 is unset/set. The clock is
+ * undefined (being updated) if bit 7 of byte 10 is set. The clock is frozen
+ * (to be updated) by setting bit 7 of byte 11. Bit 7 of byte 14 indicates
+ * whether the CMOS clock is reliable: it is 1 if RTC power has been good
+ * since this bit was last read; it is 0 when the battery is dead and system
+ * power has been off.
+ *
+ * Avoid setting the RTC clock within 2 seconds of the day rollover that
+ * starts a new month or enters daylight saving time.
+ *
+ * The century situation is messy:
+ *
+ * Usually byte 50 (0x32) gives the century (in BCD, so 19 or 20 hex), but
+ * IBM PS/2 has (part of) a checksum there and uses byte 55 (0x37).
+ * Sometimes byte 127 (0x7f) or Bank 1, byte 0x48 gives the century. The
+ * original RTC will not access any century byte; some modern versions will.
+ * If a modern RTC or BIOS increments the century byte it may go from 0x19
+ * to 0x20, but in some buggy cases 0x1a is produced.
+ */
+/*
+ * A struct tm has int fields
+ *   tm_sec	0-59, 60 or 61 only for leap seconds
+ *   tm_min	0-59
+ *   tm_hour	0-23
+ *   tm_mday	1-31
+ *   tm_mon	0-11
+ *   tm_year	number of years since 1900
+ *   tm_wday	0-6, 0=Sunday
+ *   tm_yday	0-365
+ *   tm_isdst	>0: yes, 0: no, <0: unknown
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "nls.h"
+#include "pathnames.h"
+
+/*
+ * for inb, outb
+ *
+ * On x86 the functions come from <sys/io.h> or <asm/io.h>.  When neither
+ * header is available, or on any other architecture, empty stubs are
+ * compiled instead; in the "no header" case __i386__ and __x86_64__ are
+ * also undefined, so the later checks of these macros in this file take
+ * the non-x86 branch.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+# ifdef HAVE_SYS_IO_H
+#  include <sys/io.h>
+# elif defined(HAVE_ASM_IO_H)
+#  include <asm/io.h>
+# else
+#  undef __i386__
+#  undef __x86_64__
+#  warning "disable cmos access - no sys/io.h or asm/io.h"
+/* stub: port writes are silently dropped */
+static void outb(int a __attribute__((__unused__)),
+		 int b __attribute__((__unused__)))
+{
+}
+
+/* stub: port reads always return 0 */
+static int inb(int c __attribute__((__unused__)))
+{
+	return 0;
+}
+# endif				/* HAVE_SYS_IO_H, HAVE_ASM_IO_H */
+#else
+# warning "disable cmos access - not i386 or x86_64"
+/* stub: port writes are silently dropped */
+static void outb(int a __attribute__((__unused__)),
+		 int b __attribute__((__unused__)))
+{
+}
+
+/* stub: port reads always return 0 */
+static int inb(int c __attribute__((__unused__)))
+{
+	return 0;
+}
+#endif				/* for inb, outb */
+
+#include "hwclock.h"
+
+/*
+ * Convert between packed BCD (tens in the high nibble, units in the low
+ * nibble) and binary.  Both macros assign the result back to their
+ * argument, which therefore has to be an lvalue and is evaluated more
+ * than once.
+ */
+#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10)
+#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10)
+
+/* returned by i386_iopl() when port access is not compiled in */
+#define IOPL_NOT_IMPLEMENTED -2
+
+/*
+ * POSIX uses 1900 as epoch for a struct tm, and 1970 for a time_t.
+ */
+#define TM_EPOCH 1900
+
+/* I/O ports: the register index is written to the first one, the register
+ * content is read from or written to the second one */
+static unsigned short clock_ctl_addr = 0x70;
+static unsigned short clock_data_addr = 0x71;
+
+/*
+ * Call op(arg) and return its result.  All CMOS accesses made through
+ * hclock_read() and hclock_set_time() go through this wrapper; currently
+ * it adds no locking or scheduling guarantees at all.
+ *
+ * Hmmh, this isn't very atomic. Maybe we should force an error instead?
+ *
+ * TODO: optimize the access to CMOS by mlockall(MCL_CURRENT) and SCHED_FIFO
+ */
+static unsigned long atomic(unsigned long (*op) (unsigned long),
+			    unsigned long arg)
+{
+	return (*op) (arg);
+}
+
+/*
+ * We only want to read CMOS data, but unfortunately writing to bit 7
+ * disables (1) or enables (0) NMI; since this bit is read-only we have
+ * to guess the old status. Various docs suggest that one should disable
+ * NMI while reading/writing CMOS data, and enable it again afterwards.
+ * This would yield the sequence
+ *
+ *  outb (reg | 0x80, 0x70);
+ *  val = inb(0x71);
+ *  outb (0x0d, 0x70);  // 0x0d: random read-only location
+ *
+ * Other docs state that "any write to 0x70 should be followed by an
+ * action to 0x71 or the RTC will be left in an unknown state". Most
+ * docs say that it doesn't matter at all what one does.
+ *
+ * bit 0x80: disable NMI while reading - should we? Let us follow the
+ * kernel and not disable. Called only with 0 <= reg < 128
+ */
+
+/* Select register @reg through the control port and return the value read
+ * from the data port. */
+static inline unsigned long cmos_read(unsigned long reg)
+{
+	unsigned long value;
+
+	outb(reg, clock_ctl_addr);
+	value = inb(clock_data_addr);
+
+	return value;
+}
+
+/* Select register @reg through the control port and write @val to the data
+ * port.  Always returns 0. */
+static inline unsigned long cmos_write(unsigned long reg, unsigned long val)
+{
+	outb(reg, clock_ctl_addr);
+	outb(val, clock_data_addr);
+	return 0;
+}
+
+/*
+ * Write the broken-down time to the CMOS clock registers.
+ *
+ * @arg is a pointer to a struct tm cast to unsigned long (see
+ * hclock_set_time()); the structure is copied, the caller's copy is not
+ * modified.  Always returns 0.
+ */
+static unsigned long cmos_set_time(unsigned long arg)
+{
+	unsigned char save_control, save_freq_select, pmbit = 0;
+	struct tm tm = *(struct tm *)arg;
+
+/*
+ * CMOS byte 10 (clock status register A) has 3 bitfields:
+ * bit 7: 1 if data invalid, update in progress (read-only bit)
+ *         (this is raised 244 us before the actual update starts)
+ *  6-4    select base frequency
+ *         010: 32768 Hz time base (default)
+ *         111: reset
+ *         all other combinations are manufacturer-dependent
+ *         (e.g.: DS1287: 010 = start oscillator, anything else = stop)
+ *  3-0    rate selection bits for interrupt
+ *         0000 none (may stop RTC)
+ *         0001, 0010 give same frequency as 1000, 1001
+ *         0011 122 microseconds (minimum, 8192 Hz)
+ *         .... each increase by 1 halves the frequency, doubles the period
+ *         1111 500 milliseconds (maximum, 2 Hz)
+ *         0110 976.562 microseconds (default 1024 Hz)
+ */
+	save_control = cmos_read(11);	/* tell the clock it's being set */
+	cmos_write(11, (save_control | 0x80));
+	save_freq_select = cmos_read(10);	/* stop and reset prescaler */
+	cmos_write(10, (save_freq_select | 0x70));
+
+	/* struct tm -> register ranges: 2-digit year, month 1-12, weekday 1-7 */
+	tm.tm_year %= 100;
+	tm.tm_mon += 1;
+	tm.tm_wday += 1;
+
+	/*
+	 * NOTE(review): hour 0 ends up as 12 with the PM bit set and hour 12
+	 * as 12 without it; read_hardware_clock_cmos() inverts exactly this
+	 * mapping - confirm it matches the 12-hour convention of the RTC.
+	 */
+	if (!(save_control & 0x02)) {	/* 12hr mode; the default is 24hr mode */
+		if (tm.tm_hour == 0)
+			tm.tm_hour = 24;
+		if (tm.tm_hour > 12) {
+			tm.tm_hour -= 12;
+			pmbit = 0x80;
+		}
+	}
+
+	if (!(save_control & 0x04)) {	/* BCD mode - the default */
+		BIN_TO_BCD(tm.tm_sec);
+		BIN_TO_BCD(tm.tm_min);
+		BIN_TO_BCD(tm.tm_hour);
+		BIN_TO_BCD(tm.tm_wday);
+		BIN_TO_BCD(tm.tm_mday);
+		BIN_TO_BCD(tm.tm_mon);
+		BIN_TO_BCD(tm.tm_year);
+	}
+
+	/* the PM bit is added after the optional BCD conversion */
+	cmos_write(0, tm.tm_sec);
+	cmos_write(2, tm.tm_min);
+	cmos_write(4, tm.tm_hour | pmbit);
+	cmos_write(6, tm.tm_wday);
+	cmos_write(7, tm.tm_mday);
+	cmos_write(8, tm.tm_mon);
+	cmos_write(9, tm.tm_year);
+
+	/*
+	 * The kernel sources, linux/arch/i386/kernel/time.c, have the
+	 * following comment:
+	 *
+	 * The following flags have to be released exactly in this order,
+	 * otherwise the DS12887 (popular MC146818A clone with integrated
+	 * battery and quartz) will not reset the oscillator and will not
+	 * update precisely 500 ms later. You won't find this mentioned in
+	 * the Dallas Semiconductor data sheets, but who believes data
+	 * sheets anyway ... -- Markus Kuhn
+	 */
+	cmos_write(11, save_control);
+	cmos_write(10, save_freq_select);
+	return 0;
+}
+
+/* Read CMOS register @reg via atomic(); the result is narrowed to int. */
+static int hclock_read(unsigned long reg)
+{
+	return atomic(cmos_read, reg);
+}
+
+/* Set the clock from @tm via atomic(); the pointer travels as an unsigned
+ * long and is cast back in cmos_set_time(). */
+static void hclock_set_time(const struct tm *tm)
+{
+	atomic(cmos_set_time, (unsigned long)(tm));
+}
+
+/* Returns non-zero (0x80) while bit 7 (UIP) of Control Register A is set. */
+static inline int cmos_clock_busy(void)
+{
+	const int reg_a = hclock_read(10);
+
+	return reg_a & 0x80;
+}
+
+/*
+ * Busy-wait until the update-in-progress flag has been seen set and then
+ * cleared again.  Returns 0 on success, 1 when one of the two polling
+ * loops runs out of iterations.
+ */
+static int synchronize_to_clock_tick_cmos(const struct hwclock_control *ctl
+					  __attribute__((__unused__)))
+{
+	int spins;
+
+	/*
+	 * Wait for rise. Should be within a second, but in case something
+	 * weird happens, we have a limit on this loop to reduce the impact
+	 * of this failure.
+	 */
+	spins = 0;
+	while (!cmos_clock_busy()) {
+		if (spins >= 10000000)
+			return 1;
+		spins++;
+	}
+
+	/* Wait for fall.  Should be within 2.228 ms. */
+	spins = 0;
+	while (cmos_clock_busy()) {
+		if (spins >= 1000000)
+			return 1;
+		spins++;
+	}
+
+	return 0;
+}
+
+/*
+ * Read the hardware clock and return the current time via <tm> argument.
+ * Assume we have an ISA machine and read the clock directly with CPU I/O
+ * instructions.
+ *
+ * This function is not totally reliable.  It takes a finite and
+ * unpredictable amount of time to execute the code below. During that time,
+ * the clock may change and we may even read an invalid value in the middle
+ * of an update. We do a few checks to minimize this possibility, but only
+ * the kernel can actually read the clock properly, since it can execute
+ * code in a short and predictable amount of time (by turning off
+ * interrupts).
+ *
+ * In practice, the chance of this function returning the wrong time is
+ * extremely remote.
+ *
+ * On return tm_sec, tm_min, tm_hour, tm_wday, tm_mday, tm_mon and tm_year
+ * are set and tm_isdst is -1; tm_yday is not touched.  Always returns 0.
+ */
+static int read_hardware_clock_cmos(const struct hwclock_control *ctl
+				    __attribute__((__unused__)), struct tm *tm)
+{
+	unsigned char status = 0, pmbit = 0;
+
+	while (1) {
+		/*
+		 * Bit 7 of Byte 10 of the Hardware Clock value is the
+		 * Update In Progress (UIP) bit, which is on while and 244
+		 * uS before the Hardware Clock updates itself. It updates
+		 * the counters individually, so reading them during an
+		 * update would produce garbage. The update takes 2mS, so we
+		 * could be spinning here that long waiting for this bit to
+		 * turn off.
+		 *
+		 * Furthermore, it is pathologically possible for us to be
+		 * in this code so long that even if the UIP bit is not on
+		 * at first, the clock has changed while we were running. We
+		 * check for that too, and if it happens, we start over.
+		 */
+		if (!cmos_clock_busy()) {
+			/* No clock update in progress, go ahead and read */
+			tm->tm_sec = hclock_read(0);
+			tm->tm_min = hclock_read(2);
+			tm->tm_hour = hclock_read(4);
+			tm->tm_wday = hclock_read(6);
+			tm->tm_mday = hclock_read(7);
+			tm->tm_mon = hclock_read(8);
+			tm->tm_year = hclock_read(9);
+			status = hclock_read(11);
+			/*
+			 * Unless the clock changed while we were reading,
+			 * consider this a good clock read.
+			 */
+			if (tm->tm_sec == hclock_read(0))
+				break;
+		}
+		/*
+		 * Yes, in theory we could have been running for 60 seconds
+		 * and the above test wouldn't work!
+		 */
+	}
+
+	/*
+	 * The high bit of the hours byte is the am/pm flag of the 12hr mode.
+	 * cmos_set_time() adds it in BCD as well as in binary mode, so strip
+	 * it here for both data modes too, before any BCD conversion.  Valid
+	 * 24hr values (at most 23, or 0x23 in BCD) never have this bit set.
+	 */
+	pmbit = (tm->tm_hour & 0x80);
+	tm->tm_hour &= 0x7f;
+
+	if (!(status & 0x04)) {	/* BCD mode - the default */
+		BCD_TO_BIN(tm->tm_sec);
+		BCD_TO_BIN(tm->tm_min);
+		BCD_TO_BIN(tm->tm_hour);
+		BCD_TO_BIN(tm->tm_wday);
+		BCD_TO_BIN(tm->tm_mday);
+		BCD_TO_BIN(tm->tm_mon);
+		BCD_TO_BIN(tm->tm_year);
+	}
+
+	/*
+	 * We don't use the century byte of the Hardware Clock since we
+	 * don't know its address (usually 50 or 55). Here, we follow the
+	 * advice of the X/Open Base Working Group: "if century is not
+	 * specified, then values in the range [69-99] refer to years in the
+	 * twentieth century (1969 to 1999 inclusive), and values in the
+	 * range [00-68] refer to years in the twenty-first century (2000 to
+	 * 2068 inclusive)."
+	 */
+	tm->tm_wday -= 1;
+	tm->tm_mon -= 1;
+	if (tm->tm_year < 69)
+		tm->tm_year += 100;
+	/* inverse of the 12hr mapping done in cmos_set_time() */
+	if (pmbit) {
+		tm->tm_hour += 12;
+		if (tm->tm_hour == 24)
+			tm->tm_hour = 0;
+	}
+
+	tm->tm_isdst = -1;	/* don't know whether it's daylight */
+	return 0;
+}
+
+/* Set the CMOS clock to @new_broken_time.  Always returns 0; the write
+ * itself reports no errors. */
+static int set_hardware_clock_cmos(const struct hwclock_control *ctl
+				   __attribute__((__unused__)),
+				   const struct tm *new_broken_time)
+{
+	hclock_set_time(new_broken_time);
+	return 0;
+}
+
+/*
+ * i386_iopl() - ask for permission to access the I/O ports.
+ *
+ * x86 with HAVE_IOPL:    returns the result of iopl(level).
+ * x86 without HAVE_IOPL: ignores @level and returns the result of ioperm()
+ *                        for the two ports starting at clock_ctl_addr.
+ * otherwise:             returns IOPL_NOT_IMPLEMENTED.  This branch is also
+ *                        used on x86 when no I/O header was found, because
+ *                        __i386__ and __x86_64__ are undefined in that case
+ *                        near the top of this file.
+ */
+#if defined(__i386__) || defined(__x86_64__)
+# if defined(HAVE_IOPL)
+static int i386_iopl(const int level)
+{
+	return iopl(level);
+}
+# else
+static int i386_iopl(const int level __attribute__ ((__unused__)))
+{
+	extern int ioperm(unsigned long from, unsigned long num, int turn_on);
+	return ioperm(clock_ctl_addr, 2, 1);
+}
+# endif
+#else
+static int i386_iopl(const int level __attribute__ ((__unused__)))
+{
+	return IOPL_NOT_IMPLEMENTED;
+}
+#endif
+
+/*
+ * Request access to the CMOS I/O ports.  Returns the i386_iopl(3) result:
+ * 0 on success; any other value is returned after printing a warning.
+ */
+static int get_permissions_cmos(void)
+{
+	const int rc = i386_iopl(3);
+
+	if (rc == 0)
+		return rc;
+
+	if (rc == IOPL_NOT_IMPLEMENTED)
+		warnx(_("ISA port access is not implemented"));
+	else
+		warn(_("iopl() port access failed"));
+
+	return rc;
+}
+
+/* Direct port access uses no device node, so there is no path to report. */
+static const char *get_device_path(void)
+{
+	return NULL;
+}
+
+/*
+ * Operations table for direct CMOS access.  The initializer is positional,
+ * so the order has to match the members of struct clock_ops (declared
+ * outside this file - presumably in hwclock.h, verify there).
+ */
+static struct clock_ops cmos_interface = {
+	N_("Using direct ISA access to the clock"),
+	get_permissions_cmos,
+	read_hardware_clock_cmos,
+	set_hardware_clock_cmos,
+	synchronize_to_clock_tick_cmos,
+	get_device_path,
+};
+
+/*
+ * return &cmos if cmos clock present, NULL otherwise.
+ *
+ * No hardware is probed: the result depends only on the build.  NULL is
+ * also returned on x86 builds without sys/io.h or asm/io.h, because
+ * __i386__ and __x86_64__ are undefined in that case near the top of this
+ * file.
+ */
+struct clock_ops *probe_for_cmos_clock(void)
+{
+#if defined(__i386__) || defined(__x86_64__)
+	return &cmos_interface;
+#else
+	return NULL;
+#endif
+}
diff --git a/sys-utils/hwclock-rtc.c b/sys-utils/hwclock-rtc.c
new file mode 100644
index 0000000..32feb35
--- /dev/null
+++ b/sys-utils/hwclock-rtc.c
@@ -0,0 +1,448 @@
+/*
+ * hwclock-rtc.c - Use /dev/rtc for clock access
+ */
+#include <asm/ioctl.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "nls.h"
+
+#include "hwclock.h"
+
+/*
+ * Get defines for rtc stuff.
+ *
+ * Getting the rtc defines is nontrivial. The obvious way is by including
+ * <linux/mc146818rtc.h> but that again includes <asm/io.h> which again
+ * includes ... and on sparc and alpha this gives compilation errors for
+ * many kernel versions. So, we give the defines ourselves here. Moreover,
+ * some Sparc person decided to be incompatible, and used a struct rtc_time
+ * different from that used in mc146818rtc.h.
+ */
+
+/*
+ * On Sparcs, there is a <asm/rtc.h> that defines different ioctls (that are
+ * required on my machine). However, this include file does not exist on
+ * other architectures.
+ */
+/* One might do:
+#ifdef __sparc__
+# include <asm/rtc.h>
+#endif
+ */
+#ifdef __sparc__
+/* The following is roughly equivalent to what <asm/rtc.h> would provide */
+struct sparc_rtc_time
+{
+	int sec;	/* Seconds		0-59 */
+	int min;	/* Minutes		0-59 */
+	int hour;	/* Hour			0-23 */
+	int dow;	/* Day of the week	1-7  */
+	int dom;	/* Day of the month	1-31 */
+	int month;	/* Month of year	1-12 */
+	int year;	/* Year			0-99; NOTE(review): this file converts it with -/+ 1900 -- confirm range */
+};
+#define RTCGET _IOR('p', 20, struct sparc_rtc_time)	/* tried when RTC_RD_TIME fails */
+#define RTCSET _IOW('p', 21, struct sparc_rtc_time)	/* tried when RTC_SET_TIME fails */
+#endif	/* __sparc__ */
+
+/*
+ * struct rtc_time is present since 1.3.99.
+ * Earlier (since 1.3.89), a struct tm was used.
+ */
+struct linux_rtc_time {	/* used in this file only as the type argument of the fallback ioctl macros */
+	int tm_sec;
+	int tm_min;
+	int tm_hour;
+	int tm_mday;
+	int tm_mon;
+	int tm_year;
+	int tm_wday;
+	int tm_yday;
+	int tm_isdst;
+};
+
+/* RTC_RD_TIME etc have this definition since 1.99.9 (pre2.0-9) */
+#ifndef RTC_RD_TIME
+# define RTC_RD_TIME	_IOR('p', 0x09, struct linux_rtc_time)
+# define RTC_SET_TIME	_IOW('p', 0x0a, struct linux_rtc_time)
+# define RTC_UIE_ON	_IO('p', 0x03)	/* Update int. enable on */
+# define RTC_UIE_OFF	_IO('p', 0x04)	/* Update int. enable off */
+#endif
+
+/* RTC_EPOCH_READ and RTC_EPOCH_SET are present since 2.0.34 and 2.1.89 */
+#ifndef RTC_EPOCH_READ
+# define RTC_EPOCH_READ	_IOR('p', 0x0d, unsigned long)	/* Read epoch */
+# define RTC_EPOCH_SET	_IOW('p', 0x0e, unsigned long)	/* Set epoch */
+#endif
+
+/*
+ * /dev/rtc is conventionally chardev 10/135
+ * ia64 uses /dev/efirtc, chardev 10/136
+ * devfs (obsolete) used /dev/misc/... for miscdev
+ * new RTC framework + udev uses dynamic major and /dev/rtc0.../dev/rtcN
+ * ... so we need an overridable default
+ */
+
+/* default or user defined dev (by hwclock --rtc=<path>) */
+static const char *rtc_dev_name;
+static int rtc_dev_fd = -1;
+
+static void close_rtc(void)
+{	/* atexit() handler registered by open_rtc(): close the cached descriptor */
+	if (rtc_dev_fd != -1)
+		close(rtc_dev_fd);
+	rtc_dev_fd = -1;	/* mark as closed so a repeated call is a no-op */
+}
+
+static int open_rtc(const struct hwclock_control *ctl)
+{	/* Open the rtc device once and cache it in rtc_dev_fd; returns the fd or -1. */
+	static const char *fls[] = {	/* default device nodes, tried in this order */
+#ifdef __ia64__
+		"/dev/efirtc",
+		"/dev/misc/efirtc",
+#endif
+		"/dev/rtc0",
+		"/dev/rtc",
+		"/dev/misc/rtc"
+	};
+	size_t i;
+
+	if (rtc_dev_fd != -1)
+		return rtc_dev_fd;	/* already open: reuse the cached descriptor */
+
+	/* --rtc option has been given */
+	if (ctl->rtc_dev_name) {
+		rtc_dev_name = ctl->rtc_dev_name;
+		rtc_dev_fd = open(rtc_dev_name, O_RDONLY);
+	} else {
+		for (i = 0; i < ARRAY_SIZE(fls); i++) {
+			if (ctl->verbose)
+				printf(_("Trying to open: %s\n"), fls[i]);
+			rtc_dev_fd = open(fls[i], O_RDONLY);
+
+			if (rtc_dev_fd < 0
+			    && (errno == ENOENT || errno == ENODEV))
+				continue;	/* node missing: try the next candidate */
+			rtc_dev_name = fls[i];
+			break;	/* opened, or failed for some other reason: stop looking */
+		}
+		if (rtc_dev_fd < 0)
+			rtc_dev_name = *fls;	/* default for error messages */
+	}
+	if (rtc_dev_fd != -1)
+		atexit(close_rtc);	/* have the descriptor closed at process exit */
+	return rtc_dev_fd;
+}
+
+static int open_rtc_or_exit(const struct hwclock_control *ctl)
+{	/* open_rtc() wrapper: warns and calls hwclock_exit() when the open fails */
+	int rtc_fd = open_rtc(ctl);
+
+	if (rtc_fd < 0) {
+		warn(_("cannot open rtc device"));
+		hwclock_exit(ctl, EXIT_FAILURE);	/* presumably does not return -- confirm in hwclock.c */
+	}
+	return rtc_fd;
+}
+
+static int do_rtc_read_ioctl(int rtc_fd, struct tm *tm)
+{	/* Read the RTC into *tm; returns 0 on success, -1 (after a warning) on failure. */
+	int rc = -1;
+	char *ioctlname;	/* name of the last ioctl tried, for the warning text */
+#ifdef __sparc__
+	/* some but not all sparcs use a different ioctl and struct */
+	struct sparc_rtc_time stm;
+#endif
+
+	ioctlname = "RTC_RD_TIME";
+	rc = ioctl(rtc_fd, RTC_RD_TIME, tm);
+
+#ifdef __sparc__
+	if (rc == -1) {		/* sparc sbus: retry with RTCGET and convert the result */
+		ioctlname = "RTCGET";
+		rc = ioctl(rtc_fd, RTCGET, &stm);
+		if (rc == 0) {
+			tm->tm_sec = stm.sec;
+			tm->tm_min = stm.min;
+			tm->tm_hour = stm.hour;
+			tm->tm_mday = stm.dom;
+			tm->tm_mon = stm.month - 1;
+			tm->tm_year = stm.year - 1900;
+			tm->tm_wday = stm.dow - 1;
+			tm->tm_yday = -1;	/* day in the year: not supplied by RTCGET */
+		}
+	}
+#endif
+
+	if (rc == -1) {
+		warn(_("ioctl(%s) to %s to read the time failed"),
+			ioctlname, rtc_dev_name);
+		return -1;
+	}
+
+	tm->tm_isdst = -1;	/* don't know whether it's dst */
+	return 0;
+}
+
+/*
+ * Wait for the top of a clock tick by reading /dev/rtc in a busy loop
+ * until we see it. This function is used for rtc drivers without ioctl
+ * interrupts. This is typical on an Alpha, where the Hardware Clock
+ * interrupts are used by the kernel for the system clock, so aren't at
+ * the user's disposal.  Returns 0 once the seconds change, 1 otherwise.
+ */
+static int busywait_for_rtc_clock_tick(const struct hwclock_control *ctl,
+				       const int rtc_fd)
+{
+	/* The time when we were called (and started waiting) */
+	struct tm start_time;
+	struct tm nowtime;
+	int rc;
+	struct timeval begin, now;
+
+	if (ctl->verbose) {
+		printf("ioctl(%d, RTC_UIE_ON, 0): %s\n",
+		       rtc_fd, strerror(errno));	/* NOTE(review): relies on errno still holding the caller's RTC_UIE_ON failure */
+		printf(_("Waiting in loop for time from %s to change\n"),
+		       rtc_dev_name);
+	}
+
+	if (do_rtc_read_ioctl(rtc_fd, &start_time))
+		return 1;
+
+	/*
+	 * Wait for change.  Should be within a second, but in case
+	 * something weird happens, we have a time limit (1.5s) on this loop
+	 * to reduce the impact of this failure.
+	 */
+	gettimeofday(&begin, NULL);
+	do {
+		rc = do_rtc_read_ioctl(rtc_fd, &nowtime);
+		if (rc || start_time.tm_sec != nowtime.tm_sec)
+			break;	/* read error, or the seconds field moved on */
+		gettimeofday(&now, NULL);
+		if (time_diff(now, begin) > 1.5) {
+			warnx(_("Timed out waiting for time change."));
+			return 1;
+		}
+	} while (1);
+
+	if (rc)
+		return 1;	/* the loop ended because a read failed */
+	return 0;
+}
+
+/*
+ * Same as synchronize_to_clock_tick(), but just for /dev/rtc.
+ */
+static int synchronize_to_clock_tick_rtc(const struct hwclock_control *ctl)
+{
+	int rtc_fd;		/* File descriptor of /dev/rtc */
+	int ret = 1;		/* result: becomes 0 only once a tick was seen */
+
+	rtc_fd = open_rtc(ctl);
+	if (rtc_fd == -1) {
+		warn(_("cannot open rtc device"));
+		return ret;
+	} else {
+		/* Turn on update interrupts (one per second) */
+		int rc = ioctl(rtc_fd, RTC_UIE_ON, 0);
+
+		if (rc != -1) {
+			/*
+			 * Just reading rtc_fd fails on broken hardware: no
+			 * update interrupt comes and a bootscript with a
+			 * hwclock call hangs
+			 */
+			fd_set rfds;
+			struct timeval tv;
+
+			/*
+			 * Wait up to ten seconds for the next update
+			 * interrupt
+			 */
+			FD_ZERO(&rfds);
+			FD_SET(rtc_fd, &rfds);
+			tv.tv_sec = 10;
+			tv.tv_usec = 0;
+			rc = select(rtc_fd + 1, &rfds, NULL, NULL, &tv);
+			if (0 < rc)
+				ret = 0;	/* rtc_fd became readable within the timeout */
+			else if (rc == 0) {
+				warnx(_("select() to %s to wait for clock tick timed out"),
+				      rtc_dev_name);
+			} else
+				warn(_("select() to %s to wait for clock tick failed"),
+				     rtc_dev_name);
+			/* Turn off update interrupts (done whatever select() returned) */
+			rc = ioctl(rtc_fd, RTC_UIE_OFF, 0);
+			if (rc == -1)
+				warn(_("ioctl() to %s to turn off update interrupts failed"),
+				     rtc_dev_name);
+		} else if (errno == ENOTTY || errno == EINVAL) {
+			/* rtc ioctl interrupts are unimplemented */
+			ret = busywait_for_rtc_clock_tick(ctl, rtc_fd);
+		} else
+			warn(_("ioctl(%d, RTC_UIE_ON, 0) to %s failed"),
+			     rtc_fd, rtc_dev_name);
+	}
+	return ret;
+}
+
+static int read_hardware_clock_rtc(const struct hwclock_control *ctl,
+				   struct tm *tm)
+{
+	int rtc_fd, rc;
+
+	rtc_fd = open_rtc_or_exit(ctl);	/* calls hwclock_exit() if the device cannot be opened */
+
+	/* Read the RTC time/date, return answer via tm */
+	rc = do_rtc_read_ioctl(rtc_fd, tm);
+
+	return rc;	/* 0 on success, -1 if the read ioctl failed */
+}
+
+/*
+ * Set the Hardware Clock to the broken down time <new_broken_time>. Use
+ * ioctls to "rtc" device /dev/rtc.  Returns 0; failures call hwclock_exit().
+ */
+static int set_hardware_clock_rtc(const struct hwclock_control *ctl,
+				  const struct tm *new_broken_time)
+{
+	int rc = -1;
+	int rtc_fd;
+	char *ioctlname;	/* name of the last ioctl tried, for messages */
+
+	rtc_fd = open_rtc_or_exit(ctl);
+
+	ioctlname = "RTC_SET_TIME";
+	rc = ioctl(rtc_fd, RTC_SET_TIME, new_broken_time);
+
+#ifdef __sparc__
+	if (rc == -1) {		/* sparc sbus: retry with RTCSET and its own struct */
+		struct sparc_rtc_time stm;
+
+		stm.sec = new_broken_time->tm_sec;
+		stm.min = new_broken_time->tm_min;
+		stm.hour = new_broken_time->tm_hour;
+		stm.dom = new_broken_time->tm_mday;
+		stm.month = new_broken_time->tm_mon + 1;
+		stm.year = new_broken_time->tm_year + 1900;
+		stm.dow = new_broken_time->tm_wday + 1;
+
+		ioctlname = "RTCSET";
+		rc = ioctl(rtc_fd, RTCSET, &stm);
+	}
+#endif
+
+	if (rc == -1) {
+		warn(_("ioctl(%s) to %s to set the time failed"),
+			ioctlname, rtc_dev_name);
+		hwclock_exit(ctl, EXIT_FAILURE);	/* presumably does not return -- confirm in hwclock.c */
+	}
+
+	if (ctl->verbose)
+		printf(_("ioctl(%s) was successful.\n"), ioctlname);
+
+	return 0;
+}
+
+static int get_permissions_rtc(void)
+{	/* "permissions" entry of rtc_interface */
+	return 0;	/* nothing is acquired here; always reports success */
+}
+
+static const char *get_device_path(void)
+{	/* "device path" entry of rtc_interface */
+	return rtc_dev_name;	/* set by open_rtc(); NULL until open_rtc() has run */
+}
+
+static struct clock_ops rtc_interface = {	/* positional initializers: order must match struct clock_ops */
+	N_("Using the rtc interface to the clock."),
+	get_permissions_rtc,
+	read_hardware_clock_rtc,
+	set_hardware_clock_rtc,
+	synchronize_to_clock_tick_rtc,
+	get_device_path,
+};
+
+/* return &rtc_interface if an rtc device can be opened, NULL otherwise */
+struct clock_ops *probe_for_rtc_clock(const struct hwclock_control *ctl)
+{
+	const int rtc_fd = open_rtc(ctl);	/* on success the fd stays open and cached */
+
+	if (rtc_fd < 0)
+		return NULL;
+	return &rtc_interface;
+}
+
+#ifdef __alpha__
+/*
+ * Get the Hardware Clock epoch from the kernel into *epoch_p; 0 = ok, 1 = error.
+ */
+int get_epoch_rtc(const struct hwclock_control *ctl, unsigned long *epoch_p)
+{
+	int rtc_fd;
+
+	rtc_fd = open_rtc(ctl);
+	if (rtc_fd < 0) {
+		warn(_("cannot open %s"), rtc_dev_name);
+		return 1;
+	}
+
+	if (ioctl(rtc_fd, RTC_EPOCH_READ, epoch_p) == -1) {
+		warn(_("ioctl(%d, RTC_EPOCH_READ, epoch_p) to %s failed"),
+		     rtc_fd, rtc_dev_name);
+		return 1;
+	}
+
+	if (ctl->verbose)	/* success is only reported in verbose mode */
+		printf(_("ioctl(%d, RTC_EPOCH_READ, epoch_p) to %s succeeded.\n"),
+		       rtc_fd, rtc_dev_name);
+
+	return 0;
+}
+
+/*
+ * Set the kernel's epoch to the year in ctl->epoch_option; 0 = ok, 1 = error.
+ */
+int set_epoch_rtc(const struct hwclock_control *ctl)
+{
+	int rtc_fd;
+	unsigned long epoch;
+	char *end;	/* first character strtoul() did not consume */
+
+	epoch = strtoul(ctl->epoch_option, &end, 10);
+	/* There were no RTC clocks before 1900; also reject overflow and junk. */
+	if (*end != '\0' || epoch < 1900 || epoch == ULONG_MAX) {
+		warnx(_("invalid epoch '%s'."), ctl->epoch_option);
+		return 1;
+	}
+
+	rtc_fd = open_rtc(ctl);
+	if (rtc_fd < 0) {
+		warn(_("cannot open %s"), rtc_dev_name);
+		return 1;
+	}
+
+	if (ioctl(rtc_fd, RTC_EPOCH_SET, epoch) == -1) {
+		warn(_("ioctl(%d, RTC_EPOCH_SET, %lu) to %s failed"),
+		     rtc_fd, epoch, rtc_dev_name);
+		return 1;
+	}
+
+	if (ctl->verbose)	/* success is only reported in verbose mode */
+		printf(_("ioctl(%d, RTC_EPOCH_SET, %lu) to %s succeeded.\n"),
+		       rtc_fd, epoch, rtc_dev_name);
+
+	return 0;
+}
+#endif	/* __alpha__ */
diff --git a/sys-utils/hwclock.8 b/sys-utils/hwclock.8
new file mode 100644
index 0000000..8a10e7a
--- /dev/null
+++ b/sys-utils/hwclock.8
@@ -0,0 +1,998 @@
+.\" hwclock.8.in -- man page for util-linux' hwclock
+.\"
+.\" 2015-01-07 J William Piggott
+.\"   Authored new section: DATE-TIME CONFIGURATION.
+.\"   Subsections: Keeping Time..., LOCAL vs UTC, POSIX vs 'RIGHT'.
+.\"
+.TH HWCLOCK 8 "July 2017" "util-linux" "System Administration"
+.SH NAME
+hwclock \- time clocks utility
+.SH SYNOPSIS
+.B hwclock
+.RI [ function ]
+.RI [ option ...]
+.
+.SH DESCRIPTION
+.B hwclock
+is an administration tool for the time clocks.  It can: display the
+Hardware Clock time; set the Hardware Clock to a specified time; set the
+Hardware Clock from the System Clock; set the System Clock from the
+Hardware Clock; compensate for Hardware Clock drift; correct the System
+Clock timescale; set the kernel's timezone, NTP timescale, and epoch
+(Alpha only); and predict future
+Hardware Clock values based on its drift rate.
+.PP
+Since v2.26 important changes were made to the
+.B \-\-hctosys
+function and the
+.B \-\-directisa
+option, and a new option
+.B \-\-update\-drift
+was added.  See their respective descriptions below.
+.
+.SH FUNCTIONS
+The following functions are mutually exclusive, only one can be given at
+a time.  If none is given, the default is \fB\-\-show\fR.
+.TP
+.B \-a, \-\-adjust
+Add or subtract time from the Hardware Clock to account for systematic
+drift since the last time the clock was set or adjusted.  See the
+discussion below, under
+.BR "The Adjust Function" .
+.
+.TP
+.B \-\-getepoch
+.TQ
+.B \-\-setepoch
+These functions are for Alpha machines only, and are only available
+through the Linux kernel RTC driver.
+.sp
+They are used to read and set the kernel's Hardware Clock epoch value.
+Epoch is the number of years into AD to which a zero year value in the
+Hardware Clock refers.  For example, if the machine's BIOS sets the year
+counter in the Hardware Clock to contain the number of full years since
+1952, then the kernel's Hardware Clock epoch value must be 1952.
+.sp
+The \fB\%\-\-setepoch\fR function requires using the
+.B \%\-\-epoch
+option to specify the year.  For example:
+.RS
+.IP "" 4
+.B hwclock\ \-\-setepoch\ \-\-epoch=1952
+.PP
+The RTC driver attempts to guess the correct epoch value, so setting it
+may not be required.
+.PP
+This epoch value is used whenever
+.B \%hwclock
+reads or sets the Hardware Clock on an Alpha machine.  For ISA machines
+the kernel uses the fixed Hardware Clock epoch of 1900.
+.RE
+.
+.TP
+.B \-\-predict
+Predict what the Hardware Clock will read in the future based upon the
+time given by the
+.B \-\-date
+option and the information in
+.IR /etc/adjtime .
+This is useful, for example, to account for drift when setting a
+Hardware Clock wakeup (aka alarm). See
+.BR \%rtcwake (8).
+.sp
+Do not use this function if the Hardware Clock is being modified by
+anything other than the current operating system's
+.B \%hwclock
+command, such as \%'11\ minute\ mode' or from dual-booting another OS.
+.
+.TP
+.BR \-r , \ \-\-show
+.TQ
+.B \-\-get
+.br
+Read the Hardware Clock and print its time to standard output in the
+.B ISO 8601
+format.
+The time shown is always in local time, even if you keep your Hardware Clock
+in UTC.  See the
+.B \%\-\-localtime
+option.
+.sp
+Showing the Hardware Clock time is the default when no function is specified.
+.sp
+The
+.B \-\-get
+function also applies drift correction to the time read, based upon the
+information in
+.IR /etc/adjtime .
+Do not use this function if the Hardware Clock is being modified by
+anything other than the current operating system's
+.B \%hwclock
+command, such as \%'11\ minute\ mode' or from dual-booting another OS.
+.
+.TP
+.BR \-s , \ \-\-hctosys
+Set the System Clock from the Hardware Clock.  The time read from the Hardware
+Clock is compensated to account for systematic drift before using it to set the
+System Clock.  See the discussion below, under
+.BR "The Adjust Function" .
+.sp
+The System Clock must be kept in the UTC timescale for date-time
+applications to work correctly in conjunction with the timezone configured
+for the system.  If the Hardware Clock is kept in local time then the time read
+from it must be shifted to the UTC timescale before using it to set the System
+Clock.  The
+.B \%\-\-hctosys
+function does this based upon the information in the
+.I /etc/adjtime
+file or the command line arguments
+.BR \%\-\-localtime " and " \-\-utc .
+Note: no daylight saving adjustment is made.  See the discussion below, under
+.BR "LOCAL vs UTC" .
+.sp
+The kernel also keeps a timezone value, the
+.B \%\-\-hctosys
+function sets it to the timezone configured for the system.  The system
+timezone is configured by the TZ environment variable or the
+.I \%/etc/localtime
+file, as
+.BR \%tzset (3)
+would interpret them.
+The obsolete tz_dsttime field of the kernel's timezone value is set
+to zero.  (For details on what this field used to mean, see
+.BR \%settimeofday (2).)
+.sp
+When used in a startup script, making the
+.B \%\-\-hctosys
+function the first caller of
+.BR \%settimeofday (2)
+from boot, it will set the NTP \%'11\ minute\ mode' timescale via the
+.I \%persistent_clock_is_local
+kernel variable.  If the Hardware Clock's timescale configuration is
+changed then a reboot is required to inform the kernel.  See the
+discussion below, under
+.BR "Automatic Hardware Clock Synchronization by the Kernel" .
+.sp
+This is a good function to use in one of the system startup scripts before the
+file systems are mounted read/write.
+.sp
+This function should never be used on a running system. Jumping system time
+will cause problems, such as corrupted filesystem timestamps.  Also, if
+something has changed the Hardware Clock, like NTP's \%'11\ minute\ mode', then
+.B \%\-\-hctosys
+will set the time incorrectly by including drift compensation.
+.sp
+Drift compensation can be inhibited by setting the drift factor in
+.I /etc/adjtime
+to zero.  This setting will be persistent as long as the
+.BR \%\-\-update\-drift " option is not used with " \%\-\-systohc
+at shutdown (or anywhere else).  Another way to inhibit this is by using the
+.BR \%\-\-noadjfile " option when calling the " \%\-\-hctosys
+function.  A third method is to delete the
+.IR /etc/adjtime " file."
+.B Hwclock
+will then default to using the UTC timescale for the Hardware Clock.  If
+the Hardware Clock is ticking local time it will need to be defined in
+the file.  This can be done by calling
+.BR hwclock\ \-\-localtime\ \-\-adjust ;
+when the file is not present this command will not actually
+adjust the Clock, but it will create the file with local time
+configured, and a drift factor of zero.
+.sp
+A condition under which inhibiting
+.BR hwclock 's
+drift correction may be desired is when dual-booting multiple operating
+systems.  If while this instance of Linux is stopped, another OS changes
+the Hardware Clock's value, then when this instance is started again the
+drift correction applied will be incorrect.
+.sp
+.RB "For " hwclock 's
+drift correction to work properly it is imperative that nothing changes
+the Hardware Clock while its Linux instance is not running.
+.
+.TP
+.B \-\-set
+Set the Hardware Clock to the time given by the
+.B \-\-date
+option, and update the timestamps in
+.IR /etc/adjtime .
+With the
+.B \%\-\-update-drift
+option also (re)calculate the drift factor.  Try it without the option if
+.BR \%\-\-set " fails.  See " \%\-\-update-drift " below."
+.
+.TP
+.B \-\-systz
+This is an alternative to the
+.B \%\-\-hctosys
+function that does not read the Hardware Clock nor set the System Clock;
+consequently there is not any drift correction.  It is intended to be
+used in a startup script on systems with kernels above version 2.6 where
+you know the System Clock has been set from the Hardware Clock by the
+kernel during boot.
+.sp
+It does the following things that are detailed above in the
+.BR \%\-\-hctosys " function:"
+.RS
+.IP \(bu 2
+Corrects the System Clock timescale to UTC as needed.  Only instead of
+accomplishing this by setting the System Clock,
+.B hwclock
+simply informs the kernel and it handles the change.
+.IP \(bu 2
+Sets the kernel's NTP \%'11\ minute\ mode' timescale.
+.IP \(bu 2
+Sets the kernel's timezone.
+.PP
+The first two are only available on the first call of
+.BR \%settimeofday (2)
+after boot.  Consequently this option only makes sense when used in a
+startup script.  If the Hardware Clock's timescale configuration is
+changed then a reboot would be required to inform the kernel.
+.RE
+.
+.TP
+.BR \-w , \ \-\-systohc
+Set the Hardware Clock from the System Clock, and update the timestamps in
+.IR /etc/adjtime .
+With the
+.B \%\-\-update-drift
+option also (re)calculate the drift factor.  Try it without the option if
+.BR \%\-\-systohc " fails.  See " \%\-\-update-drift " below."
+.
+.TP
+.BR \-V , \ \-\-version
+Display version information and exit.
+.
+.TP
+.BR \-h , \ \-\-help
+Display help text and exit.
+.
+.SH OPTIONS
+.
+.TP
+.BI \-\-adjfile= filename
+.RI "Override the default " /etc/adjtime " file path."
+.
+.TP
+.BI \%\-\-date= date_string
+This option must be used with the
+.B \-\-set
+or
+.B \%\-\-predict
+functions, otherwise it is ignored.
+.RS
+.IP "" 4
+.B "hwclock\ \-\-set\ \-\-date='16:45'"
+.IP "" 4
+.B "hwclock\ \-\-predict\ \-\-date='2525-08-14\ 07:11:05'"
+.PP
+The argument must be in local time, even if you keep your Hardware Clock in
+UTC.  See the
+.B \%\-\-localtime
+option.  Therefore, the argument should not include any timezone information.
+It also should not be a relative time like "+5 minutes", because
+.BR \%hwclock 's
+precision depends upon correlation between the argument's value and when the
+enter key is pressed.  Fractional seconds are silently dropped.  This option is
+capable of understanding many time and date formats, but the previous
+parameters should be observed.
+.RE
+.
+.TP
+.BI \%\-\-delay= seconds
+This option allows overriding the internally used delay when setting the
+clock time.  The default is 0.5 (500ms) for rtc_cmos; for other RTC types the
+delay is 0.  If the RTC type cannot be determined (from sysfs) then it also
+defaults to 0.5 to be backward compatible.
+.RS
+.PP
+The 500ms default is based on the commonly used MC146818A-compatible (x86)
+hardware clock.  This Hardware Clock can only be set to any integer time plus
+one half second.  The integer time is required because there is no interface
+to set or get a fractional second.  The additional half second delay is because
+the Hardware Clock updates to the following second precisely 500 ms after
+setting the new time.  Unfortunately, this behavior is hardware specific and in
+some cases a different delay is required.
+.RE
+.
+.
+.TP
+.BR \-D ", " \-\-debug
+.RB Use\  \-\-verbose .
+.RB The\  \%\-\-debug\  option
+has been deprecated and may be repurposed or removed in a future release.
+.
+.TP
+.B \-\-directisa
+This option is meaningful for ISA compatible machines in the x86 and
+x86_64 family.  For other machines, it has no effect.  This option tells
+.B \%hwclock
+to use explicit I/O instructions to access the Hardware Clock.
+Without this option,
+.B \%hwclock
+will use the rtc device file, which it assumes to be driven by the Linux
+RTC device driver.  As of v2.26 it will no longer automatically use
+directisa when the rtc driver is unavailable; this was causing an unsafe
+condition that could allow two processes to access the Hardware Clock at
+the same time.  Direct hardware access from userspace should only be
+used for testing, troubleshooting, and as a last resort when all other
+methods fail.  See the
+.BR \-\-rtc " option."
+.
+.TP
+.BI \-\-epoch= year
+This option is required when using the
+.BR \%\-\-setepoch \ function.
+.RI "The minimum " year
+value is 1900. The maximum is system dependent
+.RB ( ULONG_MAX\ -\ 1 ).
+.
+.TP
+.BR \-f , \ \-\-rtc=\fIfilename\fR
+.RB "Override " \%hwclock 's
+default rtc device file name.  Otherwise it will
+use the first one found in this order:
+.in +4
+.br
+.I /dev/rtc0
+.br
+.I /dev/rtc
+.br
+.I /dev/misc/rtc
+.br
+.in
+.RB "For " IA-64:
+.in +4
+.br
+.I /dev/efirtc
+.br
+.I /dev/misc/efirtc
+.in
+.
+.TP
+.BR \-l , \ \-\-localtime
+.TQ
+.BR \-u ", " \-\-utc
+Indicate which timescale the Hardware Clock is set to.
+.sp
+The Hardware Clock may be configured to use either the UTC or the local
+timescale, but nothing in the clock itself says which alternative is
+being used.  The
+.BR \%\-\-localtime " or " \-\-utc
+options give this information to the
+.B \%hwclock
+command.  If you specify the wrong one (or specify neither and take a
+wrong default), both setting and reading the Hardware Clock will be
+incorrect.
+.sp
+If you specify neither
+.BR \-\-utc " nor " \%\-\-localtime
+then the one last given with a set function
+.RB ( \-\-set ", " \%\-\-systohc ", or " \%\-\-adjust ),
+as recorded in
+.IR /etc/adjtime ,
+will be used.  If the adjtime file doesn't exist, the default is UTC.
+.sp
+Note: daylight saving time changes may be inconsistent when the
+Hardware Clock is kept in local time.  See the discussion below, under
+.BR "LOCAL vs UTC" .
+.
+.TP
+.B \-\-noadjfile
+Disable the facilities provided by
+.IR /etc/adjtime .
+.B \%hwclock
+will not read nor write to that file with this option.  Either
+.BR \-\-utc " or " \%\-\-localtime
+must be specified when using this option.
+.
+.TP
+.B \-\-test
+Do not actually change anything on the system, that is, the Clocks or
+.I /etc/adjtime
+.RB ( \%\-\-verbose
+is implicit with this option).
+.
+.TP
+.B \-\-update\-drift
+Update the Hardware Clock's drift factor in
+.IR /etc/adjtime .
+It can only be used with
+.BR \-\-set " or " \%\-\-systohc .
+.sp
+A minimum four hour period between settings is required.  This is to
+avoid invalid calculations.  The longer the period, the more precise the
+resulting drift factor will be.
+.sp
+This option was added in v2.26, because
+it is typical for systems to call
+.B \%hwclock\ \-\-systohc
+at shutdown; with the old behaviour this would automatically
+(re)calculate the drift factor which caused several problems:
+.RS
+.IP \(bu 2
+When using NTP with an \%'11\ minute\ mode' kernel the drift factor
+would be clobbered to near zero.
+.IP \(bu 2
+It would not allow the use of 'cold' drift correction.  With most
+configurations using 'cold' drift will yield favorable results.  Cold,
+means when the machine is turned off which can have a significant impact
+on the drift factor.
+.IP \(bu 2
+(Re)calculating drift factor on every shutdown delivers suboptimal
+results.  For example, if ephemeral conditions cause the machine to be
+abnormally hot the drift factor calculation would be out of range.
+.IP \(bu 2
+Significantly increased system shutdown times (as of v2.31 when not
+using
+.B \%\-\-update\-drift
+the RTC is not read).
+.PP
+.RB "Having " \%hwclock
+calculate the drift factor is a good starting point, but for optimal
+results it will likely need to be adjusted by directly editing the
+.I /etc/adjtime
+file.  For most configurations once a machine's optimal drift factor is
+crafted it should not need to be changed.  Therefore, the old behavior to
+automatically (re)calculate drift was changed and now requires this
+option to be used.  See the discussion below, under
+.BR "The Adjust Function" .
+.PP
+This option requires reading the Hardware Clock before setting it.  If
+it cannot be read, then this option will cause the set functions to fail.
+This can happen, for example, if the Hardware Clock is corrupted by a
+power failure.  In that case, the clock must first be set without this
+option.  Nothing is lost by that: the resulting drift correction factor
+would be invalid anyway.
+.RE
+.
+.TP
+.BR \-v ", " \-\-verbose
+Display more details about what
+.B \%hwclock
+is doing internally.
+.
+.SH NOTES
+.
+.SS Clocks in a Linux System
+.PP
+There are two types of date-time clocks:
+.PP
+.B The Hardware Clock:
+This clock is an independent hardware device, with its own power domain
+(battery, capacitor, etc), that operates when the machine is powered off,
+or even unplugged.
+.PP
+On an ISA compatible system, this clock is specified as part of the ISA
+standard.  A control program can read or set this clock only to a whole
+second, but it can also detect the edges of the 1 second clock ticks, so
+the clock actually has virtually infinite precision.
+.PP
+This clock is commonly called the hardware clock, the real time clock,
+the RTC, the BIOS clock, and the CMOS clock.  Hardware Clock, in its
+capitalized form, was coined for use by
+.BR \%hwclock .
+The Linux kernel also refers to it as the persistent clock.
+.PP
+Some non-ISA systems have a few real time clocks with
+only one of them having its own power domain.
+A very low power external I2C or SPI clock chip might be used with a
+backup battery as the hardware clock to initialize a more functional
+integrated real-time clock which is used for most other purposes.
+.PP
+.B The System Clock:
+This clock is part of the Linux kernel and is driven by
+a timer interrupt.  (On an ISA machine, the timer interrupt is part of
+the ISA standard.)  It has meaning only while Linux is running on the
+machine.  The System Time is the number of seconds since 00:00:00
+January 1, 1970 UTC (or more succinctly, the number of seconds since
+1969 UTC).  The System Time is not an integer, though.  It has virtually
+infinite precision.
+.PP
+The System Time is the time that matters.  The Hardware Clock's basic
+purpose is to keep time when Linux is not running so that the System
+Clock can be initialized from it at boot.  Note that in DOS, for which
+ISA was designed, the Hardware Clock is the only real time clock.
+.PP
+It is important that the System Time not have any discontinuities such as
+would happen if you used the
+.BR \%date (1)
+program to set it while the system is running.  You can, however, do whatever
+you want to the Hardware Clock while the system is running, and the next
+time Linux starts up, it will do so with the adjusted time from the Hardware
+Clock.  Note: currently this is not possible on most systems because
+.B \%hwclock\ \-\-systohc
+is called at shutdown.
+.PP
+The Linux kernel's timezone is set by
+.BR hwclock .
+But don't be misled -- almost nobody cares what timezone the kernel
+thinks it is in.  Instead, programs that care about the timezone
+(perhaps because they want to display a local time for you) almost
+always use a more traditional method of determining the timezone: They
+use the TZ environment variable or the
+.I \%/etc/localtime
+file, as explained in the man page for
+.BR \%tzset (3).
+However, some programs and fringe parts of the Linux kernel such as filesystems
+use the kernel's timezone value.  An example is the vfat filesystem.  If the
+kernel timezone value is wrong, the vfat filesystem will report and set the
+wrong timestamps on files.  Another example is the kernel's NTP \%'11\ minute\ mode'.
+If the kernel's timezone value and/or the
+.I \%persistent_clock_is_local
+variable are wrong, then the Hardware Clock will be set incorrectly
+by \%'11\ minute\ mode'.  See the discussion below, under
+.BR "Automatic Hardware Clock Synchronization by the Kernel" .
+.PP
+.B \%hwclock
+sets the kernel's timezone to the value indicated by TZ or
+.IR \%/etc/localtime " with the"
+.BR \%\-\-hctosys " or " \%\-\-systz " functions."
+.PP
+The kernel's timezone value actually consists of two parts: 1) a field
+tz_minuteswest indicating how many minutes local time (not adjusted
+for DST) lags behind UTC, and 2) a field tz_dsttime indicating
+the type of Daylight Savings Time (DST) convention that is in effect
+in the locality at the present time.
+This second field is not used under Linux and is always zero.
+See also
+.BR \%settimeofday (2).
+.
+.SS Hardware Clock Access Methods
+.PP
+.B \%hwclock
+uses many different ways to get and set Hardware Clock values.  The most
+normal way is to do I/O to the rtc device special file, which is
+presumed to be driven by the rtc device driver.  Also, Linux systems
+using the rtc framework with udev, are capable of supporting multiple
+Hardware Clocks.  This may bring about the need to override the default
+rtc device by specifying one with the
+.BR \-\-rtc " option."
+.PP
+However, this method is not always available as older systems do not
+have an rtc driver.  On these systems, the method of accessing the
+Hardware Clock depends on the system hardware.
+.PP
+On an ISA compatible system,
+.B \%hwclock
+can directly access the "CMOS memory" registers that
+constitute the clock, by doing I/O to Ports 0x70 and 0x71.  It does
+this with actual I/O instructions and consequently can only do it if
+running with superuser effective userid.  This method may be used by
+specifying the
+.BR \%\-\-directisa " option."
+.PP
+This is a really poor method of accessing the clock, for all the
+reasons that userspace programs are generally not supposed to do
+direct I/O and disable interrupts.
+.B \%hwclock
+provides it for testing, troubleshooting, and because it may be the
+only method available on ISA systems which do not have a working rtc
+device driver.
+.SS The Adjust Function
+.PP
+The Hardware Clock is usually not very accurate.  However, much of its
+inaccuracy is completely predictable - it gains or loses the same amount
+of time every day.  This is called systematic drift.
+.BR \%hwclock "'s " \%\-\-adjust
+function lets you apply systematic drift corrections to the
+Hardware Clock.
+.PP
+It works like this:
+.BR \%hwclock " keeps a file,"
+.IR /etc/adjtime ,
+that keeps some historical information.  This is called the adjtime file.
+.PP
+Suppose you start with no adjtime file.  You issue a
+.B \%hwclock\ \-\-set
+command to set the Hardware Clock to the true current time.
+.B \%hwclock
+creates the adjtime file and records in it the current time as the
+last time the clock was calibrated.
+Five days later, the clock has gained 10 seconds, so you issue a
+.B \%hwclock\ \-\-set\ \-\-update\-drift
+command to set it back 10 seconds.
+.B \%hwclock
+updates the adjtime file to show the current time as the last time the
+clock was calibrated, and records 2 seconds per day as the systematic
+drift rate.  24 hours go by, and then you issue a
+.B \%hwclock\ \-\-adjust
+command.
+.B \%hwclock
+consults the adjtime file and sees that the clock gains 2 seconds per
+day when left alone and that it has been left alone for exactly one
+day.  So it subtracts 2 seconds from the Hardware Clock.  It then
+records the current time as the last time the clock was adjusted.
+Another 24 hours go by and you issue another
+.BR \%hwclock\ \-\-adjust .
+.B \%hwclock
+does the same thing: subtracts 2 seconds and updates the adjtime file
+with the current time as the last time the clock was adjusted.
+.PP
+When you use the
+.BR \%\-\-update\-drift " option with " \-\-set " or " \%\-\-systohc ,
+the systematic drift rate is (re)calculated by comparing the fully drift
+corrected current Hardware Clock time with the new set time, from that
+it derives the 24 hour drift rate based on the last calibrated timestamp
+from the adjtime file.  This updated drift factor is then saved in
+.IR /etc/adjtime .
+.PP
+A small amount of error creeps in when
+the Hardware Clock is set, so
+.B \%\-\-adjust
+refrains from making any adjustment that is less
+than 1 second.  Later on, when you request an adjustment again, the accumulated
+drift will be more than 1 second and
+.B \%\-\-adjust
+will make the adjustment including any fractional amount.
+.PP
+.B \%hwclock\ \-\-hctosys
+also uses the adjtime file data to compensate the value read from the Hardware
+Clock before using it to set the System Clock.  It does not share the 1 second
+limitation of
+.BR \%\-\-adjust ,
+and will correct sub-second drift values immediately.  It does not
+change the Hardware Clock time nor the adjtime file.  This may eliminate
+the need to use
+.BR \%\-\-adjust ,
+unless something else on the system needs the Hardware Clock to be
+compensated.
+.
+.SS The Adjtime File
+While named for its historical purpose of controlling adjustments only,
+it actually contains other information used by
+.B hwclock
+from one invocation to the next.
+.PP
+The format of the adjtime file is, in ASCII:
+.PP
+Line 1: Three numbers, separated by blanks: 1) the systematic drift rate
+in seconds per day, floating point decimal; 2) the resulting number of
+seconds since 1969 UTC of most recent adjustment or calibration,
+decimal integer; 3) zero (for compatibility with
+.BR \%clock (8))
+as a decimal integer.
+.PP
+Line 2: One number: the resulting number of seconds since 1969 UTC of most
+recent calibration.  Zero if there has been no calibration yet or it
+is known that any previous calibration is moot (for example, because
+the Hardware Clock has been found, since that calibration, not to
+contain a valid time).  This is a decimal integer.
+.PP
+Line 3: "UTC" or "LOCAL".  Tells whether the Hardware Clock is set to
+Coordinated Universal Time or local time.  You can always override this
+value with options on the
+.B \%hwclock
+command line.
+.PP
+You can use an adjtime file that was previously used with the
+.BR \%clock "(8) program with " \%hwclock .
+.
+.SS Automatic Hardware Clock Synchronization by the Kernel
+.PP
+You should be aware of another way that the Hardware Clock is kept
+synchronized in some systems.  The Linux kernel has a mode wherein it
+copies the System Time to the Hardware Clock every 11 minutes. This mode
+is a compile time option, so not all kernels will have this capability.
+This is a good mode to use when you are using something sophisticated
+like NTP to keep your System Clock synchronized. (NTP is a way to keep
+your System Time synchronized either to a time server somewhere on the
+network or to a radio clock hooked up to your system.  See RFC 1305.)
+.PP
+If the kernel is compiled with the \%'11\ minute\ mode' option it will
+be active when the kernel's clock discipline is in a synchronized state.
+When in this state, bit 6 (the bit that is set in the mask 0x0040)
+of the kernel's
+.I \%time_status
+variable is unset. This value is output as the 'status' line of the
+.BR \%adjtimex\ \-\-print " or " \%ntptime " commands."
+.PP
+It takes an outside influence, like the NTP daemon
+to put the kernel's clock discipline into a synchronized state, and
+therefore turn on \%'11\ minute\ mode'.
+It can be turned off by running anything that sets the System Clock the old
+fashioned way, including
+.BR "\%hwclock\ \-\-hctosys" .
+However, if the NTP daemon is still running, it will turn \%'11\ minute\ mode'
+back on again the next time it synchronizes the System Clock.
+.PP
+If your system runs with \%'11\ minute\ mode' on, it may need to use either
+.BR \%\-\-hctosys " or " \%\-\-systz
+in a startup script, especially if the Hardware Clock is configured to use
+the local timescale. Unless the kernel is informed of what timescale the
+Hardware Clock is using, it may clobber it with the wrong one. The kernel
+uses UTC by default.
+.PP
+The first userspace command to set the System Clock informs the
+kernel what timescale the Hardware Clock is using.  This happens via the
+.I \%persistent_clock_is_local
+kernel variable.  If
+.BR \%\-\-hctosys " or " \%\-\-systz
+is the first, it will set this variable according to the adjtime file or the
+appropriate command-line argument.  Note that when using this capability and the
+Hardware Clock timescale configuration is changed, then a reboot is required to
+notify the kernel.
+.PP
+.B \%hwclock\ \-\-adjust
+should not be used with NTP \%'11\ minute\ mode'.
+.
+.SS ISA Hardware Clock Century value
+.PP
+There is some sort of standard that defines CMOS memory Byte 50 on an ISA
+machine as an indicator of what century it is.
+.B \%hwclock
+does not use or set that byte because there are some machines that
+don't define the byte that way, and it really isn't necessary anyway,
+since the year-of-century does a good job of implying which century it
+is.
+.PP
+If you have a bona fide use for a CMOS century byte, contact the
+.B \%hwclock
+maintainer; an option may be appropriate.
+.PP
+Note that this section is only relevant when you are using the "direct
+ISA" method of accessing the Hardware Clock.
+ACPI provides a standard way to access century values, when they
+are supported by the hardware.
+.
+.SH DATE-TIME CONFIGURATION
+.in +4
+.SS Keeping Time without External Synchronization
+.in
+.PP
+This discussion is based on the following conditions:
+.IP \(bu 2
+Nothing is running that alters the date-time clocks, such as an NTP daemon or a cron job.
+.IP \(bu 2
+The system timezone is configured for the correct local time.  See below, under
+.BR "POSIX vs 'RIGHT'" .
+.IP \(bu 2
+Early during startup the following are called, in this order:
+.br
+.BI \%adjtimex\ \-\-tick \ value\  \-\-frequency \ value
+.br
+.B \%hwclock\ \-\-hctosys
+.IP \(bu 2
+During shutdown the following is called:
+.br
+.B \%hwclock\ \-\-systohc
+.PP
+.in +4
+.BR * " Systems without " adjtimex " may use " ntptime .
+.in
+.PP
+Whether maintaining precision time with NTP daemon
+or not, it makes sense to configure the system to keep reasonably good
+date-time on its own.
+.PP
+The first step in making that happen is having a clear understanding of
+the big picture.  There are two completely separate hardware devices
+running at their own speed and drifting away from the 'correct' time at
+their own rates.  The methods and software for drift correction are
+different for each of them.  However, most systems are configured to
+exchange values between these two clocks at startup and shutdown.  Now
+the individual device's time keeping errors are transferred back and
+forth between each other.  Attempt to configure drift correction for only
+one of them, and the other's drift will be overlaid upon it.
+.PP
+This problem can be avoided when configuring drift correction for the
+System Clock by simply not shutting down the machine.  This, plus the
+fact that all of
+.BR \%hwclock 's
+precision (including calculating drift factors) depends upon the System
+Clock's rate being correct, means that configuration of the System Clock
+should be done first.
+.PP
+The System Clock drift is corrected with the
+.BR \%adjtimex "(8) command's " \-\-tick " and " \%\-\-frequency
+options.  These two work together: tick is the coarse adjustment and
+frequency is the fine adjustment.  (For systems that do not have an
+.BR \%adjtimex " package,"
+.BI \%ntptime\ \-f\  ppm
+may be used instead.)
+.PP
+Some Linux distributions attempt to automatically calculate the System
+Clock drift with
+.BR \%adjtimex 's
+compare operation.  Trying to correct one
+drifting clock by using another drifting clock as a reference is akin to
+a dog trying to catch its own tail.  Success may happen eventually, but
+great effort and frustration will likely precede it.  This automation may
+yield an improvement over no configuration, but expecting optimum
+results would be in error.  A better choice for manual configuration
+would be
+.BR \%adjtimex 's " \-\-log " options.
+.PP
+It may be more effective to simply track the System Clock drift with
+.BR \%sntp ", or " \%date\ \-Ins
+and a precision timepiece, and then calculate the correction manually.
+.PP
+After setting the tick and frequency values, continue to test and refine the
+adjustments until the System Clock keeps good time.  See
+.BR \%adjtimex (8)
+for more information and the example demonstrating manual drift
+calculations.
+.PP
+Once the System Clock is ticking smoothly, move on to the Hardware Clock.
+.PP
+As a rule, cold drift will work best for most use cases.  This should be
+true even for 24/7 machines whose normal downtime consists of a reboot.
+In that case the drift factor value makes little difference.  But on the
+rare occasion that the machine is shut down for an extended period, then
+cold drift should yield better results.
+.PP
+.B Steps to calculate cold drift:
+.IP 1 2
+.B "Ensure that NTP daemon will not be launched at startup."
+.IP 2 2
+.RI The " System Clock " "time must be correct at shutdown!"
+.IP 3 2
+Shut down the system.
+.IP 4 2
+Let an extended period pass without changing the Hardware Clock.
+.IP 5 2
+Start the system.
+.IP 6 2
+.RB "Immediately use " hwclock " to set the correct time, adding the"
+.BR \%\-\-update\-drift " option."
+.PP
+Note: if step 6 uses
+.BR \%\-\-systohc ,
+then the System Clock must be set correctly (step 6a) just before doing so.
+.PP
+.RB "Having " hwclock
+calculate the drift factor is a good starting point, but for optimal
+results it will likely need to be adjusted by directly editing the
+.I /etc/adjtime
+file.  Continue to test and refine the drift factor until the Hardware
+Clock is corrected properly at startup.  To check this, first make sure
+that the System Time is correct before shutdown and then use
+.BR \%sntp ", or " \%date\ \-Ins
+and a precision timepiece, immediately after startup.
+.SS LOCAL vs UTC
+Keeping the Hardware Clock in a local timescale causes inconsistent
+daylight saving time results:
+.IP \(bu 2
+If Linux is running during a daylight saving time change, the time
+written to the Hardware Clock will be adjusted for the change.
+.IP \(bu 2
+If Linux is NOT running during a daylight saving time change, the time
+read from the Hardware Clock will NOT be adjusted for the change.
+.PP
+The Hardware Clock on an ISA compatible system keeps only a date and time;
+it has no concept of timezone nor daylight saving. Therefore, when
+.B hwclock
+is told that it is in local time, it assumes it is in the 'correct'
+local time and makes no adjustments to the time read from it.
+.PP
+Linux handles daylight saving time changes transparently only when the
+Hardware Clock is kept in the UTC timescale. Doing so is made easy for
+system administrators as
+.B \%hwclock
+uses local time for its output and as the argument to the
+.BR \%\-\-date " option."
+.PP
+POSIX systems, like Linux, are designed to have the System Clock operate
+in the UTC timescale. The Hardware Clock's purpose is to initialize the
+System Clock, so also keeping it in UTC makes sense.
+.PP
+Linux does, however, attempt to accommodate the Hardware Clock being in
+the local timescale. This is primarily for dual-booting with older
+versions of MS Windows. From Windows 7 on, the RealTimeIsUniversal
+registry key is supposed to be working properly so that its Hardware
+Clock can be kept in UTC.
+.
+.SS POSIX vs 'RIGHT'
+A discussion on date-time configuration would be incomplete without
+addressing timezones; this is mostly well covered by
+.BR tzset (3).
+One area that seems to have no documentation is the 'right'
+directory of the Time Zone Database, sometimes called tz or zoneinfo.
+.PP
+There are two separate databases in the zoneinfo system, posix
+and 'right'. 'Right' (now named zoneinfo\-leaps) includes leap seconds and posix
+does not. To use the 'right' database the System Clock must be set to
+\%(UTC\ +\ leap seconds), which is equivalent to \%(TAI\ \-\ 10). This
+allows calculating the
+exact number of seconds between two dates that cross a leap second
+epoch. The System Clock is then converted to the correct civil time,
+including UTC, by using the 'right' timezone files which subtract the
+leap seconds. Note: this configuration is considered experimental and is
+known to have issues.
+.PP
+To configure a system to use a particular database all of the files
+located in its directory must be copied to the root of
+.IR \%/usr/share/zoneinfo .
+Files are never used directly from the posix or 'right' subdirectories, e.g.,
+.RI \%TZ=' right/Europe/Dublin '.
+This habit was becoming so common that the upstream zoneinfo project
+restructured the system's file tree by moving the posix and 'right'
+subdirectories out of the zoneinfo directory and into sibling directories:
+.PP
+.in +2
+.I /usr/share/zoneinfo
+.br
+.I /usr/share/zoneinfo\-posix
+.br
+.I /usr/share/zoneinfo\-leaps
+.PP
+Unfortunately, some Linux distributions are changing it back to the old
+tree structure in their packages. So the problem of system
+administrators reaching into the 'right' subdirectory persists. This
+causes the system timezone to be configured to include leap seconds
+while the zoneinfo database is still configured to exclude them. Then
+when an application such as a World Clock needs the South_Pole timezone
+file; or an email MTA, or
+.B hwclock
+needs the UTC timezone file; they fetch it from the root of
+.IR \%/usr/share/zoneinfo ,
+because that is what they are supposed to do. Those files exclude leap
+seconds, but the System Clock now includes them, causing an incorrect
+time conversion.
+.PP
+Attempting to mix and match files from these separate databases will not
+work, because they each require the System Clock to use a different
+timescale. The zoneinfo database must be configured to use either posix
+or 'right', as described above, or by assigning a database path to the
+.SB TZDIR
+environment variable.
+.SH EXIT STATUS
+One of the following exit values will be returned:
+.TP
+.BR EXIT_SUCCESS " ('0' on POSIX systems)"
+Successful program execution.
+.TP
+.BR EXIT_FAILURE " ('1' on POSIX systems)"
+The operation failed or the command syntax was not valid.
+.SH ENVIRONMENT
+.TP
+.B TZ
+If this variable is set its value takes precedence over the system
+configured timezone.
+.TP
+.B TZDIR
+If this variable is set its value takes precedence over the system
+configured timezone database directory path.
+.SH FILES
+.TP
+.I /etc/adjtime
+The configuration and state file for hwclock.
+.TP
+.I /etc/localtime
+The system timezone file.
+.TP
+.I /usr/share/zoneinfo/
+The system timezone database directory.
+.PP
+Device files
+.B hwclock
+may try for Hardware Clock access:
+.br
+.I /dev/rtc0
+.br
+.I /dev/rtc
+.br
+.I /dev/misc/rtc
+.br
+.I /dev/efirtc
+.br
+.I /dev/misc/efirtc
+.SH "SEE ALSO"
+.BR date (1),
+.BR adjtimex (8),
+.BR gettimeofday (2),
+.BR settimeofday (2),
+.BR crontab (1),
+.BR tzset (3)
+.
+.SH AUTHORS
+Written by Bryan Henderson, September 1996 (bryanh@giraffe-data.com),
+based on work done on the
+.BR \%clock (8)
+program by Charles Hedrick, Rob Hooft, and Harald Koenig.
+See the source code for complete history and credits.
+.
+.SH AVAILABILITY
+The hwclock command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/hwclock.8.in b/sys-utils/hwclock.8.in
new file mode 100644
index 0000000..dacdd27
--- /dev/null
+++ b/sys-utils/hwclock.8.in
@@ -0,0 +1,998 @@
+.\" hwclock.8.in -- man page for util-linux' hwclock
+.\"
+.\" 2015-01-07 J William Piggott
+.\"   Authored new section: DATE-TIME CONFIGURATION.
+.\"   Subsections: Keeping Time..., LOCAL vs UTC, POSIX vs 'RIGHT'.
+.\"
+.TH HWCLOCK 8 "July 2017" "util-linux" "System Administration"
+.SH NAME
+hwclock \- time clocks utility
+.SH SYNOPSIS
+.B hwclock
+.RI [ function ]
+.RI [ option ...]
+.
+.SH DESCRIPTION
+.B hwclock
+is an administration tool for the time clocks.  It can: display the
+Hardware Clock time; set the Hardware Clock to a specified time; set the
+Hardware Clock from the System Clock; set the System Clock from the
+Hardware Clock; compensate for Hardware Clock drift; correct the System
+Clock timescale; set the kernel's timezone, NTP timescale, and epoch
+(Alpha only); and predict future
+Hardware Clock values based on its drift rate.
+.PP
+Since v2.26 important changes were made to the
+.B \-\-hctosys
+function and the
+.B \-\-directisa
+option, and a new option
+.B \-\-update\-drift
+was added.  See their respective descriptions below.
+.
+.SH FUNCTIONS
+The following functions are mutually exclusive; only one can be given at
+a time.  If none is given, the default is \fB\-\-show\fR.
+.TP
+.B \-a, \-\-adjust
+Add or subtract time from the Hardware Clock to account for systematic
+drift since the last time the clock was set or adjusted.  See the
+discussion below, under
+.BR "The Adjust Function" .
+.
+.TP
+.B \-\-getepoch
+.TQ
+.B \-\-setepoch
+These functions are for Alpha machines only, and are only available
+through the Linux kernel RTC driver.
+.sp
+They are used to read and set the kernel's Hardware Clock epoch value.
+Epoch is the number of years into AD to which a zero year value in the
+Hardware Clock refers.  For example, if the machine's BIOS sets the year
+counter in the Hardware Clock to contain the number of full years since
+1952, then the kernel's Hardware Clock epoch value must be 1952.
+.sp
+The \fB\%\-\-setepoch\fR function requires using the
+.B \%\-\-epoch
+option to specify the year.  For example:
+.RS
+.IP "" 4
+.B hwclock\ \-\-setepoch\ \-\-epoch=1952
+.PP
+The RTC driver attempts to guess the correct epoch value, so setting it
+may not be required.
+.PP
+This epoch value is used whenever
+.B \%hwclock
+reads or sets the Hardware Clock on an Alpha machine.  For ISA machines
+the kernel uses the fixed Hardware Clock epoch of 1900.
+.RE
+.
+.TP
+.B \-\-predict
+Predict what the Hardware Clock will read in the future based upon the
+time given by the
+.B \-\-date
+option and the information in
+.IR @ADJTIME_PATH@ .
+This is useful, for example, to account for drift when setting a
+Hardware Clock wakeup (aka alarm). See
+.BR \%rtcwake (8).
+.sp
+Do not use this function if the Hardware Clock is being modified by
+anything other than the current operating system's
+.B \%hwclock
+command, such as \%'11\ minute\ mode' or from dual-booting another OS.
+.
+.TP
+.BR \-r , \ \-\-show
+.TQ
+.B \-\-get
+.br
+Read the Hardware Clock and print its time to standard output in the
+.B ISO 8601
+format.
+The time shown is always in local time, even if you keep your Hardware Clock
+in UTC.  See the
+.B \%\-\-localtime
+option.
+.sp
+Showing the Hardware Clock time is the default when no function is specified.
+.sp
+The
+.B \-\-get
+function also applies drift correction to the time read, based upon the
+information in
+.IR @ADJTIME_PATH@ .
+Do not use this function if the Hardware Clock is being modified by
+anything other than the current operating system's
+.B \%hwclock
+command, such as \%'11\ minute\ mode' or from dual-booting another OS.
+.
+.TP
+.BR \-s , \ \-\-hctosys
+Set the System Clock from the Hardware Clock.  The time read from the Hardware
+Clock is compensated to account for systematic drift before using it to set the
+System Clock.  See the discussion below, under
+.BR "The Adjust Function" .
+.sp
+The System Clock must be kept in the UTC timescale for date-time
+applications to work correctly in conjunction with the timezone configured
+for the system.  If the Hardware Clock is kept in local time then the time read
+from it must be shifted to the UTC timescale before using it to set the System
+Clock.  The
+.B \%\-\-hctosys
+function does this based upon the information in the
+.I @ADJTIME_PATH@
+file or the command line arguments
+.BR \%\-\-localtime " and " \-\-utc .
+Note: no daylight saving adjustment is made.  See the discussion below, under
+.BR "LOCAL vs UTC" .
+.sp
+The kernel also keeps a timezone value, the
+.B \%\-\-hctosys
+function sets it to the timezone configured for the system.  The system
+timezone is configured by the TZ environment variable or the
+.I \%/etc/localtime
+file, as
+.BR \%tzset (3)
+would interpret them.
+The obsolete tz_dsttime field of the kernel's timezone value is set
+to zero.  (For details on what this field used to mean, see
+.BR \%settimeofday (2).)
+.sp
+When used in a startup script, making the
+.B \%\-\-hctosys
+function the first caller of
+.BR \%settimeofday (2)
+from boot, it will set the NTP \%'11\ minute\ mode' timescale via the
+.I \%persistent_clock_is_local
+kernel variable.  If the Hardware Clock's timescale configuration is
+changed then a reboot is required to inform the kernel.  See the
+discussion below, under
+.BR "Automatic Hardware Clock Synchronization by the Kernel" .
+.sp
+This is a good function to use in one of the system startup scripts before the
+file systems are mounted read/write.
+.sp
+This function should never be used on a running system. Jumping system time
+will cause problems, such as corrupted filesystem timestamps.  Also, if
+something has changed the Hardware Clock, like NTP's \%'11\ minute\ mode', then
+.B \%\-\-hctosys
+will set the time incorrectly by including drift compensation.
+.sp
+Drift compensation can be inhibited by setting the drift factor in
+.I @ADJTIME_PATH@
+to zero.  This setting will be persistent as long as the
+.BR \%\-\-update\-drift " option is not used with " \%\-\-systohc
+at shutdown (or anywhere else).  Another way to inhibit this is by using the
+.BR \%\-\-noadjfile " option when calling the " \%\-\-hctosys
+function.  A third method is to delete the
+.IR @ADJTIME_PATH@ " file."
+.B Hwclock
+will then default to using the UTC timescale for the Hardware Clock.  If
+the Hardware Clock is ticking local time it will need to be defined in
+the file.  This can be done by calling
+.BR hwclock\ \-\-localtime\ \-\-adjust ;
+when the file is not present this command will not actually
+adjust the Clock, but it will create the file with local time
+configured, and a drift factor of zero.
+.sp
+A condition under which inhibiting
+.BR hwclock 's
+drift correction may be desired is when dual-booting multiple operating
+systems.  If while this instance of Linux is stopped, another OS changes
+the Hardware Clock's value, then when this instance is started again the
+drift correction applied will be incorrect.
+.sp
+.RB "For " hwclock 's
+drift correction to work properly it is imperative that nothing changes
+the Hardware Clock while its Linux instance is not running.
+.
+.TP
+.B \-\-set
+Set the Hardware Clock to the time given by the
+.B \-\-date
+option, and update the timestamps in
+.IR @ADJTIME_PATH@ .
+With the
+.B \%\-\-update\-drift
+option also (re)calculate the drift factor.  Try it without the option if
+.BR \%\-\-set " fails.  See " \%\-\-update\-drift " below."
+.
+.TP
+.B \-\-systz
+This is an alternate to the
+.B \%\-\-hctosys
+function that does not read the Hardware Clock nor set the System Clock;
+consequently there is not any drift correction.  It is intended to be
+used in a startup script on systems with kernels above version 2.6 where
+you know the System Clock has been set from the Hardware Clock by the
+kernel during boot.
+.sp
+It does the following things that are detailed above in the
+.BR \%\-\-hctosys " function:"
+.RS
+.IP \(bu 2
+Corrects the System Clock timescale to UTC as needed.  Only instead of
+accomplishing this by setting the System Clock,
+.B hwclock
+simply informs the kernel and it handles the change.
+.IP \(bu 2
+Sets the kernel's NTP \%'11\ minute\ mode' timescale.
+.IP \(bu 2
+Sets the kernel's timezone.
+.PP
+The first two are only available on the first call of
+.BR \%settimeofday (2)
+after boot.  Consequently this option only makes sense when used in a
+startup script.  If the Hardware Clock's timescale configuration is
+changed then a reboot would be required to inform the kernel.
+.RE
+.
+.TP
+.BR \-w , \ \-\-systohc
+Set the Hardware Clock from the System Clock, and update the timestamps in
+.IR @ADJTIME_PATH@ .
+With the
+.B \%\-\-update\-drift
+option also (re)calculate the drift factor.  Try it without the option if
+.BR \%\-\-systohc " fails.  See " \%\-\-update\-drift " below."
+.
+.TP
+.BR \-V , \ \-\-version
+Display version information and exit.
+.
+.TP
+.BR \-h , \ \-\-help
+Display help text and exit.
+.
+.SH OPTIONS
+.
+.TP
+.BI \-\-adjfile= filename
+.RI "Override the default " @ADJTIME_PATH@ " file path."
+.
+.TP
+.BI \%\-\-date= date_string
+This option must be used with the
+.B \-\-set
+or
+.B \%\-\-predict
+functions, otherwise it is ignored.
+.RS
+.IP "" 4
+.B "hwclock\ \-\-set\ \-\-date='16:45'"
+.IP "" 4
+.B "hwclock\ \-\-predict\ \-\-date='2525-08-14\ 07:11:05'"
+.PP
+The argument must be in local time, even if you keep your Hardware Clock in
+UTC.  See the
+.B \%\-\-localtime
+option.  Therefore, the argument should not include any timezone information.
+It also should not be a relative time like "+5 minutes", because
+.BR \%hwclock 's
+precision depends upon correlation between the argument's value and when the
+enter key is pressed.  Fractional seconds are silently dropped.  This option is
+capable of understanding many time and date formats, but the previous
+parameters should be observed.
+.RE
+.
+.TP
+.BI \%\-\-delay= seconds
+This option allows overriding the internally used delay when setting the
+clock time. The default is 0.5 (500ms) for rtc_cmos; for other RTC types the
+delay is 0. If the RTC type is impossible to determine (from sysfs), then it
+also defaults to 0.5 to be backward compatible.
+.RS
+.PP
+The 500ms default is based on the commonly used MC146818A-compatible (x86) hardware clock. This
+Hardware Clock can only be set to any integer time plus one half second.  The
+integer time is required because there is no interface to set or get a
+fractional second.  The additional half second delay is because the Hardware
+Clock updates to the following second precisely 500 ms after setting the new
+time. Unfortunately, this behavior is hardware specific and in some cases
+another delay is required.
+.RE
+.
+.TP
+.BR \-D ", " \-\-debug
+.RB Use\  \-\-verbose .
+.RB The\  \%\-\-debug\  option
+has been deprecated and may be repurposed or removed in a future release.
+.
+.TP
+.B \-\-directisa
+This option is meaningful for ISA compatible machines in the x86 and
+x86_64 family.  For other machines, it has no effect.  This option tells
+.B \%hwclock
+to use explicit I/O instructions to access the Hardware Clock.
+Without this option,
+.B \%hwclock
+will use the rtc device file, which it assumes to be driven by the Linux
+RTC device driver.  As of v2.26 it will no longer automatically use
+directisa when the rtc driver is unavailable; this was causing an unsafe
+condition that could allow two processes to access the Hardware Clock at
+the same time.  Direct hardware access from userspace should only be
+used for testing, troubleshooting, and as a last resort when all other
+methods fail.  See the
+.BR \-\-rtc " option."
+.
+.TP
+.BI \-\-epoch= year
+This option is required when using the
+.BR \%\-\-setepoch \ function.
+.RI "The minimum " year
+value is 1900. The maximum is system dependent
+.RB ( ULONG_MAX\ \-\ 1 ).
+.
+.TP
+.BR \-f , \ \-\-rtc=\fIfilename\fR
+.RB "Override " \%hwclock 's
+default rtc device file name.  Otherwise it will
+use the first one found in this order:
+.in +4
+.br
+.I /dev/rtc0
+.br
+.I /dev/rtc
+.br
+.I /dev/misc/rtc
+.br
+.in
+.RB "For " IA-64:
+.in +4
+.br
+.I /dev/efirtc
+.br
+.I /dev/misc/efirtc
+.in
+.
+.TP
+.BR \-l , \ \-\-localtime
+.TQ
+.BR \-u ", " \-\-utc
+Indicate which timescale the Hardware Clock is set to.
+.sp
+The Hardware Clock may be configured to use either the UTC or the local
+timescale, but nothing in the clock itself says which alternative is
+being used.  The
+.BR \%\-\-localtime " or " \-\-utc
+options give this information to the
+.B \%hwclock
+command.  If you specify the wrong one (or specify neither and take a
+wrong default), both setting and reading the Hardware Clock will be
+incorrect.
+.sp
+If you specify neither
+.BR \-\-utc " nor " \%\-\-localtime
+then the one last given with a set function
+.RB ( \-\-set ", " \%\-\-systohc ", or " \%\-\-adjust ),
+as recorded in
+.IR @ADJTIME_PATH@ ,
+will be used.  If the adjtime file doesn't exist, the default is UTC.
+.sp
+Note: daylight saving time changes may be inconsistent when the
+Hardware Clock is kept in local time.  See the discussion below, under
+.BR "LOCAL vs UTC" .
+.
+.TP
+.B \-\-noadjfile
+Disable the facilities provided by
+.IR @ADJTIME_PATH@ .
+.B \%hwclock
+will not read nor write to that file with this option.  Either
+.BR \-\-utc " or " \%\-\-localtime
+must be specified when using this option.
+.
+.TP
+.B \-\-test
+Do not actually change anything on the system, that is, the Clocks or
+.I @ADJTIME_PATH@
+.RB ( \%\-\-verbose
+is implicit with this option).
+.
+.TP
+.B \-\-update\-drift
+Update the Hardware Clock's drift factor in
+.IR @ADJTIME_PATH@ .
+It can only be used with
+.BR \-\-set " or " \%\-\-systohc .
+.sp
+A minimum four hour period between settings is required.  This is to
+avoid invalid calculations.  The longer the period, the more precise the
+resulting drift factor will be.
+.sp
+This option was added in v2.26, because
+it is typical for systems to call
+.B \%hwclock\ \-\-systohc
+at shutdown; with the old behaviour this would automatically
+(re)calculate the drift factor which caused several problems:
+.RS
+.IP \(bu 2
+When using NTP with an \%'11\ minute\ mode' kernel the drift factor
+would be clobbered to near zero.
+.IP \(bu 2
+It would not allow the use of 'cold' drift correction.  With most
+configurations using 'cold' drift will yield favorable results.  Cold,
+means when the machine is turned off which can have a significant impact
+on the drift factor.
+.IP \(bu 2
+(Re)calculating drift factor on every shutdown delivers suboptimal
+results.  For example, if ephemeral conditions cause the machine to be
+abnormally hot the drift factor calculation would be out of range.
+.IP \(bu 2
+Significantly increased system shutdown times (as of v2.31 when not
+using
+.B \%\-\-update\-drift
+the RTC is not read).
+.PP
+.RB "Having " \%hwclock
+calculate the drift factor is a good starting point, but for optimal
+results it will likely need to be adjusted by directly editing the
+.I @ADJTIME_PATH@
+file.  For most configurations once a machine's optimal drift factor is
+crafted it should not need to be changed.  Therefore, the old behavior to
+automatically (re)calculate drift was changed and now requires this
+option to be used.  See the discussion below, under
+.BR "The Adjust Function" .
+.PP
+This option requires reading the Hardware Clock before setting it.  If
+it cannot be read, then this option will cause the set functions to fail.
+This can happen, for example, if the Hardware Clock is corrupted by a
+power failure.  In that case, the clock must first be set without this
+option.  Even if it did work in that case, the resulting drift correction
+factor would be invalid anyway.
+.RE
+.
+.TP
+.BR \-v ", " \-\-verbose
+Display more details about what
+.B \%hwclock
+is doing internally.
+.
+.SH NOTES
+.
+.SS Clocks in a Linux System
+.PP
+There are two types of date-time clocks:
+.PP
+.B The Hardware Clock:
+This clock is an independent hardware device, with its own power domain
+(battery, capacitor, etc), that operates when the machine is powered off,
+or even unplugged.
+.PP
+On an ISA compatible system, this clock is specified as part of the ISA
+standard.  A control program can read or set this clock only to a whole
+second, but it can also detect the edges of the 1 second clock ticks, so
+the clock actually has virtually infinite precision.
+.PP
+This clock is commonly called the hardware clock, the real time clock,
+the RTC, the BIOS clock, and the CMOS clock.  Hardware Clock, in its
+capitalized form, was coined for use by
+.BR \%hwclock .
+The Linux kernel also refers to it as the persistent clock.
+.PP
+Some non-ISA systems have a few real time clocks with
+only one of them having its own power domain.
+A very low power external I2C or SPI clock chip might be used with a
+backup battery as the hardware clock to initialize a more functional
+integrated real-time clock which is used for most other purposes.
+.PP
+.B The System Clock:
+This clock is part of the Linux kernel and is driven by
+a timer interrupt.  (On an ISA machine, the timer interrupt is part of
+the ISA standard.)  It has meaning only while Linux is running on the
+machine.  The System Time is the number of seconds since 00:00:00
+January 1, 1970 UTC (or more succinctly, the number of seconds since
+1969 UTC).  The System Time is not an integer, though.  It has virtually
+infinite precision.
+.PP
+The System Time is the time that matters.  The Hardware Clock's basic
+purpose is to keep time when Linux is not running so that the System
+Clock can be initialized from it at boot.  Note that in DOS, for which
+ISA was designed, the Hardware Clock is the only real time clock.
+.PP
+It is important that the System Time not have any discontinuities such as
+would happen if you used the
+.BR \%date (1)
+program to set it while the system is running.  You can, however, do whatever
+you want to the Hardware Clock while the system is running, and the next
+time Linux starts up, it will do so with the adjusted time from the Hardware
+Clock.  Note: currently this is not possible on most systems because
+.B \%hwclock\ \-\-systohc
+is called at shutdown.
+.PP
+The Linux kernel's timezone is set by
+.BR hwclock .
+But don't be misled -- almost nobody cares what timezone the kernel
+thinks it is in.  Instead, programs that care about the timezone
+(perhaps because they want to display a local time for you) almost
+always use a more traditional method of determining the timezone: They
+use the TZ environment variable or the
+.I \%/etc/localtime
+file, as explained in the man page for
+.BR \%tzset (3).
+However, some programs and fringe parts of the Linux kernel such as filesystems
+use the kernel's timezone value.  An example is the vfat filesystem.  If the
+kernel timezone value is wrong, the vfat filesystem will report and set the
+wrong timestamps on files.  Another example is the kernel's NTP \%'11\ minute\ mode'.
+If the kernel's timezone value and/or the
+.I \%persistent_clock_is_local
+variable are wrong, then the Hardware Clock will be set incorrectly
+by \%'11\ minute\ mode'.  See the discussion below, under
+.BR "Automatic Hardware Clock Synchronization by the Kernel" .
+.PP
+.B \%hwclock
+sets the kernel's timezone to the value indicated by TZ or
+.IR \%/etc/localtime " with the"
+.BR \%\-\-hctosys " or " \%\-\-systz " functions."
+.PP
+The kernel's timezone value actually consists of two parts: 1) a field
+tz_minuteswest indicating how many minutes local time (not adjusted
+for DST) lags behind UTC, and 2) a field tz_dsttime indicating
+the type of Daylight Savings Time (DST) convention that is in effect
+in the locality at the present time.
+This second field is not used under Linux and is always zero.
+See also
+.BR \%settimeofday (2).
+.
+.SS Hardware Clock Access Methods
+.PP
+.B \%hwclock
+uses many different ways to get and set Hardware Clock values.  The most
+normal way is to do I/O to the rtc device special file, which is
+presumed to be driven by the rtc device driver.  Also, Linux systems
+using the rtc framework with udev are capable of supporting multiple
+Hardware Clocks.  This may bring about the need to override the default
+rtc device by specifying one with the
+.BR \-\-rtc " option."
+.PP
+However, this method is not always available as older systems do not
+have an rtc driver.  On these systems, the method of accessing the
+Hardware Clock depends on the system hardware.
+.PP
+On an ISA compatible system,
+.B \%hwclock
+can directly access the "CMOS memory" registers that
+constitute the clock, by doing I/O to Ports 0x70 and 0x71.  It does
+this with actual I/O instructions and consequently can only do it if
+running with superuser effective userid.  This method may be used by
+specifying the
+.BR \%\-\-directisa " option."
+.PP
+This is a really poor method of accessing the clock, for all the
+reasons that userspace programs are generally not supposed to do
+direct I/O and disable interrupts.
+.B \%hwclock
+provides it for testing, troubleshooting, and because it may be the
+only method available on ISA systems which do not have a working rtc
+device driver.
+.SS The Adjust Function
+.PP
+The Hardware Clock is usually not very accurate.  However, much of its
+inaccuracy is completely predictable - it gains or loses the same amount
+of time every day.  This is called systematic drift.
+.BR \%hwclock "'s " \%\-\-adjust
+function lets you apply systematic drift corrections to the
+Hardware Clock.
+.PP
+It works like this:
+.BR \%hwclock " keeps a file,"
+.IR @ADJTIME_PATH@ ,
+that keeps some historical information.  This is called the adjtime file.
+.PP
+Suppose you start with no adjtime file.  You issue a
+.B \%hwclock\ \-\-set
+command to set the Hardware Clock to the true current time.
+.B \%hwclock
+creates the adjtime file and records in it the current time as the
+last time the clock was calibrated.
+Five days later, the clock has gained 10 seconds, so you issue a
+.B \%hwclock\ \-\-set\ \-\-update\-drift
+command to set it back 10 seconds.
+.B \%hwclock
+updates the adjtime file to show the current time as the last time the
+clock was calibrated, and records 2 seconds per day as the systematic
+drift rate.  24 hours go by, and then you issue a
+.B \%hwclock\ \-\-adjust
+command.
+.B \%hwclock
+consults the adjtime file and sees that the clock gains 2 seconds per
+day when left alone and that it has been left alone for exactly one
+day.  So it subtracts 2 seconds from the Hardware Clock.  It then
+records the current time as the last time the clock was adjusted.
+Another 24 hours go by and you issue another
+.BR \%hwclock\ \-\-adjust .
+.B \%hwclock
+does the same thing: subtracts 2 seconds and updates the adjtime file
+with the current time as the last time the clock was adjusted.
+.PP
+When you use the
+.BR \%\-\-update\-drift " option with " \-\-set " or " \%\-\-systohc ,
+the systematic drift rate is (re)calculated by comparing the fully drift
+corrected current Hardware Clock time with the new set time, from that
+it derives the 24 hour drift rate based on the last calibrated timestamp
+from the adjtime file.  This updated drift factor is then saved in
+.IR @ADJTIME_PATH@ .
+.PP
+A small amount of error creeps in when
+the Hardware Clock is set, so
+.B \%\-\-adjust
+refrains from making any adjustment that is less
+than 1 second.  Later on, when you request an adjustment again, the accumulated
+drift will be more than 1 second and
+.B \%\-\-adjust
+will make the adjustment including any fractional amount.
+.PP
+.B \%hwclock\ \-\-hctosys
+also uses the adjtime file data to compensate the value read from the Hardware
+Clock before using it to set the System Clock.  It does not share the 1 second
+limitation of
+.BR \%\-\-adjust ,
+and will correct sub-second drift values immediately.  It does not
+change the Hardware Clock time nor the adjtime file.  This may eliminate
+the need to use
+.BR \%\-\-adjust ,
+unless something else on the system needs the Hardware Clock to be
+compensated.
+.
+.SS The Adjtime File
+While named for its historical purpose of controlling adjustments only,
+it actually contains other information used by
+.B hwclock
+from one invocation to the next.
+.PP
+The format of the adjtime file is, in ASCII:
+.PP
+Line 1: Three numbers, separated by blanks: 1) the systematic drift rate
+in seconds per day, floating point decimal; 2) the resulting number of
+seconds since 1969 UTC of most recent adjustment or calibration,
+decimal integer; 3) zero (for compatibility with
+.BR \%clock (8))
+as a decimal integer.
+.PP
+Line 2: One number: the resulting number of seconds since 1969 UTC of most
+recent calibration.  Zero if there has been no calibration yet or it
+is known that any previous calibration is moot (for example, because
+the Hardware Clock has been found, since that calibration, not to
+contain a valid time).  This is a decimal integer.
+.PP
+Line 3: "UTC" or "LOCAL".  Tells whether the Hardware Clock is set to
+Coordinated Universal Time or local time.  You can always override this
+value with options on the
+.B \%hwclock
+command line.
+.PP
+You can use an adjtime file that was previously used with the
+.BR \%clock "(8) program with " \%hwclock .
+.
+.SS Automatic Hardware Clock Synchronization by the Kernel
+.PP
+You should be aware of another way that the Hardware Clock is kept
+synchronized in some systems.  The Linux kernel has a mode wherein it
+copies the System Time to the Hardware Clock every 11 minutes. This mode
+is a compile time option, so not all kernels will have this capability.
+This is a good mode to use when you are using something sophisticated
+like NTP to keep your System Clock synchronized. (NTP is a way to keep
+your System Time synchronized either to a time server somewhere on the
+network or to a radio clock hooked up to your system.  See RFC 1305.)
+.PP
+If the kernel is compiled with the \%'11\ minute\ mode' option it will
+be active when the kernel's clock discipline is in a synchronized state.
+When in this state, bit 6 (the bit that is set in the mask 0x0040)
+of the kernel's
+.I \%time_status
+variable is unset. This value is output as the 'status' line of the
+.BR \%adjtimex\ \-\-print " or " \%ntptime " commands."
+.PP
+It takes an outside influence, like the NTP daemon
+to put the kernel's clock discipline into a synchronized state, and
+therefore turn on \%'11\ minute\ mode'.
+It can be turned off by running anything that sets the System Clock the old
+fashioned way, including
+.BR "\%hwclock\ \-\-hctosys" .
+However, if the NTP daemon is still running, it will turn \%'11\ minute\ mode'
+back on again the next time it synchronizes the System Clock.
+.PP
+If your system runs with \%'11\ minute\ mode' on, it may need to use either
+.BR \%\-\-hctosys " or " \%\-\-systz
+in a startup script, especially if the Hardware Clock is configured to use
+the local timescale. Unless the kernel is informed of what timescale the
+Hardware Clock is using, it may clobber it with the wrong one. The kernel
+uses UTC by default.
+.PP
+The first userspace command to set the System Clock informs the
+kernel what timescale the Hardware Clock is using.  This happens via the
+.I \%persistent_clock_is_local
+kernel variable.  If
+.BR \%\-\-hctosys " or " \%\-\-systz
+is the first, it will set this variable according to the adjtime file or the
+appropriate command-line argument.  Note that when using this capability and the
+Hardware Clock timescale configuration is changed, then a reboot is required to
+notify the kernel.
+.PP
+.B \%hwclock\ \-\-adjust
+should not be used with NTP \%'11\ minute\ mode'.
+.
+.SS ISA Hardware Clock Century value
+.PP
+There is some sort of standard that defines CMOS memory Byte 50 on an ISA
+machine as an indicator of what century it is.
+.B \%hwclock
+does not use or set that byte because there are some machines that
+don't define the byte that way, and it really isn't necessary anyway,
+since the year-of-century does a good job of implying which century it
+is.
+.PP
+If you have a bona fide use for a CMOS century byte, contact the
+.B \%hwclock
+maintainer; an option may be appropriate.
+.PP
+Note that this section is only relevant when you are using the "direct
+ISA" method of accessing the Hardware Clock.
+ACPI provides a standard way to access century values, when they
+are supported by the hardware.
+.
+.SH DATE-TIME CONFIGURATION
+.in +4
+.SS Keeping Time without External Synchronization
+.in
+.PP
+This discussion is based on the following conditions:
+.IP \(bu 2
+Nothing is running that alters the date-time clocks, such as NTP daemon or a cron job.
+.IP \(bu 2
+The system timezone is configured for the correct local time.  See below, under
+.BR "POSIX vs 'RIGHT'" .
+.IP \(bu 2
+Early during startup the following are called, in this order:
+.br
+.BI \%adjtimex\ \-\-tick \ value\  \-\-frequency \ value
+.br
+.B \%hwclock\ \-\-hctosys
+.IP \(bu 2
+During shutdown the following is called:
+.br
+.B \%hwclock\ \-\-systohc
+.PP
+.in +4
+.BR * " Systems without " adjtimex " may use " ntptime .
+.in
+.PP
+Whether maintaining precision time with NTP daemon
+or not, it makes sense to configure the system to keep reasonably good
+date-time on its own.
+.PP
+The first step in making that happen is having a clear understanding of
+the big picture.  There are two completely separate hardware devices
+running at their own speed and drifting away from the 'correct' time at
+their own rates.  The methods and software for drift correction are
+different for each of them.  However, most systems are configured to
+exchange values between these two clocks at startup and shutdown.  Now
+the individual device's time keeping errors are transferred back and
+forth between each other.  Attempt to configure drift correction for only
+one of them, and the other's drift will be overlaid upon it.
+.PP
+This problem can be avoided when configuring drift correction for the
+System Clock by simply not shutting down the machine.  This, plus the
+fact that all of
+.BR \%hwclock 's
+precision (including calculating drift factors) depends upon the System
+Clock's rate being correct, means that configuration of the System Clock
+should be done first.
+.PP
+The System Clock drift is corrected with the
+.BR \%adjtimex "(8) command's " \-\-tick " and " \%\-\-frequency
+options.  These two work together: tick is the coarse adjustment and
+frequency is the fine adjustment.  (For systems that do not have an
+.BR \%adjtimex " package,"
+.BI \%ntptime\ \-f\  ppm
+may be used instead.)
+.PP
+Some Linux distributions attempt to automatically calculate the System
+Clock drift with
+.BR \%adjtimex 's
+compare operation.  Trying to correct one
+drifting clock by using another drifting clock as a reference is akin to
+a dog trying to catch its own tail.  Success may happen eventually, but
+great effort and frustration will likely precede it.  This automation may
+yield an improvement over no configuration, but expecting optimum
+results would be in error.  A better choice for manual configuration
+would be
+.BR \%adjtimex 's " \-\-log " options.
+.PP
+It may be more effective to simply track the System Clock drift with
+.BR \%sntp ", or " \%date\ \-Ins
+and a precision timepiece, and then calculate the correction manually.
+.PP
+After setting the tick and frequency values, continue to test and refine the
+adjustments until the System Clock keeps good time.  See
+.BR \%adjtimex (8)
+for more information and the example demonstrating manual drift
+calculations.
+.PP
+Once the System Clock is ticking smoothly, move on to the Hardware Clock.
+.PP
+As a rule, cold drift will work best for most use cases.  This should be
+true even for 24/7 machines whose normal downtime consists of a reboot.
+In that case the drift factor value makes little difference.  But on the
+rare occasion that the machine is shut down for an extended period, then
+cold drift should yield better results.
+.PP
+.B Steps to calculate cold drift:
+.IP 1 2
+.B "Ensure that NTP daemon will not be launched at startup."
+.IP 2 2
+.RI The " System Clock " "time must be correct at shutdown!"
+.IP 3 2
+Shut down the system.
+.IP 4 2
+Let an extended period pass without changing the Hardware Clock.
+.IP 5 2
+Start the system.
+.IP 6 2
+.RB "Immediately use " hwclock " to set the correct time, adding the"
+.BR \%\-\-update\-drift " option."
+.PP
+Note: if step 6 uses
+.BR \%\-\-systohc ,
+then the System Clock must be set correctly (step 6a) just before doing so.
+.PP
+.RB "Having " hwclock
+calculate the drift factor is a good starting point, but for optimal
+results it will likely need to be adjusted by directly editing the
+.I @ADJTIME_PATH@
+file.  Continue to test and refine the drift factor until the Hardware
+Clock is corrected properly at startup.  To check this, first make sure
+that the System Time is correct before shutdown and then use
+.BR \%sntp ", or " \%date\ \-Ins
+and a precision timepiece, immediately after startup.
+.SS LOCAL vs UTC
+Keeping the Hardware Clock in a local timescale causes inconsistent
+daylight saving time results:
+.IP \(bu 2
+If Linux is running during a daylight saving time change, the time
+written to the Hardware Clock will be adjusted for the change.
+.IP \(bu 2
+If Linux is NOT running during a daylight saving time change, the time
+read from the Hardware Clock will NOT be adjusted for the change.
+.PP
+The Hardware Clock on an ISA compatible system keeps only a date and time;
+it has no concept of timezone nor daylight saving. Therefore, when
+.B hwclock
+is told that it is in local time, it assumes it is in the 'correct'
+local time and makes no adjustments to the time read from it.
+.PP
+Linux handles daylight saving time changes transparently only when the
+Hardware Clock is kept in the UTC timescale. Doing so is made easy for
+system administrators as
+.B \%hwclock
+uses local time for its output and as the argument to the
+.BR \%\-\-date " option."
+.PP
+POSIX systems, like Linux, are designed to have the System Clock operate
+in the UTC timescale. The Hardware Clock's purpose is to initialize the
+System Clock, so also keeping it in UTC makes sense.
+.PP
+Linux does, however, attempt to accommodate the Hardware Clock being in
+the local timescale. This is primarily for dual-booting with older
+versions of MS Windows. From Windows 7 on, the RealTimeIsUniversal
+registry key is supposed to be working properly so that its Hardware
+Clock can be kept in UTC.
+.
+.SS POSIX vs 'RIGHT'
+A discussion on date-time configuration would be incomplete without
+addressing timezones; this is mostly well covered by
+.BR tzset (3).
+One area that seems to have no documentation is the 'right'
+directory of the Time Zone Database, sometimes called tz or zoneinfo.
+.PP
+There are two separate databases in the zoneinfo system, posix
+and 'right'. 'Right' (now named zoneinfo\-leaps) includes leap seconds and posix
+does not. To use the 'right' database the System Clock must be set to
+\%(UTC\ +\ leap seconds), which is equivalent to \%(TAI\ \-\ 10). This
+allows calculating the
+exact number of seconds between two dates that cross a leap second
+epoch. The System Clock is then converted to the correct civil time,
+including UTC, by using the 'right' timezone files which subtract the
+leap seconds. Note: this configuration is considered experimental and is
+known to have issues.
+.PP
+To configure a system to use a particular database all of the files
+located in its directory must be copied to the root of
+.IR \%/usr/share/zoneinfo .
+Files are never used directly from the posix or 'right' subdirectories, e.g.,
+.RI \%TZ=' right/Europe/Dublin '.
+This habit was becoming so common that the upstream zoneinfo project
+restructured the system's file tree by moving the posix and 'right'
+subdirectories out of the zoneinfo directory and into sibling directories:
+.PP
+.in +2
+.I /usr/share/zoneinfo
+.br
+.I /usr/share/zoneinfo\-posix
+.br
+.I /usr/share/zoneinfo\-leaps
+.PP
+Unfortunately, some Linux distributions are changing it back to the old
+tree structure in their packages. So the problem of system
+administrators reaching into the 'right' subdirectory persists. This
+causes the system timezone to be configured to include leap seconds
+while the zoneinfo database is still configured to exclude them. Then
+when an application such as a World Clock needs the South_Pole timezone
+file; or an email MTA, or
+.B hwclock
+needs the UTC timezone file; they fetch it from the root of
+.IR \%/usr/share/zoneinfo ,
+because that is what they are supposed to do. Those files exclude leap
+seconds, but the System Clock now includes them, causing an incorrect
+time conversion.
+.PP
+Attempting to mix and match files from these separate databases will not
+work, because they each require the System Clock to use a different
+timescale. The zoneinfo database must be configured to use either posix
+or 'right', as described above, or by assigning a database path to the
+.SB TZDIR
+environment variable.
+.SH EXIT STATUS
+One of the following exit values will be returned:
+.TP
+.BR EXIT_SUCCESS " ('0' on POSIX systems)"
+Successful program execution.
+.TP
+.BR EXIT_FAILURE " ('1' on POSIX systems)"
+The operation failed or the command syntax was not valid.
+.SH ENVIRONMENT
+.TP
+.B TZ
+If this variable is set its value takes precedence over the system
+configured timezone.
+.TP
+.B TZDIR
+If this variable is set its value takes precedence over the system
+configured timezone database directory path.
+.SH FILES
+.TP
+.I @ADJTIME_PATH@
+The configuration and state file for hwclock.
+.TP
+.I /etc/localtime
+The system timezone file.
+.TP
+.I /usr/share/zoneinfo/
+The system timezone database directory.
+.PP
+Device files
+.B hwclock
+may try for Hardware Clock access:
+.br
+.I /dev/rtc0
+.br
+.I /dev/rtc
+.br
+.I /dev/misc/rtc
+.br
+.I /dev/efirtc
+.br
+.I /dev/misc/efirtc
+.SH "SEE ALSO"
+.BR date (1),
+.BR adjtimex (8),
+.BR gettimeofday (2),
+.BR settimeofday (2),
+.BR crontab (1),
+.BR tzset (3)
+.
+.SH AUTHORS
+Written by Bryan Henderson, September 1996 (bryanh@giraffe-data.com),
+based on work done on the
+.BR \%clock (8)
+program by Charles Hedrick, Rob Hooft, and Harald Koenig.
+See the source code for complete history and credits.
+.
+.SH AVAILABILITY
+The hwclock command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/hwclock.c b/sys-utils/hwclock.c
new file mode 100644
index 0000000..d9acbaf
--- /dev/null
+++ b/sys-utils/hwclock.c
@@ -0,0 +1,1551 @@
+/*
+ * hwclock.c
+ *
+ * clock.c was written by Charles Hedrick, hedrick@cs.rutgers.edu, Apr 1992
+ * Modified for clock adjustments - Rob Hooft <hooft@chem.ruu.nl>, Nov 1992
+ * Improvements by Harald Koenig <koenig@nova.tat.physik.uni-tuebingen.de>
+ * and Alan Modra <alan@spri.levels.unisa.edu.au>.
+ *
+ * Major rewrite by Bryan Henderson <bryanh@giraffe-data.com>, 96.09.19.
+ * The new program is called hwclock. New features:
+ *
+ *	- You can set the hardware clock without also modifying the system
+ *	  clock.
+ *	- You can read and set the clock with finer than 1 second precision.
+ *	- When you set the clock, hwclock automatically refigures the drift
+ *	  rate, based on how far off the clock was before you set it.
+ *
+ * Reshuffled things, added sparc code, and re-added alpha stuff
+ * by David Mosberger <davidm@azstarnet.com>
+ * and Jay Estabrook <jestabro@amt.tay1.dec.com>
+ * and Martin Ostermann <ost@coments.rwth-aachen.de>, aeb@cwi.nl, 990212.
+ *
+ * Fix for Award 2094 bug, Dave Coffin (dcoffin@shore.net) 11/12/98
+ * Change of local time handling, Stefan Ring <e9725446@stud3.tuwien.ac.at>
+ * Change of adjtime handling, James P. Rutledge <ao112@rgfn.epcc.edu>.
+ *
+ * Distributed under GPL
+ */
+/*
+ * Explanation of `adjusting' (Rob Hooft):
+ *
+ * The problem with my machine is that its CMOS clock is 10 seconds
+ * per day slow. With this version of clock.c, and my '/etc/rc.local'
+ * reading '/etc/clock -au' instead of '/etc/clock -u -s', this error
+ * is automatically corrected at every boot.
+ *
+ * To do this job, the program reads and writes the file '/etc/adjtime'
+ * to determine the correction, and to save its data. In this file are
+ * three numbers:
+ *
+ *	1) the correction in seconds per day. (So if your clock runs 5
+ *	   seconds per day fast, the first number should read -5.0)
+ *	2) the number of seconds since 1/1/1970 the last time the program
+ *	   was used
+ *	3) the remaining part of a second which was leftover after the last
+ *	   adjustment
+ *
+ * Installation and use of this program:
+ *
+ *	a) create a file '/etc/adjtime' containing as the first and only
+ *	   line: '0.0 0 0.0'
+ *	b) run 'clock -au' or 'clock -a', depending on whether your cmos is
+ *	   in universal or local time. This updates the second number.
+ *	c) set your system time using the 'date' command.
+ *	d) update your cmos time using 'clock -wu' or 'clock -w'
+ *	e) replace the first number in /etc/adjtime by your correction.
+ *	f) put the command 'clock -au' or 'clock -a' in your '/etc/rc.local'
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "closestream.h"
+#include "nls.h"
+#include "optutils.h"
+#include "pathnames.h"
+#include "hwclock.h"
+#include "timeutils.h"
+#include "env.h"
+#include "xalloc.h"
+#include "path.h"
+#include "strutils.h"
+
+#ifdef HAVE_LIBAUDIT
+#include <libaudit.h>
+static int hwaudit_fd = -1;
+#endif
+
+UL_DEBUG_DEFINE_MASK(hwclock);
+UL_DEBUG_DEFINE_MASKNAMES(hwclock) = UL_DEBUG_EMPTY_MASKNAMES;
+
+/* The struct that holds our hardware access routines */
+static struct clock_ops *ur;
+
+/* Maximal clock adjustment in seconds per day.
+   (adjtime() glibc call has 2145 seconds limit on i386, so it is good enough for us as well,
+   43219 is a maximal safe value preventing exact_adjustment overflow.) */
+#define MAX_DRIFT 2145.0
+
+struct adjtime {
+	/*
+	 * In-memory copy of the adjtime file, which tells us how to do
+	 * drift corrections. Apart from <dirty>, the members come straight
+	 * from that file (see read_adjtime() and the documentation of the
+	 * file for details). <dirty> indicates that what's in this
+	 * structure is not what's in the disk file (because it has been
+	 * updated since read from the disk file).
+	 */
+	int dirty;
+	/*
+	 * line 1: drift rate in seconds per day, time of the most recent
+	 * adjustment or calibration, and a third, not-adjusted, number
+	 */
+	double drift_factor;
+	time_t last_adj_time;
+	double not_adjusted;
+	/*
+	 * line 2: the most recent time that we set the clock from an
+	 * external authority (as opposed to just doing a drift adjustment)
+	 */
+	time_t last_calib_time;
+	/*
+	 * line 3: timescale, local or UTC, the hardware clock was last set to
+	 */
+	enum a_local_utc { UTC = 0, LOCAL, UNKNOWN } local_utc;
+};
+
+static void hwclock_init_debug(const char *str)
+{
+	__UL_INIT_DEBUG_FROM_STRING(hwclock, HWCLOCK_DEBUG_, 0, str);
+	/* presumably the mask is now set up from <str>; log it and the version */
+	DBG(INIT, ul_debug("hwclock debug mask: 0x%04x", hwclock_debug_mask));
+	DBG(INIT, ul_debug("hwclock version: %s", PACKAGE_STRING));
+}
+
+/* Test aid only: pause for a random interval shorter than one second. */
+static void up_to_1000ms_sleep(void)
+{
+	const int delay_usec = random() % 1000000;	/* below 1000000 usec */
+
+	DBG(RANDOM_SLEEP, ul_debug("sleeping ~%d usec", delay_usec));
+	xusleep(delay_usec);
+}
+
+/*
+ * Convert a time_t to a "timeval" with a zero microsecond part.
+ */
+static struct timeval t2tv(time_t timet)
+{
+	struct timeval tv;
+
+	tv.tv_usec = 0;
+	tv.tv_sec = timet;
+	return tv;
+}
+
+/*
+ * Seconds, including the microsecond fraction, by which the time
+ * <subtrahend> is later than the time <subtractor> ("timeval" format).
+ */
+double time_diff(struct timeval subtrahend, struct timeval subtractor)
+{
+	const double frac = (subtrahend.tv_usec - subtractor.tv_usec) / 1E6;
+	return (subtrahend.tv_sec - subtractor.tv_sec) + frac;
+}
+
+/*
+ * Return the "timeval" that lies <increment> seconds after <addend>; a
+ * negative <increment> yields an earlier time.
+ */
+static struct timeval time_inc(struct timeval addend, double increment)
+{
+	const int whole_secs = (int)increment;	/* truncated toward zero */
+	struct timeval sum;
+
+	sum.tv_sec = addend.tv_sec + whole_secs;
+	sum.tv_usec = addend.tv_usec + (increment - whole_secs) * 1E6;
+
+	/*
+	 * Carry or borrow one second so that 0 <= tv_usec < 1 million.
+	 */
+	if (sum.tv_usec >= 1E6) {
+		sum.tv_usec -= 1E6;
+		sum.tv_sec += 1;
+	} else if (sum.tv_usec < 0) {
+		sum.tv_usec += 1E6;
+		sum.tv_sec -= 1;
+	}
+	return sum;
+}
+
+static int
+hw_clock_is_utc(const struct hwclock_control *ctl,
+		const struct adjtime adjtime)
+{
+	int is_utc = (adjtime.local_utc != LOCAL);	/* adjtime file; default UTC */
+
+	/* An explicit command line option overrides the adjtime file. */
+	if (ctl->utc)
+		is_utc = 1;		/* --utc */
+	else if (ctl->local_opt)
+		is_utc = 0;		/* --localtime */
+
+	if (ctl->verbose)
+		printf(_("Assuming hardware clock is kept in %s time.\n"),
+		       is_utc ? _("UTC") : _("local"));
+
+	return is_utc;
+}
+
+/*
+ * Read the adjustment parameters out of the adjtime file <ctl->adj_file_name>
+ * into <*adjtime_p>. Values the file does not supply keep the defaults the
+ * caller initialized (in main()). Returns EXIT_SUCCESS, also when the file
+ * is not readable at all, or EXIT_FAILURE if it cannot be opened.
+ */
+static int read_adjtime(const struct hwclock_control *ctl,
+			struct adjtime *adjtime_p)
+{
+	FILE *adjfile;
+	char line1[81];		/* String: first line of adjtime file */
+	char line2[81];		/* String: second line of adjtime file */
+	char line3[81];		/* String: third line of adjtime file */
+	long long last_adj, last_calib;	/* scan targets, at least 64 bits wide */
+
+	if (access(ctl->adj_file_name, R_OK) != 0)
+		return EXIT_SUCCESS;
+
+	adjfile = fopen(ctl->adj_file_name, "r");	/* open file for reading */
+	if (adjfile == NULL) {
+		warn(_("cannot open %s"), ctl->adj_file_name);
+		return EXIT_FAILURE;
+	}
+
+	if (!fgets(line1, sizeof(line1), adjfile))
+		line1[0] = '\0';	/* In case fgets fails */
+	if (!fgets(line2, sizeof(line2), adjfile))
+		line2[0] = '\0';	/* In case fgets fails */
+	if (!fgets(line3, sizeof(line3), adjfile))
+		line3[0] = '\0';	/* In case fgets fails */
+
+	fclose(adjfile);
+
+	/* time_t need not be a long, so never hand its address to "%ld" */
+	if (sscanf(line1, "%lf %lld %lf", &adjtime_p->drift_factor,
+		   &last_adj, &adjtime_p->not_adjusted) >= 2)
+		adjtime_p->last_adj_time = (time_t)last_adj;
+
+	if (sscanf(line2, "%lld", &last_calib) == 1)
+		adjtime_p->last_calib_time = (time_t)last_calib;
+
+	if (!strcmp(line3, "UTC\n")) {
+		adjtime_p->local_utc = UTC;
+	} else if (!strcmp(line3, "LOCAL\n")) {
+		adjtime_p->local_utc = LOCAL;
+	} else {
+		adjtime_p->local_utc = UNKNOWN;
+		if (line3[0]) {
+			warnx(_("Warning: unrecognized third line in adjtime file\n"
+				"(Expected: `UTC' or `LOCAL' or nothing.)"));
+		}
+	}
+
+	if (ctl->verbose) {
+		printf(_("Last drift adjustment done at %ld seconds after 1969\n"),
+		       (long)adjtime_p->last_adj_time);
+		printf(_("Last calibration done at %ld seconds after 1969\n"),
+		       (long)adjtime_p->last_calib_time);
+		printf(_("Hardware clock is on %s time\n"),
+		       adjtime_p->local_utc == LOCAL ? _("local") :
+		       adjtime_p->local_utc == UTC ? _("UTC") : _("unknown"));
+	}
+
+	return EXIT_SUCCESS;
+}
+
+/*
+ * Block until the Hardware Clock's update flag falls, so that a time read
+ * from the clock right after we return is exact.
+ *
+ * The clock has only one-second precision; it shows the exact time once per
+ * second, on the falling edge of the update flag.
+ *
+ * The wait (up to one second) is delegated to the active clock access
+ * method, which either blocks on an rtc device or spins on the CPU. The
+ * former is probably not very accurate.
+ *
+ * Returns 0 on success, nonzero on failure.
+ */
+static int synchronize_to_clock_tick(const struct hwclock_control *ctl)
+{
+	int rc;
+
+	/* Quiet mode: nothing to report, just wait. */
+	if (!ctl->verbose)
+		return ur->synchronize_to_clock_tick(ctl);
+
+	printf(_("Waiting for clock tick...\n"));
+
+	rc = ur->synchronize_to_clock_tick(ctl);
+	if (rc)
+		printf(_("...synchronization failed\n"));
+	else
+		printf(_("...got clock tick\n"));
+
+	return rc;
+}
+
+/*
+ * Convert the broken down time <tm> (hours, minutes, etc.) into standard
+ * unix time (seconds into epoch) and store it in *systime_p.
+ *
+ * <tm> is interpreted as UTC when ctl->universal is set and as local time
+ * otherwise.
+ *
+ * Returns 1 when the conversion produced a time and 0 when timegm() or
+ * mktime() returned -1; in the latter case *systime_p holds -1 and must not
+ * be used.
+ *
+ * Note that mktime() sometimes goes ahead and computes a fictional time
+ * "as if" invalid input values were valid, e.g. if they indicate the 31st
+ * day of April, mktime() may compute the time of May 1. Such a value is
+ * passed through unchanged and reported as valid.
+ */
+static int
+mktime_tz(const struct hwclock_control *ctl, struct tm tm,
+	  time_t *systime_p)
+{
+	const time_t secs = ctl->universal ? timegm(&tm) : mktime(&tm);
+	/*
+	 * -1 apparently (not specified in mktime() documentation) means the
+	 * 'tm' structure does not contain valid values (however, not
+	 * containing valid values does _not_ imply mktime() returns -1).
+	 */
+	const int valid = (secs != -1);
+
+	*systime_p = secs;
+
+	if (!ctl->verbose)
+		return valid;
+
+	if (valid)
+		printf(_
+		       ("Hw clock time : %4d/%.2d/%.2d %.2d:%.2d:%.2d = "
+			"%ld seconds since 1969\n"), tm.tm_year + 1900,
+		       tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min,
+		       tm.tm_sec, (long)secs);
+	else
+		printf(_("Invalid values in hardware clock: "
+			 "%4d/%.2d/%.2d %.2d:%.2d:%.2d\n"),
+		       tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+		       tm.tm_hour, tm.tm_min, tm.tm_sec);
+
+	return valid;
+}
+
+/*
+ * Read the Hardware Clock through the active access method and convert the
+ * result to unix time.
+ *
+ * On a successful read, *systime_p receives the converted time and *valid_p
+ * tells whether that conversion worked (see mktime_tz()).
+ *
+ * Returns 0 when the clock could be read, otherwise the access method's
+ * error code; in the error case neither output argument is written.
+ */
+static int
+read_hardware_clock(const struct hwclock_control *ctl,
+		    int *valid_p, time_t *systime_p)
+{
+	struct tm rtc_tm;
+	const int rc = ur->read_hardware_clock(ctl, &rtc_tm);
+
+	if (rc != 0)
+		return rc;
+
+	if (ctl->verbose)
+		printf(_
+		       ("Time read from Hardware Clock: %4d/%.2d/%.2d %02d:%02d:%02d\n"),
+		       rtc_tm.tm_year + 1900, rtc_tm.tm_mon + 1,
+		       rtc_tm.tm_mday, rtc_tm.tm_hour,
+		       rtc_tm.tm_min, rtc_tm.tm_sec);
+
+	*valid_p = mktime_tz(ctl, rtc_tm, systime_p);
+	return 0;
+}
+
+/*
+ * Set the Hardware Clock to the time <newtime>, expressed in UTC when
+ * ctl->universal is set and in the local time zone otherwise.
+ *
+ * In test mode (ctl->testing) the clock itself is left untouched.
+ */
+static void
+set_hardware_clock(const struct hwclock_control *ctl, const time_t newtime)
+{
+	/* <newtime> broken down in the time scale the RTC is kept in. */
+	struct tm rtc_tm;
+
+	if (!ctl->universal)
+		localtime_r(&newtime, &rtc_tm);
+	else
+		gmtime_r(&newtime, &rtc_tm);
+
+	if (ctl->verbose)
+		printf(_("Setting Hardware Clock to %.2d:%.2d:%.2d "
+			 "= %ld seconds since 1969\n"),
+		       rtc_tm.tm_hour, rtc_tm.tm_min,
+		       rtc_tm.tm_sec, (long)newtime);
+
+	if (ctl->testing)
+		return;
+
+	ur->set_hardware_clock(ctl, &rtc_tm);
+}
+
+/*
+ * Pick the delay, in seconds, to use when setting the RTC (see
+ * set_hardware_clock_exact()).
+ *
+ * The RTC type is taken from the "name" attribute under
+ * /sys/class/rtc/<device>, where <device> is the last component of the
+ * access method's device path.
+ *
+ * Returns 0.5 for "rtc_cmos" and whenever the type cannot be determined,
+ * and 0 for any other RTC type.
+ */
+static double
+get_hardware_delay(const struct hwclock_control *ctl)
+{
+	const char *devpath, *rtcname;
+	char name[128 + 1];
+	struct path_cxt *pc;
+	int rc;
+
+	devpath = ur->get_device_path();
+	if (!devpath)
+		goto unknown;
+
+	/* Need a non-empty basename, e.g. "rtc0" out of "/dev/rtc0". */
+	rtcname = strrchr(devpath, '/');
+	if (!rtcname || !*(rtcname + 1))
+		goto unknown;
+	rtcname++;
+
+	pc = ul_new_path("/sys/class/rtc/%s", rtcname);
+	if (!pc)
+		goto unknown;
+	/*
+	 * %[ stores through a char pointer, so pass the array itself (which
+	 * decays to char *) rather than a pointer to the whole array. The
+	 * field width of 128 leaves room for the terminating NUL.
+	 */
+	rc = ul_path_scanf(pc, "name", "%128[^\n ]", name);
+	ul_unref_path(pc);
+
+	if (rc != 1 || !*name)
+		goto unknown;
+
+	if (ctl->verbose)
+		printf(_("RTC type: '%s'\n"), name);
+
+	/* MC146818A-compatible (x86) */
+	if (strcmp(name, "rtc_cmos") == 0)
+		return 0.5;
+
+	/* Another HW */
+	return 0;
+unknown:
+	/* Let's be backwardly compatible */
+	return 0.5;
+}
+
+
+/*
+ * Set the Hardware Clock to the time "sethwtime", in local time zone or
+ * UTC, according to "universal".
+ *
+ * Wait for a fraction of a second so that "sethwtime" is the value of the
+ * Hardware Clock as of system time "refsystime", which is in the past. For
+ * example, if "sethwtime" is 14:03:05 and "refsystime" is 12:10:04.5 and
+ * the current system time is 12:10:06.0: Wait .5 seconds (to make exactly 2
+ * seconds since "refsystime") and then set the Hardware Clock to 14:03:07,
+ * thus getting a precise and retroactive setting of the clock. The .5 delay is
+ * default on x86, see --delay and get_hardware_delay().
+ *
+ * (Don't be confused by the fact that the system clock and the Hardware
+ * Clock differ by two hours in the above example. That's just to remind you
+ * that there are two independent time scales here).
+ *
+ * This function ought to be able to accept set times as fractional times.
+ * Idea for future enhancement.
+ */
+static void
+set_hardware_clock_exact(const struct hwclock_control *ctl,
+			 const time_t sethwtime,
+			 const struct timeval refsystime)
+{
+	/*
+	 * The Hardware Clock can only be set to any integer time plus one
+	 * half second.	 The integer time is required because there is no
+	 * interface to set or get a fractional second.	 The additional half
+	 * second is because the Hardware Clock updates to the following
+	 * second precisely 500 ms (not 1 second!) after you release the
+	 * divider reset (after setting the new time) - see description of
+	 * DV2, DV1, DV0 in Register A in the MC146818A data sheet (and note
+	 * that although that document doesn't say so, real-world code seems
+	 * to expect that the SET bit in Register B functions the same way).
+	 * That means that, e.g., when you set the clock to 1:02:03, it
+	 * effectively really sets it to 1:02:03.5, because it will update to
+	 * 1:02:04 only half a second later.  Our caller passes the desired
+	 * integer Hardware Clock time in sethwtime, and the corresponding
+	 * system time (which may have a fractional part, and which may or may
+	 * not be the same!) in refsystime.  In an ideal situation, we would
+	 * then apply sethwtime to the Hardware Clock at refsystime+500ms, so
+	 * that when the Hardware Clock ticks forward to sethwtime+1s half a
+	 * second later at refsystime+1000ms, everything is in sync.  So we
+	 * spin, waiting for gettimeofday() to return a time at or after that
+	 * time (refsystime+500ms) up to a tolerance value, initially 1ms.  If
+	 * we miss that time due to being preempted for some other process,
+	 * then we increase the margin a little bit (initially 1ms, doubling
+	 * each time), add 1 second (or more, if needed to get a time that is
+	 * in the future) to both the time for which we are waiting and the
+	 * time that we will apply to the Hardware Clock, and start waiting
+	 * again.
+	 *
+	 * For example, the caller requests that we set the Hardware Clock to
+	 * 1:02:03, with reference time (current system time) = 6:07:08.250.
+	 * We want the Hardware Clock to update to 1:02:04 at 6:07:09.250 on
+	 * the system clock, and the first such update will occur 0.500
+	 * seconds after we write to the Hardware Clock, so we spin until the
+	 * system clock reads 6:07:08.750.  If we get there, great, but let's
+	 * imagine the system is so heavily loaded that our process is
+	 * preempted and by the time we get to run again, the system clock
+	 * reads 6:07:11.990.  We now want to wait until the next xx:xx:xx.750
+	 * time, which is 6:07:12.750 (4.5 seconds after the reference time),
+	 * at which point we will set the Hardware Clock to 1:02:07 (4 seconds
+	 * after the originally requested time).  If we do that successfully,
+	 * then at 6:07:13.250 (5 seconds after the reference time), the
+	 * Hardware Clock will update to 1:02:08 (5 seconds after the
+	 * originally requested time), and all is well thereafter.
+	 */
+
+	time_t newhwtime = sethwtime;
+	double target_time_tolerance_secs = 0.001;  /* initial value */
+	double tolerance_incr_secs = 0.001;	    /* initial value */
+	double delay;
+	struct timeval rtc_set_delay_tv;
+
+	struct timeval targetsystime;
+	struct timeval nowsystime;
+	struct timeval prevsystime = refsystime;
+	double deltavstarget;
+
+	/* -1.0 is the "unspecified" default that main() gives rtc_delay. */
+	if (ctl->rtc_delay != -1.0)        /* --delay specified */
+		delay = ctl->rtc_delay;
+	else
+		delay = get_hardware_delay(ctl);
+
+	if (ctl->verbose)
+		printf(_("Using delay: %.6f seconds\n"), delay);
+
+	rtc_set_delay_tv.tv_sec = 0;
+	rtc_set_delay_tv.tv_usec = delay * 1E6;
+
+	/* First target: the reference time shifted by the RTC set delay. */
+	timeradd(&refsystime, &rtc_set_delay_tv, &targetsystime);
+
+	while (1) {
+		/* System time elapsed since the previous loop iteration. */
+		double ticksize;
+
+		ON_DBG(RANDOM_SLEEP, up_to_1000ms_sleep());
+
+		gettimeofday(&nowsystime, NULL);
+		deltavstarget = time_diff(nowsystime, targetsystime);
+		ticksize = time_diff(nowsystime, prevsystime);
+		prevsystime = nowsystime;
+
+		if (ticksize < 0) {
+			if (ctl->verbose)
+				printf(_("time jumped backward %.6f seconds "
+					 "to %ld.%06ld - retargeting\n"),
+				       ticksize, nowsystime.tv_sec,
+				       nowsystime.tv_usec);
+			/* The retarget is handled at the end of the loop. */
+		} else if (deltavstarget < 0) {
+			/* deltavstarget < 0 if current time < target time */
+			DBG(DELTA_VS_TARGET,
+			    ul_debug("%ld.%06ld < %ld.%06ld (%.6f)",
+				     nowsystime.tv_sec, nowsystime.tv_usec,
+				     targetsystime.tv_sec,
+				     targetsystime.tv_usec, deltavstarget));
+			continue;  /* not there yet - keep spinning */
+		} else if (deltavstarget <= target_time_tolerance_secs) {
+			/* Close enough to the target time; done waiting. */
+			break;
+		} else /* (deltavstarget > target_time_tolerance_secs) */ {
+			/*
+			 * We missed our window.  Increase the tolerance and
+			 * aim for the next opportunity.
+			 */
+			if (ctl->verbose)
+				printf(_("missed it - %ld.%06ld is too far "
+					 "past %ld.%06ld (%.6f > %.6f)\n"),
+				       nowsystime.tv_sec,
+				       nowsystime.tv_usec,
+				       targetsystime.tv_sec,
+				       targetsystime.tv_usec,
+				       deltavstarget,
+				       target_time_tolerance_secs);
+			target_time_tolerance_secs += tolerance_incr_secs;
+			tolerance_incr_secs *= 2;
+		}
+
+		/*
+		 * Aim for the same offset (tv_usec) within the second in
+		 * either the current second (if that offset hasn't arrived
+		 * yet), or the next second.
+		 */
+		if (nowsystime.tv_usec < targetsystime.tv_usec)
+			targetsystime.tv_sec = nowsystime.tv_sec;
+		else
+			targetsystime.tv_sec = nowsystime.tv_sec + 1;
+	}
+
+	/*
+	 * Advance the requested RTC time by the rounded number of seconds
+	 * that passed between refsystime and now, excluding the set delay.
+	 */
+	newhwtime = sethwtime
+		    + (int)(time_diff(nowsystime, refsystime)
+			    - delay /* don't count this */
+			    + 0.5 /* for rounding */);
+	if (ctl->verbose)
+		printf(_("%ld.%06ld is close enough to %ld.%06ld (%.6f < %.6f)\n"
+			 "Set RTC to %ld (%ld + %d; refsystime = %ld.%06ld)\n"),
+		       nowsystime.tv_sec, nowsystime.tv_usec,
+		       targetsystime.tv_sec, targetsystime.tv_usec,
+		       deltavstarget, target_time_tolerance_secs,
+		       newhwtime, sethwtime,
+		       (int)(newhwtime - sethwtime),
+		       refsystime.tv_sec, refsystime.tv_usec);
+
+	set_hardware_clock(ctl, newhwtime);
+}
+
+/*
+ * Print <hwctime> on stdout, formatted by strtimeval_iso() with the
+ * ISO_TIMESTAMP_DOT flags and followed by a newline.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE (printing nothing) when the time
+ * cannot be formatted.
+ */
+static int
+display_time(struct timeval hwctime)
+{
+	char stamp[ISO_BUFSIZ];
+	const int failed = strtimeval_iso(&hwctime, ISO_TIMESTAMP_DOT,
+					  stamp, sizeof(stamp));
+
+	if (!failed)
+		printf("%s\n", stamp);
+
+	return failed ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
+/*
+ * Adjusts System time, sets the kernel's timezone and RTC timescale.
+ *
+ * The kernel warp_clock function adjusts the System time according to the
+ * tz.tz_minuteswest argument and sets PCIL (see below). At boot settimeofday(2)
+ * has one-shot access to this function as shown in the table below.
+ *
+ * +-------------------------------------------------------------------+
+ * |                       settimeofday(tv, tz)                        |
+ * |-------------------------------------------------------------------|
+ * |     Arguments     |  System Time  | PCIL |           | warp_clock |
+ * |   tv    |   tz    | set  | warped | set  | firsttime |   locked   |
+ * |---------|---------|---------------|------|-----------|------------|
+ * | pointer | NULL    |  yes |   no   |  no  |     1     |    no      |
+ * | pointer | pointer |  yes |   no   |  no  |     0     |    yes     |
+ * | NULL    | ptr2utc |  no  |   no   |  no  |     0     |    yes     |
+ * | NULL    | pointer |  no  |   yes  |  yes |     0     |    yes     |
+ * +-------------------------------------------------------------------+
+ * ptr2utc: tz.tz_minuteswest is zero (UTC).
+ * PCIL: persistent_clock_is_local, sets the "11 minute mode" timescale.
+ * firsttime: locks the warp_clock function (initialized to 1 at boot).
+ *
+ * +---------------------------------------------------------------------------+
+ * |  op     | RTC scale | settimeofday calls                                  |
+ * |---------|-----------|-----------------------------------------------------|
+ * | systz   |   Local   | 1) warps system time*, sets PCIL* and kernel tz     |
+ * | systz   |   UTC     | 1st) locks warp_clock* 2nd) sets kernel tz          |
+ * | hctosys |   Local   | 1st) sets PCIL* 2nd) sets system time and kernel tz |
+ * | hctosys |   UTC     | 1) sets system time and kernel tz                   |
+ * +---------------------------------------------------------------------------+
+ *                         * only on first call after boot
+ */
+/*
+ * Issue the settimeofday() call(s) for the --hctosys and --systz functions.
+ *
+ * <newtime> is the time passed to settimeofday() for --hctosys; it is also
+ * used to look up the local UTC offset from which the kernel timezone
+ * argument (minutes west of UTC) is derived.
+ *
+ * In test mode (ctl->testing) no settimeofday() call is made.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if a settimeofday() call failed.
+ */
+static int
+set_system_clock(const struct hwclock_control *ctl,
+		 const struct timeval newtime)
+{
+	struct tm broken;
+	int minuteswest;
+	int rc = 0;
+	const struct timezone tz_utc = { 0 };
+
+	/* Local UTC offset in effect at <newtime>, as minutes west of UTC. */
+	localtime_r(&newtime.tv_sec, &broken);
+	minuteswest = -get_gmtoff(&broken) / 60;
+
+	/* Announce the calls in the same order they are made below. */
+	if (ctl->verbose) {
+		if (ctl->hctosys && !ctl->universal)
+			printf(_("Calling settimeofday(NULL, %d) to set "
+				 "persistent_clock_is_local.\n"), minuteswest);
+		if (ctl->systz && ctl->universal)
+			puts(_("Calling settimeofday(NULL, 0) "
+				"to lock the warp function."));
+		if (ctl->hctosys)
+			printf(_("Calling settimeofday(%ld.%06ld, %d)\n"),
+			       newtime.tv_sec, newtime.tv_usec, minuteswest);
+		else {
+			printf(_("Calling settimeofday(NULL, %d) "), minuteswest);
+			if (ctl->universal)
+				 puts(_("to set the kernel timezone."));
+			else
+				 puts(_("to warp System time."));
+		}
+	}
+
+	if (!ctl->testing) {
+		const struct timezone tz = { minuteswest };
+
+		/* Optional preparatory call; its result gates the main call. */
+		if (ctl->hctosys && !ctl->universal)	/* set PCIL */
+			rc = settimeofday(NULL, &tz);
+		if (ctl->systz && ctl->universal)	/* lock warp_clock */
+			rc = settimeofday(NULL, &tz_utc);
+		/* Main call: time plus timezone for --hctosys, else timezone only. */
+		if (!rc && ctl->hctosys)
+			rc = settimeofday(&newtime, &tz);
+		else if (!rc)
+			rc = settimeofday(NULL, &tz);
+
+		if (rc) {
+			warn(_("settimeofday() failed"));
+			return  EXIT_FAILURE;
+		}
+	}
+	return EXIT_SUCCESS;
+}
+
+/*
+ * Record in <*adjtime_p> that the Hardware Clock was just calibrated: the
+ * last calibrated and last adjusted timestamps are set to <nowtime>, the
+ * not-adjusted remainder is cleared and the structure is marked dirty, so
+ * that future drift calculations start from this set point.
+ *
+ * With the --update-drift option the drift factor is updated as well, based
+ * on the fact that the Hardware Clock was just calibrated to <nowtime> and
+ * before that was set to the <hclocktime> time scale. The update is skipped
+ * when there is no previous calibration or when it is less than four hours
+ * old.
+ */
+static void
+adjust_drift_factor(const struct hwclock_control *ctl,
+		    struct adjtime *adjtime_p,
+		    const struct timeval nowtime,
+		    const struct timeval hclocktime)
+{
+	/*
+	 * Let us do computation in doubles. (Floats almost suffice,
+	 * but 195 days + 1 second equals 195 days in floats.)
+	 */
+	const double sec_per_day = 24.0 * 60.0 * 60.0;
+	struct timeval last_calib;
+	double factor_adjust;
+	double drift_factor;
+
+	if (!ctl->update) {
+		if (ctl->verbose)
+			printf(_("Not adjusting drift factor because the "
+				 "--update-drift option was not used.\n"));
+		goto refresh;
+	}
+
+	if (adjtime_p->last_calib_time == 0) {
+		if (ctl->verbose)
+			printf(_("Not adjusting drift factor because last "
+				 "calibration time is zero,\n"
+				 "so history is bad and calibration startover "
+				 "is necessary.\n"));
+		goto refresh;
+	}
+
+	if ((hclocktime.tv_sec - adjtime_p->last_calib_time) < 4 * 60 * 60) {
+		if (ctl->verbose)
+			printf(_("Not adjusting drift factor because it has "
+				 "been less than four hours since the last "
+				 "calibration.\n"));
+		goto refresh;
+	}
+
+	/*
+	 * Correction to apply to the current drift factor.
+	 *
+	 * Simplified: uncorrected_drift / days_since_calibration.
+	 *
+	 * hclocktime is fully corrected with the current drift factor.
+	 * Its difference from nowtime is the missed drift correction.
+	 */
+	last_calib = t2tv(adjtime_p->last_calib_time);
+	factor_adjust = time_diff(nowtime, hclocktime) /
+			(time_diff(nowtime, last_calib) / sec_per_day);
+
+	drift_factor = adjtime_p->drift_factor + factor_adjust;
+	if (fabs(drift_factor) > MAX_DRIFT) {
+		if (ctl->verbose)
+			printf(_("Clock drift factor was calculated as "
+				 "%f seconds/day.\n"
+				 "It is far too much. Resetting to zero.\n"),
+			       drift_factor);
+		drift_factor = 0;
+	} else if (ctl->verbose) {
+		printf(_("Clock drifted %f seconds in the past "
+			 "%f seconds\nin spite of a drift factor of "
+			 "%f seconds/day.\n"
+			 "Adjusting drift factor by %f seconds/day\n"),
+		       time_diff(nowtime, hclocktime),
+		       time_diff(nowtime, last_calib),
+		       adjtime_p->drift_factor, factor_adjust);
+	}
+
+	adjtime_p->drift_factor = drift_factor;
+
+refresh:
+	/* Done on every path: this calibration becomes the new set point. */
+	adjtime_p->last_calib_time = nowtime.tv_sec;
+	adjtime_p->last_adj_time = nowtime.tv_sec;
+	adjtime_p->not_adjusted = 0;
+	adjtime_p->dirty = 1;
+}
+
+/*
+ * Work out the drift correction the Hardware Clock currently needs.
+ *
+ * The correction is the time elapsed between <last_time> and <systime>
+ * multiplied by the drift <factor> (seconds per day), plus the
+ * <not_adjusted> remainder carried over from earlier.
+ *
+ * The result is stored in <*tdrift_p>, with tv_sec holding the floor of the
+ * correction and tv_usec the remaining fraction.
+ */
+static void
+calculate_adjustment(const struct hwclock_control *ctl,
+		     const double factor,
+		     const time_t last_time,
+		     const double not_adjusted,
+		     const time_t systime, struct timeval *tdrift_p)
+{
+	const time_t elapsed = systime - last_time;
+	const double drift =
+	    ((double)elapsed) * factor / (24 * 60 * 60)
+	    + not_adjusted;
+
+	tdrift_p->tv_sec = (time_t) floor(drift);
+	tdrift_p->tv_usec = (drift - (double)tdrift_p->tv_sec) * 1E6;
+
+	if (!ctl->verbose)
+		return;
+
+	printf(P_("Time since last adjustment is %ld second\n",
+		"Time since last adjustment is %ld seconds\n",
+	       elapsed),
+	       elapsed);
+	printf(_("Calculated Hardware Clock drift is %ld.%06ld seconds\n"),
+	       tdrift_p->tv_sec, tdrift_p->tv_usec);
+}
+
+/*
+ * Write the contents of the <adjtime> structure to its disk file
+ * (ctl->adj_file_name).
+ *
+ * The file is rewritten unconditionally; callers that want to skip clean
+ * (unchanged) data must check adjtime->dirty themselves.
+ *
+ * In test mode (ctl->testing) nothing is written; with ctl->verbose the new
+ * file contents are printed either way.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if the file could not be opened or
+ * updated.
+ */
+static int save_adjtime(const struct hwclock_control *ctl,
+			 const struct adjtime *adjtime)
+{
+	char *content;		/* Stuff to write to disk file */
+	FILE *fp;
+	int rc = EXIT_SUCCESS;
+
+	/* Anything but LOCAL (including UNKNOWN) is saved as "UTC". */
+	xasprintf(&content, "%f %ld %f\n%ld\n%s\n",
+		  adjtime->drift_factor,
+		  adjtime->last_adj_time,
+		  adjtime->not_adjusted,
+		  adjtime->last_calib_time,
+		  (adjtime->local_utc == LOCAL) ? "LOCAL" : "UTC");
+
+	if (ctl->verbose){
+		printf(_("New %s data:\n%s"),
+		       ctl->adj_file_name, content);
+	}
+
+	if (!ctl->testing) {
+		fp = fopen(ctl->adj_file_name, "w");
+		if (fp == NULL) {
+			warn(_("cannot open %s"), ctl->adj_file_name);
+			rc = EXIT_FAILURE;
+		} else if (fputs(content, fp) < 0) {
+			/* Report first, while errno still describes fputs(). */
+			warn(_("cannot update %s"), ctl->adj_file_name);
+			fclose(fp);	/* do not leak the stream */
+			rc = EXIT_FAILURE;
+		} else if (close_stream(fp) != 0) {
+			warn(_("cannot update %s"), ctl->adj_file_name);
+			rc = EXIT_FAILURE;
+		}
+	}
+
+	/* Presumably a heap buffer allocated by xasprintf(); release it. */
+	free(content);
+	return rc;
+}
+
+/*
+ * Do the adjustment requested, by 1) setting the Hardware Clock, and
+ * 2) updating the last-adjusted time in the adjtime structure.
+ *
+ * Nothing is changed when the last adjustment time is zero (no usable
+ * history) or when the stored drift factor exceeds MAX_DRIFT.
+ *
+ * <hclocktime> is the drift corrected time read from the Hardware Clock.
+ *
+ * <read_time> was the system time when the <hclocktime> was read, which due
+ * to computational delay could be a short time ago. It is used to define a
+ * trigger point for setting the Hardware Clock. The fractional part of the
+ * Hardware clock set time is subtracted from read_time to 'refer back', or
+ * delay, the trigger point.  Fractional parts must be accounted for in this
+ * way, because the Hardware Clock can only be set to a whole second.
+ */
+static void
+do_adjustment(const struct hwclock_control *ctl, struct adjtime *adjtime_p,
+	      const struct timeval hclocktime,
+	      const struct timeval read_time)
+{
+	struct timeval trigger;
+
+	if (adjtime_p->last_adj_time == 0) {
+		if (ctl->verbose)
+			printf(_("Not setting clock because last adjustment time is zero, "
+				 "so history is bad.\n"));
+		return;
+	}
+
+	if (fabs(adjtime_p->drift_factor) > MAX_DRIFT) {
+		if (ctl->verbose)
+			printf(_("Not setting clock because drift factor %f is far too high.\n"),
+				adjtime_p->drift_factor);
+		return;
+	}
+
+	/* System time that corresponds to the whole second hclocktime.tv_sec. */
+	trigger = time_inc(read_time, -(hclocktime.tv_usec / 1E6));
+	set_hardware_clock_exact(ctl, hclocktime.tv_sec, trigger);
+
+	adjtime_p->last_adj_time = hclocktime.tv_sec;
+	adjtime_p->not_adjusted = 0;
+	adjtime_p->dirty = 1;
+}
+
+/*
+ * Select the clock access method and store it in the file-level <ur>.
+ *
+ * The CMOS probe is tried only when --directisa was given; on Linux the rtc
+ * device probe is used when no method has been found yet. If nothing is
+ * usable, the problem is reported and hwclock_exit() is called with
+ * EXIT_FAILURE.
+ */
+static void determine_clock_access_method(const struct hwclock_control *ctl)
+{
+	ur = ctl->directisa ? probe_for_cmos_clock() : NULL;
+#ifdef __linux__
+	if (ur == NULL)
+		ur = probe_for_rtc_clock(ctl);
+#endif
+
+	if (ur != NULL) {
+		if (ctl->verbose)
+			puts(ur->interface_name);
+		return;
+	}
+
+	if (ctl->verbose)
+		printf(_("No usable clock interface found.\n"));
+	warnx(_("Cannot access the Hardware Clock via "
+		"any known method."));
+	if (!ctl->verbose)
+		warnx(_("Use the --verbose option to see the "
+			"details of our search for an access "
+			"method."));
+	hwclock_exit(ctl, EXIT_FAILURE);
+}
+
+/*
+ * Do all the normal work of hwclock - read, set clock, etc.
+ *
+ * <set_time> is the time to use for --set and --predict.
+ * <startup_time> is the system time that the Hardware Clock reading is
+ * correlated back to.
+ * <adjtime> holds the adjtime data; it may be modified and is saved at the
+ * end when dirty, unless --noadjfile was given.
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+static int
+manipulate_clock(const struct hwclock_control *ctl, const time_t set_time,
+		 const struct timeval startup_time, struct adjtime *adjtime)
+{
+	/* The time at which we read the Hardware Clock */
+	struct timeval read_time;
+	/*
+	 * The Hardware Clock gives us a valid time, or at
+	 * least something close enough to fool mktime().
+	 */
+	int hclock_valid = 0;
+	/*
+	 * Tick synchronized time read from the Hardware Clock and
+	 * then drift corrected for all operations except --show.
+	 */
+	struct timeval hclocktime = { 0 };
+	/*
+	 * hclocktime correlated to startup_time. That is, what drift
+	 * corrected Hardware Clock time would have been at start up.
+	 */
+	struct timeval startup_hclocktime = { 0 };
+	/* Total Hardware Clock drift correction needed. */
+	struct timeval tdrift;
+
+	/*
+	 * When the RTC is about to be written, make the time scale recorded
+	 * in the adjtime data agree with the one in use.
+	 */
+	if ((ctl->set || ctl->systohc || ctl->adjust) &&
+	    (adjtime->local_utc == UTC) != ctl->universal) {
+		adjtime->local_utc = ctl->universal ? UTC : LOCAL;
+		adjtime->dirty = 1;
+	}
+	/*
+	 * Negate the drift correction, because we want to 'predict' a
+	 * Hardware Clock time that includes drift.
+	 */
+	if (ctl->predict) {
+		hclocktime = t2tv(set_time);
+		calculate_adjustment(ctl, adjtime->drift_factor,
+				     adjtime->last_adj_time,
+				     adjtime->not_adjusted,
+				     hclocktime.tv_sec, &tdrift);
+		hclocktime = time_inc(hclocktime, (double)
+				      -(tdrift.tv_sec + tdrift.tv_usec / 1E6));
+		if (ctl->verbose) {
+			printf(_ ("Target date:   %ld\n"), set_time);
+			printf(_ ("Predicted RTC: %ld\n"), hclocktime.tv_sec);
+		}
+		return display_time(hclocktime);
+	}
+
+	/* --systz and --predict return before any RTC access call below. */
+	if (ctl->systz)
+		return set_system_clock(ctl, startup_time);
+
+	if (ur->get_permissions())
+		return EXIT_FAILURE;
+
+	/*
+	 * Read and drift correct RTC time; except for RTC set functions
+	 * without the --update-drift option because: 1) it's not needed;
+	 * 2) it enables setting a corrupted RTC without reading it first;
+	 * 3) it significantly reduces system shutdown time.
+	 */
+	if ( ! ((ctl->set || ctl->systohc) && !ctl->update)) {
+		/*
+		 * Timing critical - do not change the order of, or put
+		 * anything between the follow three statements.
+		 * Synchronization failure MUST exit, because all drift
+		 * operations are invalid without it.
+		 *
+		 * NOTE(review): the return value of read_hardware_clock()
+		 * is not checked; on a read error hclock_valid keeps its
+		 * initial 0, so the validity check below still fails.
+		 */
+		if (synchronize_to_clock_tick(ctl))
+			return EXIT_FAILURE;
+		read_hardware_clock(ctl, &hclock_valid, &hclocktime.tv_sec);
+		gettimeofday(&read_time, NULL);
+
+		if (!hclock_valid) {
+			warnx(_("RTC read returned an invalid value."));
+			return EXIT_FAILURE;
+		}
+		/*
+		 * Calculate and apply drift correction to the Hardware Clock
+		 * time for everything except --show
+		 */
+		calculate_adjustment(ctl, adjtime->drift_factor,
+				     adjtime->last_adj_time,
+				     adjtime->not_adjusted,
+				     hclocktime.tv_sec, &tdrift);
+		/* tdrift is the addend here; the RTC seconds are added to it. */
+		if (!ctl->show)
+			hclocktime = time_inc(tdrift, hclocktime.tv_sec);
+
+		startup_hclocktime =
+		 time_inc(hclocktime, time_diff(startup_time, read_time));
+	}
+	if (ctl->show || ctl->get) {
+		return display_time(startup_hclocktime);
+	} else if (ctl->set) {
+		set_hardware_clock_exact(ctl, set_time, startup_time);
+		if (!ctl->noadjfile)
+			adjust_drift_factor(ctl, adjtime, t2tv(set_time),
+					    startup_hclocktime);
+	} else if (ctl->adjust) {
+		/* Only act when the drift is at least one whole second. */
+		if (tdrift.tv_sec > 0 || tdrift.tv_sec < -1)
+			do_adjustment(ctl, adjtime, hclocktime, read_time);
+		else
+			printf(_("Needed adjustment is less than one second, "
+				 "so not setting clock.\n"));
+	} else if (ctl->systohc) {
+		struct timeval nowtime, reftime;
+		/*
+		 * We can only set_hardware_clock_exact to a
+		 * whole seconds time, so we set it with
+		 * reference to the most recent whole
+		 * seconds time.
+		 */
+		gettimeofday(&nowtime, NULL);
+		reftime.tv_sec = nowtime.tv_sec;
+		reftime.tv_usec = 0;
+		set_hardware_clock_exact(ctl, (time_t) reftime.tv_sec, reftime);
+		if (!ctl->noadjfile)
+			adjust_drift_factor(ctl, adjtime, nowtime,
+					    hclocktime);
+	} else if (ctl->hctosys) {
+		return set_system_clock(ctl, hclocktime);
+	}
+	/* Persist any adjtime changes made above. */
+	if (!ctl->noadjfile && adjtime->dirty)
+		return save_adjtime(ctl, adjtime);
+	return EXIT_SUCCESS;
+}
+
+/**
+ * Get or set the kernel RTC driver's epoch on Alpha machines.
+ * ISA machines are hard coded for 1900.
+ */
+#if defined(__linux__) && defined(__alpha__)
+static void
+manipulate_epoch(const struct hwclock_control *ctl)
+{
+	unsigned long epoch;
+
+	/* --getepoch: report the epoch and ignore everything else. */
+	if (ctl->getepoch) {
+		if (get_epoch_rtc(ctl, &epoch) == 0)
+			printf(_("The RTC epoch is set to %lu.\n"), epoch);
+		else
+			warnx(_("unable to read the RTC epoch."));
+		return;
+	}
+
+	if (!ctl->setepoch)
+		return;
+
+	/* --setepoch needs the value supplied by --epoch. */
+	if (!ctl->epoch_option) {
+		warnx(_("--epoch is required for --setepoch."));
+		return;
+	}
+
+	/* In test mode the RTC epoch is left untouched. */
+	if (!ctl->testing && set_epoch_rtc(ctl))
+		warnx(_("unable to set the RTC epoch."));
+}
+#endif		/* __linux__ __alpha__ */
+
+/*
+ * Print the version information on stdout.
+ *
+ * NOTE(review): UTIL_LINUX_VERSION is handed to printf() as its whole
+ * argument list; presumably the macro expands to a format string plus its
+ * arguments - confirm against the macro definition.
+ */
+static void out_version(void)
+{
+	printf(UTIL_LINUX_VERSION);
+}
+
+/*
+ * Print the help text on stdout and exit with EXIT_SUCCESS.
+ *
+ * Never returns. The epoch functions and the --epoch option are listed only
+ * on Linux/Alpha builds, and --rtc only on Linux builds.
+ */
+static void __attribute__((__noreturn__))
+usage(void)
+{
+	fputs(USAGE_HEADER, stdout);
+	printf(_(" %s [function] [option...]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	puts(_("Time clocks utility."));
+
+	/* Functions: the operation hwclock is asked to perform. */
+	fputs(USAGE_FUNCTIONS, stdout);
+	puts(_(" -r, --show           display the RTC time"));
+	puts(_("     --get            display drift corrected RTC time"));
+	puts(_("     --set            set the RTC according to --date"));
+	puts(_(" -s, --hctosys        set the system time from the RTC"));
+	puts(_(" -w, --systohc        set the RTC from the system time"));
+	puts(_("     --systz          send timescale configurations to the kernel"));
+	puts(_(" -a, --adjust         adjust the RTC to account for systematic drift"));
+#if defined(__linux__) && defined(__alpha__)
+	puts(_("     --getepoch       display the RTC epoch"));
+	puts(_("     --setepoch       set the RTC epoch according to --epoch"));
+#endif
+	puts(_("     --predict        predict the drifted RTC time according to --date"));
+	/* Options: modifiers for the selected function. */
+	fputs(USAGE_OPTIONS, stdout);
+	puts(_(" -u, --utc            the RTC timescale is UTC"));
+	puts(_(" -l, --localtime      the RTC timescale is Local"));
+#ifdef __linux__
+	printf(_(
+	       " -f, --rtc <file>     use an alternate file to %1$s\n"), _PATH_RTC_DEV);
+#endif
+	printf(_(
+	       "     --directisa      use the ISA bus instead of %1$s access\n"), _PATH_RTC_DEV);
+	puts(_("     --date <time>    date/time input for --set and --predict"));
+	puts(_("     --delay <sec>    delay used when set new RTC time"));
+#if defined(__linux__) && defined(__alpha__)
+	puts(_("     --epoch <year>   epoch input for --setepoch"));
+#endif
+	puts(_("     --update-drift   update the RTC drift factor"));
+	printf(_(
+	       "     --noadjfile      do not use %1$s\n"), _PATH_ADJTIME);
+	printf(_(
+	       "     --adjfile <file> use an alternate file to %1$s\n"), _PATH_ADJTIME);
+	puts(_("     --test           dry run; implies --verbose"));
+	puts(_(" -v, --verbose        display more details"));
+	fputs(USAGE_SEPARATOR, stdout);
+	printf(USAGE_HELP_OPTIONS(22));
+	printf(USAGE_MAN_TAIL("hwclock(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+/* hwclock entry point: parse options, validate combinations, then hand off to manipulate_clock(). */
+int main(int argc, char **argv)
+{
+	struct hwclock_control ctl = {
+			.show = 1,		/* default op is show */
+			.rtc_delay = -1.0	/* unspecified */
+	};
+	/*
+	 * The time we started up, in seconds into the epoch, including
+	 * fractions.
+	 */
+	struct timeval startup_time;
+	struct adjtime adjtime = { 0 };
+	struct timespec when = { 0 };
+	time_t set_time = 0;	/* Time to which user said to set Hardware Clock */
+	int rc, c;
+
+	/* Long only options. */
+	enum {
+		OPT_ADJFILE = CHAR_MAX + 1,
+		OPT_DATE,
+		OPT_DELAY,
+		OPT_DIRECTISA,
+		OPT_EPOCH,
+		OPT_GET,
+		OPT_GETEPOCH,
+		OPT_NOADJFILE,
+		OPT_PREDICT,
+		OPT_SET,
+		OPT_SETEPOCH,
+		OPT_SYSTZ,
+		OPT_TEST,
+		OPT_UPDATE
+	};
+
+	static const struct option longopts[] = {
+		{ "adjust",       no_argument,       NULL, 'a'            },
+		{ "help",         no_argument,       NULL, 'h'            },
+		{ "localtime",    no_argument,       NULL, 'l'            },
+		{ "show",         no_argument,       NULL, 'r'            },
+		{ "hctosys",      no_argument,       NULL, 's'            },
+		{ "utc",          no_argument,       NULL, 'u'            },
+		{ "version",      no_argument,       NULL, 'V'            },
+		{ "systohc",      no_argument,       NULL, 'w'            },
+		{ "debug",        no_argument,       NULL, 'D'            },
+		{ "ul-debug",     required_argument, NULL, 'd'            },
+		{ "verbose",      no_argument,       NULL, 'v'            },
+		{ "set",          no_argument,       NULL, OPT_SET        },
+#if defined(__linux__) && defined(__alpha__)
+		{ "getepoch",     no_argument,       NULL, OPT_GETEPOCH   },
+		{ "setepoch",     no_argument,       NULL, OPT_SETEPOCH   },
+		{ "epoch",        required_argument, NULL, OPT_EPOCH      },
+#endif
+		{ "noadjfile",    no_argument,       NULL, OPT_NOADJFILE  },
+		{ "directisa",    no_argument,       NULL, OPT_DIRECTISA  },
+		{ "test",         no_argument,       NULL, OPT_TEST       },
+		{ "date",         required_argument, NULL, OPT_DATE       },
+		{ "delay",        required_argument, NULL, OPT_DELAY      },
+#ifdef __linux__
+		{ "rtc",          required_argument, NULL, 'f'            },
+#endif
+		{ "adjfile",      required_argument, NULL, OPT_ADJFILE    },
+		{ "systz",        no_argument,       NULL, OPT_SYSTZ      },
+		{ "predict",      no_argument,       NULL, OPT_PREDICT    },
+		{ "get",          no_argument,       NULL, OPT_GET        },
+		{ "update-drift", no_argument,       NULL, OPT_UPDATE     },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'a','r','s','w',
+		  OPT_GET, OPT_GETEPOCH, OPT_PREDICT,
+		  OPT_SET, OPT_SETEPOCH, OPT_SYSTZ },
+		{ 'l', 'u' },
+		{ OPT_ADJFILE, OPT_NOADJFILE },
+		{ OPT_NOADJFILE, OPT_UPDATE },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	/* Remember what time we were invoked */
+	gettimeofday(&startup_time, NULL);
+
+#ifdef HAVE_LIBAUDIT
+	hwaudit_fd = audit_open();
+	if (hwaudit_fd < 0 && !(errno == EINVAL || errno == EPROTONOSUPPORT ||
+				errno == EAFNOSUPPORT)) {
+		/*
+		 * You get these error codes only when the kernel doesn't
+		 * have audit compiled in.
+		 */
+		warnx(_("Unable to connect to audit system"));
+		return EXIT_FAILURE;
+	}
+#endif
+	setlocale(LC_ALL, "");
+#ifdef LC_NUMERIC
+	/*
+	 * We need LC_CTYPE and LC_TIME and LC_MESSAGES, but must avoid
+	 * LC_NUMERIC since it gives problems when we write to /etc/adjtime.
+	 *  - gqueri@mail.dotcom.fr
+	 */
+	setlocale(LC_NUMERIC, "C");
+#endif
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv,
+				"hvVDd:alrsuwf:", longopts, NULL)) != -1) {
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch (c) {
+		case 'D':
+			warnx(_("use --verbose, --debug has been deprecated."));
+			break;
+		case 'v':
+			ctl.verbose = 1;
+			break;
+		case 'd':
+			hwclock_init_debug(optarg);
+			break;
+		case 'a':
+			ctl.adjust = 1;
+			ctl.show = 0;
+			ctl.hwaudit_on = 1;
+			break;
+		case 'l':
+			ctl.local_opt = 1;	/* --localtime */
+			break;
+		case 'r':
+			ctl.show = 1;
+			break;
+		case 's':
+			ctl.hctosys = 1;
+			ctl.show = 0;
+			ctl.hwaudit_on = 1;
+			break;
+		case 'u':
+			ctl.utc = 1;
+			break;
+		case 'w':
+			ctl.systohc = 1;
+			ctl.show = 0;
+			ctl.hwaudit_on = 1;
+			break;
+		case OPT_SET:
+			ctl.set = 1;
+			ctl.show = 0;
+			ctl.hwaudit_on = 1;
+			break;
+#if defined(__linux__) && defined(__alpha__)
+		case OPT_GETEPOCH:
+			ctl.getepoch = 1;
+			ctl.show = 0;
+			break;
+		case OPT_SETEPOCH:
+			ctl.setepoch = 1;
+			ctl.show = 0;
+			ctl.hwaudit_on = 1;
+			break;
+		case OPT_EPOCH:
+			ctl.epoch_option = optarg;	/* --epoch */
+			break;
+#endif
+		case OPT_NOADJFILE:
+			ctl.noadjfile = 1;
+			break;
+		case OPT_DIRECTISA:
+			ctl.directisa = 1;
+			break;
+		case OPT_TEST:
+			ctl.testing = 1;	/* --test */
+			ctl.verbose = 1;
+			break;
+		case OPT_DATE:
+			ctl.date_opt = optarg;	/* --date */
+			break;
+		case OPT_DELAY:
+			ctl.rtc_delay = strtod_or_err(optarg, _("invalid --delay argument"));
+			break;
+		case OPT_ADJFILE:
+			ctl.adj_file_name = optarg;	/* --adjfile */
+			break;
+		case OPT_SYSTZ:
+			ctl.systz = 1;		/* --systz */
+			ctl.show = 0;
+			ctl.hwaudit_on = 1;
+			break;
+		case OPT_PREDICT:
+			ctl.predict = 1;	/* --predict */
+			ctl.show = 0;
+			break;
+		case OPT_GET:
+			ctl.get = 1;		/* --get */
+			ctl.show = 0;
+			break;
+		case OPT_UPDATE:
+			ctl.update = 1;		/* --update-drift */
+			break;
+#ifdef __linux__
+		case 'f':
+			ctl.rtc_dev_name = optarg;	/* --rtc */
+			break;
+#endif
+		case 'V':			/* --version */
+			out_version();
+			return 0;
+		case 'h':			/* --help */
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (argc -= optind) {
+		warnx(_("%d too many arguments given"), argc);
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	if (!ctl.adj_file_name)
+		ctl.adj_file_name = _PATH_ADJTIME;
+
+	if (ctl.update && !ctl.set && !ctl.systohc) {
+		warnx(_("--update-drift requires --set or --systohc"));
+		exit(EXIT_FAILURE);
+	}
+
+	if (ctl.noadjfile && !ctl.utc && !ctl.local_opt) {
+		warnx(_("With --noadjfile, you must specify "
+			"either --utc or --localtime"));
+		exit(EXIT_FAILURE);
+	}
+
+	if (ctl.set || ctl.predict) {
+		if (!ctl.date_opt) {
+			warnx(_("--date is required for --set or --predict"));
+			exit(EXIT_FAILURE);
+		}
+		if (parse_date(&when, ctl.date_opt, NULL))
+			set_time = when.tv_sec;
+		else {
+			warnx(_("invalid date '%s'"), ctl.date_opt);
+			exit(EXIT_FAILURE);
+		}
+	}
+
+#if defined(__linux__) && defined(__alpha__)
+	if (ctl.getepoch || ctl.setepoch) {
+		manipulate_epoch(&ctl);
+		hwclock_exit(&ctl, EXIT_SUCCESS);
+	}
+#endif
+
+	if (ctl.verbose) {
+		out_version();
+		printf(_("System Time: %ld.%06ld\n"),	/* casts: time_t/suseconds_t need not be long */
+		       (long) startup_time.tv_sec, (long) startup_time.tv_usec);
+	}
+
+	if (!ctl.systz && !ctl.predict)
+		determine_clock_access_method(&ctl);
+
+	if (!ctl.noadjfile && !(ctl.systz && (ctl.utc || ctl.local_opt))) {
+		if ((rc = read_adjtime(&ctl, &adjtime)) != 0)
+			hwclock_exit(&ctl, rc);
+	} else
+		/* Avoid writing adjtime file if we don't have to. */
+		adjtime.dirty = 0;
+	ctl.universal = hw_clock_is_utc(&ctl, adjtime);
+	rc = manipulate_clock(&ctl, set_time, startup_time, &adjtime);
+	if (ctl.testing)
+		puts(_("Test mode: nothing was changed."));
+	hwclock_exit(&ctl, rc);
+	return rc;		/* Not reached */
+}
+
+void
+hwclock_exit(const struct hwclock_control *ctl
+#ifndef HAVE_LIBAUDIT
+	     __attribute__((__unused__))	/* ctl is only read in the libaudit build */
+#endif
+	     , int status)
+{	/* Common exit path: optionally log an audit record, then exit(status). */
+#ifdef HAVE_LIBAUDIT
+	if (ctl->hwaudit_on && !ctl->testing) {	/* no audit record for --test runs or when hwaudit_on is unset */
+		audit_log_user_message(hwaudit_fd, AUDIT_USYS_CONFIG,
+				       "op=change-system-time", NULL, NULL, NULL,
+				       status);
+	}
+	close(hwaudit_fd);	/* descriptor opened by audit_open() in main() */
+#endif
+	exit(status);	/* never returns */
+}
+
+/*
+ * History of this program:
+ *
+ * 98.08.12 BJH Version 2.4
+ *
+ * Don't use century byte from Hardware Clock. Add comments telling why.
+ *
+ * 98.06.20 BJH Version 2.3.
+ *
+ * Make --hctosys set the kernel timezone from TZ environment variable
+ * and/or /usr/lib/zoneinfo. From Klaus Ripke (klaus@ripke.com).
+ *
+ * 98.03.05 BJH. Version 2.2.
+ *
+ * Add --getepoch and --setepoch.
+ *
+ * Fix some word length things so it works on Alpha.
+ *
+ * Make it work when /dev/rtc doesn't have the interrupt functions. In this
+ * case, busywait for the top of a second instead of blocking and waiting
+ * for the update complete interrupt.
+ *
+ * Fix a bunch of bugs too numerous to mention.
+ *
+ * 97.06.01: BJH. Version 2.1. Read and write the century byte (Byte 50) of
+ * the ISA Hardware Clock when using direct ISA I/O. Problem discovered by
+ * job (jei@iclnl.icl.nl).
+ *
+ * Use the rtc clock access method in preference to the KDGHWCLK method.
+ * Problem discovered by Andreas Schwab <schwab@LS5.informatik.uni-dortmund.de>.
+ *
+ * November 1996: Version 2.0.1. Modifications by Nicolai Langfeldt
+ * (janl@math.uio.no) to make it compile on linux 1.2 machines as well as
+ * more recent versions of the kernel. Introduced the NO_CLOCK access method
+ * and wrote feature test code to detect absence of rtc headers.
+ *
+ ***************************************************************************
+ * Maintenance notes
+ *
+ * To compile this, you must use GNU compiler optimization (-O option) in
+ * order to make the "extern inline" functions from asm/io.h (inb(), etc.)
+ * compile. If you don't optimize, which means the compiler will generate no
+ * inline functions, the references to these functions in this program will
+ * be compiled as external references. Since you probably won't be linking
+ * with any functions by these names, you will have unresolved external
+ * references when you link.
+ *
+ * Here's some info on how we must deal with the time that elapses while
+ * this program runs: There are two major delays as we run:
+ *
+ *	1) Waiting up to 1 second for a transition of the Hardware Clock so
+ *	   we are synchronized to the Hardware Clock.
+ *	2) Running the "date" program to interpret the value of our --date
+ *	   option.
+ *
+ * Reading the /etc/adjtime file is the next biggest source of delay and
+ * uncertainty.
+ *
+ * The user wants to know what time it was at the moment he invoked us, not
+ * some arbitrary time later. And in setting the clock, he is giving us the
+ * time at the moment we are invoked, so if we set the clock some time
+ * later, we have to add some time to that.
+ *
+ * So we check the system time as soon as we start up, then run "date" and
+ * do file I/O if necessary, then wait to synchronize with a Hardware Clock
+ * edge, then check the system time again to see how much time we spent. We
+ * immediately read the clock then and (if appropriate) report that time,
+ * and additionally, the delay we measured.
+ *
+ * If we're setting the clock to a time given by the user, we wait some more
+ * so that the total delay is an integral number of seconds, then set the
+ * Hardware Clock to the time the user requested plus that integral number
+ * of seconds. N.B. The Hardware Clock can only be set in integral seconds.
+ *
+ * If we're setting the clock to the system clock value, we wait for the
+ * system clock to reach the top of a second, and then set the Hardware
+ * Clock to the system clock's value.
+ *
+ * Here's an interesting point about setting the Hardware Clock:  On my
+ * machine, when you set it, it sets to that precise time. But one can
+ * imagine another clock whose update oscillator marches on a steady one
+ * second period, so updating the clock between any two oscillator ticks is
+ * the same as updating it right at the earlier tick. To avoid any
+ * complications that might cause, we set the clock as soon as possible
+ * after an oscillator tick.
+ *
+ * About synchronizing to the Hardware Clock when reading the time: The
+ * precision of the Hardware Clock counters themselves is one second. You
+ * can't read the counters and find out that it is 12:01:02.5. But if you
+ * consider the location in time of the counter's ticks as part of its
+ * value, then its precision is as infinite as time is continuous! What I'm
+ * saying is this: To find out the _exact_ time in the hardware clock, we
+ * wait until the next clock tick (the next time the second counter changes)
+ * and measure how long we had to wait. We then read the value of the clock
+ * counters and subtract the wait time and we know precisely what time it
+ * was when we set out to query the time.
+ *
+ * hwclock uses this method, and considers the Hardware Clock to have
+ * infinite precision.
+ */
diff --git a/sys-utils/hwclock.h b/sys-utils/hwclock.h
new file mode 100644
index 0000000..92fdb5f
--- /dev/null
+++ b/sys-utils/hwclock.h
@@ -0,0 +1,80 @@
+#ifndef HWCLOCK_CLOCK_H
+#define HWCLOCK_CLOCK_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+#include "c.h"
+#include "debug.h"
+
+#define HWCLOCK_DEBUG_INIT		(1 << 0)
+#define HWCLOCK_DEBUG_RANDOM_SLEEP	(1 << 1)
+#define HWCLOCK_DEBUG_DELTA_VS_TARGET	(1 << 2)
+#define HWCLOCK_DEBUG_ALL		0xFFFF
+
+UL_DEBUG_DECLARE_MASK(hwclock);
+#define DBG(m, x)	__UL_DBG(hwclock, HWCLOCK_DEBUG_, m, x)
+#define ON_DBG(m, x)	__UL_DBG_CALL(hwclock, HWCLOCK_DEBUG_, m, x)
+
+struct hwclock_control {	/* parsed command-line state shared by the hwclock code */
+	char *date_opt;		/* --date <time> argument */
+	char *adj_file_name;	/* --adjfile <file>; main() falls back to _PATH_ADJTIME */
+	double rtc_delay;	/* --delay <seconds> */
+#if defined(__linux__) && defined(__alpha__)
+	char *epoch_option;	/* --epoch <year> argument */
+#endif
+#ifdef __linux__
+	char *rtc_dev_name;	/* -f, --rtc <file> argument */
+#endif
+	unsigned int
+		hwaudit_on:1,	/* set by the clock-changing options; read by hwclock_exit() */
+		adjust:1,	/* -a, --adjust */
+		show:1,		/* -r, --show; the default operation */
+		hctosys:1,	/* -s, --hctosys */
+		utc:1,		/* -u, --utc */
+		systohc:1,	/* -w, --systohc */
+#if defined(__linux__) && defined(__alpha__)
+		getepoch:1,	/* --getepoch */
+		setepoch:1,	/* --setepoch */
+#endif
+		noadjfile:1,	/* --noadjfile */
+		local_opt:1,	/* -l, --localtime */
+		directisa:1,	/* --directisa */
+		testing:1,	/* --test (also turns on verbose) */
+		systz:1,	/* --systz */
+		predict:1,	/* --predict */
+		get:1,		/* --get */
+		set:1,		/* --set */
+		update:1,	/* --update-drift */
+		universal:1,	/* will store hw_clock_is_utc() return value */
+		verbose:1;	/* -v, --verbose */
+};
+
+struct clock_ops {	/* clock access method table; the probe_for_*_clock() functions return a pointer to one */
+	char *interface_name;	/* presumably a human-readable name of the access method -- TODO confirm */
+	int (*get_permissions) (void);
+	int (*read_hardware_clock) (const struct hwclock_control *ctl, struct tm * tm);	/* tm is writable here */
+	int (*set_hardware_clock) (const struct hwclock_control *ctl, const struct tm * tm);	/* tm is read-only here */
+	int (*synchronize_to_clock_tick) (const struct hwclock_control *ctl);
+	const char *(*get_device_path) (void);
+};
+
+extern struct clock_ops *probe_for_cmos_clock(void);
+extern struct clock_ops *probe_for_rtc_clock(const struct hwclock_control *ctl);
+
+/* hwclock.c */
+extern double time_diff(struct timeval subtrahend, struct timeval subtractor);
+
+/* rtc.c */
+#if defined(__linux__) && defined(__alpha__)
+extern int get_epoch_rtc(const struct hwclock_control *ctl, unsigned long *epoch);
+extern int set_epoch_rtc(const struct hwclock_control *ctl);
+#endif
+
+extern void __attribute__((__noreturn__))
+hwclock_exit(const struct hwclock_control *ctl, int status);
+
+#endif				/* HWCLOCK_CLOCK_H */
diff --git a/sys-utils/ipcmk.1 b/sys-utils/ipcmk.1
new file mode 100644
index 0000000..e6ed434
--- /dev/null
+++ b/sys-utils/ipcmk.1
@@ -0,0 +1,54 @@
+.\" Copyright 2008 Hayden A. James (hayden.james@gmail.com)
+.\" May be distributed under the GNU General Public License
+.TH IPCMK "1" "July 2014" "util-linux" "User Commands"
+.SH "NAME"
+ipcmk \- make various IPC resources
+.SH "SYNOPSIS"
+.B ipcmk
+[options]
+.SH "DESCRIPTION"
+.B ipcmk
+allows you to create shared memory segments, message queues,
+and semaphore arrays.
+.SH "OPTIONS"
+.PP
+Resources can be specified with these options:
+.TP
+.BR \-M , " \-\-shmem " \fIsize
+Create a shared memory segment of
+.I size
+bytes.
+The \fIsize\fR argument may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, etc. (the
+"iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, etc.
+.TP
+.BR \-Q , " \-\-queue"
+Create a message queue.
+.TP
+.BR \-S , " \-\-semaphore " \fInumber
+Create a semaphore array with
+.I number
+of elements.
+.PP
+Other options are:
+.TP
+.BR \-p , " \-\-mode " \fImode
+Access permissions for the resource.  Default is 0644.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.PP
+.SH "SEE ALSO"
+.BR ipcrm (1),
+.BR ipcs (1)
+.SH "AUTHOR"
+.MT hayden.james@gmail.com
+Hayden A. James
+.ME
+.SH "AVAILABILITY"
+The ipcmk command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/ipcmk.c b/sys-utils/ipcmk.c
new file mode 100644
index 0000000..df83652
--- /dev/null
+++ b/sys-utils/ipcmk.c
@@ -0,0 +1,163 @@
+/*
+ *  ipcmk.c - used to create ad-hoc IPC segments
+ *
+ *  Copyright (C) 2008 Hayden A. James (hayden.james@gmail.com)
+ *  Copyright (C) 2008 Karel Zak <kzak@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ipc.h>
+#include <sys/msg.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/time.h>
+
+#include "c.h"
+#include "nls.h"
+#include "randutils.h"
+#include "strutils.h"
+#include "closestream.h"
+
+static int create_shm(size_t size, int permission)
+{
+	key_t shm_key;	/* filled with random bytes, then handed to shmget() */
+
+	random_get_bytes(&shm_key, sizeof shm_key);
+	return shmget(shm_key, size, IPC_CREAT | permission);
+}
+
+static int create_msg(int permission)
+{
+	key_t queue_key;	/* filled with random bytes, then handed to msgget() */
+
+	random_get_bytes(&queue_key, sizeof queue_key);
+	return msgget(queue_key, IPC_CREAT | permission);
+}
+
+static int create_sem(int nsems, int permission)
+{
+	key_t sem_key;	/* filled with random bytes, then handed to semget() */
+
+	random_get_bytes(&sem_key, sizeof sem_key);
+	return semget(sem_key, nsems, IPC_CREAT | permission);
+}
+
+static void __attribute__((__noreturn__)) usage(void)
+{
+	/* Print the ipcmk help text on stdout and exit successfully. */
+	fputs(USAGE_HEADER, stdout);
+	printf(_(" %s [options]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Create various IPC resources.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	fputs(_(" -M, --shmem <size>       create shared memory segment of size <size>\n"), stdout);
+	fputs(_(" -S, --semaphore <number> create semaphore array with <number> elements\n"), stdout);
+	fputs(_(" -Q, --queue              create message queue\n"), stdout);
+	fputs(_(" -p, --mode <mode>        permission for the resource (default is 0644)\n"), stdout);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	printf(USAGE_HELP_OPTIONS(26));
+	printf(USAGE_MAN_TAIL("ipcmk(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/* ipcmk entry point: parse options, then create each requested resource and print its id. */
+int main(int argc, char **argv)
+{
+	int permission = 0644;
+	int opt, nsems = 0;
+	size_t size = 0;
+	char *end = NULL;	/* strtoul() end pointer used to validate --mode */
+	int ask_shm = 0, ask_msg = 0, ask_sem = 0;
+	static const struct option longopts[] = {
+		{"shmem", required_argument, NULL, 'M'},
+		{"semaphore", required_argument, NULL, 'S'},
+		{"queue", no_argument, NULL, 'Q'},
+		{"mode", required_argument, NULL, 'p'},
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while((opt = getopt_long(argc, argv, "hM:QS:p:V", longopts, NULL)) != -1) {
+		switch(opt) {
+		case 'M':
+			size = strtosize_or_err(optarg, _("failed to parse size"));
+			ask_shm = 1;
+			break;
+		case 'Q':
+			ask_msg = 1;
+			break;
+		case 'S':
+			nsems = strtos32_or_err(optarg, _("failed to parse elements"));
+			ask_sem = 1;
+			break;
+		case 'p':
+			errno = 0;	/* octal mode; reject empty, non-octal or out-of-range input */
+			permission = strtoul(optarg, &end, 8);
+			if (errno || end == optarg || *end != '\0')
+				errx(EXIT_FAILURE, "%s: '%s'", _("failed to parse mode"), optarg);
+			break;
+		case 'h':
+			usage();	/* does not return */
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if(!ask_shm && !ask_msg && !ask_sem) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+	if (ask_shm) {
+		int shmid = create_shm(size, permission);
+		if (shmid == -1)
+			err(EXIT_FAILURE, _("create share memory failed"));
+		printf(_("Shared memory id: %d\n"), shmid);
+	}
+
+	if (ask_msg) {
+		int msgid = create_msg(permission);
+		if (msgid == -1)
+			err(EXIT_FAILURE, _("create message queue failed"));
+		printf(_("Message queue id: %d\n"), msgid);
+	}
+
+	if (ask_sem) {
+		int semid = create_sem(nsems, permission);
+		if (semid == -1)
+			err(EXIT_FAILURE, _("create semaphore failed"));
+		printf(_("Semaphore id: %d\n"), semid);
+	}
+
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/ipcrm.1 b/sys-utils/ipcrm.1
new file mode 100644
index 0000000..be73dff
--- /dev/null
+++ b/sys-utils/ipcrm.1
@@ -0,0 +1,117 @@
+.\" Copyright 2002 Andre C. Mazzone (linuxdev@karagee.com)
+.\" May be distributed under the GNU General Public License
+.TH IPCRM "1" "July 2014" "util-linux" "User Commands"
+.SH NAME
+ipcrm \- remove certain IPC resources
+.SH SYNOPSIS
+.B ipcrm
+[options]
+.sp
+.B ipcrm
+.RB { shm | msg | sem }
+.IR id ...
+.SH DESCRIPTION
+.B ipcrm
+removes System V inter-process communication (IPC) objects
+and associated data structures from the system.
+In order to delete such objects, you must be superuser, or
+the creator or owner of the object.
+.PP
+System V IPC objects are of three types: shared memory,
+message queues, and semaphores.
+Deletion of a message queue or semaphore object is immediate
+(regardless of whether any process still holds an IPC
+identifier for the object).
+A shared memory object is only removed
+after all currently attached processes have detached
+.RB ( shmdt (2))
+the object from their virtual address space.
+.PP
+Two syntax styles are supported.  The old Linux historical syntax specifies
+a three-letter keyword indicating which class of object is to be deleted,
+followed by one or more IPC identifiers for objects of this type.
+.PP
+The SUS-compliant syntax allows the specification of
+zero or more objects of all three types in a single command line,
+with objects specified either by key or by identifier (see below).
+Both keys and identifiers may be specified in decimal, hexadecimal
+(specified with an initial '0x' or '0X'), or octal (specified with
+an initial '0').
+.PP
+The details of the removals are described in
+.BR shmctl (2),
+.BR msgctl (2),
+and
+.BR semctl (2).
+The identifiers and keys can be found by using
+.BR ipcs (1).
+.SH OPTIONS
+.TP
+\fB\-a\fR, \fB\-\-all\fR[=\fBshm\fR|\fBmsg\fR|\fBsem\fR]
+Remove all resources.  When an option argument is provided, the removal is
+performed only for the specified resource types.  \fIWarning!\fR  Do not use
+.B \-a
+if you are unsure how the software using the resources might react to missing
+objects.  Some programs create these resources at startup and may not have
+any code to deal with an unexpected disappearance.
+.TP
+.BR \-M , " \-\-shmem\-key " \fIshmkey
+Remove the shared memory segment created with
+.I shmkey
+after the last detach is performed.
+.TP
+.BR \-m , " \-\-shmem\-id " \fIshmid
+Remove the shared memory segment identified by
+.I shmid
+after the last detach is performed.
+.TP
+.BR \-Q , " \-\-queue\-key " \fImsgkey
+Remove the message queue created with
+.IR msgkey .
+.TP
+.BR \-q , " \-\-queue\-id " \fImsgid
+Remove the message queue identified by
+.IR msgid .
+.TP
+.BR \-S , " \-\-semaphore\-key " \fIsemkey
+Remove the semaphore created with
+.IR semkey .
+.TP
+.BR \-s , " \-\-semaphore\-id " \fIsemid
+Remove the semaphore identified by
+.IR semid .
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH NOTES
+In its first Linux implementation, \fBipcrm\fR used the deprecated syntax
+shown in the second line of the
+.BR SYNOPSIS .
+Functionality present in other *nix implementations of \fBipcrm\fR has since
+been added, namely the ability to delete resources by key (not just
+identifier), and to respect the same command-line syntax.  For backward
+compatibility the previous syntax is still supported.
+.\" .SH AUTHORS
+.\" Andre C. Mazzone (linuxdev@karagee.com)
+.\" .br
+.\" Krishna Balasubramanian (balasub@cis.ohio-state.edu)
+.SH SEE ALSO
+.nh
+.BR ipcmk (1),
+.BR ipcs (1),
+.BR msgctl (2),
+.BR msgget (2),
+.BR semctl (2),
+.BR semget (2),
+.BR shmctl (2),
+.BR shmdt (2),
+.BR shmget (2),
+.BR ftok (3)
+.SH AVAILABILITY
+The ipcrm command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/ipcrm.c b/sys-utils/ipcrm.c
new file mode 100644
index 0000000..a9f2d1b
--- /dev/null
+++ b/sys-utils/ipcrm.c
@@ -0,0 +1,423 @@
+/*
+ * krishna balasubramanian 1993
+ *
+ * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ *
+ * 1999-04-02 frank zago
+ * - can now remove several id's in the same call
+ *
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ipc.h>
+#include <sys/msg.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include "c.h"
+#include "nls.h"
+#include "strutils.h"
+#include "closestream.h"
+
+#ifndef HAVE_UNION_SEMUN
+/* according to X/OPEN we have to define it ourselves */
+union semun {	/* type of the 4th argument this file passes to semctl() */
+	int val;	/* remove_id() sets this to 0 before its IPC_RMID call */
+	struct semid_ds *buf;	/* remove_all() points this at a semid_ds for SEM_STAT */
+	unsigned short int *array;	/* remove_all() stores a cast seminfo address here for SEM_INFO */
+	struct seminfo *__buf;	/* not referenced in this file */
+};
+#endif
+
+typedef enum type_id {	/* selects which class of IPC resource an operation targets */
+	SHM,	/* shared memory segments */
+	SEM,	/* semaphores */
+	MSG,	/* message queues */
+	ALL	/* all three classes; handled by remove_all(), key_to_id() aborts on it */
+} type_id;
+
+static int verbose = 0;
+
+/* print the usage */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	/* Print the ipcrm help text on stdout and exit successfully. */
+	fputs(USAGE_HEADER, stdout);
+	printf(_(" %1$s [options]\n"
+		       " %1$s shm|msg|sem <id>...\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Remove certain IPC resources.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	fputs(_(" -m, --shmem-id <id>        remove shared memory segment by id\n"), stdout);
+	fputs(_(" -M, --shmem-key <key>      remove shared memory segment by key\n"), stdout);
+	fputs(_(" -q, --queue-id <id>        remove message queue by id\n"), stdout);
+	fputs(_(" -Q, --queue-key <key>      remove message queue by key\n"), stdout);
+	fputs(_(" -s, --semaphore-id <id>    remove semaphore by id\n"), stdout);
+	fputs(_(" -S, --semaphore-key <key>  remove semaphore by key\n"), stdout);
+	fputs(_(" -a, --all[=shm|msg|sem]    remove all (in the specified category)\n"), stdout);
+	fputs(_(" -v, --verbose              explain what is being done\n"), stdout);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	printf(USAGE_HELP_OPTIONS(28));
+	printf(USAGE_MAN_TAIL("ipcrm(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+static int remove_id(int type, int iskey, int id)
+{
+	/* Issue IPC_RMID for one object; returns 0 on success, 1 after printing a warning. */
+	int rc;
+	char *reason;
+	union semun sem_arg;	/* semctl() takes this union as its 4th argument */
+
+	sem_arg.val = 0;
+	/* dispatch on the resource class */
+	switch (type) {
+	case SHM:
+		if (verbose)
+			printf(_("removing shared memory segment id `%d'\n"), id);
+		rc = shmctl(id, IPC_RMID, NULL);
+		break;
+	case MSG:
+		if (verbose)
+			printf(_("removing message queue id `%d'\n"), id);
+		rc = msgctl(id, IPC_RMID, NULL);
+		break;
+	case SEM:
+		if (verbose)
+			printf(_("removing semaphore id `%d'\n"), id);
+		rc = semctl(id, 0, IPC_RMID, sem_arg);
+		break;
+	default:
+		errx(EXIT_FAILURE, "impossible occurred");
+	}
+
+	if (rc >= 0)
+		return 0;
+
+	/* known errno values become a warning; anything else is fatal */
+	switch (errno) {
+	case EACCES:
+	case EPERM:
+		reason = iskey ? _("permission denied for key") : _("permission denied for id");
+		break;
+	case EINVAL:
+		reason = iskey ? _("invalid key") : _("invalid id");
+		break;
+	case EIDRM:
+		reason = iskey ? _("already removed key") : _("already removed id");
+		break;
+	default:
+		err(EXIT_FAILURE, "%s", iskey ? _("key failed") : _("id failed"));
+	}
+	warnx("%s (%d)", reason, id);
+	return 1;
+}
+
+/* Remove every id in argv[0..argc-1] (old syntax); returns the number of failures. */
+static int remove_arg_list(type_id type, int argc, char **argv)
+{
+	int id, nb_errors = 0;
+	char *end;
+
+	do {
+		errno = 0;	/* so an out-of-range value is detectable after strtoul() */
+		id = strtoul(argv[0], &end, 10);
+		if (errno || end == argv[0] || *end != '\0') {	/* overflow, empty string, or trailing junk */
+			warnx(_("invalid id: %s"), argv[0]);
+			nb_errors++;
+		} else if (remove_id(type, 0, id)) {
+			nb_errors++;
+		}
+		argc--;
+		argv++;
+	} while (argc);
+	return (nb_errors);
+}
+
+static int deprecated_main(int argc, char **argv)
+{
+	/* Old "ipcrm shm|msg|sem <id>..." syntax: returns 1 if handled, 0 if argv[1] is no keyword. */
+	const char *keyword = argv[1];
+	type_id kind;
+
+	if (strcmp(keyword, "shm") == 0)
+		kind = SHM;
+	else if (strcmp(keyword, "msg") == 0)
+		kind = MSG;
+	else if (strcmp(keyword, "sem") == 0)
+		kind = SEM;
+	else
+		return 0;	/* not the legacy form */
+	if (argc < 3) {
+		warnx(_("not enough arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+	if (remove_arg_list(kind, argc - 2, argv + 2) != 0)
+		exit(EXIT_FAILURE);
+
+	printf(_("resource(s) deleted\n"));
+	return 1;
+}
+
+/* Parse an IPC key from str; on bad input exits via err()/errx() with errmesg. */
+static unsigned long strtokey(const char *str, const char *errmesg)
+{
+	unsigned long num;
+	char *end = NULL;
+
+	errno = 0;	/* reset first so the early bail-out cannot report a stale errno */
+	if (str == NULL || *str == '\0')
+		goto err;
+	/* base 0: decimal, 0x-prefixed hex, or 0-prefixed octal */
+	num = strtoul(str, &end, 0);
+	if (errno || str == end || (end && *end))
+		goto err;
+
+	return num;
+ err:
+	if (errno)
+		err(EXIT_FAILURE, "%s: '%s'", errmesg, str);
+	else
+		errx(EXIT_FAILURE, "%s: '%s'", errmesg, str);
+	return 0;
+}
+
+static int key_to_id(type_id type, char *s)
+{
+	int id;
+	/* Map key string s to an IPC id; returns -1 after a warning if it cannot be resolved. */
+	key_t key = strtokey(s, _("failed to parse argument"));
+	if (key == IPC_PRIVATE) {
+		warnx(_("illegal key (%s)"), s);
+		return -1;
+	}
+	switch (type) {
+	case SHM:
+		id = shmget(key, 0, 0);
+		break;
+	case MSG:
+		id = msgget(key, 0);
+		break;
+	case SEM:
+		id = semget(key, 0, 0);
+		break;
+	case ALL:
+		abort();
+	default:
+		errx(EXIT_FAILURE, "impossible occurred");
+	}
+	if (id < 0) {
+		char *errmsg;
+		switch (errno) {
+		case EACCES:
+			errmsg = _("permission denied for key");
+			break;
+		case EIDRM:
+			errmsg = _("already removed key");
+			break;
+		case ENOENT:
+			errmsg = _("invalid key");
+			break;
+		default:
+			err(EXIT_FAILURE, _("key failed"));
+		}
+		warnx("%s (%s)", errmsg, s);
+	}
+	return id;
+}
+
+static int remove_all(type_id type)
+{	/* Try to remove each object the *_STAT scans find for the requested class(es). */
+	int ret = 0;	/* OR of the remove_id() results: non-zero if any removal failed */
+	int id, rm_me, maxid;
+
+	struct shmid_ds shmseg;
+
+	struct semid_ds semary;
+	struct seminfo seminfo;
+	union semun arg;
+
+	struct msqid_ds msgque;
+	struct msginfo msginfo;
+
+	if (type == SHM || type == ALL) {
+		maxid = shmctl(0, SHM_INFO, &shmseg);	/* return value bounds the scan below */
+		if (maxid < 0)
+			errx(EXIT_FAILURE,
+			     _("kernel not configured for shared memory"));
+		for (id = 0; id <= maxid; id++) {
+			rm_me = shmctl(id, SHM_STAT, &shmseg);	/* returned value is what remove_id() gets */
+			if (rm_me < 0)
+				continue;	/* SHM_STAT failed for this value; skip it */
+			ret |= remove_id(SHM, 0, rm_me);
+		}
+	}
+	if (type == SEM || type == ALL) {
+		arg.array = (ushort *) (void *)&seminfo;	/* seminfo is passed through the union's array member */
+		maxid = semctl(0, 0, SEM_INFO, arg);
+		if (maxid < 0)
+			errx(EXIT_FAILURE,
+			     _("kernel not configured for semaphores"));
+		for (id = 0; id <= maxid; id++) {
+			arg.buf = (struct semid_ds *)&semary;	/* re-point the union at the stat buffer */
+			rm_me = semctl(id, 0, SEM_STAT, arg);
+			if (rm_me < 0)
+				continue;	/* SEM_STAT failed for this value; skip it */
+			ret |= remove_id(SEM, 0, rm_me);
+		}
+	}
+/* kFreeBSD hackery -- ah 20140723: supply MSG_STAT/MSG_INFO when the headers do not define them */
+#ifndef MSG_STAT
+#define MSG_STAT 11
+#endif
+#ifndef MSG_INFO
+#define MSG_INFO 12
+#endif
+	if (type == MSG || type == ALL) {
+		maxid =
+		    msgctl(0, MSG_INFO, (struct msqid_ds *)(void *)&msginfo);	/* msginfo passed via a cast */
+		if (maxid < 0)
+			errx(EXIT_FAILURE,
+			     _("kernel not configured for message queues"));
+		for (id = 0; id <= maxid; id++) {
+			rm_me = msgctl(id, MSG_STAT, &msgque);
+			if (rm_me < 0)
+				continue;	/* MSG_STAT failed for this value; skip it */
+			ret |= remove_id(MSG, 0, rm_me);
+		}
+	}
+	return ret;
+}
+
+/* ipcrm entry point: old keyword syntax first, otherwise getopt-driven removal by id, key or --all. */
+int main(int argc, char **argv)
+{
+	int c;
+	int ret = 0;
+	int id = -1;
+	int iskey;
+	int rm_all = 0;
+	type_id what_all = ALL;
+
+	static const struct option longopts[] = {
+		{"shmem-id", required_argument, NULL, 'm'},
+		{"shmem-key", required_argument, NULL, 'M'},
+		{"queue-id", required_argument, NULL, 'q'},
+		{"queue-key", required_argument, NULL, 'Q'},
+		{"semaphore-id", required_argument, NULL, 's'},
+		{"semaphore-key", required_argument, NULL, 'S'},
+		{"all", optional_argument, NULL, 'a'},
+		{"verbose", no_argument, NULL, 'v'},
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	/* if the command is executed without parameters, do nothing */
+	if (argc == 1)
+		return 0;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	/* the old "ipcrm shm|msg|sem <id>..." invocation is handled entirely by deprecated_main() */
+	if (deprecated_main(argc, argv))
+		return EXIT_SUCCESS;
+
+	/* process new syntax to conform with SYSV ipcrm */
+	while((c = getopt_long(argc, argv, "q:m:s:Q:M:S:a::vhV", longopts, NULL)) != -1) {
+		iskey = 0;
+		switch (c) {
+		case 'M':
+			iskey = 1;
+			id = key_to_id(SHM, optarg);
+			if (id < 0) {
+				ret++;
+				break;
+			}
+			/* fallthrough */
+		case 'm':
+			if (!iskey)
+				id = strtos32_or_err(optarg, _("failed to parse argument"));
+			if (remove_id(SHM, iskey, id))
+				ret++;
+			break;
+		case 'Q':
+			iskey = 1;
+			id = key_to_id(MSG, optarg);
+			if (id < 0) {
+				ret++;
+				break;
+			}
+			/* fallthrough */
+		case 'q':
+			if (!iskey)
+				id = strtos32_or_err(optarg, _("failed to parse argument"));
+			if (remove_id(MSG, iskey, id))
+				ret++;
+			break;
+		case 'S':
+			iskey = 1;
+			id = key_to_id(SEM, optarg);
+			if (id < 0) {
+				ret++;
+				break;
+			}
+			/* fallthrough */
+		case 's':
+			if (!iskey)
+				id = strtos32_or_err(optarg, _("failed to parse argument"));
+			if (remove_id(SEM, iskey, id))
+				ret++;
+			break;
+		case 'a':
+			rm_all = 1;
+			if (optarg) {
+				if (!strcmp(optarg, "shm"))
+					what_all = SHM;
+				else if (!strcmp(optarg, "msg"))
+					what_all = MSG;
+				else if (!strcmp(optarg, "sem"))
+					what_all = SEM;
+				else
+					errx(EXIT_FAILURE,
+					     _("unknown argument: %s"), optarg);
+			} else {
+				what_all = ALL;
+			}
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		case 'h':
+			usage();
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* reject leftovers BEFORE the bulk removal: "-a shm" (with a space) must not wipe everything */
+	if (optind < argc) {
+		warnx(_("unknown argument: %s"), argv[optind]);
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	if (rm_all && remove_all(what_all))
+		ret++;
+
+	return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/sys-utils/ipcs.1 b/sys-utils/ipcs.1
new file mode 100644
index 0000000..93c35e3
--- /dev/null
+++ b/sys-utils/ipcs.1
@@ -0,0 +1,116 @@
+.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
+.\" May be distributed under the GNU General Public License
+.TH IPCS "1" "July 2014" "util-linux" "User Commands"
+.SH NAME
+ipcs \- show information on IPC facilities
+.SH SYNOPSIS
+.B ipcs
+[options]
+.SH DESCRIPTION
+.B ipcs
+shows information on the inter-process communication facilities
+for which the calling process has read access.
+By default it shows information about all three resources:
+shared memory segments, message queues, and semaphore arrays.
+.SH OPTIONS
+.TP
+\fB\-i\fR, \fB\-\-id\fR \fIid\fR
+Show full details on just the one resource element identified by
+.IR id .
+This option needs to be combined with one of the three resource options:
+.BR \-m ,
+.BR \-q " or"
+.BR \-s .
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Display help text and exit.
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Display version information and exit.
+.SS "Resource options"
+.TP
+\fB\-m\fR, \fB\-\-shmems\fR
+Write information about active shared memory segments.
+.TP
+\fB\-q\fR, \fB\-\-queues\fR
+Write information about active message queues.
+.TP
+\fB\-s\fR, \fB\-\-semaphores\fR
+Write information about active semaphore sets.
+.TP
+\fB\-a\fR, \fB\-\-all\fR
+Write information about all three resources (default).
+.SS "Output formats"
+Of these options only one takes effect: the last one specified.
+.TP
+\fB\-c\fR, \fB\-\-creator\fR
+Show creator and owner.
+.TP
+\fB\-l\fR, \fB\-\-limits\fR
+Show resource limits.
+.TP
+\fB\-p\fR, \fB\-\-pid\fR
+Show PIDs of creator and last operator.
+.TP
+\fB\-t\fR, \fB\-\-time\fR
+Write time information.  The time of the last control operation that changed
+the access permissions for all facilities, the time of the last
+.BR msgsnd (2)
+and
+.BR msgrcv (2)
+operations on message queues, the time of the last
+.BR shmat (2)
+and
+.BR shmdt (2)
+operations on shared memory, and the time of the last
+.BR semop (2)
+operation on semaphores.
+.TP
+\fB\-u\fR, \fB\-\-summary\fR
+Show status summary.
+.SS "Representation"
+These options control the units in which sizes are printed, for example by the \fB\-l\fR (\fB\-\-limits\fR) option.
+.TP
+\fB\-b\fR, \fB\-\-bytes\fR
+Print sizes in bytes.
+.TP
+.B \-\-human
+Print sizes in human-readable format.
+.SH SEE ALSO
+.BR ipcmk (1),
+.BR ipcrm (1),
+.BR msgrcv (2),
+.BR msgsnd (2),
+.BR semget (2),
+.BR semop (2),
+.BR shmat (2),
+.BR shmdt (2),
+.BR shmget (2)
+.SH CONFORMING TO
+The Linux ipcs utility is not fully compatible with the POSIX ipcs utility.
+The Linux version does not support the POSIX
+.BR \-a ,
+.B \-b
+and
+.B \-o
+options, but does support the
+.B \-l
+and
+.B \-u
+options not defined by POSIX.  A portable application shall not use the
+.BR \-a ,
+.BR \-b ,
+.BR \-o ,
+.BR \-l ,
+and
+.B \-u
+options.
+.SH AUTHOR
+.UR balasub@cis.ohio-state.edu
+Krishna Balasubramanian
+.UE
+.SH AVAILABILITY
+The ipcs command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/ipcs.c b/sys-utils/ipcs.c
new file mode 100644
index 0000000..73cf28a
--- /dev/null
+++ b/sys-utils/ipcs.c
@@ -0,0 +1,668 @@
+/* Original author unknown, may be "krishna balasub@cis.ohio-state.edu" */
+/*
+ * Modified Sat Oct  9 10:55:28 1993 for 0.99.13
+ *
+ * Patches from Mike Jagdis (jaggy@purplet.demon.co.uk) applied Wed Feb 8
+ * 12:12:21 1995 by faith@cs.unc.edu to print numeric uids if no passwd file
+ * entry.
+ *
+ * Patch from arnolds@ifns.de (Heinz-Ado Arnolds) applied Mon Jul 1 19:30:41
+ * 1996 by janl@math.uio.no to add code missing in case PID: clauses.
+ *
+ * Patched to display the key field -- hy@picksys.com 12/18/96
+ *
+ * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ */
+
+#include <errno.h>
+#include <getopt.h>
+
+#include "c.h"
+#include "nls.h"
+#include "closestream.h"
+
+#include "ipcutils.h"
+
+/* Output layout selected on the command line; NOTSPECIFIED means the default listing. */
+enum output_formats {
+	NOTSPECIFIED,
+	LIMITS,		/* -l, --limits */
+	STATUS,		/* -u, --summary */
+	CREATOR,	/* -c, --creator */
+	TIME,		/* -t, --time */
+	PID		/* -p, --pid */
+};
+/* getopt_long() codes for long-only options, placed above the range of short option characters. */
+enum {
+	OPT_HUMAN = CHAR_MAX + 1	/* --human */
+};
+
+static void do_shm (char format, int unit);
+static void print_shm (int id, int unit);
+static void do_sem (char format);
+static void print_sem (int id);
+static void do_msg (char format, int unit);
+static void print_msg (int id, int unit);
+
+/* we read time as int64_t from /proc, so cast... */
+#define xctime(_x)	ctime((time_t *) (_x))
+
+/*
+ * Print the help text to stdout and terminate the program with
+ * EXIT_SUCCESS; this function never returns.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	fputs(USAGE_HEADER, out);
+	/* %1$s: both synopsis lines reuse the single program-name argument */
+	fprintf(out, _(" %1$s [resource-option...] [output-option]\n"
+		       " %1$s -m|-q|-s -i <id>\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Show information on IPC facilities.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -i, --id <id>  print details on resource identified by <id>\n"), out);
+	printf(USAGE_HELP_OPTIONS(16));
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Resource options:\n"), out);
+	fputs(_(" -m, --shmems      shared memory segments\n"), out);
+	fputs(_(" -q, --queues      message queues\n"), out);
+	fputs(_(" -s, --semaphores  semaphores\n"), out);
+	fputs(_(" -a, --all         all (default)\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Output options:\n"), out);
+	fputs(_(" -t, --time        show attach, detach and change times\n"), out);
+	fputs(_(" -p, --pid         show PIDs of creator and last operator\n"), out);
+	fputs(_(" -c, --creator     show creator and owner\n"), out);
+	fputs(_(" -l, --limits      show resource limits\n"), out);
+	fputs(_(" -u, --summary     show status summary\n"), out);
+	fputs(_("     --human       show sizes in human-readable format\n"), out);
+	fputs(_(" -b, --bytes       show sizes in bytes\n"), out);
+	printf(USAGE_MAN_TAIL("ipcs(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * ipcs entry point.
+ *
+ * Parses the command line and then prints either the details of a single
+ * resource (-i <id> combined with exactly one of -m, -q, -s) or listings
+ * of the selected resource kinds; all three kinds are listed when none is
+ * selected.
+ *
+ * Returns EXIT_SUCCESS; invalid options or arguments terminate the
+ * program with EXIT_FAILURE.
+ */
+int main (int argc, char **argv)
+{
+	int opt, msg = 0, shm = 0, sem = 0, id = 0, specific = 0;
+	char format = NOTSPECIFIED;
+	int unit = IPC_UNIT_DEFAULT;
+	static const struct option longopts[] = {
+		{"id", required_argument, NULL, 'i'},
+		{"queues", no_argument, NULL, 'q'},
+		{"shmems", no_argument, NULL, 'm'},
+		{"semaphores", no_argument, NULL, 's'},
+		{"all", no_argument, NULL, 'a'},
+		{"time", no_argument, NULL, 't'},
+		{"pid", no_argument, NULL, 'p'},
+		{"creator", no_argument, NULL, 'c'},
+		{"limits", no_argument, NULL, 'l'},
+		{"summary", no_argument, NULL, 'u'},
+		{"human", no_argument, NULL, OPT_HUMAN},
+		{"bytes", no_argument, NULL, 'b'},
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+	char options[] = "i:qmsatpclubVh";
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((opt = getopt_long(argc, argv, options, longopts, NULL)) != -1) {
+		switch (opt) {
+		case 'i':
+		{
+			char *end = NULL;
+			long val;
+
+			/*
+			 * atoi() would silently turn garbage into id 0, and
+			 * the ipc_*_get_info() helpers treat a negative id
+			 * as "list everything"; reject both up front.
+			 */
+			errno = 0;
+			val = strtol(optarg, &end, 10);
+			if (errno || end == optarg || *end != '\0' ||
+			    val < 0 || val > INT_MAX)
+				errx(EXIT_FAILURE, "%s: '%s'",
+				     _("failed to parse id argument"), optarg);
+			id = (int) val;
+			specific = 1;
+			break;
+		}
+		case 'a':
+			msg = shm = sem = 1;
+			break;
+		case 'q':
+			msg = 1;
+			break;
+		case 'm':
+			shm = 1;
+			break;
+		case 's':
+			sem = 1;
+			break;
+		/* output formats: the last one given wins */
+		case 't':
+			format = TIME;
+			break;
+		case 'c':
+			format = CREATOR;
+			break;
+		case 'p':
+			format = PID;
+			break;
+		case 'l':
+			format = LIMITS;
+			break;
+		case 'u':
+			format = STATUS;
+			break;
+		case OPT_HUMAN:
+			unit = IPC_UNIT_HUMAN;
+			break;
+		case 'b':
+			unit = IPC_UNIT_BYTES;
+			break;
+		case 'h':
+			usage();	/* does not return */
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (specific && (msg + shm + sem != 1))
+		errx (EXIT_FAILURE,
+		      _("when using an ID, a single resource must be specified"));
+	if (specific) {
+		if (msg)
+			print_msg (id, unit);
+		if (shm)
+			print_shm (id, unit);
+		if (sem)
+			print_sem (id);
+	} else {
+		/* no resource option given: show all three kinds */
+		if (!msg && !shm && !sem)
+			msg = shm = sem = 1;
+		printf ("\n");
+		if (msg) {
+			do_msg (format, unit);
+			printf ("\n");
+		}
+		if (shm) {
+			do_shm (format, unit);
+			printf ("\n");
+		}
+		if (sem) {
+			do_sem (format);
+			printf ("\n");
+		}
+	}
+	return EXIT_SUCCESS;
+}
+
+/*
+ * Print the shared memory part of the listing.
+ *
+ * format: one of enum output_formats. LIMITS and STATUS print a
+ *         system-wide summary and return; every other value prints a
+ *         header followed by one line per segment.
+ * unit:   IPC_UNIT_* selector passed on to ipc_print_size() for sizes.
+ */
+static void do_shm (char format, int unit)
+{
+	struct passwd *pw;
+	struct shm_data *shmds, *shmdsp;
+
+	switch (format) {
+	case LIMITS:
+	{
+		struct ipc_limits lim;
+		uint64_t tmp, pgsz = getpagesize();
+
+		if (ipc_shm_get_limits(&lim)) {
+			printf (_("unable to fetch shared memory limits\n"));
+			return;
+		}
+		printf (_("------ Shared Memory Limits --------\n"));
+		printf (_("max number of segments = %ju\n"), lim.shmmni);
+		ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_KB : unit,
+			       _("max seg size"), lim.shmmax, "\n", 0);
+
+		/* shmall is multiplied by the page size to get a byte count */
+		tmp = (uint64_t) lim.shmall * pgsz;
+		/* overflow handling, at least we don't print ridiculous small values */
+		if (lim.shmall != 0 && tmp / lim.shmall != pgsz) {
+			tmp = UINT64_MAX - (UINT64_MAX % pgsz);
+		}
+		ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_KB : unit,
+			       _("max total shared memory"), tmp, "\n", 0);
+		ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_BYTES : unit,
+			       _("min seg size"), lim.shmmin, "\n", 0);
+		return;
+	}
+	case STATUS:
+	{
+		int maxid;
+		struct shmid_ds shmbuf;
+		struct shm_info *shm_info;
+
+		/* the SHM_INFO result is read back through a struct shm_info view of shmbuf */
+		maxid = shmctl (0, SHM_INFO, &shmbuf);
+		shm_info =  (struct shm_info *) &shmbuf;
+		if (maxid < 0) {
+			printf (_("kernel not configured for shared memory\n"));
+			return;
+		}
+
+		printf (_("------ Shared Memory Status --------\n"));
+		/*
+		 * TRANSLATORS: This output format is maintained for backward
+		 * compatibility as ipcs is used in scripts. For consistency
+		 * with the rest, the translated form can follow this model:
+		 *
+		 * "segments allocated = %d\n"
+		 * "pages allocated = %ld\n"
+		 * "pages resident = %ld\n"
+		 * "pages swapped = %ld\n"
+		 * "swap performance = %ld attempts, %ld successes\n"
+		 */
+		printf (_("segments allocated %d\n"
+			  "pages allocated %ld\n"
+			  "pages resident  %ld\n"
+			  "pages swapped   %ld\n"
+			  "Swap performance: %ld attempts\t %ld successes\n"),
+			shm_info->used_ids,
+			shm_info->shm_tot,
+			shm_info->shm_rss,
+			shm_info->shm_swp,
+			shm_info->swap_attempts, shm_info->swap_successes);
+		return;
+	}
+
+	/*
+	 * Headers only
+	 */
+	case CREATOR:
+		printf (_("------ Shared Memory Segment Creators/Owners --------\n"));
+		printf ("%-10s %-10s %-10s %-10s %-10s %-10s\n",
+			_("shmid"),_("perms"),_("cuid"),_("cgid"),_("uid"),_("gid"));
+		break;
+
+	case TIME:
+		printf (_("------ Shared Memory Attach/Detach/Change Times --------\n"));
+		printf ("%-10s %-10s %-20s %-20s %-20s\n",
+			_("shmid"),_("owner"),_("attached"),_("detached"),
+			_("changed"));
+		break;
+
+	case PID:
+		printf (_("------ Shared Memory Creator/Last-op PIDs --------\n"));
+		printf ("%-10s %-10s %-10s %-10s\n",
+			_("shmid"),_("owner"),_("cpid"),_("lpid"));
+		break;
+
+	default:
+		printf (_("------ Shared Memory Segments --------\n"));
+		printf ("%-10s %-10s %-10s %-10s %-10s %-10s %-12s\n",
+			_("key"),_("shmid"),_("owner"),_("perms"),
+			unit == IPC_UNIT_HUMAN ? _("size") : _("bytes"),
+			_("nattch"),_("status"));
+		break;
+	}
+
+	/*
+	 * Print data
+	 */
+	if (ipc_shm_get_info(-1, &shmds) < 1)
+		return;
+
+	/* the list ends with an empty placeholder node, hence the ->next test */
+	for (shmdsp = shmds; shmdsp->next != NULL; shmdsp = shmdsp->next) {
+		if (format == CREATOR)  {
+			ipc_print_perms(stdout, &shmdsp->shm_perm);
+			continue;
+		}
+		/* owner is shown by name when a passwd entry exists, else as numeric uid */
+		pw = getpwuid(shmdsp->shm_perm.uid);
+		switch (format) {
+		case TIME:
+			if (pw)
+				printf ("%-10d %-10.10s", shmdsp->shm_perm.id, pw->pw_name);
+			else
+				printf ("%-10d %-10u", shmdsp->shm_perm.id, shmdsp->shm_perm.uid);
+			/* ctime uses static buffer: use separate calls */
+			printf(" %-20.16s", shmdsp->shm_atim
+			       ? xctime(&shmdsp->shm_atim) + 4 : _("Not set"));
+			printf(" %-20.16s", shmdsp->shm_dtim
+			       ? xctime(&shmdsp->shm_dtim) + 4 : _("Not set"));
+			printf(" %-20.16s\n", shmdsp->shm_ctim
+			       ? xctime(&shmdsp->shm_ctim) + 4 : _("Not set"));
+			break;
+		case PID:
+			if (pw)
+				printf ("%-10d %-10.10s", shmdsp->shm_perm.id, pw->pw_name);
+			else
+				printf ("%-10d %-10u", shmdsp->shm_perm.id, shmdsp->shm_perm.uid);
+			printf (" %-10u %-10u\n",
+				shmdsp->shm_cprid, shmdsp->shm_lprid);
+			break;
+
+		default:
+			printf("0x%08x ", shmdsp->shm_perm.key);
+			if (pw)
+				printf ("%-10d %-10.10s", shmdsp->shm_perm.id, pw->pw_name);
+			else
+				printf ("%-10d %-10u", shmdsp->shm_perm.id, shmdsp->shm_perm.uid);
+			printf (" %-10o ", shmdsp->shm_perm.mode & 0777);
+
+			if (unit == IPC_UNIT_HUMAN)
+				ipc_print_size(unit, NULL, shmdsp->shm_segsz, "    ", 6);
+			else
+				ipc_print_size(unit, NULL, shmdsp->shm_segsz, NULL, -10);
+
+			printf (" %-10ju %-6s %-6s\n",
+				shmdsp->shm_nattch,
+				shmdsp->shm_perm.mode & SHM_DEST ? _("dest") : " ",
+				shmdsp->shm_perm.mode & SHM_LOCKED ? _("locked") : " ");
+			break;
+		}
+	}
+
+	ipc_shm_free_info(shmds);
+	return;
+}
+
+/*
+ * Print the semaphore part of the listing.
+ *
+ * format: one of enum output_formats. LIMITS and STATUS print a
+ *         system-wide summary and return; CREATOR, TIME and the default
+ *         print a header followed by one line per semaphore array; PID
+ *         prints nothing.
+ */
+static void do_sem (char format)
+{
+	struct passwd *pw;
+	struct sem_data *semds, *semdsp;
+
+	switch (format) {
+	case LIMITS:
+	{
+		struct ipc_limits lim;
+
+		if (ipc_sem_get_limits(&lim)) {
+			printf (_("unable to fetch semaphore limits\n"));
+			return;
+		}
+		printf (_("------ Semaphore Limits --------\n"));
+		printf (_("max number of arrays = %d\n"), lim.semmni);
+		printf (_("max semaphores per array = %d\n"), lim.semmsl);
+		printf (_("max semaphores system wide = %d\n"), lim.semmns);
+		printf (_("max ops per semop call = %d\n"), lim.semopm);
+		printf (_("semaphore max value = %u\n"), lim.semvmx);
+		return;
+	}
+	case STATUS:
+	{
+		struct seminfo seminfo;
+		union semun arg;
+		/* SEM_INFO fills a struct seminfo, handed over through the semun union */
+		arg.array = (ushort *)  (void *) &seminfo;
+		if (semctl (0, 0, SEM_INFO, arg) < 0) {
+			printf (_("kernel not configured for semaphores\n"));
+			return;
+		}
+		printf (_("------ Semaphore Status --------\n"));
+		printf (_("used arrays = %d\n"), seminfo.semusz);
+		printf (_("allocated semaphores = %d\n"), seminfo.semaem);
+		return;
+	}
+
+	case CREATOR:
+		printf (_("------ Semaphore Arrays Creators/Owners --------\n"));
+		printf ("%-10s %-10s %-10s %-10s %-10s %-10s\n",
+			_("semid"),_("perms"),_("cuid"),_("cgid"),_("uid"),_("gid"));
+		break;
+
+	case TIME:
+		printf (_("------ Semaphore Operation/Change Times --------\n"));
+		printf ("%-8s %-10s %-26.24s %-26.24s\n",
+			_("semid"),_("owner"),_("last-op"),_("last-changed"));
+		break;
+
+	case PID:
+		/* no header is printed for the PID format */
+		break;
+
+	default:
+		printf (_("------ Semaphore Arrays --------\n"));
+		printf ("%-10s %-10s %-10s %-10s %-10s\n",
+			_("key"),_("semid"),_("owner"),_("perms"),_("nsems"));
+		break;
+	}
+
+	/*
+	 * Print data
+	 */
+	if (ipc_sem_get_info(-1, &semds) < 1)
+		return;
+
+	/* the list ends with an empty placeholder node, hence the ->next test */
+	for (semdsp = semds; semdsp->next != NULL; semdsp = semdsp->next) {
+		if (format == CREATOR)  {
+			ipc_print_perms(stdout, &semdsp->sem_perm);
+			continue;
+		}
+		/* owner is shown by name when a passwd entry exists, else as numeric uid */
+		pw = getpwuid(semdsp->sem_perm.uid);
+		switch (format) {
+		case TIME:
+			if (pw)
+				printf ("%-8d %-10.10s", semdsp->sem_perm.id, pw->pw_name);
+			else
+				printf ("%-8d %-10u", semdsp->sem_perm.id, semdsp->sem_perm.uid);
+			printf ("  %-26.24s", semdsp->sem_otime
+				? xctime(&semdsp->sem_otime) : _("Not set"));
+			printf (" %-26.24s\n", semdsp->sem_ctime
+				? xctime( &semdsp->sem_ctime) : _("Not set"));
+			break;
+		case PID:
+			/* no per-array line is printed for the PID format */
+			break;
+
+		default:
+			printf("0x%08x ", semdsp->sem_perm.key);
+			if (pw)
+				printf ("%-10d %-10.10s", semdsp->sem_perm.id, pw->pw_name);
+			else
+				printf ("%-10d %-10u", semdsp->sem_perm.id, semdsp->sem_perm.uid);
+			printf (" %-10o %-10ju\n",
+				semdsp->sem_perm.mode & 0777,
+				semdsp->sem_nsems);
+			break;
+		}
+	}
+
+	ipc_sem_free_info(semds);
+	return;
+}
+
+/*
+ * Print the message queue part of the listing.
+ *
+ * format: one of enum output_formats. LIMITS and STATUS print a
+ *         system-wide summary and return; every other value prints a
+ *         header followed by one line per queue.
+ * unit:   IPC_UNIT_* selector passed on to ipc_print_size() for sizes.
+ */
+static void do_msg (char format, int unit)
+{
+	struct passwd *pw;
+	struct msg_data *msgds, *msgdsp;
+
+	switch (format) {
+	case LIMITS:
+	{
+		struct ipc_limits lim;
+
+		if (ipc_msg_get_limits(&lim)) {
+			printf (_("unable to fetch message limits\n"));
+			return;
+		}
+		printf (_("------ Messages Limits --------\n"));
+		printf (_("max queues system wide = %d\n"), lim.msgmni);
+		ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_BYTES : unit,
+			       _("max size of message"), lim.msgmax, "\n", 0);
+		ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_BYTES : unit,
+			       _("default max size of queue"), lim.msgmnb, "\n", 0);
+		return;
+	}
+	case STATUS:
+	{
+		struct msginfo msginfo;
+		/* MSG_INFO fills a struct msginfo, passed through the msqid_ds pointer argument */
+		if (msgctl (0, MSG_INFO, (struct msqid_ds *) (void *) &msginfo) < 0) {
+			printf (_("kernel not configured for message queues\n"));
+			return;
+		}
+		printf (_("------ Messages Status --------\n"));
+#ifndef __FreeBSD_kernel__
+		printf (_("allocated queues = %d\n"), msginfo.msgpool);
+		printf (_("used headers = %d\n"), msginfo.msgmap);
+#endif
+		ipc_print_size(unit, _("used space"), msginfo.msgtql,
+			       unit == IPC_UNIT_DEFAULT ? _(" bytes\n") : "\n", 0);
+		return;
+	}
+	case CREATOR:
+		printf (_("------ Message Queues Creators/Owners --------\n"));
+		printf ("%-10s %-10s %-10s %-10s %-10s %-10s\n",
+			_("msqid"),_("perms"),_("cuid"),_("cgid"),_("uid"),_("gid"));
+		break;
+
+	case TIME:
+		printf (_("------ Message Queues Send/Recv/Change Times --------\n"));
+		printf ("%-8s %-10s %-20s %-20s %-20s\n",
+			_("msqid"),_("owner"),_("send"),_("recv"),_("change"));
+		break;
+
+	case PID:
+		printf (_("------ Message Queues PIDs --------\n"));
+		printf ("%-10s %-10s %-10s %-10s\n",
+			_("msqid"),_("owner"),_("lspid"),_("lrpid"));
+		break;
+
+	default:
+		printf (_("------ Message Queues --------\n"));
+		printf ("%-10s %-10s %-10s %-10s %-12s %-12s\n",
+			_("key"), _("msqid"), _("owner"), _("perms"),
+			unit == IPC_UNIT_HUMAN ? _("size") : _("used-bytes"),
+			_("messages"));
+		break;
+	}
+
+	/*
+	 * Print data
+	 */
+	if (ipc_msg_get_info(-1, &msgds) < 1)
+		return;
+
+	/* the list ends with an empty placeholder node, hence the ->next test */
+	for (msgdsp = msgds; msgdsp->next != NULL; msgdsp = msgdsp->next) {
+		if (format == CREATOR) {
+			ipc_print_perms(stdout, &msgdsp->msg_perm);
+			continue;
+		}
+		/* owner is shown by name when a passwd entry exists, else as numeric uid */
+		pw = getpwuid(msgdsp->msg_perm.uid);
+		switch (format) {
+		case TIME:
+			if (pw)
+				printf ("%-8d %-10.10s", msgdsp->msg_perm.id, pw->pw_name);
+			else
+				printf ("%-8d %-10u", msgdsp->msg_perm.id, msgdsp->msg_perm.uid);
+			/* one printf per timestamp: each xctime() result is consumed before the next call */
+			printf (" %-20.16s", msgdsp->q_stime
+				? xctime(&msgdsp->q_stime) + 4 : _("Not set"));
+			printf (" %-20.16s", msgdsp->q_rtime
+				? xctime(&msgdsp->q_rtime) + 4 : _("Not set"));
+			printf (" %-20.16s\n", msgdsp->q_ctime
+				? xctime(&msgdsp->q_ctime) + 4 : _("Not set"));
+			break;
+		case PID:
+			if (pw)
+				printf ("%-8d %-10.10s", msgdsp->msg_perm.id, pw->pw_name);
+			else
+				printf ("%-8d %-10u", msgdsp->msg_perm.id, msgdsp->msg_perm.uid);
+			printf ("  %5d     %5d\n",
+				msgdsp->q_lspid, msgdsp->q_lrpid);
+			break;
+
+		default:
+			printf( "0x%08x ",msgdsp->msg_perm.key );
+			if (pw)
+				printf ("%-10d %-10.10s", msgdsp->msg_perm.id, pw->pw_name);
+			else
+				printf ("%-10d %-10u", msgdsp->msg_perm.id, msgdsp->msg_perm.uid);
+			printf (" %-10o ", msgdsp->msg_perm.mode & 0777);
+
+			if (unit == IPC_UNIT_HUMAN)
+				ipc_print_size(unit, NULL, msgdsp->q_cbytes, "      ", 6);
+			else
+				ipc_print_size(unit, NULL, msgdsp->q_cbytes, NULL, -12);
+
+			printf (" %-12ju\n", msgdsp->q_qnum);
+			break;
+		}
+	}
+
+	ipc_msg_free_info(msgds);
+	return;
+}
+
+/*
+ * Print the full description of the shared memory segment shmid, or warn
+ * when no such segment is found. Sizes are rendered according to unit.
+ */
+static void print_shm(int shmid, int unit)
+{
+	struct shm_data *seg;
+	const char *when;
+	int human = (unit == IPC_UNIT_HUMAN);
+
+	if (ipc_shm_get_info(shmid, &seg) < 1) {
+		warnx(_("id %d not found"), shmid);
+		return;
+	}
+
+	printf(_("\nShared memory Segment shmid=%d\n"), shmid);
+	printf(_("uid=%u\tgid=%u\tcuid=%u\tcgid=%u\n"),
+	       seg->shm_perm.uid, seg->shm_perm.gid,
+	       seg->shm_perm.cuid, seg->shm_perm.cgid);
+	printf(_("mode=%#o\taccess_perms=%#o\n"),
+	       seg->shm_perm.mode, seg->shm_perm.mode & 0777);
+
+	ipc_print_size(unit, human ? _("size=") : _("bytes="),
+		       seg->shm_segsz, "\t", 0);
+	printf(_("lpid=%u\tcpid=%u\tnattch=%jd\n"),
+	       seg->shm_lprid, seg->shm_cprid, seg->shm_nattch);
+
+	/* each timestamp is printed before the next xctime() call */
+	when = seg->shm_atim ? xctime(&seg->shm_atim) : _("Not set");
+	printf(_("att_time=%-26.24s\n"), when);
+
+	when = seg->shm_dtim ? xctime(&seg->shm_dtim) : _("Not set");
+	printf(_("det_time=%-26.24s\n"), when);
+
+	when = xctime(&seg->shm_ctim);
+	printf(_("change_time=%-26.24s\n"), when);
+	printf("\n");
+
+	ipc_shm_free_info(seg);
+}
+
+/*
+ * Print the full description of the message queue msgid, or warn when no
+ * such queue is found. Sizes are rendered according to unit.
+ */
+static void print_msg(int msgid, int unit)
+{
+	struct msg_data *q;
+	const char *when;
+	int human = (unit == IPC_UNIT_HUMAN);
+
+	if (ipc_msg_get_info(msgid, &q) < 1) {
+		warnx(_("id %d not found"), msgid);
+		return;
+	}
+
+	printf(_("\nMessage Queue msqid=%d\n"), msgid);
+	printf(_("uid=%u\tgid=%u\tcuid=%u\tcgid=%u\tmode=%#o\n"),
+	       q->msg_perm.uid, q->msg_perm.gid,
+	       q->msg_perm.cuid, q->msg_perm.cgid,
+	       q->msg_perm.mode);
+
+	ipc_print_size(unit, human ? _("csize=") : _("cbytes="),
+		       q->q_cbytes, "\t", 0);
+	ipc_print_size(unit, human ? _("qsize=") : _("qbytes="),
+		       q->q_qbytes, "\t", 0);
+	printf("qnum=%jd\tlspid=%d\tlrpid=%d\n",
+	       q->q_qnum, q->q_lspid, q->q_lrpid);
+
+	/* each timestamp is printed before the next xctime() call */
+	when = q->q_stime ? xctime(&q->q_stime) : _("Not set");
+	printf(_("send_time=%-26.24s\n"), when);
+
+	when = q->q_rtime ? xctime(&q->q_rtime) : _("Not set");
+	printf(_("rcv_time=%-26.24s\n"), when);
+
+	when = q->q_ctime ? xctime(&q->q_ctime) : _("Not set");
+	printf(_("change_time=%-26.24s\n"), when);
+	printf("\n");
+
+	ipc_msg_free_info(q);
+}
+
+/*
+ * Print the full description of the semaphore array semid, including one
+ * line per semaphore, or warn when no such array is found.
+ */
+static void print_sem(int semid)
+{
+	struct sem_data *set;
+	const char *when;
+	size_t n = 0;
+
+	if (ipc_sem_get_info(semid, &set) < 1) {
+		warnx(_("id %d not found"), semid);
+		return;
+	}
+
+	printf(_("\nSemaphore Array semid=%d\n"), semid);
+	printf(_("uid=%u\t gid=%u\t cuid=%u\t cgid=%u\n"),
+	       set->sem_perm.uid, set->sem_perm.gid,
+	       set->sem_perm.cuid, set->sem_perm.cgid);
+	printf(_("mode=%#o, access_perms=%#o\n"),
+	       set->sem_perm.mode, set->sem_perm.mode & 0777);
+	printf(_("nsems = %ju\n"), set->sem_nsems);
+
+	when = set->sem_otime ? xctime(&set->sem_otime) : _("Not set");
+	printf(_("otime = %-26.24s\n"), when);
+
+	when = xctime(&set->sem_ctime);
+	printf(_("ctime = %-26.24s\n"), when);
+
+	printf("%-10s %-10s %-10s %-10s %-10s\n",
+	       _("semnum"), _("value"), _("ncount"), _("zcount"), _("pid"));
+
+	/* one row per semaphore in the array */
+	while (n < set->sem_nsems) {
+		struct sem_elem *cur = set->elements + n;
+
+		printf("%-10zu %-10d %-10d %-10d %-10d\n",
+		       n, cur->semval, cur->ncount, cur->zcount, cur->pid);
+		n++;
+	}
+	printf("\n");
+	ipc_sem_free_info(set);
+}
diff --git a/sys-utils/ipcutils.c b/sys-utils/ipcutils.c
new file mode 100644
index 0000000..5fe297f
--- /dev/null
+++ b/sys-utils/ipcutils.c
@@ -0,0 +1,533 @@
+#include <inttypes.h>
+
+#include "c.h"
+#include "nls.h"
+#include "xalloc.h"
+#include "path.h"
+#include "pathnames.h"
+#include "ipcutils.h"
+#include "strutils.h"
+
+#ifndef SEMVMX
+# define SEMVMX  32767	/* <= 32767 semaphore maximum value */
+#endif
+#ifndef SHMMIN
+# define SHMMIN 1	/* min shared segment size in bytes */
+#endif
+
+
+/*
+ * Fill the message queue limits (msgmni, msgmnb, msgmax) of *lim.
+ *
+ * The values are read from the _PATH_PROC_IPC_MSG* files when all three
+ * exist, otherwise they are queried with msgctl(IPC_INFO).
+ *
+ * Returns 0 on success and 1 when the limits cannot be obtained.
+ */
+int ipc_msg_get_limits(struct ipc_limits *lim)
+{
+	if (access(_PATH_PROC_IPC_MSGMNI, F_OK) == 0 &&
+	    access(_PATH_PROC_IPC_MSGMNB, F_OK) == 0 &&
+	    access(_PATH_PROC_IPC_MSGMAX, F_OK) == 0) {
+
+		/*
+		 * A file may exist and still be unreadable or unparsable;
+		 * report that instead of returning uninitialized limits.
+		 */
+		if (ul_path_read_s32(NULL, &lim->msgmni, _PATH_PROC_IPC_MSGMNI) != 0)
+			return 1;
+		if (ul_path_read_s32(NULL, &lim->msgmnb, _PATH_PROC_IPC_MSGMNB) != 0)
+			return 1;
+		if (ul_path_read_u64(NULL, &lim->msgmax, _PATH_PROC_IPC_MSGMAX) != 0)
+			return 1;
+	} else {
+		struct msginfo msginfo;
+
+		/* IPC_INFO fills a struct msginfo through the msqid_ds pointer */
+		if (msgctl(0, IPC_INFO, (struct msqid_ds *) &msginfo) < 0)
+			return 1;
+		lim->msgmni = msginfo.msgmni;
+		lim->msgmnb = msginfo.msgmnb;
+		lim->msgmax = msginfo.msgmax;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill the semaphore limits of *lim.
+ *
+ * semvmx is always SEMVMX. The other four values are parsed from
+ * _PATH_PROC_IPC_SEM; when that does not yield all four fields they are
+ * queried with semctl(IPC_INFO) instead.
+ *
+ * Returns 0 on success and 1 when the semctl() query fails.
+ */
+int ipc_sem_get_limits(struct ipc_limits *lim)
+{
+	struct seminfo seminfo = { .semmni = 0 };
+	union semun arg = { .array = (ushort *) &seminfo };
+	int nfields = 0;
+	FILE *fp;
+
+	lim->semvmx = SEMVMX;
+
+	fp = fopen(_PATH_PROC_IPC_SEM, "r");
+	if (fp != NULL) {
+		nfields = fscanf(fp, "%d\t%d\t%d\t%d",
+				 &lim->semmsl, &lim->semmns,
+				 &lim->semopm, &lim->semmni);
+		fclose(fp);
+	}
+
+	/* all four values parsed: nothing left to query */
+	if (nfields == 4)
+		return 0;
+
+	if (semctl(0, 0, IPC_INFO, arg) < 0)
+		return 1;
+
+	lim->semmni = seminfo.semmni;
+	lim->semmsl = seminfo.semmsl;
+	lim->semmns = seminfo.semmns;
+	lim->semopm = seminfo.semopm;
+	return 0;
+}
+
+/*
+ * Fill the shared memory limits (shmmin, shmall, shmmax, shmmni) of *lim.
+ *
+ * shmmin is always SHMMIN. The other values are read from the
+ * _PATH_PROC_IPC_SHM* files when all three exist, otherwise they are
+ * queried with shmctl(IPC_INFO).
+ *
+ * Returns 0 on success and 1 when the limits cannot be obtained.
+ */
+int ipc_shm_get_limits(struct ipc_limits *lim)
+{
+	lim->shmmin = SHMMIN;
+
+	if (access(_PATH_PROC_IPC_SHMALL, F_OK) == 0 &&
+	    access(_PATH_PROC_IPC_SHMMAX, F_OK) == 0 &&
+	    access(_PATH_PROC_IPC_SHMMNI, F_OK) == 0) {
+
+		/*
+		 * A file may exist and still be unreadable or unparsable;
+		 * report that instead of returning uninitialized limits.
+		 */
+		if (ul_path_read_u64(NULL, &lim->shmall, _PATH_PROC_IPC_SHMALL) != 0)
+			return 1;
+		if (ul_path_read_u64(NULL, &lim->shmmax, _PATH_PROC_IPC_SHMMAX) != 0)
+			return 1;
+		if (ul_path_read_u64(NULL, &lim->shmmni, _PATH_PROC_IPC_SHMMNI) != 0)
+			return 1;
+
+	} else {
+		struct shminfo *shminfo;
+		struct shmid_ds shmbuf;
+
+		/* the IPC_INFO result is read back through a struct shminfo view of shmbuf */
+		if (shmctl(0, IPC_INFO, &shmbuf) < 0)
+			return 1;
+		shminfo = (struct shminfo *) &shmbuf;
+		lim->shmmni = shminfo->shmmni;
+		lim->shmall = shminfo->shmall;
+		lim->shmmax = shminfo->shmmax;
+	}
+
+	return 0;
+}
+
+/*
+ * Collect information about shared memory segments into a newly
+ * allocated list stored in *shmds.
+ *
+ * id < 0:  all segments are returned; the list is terminated by an extra
+ *          zero-filled node, so callers iterate while ->next != NULL.
+ * id >= 0: only the segment with that id is looked up.
+ *
+ * Data is parsed from _PATH_PROC_SYSV_SHM, or queried with shmctl() when
+ * that file cannot be opened.
+ *
+ * Returns the number of segments found. When nothing is found the list
+ * is freed and *shmds is set to NULL; otherwise the caller releases it
+ * with ipc_shm_free_info().
+ */
+int ipc_shm_get_info(int id, struct shm_data **shmds)
+{
+	FILE *f;
+	int i = 0, maxid, c;
+	char buf[BUFSIZ];
+	struct shm_data *p;
+	struct shmid_ds dummy;
+
+	p = *shmds = xcalloc(1, sizeof(struct shm_data));
+	p->next = NULL;
+
+	f = fopen(_PATH_PROC_SYSV_SHM, "r");
+	if (!f)
+		goto shm_fallback;
+
+	/* skip header; also stop at EOF so an empty file cannot hang us */
+	while ((c = fgetc(f)) != '\n' && c != EOF)
+		;
+
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+		/* scan for the first 14-16 columns (e.g. Linux 2.6.32 has 14) */
+		p->shm_rss = 0xdead;
+		p->shm_swp = 0xdead;
+		if (sscanf(buf,
+			  "%d %d  %o %"SCNu64 " %u %u  "
+			  "%"SCNu64 " %u %u %u %u %"SCNi64 " %"SCNi64 " %"SCNi64
+			  " %"SCNu64 " %"SCNu64 "\n",
+			   &p->shm_perm.key,
+			   &p->shm_perm.id,
+			   &p->shm_perm.mode,
+			   &p->shm_segsz,
+			   &p->shm_cprid,
+			   &p->shm_lprid,
+			   &p->shm_nattch,
+			   &p->shm_perm.uid,
+			   &p->shm_perm.gid,
+			   &p->shm_perm.cuid,
+			   &p->shm_perm.cgid,
+			   &p->shm_atim,
+			   &p->shm_dtim,
+			   &p->shm_ctim,
+			   &p->shm_rss,
+			   &p->shm_swp) < 14)
+			continue; /* invalid line, skipped */
+
+		if (id > -1) {
+			/* ID specified */
+			if (id == p->shm_perm.id) {
+				i = 1;
+				break;
+			} else
+				continue;
+		}
+
+		/* append a fresh node; the last one stays empty as terminator */
+		p->next = xcalloc(1, sizeof(struct shm_data));
+		p = p->next;
+		p->next = NULL;
+		i++;
+	}
+
+	if (i == 0) {
+		free(*shmds);
+		*shmds = NULL;	/* do not hand a dangling pointer back */
+	}
+	fclose(f);
+	return i;
+
+	/* Fallback; /proc or /sys file(s) missing. */
+shm_fallback:
+	maxid = shmctl(0, SHM_INFO, &dummy);
+
+	for (int j = 0; j <= maxid; j++) {
+		int shmid;
+		struct shmid_ds shmseg;
+		struct ipc_perm *ipcp = &shmseg.shm_perm;
+
+		shmid = shmctl(j, SHM_STAT, &shmseg);
+		if (shmid < 0 || (id > -1 && shmid != id)) {
+			continue;
+		}
+
+		i++;
+		p->shm_perm.key = ipcp->KEY;
+		p->shm_perm.id = shmid;
+		p->shm_perm.mode = ipcp->mode;
+		p->shm_segsz = shmseg.shm_segsz;
+		p->shm_cprid = shmseg.shm_cpid;
+		p->shm_lprid = shmseg.shm_lpid;
+		p->shm_nattch = shmseg.shm_nattch;
+		p->shm_perm.uid = ipcp->uid;
+		p->shm_perm.gid = ipcp->gid;
+		p->shm_perm.cuid = ipcp->cuid;
+		p->shm_perm.cgid = ipcp->cgid;	/* creator gid, not the creator uid */
+		p->shm_atim = shmseg.shm_atime;
+		p->shm_dtim = shmseg.shm_dtime;
+		p->shm_ctim = shmseg.shm_ctime;
+		p->shm_rss = 0xdead;
+		p->shm_swp = 0xdead;
+
+		if (id < 0) {
+			p->next = xcalloc(1, sizeof(struct shm_data));
+			p = p->next;
+			p->next = NULL;
+		} else
+			break;
+	}
+
+	if (i == 0) {
+		free(*shmds);
+		*shmds = NULL;	/* do not hand a dangling pointer back */
+	}
+	return i;
+}
+
+/* Release every node of a list built by ipc_shm_get_info(); NULL is accepted. */
+void ipc_shm_free_info(struct shm_data *shmds)
+{
+	struct shm_data *following;
+
+	for (; shmds != NULL; shmds = following) {
+		/* remember the successor before the node goes away */
+		following = shmds->next;
+		free(shmds);
+	}
+}
+
+/*
+ * Allocate p->elements and fill in value, waiter counts and last pid of
+ * every semaphore in the array described by p. Does nothing for a NULL
+ * p, an empty array or a negative id; any failing semctl() call
+ * terminates the program.
+ */
+static void get_sem_elements(struct sem_data *p)
+{
+	size_t n;
+	int semid;
+
+	if (p == NULL || p->sem_nsems == 0 || p->sem_perm.id < 0)
+		return;
+
+	semid = p->sem_perm.id;
+	p->elements = xcalloc(p->sem_nsems, sizeof(struct sem_elem));
+
+	for (n = 0; n < p->sem_nsems; n++) {
+		struct sem_elem *cur = p->elements + n;
+		union semun arg = { .val = 0 };
+
+		if ((cur->semval = semctl(semid, n, GETVAL, arg)) < 0)
+			err(EXIT_FAILURE, _("%s failed"), "semctl(GETVAL)");
+
+		if ((cur->ncount = semctl(semid, n, GETNCNT, arg)) < 0)
+			err(EXIT_FAILURE, _("%s failed"), "semctl(GETNCNT)");
+
+		if ((cur->zcount = semctl(semid, n, GETZCNT, arg)) < 0)
+			err(EXIT_FAILURE, _("%s failed"), "semctl(GETZCNT)");
+
+		if ((cur->pid = semctl(semid, n, GETPID, arg)) < 0)
+			err(EXIT_FAILURE, _("%s failed"), "semctl(GETPID)");
+	}
+}
+
+/*
+ * Collect information about semaphore arrays into a newly allocated list
+ * stored in *semds.
+ *
+ * id < 0:  all arrays are returned; the list is terminated by an extra
+ *          zero-filled node, so callers iterate while ->next != NULL.
+ * id >= 0: only the array with that id is looked up, and its per-semaphore
+ *          elements are fetched as well.
+ *
+ * Data is parsed from _PATH_PROC_SYSV_SEM, or queried with semctl() when
+ * that file cannot be opened.
+ *
+ * Returns the number of arrays found. When nothing is found the list is
+ * freed and *semds is set to NULL; otherwise the caller releases it with
+ * ipc_sem_free_info().
+ */
+int ipc_sem_get_info(int id, struct sem_data **semds)
+{
+	FILE *f;
+	int i = 0, maxid, c;
+	char buf[BUFSIZ];
+	struct sem_data *p;
+	struct seminfo dummy;
+	union semun arg;
+
+	p = *semds = xcalloc(1, sizeof(struct sem_data));
+	p->next = NULL;
+
+	f = fopen(_PATH_PROC_SYSV_SEM, "r");
+	if (!f)
+		goto sem_fallback;
+
+	/* skip header; also stop at EOF so an empty file cannot hang us */
+	while ((c = fgetc(f)) != '\n' && c != EOF)
+		;
+
+	/*
+	 * Parse line by line: a malformed line is then simply skipped,
+	 * whereas retrying fscanf() on the stream would never get past it.
+	 */
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+		if (sscanf(buf,
+			   "%d %d  %o %" SCNu64 " %u %u %u %u %"
+			    SCNi64 " %" SCNi64 "\n",
+			   &p->sem_perm.key,
+			   &p->sem_perm.id,
+			   &p->sem_perm.mode,
+			   &p->sem_nsems,
+			   &p->sem_perm.uid,
+			   &p->sem_perm.gid,
+			   &p->sem_perm.cuid,
+			   &p->sem_perm.cgid,
+			   &p->sem_otime,
+			   &p->sem_ctime) != 10)
+			continue; /* invalid line, skipped */
+
+		if (id > -1) {
+			/* ID specified */
+			if (id == p->sem_perm.id) {
+				get_sem_elements(p);
+				i = 1;
+				break;
+			} else
+				continue;
+		}
+
+		/* append a fresh node; the last one stays empty as terminator */
+		p->next = xcalloc(1, sizeof(struct sem_data));
+		p = p->next;
+		p->next = NULL;
+		i++;
+	}
+
+	if (i == 0) {
+		free(*semds);
+		*semds = NULL;	/* do not hand a dangling pointer back */
+	}
+	fclose(f);
+	return i;
+
+	/* Fallback; /proc or /sys file(s) missing. */
+sem_fallback:
+	arg.array = (ushort *) (void *)&dummy;
+	maxid = semctl(0, 0, SEM_INFO, arg);
+
+	for (int j = 0; j <= maxid; j++) {
+		int semid;
+		struct semid_ds semseg;
+		struct ipc_perm *ipcp = &semseg.sem_perm;
+		arg.buf = (struct semid_ds *)&semseg;
+
+		semid = semctl(j, 0, SEM_STAT, arg);
+		if (semid < 0 || (id > -1 && semid != id)) {
+			continue;
+		}
+
+		/* counted once per array found */
+		i++;
+		p->sem_perm.key = ipcp->KEY;
+		p->sem_perm.id = semid;
+		p->sem_perm.mode = ipcp->mode;
+		p->sem_nsems = semseg.sem_nsems;
+		p->sem_perm.uid = ipcp->uid;
+		p->sem_perm.gid = ipcp->gid;
+		p->sem_perm.cuid = ipcp->cuid;
+		p->sem_perm.cgid = ipcp->cgid;	/* creator gid, not the creator uid */
+		p->sem_otime = semseg.sem_otime;
+		p->sem_ctime = semseg.sem_ctime;
+
+		if (id < 0) {
+			p->next = xcalloc(1, sizeof(struct sem_data));
+			p = p->next;
+			p->next = NULL;
+		} else {
+			get_sem_elements(p);
+			break;
+		}
+	}
+
+	if (i == 0) {
+		free(*semds);
+		*semds = NULL;	/* do not hand a dangling pointer back */
+	}
+	return i;
+}
+
+/*
+ * Release every node of a list built by ipc_sem_get_info(), together with
+ * each node's elements array; NULL is accepted.
+ */
+void ipc_sem_free_info(struct sem_data *semds)
+{
+	struct sem_data *following;
+
+	for (; semds != NULL; semds = following) {
+		/* remember the successor before the node goes away */
+		following = semds->next;
+		free(semds->elements);
+		free(semds);
+	}
+}
+
+/*
+ * Collect information about message queues into a newly allocated list
+ * stored in *msgds.
+ *
+ * id < 0:  all queues are returned; the list is terminated by an extra
+ *          zero-filled node, so callers iterate while ->next != NULL.
+ * id >= 0: only the queue with that id is looked up; q_qbytes is filled
+ *          in as well when msgctl() succeeds.
+ *
+ * Data is parsed from _PATH_PROC_SYSV_MSG, or queried with msgctl() when
+ * that file cannot be opened.
+ *
+ * Returns the number of queues found. When nothing is found the list is
+ * freed and *msgds is set to NULL; otherwise the caller releases it with
+ * ipc_msg_free_info().
+ */
+int ipc_msg_get_info(int id, struct msg_data **msgds)
+{
+	FILE *f;
+	int i = 0, maxid, c;
+	char buf[BUFSIZ];
+	struct msg_data *p;
+	struct msqid_ds dummy;
+	struct msqid_ds msgseg;
+
+	p = *msgds = xcalloc(1, sizeof(struct msg_data));
+	p->next = NULL;
+
+	f = fopen(_PATH_PROC_SYSV_MSG, "r");
+	if (!f)
+		goto msg_fallback;
+
+	/* skip header; also stop at EOF so an empty file cannot hang us */
+	while ((c = fgetc(f)) != '\n' && c != EOF)
+		;
+
+	/*
+	 * Parse line by line: a malformed line is then simply skipped,
+	 * whereas retrying fscanf() on the stream would never get past it.
+	 */
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+		if (sscanf(buf,
+			   "%d %d  %o  %" SCNu64 " %" SCNu64
+			   " %u %u %u %u %u %u %" SCNi64 " %" SCNi64 " %" SCNi64 "\n",
+			   &p->msg_perm.key,
+			   &p->msg_perm.id,
+			   &p->msg_perm.mode,
+			   &p->q_cbytes,
+			   &p->q_qnum,
+			   &p->q_lspid,
+			   &p->q_lrpid,
+			   &p->msg_perm.uid,
+			   &p->msg_perm.gid,
+			   &p->msg_perm.cuid,
+			   &p->msg_perm.cgid,
+			   &p->q_stime,
+			   &p->q_rtime,
+			   &p->q_ctime) != 14)
+			continue; /* invalid line, skipped */
+
+		if (id > -1) {
+			/* ID specified */
+			if (id == p->msg_perm.id) {
+				/* q_qbytes is not among the parsed columns; ask msgctl() */
+				if (msgctl(id, IPC_STAT, &msgseg) != -1)
+					p->q_qbytes = msgseg.msg_qbytes;
+				i = 1;
+				break;
+			} else
+				continue;
+		}
+
+		/* append a fresh node; the last one stays empty as terminator */
+		p->next = xcalloc(1, sizeof(struct msg_data));
+		p = p->next;
+		p->next = NULL;
+		i++;
+	}
+
+	if (i == 0) {
+		free(*msgds);
+		*msgds = NULL;	/* do not hand a dangling pointer back */
+	}
+	fclose(f);
+	return i;
+
+	/* Fallback; /proc or /sys file(s) missing. */
+msg_fallback:
+	maxid = msgctl(0, MSG_INFO, &dummy);
+
+	for (int j = 0; j <= maxid; j++) {
+		int msgid;
+		struct ipc_perm *ipcp = &msgseg.msg_perm;
+
+		msgid = msgctl(j, MSG_STAT, &msgseg);
+		if (msgid < 0 || (id > -1 && msgid != id)) {
+			continue;
+		}
+
+		i++;
+		p->msg_perm.key = ipcp->KEY;
+		p->msg_perm.id = msgid;
+		p->msg_perm.mode = ipcp->mode;
+		p->q_cbytes = msgseg.msg_cbytes;
+		p->q_qnum = msgseg.msg_qnum;
+		p->q_lspid = msgseg.msg_lspid;
+		p->q_lrpid = msgseg.msg_lrpid;
+		p->msg_perm.uid = ipcp->uid;
+		p->msg_perm.gid = ipcp->gid;
+		p->msg_perm.cuid = ipcp->cuid;
+		p->msg_perm.cgid = ipcp->cgid;
+		p->q_stime = msgseg.msg_stime;
+		p->q_rtime = msgseg.msg_rtime;
+		p->q_ctime = msgseg.msg_ctime;
+		p->q_qbytes = msgseg.msg_qbytes;
+
+		if (id < 0) {
+			p->next = xcalloc(1, sizeof(struct msg_data));
+			p = p->next;
+			p->next = NULL;
+		} else
+			break;
+	}
+
+	if (i == 0) {
+		free(*msgds);
+		*msgds = NULL;	/* do not hand a dangling pointer back */
+	}
+	return i;
+}
+
+/* Release every node of a list built by ipc_msg_get_info(); NULL is accepted. */
+void ipc_msg_free_info(struct msg_data *msgds)
+{
+	struct msg_data *following;
+
+	for (; msgds != NULL; msgds = following) {
+		/* remember the successor before the node goes away */
+		following = msgds->next;
+		free(msgds);
+	}
+}
+
+/*
+ * Write one line to f describing *is: id, permission bits, then creator
+ * user, creator group, owner user and owner group. Each user or group is
+ * printed by name when it can be resolved and as a number otherwise.
+ */
+void ipc_print_perms(FILE *f, struct ipc_stat *is)
+{
+	struct passwd *user;
+	struct group *grp;
+
+	fprintf(f, "%-10d %-10o", is->id, is->mode & 0777);
+
+	/* creator user */
+	user = getpwuid(is->cuid);
+	if (user == NULL)
+		fprintf(f, " %-10u", is->cuid);
+	else
+		fprintf(f, " %-10s", user->pw_name);
+
+	/* creator group */
+	grp = getgrgid(is->cgid);
+	if (grp == NULL)
+		fprintf(f, " %-10u", is->cgid);
+	else
+		fprintf(f, " %-10s", grp->gr_name);
+
+	/* owner user */
+	user = getpwuid(is->uid);
+	if (user == NULL)
+		fprintf(f, " %-10u", is->uid);
+	else
+		fprintf(f, " %-10s", user->pw_name);
+
+	/* owner group, which also ends the line */
+	grp = getgrgid(is->gid);
+	if (grp == NULL)
+		fprintf(f, " %-10u\n", is->gid);
+	else
+		fprintf(f, " %-10s\n", grp->gr_name);
+}
+
+/*
+ * Print a size to stdout, optionally preceded by a label and followed by
+ * a terminator.
+ *
+ * unit:  IPC_UNIT_DEFAULT or IPC_UNIT_BYTES print the plain number,
+ *        IPC_UNIT_KB prints size / 1024, IPC_UNIT_HUMAN prints the string
+ *        produced by size_to_human_string(). Any other value aborts.
+ * msg:   label, or NULL for none. A label ending in '=' is printed as is;
+ *        otherwise " = " is appended, with a "(bytes)" or "(kbytes)" hint
+ *        for the BYTES and KB units.
+ * size:  the value to print.
+ * end:   string printed after the value, or NULL for none.
+ * width: printf field width for the value; negative left-justifies and 0
+ *        means no padding.
+ */
+void ipc_print_size(int unit, char *msg, uint64_t size, const char *end,
+		    int width)
+{
+	/* measured once so that an empty label cannot index msg[-1] */
+	size_t len = msg ? strlen(msg) : 0;
+
+	if (!msg)
+		/* NULL */ ;
+	else if (len > 0 && msg[len - 1] == '=')
+		printf("%s", msg);
+	else if (unit == IPC_UNIT_BYTES)
+		printf(_("%s (bytes) = "), msg);
+	else if (unit == IPC_UNIT_KB)
+		printf(_("%s (kbytes) = "), msg);
+	else
+		printf("%s = ", msg);
+
+	/*
+	 * The width is passed with '*' rather than pasted into a format
+	 * string built at run time; a pasted width of 0 would produce
+	 * "%0s", which uses the '0' flag with a string conversion.
+	 */
+	switch (unit) {
+	case IPC_UNIT_DEFAULT:
+	case IPC_UNIT_BYTES:
+		printf("%*ju", width, (uintmax_t) size);
+		break;
+	case IPC_UNIT_KB:
+		printf("%*ju", width, (uintmax_t) (size / 1024));
+		break;
+	case IPC_UNIT_HUMAN:
+	{
+		char *tmp = size_to_human_string(SIZE_SUFFIX_1LETTER, size);
+
+		/* never hand a NULL pointer to %s */
+		printf("%*s", width, tmp ? tmp : "");
+		free(tmp);
+		break;
+	}
+	default:
+		/* impossible occurred */
+		abort();
+	}
+
+	if (end)
+		printf("%s", end);
+}
diff --git a/sys-utils/ipcutils.h b/sys-utils/ipcutils.h
new file mode 100644
index 0000000..db85f57
--- /dev/null
+++ b/sys-utils/ipcutils.h
@@ -0,0 +1,187 @@
+#ifndef UTIL_LINUX_IPCUTILS_H
+#define UTIL_LINUX_IPCUTILS_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ipc.h>
+#include <sys/msg.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdint.h>
+
+/*
+ * SHM_DEST and SHM_LOCKED are defined in kernel headers, but inside
+ * #ifdef __KERNEL__ ... #endif
+ */
+#ifndef SHM_DEST
+  /* shm_mode upper byte flags */
+# define SHM_DEST	01000	/* segment will be destroyed on last detach */
+# define SHM_LOCKED	02000	/* segment will not be swapped */
+#endif
+
+/* For older kernels the same holds for the defines below */
+#ifndef MSG_STAT
+# define MSG_STAT	11
+# define MSG_INFO	12
+#endif
+
+#ifndef SHM_STAT
+# define SHM_STAT	13
+# define SHM_INFO	14
+struct shm_info {
+	int used_ids;
+	unsigned long shm_tot;		/* total allocated shm */
+	unsigned long shm_rss;		/* total resident shm */
+	unsigned long shm_swp;		/* total swapped shm */
+	unsigned long swap_attempts;
+	unsigned long swap_successes;
+};
+#endif
+
+#ifndef SEM_STAT
+# define SEM_STAT	18
+# define SEM_INFO	19
+#endif
+
+/* Some versions of libc only define IPC_INFO when __USE_GNU is defined. */
+#ifndef IPC_INFO
+# define IPC_INFO	3
+#endif
+
+/*
+ * The last arg of semctl is a union semun, but where is it defined? X/OPEN
+ * tells us to define it ourselves, but until recently Linux include files
+ * would also define it.
+ */
+#ifndef HAVE_UNION_SEMUN
+/* according to X/OPEN we have to define it ourselves */
+union semun {
+	int val;
+	struct semid_ds *buf;
+	unsigned short int *array;
+	struct seminfo *__buf;
+};
+#endif
+
+/*
+ * X/OPEN (Jan 1987) does not define fields key, seq in struct ipc_perm;
+ *	glibc-1.09 has no support for sysv ipc.
+ *	glibc 2 uses __key, __seq
+ */
+#if defined (__GLIBC__) && __GLIBC__ >= 2
+# define KEY __key
+#else
+# define KEY key
+#endif
+
+/* Size printing in ipcs is using these; see ipc_print_size(). */
+enum {
+	IPC_UNIT_DEFAULT,	/* plain byte count; label printed as "<msg> = " */
+	IPC_UNIT_BYTES,		/* byte count; label gets " (bytes)" */
+	IPC_UNIT_KB,		/* size / 1024; label gets " (kbytes)" */
+	IPC_UNIT_HUMAN		/* formatted by size_to_human_string() */
+};
+
+struct ipc_limits {
+	uint64_t	shmmni;		/* max number of segments */
+	uint64_t	shmmax;		/* max segment size */
+	uint64_t	shmall;		/* max total shared memory */
+	uint64_t	shmmin;		/* min segment size */
+
+	int		semmni;		/* max number of arrays */
+	int		semmsl;		/* max semaphores per array */
+	int		semmns;		/* max semaphores system wide */
+	int		semopm;		/* max ops per semop call */
+	unsigned int	semvmx;		/* semaphore max value (constant) */
+
+	int		msgmni;		/* max queues system wide */
+	uint64_t	msgmax;		/* max size of message */
+	int		msgmnb;		/* default max size of queue */
+};
+
+extern int ipc_msg_get_limits(struct ipc_limits *lim);
+extern int ipc_sem_get_limits(struct ipc_limits *lim);
+extern int ipc_shm_get_limits(struct ipc_limits *lim);
+
+struct ipc_stat {
+	int		id;	/* first column printed by ipc_print_perms() */
+	key_t		key;	/* presumably the IPC key of the object -- confirm in ipcutils.c */
+	uid_t		uid;    /* current uid */
+	gid_t		gid;    /* current gid */
+	uid_t		cuid;    /* creator uid */
+	gid_t		cgid;    /* creator gid */
+	unsigned int	mode;	/* ipc_print_perms() shows the low 0777 bits in octal */
+};
+
+extern void ipc_print_perms(FILE *f, struct ipc_stat *is);
+extern void ipc_print_size(int unit, char *msg, uint64_t size, const char *end, int width);
+
+/* See 'struct shmid_kernel' in kernel sources
+ */
+struct shm_data {
+	struct ipc_stat	shm_perm;
+
+	uint64_t	shm_nattch;
+	uint64_t	shm_segsz;
+	int64_t		shm_atim;	/* __kernel_time_t is signed long */
+	int64_t		shm_dtim;
+	int64_t		shm_ctim;
+	pid_t		shm_cprid;
+	pid_t		shm_lprid;
+	uint64_t	shm_rss;
+	uint64_t	shm_swp;
+
+	struct shm_data  *next;
+};
+
+extern int ipc_shm_get_info(int id, struct shm_data **shmds);
+extern void ipc_shm_free_info(struct shm_data *shmds);
+
+/* See 'struct sem_array' in kernel sources
+ */
+struct sem_elem {
+	int	semval;
+	int	ncount;		/* processes waiting on increase semval */
+	int	zcount;		/* processes waiting on semval set to zero */
+	pid_t	pid;		/* process last executed semop(2) call */
+};
+struct sem_data {
+	struct ipc_stat sem_perm;
+
+	int64_t		sem_ctime;
+	int64_t		sem_otime;
+	uint64_t	sem_nsems;
+
+	struct sem_elem	*elements;
+	struct sem_data *next;
+};
+
+extern int ipc_sem_get_info(int id, struct sem_data **semds);
+extern void ipc_sem_free_info(struct sem_data *semds);
+
+/* See 'struct msg_queue' in kernel sources
+ */
+struct msg_data {
+	struct ipc_stat msg_perm;
+
+	int64_t		q_stime;
+	int64_t		q_rtime;
+	int64_t		q_ctime;
+	uint64_t	q_cbytes;
+	uint64_t	q_qnum;
+	uint64_t	q_qbytes;
+	pid_t		q_lspid;
+	pid_t		q_lrpid;
+
+	struct msg_data *next;
+};
+
+extern int ipc_msg_get_info(int id, struct msg_data **msgds);
+extern void ipc_msg_free_info(struct msg_data *msgds);
+
+#endif /* UTIL_LINUX_IPCUTILS_H */
diff --git a/sys-utils/ldattach.8 b/sys-utils/ldattach.8
new file mode 100644
index 0000000..1b4683d
--- /dev/null
+++ b/sys-utils/ldattach.8
@@ -0,0 +1,155 @@
+.\" Copyright 2008 Tilman Schmidt (tilman@imap.cc)
+.\" May be distributed under the GNU General Public License version 2 or later
+.TH LDATTACH 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+ldattach \- attach a line discipline to a serial line
+.SH SYNOPSIS
+.B ldattach
+.RB [ \-1278denoVh ]
+.RB [ \-i
+.IR iflag ]
+.RB [ \-s
+.IR speed ]
+.I ldisc device
+.SH DESCRIPTION
+The
+.B ldattach
+daemon opens the specified
+.I device
+file
+(which should refer to a serial device)
+and attaches the line discipline
+.I ldisc
+to it for processing of the sent and/or received data.
+It then goes into the background keeping the device open so that the
+line discipline stays loaded.
+.sp
+The line discipline
+.I ldisc
+may be specified either by name
+or by number.
+.sp
+In order to detach the line discipline,
+.BR kill (1)
+the
+.B ldattach
+process.
+.sp
+With no arguments,
+.B ldattach
+prints usage information.
+.SH LINE DISCIPLINES
+Depending on the kernel release, the following line disciplines are supported:
+.TP
+.BR TTY ( 0 )
+The default line discipline,
+providing transparent operation (raw mode)
+as well as the habitual terminal line editing capabilities (cooked mode).
+.TP
+.BR SLIP ( 1 )
+Serial Line IP (SLIP) protocol processor
+for transmitting TCP/IP packets over serial lines.
+.TP
+.BR MOUSE ( 2 )
+Device driver for RS232 connected pointing devices (serial mice).
+.TP
+.BR PPP ( 3 )
+Point to Point Protocol (PPP) processor
+for transmitting network packets over serial lines.
+.TP
+.BR STRIP ( 4 )
+.TP
+.BR AX25 ( 5 )
+.TP
+.BR X25 ( 6 )
+Line driver for transmitting X.25 packets over asynchronous serial lines.
+.TP
+.BR 6PACK ( 7 )
+.TP
+.BR R3964 ( 9 )
+Driver for Simatic R3964 module.
+.TP
+.BR IRDA ( 11 )
+Linux IrDa (infrared data transmission) driver -
+see http://irda.sourceforge.net/
+.TP
+.BR HDLC ( 13 )
+Synchronous HDLC driver.
+.TP
+.BR SYNC_PPP ( 14 )
+Synchronous PPP driver.
+.TP
+.BR HCI ( 15 )
+Bluetooth HCI UART driver.
+.TP
+.BR GIGASET_M101 ( 16 )
+Driver for Siemens Gigaset M101 serial DECT adapter.
+.TP
+.BR PPS ( 18 )
+Driver for serial line Pulse Per Second (PPS) source.
+.TP
+.BR GSM0710 ( 21 )
+Driver for GSM 07.10 multiplexing protocol modem (CMUX).
+.SH OPTIONS
+.TP
+.BR \-1 , " \-\-onestopbit"
+Set the number of stop bits of the serial line to one.
+.TP
+.BR \-2 , " \-\-twostopbits"
+Set the number of stop bits of the serial line to two.
+.TP
+.BR \-7 , " \-\-sevenbits"
+Set the character size of the serial line to 7 bits.
+.TP
+.BR \-8 , " \-\-eightbits"
+Set the character size of the serial line to 8 bits.
+.TP
+.BR \-d , " \-\-debug"
+Keep
+.B ldattach
+in the foreground so that it can be interrupted or debugged,
+and to print verbose messages about its progress to standard error output.
+.TP
+.BR \-e , " \-\-evenparity"
+Set the parity of the serial line to even.
+.TP
+.BR \-i , " \-\-iflag " [ \- ] \fIvalue\fR...
+Set the specified bits in the c_iflag word of the serial line.
+The given \fIvalue\fP may be a number or a symbolic name.
+If \fIvalue\fP is prefixed by a minus sign, the specified bits are cleared
+instead.  Several comma-separated values may be given in order to
+set and clear multiple bits.
+.TP
+.BR \-n , " \-\-noparity"
+Set the parity of the serial line to none.
+.TP
+.BR \-o , " \-\-oddparity"
+Set the parity of the serial line to odd.
+.TP
+.BR  \-s , " \-\-speed " \fIvalue
+Set the speed (the baud rate) of the serial line to the specified \fIvalue\fR.
+.TP
+.BR \-c , " \-\-intro\-command " \fIstring
+Define an intro command that is sent through the serial line before the invocation
+of ldattach. E.g. in conjunction with line discipline GSM0710, the command
+\'AT+CMUX=0\\r\' is commonly suitable to switch the modem into the CMUX mode.
+.TP
+.BR \-p , " \-\-pause "  \fIvalue
+Sleep for \fIvalue\fR seconds before the invocation of ldattach. Default is one second.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH "SEE ALSO"
+.BR inputattach (1),
+.BR ttys (4)
+.SH AUTHOR
+.nf
+Tilman Schmidt (tilman@imap.cc)
+.fi
+.SH AVAILABILITY
+The ldattach command is part of the util-linux package
+and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/ldattach.c b/sys-utils/ldattach.c
new file mode 100644
index 0000000..d33d685
--- /dev/null
+++ b/sys-utils/ldattach.c
@@ -0,0 +1,489 @@
+/* line discipline loading daemon
+ * open a serial device and attach a line discipline on it
+ *
+ * Usage:
+ *	ldattach GIGASET_M101 /dev/ttyS0
+ *
+ * =====================================================================
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ * =====================================================================
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "all-io.h"
+#include "nls.h"
+#include "strutils.h"
+#include "closestream.h"
+
+#include <signal.h>
+#include <sys/socket.h>
+#include <linux/if.h>
+
+#include <linux/tty.h>		/* for N_GSM0710 */
+
+#ifdef LINUX_GSMMUX_H	/* NOTE(review): nothing visible here defines this; was HAVE_LINUX_GSMMUX_H meant? */
+# include <linux/gsmmux.h>	/* Add by guowenxue */
+#else	/* fallback: local declaration of the n_gsm config struct and its ioctls */
+struct gsm_config
+{
+	unsigned int adaption;
+	unsigned int encapsulation;
+	unsigned int initiator;
+	unsigned int t1;
+	unsigned int t2;
+	unsigned int t3;
+	unsigned int n2;
+	unsigned int mru;
+	unsigned int mtu;
+	unsigned int k;
+	unsigned int i;
+	unsigned int unused[8];		/* Padding for expansion without
+					   breaking stuff */
+};
+# define GSMIOC_GETCONF		_IOR('G', 0, struct gsm_config)
+# define GSMIOC_SETCONF		_IOW('G', 1, struct gsm_config)
+#endif
+
+#ifndef N_GIGASET_M101
+# define N_GIGASET_M101 16
+#endif
+
+#ifndef N_PPS
+# define N_PPS 18
+#endif
+
+#ifndef N_GSM0710
+# define N_GSM0710 21
+#endif
+
+#define MAXINTROPARMLEN 32
+
+/* attach a line discipline ioctl */
+#ifndef TIOCSETD
+# define TIOCSETD   0x5423
+#endif
+
+static int debug = 0;
+
+struct ld_table {
+	const char *name;	/* symbolic name; lookup_table() compares it case-insensitively */
+	int value;		/* number lookup_table() returns for that name */
+};
+
+/* currently supported line disciplines, plus some aliases */
+static const struct ld_table ld_discs[] = {
+	{ "TTY",		N_TTY },
+	{ "SLIP",		N_SLIP },
+	{ "MOUSE",		N_MOUSE },
+	{ "PPP",		N_PPP },
+	{ "STRIP",		N_STRIP },
+	{ "AX25",		N_AX25 },
+	{ "X25",		N_X25 },
+	{ "6PACK",		N_6PACK },
+	{ "R3964",		N_R3964 },
+	{ "IRDA",		N_IRDA },
+	{ "HDLC",		N_HDLC },
+	{ "SYNC_PPP",		N_SYNC_PPP },
+	{ "SYNCPPP",		N_SYNC_PPP },
+	{ "HCI",		N_HCI },
+	{ "GIGASET_M101",	N_GIGASET_M101 },
+	{ "M101",		N_GIGASET_M101 },
+	{ "GIGASET",		N_GIGASET_M101 },
+	{ "PPS",		N_PPS },
+	{ "GSM0710",		N_GSM0710},
+	{ NULL,	0 }	/* NULL name ends the table for lookup_table()/print_table() */
+};
+
+/* known c_iflag names; parse_iflag() resolves -i arguments against this table */
+static const struct ld_table ld_iflags[] =
+{
+	{ "IGNBRK",	IGNBRK },
+	{ "BRKINT",	BRKINT },
+	{ "IGNPAR",	IGNPAR },
+	{ "PARMRK",	PARMRK },
+	{ "INPCK",	INPCK },
+	{ "ISTRIP",	ISTRIP },
+	{ "INLCR",	INLCR },
+	{ "IGNCR",	IGNCR },
+	{ "ICRNL",	ICRNL },
+	{ "IUCLC",	IUCLC },
+	{ "IXON",	IXON },
+	{ "IXANY",	IXANY },
+	{ "IXOFF",	IXOFF },
+	{ "IMAXBEL",	IMAXBEL },
+	{ "IUTF8",	IUTF8 },
+	{ NULL,		0 }	/* NULL name ends the table for lookup_table()/print_table() */
+};
+
+/* Emit a printf-style diagnostic on stderr; silent unless 'debug' is set. */
+static void dbg(char *fmt, ...)
+{
+	va_list ap;
+
+	if (!debug)
+		return;
+	fflush(NULL);		/* flush all open output streams before the message */
+	va_start(ap, fmt);
+#ifndef HAVE_VWARNX
+	fprintf(stderr, "%s: ", program_invocation_short_name);
+	vfprintf(stderr, fmt, ap);
+	fprintf(stderr, "\n");
+#else
+	vwarnx(fmt, ap);
+#endif
+	va_end(ap);
+	fflush(NULL);
+}
+
+/* Case-insensitive search of TAB for STR; returns its value, or -1 if absent. */
+static int lookup_table(const struct ld_table *tab, const char *str)
+{
+	if (tab)
+		for (; tab->name != NULL; tab++)	/* NULL name ends the table */
+			if (strcasecmp(tab->name, str) == 0)
+				return tab->value;
+	return -1;
+}
+
+/* List every name in TAB on OUT, five per row. */
+static void print_table(FILE * out, const struct ld_table *tab)
+{
+	int printed = 0;
+
+	for (; tab && tab->name; tab++) {
+		fprintf(out, "  %-12s", tab->name);
+		if (++printed % 5 == 0)	/* line break after every fifth name */
+			fputc('\n', out);
+	}
+}
+
+/* Split comma-separated STR into bits to set and, for "-" prefixed items, clear. */
+static int parse_iflag(char *str, int *set_iflag, int *clr_iflag)
+{
+	char *tok;
+
+	for (tok = strtok(str, ","); tok; tok = strtok(NULL, ",")) {
+		int negate = (*tok == '-');	/* a leading '-' means "clear" */
+		int bits;
+
+		tok += negate;
+		bits = lookup_table(ld_iflags, tok);
+		if (bits < 0)		/* not a known name: must be a number */
+			bits = strtos32_or_err(tok, _("invalid iflag"));
+		*(negate ? clr_iflag : set_iflag) |= bits;
+	}
+	dbg("iflag (set/clear): %d/%d", *set_iflag, *clr_iflag);
+	return 0;
+}
+
+
+static void __attribute__((__noreturn__)) usage(void)	/* print the help text and exit */
+{
+	FILE *out = stdout;	/* help was asked for, so it is regular output */
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options] <ldisc> <device>\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Attach a line discipline to a serial line.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -d, --debug             print verbose messages to stderr\n"), out);
+	fputs(_(" -s, --speed <value>     set serial line speed\n"), out);
+	fputs(_(" -c, --intro-command <string> intro sent before ldattach\n"), out);
+	fputs(_(" -p, --pause <seconds>   pause between intro and ldattach\n"), out);
+	fputs(_(" -7, --sevenbits         set character size to 7 bits\n"), out);
+	fputs(_(" -8, --eightbits         set character size to 8 bits\n"), out);
+	fputs(_(" -n, --noparity          set parity to none\n"), out);
+	fputs(_(" -e, --evenparity        set parity to even\n"), out);
+	fputs(_(" -o, --oddparity         set parity to odd\n"), out);
+	fputs(_(" -1, --onestopbit        set stop bits to one\n"), out);
+	fputs(_(" -2, --twostopbits       set stop bits to two\n"), out);
+	fputs(_(" -i, --iflag [-]<iflag>  set input mode flag\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(25));
+
+	fputs(_("\nKnown <ldisc> names:\n"), out);
+	print_table(out, ld_discs);	/* every name accepted as <ldisc> */
+	fputs(USAGE_SEPARATOR, out);
+
+	fputs(_("\nKnown <iflag> names:\n"), out);
+	print_table(out, ld_iflags);	/* every name accepted by -i */
+
+	printf(USAGE_MAN_TAIL("ldattach(8)"));
+	exit(EXIT_SUCCESS);	/* never returns, matching the noreturn attribute */
+}
+
+/* Set input and output speed in TS; returns 0 on success, -1 if unsupported. */
+static int my_cfsetspeed(struct termios *ts, int speed)
+{
+	/* Standard speeds: cfsetspeed() translates the number to a Bxxx constant */
+	if (cfsetspeed(ts, speed) == 0)
+		return 0;
+
+	/* Nonstandard speeds: bypass glibc and set the speed manually, because
+	 * glibc checks the speed and supports Bxxx bit rates only.
+	 */
+#ifdef _HAVE_STRUCT_TERMIOS_C_ISPEED
+# ifndef BOTHER			/* keep a definition the headers already provide */
+#  define BOTHER 0010000	/* non standard rate */
+# endif
+	dbg("using non-standard speeds");
+	ts->c_ospeed = ts->c_ispeed = speed;
+	ts->c_cflag &= ~CBAUD;
+	ts->c_cflag |= BOTHER;
+	return 0;
+#else
+	return -1;
+#endif
+}
+
+static void handler(int s)	/* signal handler installed by main(); S is the signal number */
+{
+	dbg("got SIG %i -> exiting", s);	/* NOTE(review): dbg() uses stdio inside a signal handler */
+	exit(EXIT_SUCCESS);
+}
+
+/* Best-effort n_gsm setup, run after attaching N_GSM0710; failures only warn. */
+static void gsm0710_set_conf(int tty_fd)
+{
+	struct gsm_config c;
+
+	/* read the current n_gsm configuration; never write back garbage */
+	if (ioctl(tty_fd, GSMIOC_GETCONF, &c) < 0) {
+		warn(_("cannot get GSM0710 configuration"));
+		return;
+	}
+	c.initiator = 1;		/* we are the initiator ... */
+	c.encapsulation = 0;		/* ... and need encoding 0 (basic) */
+	c.mru = c.mtu = 127;		/* our modem defaults to at most 127 bytes */
+
+	if (ioctl(tty_fd, GSMIOC_SETCONF, &c) < 0)
+		warn(_("cannot set GSM0710 configuration"));
+}
+
+/* Parse options, configure the tty, attach the line discipline, then sleep. */
+int main(int argc, char **argv)
+{
+	int tty_fd;
+	struct termios ts;
+	int speed = 0, bits = '-', parity = '-', stop = '-';
+	int set_iflag = 0, clr_iflag = 0;
+	int ldisc;
+	int optc;
+	char *dev;
+	unsigned int intropause = 1;	/* unsigned, so huge values fail the > 10 check */
+	char *introparm = NULL;
+
+	static const struct option opttbl[] = {
+		{"speed", required_argument, NULL, 's'},
+		{"sevenbits", no_argument, NULL, '7'},
+		{"eightbits", no_argument, NULL, '8'},
+		{"noparity", no_argument, NULL, 'n'},
+		{"evenparity", no_argument, NULL, 'e'},
+		{"oddparity", no_argument, NULL, 'o'},
+		{"onestopbit", no_argument, NULL, '1'},
+		{"twostopbits", no_argument, NULL, '2'},
+		{"iflag", required_argument, NULL, 'i'},
+		{"help", no_argument, NULL, 'h'},
+		{"version", no_argument, NULL, 'V'},
+		{"debug", no_argument, NULL, 'd'},
+		{"intro-command", required_argument, NULL, 'c'},	/* matches "c:" */
+		{"pause", required_argument, NULL, 'p'},		/* matches "p:" */
+		{NULL, 0, NULL, 0}
+	};
+
+	signal(SIGKILL, handler);	/* NOTE(review): SIGKILL cannot be caught; this call has no effect */
+	signal(SIGINT, handler);
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	/* parse options */
+	if (argc == 0)
+		errx(EXIT_FAILURE, _("bad usage"));
+
+	while ((optc = getopt_long(argc, argv, "dhV78neo12s:i:c:p:",
+				   opttbl, NULL)) >= 0) {
+		switch (optc) {
+		case 'd':
+			debug = 1;
+			break;
+		case '1':
+		case '2':
+			stop = optc;
+			break;
+		case '7':
+		case '8':
+			bits = optc;
+			break;
+		case 'n':
+		case 'e':
+		case 'o':
+			parity = optc;
+			break;
+		case 's':
+			speed = strtos32_or_err(optarg, _("invalid speed argument"));
+			break;
+		case 'p':
+			intropause = strtou32_or_err(optarg, _("invalid pause argument"));
+			if (intropause > 10)
+				errx(EXIT_FAILURE, "invalid pause: %s", optarg);
+			break;
+		case 'c':
+			introparm = optarg;
+			break;
+		case 'i':
+			parse_iflag(optarg, &set_iflag, &clr_iflag);
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (argc - optind != 2) {
+		warnx(_("not enough arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+	/* parse line discipline specification */
+	ldisc = lookup_table(ld_discs, argv[optind]);
+	if (ldisc < 0)
+		ldisc = strtos32_or_err(argv[optind], _("invalid line discipline argument"));
+
+	/* ldisc specific option settings */
+	if (ldisc == N_GIGASET_M101) {
+		/* device specific defaults for line speed and data format */
+		if (speed == 0)
+			speed = 115200;
+		if (bits == '-')
+			bits = '8';
+		if (parity == '-')
+			parity = 'n';
+		if (stop == '-')
+			stop = '1';
+	}
+
+	/* open device */
+	dev = argv[optind + 1];
+	if ((tty_fd = open(dev, O_RDWR | O_NOCTTY)) < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), dev);
+	if (!isatty(tty_fd))
+		errx(EXIT_FAILURE, _("%s is not a serial line"), dev);
+
+	dbg("opened %s", dev);
+
+	/* set line speed and format */
+	if (tcgetattr(tty_fd, &ts) < 0)
+		err(EXIT_FAILURE,
+		    _("cannot get terminal attributes for %s"), dev);
+	cfmakeraw(&ts);
+	if (speed && my_cfsetspeed(&ts, speed) < 0)
+		errx(EXIT_FAILURE, _("speed %d unsupported"), speed);
+
+	switch (stop) {
+	case '1':
+		ts.c_cflag &= ~CSTOPB;
+		break;
+	case '2':
+		ts.c_cflag |= CSTOPB;
+		break;
+	case '-':
+		break;
+	default:
+		abort();
+	}
+	switch (bits) {
+	case '7':
+		ts.c_cflag = (ts.c_cflag & ~CSIZE) | CS7;
+		break;
+	case '8':
+		ts.c_cflag = (ts.c_cflag & ~CSIZE) | CS8;
+		break;
+	case '-':
+		break;
+	default:
+		abort();
+	}
+	switch (parity) {
+	case 'n':
+		ts.c_cflag &= ~(PARENB | PARODD);
+		break;
+	case 'e':
+		ts.c_cflag |= PARENB;
+		ts.c_cflag &= ~PARODD;
+		break;
+	case 'o':
+		ts.c_cflag |= (PARENB | PARODD);
+		break;
+	case '-':
+		break;
+	default:
+		abort();
+	}
+
+	ts.c_cflag |= CREAD;	/* just to be on the safe side */
+	ts.c_iflag |= set_iflag;
+	ts.c_iflag &= ~clr_iflag;
+
+	if (tcsetattr(tty_fd, TCSAFLUSH, &ts) < 0)
+		err(EXIT_FAILURE,
+		    _("cannot set terminal attributes for %s"), dev);
+
+	dbg("set to raw %d %c%c%c: cflag=0x%x",
+	    speed, bits, parity, stop, ts.c_cflag);
+
+	if (introparm && *introparm)
+	{
+		dbg("intro command is '%s'", introparm);
+		if (write_all(tty_fd, introparm, strlen(introparm)) != 0)
+			err(EXIT_FAILURE,
+			    _("cannot write intro command to %s"), dev);
+
+		if (intropause) {
+			dbg("waiting for %u seconds", intropause);
+			sleep(intropause);
+		}
+	}
+
+	/* Attach the line discipline. */
+	if (ioctl(tty_fd, TIOCSETD, &ldisc) < 0)
+		err(EXIT_FAILURE, _("cannot set line discipline"));
+
+	dbg("line discipline set to %d", ldisc);
+
+	/* ldisc specific post-attach actions */
+	if (ldisc == N_GSM0710)
+		gsm0710_set_conf(tty_fd);
+
+	/* Go into background if not in debug mode. */
+	if (!debug && daemon(0, 0) < 0)
+		err(EXIT_FAILURE, _("cannot daemonize"));
+
+	/* Sleep to keep the line discipline active. */
+	pause();
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/sys-utils/losetup.8 b/sys-utils/losetup.8
new file mode 100644
index 0000000..c31d747
--- /dev/null
+++ b/sys-utils/losetup.8
@@ -0,0 +1,208 @@
+.TH LOSETUP 8 "November 2015" "util-linux" "System Administration"
+.SH NAME
+losetup \- set up and control loop devices
+.SH SYNOPSIS
+.ad l
+Get info:
+.sp
+.in +5
+.B losetup
+[\fIloopdev\fP]
+.sp
+.B losetup \-l
+.RB [ \-a ]
+.sp
+.B losetup \-j
+.I file
+.RB [ \-o
+.IR offset ]
+.sp
+.in -5
+Detach a loop device:
+.sp
+.in +5
+.B "losetup \-d"
+.IR loopdev ...
+.sp
+.in -5
+Detach all associated loop devices:
+.sp
+.in +5
+.B "losetup \-D"
+.sp
+.in -5
+Set up a loop device:
+.sp
+.in +5
+.B losetup
+.RB [ \-o
+.IR offset ]
+.RB [ \-\-sizelimit
+.IR size ]
+.RB [ \-\-sector\-size
+.IR size ]
+.in +8
+.RB [ \-Pr ]
+.RB [ \-\-show ]  " \-f" | \fIloopdev\fP
+.I file
+.sp
+.in -13
+Resize a loop device:
+.sp
+.in +5
+.B "losetup \-c"
+.I loopdev
+.in -5
+.ad b
+.SH DESCRIPTION
+.B losetup
+is used to associate loop devices with regular files or block devices,
+to detach loop devices, and to query the status of a loop device.  If only the
+\fIloopdev\fP argument is given, the status of the corresponding loop
+device is shown.  If no option is given, all loop devices are shown.
+.sp
+Note that the old output format (i.e., \fBlosetup \-a\fR) with comma-delimited
+strings is deprecated in favour of the \fB\-\-list\fR output format.
+.sp
+It's possible to create multiple independent loop devices for the same backing
+file.
+.B This setup may be dangerous, can cause data loss, corruption and overwrites.
+Use \fB\-\-nooverlap\fR with \fB\-\-find\fR during setup to avoid this problem.
+
+.SH OPTIONS
+The \fIsize\fR and \fIoffset\fR
+arguments may be followed by the multiplicative suffixes KiB (=1024),
+MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is
+optional, e.g., "K" has the same meaning as "KiB") or the suffixes
+KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB.
+
+.TP
+.BR \-a , " \-\-all"
+Show the status of all loop devices.  Note that not all information is accessible
+for non-root users.  See also \fB\-\-list\fR.  The old output format (as printed
+without \fB\-\-list\fR) is deprecated.
+.TP
+.BR \-d , " \-\-detach " \fIloopdev\fR...
+Detach the file or device associated with the specified loop device(s).  Note
+that since Linux v3.7 the kernel uses "lazy device destruction".  The detach
+operation no longer returns an EBUSY error if the device is actively used by the
+system; instead the device is marked with the autoclear flag and destroyed later.
+.TP
+.BR \-D , " \-\-detach\-all"
+Detach all associated loop devices.
+.TP
+.BR \-f , " \-\-find " "\fR[\fIfile\fR]"
+Find the first unused loop device.  If a \fIfile\fR argument is present, use
+the found device as loop device.  Otherwise, just print its name.
+.IP "\fB\-\-show\fP"
+Display the name of the assigned loop device if the \fB\-f\fP option and a
+\fIfile\fP argument are present.
+.TP
+.BR \-L , " \-\-nooverlap"
+Check for conflicts between loop devices to avoid the situation where the same
+backing file is shared between multiple loop devices.  If the file is already used
+by another device, then re-use that device rather than creating a new one.  The
+option makes sense only with \fB\-\-find\fP.
+.TP
+.BR \-j , " \-\-associated " \fIfile\fR " \fR[\fB\-o \fIoffset\fR]"
+Show the status of all loop devices associated with the given \fIfile\fR.
+.TP
+.BR \-o , " \-\-offset " \fIoffset
+The data start is moved \fIoffset\fP bytes into the specified file or device.  The \fIoffset\fP
+may be followed by the multiplicative suffixes; see above.
+.IP "\fB\-\-sizelimit \fIsize\fP"
+The data end is set to no more than \fIsize\fP bytes after the data start.  The \fIsize\fP
+may be followed by the multiplicative suffixes; see above.
+.TP
+.BR \-b , " \-\-sector\-size " \fIsize
+Set the logical sector size of the loop device in bytes (since Linux 4.14).  The
+option may be used when creating a new loop device as well as a stand-alone command
+to modify the sector size of an already existing loop device.
+.TP
+.BR \-c , " \-\-set\-capacity " \fIloopdev
+Force the loop driver to reread the size of the file associated with the
+specified loop device.
+.TP
+.BR \-P , " \-\-partscan"
+Force the kernel to scan the partition table on a newly created loop device.
+.TP
+.BR \-r , " \-\-read\-only"
+Set up a read-only loop device.
+.TP
+.BR \-\-direct\-io [ =on | off ]
+Enable or disable direct I/O for the backing file.  The optional argument
+can be either \fBon\fR or \fBoff\fR.  If the argument is omitted, it defaults
+to \fBon\fR.
+.TP
+.BR \-v , " \-\-verbose"
+Verbose mode.
+.TP
+.BR \-l , " \-\-list"
+If a loop device or the \fB\-a\fR option is specified, print the default columns
+for either the specified loop device or all loop devices; the default is to
+print info about all devices.  See also \fB\-\-output\fP, \fB\-\-noheadings\fP,
+\fB\-\-raw\fP, and \fB\-\-json\fP.
+.TP
+.BR \-O , " \-\-output " \fIcolumn\fR[,\fIcolumn\fR]...
+Specify the columns that are to be printed for the \fB\-\-list\fP output.
+Use \fB\-\-help\fR to get a list of all supported columns.
+.TP
+.B \-\-output\-all
+Output all available columns.
+.TP
+.BR \-n , " \-\-noheadings"
+Don't print headings for \fB\-\-list\fP output format.
+.IP "\fB\-\-raw\fP"
+Use the raw \fB\-\-list\fP output format.
+.TP
+.BR \-J , " \-\-json"
+Use JSON format for \fB\-\-list\fP output.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+
+.SH ENCRYPTION
+.B Cryptoloop is no longer supported in favor of dm-crypt.
+.B For more details see cryptsetup(8).
+
+.SH RETURN VALUE
+.B losetup
+returns 0 on success, nonzero on failure.  When
+.B losetup
+displays the status of a loop device, it returns 1 if the device
+is not configured and 2 if an error occurred which prevented
+determining the status of the device.
+
+.SH FILES
+.TP
+.I /dev/loop[0..N]
+loop block devices
+.TP
+.I /dev/loop-control
+loop control device
+
+.SH EXAMPLE
+The following commands can be used as an example of using the loop device.
+.nf
+.IP
+# dd if=/dev/zero of=~/file.img bs=1024k count=10
+# losetup --find --show ~/file.img
+/dev/loop0
+# mkfs -t ext2 /dev/loop0
+# mount /dev/loop0 /mnt
+ ...
+# umount /dev/loop0
+# losetup --detach /dev/loop0
+.fi
+.SH ENVIRONMENT
+.IP LOOPDEV_DEBUG=all
+enables debug output.
+.SH AUTHORS
+Karel Zak <kzak@redhat.com>, based on the original version from
+Theodore Ts'o <tytso@athena.mit.edu>
+.SH AVAILABILITY
+The losetup command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/losetup.c b/sys-utils/losetup.c
new file mode 100644
index 0000000..7d14f56
--- /dev/null
+++ b/sys-utils/losetup.c
@@ -0,0 +1,917 @@
+/*
+ * Copyright (C) 2011 Karel Zak <kzak@redhat.com>
+ * Originally from Ted's losetup.c
+ *
+ * losetup.c - setup and control loop devices
+ */
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <inttypes.h>
+#include <getopt.h>
+
+#include <libsmartcols.h>
+
+#include "c.h"
+#include "nls.h"
+#include "strutils.h"
+#include "loopdev.h"
+#include "closestream.h"
+#include "optutils.h"
+#include "xalloc.h"
+#include "canonicalize.h"
+#include "pathnames.h"
+
+/*
+ * Actions selectable from the command line.  main() keeps the chosen one in
+ * its 'act' variable; 0 means that no action has been selected yet.
+ */
+enum {
+	A_CREATE = 1,		/* setup a new device */
+	A_DELETE,		/* delete given device(s) */
+	A_DELETE_ALL,		/* delete all devices */
+	A_SHOW,			/* list devices */
+	A_SHOW_ONE,		/* print info about one device */
+	A_FIND_FREE,		/* find first unused */
+	A_SET_CAPACITY,		/* set device capacity */
+	A_SET_DIRECT_IO,	/* set accessing backing file by direct io */
+	A_SET_BLOCKSIZE,	/* set logical block size of the loop device */
+};
+
+/*
+ * Identifiers of the --list output columns.  The values are used as indexes
+ * into infos[] and are what gets stored in columns[].
+ */
+enum {
+	COL_NAME = 0,
+	COL_AUTOCLR,
+	COL_BACK_FILE,
+	COL_BACK_INO,
+	COL_BACK_MAJMIN,
+	COL_MAJMIN,
+	COL_OFFSET,
+	COL_PARTSCAN,
+	COL_RO,
+	COL_SIZELIMIT,
+	COL_DIO,
+	COL_LOGSEC,
+};
+
+/* basic output flags; set by main() while parsing options, read by show_table() */
+static int no_headings;	/* -n, --noheadings */
+static int raw;		/* --raw */
+static int json;		/* -J, --json */
+
+/* Static description of one --list output column (see infos[]). */
+struct colinfo {
+	const char *name;	/* column header; also matched by column_name_to_id() */
+	double whint;		/* passed as-is to scols_table_new_column() */
+	int flags;		/* SCOLS_FL_* flags passed to scols_table_new_column() */
+	const char *help;	/* description printed by usage(), translated there */
+
+	int json_type;	/* default is string */
+};
+
+/*
+ * Column descriptions indexed by COL_* id.  Designated initializers are used,
+ * so the textual order of the entries does not matter.  Entries that do not
+ * give a json_type leave that member zero-initialized.
+ */
+static struct colinfo infos[] = {
+	[COL_AUTOCLR]     = { "AUTOCLEAR",    1, SCOLS_FL_RIGHT, N_("autoclear flag set"), SCOLS_JSON_BOOLEAN},
+	[COL_BACK_FILE]   = { "BACK-FILE",  0.3, 0, N_("device backing file")},
+	[COL_BACK_INO]    = { "BACK-INO",     4, SCOLS_FL_RIGHT, N_("backing file inode number"), SCOLS_JSON_NUMBER},
+	[COL_BACK_MAJMIN] = { "BACK-MAJ:MIN", 6, 0, N_("backing file major:minor device number")},
+	[COL_NAME]        = { "NAME",      0.25, 0, N_("loop device name")},
+	[COL_OFFSET]      = { "OFFSET",       5, SCOLS_FL_RIGHT, N_("offset from the beginning"), SCOLS_JSON_NUMBER},
+	[COL_PARTSCAN]    = { "PARTSCAN",     1, SCOLS_FL_RIGHT, N_("partscan flag set"), SCOLS_JSON_BOOLEAN},
+	[COL_RO]          = { "RO",           1, SCOLS_FL_RIGHT, N_("read-only device"), SCOLS_JSON_BOOLEAN},
+	[COL_SIZELIMIT]   = { "SIZELIMIT",    5, SCOLS_FL_RIGHT, N_("size limit of the file in bytes"), SCOLS_JSON_NUMBER},
+	[COL_MAJMIN]      = { "MAJ:MIN",      3, 0, N_("loop device major:minor number")},
+	[COL_DIO]         = { "DIO",          1, SCOLS_FL_RIGHT, N_("access backing file with direct-io"), SCOLS_JSON_BOOLEAN},
+	[COL_LOGSEC]      = { "LOG-SEC",      4, SCOLS_FL_RIGHT, N_("logical sector size in bytes"), SCOLS_JSON_NUMBER},
+};
+
+/*
+ * columns[] holds the COL_* ids selected for output, in output order, and
+ * ncolumns is the number of valid entries.  Note that the initializer sets
+ * only columns[0] to -1; the remaining elements are zero-initialized.
+ */
+static int columns[ARRAY_SIZE(infos) * 2] = {-1};
+static size_t ncolumns;
+
+/*
+ * Translate the output position @num (0 <= num < ncolumns) to the COL_* id
+ * stored in columns[].  The arguments are verified by assert() only.
+ */
+static int get_column_id(int num)
+{
+	int id;
+
+	assert(num >= 0);
+	assert((size_t) num < ncolumns);
+
+	id = columns[num];
+	assert(id < (int) ARRAY_SIZE(infos));
+
+	return id;
+}
+
+/* Return the infos[] entry describing the column at output position @num. */
+static struct colinfo *get_column_info(int num)
+{
+	const int id = get_column_id(num);
+
+	return infos + id;
+}
+
+/*
+ * Look up a column by name.  The first @namesz bytes of @name are compared
+ * case-insensitively and the column name has to end right after them.
+ *
+ * Returns the index into infos[], or -1 (after printing a warning) when no
+ * column matches.
+ */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(infos)) {
+		const char *candidate = infos[idx].name;
+
+		if (strncasecmp(name, candidate, namesz) == 0
+		    && candidate[namesz] == '\0')
+			return idx;
+		idx++;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/*
+ * Print the old-style one-line description of the loop device selected in
+ * @lc to stdout:
+ *
+ *	<device>: [<devno>]:<inode> (<backing file>)[, offset N][, sizelimit N]...
+ *
+ * Returns 0 on success or -EINVAL when the backing file is unknown.
+ */
+static int printf_loopdev(struct loopdev_cxt *lc)
+{
+	uint64_t x;
+	dev_t dev = 0;
+	ino_t ino = 0;
+	char *fname;
+	uint32_t type;
+	int restricted;
+
+	fname = loopcxt_get_backing_file(lc);
+	if (!fname)
+		return -EINVAL;
+
+	if (loopcxt_get_backing_devno(lc, &dev) == 0)
+		loopcxt_get_backing_inode(lc, &ino);
+
+	/*
+	 * Neither device nor inode number is available: probably non-root
+	 * user (no permissions to call LOOP_GET_STATUS ioctls).
+	 */
+	restricted = !dev && !ino;
+
+	if (restricted)
+		printf("%s: []: (%s)",
+			loopcxt_get_device(lc), fname);
+	else
+		/* ino_t is not necessarily 64-bit, so cast it to match PRIu64 */
+		printf("%s: [%04d]:%" PRIu64 " (%s)",
+			loopcxt_get_device(lc), (int) dev, (uint64_t) ino, fname);
+
+	/* %ju expects uintmax_t, hence the explicit casts */
+	if (loopcxt_get_offset(lc, &x) == 0 && x)
+		printf(_(", offset %ju"), (uintmax_t) x);
+
+	if (loopcxt_get_sizelimit(lc, &x) == 0 && x)
+		printf(_(", sizelimit %ju"), (uintmax_t) x);
+
+	/* the encryption details are printed only in the unrestricted case */
+	if (!restricted && loopcxt_get_encrypt_type(lc, &type) == 0) {
+		const char *e = loopcxt_get_crypt_name(lc);
+
+		if ((!e || !*e) && type == 1)
+			e = "XOR";
+		if (e && *e)
+			printf(_(", encryption %s (type %u)"), e, (unsigned int) type);
+	}
+
+	free(fname);
+	printf("\n");
+	return 0;
+}
+
+/*
+ * Print (old output format) all used loop devices.  When @file is given,
+ * only the devices backed by @file (with respect to @offset and @flags) are
+ * printed; the canonicalized path is tried when the path as given by the
+ * user does not match.
+ *
+ * Returns 0 on success, -1 if the iterator cannot be initialized.
+ */
+static int show_all_loops(struct loopdev_cxt *lc, const char *file,
+			  uint64_t offset, int flags)
+{
+	struct stat sbuf;
+	struct stat *st = NULL;
+	char *canonical = NULL;		/* allocated on demand */
+
+	if (loopcxt_init_iterator(lc, LOOPITER_FL_USED) != 0)
+		return -1;
+
+	if (file && stat(file, &sbuf) == 0)
+		st = &sbuf;
+
+	while (loopcxt_next(lc) == 0) {
+		int match = 1;
+
+		if (file) {
+			match = loopcxt_is_used(lc, st,
+					canonical ? canonical : file,
+					offset, 0, flags);
+			if (!match && !canonical) {
+				canonical = canonicalize_path(file);
+				match = loopcxt_is_used(lc, st, canonical,
+						offset, 0, flags);
+			}
+		}
+		if (match)
+			printf_loopdev(lc);
+	}
+
+	loopcxt_deinit_iterator(lc);
+	free(canonical);
+	return 0;
+}
+
+/*
+ * Detach the loop device selected in @lc.
+ *
+ * Returns 0 on success; on failure prints a warning and returns -1.
+ */
+static int delete_loop(struct loopdev_cxt *lc)
+{
+	if (loopcxt_delete_device(lc) == 0)
+		return 0;
+
+	warn(_("%s: detach failed"), loopcxt_get_device(lc));
+	return -1;
+}
+
+/*
+ * Detach all used loop devices.
+ *
+ * Returns 0 on success, -1 if the iterator cannot be initialized, otherwise
+ * the (negative) sum of the delete_loop() results.
+ */
+static int delete_all_loops(struct loopdev_cxt *lc)
+{
+	int failures = 0;
+
+	if (loopcxt_init_iterator(lc, LOOPITER_FL_USED) != 0)
+		return -1;
+
+	for (;;) {
+		if (loopcxt_next(lc) != 0)
+			break;
+		failures += delete_loop(lc);
+	}
+
+	loopcxt_deinit_iterator(lc);
+	return failures;
+}
+
+/*
+ * Fill the table line @ln with the data of the loop device selected in @lc,
+ * one cell for each column listed in columns[].
+ *
+ * Returns 0 on success or -EINVAL when columns[] contains an unknown column
+ * id.  Failure to store a cell is fatal (err()).
+ */
+static int set_scols_data(struct loopdev_cxt *lc, struct libscols_line *ln)
+{
+	size_t i;
+
+	for (i = 0; i < ncolumns; i++) {
+		const char *p = NULL;			/* external data */
+		char *np = NULL;			/* allocated here */
+		uint64_t x = 0;
+		int rc = 0;
+
+		switch(get_column_id(i)) {
+		case COL_NAME:
+			p = loopcxt_get_device(lc);
+			break;
+		case COL_BACK_FILE:
+			/*
+			 * The returned string is allocated (printf_loopdev()
+			 * has to free() it), so hand the ownership over to the
+			 * table rather than duplicating and leaking it.
+			 */
+			np = loopcxt_get_backing_file(lc);
+			break;
+		case COL_OFFSET:
+			/* unsigned 64-bit value: %ju, not the signed %jd */
+			if (loopcxt_get_offset(lc, &x) == 0)
+				xasprintf(&np, "%ju", (uintmax_t) x);
+			break;
+		case COL_SIZELIMIT:
+			if (loopcxt_get_sizelimit(lc, &x) == 0)
+				xasprintf(&np, "%ju", (uintmax_t) x);
+			break;
+		case COL_BACK_MAJMIN:
+		{
+			dev_t dev = 0;
+			if (loopcxt_get_backing_devno(lc, &dev) == 0 && dev)
+				xasprintf(&np, "%8u:%-3u", major(dev), minor(dev));
+			break;
+		}
+		case COL_MAJMIN:
+		{
+			struct stat st;
+
+			if (loopcxt_get_device(lc)
+			    && stat(loopcxt_get_device(lc), &st) == 0
+			    && S_ISBLK(st.st_mode)
+			    && major(st.st_rdev) == LOOPDEV_MAJOR)
+				xasprintf(&np, "%3u:%-3u", major(st.st_rdev),
+						           minor(st.st_rdev));
+			break;
+		}
+		case COL_BACK_INO:
+		{
+			ino_t ino = 0;
+			/* ino_t width varies; cast to what %ju expects */
+			if (loopcxt_get_backing_inode(lc, &ino) == 0 && ino)
+				xasprintf(&np, "%ju", (uintmax_t) ino);
+			break;
+		}
+		case COL_AUTOCLR:
+			p = loopcxt_is_autoclear(lc) ? "1" : "0";
+			break;
+		case COL_RO:
+			p = loopcxt_is_readonly(lc) ? "1" : "0";
+			break;
+		case COL_DIO:
+			p = loopcxt_is_dio(lc) ? "1" : "0";
+			break;
+		case COL_PARTSCAN:
+			p = loopcxt_is_partscan(lc) ? "1" : "0";
+			break;
+		case COL_LOGSEC:
+			if (loopcxt_get_blocksize(lc, &x) == 0)
+				xasprintf(&np, "%ju", (uintmax_t) x);
+			break;
+		default:
+			return -EINVAL;
+		}
+
+
+		if (p)
+			rc = scols_line_set_data(ln, i, p);	/* calls strdup() */
+		else if (np)
+			rc = scols_line_refer_data(ln, i, np);	/* only refers */
+
+		if (rc)
+			err(EXIT_FAILURE, _("failed to add output data"));
+	}
+
+	return 0;
+}
+
+/*
+ * Print the --list table.  If a device is already assigned to @lc, only
+ * that device is printed; otherwise all used devices are listed, optionally
+ * restricted to the ones backed by @file (with respect to @offset and
+ * @flags).
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing step.
+ */
+static int show_table(struct loopdev_cxt *lc,
+		      const char *file,
+		      uint64_t offset,
+		      int flags)
+{
+	struct stat sbuf;
+	struct libscols_table *tb;
+	struct libscols_line *ln;
+	int rc = 0;
+	size_t n;
+
+	scols_init_debug(0);
+
+	tb = scols_new_table();
+	if (!tb)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+
+	scols_table_enable_raw(tb, raw);
+	scols_table_enable_json(tb, json);
+	scols_table_enable_noheadings(tb, no_headings);
+
+	if (json)
+		scols_table_set_name(tb, "loopdevices");
+
+	for (n = 0; n < ncolumns; n++) {
+		struct colinfo *ci = get_column_info(n);
+		struct libscols_column *cl =
+			scols_table_new_column(tb, ci->name, ci->whint, ci->flags);
+
+		if (!cl)
+			err(EXIT_FAILURE, _("failed to allocate output column"));
+		if (json)
+			scols_column_set_json_type(cl, ci->json_type);
+	}
+
+	if (loopcxt_get_device(lc)) {
+		/* only one loopdev requested (already assigned to loopdev_cxt) */
+		ln = scols_table_new_line(tb, NULL);
+		if (!ln)
+			err(EXIT_FAILURE, _("failed to allocate output line"));
+		rc = set_scols_data(lc, ln);
+
+	} else if ((rc = loopcxt_init_iterator(lc, LOOPITER_FL_USED)) == 0) {
+		/* list all loopdevs */
+		struct stat *st = NULL;
+		char *canonical = NULL;		/* allocated on demand */
+
+		if (file && stat(file, &sbuf) == 0)
+			st = &sbuf;
+
+		while (loopcxt_next(lc) == 0) {
+			if (file) {
+				int match = loopcxt_is_used(lc, st,
+						canonical ? canonical : file,
+						offset, 0, flags);
+
+				if (!match && !canonical) {
+					canonical = canonicalize_path(file);
+					match = loopcxt_is_used(lc, st, canonical,
+							offset, 0, flags);
+				}
+				if (!match)
+					continue;
+			}
+
+			ln = scols_table_new_line(tb, NULL);
+			if (!ln)
+				err(EXIT_FAILURE, _("failed to allocate output line"));
+			rc = set_scols_data(lc, ln);
+			if (rc)
+				break;
+		}
+
+		loopcxt_deinit_iterator(lc);
+		free(canonical);
+	}
+
+	/* nothing is printed when collecting the data failed */
+	if (rc == 0)
+		rc = scols_print_table(tb);
+	scols_unref_table(tb);
+	return rc;
+}
+
+/*
+ * Print the help text, including the list of the available --output
+ * columns, to stdout and exit with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	size_t i;
+
+	fputs(USAGE_HEADER, out);
+
+	fprintf(out,
+	      _(" %1$s [options] [<loopdev>]\n"
+		" %1$s [options] -f | <loopdev> <file>\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Set up and control loop devices.\n"), out);
+
+	/* commands */
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -a, --all                     list all used devices\n"), out);
+	fputs(_(" -d, --detach <loopdev>...     detach one or more devices\n"), out);
+	fputs(_(" -D, --detach-all              detach all used devices\n"), out);
+	fputs(_(" -f, --find                    find first unused device\n"), out);
+	fputs(_(" -c, --set-capacity <loopdev>  resize the device\n"), out);
+	fputs(_(" -j, --associated <file>       list all devices associated with <file>\n"), out);
+	fputs(_(" -L, --nooverlap               avoid possible conflict between devices\n"), out);
+
+	/* commands options */
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_(" -o, --offset <num>            start at offset <num> into file\n"), out);
+	fputs(_("     --sizelimit <num>         device is limited to <num> bytes of the file\n"), out);
+	/* short and long option are separated by a comma like everywhere else */
+	fputs(_(" -b, --sector-size <num>       set the logical sector size to <num>\n"), out);
+	fputs(_(" -P, --partscan                create a partitioned loop device\n"), out);
+	fputs(_(" -r, --read-only               set up a read-only loop device\n"), out);
+	fputs(_("     --direct-io[=<on|off>]    open backing file with O_DIRECT\n"), out);
+	fputs(_("     --show                    print device name after setup (with -f)\n"), out);
+	fputs(_(" -v, --verbose                 verbose mode\n"), out);
+
+	/* output options */
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_(" -J, --json                    use JSON --list output format\n"), out);
+	fputs(_(" -l, --list                    list info about all or specified (default)\n"), out);
+	fputs(_(" -n, --noheadings              don't print headings for --list output\n"), out);
+	fputs(_(" -O, --output <cols>           specify columns to output for --list\n"), out);
+	fputs(_("     --output-all              output all columns\n"), out);
+	fputs(_("     --raw                     use raw --list output format\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(31));
+
+	/* one line per known column: name and translated description */
+	fputs(USAGE_COLUMNS, out);
+	for (i = 0; i < ARRAY_SIZE(infos); i++)
+		fprintf(out, " %12s  %s\n", infos[i].name, _(infos[i].help));
+
+	printf(USAGE_MAN_TAIL("losetup(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Warn when the size of the backing file is not usable as a sequence of
+ * complete 512-byte sectors.  If @size is zero, the size of @filename is
+ * used; nothing is checked when stat() fails or @filename is a block device.
+ */
+static void warn_size(const char *filename, uint64_t size)
+{
+	uint64_t bytes = size;
+
+	if (bytes == 0) {
+		struct stat sb;
+
+		if (stat(filename, &sb) != 0)
+			return;
+		if (S_ISBLK(sb.st_mode))
+			return;
+		bytes = sb.st_size;
+	}
+
+	if (bytes < 512) {
+		warnx(_("%s: Warning: file is smaller than 512 bytes; the loop device "
+			"may be useless or invisible for system tools."),
+			filename);
+		return;
+	}
+
+	if (bytes % 512 != 0)
+		warnx(_("%s: Warning: file does not fit into a 512-byte sector; "
+		        "the end of the file will be ignored."),
+			filename);
+}
+
+/*
+ * Set up a loop device for the backing @file.
+ *
+ * If @lc already has a device assigned, that device is used; otherwise the
+ * first unused device is looked up.  With @nooverlap the existing loop
+ * devices are checked for a conflict with @file, @offset and @sizelimit
+ * first.  @lo_flags are the LO_FLAGS_* for the new device; @flags tells
+ * (LOOPDEV_FL_OFFSET, LOOPDEV_FL_SIZELIMIT) which of @offset and @sizelimit
+ * are to be applied.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the failing
+ * loopcxt_*() call (a warning has been printed).  Conflicts detected by the
+ * @nooverlap checks terminate the program by errx()/err().
+ */
+static int create_loop(struct loopdev_cxt *lc,
+		       int nooverlap, int lo_flags, int flags,
+		       const char *file, uint64_t offset, uint64_t sizelimit)
+{
+	int hasdev = loopcxt_has_device(lc);
+	int rc = 0;
+
+	/* losetup --find --nooverlap file.img */
+	if (!hasdev && nooverlap) {
+		rc = loopcxt_find_overlap(lc, file, offset, sizelimit);
+		switch (rc) {
+		case 0: /* not found */
+			break;
+
+		case 1:	/* overlap */
+			loopcxt_deinit(lc);
+			errx(EXIT_FAILURE, _("%s: overlapping loop device exists"), file);
+
+		case 2: /* overlap -- full size and offset match (reuse) */
+		{
+			uint32_t lc_encrypt_type;
+
+			/* Once a loop is initialized RO, there is no
+			 * way to change its parameters. */
+			if (loopcxt_is_readonly(lc)
+			    && !(lo_flags & LO_FLAGS_READ_ONLY)) {
+				loopcxt_deinit(lc);
+				errx(EXIT_FAILURE, _("%s: overlapping read-only loop device exists"), file);
+			}
+
+			/* Encryption is no longer supported, but check to be safe. */
+			if (loopcxt_get_encrypt_type(lc, &lc_encrypt_type) == 0
+			    && lc_encrypt_type != LO_CRYPT_NONE) {
+				loopcxt_deinit(lc);
+				errx(EXIT_FAILURE, _("%s: overlapping encrypted loop device exists"), file);
+			}
+
+			/* clear the autoclear flag of the device that is going to be re-used */
+			lc->info.lo_flags &= ~LO_FLAGS_AUTOCLEAR;
+			if (loopcxt_set_status(lc)) {
+				loopcxt_deinit(lc);
+				errx(EXIT_FAILURE, _("%s: failed to re-use loop device"), file);
+			}
+			return 0;	/* success, re-use */
+		}
+		default: /* error */
+			loopcxt_deinit(lc);
+			errx(EXIT_FAILURE, _("failed to inspect loop devices"));
+			/* not reached, errx() terminates the program */
+			return -errno;
+		}
+	}
+
+	if (hasdev && !is_loopdev(loopcxt_get_device(lc)))
+		loopcxt_add_device(lc);
+
+	/* losetup --nooverlap /dev/loopN file.img */
+	if (hasdev && nooverlap) {
+		/* a separate context is used for the search, @lc keeps the requested device */
+		struct loopdev_cxt lc2;
+
+		if (loopcxt_init(&lc2, 0)) {
+			loopcxt_deinit(lc);
+			err(EXIT_FAILURE, _("failed to initialize loopcxt"));
+		}
+		rc = loopcxt_find_overlap(&lc2, file, offset, sizelimit);
+		loopcxt_deinit(&lc2);
+
+		if (rc) {
+			loopcxt_deinit(lc);
+			if (rc > 0)
+				errx(EXIT_FAILURE, _("%s: overlapping loop device exists"), file);
+			err(EXIT_FAILURE, _("%s: failed to check for conflicting loop devices"), file);
+		}
+	}
+
+	/* Create a new device */
+	do {
+		const char *errpre;
+
+		/* Note that loopcxt_{find_unused,set_device}() resets
+		 * loopcxt struct.
+		 */
+		if (!hasdev && (rc = loopcxt_find_unused(lc))) {
+			warnx(_("cannot find an unused loop device"));
+			break;
+		}
+		if (flags & LOOPDEV_FL_OFFSET)
+			loopcxt_set_offset(lc, offset);
+		if (flags & LOOPDEV_FL_SIZELIMIT)
+			loopcxt_set_sizelimit(lc, sizelimit);
+		if (lo_flags)
+			loopcxt_set_flags(lc, lo_flags);
+		if ((rc = loopcxt_set_backing_file(lc, file))) {
+			warn(_("%s: failed to use backing file"), file);
+			break;
+		}
+		errno = 0;
+		rc = loopcxt_setup_device(lc);
+		if (rc == 0)
+			break;			/* success */
+		/*
+		 * EBUSY for a device we picked ourselves: "continue" jumps to
+		 * the loop condition, which is true here (!hasdev), so the
+		 * next unused device is tried.
+		 */
+		if (errno == EBUSY && !hasdev)
+			continue;
+
+		/* errors */
+		errpre = hasdev && loopcxt_get_fd(lc) < 0 ?
+				 loopcxt_get_device(lc) : file;
+		warn(_("%s: failed to set up loop device"), errpre);
+		break;
+	} while (hasdev == 0);
+
+	return rc;
+}
+
+/*
+ * losetup entry point: parse the command line, derive the requested action
+ * (A_*) from the options and the number of remaining arguments, and run it.
+ *
+ * Returns EXIT_SUCCESS if the action succeeded, EXIT_FAILURE otherwise.
+ */
+int main(int argc, char **argv)
+{
+	struct loopdev_cxt lc;
+	int act = 0, flags = 0, no_overlap = 0, c;
+	char *file = NULL;
+	uint64_t offset = 0, sizelimit = 0, blocksize = 0;
+	int res = 0, showdev = 0, lo_flags = 0;
+	char *outarg = NULL;
+	int list = 0;
+	unsigned long use_dio = 0, set_dio = 0, set_blocksize = 0;
+
+	/* long-only options */
+	enum {
+		OPT_SIZELIMIT = CHAR_MAX + 1,
+		OPT_SHOW,
+		OPT_RAW,
+		OPT_DIO,
+		OPT_OUTPUT_ALL
+	};
+	static const struct option longopts[] = {
+		{ "all",          no_argument,       NULL, 'a'           },
+		{ "set-capacity", required_argument, NULL, 'c'           },
+		{ "detach",       required_argument, NULL, 'd'           },
+		{ "detach-all",   no_argument,       NULL, 'D'           },
+		{ "find",         no_argument,       NULL, 'f'           },
+		{ "nooverlap",    no_argument,       NULL, 'L'           },
+		{ "help",         no_argument,       NULL, 'h'           },
+		{ "associated",   required_argument, NULL, 'j'           },
+		{ "json",         no_argument,       NULL, 'J'           },
+		{ "list",         no_argument,       NULL, 'l'           },
+		{ "sector-size",  required_argument, NULL, 'b'           },
+		{ "noheadings",   no_argument,       NULL, 'n'           },
+		{ "offset",       required_argument, NULL, 'o'           },
+		{ "output",       required_argument, NULL, 'O'           },
+		{ "output-all",   no_argument,       NULL, OPT_OUTPUT_ALL },
+		{ "sizelimit",    required_argument, NULL, OPT_SIZELIMIT },
+		{ "partscan",     no_argument,       NULL, 'P'           },
+		{ "read-only",    no_argument,       NULL, 'r'           },
+		{ "direct-io",    optional_argument, NULL, OPT_DIO       },
+		{ "raw",          no_argument,       NULL, OPT_RAW       },
+		{ "show",         no_argument,       NULL, OPT_SHOW      },
+		{ "verbose",      no_argument,       NULL, 'v'           },
+		{ "version",      no_argument,       NULL, 'V'           },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'D','a','c','d','f','j' },
+		{ 'D','c','d','f','l' },
+		{ 'D','c','d','f','O' },
+		{ 'J',OPT_RAW },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	if (loopcxt_init(&lc, 0))
+		err(EXIT_FAILURE, _("failed to initialize loopcxt"));
+
+	while ((c = getopt_long(argc, argv, "ab:c:d:Dfhj:JlLno:O:PrvV",
+				longopts, NULL)) != -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch (c) {
+		case 'a':
+			act = A_SHOW;
+			break;
+		case 'b':
+			set_blocksize = 1;
+			blocksize = strtosize_or_err(optarg, _("failed to parse logical block size"));
+			break;
+		case 'c':
+			act = A_SET_CAPACITY;
+			if (!is_loopdev(optarg) ||
+			    loopcxt_set_device(&lc, optarg))
+				err(EXIT_FAILURE, _("%s: failed to use device"),
+						optarg);
+			break;
+		case 'r':
+			lo_flags |= LO_FLAGS_READ_ONLY;
+			break;
+		case 'd':
+			act = A_DELETE;
+			if (!is_loopdev(optarg) ||
+			    loopcxt_set_device(&lc, optarg))
+				err(EXIT_FAILURE, _("%s: failed to use device"),
+						optarg);
+			break;
+		case 'D':
+			act = A_DELETE_ALL;
+			break;
+		case 'f':
+			act = A_FIND_FREE;
+			break;
+		case 'h':
+			usage();
+			break;
+		case 'J':
+			json = 1;
+			break;
+		case 'j':
+			act = A_SHOW;
+			file = optarg;
+			break;
+		case 'l':
+			list = 1;
+			break;
+		case 'L':
+			no_overlap = 1;
+			break;
+		case 'n':
+			no_headings = 1;
+			break;
+		case OPT_RAW:
+			raw = 1;
+			break;
+		case 'o':
+			offset = strtosize_or_err(optarg, _("failed to parse offset"));
+			flags |= LOOPDEV_FL_OFFSET;
+			break;
+		case 'O':
+			/* parsed later, once the default columns are known */
+			outarg = optarg;
+			list = 1;
+			break;
+		case OPT_OUTPUT_ALL:
+			for (ncolumns = 0; ncolumns < ARRAY_SIZE(infos); ncolumns++)
+				columns[ncolumns] = ncolumns;
+			break;
+		case 'P':
+			lo_flags |= LO_FLAGS_PARTSCAN;
+			break;
+		case OPT_SHOW:
+			showdev = 1;
+			break;
+		case OPT_DIO:
+			/* --direct-io without argument means "on" */
+			use_dio = set_dio = 1;
+			if (optarg)
+				use_dio = parse_switch(optarg, _("argument error"), "on", "off", NULL);
+			break;
+		case 'v':
+			/* accepted, but has no effect */
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case OPT_SIZELIMIT:			/* --sizelimit */
+			sizelimit = strtosize_or_err(optarg, _("failed to parse size"));
+			flags |= LOOPDEV_FL_SIZELIMIT;
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	ul_path_init_debug();
+	ul_sysfs_init_debug();
+
+	/* default is --list --all */
+	if (argc == 1) {
+		act = A_SHOW;
+		list = 1;
+	}
+
+	/* a lone --raw or --json also means --list --all */
+	if (!act && argc == 2 && (raw || json)) {
+		act = A_SHOW;
+		list = 1;
+	}
+
+	/* default --list output columns */
+	if (list && !ncolumns) {
+		columns[ncolumns++] = COL_NAME;
+		columns[ncolumns++] = COL_SIZELIMIT;
+		columns[ncolumns++] = COL_OFFSET;
+		columns[ncolumns++] = COL_AUTOCLR;
+		columns[ncolumns++] = COL_RO;
+		columns[ncolumns++] = COL_BACK_FILE;
+		columns[ncolumns++] = COL_DIO;
+		columns[ncolumns++] = COL_LOGSEC;
+	}
+
+	if (act == A_FIND_FREE && optind < argc) {
+		/*
+		 * losetup -f <backing_file>
+		 */
+		act = A_CREATE;
+		file = argv[optind++];
+
+		if (optind < argc)
+			errx(EXIT_FAILURE, _("unexpected arguments"));
+	}
+
+	if (list && !act && optind == argc)
+		/*
+		 * losetup --list	defaults to --all
+		 */
+		act = A_SHOW;
+
+	if (!act && optind + 1 == argc) {
+		/*
+		 * losetup [--list] <device>
+		 * OR
+		 * losetup {--direct-io[=off]|--sector-size=size}... <device>
+		 */
+		if (!(set_dio || set_blocksize))
+			act = A_SHOW_ONE;
+		if (set_dio)
+			act = A_SET_DIRECT_IO;
+		if (set_blocksize)
+			act = A_SET_BLOCKSIZE;
+		if (!is_loopdev(argv[optind]) ||
+		    loopcxt_set_device(&lc, argv[optind]))
+			err(EXIT_FAILURE, _("%s: failed to use device"),
+					argv[optind]);
+		optind++;
+	}
+	if (!act) {
+		/*
+		 * losetup <loopdev> <backing_file>
+		 */
+		act = A_CREATE;
+
+		if (optind >= argc)
+			errx(EXIT_FAILURE, _("no loop device specified"));
+		/* don't use is_loopdev() here, the device does not have to exist yet */
+		if (loopcxt_set_device(&lc, argv[optind]))
+			err(EXIT_FAILURE, _("%s: failed to use device"),
+					argv[optind]);
+		optind++;
+
+		if (optind >= argc)
+			errx(EXIT_FAILURE, _("no file specified"));
+		file = argv[optind++];
+	}
+
+	if (act != A_CREATE &&
+	    (sizelimit || lo_flags || showdev))
+		errx(EXIT_FAILURE,
+			_("the options %s are allowed during loop device setup only"),
+			"--{sizelimit,read-only,show}");
+
+	/* --offset makes sense for setup and for "-j <file>" only */
+	if ((flags & LOOPDEV_FL_OFFSET) &&
+	    act != A_CREATE && (act != A_SHOW || !file))
+		errx(EXIT_FAILURE, _("the option --offset is not allowed in this context"));
+
+	if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns),
+					 &ncolumns, column_name_to_id) < 0)
+		return EXIT_FAILURE;
+
+	switch (act) {
+	case A_CREATE:
+		res = create_loop(&lc, no_overlap, lo_flags, flags, file, offset, sizelimit);
+		if (res == 0) {
+			if (showdev)
+				printf("%s\n", loopcxt_get_device(&lc));
+			warn_size(file, sizelimit);
+			/* direct-io and sector size are applied to the new device */
+			if (set_dio || set_blocksize)
+				goto lo_set_post;
+		}
+		break;
+	case A_DELETE:
+		/* the first device has been assigned to the context by -d */
+		res = delete_loop(&lc);
+		while (optind < argc) {
+			const char *dev = argv[optind++];
+
+			/*
+			 * Don't call delete_loop() if the device cannot be
+			 * used; the context may still point to the previous
+			 * device and we must not touch that one again.
+			 */
+			if (!is_loopdev(dev) ||
+			    loopcxt_set_device(&lc, dev)) {
+				warn(_("%s: failed to use device"), dev);
+				res--;
+				continue;
+			}
+			res += delete_loop(&lc);
+		}
+		break;
+	case A_DELETE_ALL:
+		res = delete_all_loops(&lc);
+		break;
+	case A_FIND_FREE:
+		res = loopcxt_find_unused(&lc);
+		if (res) {
+			int errsv = errno;
+
+			/*
+			 * If the loop-control node exists but is not
+			 * writable, keep the errno set by the failed access()
+			 * for the warning; otherwise report the errno of
+			 * loopcxt_find_unused().
+			 */
+			if (access(_PATH_DEV_LOOPCTL, F_OK) == 0 &&
+			    access(_PATH_DEV_LOOPCTL, W_OK) != 0)
+				;
+			else
+				errno = errsv;
+
+			warn(_("cannot find an unused loop device"));
+		} else
+			printf("%s\n", loopcxt_get_device(&lc));
+		break;
+	case A_SHOW:
+		if (list)
+			res = show_table(&lc, file, offset, flags);
+		else
+			res = show_all_loops(&lc, file, offset, flags);
+		break;
+	case A_SHOW_ONE:
+		if (list)
+			res = show_table(&lc, NULL, 0, 0);
+		else
+			res = printf_loopdev(&lc);
+		if (res)
+			warn("%s", loopcxt_get_device(&lc));
+		break;
+	case A_SET_CAPACITY:
+		res = loopcxt_set_capacity(&lc);
+		if (res)
+			warn(_("%s: set capacity failed"),
+			        loopcxt_get_device(&lc));
+		break;
+	case A_SET_DIRECT_IO:
+	case A_SET_BLOCKSIZE:
+ lo_set_post:
+		if (set_dio) {
+			res = loopcxt_set_dio(&lc, use_dio);
+			if (res)
+				warn(_("%s: set direct io failed"),
+				        loopcxt_get_device(&lc));
+		}
+		if (set_blocksize) {
+			/* keep a direct-io failure, don't overwrite it by success */
+			int rc = loopcxt_set_blocksize(&lc, blocksize);
+
+			if (rc) {
+				warn(_("%s: set logical block size failed"),
+				        loopcxt_get_device(&lc));
+				res = rc;
+			}
+		}
+		break;
+	default:
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+		break;
+	}
+
+	loopcxt_deinit(&lc);
+	return res ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
diff --git a/sys-utils/lscpu-arm.c b/sys-utils/lscpu-arm.c
new file mode 100644
index 0000000..37b8f66
--- /dev/null
+++ b/sys-utils/lscpu-arm.c
@@ -0,0 +1,252 @@
+/*
+ * lscpu-arm.c - ARM CPU identification tables
+ *
+ * Copyright (C) 2018 Riku Voipio <riku.voipio@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The information here is gathered from
+ *  - ARM manuals
+ *  - Linux kernel: arch/armX/include/asm/cputype.h
+ *  - GCC sources: config/arch/arch-cores.def
+ *  - Ancient wisdom
+ */
+#include "lscpu.h"
+
+/* One entry of a per-implementer part table: part number and printable name. */
+struct id_part {
+    const int id;		/* compared with the number parsed from desc->model; -1 ends a table */
+    const char* name;	/* assigned to desc->modelname on match */
+};
+
+/* Part numbers of implementer 0x41 ("ARM" in hw_implementer[]); ends with id -1. */
+static const struct id_part arm_part[] = {
+    { 0x810, "ARM810" },
+    { 0x920, "ARM920" },
+    { 0x922, "ARM922" },
+    { 0x926, "ARM926" },
+    { 0x940, "ARM940" },
+    { 0x946, "ARM946" },
+    { 0x966, "ARM966" },
+    { 0xa20, "ARM1020" },
+    { 0xa22, "ARM1022" },
+    { 0xa26, "ARM1026" },
+    { 0xb02, "ARM11 MPCore" },
+    { 0xb36, "ARM1136" },
+    { 0xb56, "ARM1156" },
+    { 0xb76, "ARM1176" },
+    { 0xc05, "Cortex-A5" },
+    { 0xc07, "Cortex-A7" },
+    { 0xc08, "Cortex-A8" },
+    { 0xc09, "Cortex-A9" },
+    { 0xc0d, "Cortex-A17" },	/* Originally A12 */
+    { 0xc0f, "Cortex-A15" },
+    { 0xc0e, "Cortex-A17" },
+    { 0xc14, "Cortex-R4" },
+    { 0xc15, "Cortex-R5" },
+    { 0xc17, "Cortex-R7" },
+    { 0xc18, "Cortex-R8" },
+    { 0xc20, "Cortex-M0" },
+    { 0xc21, "Cortex-M1" },
+    { 0xc23, "Cortex-M3" },
+    { 0xc24, "Cortex-M4" },
+    { 0xc27, "Cortex-M7" },
+    { 0xc60, "Cortex-M0+" },
+    { 0xd01, "Cortex-A32" },
+    { 0xd03, "Cortex-A53" },
+    { 0xd04, "Cortex-A35" },
+    { 0xd05, "Cortex-A55" },
+    { 0xd07, "Cortex-A57" },
+    { 0xd08, "Cortex-A72" },
+    { 0xd09, "Cortex-A73" },
+    { 0xd0a, "Cortex-A75" },
+    { 0xd13, "Cortex-R52" },
+    { 0xd20, "Cortex-M23" },
+    { 0xd21, "Cortex-M33" },
+    { -1, "unknown" },
+};
+
+/* Part numbers of implementer 0x42 ("Broadcom" in hw_implementer[]); ends with id -1. */
+static const struct id_part brcm_part[] = {
+    { 0x0f, "Brahma B15" },
+    { 0x100, "Brahma B53" },
+    { 0x516, "ThunderX2" },
+    { -1, "unknown" },
+};
+
+/* Part numbers of implementer 0x44 ("DEC" in hw_implementer[]); ends with id -1. */
+static const struct id_part dec_part[] = {
+    { 0xa10, "SA110" },
+    { 0xa11, "SA1100" },
+    { -1, "unknown" },
+};
+
+/* Part numbers of implementer 0x43 ("Cavium" in hw_implementer[]); ends with id -1. */
+static const struct id_part cavium_part[] = {
+    { 0x0a0, "ThunderX" },
+    { 0x0a1, "ThunderX 88XX" },
+    { 0x0a2, "ThunderX 81XX" },
+    { 0x0a3, "ThunderX 83XX" },
+    { 0x0af, "ThunderX2 99xx" },
+    { -1, "unknown" },
+};
+
+/* Part numbers of implementer 0x50 ("APM" in hw_implementer[]); ends with id -1. */
+static const struct id_part apm_part[] = {
+    { 0x000, "X-Gene" },
+    { -1, "unknown" },
+};
+
+/* Part numbers of implementer 0x51 ("Qualcomm" in hw_implementer[]); ends with id -1. */
+static const struct id_part qcom_part[] = {
+    { 0x00f, "Scorpion" },
+    { 0x02d, "Scorpion" },
+    { 0x04d, "Krait" },
+    { 0x06f, "Krait" },
+    { 0x201, "Kryo" },
+    { 0x205, "Kryo" },
+    { 0x211, "Kryo" },
+    { 0x800, "Falkor V1/Kryo" },
+    { 0x801, "Kryo V2" },
+    { 0xc00, "Falkor" },
+    { 0xc01, "Saphira" },
+    { -1, "unknown" },
+};
+
+/* Part numbers of implementer 0x53 ("Samsung" in hw_implementer[]); ends with id -1. */
+static const struct id_part samsung_part[] = {
+    { 0x001, "exynos-m1" },
+    { -1, "unknown" },
+};
+
+/* Part numbers of implementer 0x4e ("Nvidia" in hw_implementer[]); ends with id -1. */
+static const struct id_part nvidia_part[] = {
+    { 0x000, "Denver" },
+    { 0x003, "Denver 2" },
+    { -1, "unknown" },
+};
+
+/* Part numbers of implementer 0x56 ("Marvell" in hw_implementer[]); ends with id -1. */
+static const struct id_part marvell_part[] = {
+    { 0x131, "Feroceon 88FR131" },
+    { 0x581, "PJ4/PJ4b" },
+    { 0x584, "PJ4B-MP" },
+    { -1, "unknown" },
+};
+
+/* Part numbers of implementer 0x66 ("Faraday" in hw_implementer[]); ends with id -1. */
+static const struct id_part faraday_part[] = {
+    { 0x526, "FA526" },
+    { 0x626, "FA626" },
+    { -1, "unknown" },
+};
+
+/* Part numbers of implementer 0x69 ("Intel" in hw_implementer[]); ends with id -1. */
+static const struct id_part intel_part[] = {
+    { 0x200, "i80200" },
+    { 0x210, "PXA250A" },
+    { 0x212, "PXA210A" },
+    { 0x242, "i80321-400" },
+    { 0x243, "i80321-600" },
+    { 0x290, "PXA250B/PXA26x" },
+    { 0x292, "PXA210B" },
+    { 0x2c2, "i80321-400-B0" },
+    { 0x2c3, "i80321-600-B0" },
+    { 0x2d0, "PXA250C/PXA255/PXA26x" },
+    { 0x2d2, "PXA210C" },
+    { 0x411, "PXA27x" },
+    { 0x41c, "IPX425-533" },
+    { 0x41d, "IPX425-400" },
+    { 0x41f, "IPX425-266" },
+    { 0x682, "PXA32x" },
+    { 0x683, "PXA930/PXA935" },
+    { 0x688, "PXA30x" },
+    { 0x689, "PXA31x" },
+    { 0xb11, "SA1110" },
+    { 0xc12, "IPX1200" },
+    { -1, "unknown" },
+};
+
+/* Empty part table (terminator only), referenced by the last hw_implementer[] entry. */
+static const struct id_part unknown_part[] = {
+    { -1, "unknown" },
+};
+
+/* One implementer (vendor): its code, its part table and its printable name. */
+struct hw_impl {
+   const int    id;			/* compared with the number parsed from desc->vendor; -1 ends the table */
+   const struct id_part     *parts;	/* part table searched for the model */
+   const char   *name;			/* assigned to desc->vendor on match */
+};
+
+/*
+ * Known implementers.  arm_cpu_decode() scans the table up to the entry
+ * with id -1, so that last entry is a terminator and is never matched.
+ */
+static const struct hw_impl hw_implementer[] = {
+    { 0x41, arm_part,     "ARM" },
+    { 0x42, brcm_part,    "Broadcom" },
+    { 0x43, cavium_part,  "Cavium" },
+    { 0x44, dec_part,     "DEC" },
+    { 0x4e, nvidia_part,  "Nvidia" },
+    { 0x50, apm_part,     "APM" },
+    { 0x51, qcom_part,    "Qualcomm" },
+    { 0x53, samsung_part, "Samsung" },
+    { 0x56, marvell_part, "Marvell" },
+    { 0x66, faraday_part, "Faraday" },
+    { 0x69, intel_part,   "Intel" },
+    { -1,   unknown_part, "unknown" },
+};
+
+/*
+ * Replace the numeric ("0x...") vendor and model of @desc by printable
+ * names from the tables above.  For cores of implementer 0x41 the stepping
+ * is additionally replaced by an "rXpY" string built from the stepping
+ * (X) and the revision (Y).
+ *
+ * @desc is left untouched when the vendor or model is missing, is not in the
+ * "0x" form or cannot be parsed, or when the implementer is not known.
+ */
+void arm_cpu_decode(struct lscpu_desc *desc)
+{
+	const struct hw_impl *hw;
+	const struct id_part *parts = NULL, *entry;
+	int impl, part;
+	char *end;
+
+	if (!desc->vendor || !desc->model)
+		return;
+	if (strncmp(desc->vendor, "0x", 2) != 0)
+		return;
+	if (strncmp(desc->model, "0x", 2) != 0)
+		return;
+
+	errno = 0;
+	impl = (int) strtol(desc->vendor, &end, 0);
+	if (errno || end == desc->vendor)
+		return;
+
+	errno = 0;
+	part = (int) strtol(desc->model, &end, 0);
+	if (errno || end == desc->model)
+		return;
+
+	/* implementer: the entry with id -1 terminates the table */
+	for (hw = hw_implementer; hw->id != -1; hw++) {
+		if (hw->id != impl)
+			continue;
+		parts = hw->parts;
+		desc->vendor = (char *) hw->name;
+		break;
+	}
+	if (!parts)
+		return;
+
+	/* part number; the model name stays untouched if it is not listed */
+	for (entry = parts; entry->id != -1; entry++) {
+		if (entry->id != part)
+			continue;
+		desc->modelname = (char *) entry->name;
+		break;
+	}
+
+	/* Print out the rXpY string for ARM cores */
+	if (impl != 0x41 || !desc->revision || !desc->stepping)
+		return;
+	else {
+		char buf[8];
+		int revision, variant;
+
+		errno = 0;
+		revision = (int) strtol(desc->revision, &end, 10);
+		if (errno || end == desc->revision)
+			return;
+
+		errno = 0;
+		variant = (int) strtol(desc->stepping, &end, 0);
+		if (errno || end == desc->stepping)
+			return;
+
+		snprintf(buf, sizeof(buf), "r%dp%d", variant, revision);
+		desc->stepping = xstrdup(buf);
+	}
+}
diff --git a/sys-utils/lscpu-dmi.c b/sys-utils/lscpu-dmi.c
new file mode 100644
index 0000000..29bd2e4
--- /dev/null
+++ b/sys-utils/lscpu-dmi.c
@@ -0,0 +1,305 @@
+/*
+ * lscpu-dmi - Module to parse SMBIOS information
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Code originally taken from the dmidecode utility and slightly rewritten
+ * to suit the needs of lscpu
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "lscpu.h"
+
+#define _PATH_SYS_DMI	 "/sys/firmware/dmi/tables/DMI"
+
+#define WORD(x) (uint16_t)(*(const uint16_t *)(x))
+#define DWORD(x) (uint32_t)(*(const uint32_t *)(x))
+
+struct dmi_header
+{
+	uint8_t type;		/* data[0] */
+	uint8_t length;		/* data[1]; dmi_string() expects the strings at data + length */
+	uint16_t handle;	/* 16-bit word at data + 2 */
+	uint8_t *data;		/* start of the raw structure this header was read from */
+};
+
+static int checksum(const uint8_t *buf, size_t len)
+{
+	uint8_t total = 0;	/* 8-bit accumulator, wraps modulo 256 */
+
+	/* returns 1 when the @len bytes at @buf add up to zero, else 0 */
+	while (len--)
+		total += *buf++;
+	return !total;
+}
+
+static void *get_mem_chunk(size_t base, size_t len, const char *devmem)
+{
+	/* Returns a malloc()ed buffer with the @len bytes read at offset @base
+	 * of the file @devmem, or NULL when open(), malloc() or lseek() fails
+	 * or read_all() returns -1.  The caller frees the buffer. */
+	void *chunk;
+	int fd = open(devmem, O_RDONLY);
+
+	if (fd < 0)
+		return NULL;
+
+	chunk = malloc(len);
+	if (chunk != NULL
+	    && (lseek(fd, base, SEEK_SET) == -1
+		|| read_all(fd, chunk, len) == -1)) {
+		/* seek or read failed: do not hand out the buffer */
+		free(chunk);
+		chunk = NULL;
+	}
+
+	close(fd);
+	return chunk;
+}
+
+static void to_dmi_header(struct dmi_header *h, uint8_t *data)
+{
+	h->type = data[0];		/* byte 0 */
+	h->length = data[1];		/* byte 1 */
+	h->handle = WORD(data + 2);	/* bytes 2-3, read in host byte order */
+	h->data = data;			/* keep the raw structure for dmi_string() */
+}
+
+static char *dmi_string(const struct dmi_header *dm, uint8_t s)
+{
+	/* Return the @s-th (1-based) string that follows the structure, or
+	 * NULL when @s is 0 or the string set ends before reaching it. */
+	char *str;
+	uint8_t left;
+
+	if (s == 0)
+		return NULL;
+
+	str = (char *)dm->data + dm->length;	/* strings follow the structure */
+
+	/* hop over the s - 1 preceding strings, stopping at an empty one */
+	for (left = s; left > 1 && *str; left--)
+		str += strlen(str) + 1;
+
+	if (*str == '\0')
+		return NULL;
+	return str;
+}
+
+static int hypervisor_from_dmi_table(uint32_t base, uint16_t len,
+				uint16_t num, const char *devmem)
+{
+	/* Walk up to @num SMBIOS structures in the @len bytes at offset @base
+	 * of @devmem.  The vendor string of type 0 and the manufacturer/product
+	 * strings of type 1 select the HYPER_* result; HYPER_NONE otherwise. */
+	uint8_t *buf, *data;
+	char *vendor = NULL, *product = NULL, *manufacturer = NULL;
+	int i = 0, rc = HYPER_NONE;
+
+	data = buf = get_mem_chunk(base, len, devmem);
+	if (!buf)
+		goto done;
+
+	 /* 4 is the length of an SMBIOS structure header */
+	while (i < num && data + 4 <= buf + len) {
+		uint8_t *next;
+		struct dmi_header h;
+
+		to_dmi_header(&h, data);
+
+		/* A short entry (less than 4 bytes) is invalid and makes it
+		 * impossible to locate the next entry; better stop here. */
+		if (h.length < 4)
+			goto done;
+
+		/* look for the next handle */
+		next = data + h.length;
+		while (next - buf + 1 < len && (next[0] != 0 || next[1] != 0))
+			next++;
+		next += 2;
+
+		/* without a "\0\0" terminator inside buf the structure is truncated:
+		 * its string indexes and strings may lie beyond the buffer */
+		if (next > buf + len)
+			break;
+
+		switch (h.type) {
+			case 0:
+				vendor = dmi_string(&h, data[0x04]);
+				break;
+			case 1:
+				manufacturer = dmi_string(&h, data[0x04]);
+				product = dmi_string(&h, data[0x05]);
+				break;
+		}
+
+		data = next;
+		i++;
+	}
+	if (manufacturer && !strcmp(manufacturer, "innotek GmbH"))
+		rc = HYPER_INNOTEK;
+	else if (manufacturer && strstr(manufacturer, "HITACHI") &&
+					product && strstr(product, "LPAR"))
+		rc = HYPER_HITACHI;
+	else if (vendor && !strcmp(vendor, "Parallels"))
+		rc = HYPER_PARALLELS;
+done:
+	free(buf);
+	return rc;
+}
+
+#if defined(__x86_64__) || defined(__i386__)
+static int hypervisor_decode_legacy(uint8_t *buf, const char *devmem)
+{
+	/* @buf: 15-byte "_DMI_" entry point; -1 means its checksum is bad */
+	uint32_t base = DWORD(buf + 0x08);
+	uint16_t len = WORD(buf + 0x06), num = WORD(buf + 0x0C);
+
+	if (!checksum(buf, 0x0F))
+		return -1;
+	return hypervisor_from_dmi_table(base, len, num, devmem);
+}
+#endif
+
+static int hypervisor_decode_smbios(uint8_t *buf, const char *devmem)
+{
+	/* buf[0x05] is the entry point's self-declared length; callers only
+	 * guarantee 0x20 bytes at @buf, so larger values are rejected (-1) */
+	if (buf[0x05] > 0x20 || !checksum(buf, buf[0x05])
+	    || memcmp(buf + 0x10, "_DMI_", 5) != 0
+	    || !checksum(buf + 0x10, 0x0F))
+		return -1;
+	return hypervisor_from_dmi_table(DWORD(buf + 0x18), WORD(buf + 0x16),
+					 WORD(buf + 0x1C), devmem);
+}
+
+static int hypervisor_decode_sysfw(void)
+{
+	static char const sys_fw_dmi_tables[] = _PATH_SYS_DMI;
+	struct stat st;
+	uint16_t len;	/* the table parser takes 16-bit sizes */
+
+	if (stat(sys_fw_dmi_tables, &st))
+		return -1;	/* -1: no DMI table in /sys */
+	len = st.st_size > 0xFFFF ? 0xFFFF : st.st_size;	/* clamp, do not wrap modulo 65536 */
+	return hypervisor_from_dmi_table(0, len, len / 4, sys_fw_dmi_tables);
+}
+
+/*
+ * Probe for EFI interface
+ */
+#define EFI_NOT_FOUND   (-1)
+#define EFI_NO_SMBIOS   (-2)
+static int address_from_efi(size_t *address)
+{
+	/* Look up the "SMBIOS=<address>" line of the EFI system table file.
+	 * Returns 0 with the value in *address, EFI_NOT_FOUND when no systab
+	 * file can be opened, EFI_NO_SMBIOS when the file has no such line. */
+	char line[64], *value;
+	int rc = EFI_NO_SMBIOS;
+	FILE *systab;
+
+	*address = 0;	/* Prevent compiler warning */
+	/* /sys/firmware/efi/systab (Linux 2.6.7 and up), else /proc/efi/systab (up to 2.6.6) */
+	systab = fopen("/sys/firmware/efi/systab", "r");
+	if (systab == NULL)
+		systab = fopen("/proc/efi/systab", "r");
+	if (systab == NULL)
+		return EFI_NOT_FOUND;		/* No EFI interface */
+
+	while (fgets(line, sizeof(line) - 1, systab) != NULL) {
+		value = strchr(line, '=');
+		if (value == NULL)
+			continue;
+		*value++ = '\0';	/* split "key=value" in place */
+		if (strcmp(line, "SMBIOS") != 0)
+			continue;
+		*address = strtoul(value, NULL, 0);
+		rc = 0;
+		break;
+	}
+
+	fclose(systab);
+	return rc;
+}
+
+int read_hypervisor_dmi(void)
+{
+	int rc = HYPER_NONE;
+	uint8_t *buf = NULL;
+	size_t fp = 0;
+	/* Detect a hypervisor from SMBIOS strings: sysfs first, then EFI, then the x86 memory scan. */
+	if (sizeof(uint8_t) != 1
+	    || sizeof(uint16_t) != 2
+	    || sizeof(uint32_t) != 4
+	    || '\0' != 0)
+		goto done;
+
+	/* -1 : no DMI in /sys,
+	 *  0 : DMI exist, nothing detected (HYPER_NONE)
+	 * >0 : hypervisor detected
+	 */
+	rc = hypervisor_decode_sysfw();
+	if (rc >= HYPER_NONE)
+		goto done;
+
+	/* First try EFI (ia64, Intel-based Mac) */
+	switch (address_from_efi(&fp)) {
+		case EFI_NOT_FOUND:
+			goto memory_scan;
+		case EFI_NO_SMBIOS:
+			goto done;
+	}
+	/* EFI supplied the address of the SMBIOS entry point: fetch its first 0x20 bytes */
+	buf = get_mem_chunk(fp, 0x20, _PATH_DEV_MEM);
+	if (!buf)
+		goto done;
+
+	rc = hypervisor_decode_smbios(buf, _PATH_DEV_MEM);
+	if (rc >= HYPER_NONE)
+		goto done;
+	/* entry point rejected: release it and continue with the memory scan (x86 builds only) */
+	free(buf);
+	buf = NULL;
+memory_scan:
+#if defined(__x86_64__) || defined(__i386__)
+	/* Fallback to memory scan (x86, x86_64): 0x10000 bytes at 0xF0000, in 16-byte steps */
+	buf = get_mem_chunk(0xF0000, 0x10000, _PATH_DEV_MEM);
+	if (!buf)
+		goto done;
+
+	for (fp = 0; fp <= 0xFFF0; fp += 16) {
+		if (memcmp(buf + fp, "_SM_", 4) == 0 && fp <= 0xFFE0) {
+			rc = hypervisor_decode_smbios(buf + fp, _PATH_DEV_MEM);
+			if (rc < 0)
+				fp += 16;	/* rejected: also step over the second 16 bytes of this entry */
+
+		} else if (memcmp(buf + fp, "_DMI_", 5) == 0)
+			rc = hypervisor_decode_legacy(buf + fp, _PATH_DEV_MEM);
+
+		if (rc >= HYPER_NONE)
+			break;
+	}
+#endif
+done:
+	free(buf);
+	return rc < 0 ? HYPER_NONE : rc;	/* the internal -1 is never returned to the caller */
+}
diff --git a/sys-utils/lscpu.1 b/sys-utils/lscpu.1
new file mode 100644
index 0000000..23dee9b
--- /dev/null
+++ b/sys-utils/lscpu.1
@@ -0,0 +1,184 @@
+.TH LSCPU 1 "November 2015" "util-linux" "User Commands"
+.SH NAME
+lscpu \- display information about the CPU architecture
+.SH SYNOPSIS
+.B lscpu
+.RB [ \-a | \-b | \-c | \-J "] [" \-x "] [" \-y "] [" \-s " \fIdirectory\fP] [" \-e [=\fIlist\fP]| \-p [=\fIlist\fP]]
+.br
+.B lscpu
+.BR \-h | \-V
+.SH DESCRIPTION
+.B lscpu
+gathers CPU architecture information from sysfs, /proc/cpuinfo and any
+applicable architecture-specific libraries (e.g.\& librtas on PowerPC).  The
+command output can be optimized for parsing or for easy readability by humans.
+The information includes, for example, the number of CPUs, threads, cores,
+sockets, and Non-Uniform Memory Access (NUMA) nodes.  There is also information
+about the CPU caches and cache sharing, family, model, bogoMIPS, byte order,
+and stepping.
+.sp
+In virtualized environments, the CPU architecture information displayed
+reflects the configuration of the guest operating system which is
+typically different from the physical (host) system.  On architectures that
+support retrieving physical topology information,
+.B lscpu
+also displays the number of physical sockets, chips, and cores in the host system.
+.sp
+Options that result in an output table have a \fIlist\fP argument.  Use this
+argument to customize the command output.  Specify a comma-separated list of
+column labels to limit the output table to only the specified columns, arranged
+in the specified order.  See \fBCOLUMNS\fP for a list of valid column labels.  The
+column labels are not case sensitive.
+.sp
+Not all columns are supported on all architectures.  If an unsupported column is
+specified, \fBlscpu\fP prints the column but does not provide any data for it.
+
+.SS COLUMNS
+Note that topology elements (core, socket, etc.) use a sequential unique ID
+starting from zero, but CPU logical numbers follow the kernel where there is
+no guarantee of sequential numbering.
+.TP
+.B CPU
+The logical CPU number of a CPU as used by the Linux kernel.
+.TP
+.B CORE
+The logical core number.  A core can contain several CPUs.
+.TP
+.B SOCKET
+The logical socket number.  A socket can contain several cores.
+.TP
+.B BOOK
+The logical book number.  A book can contain several sockets.
+.TP
+.B DRAWER
+The logical drawer number.  A drawer can contain several books.
+.TP
+.B NODE
+The logical NUMA node number.  A node can contain several drawers.
+.TP
+.B CACHE
+Information about how caches are shared between CPUs.
+.TP
+.B ADDRESS
+The physical address of a CPU.
+.TP
+.B ONLINE
+Indicator that shows whether the Linux instance currently makes use of the CPU.
+.TP
+.B CONFIGURED
+Indicator that shows if the hypervisor has allocated the CPU to the virtual
+hardware on which the Linux instance runs.  CPUs that are configured can be set
+online by the Linux instance.
+This column contains data only if your hardware system and hypervisor support
+dynamic CPU resource allocation.
+.TP
+.B POLARIZATION
+This column contains data for Linux instances that run on virtual hardware with
+a hypervisor that can switch the CPU dispatching mode (polarization).  The
+polarization can be:
+.RS
+.TP 12
+.B horizontal
+The workload is spread across all available CPUs.
+.TP 12
+.B vertical
+The workload is concentrated on few CPUs.
+.P
+For vertical polarization, the column also shows the degree of concentration,
+high, medium, or low.  This column contains data only if your hardware system
+and hypervisor support CPU polarization.
+.RE
+.TP
+.B MAXMHZ
+Maximum megahertz value for the CPU. Useful when \fBlscpu\fP is used as a hardware
+inventory information gathering tool.  Notice that the megahertz value is
+dynamic, and driven by CPU governor depending on current resource need.
+.TP
+.B MINMHZ
+Minimum megahertz value for the CPU.
+.SH OPTIONS
+.TP
+.BR \-a , " \-\-all"
+Include lines for online and offline CPUs in the output (default for \fB-e\fR).
+This option may only be specified together with option \fB-e\fR or \fB-p\fR.
+.TP
+.BR \-b , " \-\-online"
+Limit the output to online CPUs (default for \fB-p\fR).
+This option may only be specified together with option \fB-e\fR or \fB-p\fR.
+.TP
+.BR \-c , " \-\-offline"
+Limit the output to offline CPUs.
+This option may only be specified together with option \fB-e\fR or \fB-p\fR.
+.TP
+.BR \-e , " \-\-extended" [=\fIlist\fP]
+Display the CPU information in human-readable format.
+
+If the \fIlist\fP argument is omitted, all columns for which data is available
+are included in the command output.
+
+When specifying the \fIlist\fP argument, the string of option, equal sign (=), and
+\fIlist\fP must not contain any blanks or other whitespace.
+Examples: '\fB-e=cpu,node\fP' or '\fB--extended=cpu,node\fP'.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.TP
+.BR \-J , " \-\-json"
+Use JSON output format for the default summary or extended output (see \fB\-\-extended\fP).
+.TP
+.BR \-p , " \-\-parse" [=\fIlist\fP]
+Optimize the command output for easy parsing.
+
+If the \fIlist\fP argument is omitted, the command output is compatible with earlier
+versions of \fBlscpu\fP.  In this compatible format, two commas are used to separate
+CPU cache columns.  If no CPU caches are identified the cache column is omitted.
+.br
+If the \fIlist\fP argument is used, cache columns are separated with a colon (:).
+
+When specifying the \fIlist\fP argument, the string of option, equal sign (=), and
+\fIlist\fP must not contain any blanks or other whitespace.
+Examples: '\fB-p=cpu,node\fP' or '\fB--parse=cpu,node\fP'.
+.TP
+.BR \-s , " \-\-sysroot " \fIdirectory\fP
+Gather CPU data for a Linux instance other than the instance from which the
+\fBlscpu\fP command is issued.  The specified \fIdirectory\fP is the system root
+of the Linux instance to be inspected.
+.TP
+.BR \-x , " \-\-hex"
+Use hexadecimal masks for CPU sets (for example 0x3).  The default is to print
+the sets in list format (for example 0,1).
+.TP
+.BR \-y , " \-\-physical"
+Display physical IDs for all columns with topology elements (core, socket, etc.).
+Unlike logical IDs, which are assigned by \fBlscpu\fP, physical IDs are
+platform-specific values that are provided by the kernel. Physical IDs are not
+necessarily unique and they might not be arranged sequentially.
+If the kernel could not retrieve a physical ID for an element, \fBlscpu\fP prints
+the dash (-) character.
+
+The CPU logical numbers are not affected by this option.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.B \-\-output\-all
+Output all available columns.  This option must be combined with either
+.BR \-\-extended " or " \-\-parse .
+.SH BUGS
+The basic overview of CPU family, model, etc. is always based on the first
+CPU only.
+
+Sometimes in Xen Dom0 the kernel reports wrong data.
+
+On virtual hardware the number of cores per socket, etc. can be wrong.
+.SH AUTHOR
+.nf
+Cai Qian <qcai@redhat.com>
+Karel Zak <kzak@redhat.com>
+Heiko Carstens <heiko.carstens@de.ibm.com>
+.fi
+.SH "SEE ALSO"
+.BR chcpu (8)
+.SH AVAILABILITY
+The lscpu command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/lscpu.c b/sys-utils/lscpu.c
new file mode 100644
index 0000000..1ff9069
--- /dev/null
+++ b/sys-utils/lscpu.c
@@ -0,0 +1,2134 @@
+/*
+ * lscpu - CPU architecture information helper
+ *
+ * Copyright (C) 2008 Cai Qian <qcai@redhat.com>
+ * Copyright (C) 2008 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#if (defined(__x86_64__) || defined(__i386__))
+# if !defined( __SANITIZE_ADDRESS__)
+#  define INCLUDE_VMWARE_BDOOR
+# else
+#  warning VMWARE detection disabled by __SANITIZE_ADDRESS__
+# endif
+#endif
+
+#ifdef INCLUDE_VMWARE_BDOOR
+# include <stdint.h>
+# include <signal.h>
+# include <strings.h>
+# include <setjmp.h>
+# ifdef HAVE_SYS_IO_H
+#  include <sys/io.h>
+# endif
+#endif
+
+#if defined(HAVE_LIBRTAS)
+#include <librtas.h>
+#endif
+
+#include <libsmartcols.h>
+
+#include "closestream.h"
+#include "optutils.h"
+
+#include "lscpu.h"
+
+#define CACHE_MAX 100
+
+/* /sys paths */
+#define _PATH_SYS_SYSTEM	"/sys/devices/system"
+#define _PATH_SYS_HYP_FEATURES	"/sys/hypervisor/properties/features"
+#define _PATH_SYS_CPU		_PATH_SYS_SYSTEM "/cpu"
+#define _PATH_SYS_NODE		_PATH_SYS_SYSTEM "/node"
+
+/* Xen Domain feature flag used for /sys/hypervisor/properties/features */
+#define XENFEAT_supervisor_mode_kernel		3
+#define XENFEAT_mmu_pt_update_preserve_ad	5
+#define XENFEAT_hvm_callback_vector			8
+
+#define XEN_FEATURES_PV_MASK	(1U << XENFEAT_mmu_pt_update_preserve_ad)
+#define XEN_FEATURES_PVH_MASK	( (1U << XENFEAT_supervisor_mode_kernel) \
+								| (1U << XENFEAT_hvm_callback_vector) )
+
+static const char *virt_types[] = {
+	[VIRT_NONE]	= N_("none"),
+	[VIRT_PARA]	= N_("para"),
+	[VIRT_FULL]	= N_("full"),
+	[VIRT_CONT]	= N_("container"),
+};
+
+static const char *hv_vendors[] = {
+	[HYPER_NONE]	= NULL,
+	[HYPER_XEN]	= "Xen",
+	[HYPER_KVM]	= "KVM",
+	[HYPER_MSHV]	= "Microsoft",
+	[HYPER_VMWARE]  = "VMware",
+	[HYPER_IBM]	= "IBM",
+	[HYPER_VSERVER]	= "Linux-VServer",
+	[HYPER_UML]	= "User-mode Linux",
+	[HYPER_INNOTEK]	= "Innotek GmbH",
+	[HYPER_HITACHI]	= "Hitachi",
+	[HYPER_PARALLELS] = "Parallels",
+	[HYPER_VBOX]	= "Oracle",
+	[HYPER_OS400]	= "OS/400",
+	[HYPER_PHYP]	= "pHyp",
+	[HYPER_SPAR]	= "Unisys s-Par",
+	[HYPER_WSL]	= "Windows Subsystem for Linux"
+};
+
+static const int hv_vendor_pci[] = {
+	[HYPER_NONE]	= 0x0000,
+	[HYPER_XEN]	= 0x5853,
+	[HYPER_KVM]	= 0x0000,
+	[HYPER_MSHV]	= 0x1414,
+	[HYPER_VMWARE]	= 0x15ad,
+	[HYPER_VBOX]	= 0x80ee,
+};
+
+static const int hv_graphics_pci[] = {
+	[HYPER_NONE]	= 0x0000,
+	[HYPER_XEN]	= 0x0001,
+	[HYPER_KVM]	= 0x0000,
+	[HYPER_MSHV]	= 0x5353,
+	[HYPER_VMWARE]	= 0x0710,
+	[HYPER_VBOX]	= 0xbeef,
+};
+
+
+/* dispatching modes */
+static const char *disp_modes[] = {
+	[DISP_HORIZONTAL]	= N_("horizontal"),
+	[DISP_VERTICAL]		= N_("vertical")
+};
+
+static struct polarization_modes polar_modes[] = {
+	[POLAR_UNKNOWN]	   = {"U",  "-"},
+	[POLAR_VLOW]	   = {"VL", "vert-low"},
+	[POLAR_VMEDIUM]	   = {"VM", "vert-medium"},
+	[POLAR_VHIGH]	   = {"VH", "vert-high"},
+	[POLAR_HORIZONTAL] = {"H",  "horizontal"},
+};
+
+static int maxcpus;		/* size in bits of kernel cpu mask */
+
+#define is_cpu_online(_d, _cpu) \
+	((_d) && (_d)->online ? \
+		CPU_ISSET_S((_cpu), CPU_ALLOC_SIZE(maxcpus), (_d)->online) : 0)
+#define is_cpu_present(_d, _cpu) \
+	((_d) && (_d)->present ? \
+		CPU_ISSET_S((_cpu), CPU_ALLOC_SIZE(maxcpus), (_d)->present) : 0)
+
+#define real_cpu_num(_d, _i)	((_d)->idx2cpunum[(_i)])
+
+/*
+ * IDs
+ */
+enum {
+	COL_CPU,
+	COL_CORE,
+	COL_SOCKET,
+	COL_NODE,
+	COL_BOOK,
+	COL_DRAWER,
+	COL_CACHE,
+	COL_POLARIZATION,
+	COL_ADDRESS,
+	COL_CONFIGURED,
+	COL_ONLINE,
+	COL_MAXMHZ,
+	COL_MINMHZ,
+};
+
+/* column description
+ */
+struct lscpu_coldesc {
+	const char *name;
+	const char *help;
+
+	unsigned int  is_abbr:1;	/* name is abbreviation */
+};
+
+static struct lscpu_coldesc coldescs[] =
+{
+	[COL_CPU]          = { "CPU", N_("logical CPU number"), 1 },
+	[COL_CORE]         = { "CORE", N_("logical core number") },
+	[COL_SOCKET]       = { "SOCKET", N_("logical socket number") },
+	[COL_NODE]         = { "NODE", N_("logical NUMA node number") },
+	[COL_BOOK]         = { "BOOK", N_("logical book number") },
+	[COL_DRAWER]       = { "DRAWER", N_("logical drawer number") },
+	[COL_CACHE]        = { "CACHE", N_("shows how caches are shared between CPUs") },
+	[COL_POLARIZATION] = { "POLARIZATION", N_("CPU dispatching mode on virtual hardware") },
+	[COL_ADDRESS]      = { "ADDRESS", N_("physical address of a CPU") },
+	[COL_CONFIGURED]   = { "CONFIGURED", N_("shows if the hypervisor has allocated the CPU") },
+	[COL_ONLINE]       = { "ONLINE", N_("shows if Linux currently makes use of the CPU") },
+	[COL_MAXMHZ]	   = { "MAXMHZ", N_("shows the maximum MHz of the CPU") },
+	[COL_MINMHZ]	   = { "MINMHZ", N_("shows the minimum MHz of the CPU") }
+};
+
+static int
+column_name_to_id(const char *name, size_t namesz)
+{
+	/* case-insensitive search of coldescs[] for the @namesz-char @name; index or -1 */
+	const struct lscpu_coldesc *cd;
+
+	for (cd = coldescs; cd < coldescs + ARRAY_SIZE(coldescs); cd++) {
+		if (strncasecmp(name, cd->name, namesz) == 0
+		    && cd->name[namesz] == '\0')
+			return cd - coldescs;
+	}
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/* Lookup a pattern and get the value from cpuinfo.  Returns 1 and stores an
+ * xstrdup()ed copy of the value in *value if @line matches (@line is modified
+ * in place), otherwise 0.  Format is:
+ *	"<pattern>   : <value>"
+ */
+static int
+lookup(char *line, char *pattern, char **value)
+{
+	char *p, *v;
+	size_t len = strlen(pattern);
+
+	/* don't re-fill already found tags, first one wins */
+	if (!*line || *value)
+		return 0;
+
+	/* the line has to start with the pattern ... */
+	if (strncmp(line, pattern, len))
+		return 0;
+
+	/* ... followed by optional white spaces ... */
+	for (p = line + len; isspace((unsigned char) *p); p++);
+
+	/* ... and the separator */
+	if (*p != ':')
+		return 0;
+
+	/* white spaces in front of the value */
+	for (++p; isspace((unsigned char) *p); p++);
+
+	/* an empty value is not a match */
+	if (!*p)
+		return 0;
+	v = p;
+
+	/* cut trailing white spaces (usually just the newline) in place; the
+	 * last character is kept when the line ends without a newline */
+	for (p = v + strlen(v); isspace((unsigned char) *(p - 1)); p--);
+	*p = '\0';
+
+	*value = xstrdup(v);
+	return 1;
+}
+
+/* Parse extra cache lines contained within /proc/cpuinfo but which are not
+ * part of the cache topology information within the sysfs filesystem.
+ * This is true for all shared caches on e.g. s390. When there are layers of
+ * hypervisors in between it is not known which CPUs share which caches.
+ * Therefore information about shared caches is only available in
+ * /proc/cpuinfo.
+ * Format is:
+ * "cache<nr> : level=<lvl> type=<type> scope=<scope> size=<size> line_size=<lsz> associativity=<as>"
+ */
+static int
+lookup_cache(char *line, struct lscpu_desc *desc)
+{
+	/* returns 1 when @line was parsed and appended to desc->ecaches[], else 0 */
+	struct cpu_cache *cache;
+	long long size;
+	char *p, type;
+	int level;
+
+	/* Make sure line starts with "cache<nr> :" */
+	if (strncmp(line, "cache", 5))
+		return 0;
+	for (p = line + 5; isdigit((unsigned char) *p); p++);
+	for (; isspace((unsigned char) *p); p++);
+	if (*p != ':')
+		return 0;
+
+	/* check each strstr() result for NULL before adding the key length */
+	p = strstr(line, "scope=");
+	/* Skip private caches, also present in sysfs */
+	if (!p || strncmp(p + 6, "Private", 7) == 0)
+		return 0;
+	p = strstr(line, "level=");
+	if (!p || sscanf(p, "level=%d", &level) != 1)
+		return 0;
+	p = strstr(line, "type=");
+	if (!p || !p[5])
+		return 0;
+	type = !strncmp(p + 5, "Data", 4) ? 'd' :
+	       !strncmp(p + 5, "Instruction", 11) ? 'i' : 0;
+	p = strstr(line, "size=");
+	if (!p || sscanf(p, "size=%lld", &size) != 1)
+	       return 0;
+
+	/* append a zeroed entry and fill in its name and size */
+	desc->necaches++;
+	desc->ecaches = xrealloc(desc->ecaches,
+				 desc->necaches * sizeof(struct cpu_cache));
+	cache = &desc->ecaches[desc->necaches - 1];
+	memset(cache, 0 , sizeof(*cache));
+	if (type)
+		xasprintf(&cache->name, "L%d%c", level, type);
+	else
+		xasprintf(&cache->name, "L%d", level);
+	xasprintf(&cache->size, "%lldK", size);
+	return 1;
+}
+
+/* Don't init the mode for platforms where we are not able to
+ * detect that CPU supports 64-bit mode.
+ */
+static int
+init_mode(struct lscpu_modifier *mod)
+{
+	int m = 0;
+	/* Returns the MODE_* bits implied by the architecture this binary was built for. */
+	if (mod->system == SYSTEM_SNAPSHOT)
+		/* reading info from any /{sys,proc} dump, don't mix it with
+		 * information about our real CPU */
+		return 0;
+
+#if defined(__alpha__) || defined(__ia64__)
+	m |= MODE_64BIT;	/* 64bit platforms only */
+#endif
+	/* platforms with 64bit flag in /proc/cpuinfo, define
+	 * 32bit default here */
+#if defined(__i386__) || defined(__x86_64__) || \
+    defined(__s390x__) || defined(__s390__) || defined(__sparc_v9__)
+	m |= MODE_32BIT;
+#endif
+	return m;	/* 0 on every other architecture */
+}
+
+#if defined(HAVE_LIBRTAS)
+#define PROCESSOR_MODULE_INFO	43
+static int strbe16toh(const char *buf, int offset)
+{
+	return ((buf[offset] & 0xff) << 8) + (buf[offset+1] & 0xff);	/* big-endian 16-bit; masks stop sign extension of signed char */
+}
+
+static void read_physical_info_powerpc(struct lscpu_desc *desc)
+{
+	char buf[BUFSIZ];
+	int rc, len, ntypes;
+	/* Fill the physical socket/chip/cores-per-chip counts of @desc from an RTAS system parameter. */
+	desc->physsockets = desc->physchips = desc->physcoresperchip = 0;
+
+	rc = rtas_get_sysparm(PROCESSOR_MODULE_INFO, sizeof(buf), buf);
+	if (rc < 0)
+		return;
+	/* bytes 0-1: big-endian length; the fields read below need at least 8 bytes */
+	len = strbe16toh(buf, 0);
+	if (len < 8)
+		return;
+
+	ntypes = strbe16toh(buf, 2);
+
+	assert(ntypes <= 1);	/* aborts when more than one module type is reported */
+	if (!ntypes)
+		return;
+
+	desc->physsockets = strbe16toh(buf, 4);
+	desc->physchips = strbe16toh(buf, 6);
+	desc->physcoresperchip = strbe16toh(buf, 8);
+}
+#else
+static void read_physical_info_powerpc(
+		struct lscpu_desc *desc __attribute__((__unused__)))
+{
+}
+#endif
+
+
+static void
+read_basicinfo(struct lscpu_desc *desc, struct lscpu_modifier *mod)
+{
+	FILE *fp;
+	char buf[BUFSIZ];
+	struct utsname utsbuf;
+	size_t setsize;
+	cpu_set_t *cpuset = NULL;
+	/* Collect the basic facts in @desc: architecture, cpuinfo fields, op-modes and CPU masks. */
+	/* architecture */
+	if (uname(&utsbuf) == -1)
+		err(EXIT_FAILURE, _("error: uname failed"));
+
+	fp = ul_path_fopen(desc->procfs, "r", "cpuinfo");
+	if (!fp)
+		err(EXIT_FAILURE, _("cannot open %s"), "/proc/cpuinfo");
+	desc->arch = xstrdup(utsbuf.machine);
+
+	/* details: match every cpuinfo line against the known keys, first value found wins */
+	while (fgets(buf, sizeof(buf), fp) != NULL) {
+		if (lookup(buf, "vendor", &desc->vendor)) ;
+		else if (lookup(buf, "vendor_id", &desc->vendor)) ;
+		else if (lookup(buf, "CPU implementer", &desc->vendor)) ; /* ARM and aarch64 */
+		else if (lookup(buf, "family", &desc->family)) ;
+		else if (lookup(buf, "cpu family", &desc->family)) ;
+		else if (lookup(buf, "model", &desc->model)) ;
+		else if (lookup(buf, "CPU part", &desc->model)) ; /* ARM and aarch64 */
+		else if (lookup(buf, "model name", &desc->modelname)) ;
+		else if (lookup(buf, "stepping", &desc->stepping)) ;
+		else if (lookup(buf, "CPU variant", &desc->stepping)) ; /* aarch64 */
+		else if (lookup(buf, "cpu MHz", &desc->mhz)) ;
+		else if (lookup(buf, "cpu MHz dynamic", &desc->dynamic_mhz)) ; /* s390 */
+		else if (lookup(buf, "cpu MHz static", &desc->static_mhz)) ;   /* s390 */
+		else if (lookup(buf, "flags", &desc->flags)) ;		/* x86 */
+		else if (lookup(buf, "features", &desc->flags)) ;	/* s390 */
+		else if (lookup(buf, "Features", &desc->flags)) ;	/* aarch64 */
+		else if (lookup(buf, "type", &desc->flags)) ;		/* sparc64 */
+		else if (lookup(buf, "bogomips", &desc->bogomips)) ;
+		else if (lookup(buf, "BogoMIPS", &desc->bogomips)) ;	/* aarch64 */
+		else if (lookup(buf, "bogomips per cpu", &desc->bogomips)) ; /* s390 */
+		else if (lookup(buf, "cpu", &desc->cpu)) ;
+		else if (lookup(buf, "revision", &desc->revision)) ;
+		else if (lookup(buf, "CPU revision", &desc->revision)) ; /* aarch64 */
+		else if (lookup(buf, "max thread id", &desc->mtid)) ; /* s390 */
+		else if (lookup(buf, "address sizes", &desc->addrsz)) ; /* x86 */
+		else if (lookup_cache(buf, desc)) ;
+		else
+			continue;
+	}
+
+	desc->mode = init_mode(mod);
+	/* scan the space-padded flag list for the svm/vmx and lm/zarch/sun4v/sun4u markers */
+	if (desc->flags) {
+		snprintf(buf, sizeof(buf), " %s ", desc->flags);
+		if (strstr(buf, " svm "))
+			desc->virtflag = xstrdup("svm");
+		else if (strstr(buf, " vmx "))
+			desc->virtflag = xstrdup("vmx");
+		if (strstr(buf, " lm "))
+			desc->mode |= MODE_32BIT | MODE_64BIT;		/* x86_64 */
+		if (strstr(buf, " zarch "))
+			desc->mode |= MODE_32BIT | MODE_64BIT;		/* s390x */
+		if (strstr(buf, " sun4v ") || strstr(buf, " sun4u "))
+			desc->mode |= MODE_32BIT | MODE_64BIT;		/* sparc64 */
+	}
+
+	if (desc->arch && mod->system != SYSTEM_SNAPSHOT) {
+		if (strcmp(desc->arch, "ppc64") == 0)
+			desc->mode |= MODE_32BIT | MODE_64BIT;
+		else if (strcmp(desc->arch, "ppc") == 0)
+			desc->mode |= MODE_32BIT;
+	}
+
+	fclose(fp);
+	/* maxcpus sizes every CPU set handled below (see setsize) */
+	if (ul_path_read_s32(desc->syscpu, &maxcpus, "kernel_max") == 0)
+		/* note that kernel_max is maximum index [NR_CPUS-1] */
+		maxcpus += 1;
+
+	else if (mod->system == SYSTEM_LIVE)
+		/* the root is '/' so we are working with data from the current kernel */
+		maxcpus = get_max_number_of_cpus();
+
+	if (maxcpus <= 0)
+		/* error or we are reading some /sys snapshot instead of the
+		 * real /sys, let's use any crazy number... */
+		maxcpus = 2048;
+
+	setsize = CPU_ALLOC_SIZE(maxcpus);
+	/* "possible" yields the CPU count and the index -> CPU number map; fatal if unreadable */
+	if (ul_path_readf_cpulist(desc->syscpu, &cpuset, maxcpus, "possible") == 0) {
+		int num, idx;
+
+		desc->ncpuspos = CPU_COUNT_S(setsize, cpuset);
+		desc->idx2cpunum = xcalloc(desc->ncpuspos, sizeof(int));
+
+		for (num = 0, idx = 0; num < maxcpus; num++) {
+			if (CPU_ISSET_S(num, setsize, cpuset))
+				desc->idx2cpunum[idx++] = num;
+		}
+		cpuset_free(cpuset);
+		cpuset = NULL;
+	} else
+		err(EXIT_FAILURE, _("failed to determine number of CPUs: %s"),
+				_PATH_SYS_CPU "/possible");
+
+
+	/* get mask for present CPUs */
+	if (ul_path_readf_cpulist(desc->syscpu, &desc->present, maxcpus, "present") == 0)
+		desc->ncpus = CPU_COUNT_S(setsize, desc->present);
+
+	/* get mask for online CPUs */
+	if (ul_path_readf_cpulist(desc->syscpu, &desc->online, maxcpus, "online") == 0)
+		desc->nthreads = CPU_COUNT_S(setsize, desc->online);
+
+	/* get dispatching mode */
+	if (ul_path_read_s32(desc->syscpu, &desc->dispatching, "dispatching") != 0)
+		desc->dispatching = -1;
+
+	if (mod->system == SYSTEM_LIVE)
+		read_physical_info_powerpc(desc);
+	/* machine type: first "Type" line of the sysinfo file, when that file can be opened */
+	if ((fp = ul_path_fopen(desc->procfs, "r", "sysinfo"))) {
+		while (fgets(buf, sizeof(buf), fp) != NULL && !desc->machinetype)
+			lookup(buf, "Type", &desc->machinetype);
+		fclose(fp);
+	}
+}
+
+static int
+has_pci_device(struct lscpu_desc *desc, unsigned int vendor, unsigned int device)
+{
+	/* Returns 1 when the bus/pci/devices file opened through desc->procfs
+	 * lists the given vendor/device ID pair; 0 otherwise or without file. */
+	unsigned int bus, devfn, ven_id, dev_id;
+	int found = 0;
+	FILE *f = ul_path_fopen(desc->procfs, "r", "bus/pci/devices");
+
+	if (!f)
+		return 0;
+
+	 /* for more details about bus/pci/devices format see
+	  * drivers/pci/proc.c in linux kernel
+	  */
+	while (fscanf(f, "%02x%02x\t%04x%04x\t%*[^\n]",
+			&bus, &devfn, &ven_id, &dev_id) == 4) {
+		if (ven_id != vendor || dev_id != device)
+			continue;
+		found = 1;
+		break;
+	}
+
+	fclose(f);
+	return found;
+}
+
+#if defined(__x86_64__) || defined(__i386__)
+
+/*
+ * This CPUID leaf returns the information about the hypervisor.
+ * EAX : maximum input value for CPUID supported by the hypervisor.
+ * EBX, ECX, EDX : Hypervisor vendor ID signature. E.g. VMwareVMware.
+ */
+#define HYPERVISOR_INFO_LEAF   0x40000000
+
+static inline void
+cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
+			 unsigned int *ecx, unsigned int *edx)
+{
+	__asm__(	/* run CPUID and store the four result registers through the pointers */
+#if defined(__PIC__) && defined(__i386__)
+		/* x86 PIC cannot clobber ebx (gcc rejects it): park it in esi around cpuid */
+		"xchg %%ebx, %%esi;"
+		"cpuid;"
+		"xchg %%esi, %%ebx;"
+		: "=S" (*ebx),
+#else
+		"cpuid;"
+		: "=b" (*ebx),
+#endif
+		  "=a" (*eax),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "1" (op), "c"(0));	/* inputs: eax = op (tied to output 1), ecx = 0 */
+}
+
+static void
+read_hypervisor_cpuid(struct lscpu_desc *desc)
+{
+	/* Map the vendor signature that CPUID leaf HYPERVISOR_INFO_LEAF returns
+	 * in EBX, ECX, EDX to desc->hyper; unknown signatures change nothing. */
+	static const struct {
+		const char *sig;
+		size_t len;
+		int hyper;
+	} known[] = {
+		{ "XenVMMXenVMM", 12, HYPER_XEN },
+		{ "KVMKVMKVM",     9, HYPER_KVM },
+		{ "Microsoft Hv", 12, HYPER_MSHV },
+		{ "VMwareVMware", 12, HYPER_VMWARE },
+		{ "UnisysSpar64", 12, HYPER_SPAR },
+	};
+	unsigned int regs[4] = { 0, 0, 0, 0 };	/* eax, ebx, ecx, edx */
+	char id[13] = { 0 };
+	size_t i;
+
+	cpuid(HYPERVISOR_INFO_LEAF, &regs[0], &regs[1], &regs[2], &regs[3]);
+	memcpy(id, &regs[1], 12);	/* ebx, ecx, edx; id[12] stays '\0' */
+	for (i = 0; id[0] && i < ARRAY_SIZE(known); i++)
+		if (!strncmp(known[i].sig, id, known[i].len)) {
+			desc->hyper = known[i].hyper;
+			break;
+		}
+}
+
+#else /* ! (__x86_64__ || __i386__) */
+static void
+read_hypervisor_cpuid(struct lscpu_desc *desc __attribute__((__unused__)))
+{
+}
+#endif
+
+static int is_devtree_compatible(struct lscpu_desc *desc, const char *str)
+{
+	/* Returns 1 when one of the NUL-separated strings in the first 255
+	 * bytes of the device-tree/compatible file equals @str, else 0. */
+	char list[256] = { 0 };
+	const char *item, *end;
+	FILE *f = ul_path_fopen(desc->procfs, "r", "device-tree/compatible");
+
+	if (!f)
+		return 0;
+
+	end = list + fread(list, 1, sizeof(list) - 1, f);
+	fclose(f);
+
+	/* the zeroed final byte guarantees termination of the last item */
+	for (item = list; item < end; item += strlen(item) + 1) {
+		if (strcmp(item, str) == 0)
+			return 1;
+	}
+
+	return 0;
+}
+
+static int
+read_hypervisor_powerpc(struct lscpu_desc *desc)
+{
+	assert(!desc->hyper);
+	/* Sets desc->hyper and desc->virtype from procfs markers; returns desc->hyper. */
+	 /* IBM iSeries: legacy, para-virtualized on top of OS/400 */
+	if (ul_path_access(desc->procfs, F_OK, "iSeries") == 0) {
+		desc->hyper = HYPER_OS400;
+		desc->virtype = VIRT_PARA;
+
+	/* PowerNV (POWER Non-Virtualized, bare-metal) */
+	} else if (is_devtree_compatible(desc, "ibm,powernv")) {
+		desc->hyper = HYPER_NONE;
+		desc->virtype = VIRT_NONE;
+
+	/* PowerVM (IBM's proprietary hypervisor, aka pHyp) */
+	} else if (ul_path_access(desc->procfs, F_OK, "device-tree/ibm,partition-name") == 0
+		   && ul_path_access(desc->procfs, F_OK, "device-tree/hmc-managed?") == 0
+		   && ul_path_access(desc->procfs, F_OK, "device-tree/chosen/qemu,graphic-width") != 0) {
+
+		FILE *fd;
+		desc->hyper = HYPER_PHYP;
+		desc->virtype = VIRT_PARA;
+		/* a partition name whose first word is "full" resets the type to VIRT_NONE */
+		fd = ul_path_fopen(desc->procfs, "r", "device-tree/ibm,partition-name");
+		if (fd) {
+			char buf[256];
+			if (fscanf(fd, "%255s", buf) == 1 && !strcmp(buf, "full"))
+				desc->virtype = VIRT_NONE;
+			fclose(fd);
+		}
+
+	/* Qemu */
+	} else if (is_devtree_compatible(desc, "qemu,pseries")) {
+		desc->hyper = HYPER_KVM;
+		desc->virtype = VIRT_PARA;
+	}
+	return desc->hyper;
+}
+
+#ifdef INCLUDE_VMWARE_BDOOR
+
+/* VMware "backdoor" I/O port protocol constants */
+#define VMWARE_BDOOR_MAGIC          0x564D5868	/* "VMXh", goes to EAX */
+#define VMWARE_BDOOR_PORT           0x5658	/* "VX", goes to EDX */
+#define VMWARE_BDOOR_CMD_GETVERSION 10		/* command number, goes to ECX */
+
+/*
+ * Issue the VMware backdoor GETVERSION command and store the resulting
+ * register values to @eax, @ebx, @ecx and @edx.
+ *
+ * The protocol expects EAX = magic, EBX = command parameter (unused here),
+ * ECX = command and EDX = port. The output operands are listed in the
+ * order ebx(0), eax(1), ecx(2), edx(3), so the matching-constraint inputs
+ * below must follow that numbering. Tying the magic to operand 0 would
+ * load it into EBX and leave EDX (the port) zero.
+ *
+ * The "inl" raises SIGSEGV when the port is not accessible; the caller is
+ * responsible for catching it.
+ */
+static UL_ASAN_BLACKLIST
+void vmware_bdoor(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+	__asm__(
+#if defined(__PIC__) && defined(__i386__)
+		/* x86 PIC cannot clobber ebx -- gcc bitches */
+		"xchg %%ebx, %%esi;"
+		"inl (%%dx), %%eax;"
+		"xchg %%esi, %%ebx;"
+		: "=S" (*ebx),
+#else
+		"inl (%%dx), %%eax;"
+		: "=b" (*ebx),
+#endif
+		  "=a" (*eax),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (0),				/* ebx: no parameter */
+		  "1" (VMWARE_BDOOR_MAGIC),		/* eax */
+		  "2" (VMWARE_BDOOR_CMD_GETVERSION),	/* ecx */
+		  "3" (VMWARE_BDOOR_PORT)		/* edx */
+		: "memory");
+}
+
+/*
+ * Jump target used to recover from SIGSEGV. It is filled by sigsetjmp()
+ * and consumed by siglongjmp(), which both require sigjmp_buf (jmp_buf is
+ * only valid for plain setjmp()/longjmp()).
+ */
+static sigjmp_buf segv_handler_env;
+
+/*
+ * SA_SIGINFO style signal handler; ignores all arguments and resumes
+ * execution at the point saved in segv_handler_env, where sigsetjmp()
+ * then returns 1.
+ */
+static void
+segv_handler(__attribute__((__unused__)) int sig,
+             __attribute__((__unused__)) siginfo_t *info,
+             __attribute__((__unused__)) void *ignored)
+{
+	siglongjmp(segv_handler_env, 1);
+}
+
+/*
+ * Probe the VMware backdoor port. Returns non-zero when the probe answers
+ * with the expected magic value, 0 otherwise (including for non-root users
+ * and when the probe raises SIGSEGV).
+ */
+static int
+is_vmware_platform(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+	struct sigaction act, oact;
+	/* modified between sigsetjmp() and a possible siglongjmp() */
+	volatile int found = 0;
+
+	/*
+	 * FIXME: Not reliable for non-root users. Note it works as expected if
+	 * vmware_bdoor() is not optimized for PIE, but then it fails to build
+	 * on 32bit x86 systems. See lscpu git log for more details (commit
+	 * 7845b91dbc7690064a2be6df690e4aaba728fb04).     kzak [3-Nov-2016]
+	 */
+	if (getuid() != 0)
+		return 0;
+
+	/*
+	 * The assembly routine for vmware detection works
+	 * fine under vmware, even if ran as regular user. But
+	 * on real HW or under other hypervisors, it segfaults (which is
+	 * expected). So we temporarily install SIGSEGV handler to catch
+	 * the signal. All this magic is needed because lscpu
+	 * isn't supposed to require root privileges.
+	 *
+	 * The handler is installed before sigsetjmp() so that "oact" is
+	 * not modified after the jump point is saved and can be used to
+	 * restore the previous handler on both the normal and the SIGSEGV
+	 * path.
+	 */
+	memset(&act, 0, sizeof(act));
+	act.sa_sigaction = segv_handler;
+	act.sa_flags = SA_SIGINFO;
+
+	if (sigaction(SIGSEGV, &act, &oact))
+		err(EXIT_FAILURE, _("cannot set signal handler"));
+
+	/* sigsetjmp() returns non-zero when we get here from segv_handler() */
+	if (sigsetjmp(segv_handler_env, 1) == 0) {
+		vmware_bdoor(&eax, &ebx, &ecx, &edx);
+		found = eax != (uint32_t)-1 && ebx == VMWARE_BDOOR_MAGIC;
+	}
+
+	/* never leave our handler behind, it jumps into this stack frame */
+	if (sigaction(SIGSEGV, &oact, NULL))
+		err(EXIT_FAILURE, _("cannot restore signal handler"));
+
+	return found;
+}
+
+#else /* ! INCLUDE_VMWARE_BDOOR */
+
+/* Built without INCLUDE_VMWARE_BDOOR: no backdoor probe, never report VMware. */
+static int is_vmware_platform(void)
+{
+	return 0;
+}
+
+#endif /* INCLUDE_VMWARE_BDOOR */
+
+/*
+ * Detect the hypervisor/container (desc->hyper), the virtualization type
+ * (desc->virtype) and, for IBM systems, the hypervisor name
+ * (desc->hypervisor).
+ *
+ * Order of the checks: WSL, then (unless a snapshot is inspected) CPUID,
+ * DMI and the VMware backdoor, then a chain of procfs/PCI based
+ * heuristics where the first match wins.
+ */
+static void
+read_hypervisor(struct lscpu_desc *desc, struct lscpu_modifier *mod)
+{
+	FILE *fd;
+
+	/* We have to detect WSL first. is_vmware_platform() crashes on Windows 10. */
+
+	if ((fd = ul_path_fopen(desc->procfs, "r", "sys/kernel/osrelease"))) {
+		char buf[256];
+
+		if (fgets(buf, sizeof(buf), fd) != NULL) {
+			if (strstr(buf, "Microsoft")) {
+				desc->hyper = HYPER_WSL;
+				desc->virtype = VIRT_CONT;
+			}
+		}
+		fclose(fd);
+		if (desc->virtype)
+			return;
+	}
+
+	/* these probes query the running machine, skip them for snapshots */
+	if (mod->system != SYSTEM_SNAPSHOT) {
+		read_hypervisor_cpuid(desc);
+		if (!desc->hyper)
+			desc->hyper = read_hypervisor_dmi();
+		if (!desc->hyper && is_vmware_platform())
+			desc->hyper = HYPER_VMWARE;
+	}
+
+	if (desc->hyper) {
+		desc->virtype = VIRT_FULL;
+
+		if (desc->hyper == HYPER_XEN) {
+			uint32_t features;
+
+			fd = ul_prefix_fopen(desc->prefix, "r", _PATH_SYS_HYP_FEATURES);
+
+			if (fd && fscanf(fd, "%x", &features) == 1) {
+				/* Xen PV domain */
+				if (features & XEN_FEATURES_PV_MASK)
+					desc->virtype = VIRT_PARA;
+				/* Xen PVH domain */
+				else if ((features & XEN_FEATURES_PVH_MASK)
+								== XEN_FEATURES_PVH_MASK)
+					desc->virtype = VIRT_PARA;
+			}
+			if (fd)
+				fclose(fd);
+		}
+	} else if (read_hypervisor_powerpc(desc) > 0) {}
+
+	/* Xen para-virt or dom0 */
+	else if (ul_path_access(desc->procfs, F_OK, "xen") == 0) {
+		int dom0 = 0;
+
+		fd = ul_path_fopen(desc->procfs, "r", "xen/capabilities");
+		if (fd) {
+			char buf[256];
+
+			if (fscanf(fd, "%255s", buf) == 1 &&
+			    !strcmp(buf, "control_d"))
+				dom0 = 1;
+			fclose(fd);
+		}
+		desc->virtype = dom0 ? VIRT_NONE : VIRT_PARA;
+		desc->hyper = HYPER_XEN;
+
+	/* Xen full-virt on non-x86_64 */
+	} else if (has_pci_device(desc, hv_vendor_pci[HYPER_XEN], hv_graphics_pci[HYPER_XEN])) {
+		desc->hyper = HYPER_XEN;
+		desc->virtype = VIRT_FULL;
+	} else if (has_pci_device(desc, hv_vendor_pci[HYPER_VMWARE], hv_graphics_pci[HYPER_VMWARE])) {
+		desc->hyper = HYPER_VMWARE;
+		desc->virtype = VIRT_FULL;
+	} else if (has_pci_device(desc, hv_vendor_pci[HYPER_VBOX], hv_graphics_pci[HYPER_VBOX])) {
+		desc->hyper = HYPER_VBOX;
+		desc->virtype = VIRT_FULL;
+
+	/* IBM PR/SM */
+	} else if ((fd = ul_path_fopen(desc->procfs, "r", "sysinfo"))) {
+		char buf[BUFSIZ];
+
+		desc->hyper = HYPER_IBM;
+		desc->hypervisor = "PR/SM";
+		desc->virtype = VIRT_FULL;
+		while (fgets(buf, sizeof(buf), fd) != NULL) {
+			char *str, *p;
+
+			if (!strstr(buf, "Control Program:"))
+				continue;
+			if (!strstr(buf, "KVM"))
+				desc->hyper = HYPER_IBM;
+			else
+				desc->hyper = HYPER_KVM;
+			p = strchr(buf, ':');
+			if (!p)
+				continue;
+			xasprintf(&str, "%s", p + 1);
+
+			/* remove leading, trailing and repeating whitespace */
+			while (*str == ' ')
+				str++;
+			desc->hypervisor = str;
+
+			/* trim from the end, but never step in front of the
+			 * string (the value may be empty or whitespace only) */
+			str += strlen(str);
+			while (str > desc->hypervisor &&
+			       (str[-1] == '\n' || str[-1] == ' '))
+				*(--str) = '\0';
+
+			/* squeeze runs of spaces; the copy includes the NUL */
+			while ((str = strstr(desc->hypervisor, "  ")))
+				memmove(str, str + 1, strlen(str));
+			break;
+		}
+		fclose(fd);
+	}
+
+	/* OpenVZ/Virtuozzo - /proc/vz dir should exist
+	 *		      /proc/bc should not */
+	else if (ul_path_access(desc->procfs, F_OK, "vz") == 0 &&
+		 ul_path_access(desc->procfs, F_OK, "bc") != 0) {
+		desc->hyper = HYPER_PARALLELS;
+		desc->virtype = VIRT_CONT;
+
+	/* IBM */
+	} else if (desc->vendor &&
+		 (strcmp(desc->vendor, "PowerVM Lx86") == 0 ||
+		  strcmp(desc->vendor, "IBM/S390") == 0)) {
+		desc->hyper = HYPER_IBM;
+		desc->virtype = VIRT_FULL;
+
+	/* User-mode-linux */
+	} else if (desc->modelname && strstr(desc->modelname, "UML")) {
+		desc->hyper = HYPER_UML;
+		desc->virtype = VIRT_PARA;
+
+	/* Linux-VServer */
+	} else if ((fd = ul_path_fopen(desc->procfs, "r", "self/status"))) {
+		char buf[BUFSIZ];
+		char *val = NULL;
+
+		while (fgets(buf, sizeof(buf), fd) != NULL) {
+			if (lookup(buf, "VxID", &val))
+				break;
+		}
+		fclose(fd);
+
+		if (val) {
+			char *org = val;
+
+			/* the value has to be a plain number; isdigit() must
+			 * not be called with a negative char value */
+			while (isdigit((unsigned char) *val))
+				++val;
+			if (!*val) {
+				desc->hyper = HYPER_VSERVER;
+				desc->virtype = VIRT_CONT;
+			}
+			free(org);
+		}
+	}
+}
+
+/*
+ * Append @set to @ary (which holds *@items entries) unless an equal set is
+ * already stored there.
+ *
+ * Returns 0 when @set was stored (*@items is incremented), 1 when it was a
+ * duplicate (@set is freed), and -1 when @ary is NULL (@set is untouched).
+ */
+static int add_cpuset_to_array(cpu_set_t **ary, int *items, cpu_set_t *set)
+{
+	const size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+	int pos = 0;
+
+	if (ary == NULL)
+		return -1;
+
+	while (pos < *items) {
+		if (CPU_EQUAL_S(setsize, set, ary[pos])) {
+			/* already known, the new copy is not needed */
+			CPU_FREE(set);
+			return 1;
+		}
+		pos++;
+	}
+
+	/* not found: pos is the index of the first free slot now */
+	ary[pos] = set;
+	*items = pos + 1;
+	return 0;
+}
+
+/*
+ * Read the sysfs topology of the CPU at position @idx: sibling masks are
+ * collected (de-duplicated) in desc->{core,socket,book,drawer}maps and the
+ * physical IDs are stored to desc->{core,socket,book,drawer}ids[idx].
+ *
+ * The arrays are allocated on the first call. Book and drawer data are
+ * optional and only maintained when the respective mask is readable.
+ */
+static void
+read_topology(struct lscpu_desc *desc, int idx)
+{
+	/* start with NULL, the masks are tested after reads which may fail */
+	cpu_set_t *thread_siblings = NULL, *core_siblings = NULL;
+	cpu_set_t *book_siblings = NULL, *drawer_siblings = NULL;
+	int coreid, socketid, bookid, drawerid;
+	int i, num = real_cpu_num(desc, idx);
+
+	if (ul_path_accessf(desc->syscpu, F_OK, "cpu%d/topology/thread_siblings", num) != 0)
+		return;
+
+	ul_path_readf_cpuset(desc->syscpu, &thread_siblings, maxcpus,
+					"cpu%d/topology/thread_siblings", num);
+	ul_path_readf_cpuset(desc->syscpu, &core_siblings, maxcpus,
+					"cpu%d/topology/core_siblings", num);
+	ul_path_readf_cpuset(desc->syscpu, &book_siblings, maxcpus,
+					"cpu%d/topology/book_siblings", num);
+	ul_path_readf_cpuset(desc->syscpu, &drawer_siblings, maxcpus,
+					"cpu%d/topology/drawer_siblings", num);
+
+	/* thread and core masks are mandatory for everything below */
+	if (!thread_siblings || !core_siblings) {
+		CPU_FREE(thread_siblings);
+		CPU_FREE(core_siblings);
+		CPU_FREE(book_siblings);
+		CPU_FREE(drawer_siblings);
+		return;
+	}
+
+	if (ul_path_readf_s32(desc->syscpu, &coreid, "cpu%d/topology/core_id", num) != 0)
+		coreid = -1;
+
+	if (ul_path_readf_s32(desc->syscpu, &socketid, "cpu%d/topology/physical_package_id", num) != 0)
+		socketid = -1;
+
+	if (ul_path_readf_s32(desc->syscpu, &bookid, "cpu%d/topology/book_id", num) != 0)
+		bookid = -1;
+
+	if (ul_path_readf_s32(desc->syscpu, &drawerid, "cpu%d/topology/drawer_id", num) != 0)
+		drawerid = -1;
+
+	/* first call: estimate the counts and allocate the arrays */
+	if (!desc->coremaps) {
+		int ndrawers, nbooks, nsockets, ncores, nthreads;
+		size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+
+		/* threads within one core */
+		nthreads = CPU_COUNT_S(setsize, thread_siblings);
+		if (!nthreads)
+			nthreads = 1;
+
+		/* cores within one socket */
+		ncores = CPU_COUNT_S(setsize, core_siblings) / nthreads;
+		if (!ncores)
+			ncores = 1;
+
+		/* number of sockets within one book.  Because of odd /
+		 * non-present cpu maps and to keep calculation easy we make
+		 * sure that nsockets and nbooks is at least 1.
+		 */
+		nsockets = desc->ncpus / nthreads / ncores;
+		if (!nsockets)
+			nsockets = 1;
+
+		/* number of books */
+		nbooks = desc->ncpus / nthreads / ncores / nsockets;
+		if (!nbooks)
+			nbooks = 1;
+
+		/* number of drawers */
+		ndrawers = desc->ncpus / nbooks / nthreads / ncores / nsockets;
+		if (!ndrawers)
+			ndrawers = 1;
+
+		/* all threads, see also read_basicinfo()
+		 * -- fallback for kernels without
+		 *    /sys/devices/system/cpu/online.
+		 */
+		if (!desc->nthreads)
+			desc->nthreads = ndrawers * nbooks * nsockets * ncores * nthreads;
+
+		/* For each map we make sure that it can have up to ncpuspos
+		 * entries. This is because we cannot reliably calculate the
+		 * number of cores, sockets and books on all architectures.
+		 * E.g. completely virtualized architectures like s390 may
+		 * have multiple sockets of different sizes.
+		 */
+		desc->coremaps = xcalloc(desc->ncpuspos, sizeof(cpu_set_t *));
+		desc->socketmaps = xcalloc(desc->ncpuspos, sizeof(cpu_set_t *));
+		/* size every ID array by its own element type */
+		desc->coreids = xcalloc(desc->ncpuspos, sizeof(*desc->coreids));
+		desc->socketids = xcalloc(desc->ncpuspos, sizeof(*desc->socketids));
+		for (i = 0; i < desc->ncpuspos; i++)
+			desc->coreids[i] = desc->socketids[i] = -1;
+		if (book_siblings) {
+			desc->bookmaps = xcalloc(desc->ncpuspos, sizeof(cpu_set_t *));
+			desc->bookids = xcalloc(desc->ncpuspos, sizeof(*desc->bookids));
+			for (i = 0; i < desc->ncpuspos; i++)
+				desc->bookids[i] = -1;
+		}
+		if (drawer_siblings) {
+			desc->drawermaps = xcalloc(desc->ncpuspos, sizeof(cpu_set_t *));
+			desc->drawerids = xcalloc(desc->ncpuspos, sizeof(*desc->drawerids));
+			for (i = 0; i < desc->ncpuspos; i++)
+				desc->drawerids[i] = -1;
+		}
+	}
+
+	/* core_siblings describes one socket, thread_siblings one core */
+	add_cpuset_to_array(desc->socketmaps, &desc->nsockets, core_siblings);
+	desc->coreids[idx] = coreid;
+	add_cpuset_to_array(desc->coremaps, &desc->ncores, thread_siblings);
+	desc->socketids[idx] = socketid;
+	if (book_siblings) {
+		add_cpuset_to_array(desc->bookmaps, &desc->nbooks, book_siblings);
+		desc->bookids[idx] = bookid;
+	}
+	if (drawer_siblings) {
+		add_cpuset_to_array(desc->drawermaps, &desc->ndrawers, drawer_siblings);
+		desc->drawerids[idx] = drawerid;
+	}
+}
+
+/*
+ * Read "cpu<N>/polarization" for the CPU at position @idx and store the
+ * matching POLAR_* value to desc->polarization[idx]. The array is
+ * allocated on first use. Nothing is done when dispatching is not
+ * available (desc->dispatching < 0) or the sysfs file does not exist.
+ */
+static void
+read_polarization(struct lscpu_desc *desc, int idx)
+{
+	char mode[64];
+	int num = real_cpu_num(desc, idx);
+
+	if (desc->dispatching < 0)
+		return;
+	if (ul_path_accessf(desc->syscpu, F_OK, "cpu%d/polarization", num) != 0)
+		return;
+	if (!desc->polarization)
+		desc->polarization = xcalloc(desc->ncpuspos, sizeof(int));
+
+	/* do not compare an unset buffer if the file cannot be read */
+	if (ul_path_readf_buffer(desc->syscpu, mode, sizeof(mode),
+				 "cpu%d/polarization", num) <= 0) {
+		desc->polarization[idx] = POLAR_UNKNOWN;
+		return;
+	}
+
+	if (strncmp(mode, "vertical:low", sizeof(mode)) == 0)
+		desc->polarization[idx] = POLAR_VLOW;
+	else if (strncmp(mode, "vertical:medium", sizeof(mode)) == 0)
+		desc->polarization[idx] = POLAR_VMEDIUM;
+	else if (strncmp(mode, "vertical:high", sizeof(mode)) == 0)
+		desc->polarization[idx] = POLAR_VHIGH;
+	else if (strncmp(mode, "horizontal", sizeof(mode)) == 0)
+		desc->polarization[idx] = POLAR_HORIZONTAL;
+	else
+		desc->polarization[idx] = POLAR_UNKNOWN;
+}
+
+/*
+ * Store the content of "cpu<N>/address" to desc->addresses[idx]; the array
+ * is allocated (zero-filled) when the first such file is found.
+ */
+static void
+read_address(struct lscpu_desc *desc, int idx)
+{
+	const int cpu = real_cpu_num(desc, idx);
+	const int missing = ul_path_accessf(desc->syscpu, F_OK, "cpu%d/address", cpu) != 0;
+
+	if (missing)
+		return;
+
+	if (desc->addresses == NULL)
+		desc->addresses = xcalloc(desc->ncpuspos, sizeof(int));
+
+	ul_path_readf_s32(desc->syscpu, desc->addresses + idx, "cpu%d/address", cpu);
+}
+
+/*
+ * Store the content of "cpu<N>/configure" to desc->configured[idx]; the
+ * array is allocated (zero-filled) when the first such file is found.
+ */
+static void
+read_configured(struct lscpu_desc *desc, int idx)
+{
+	const int cpu = real_cpu_num(desc, idx);
+	const int missing = ul_path_accessf(desc->syscpu, F_OK, "cpu%d/configure", cpu) != 0;
+
+	if (missing)
+		return;
+
+	if (desc->configured == NULL)
+		desc->configured = xcalloc(desc->ncpuspos, sizeof(int));
+
+	ul_path_readf_s32(desc->syscpu, desc->configured + idx, "cpu%d/configure", cpu);
+}
+
+/*
+ * Format the highest per-CPU maximum frequency (over all present CPUs with
+ * a known value) as "%.4f" into @buf and return @buf. The result is
+ * "0.0000" when no value is available.
+ */
+static char *
+cpu_max_mhz(struct lscpu_desc *desc, char *buf, size_t bufsz)
+{
+	const size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+	float highest = 0.0;
+	int pos;
+
+	for (pos = 0; desc->present && pos < desc->ncpuspos; pos++) {
+		float mhz;
+
+		if (!CPU_ISSET_S(real_cpu_num(desc, pos), setsize, desc->present))
+			continue;
+		if (!desc->maxmhz[pos])
+			continue;
+
+		mhz = atof(desc->maxmhz[pos]);
+		if (mhz > highest)
+			highest = mhz;
+	}
+
+	snprintf(buf, bufsz, "%.4f", highest);
+	return buf;
+}
+
+/*
+ * Format the lowest per-CPU minimum frequency (over all present CPUs with
+ * a known value) as "%.4f" into @buf and return @buf. The result is
+ * "-1.0000" when no value is available.
+ */
+static char *
+cpu_min_mhz(struct lscpu_desc *desc, char *buf, size_t bufsz)
+{
+	const size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+	float lowest = -1.0;	/* negative means "nothing seen yet" */
+	int pos;
+
+	for (pos = 0; desc->present && pos < desc->ncpuspos; pos++) {
+		float mhz;
+
+		if (!CPU_ISSET_S(real_cpu_num(desc, pos), setsize, desc->present))
+			continue;
+		if (!desc->minmhz[pos])
+			continue;
+
+		mhz = atof(desc->minmhz[pos]);
+		if (lowest < 0.0 || mhz < lowest)
+			lowest = mhz;
+	}
+
+	snprintf(buf, bufsz, "%.4f", lowest);
+	return buf;
+}
+
+
+/*
+ * Read "cpu<N>/cpufreq/cpuinfo_max_freq", divide it by 1000 and keep the
+ * result as a "%.4f" string in desc->maxmhz[idx]. The array is allocated
+ * on first success; nothing happens when the file cannot be read.
+ */
+static void
+read_max_mhz(struct lscpu_desc *desc, int idx)
+{
+	const int cpu = real_cpu_num(desc, idx);
+	int raw;
+
+	if (ul_path_readf_s32(desc->syscpu, &raw, "cpu%d/cpufreq/cpuinfo_max_freq", cpu) != 0)
+		return;
+
+	if (desc->maxmhz == NULL)
+		desc->maxmhz = xcalloc(desc->ncpuspos, sizeof(char *));
+
+	xasprintf(desc->maxmhz + idx, "%.4f", (float) raw / 1000);
+}
+
+/*
+ * Read "cpu<N>/cpufreq/cpuinfo_min_freq", divide it by 1000 and keep the
+ * result as a "%.4f" string in desc->minmhz[idx]. The array is allocated
+ * on first success; nothing happens when the file cannot be read.
+ */
+static void
+read_min_mhz(struct lscpu_desc *desc, int idx)
+{
+	const int cpu = real_cpu_num(desc, idx);
+	int raw;
+
+	if (ul_path_readf_s32(desc->syscpu, &raw, "cpu%d/cpufreq/cpuinfo_min_freq", cpu) != 0)
+		return;
+
+	if (desc->minmhz == NULL)
+		desc->minmhz = xcalloc(desc->ncpuspos, sizeof(char *));
+
+	xasprintf(desc->minmhz + idx, "%.4f", (float) raw / 1000);
+}
+
+/*
+ * qsort() comparator for struct cpu_cache entries: orders by name in
+ * reverse strcmp() order (the second argument is compared to the first).
+ */
+static int
+cachecmp(const void *a, const void *b)
+{
+	const struct cpu_cache *first = a;
+	const struct cpu_cache *second = b;
+
+	return strcmp(second->name, first->name);
+}
+
+/*
+ * Collect cache information for the CPU at position @idx.
+ *
+ * On the first call the number of "cpu<N>/cache/index<M>" directories is
+ * counted and desc->caches is allocated. For every cache the name
+ * ("L<level>[d|i]") and size are read once, and the shared_cpu_map of this
+ * CPU is added to the cache's list of distinct maps.
+ */
+static void
+read_cache(struct lscpu_desc *desc, int idx)
+{
+	char buf[256];
+	int i;
+	int num = real_cpu_num(desc, idx);
+
+	if (!desc->ncaches) {
+		while (ul_path_accessf(desc->syscpu, F_OK,
+					"cpu%d/cache/index%d",
+					num, desc->ncaches) == 0)
+			desc->ncaches++;
+
+		if (!desc->ncaches)
+			return;
+		desc->caches = xcalloc(desc->ncaches, sizeof(*desc->caches));
+	}
+	for (i = 0; i < desc->ncaches; i++) {
+		struct cpu_cache *ca = &desc->caches[i];
+		cpu_set_t *map = NULL;	/* stays NULL if the map is unreadable */
+
+		if (ul_path_accessf(desc->syscpu, F_OK,
+					"cpu%d/cache/index%d", num, i) != 0)
+			continue;
+		if (!ca->name) {
+			/* level 0 is reported if the level file is unreadable */
+			int type = 0, level = 0;
+
+			/* cache type */
+			if (ul_path_readf_buffer(desc->syscpu, buf, sizeof(buf),
+					"cpu%d/cache/index%d/type", num, i) > 0) {
+				if (!strcmp(buf, "Data"))
+					type = 'd';
+				else if (!strcmp(buf, "Instruction"))
+					type = 'i';
+			}
+
+			/* cache level */
+			ul_path_readf_s32(desc->syscpu, &level,
+					"cpu%d/cache/index%d/level", num, i);
+			if (type)
+				snprintf(buf, sizeof(buf), "L%d%c", level, type);
+			else
+				snprintf(buf, sizeof(buf), "L%d", level);
+
+			ca->name = xstrdup(buf);
+
+			/* cache size */
+			if (ul_path_readf_string(desc->syscpu, &ca->size,
+					"cpu%d/cache/index%d/size", num, i) < 0)
+				ca->size = xstrdup("unknown size");
+		}
+
+		/* information about how CPUs share different caches */
+		ul_path_readf_cpuset(desc->syscpu, &map, maxcpus,
+				  "cpu%d/cache/index%d/shared_cpu_map", num, i);
+
+		if (!ca->sharedmaps)
+			ca->sharedmaps = xcalloc(desc->ncpuspos, sizeof(cpu_set_t *));
+
+		/* never store or compare a map which could not be read */
+		if (map)
+			add_cpuset_to_array(ca->sharedmaps, &ca->nsharedmaps, map);
+	}
+}
+
+/*
+ * Returns 1 when @d looks like a NUMA node directory entry, i.e. it is
+ * named "node<digits>" and (where d_type is available) is a directory or
+ * of unknown type; returns 0 otherwise, including for a NULL @d.
+ */
+static inline int is_node_dirent(struct dirent *d)
+{
+	if (d == NULL)
+		return 0;
+#ifdef _DIRENT_HAVE_D_TYPE
+	if (d->d_type != DT_DIR && d->d_type != DT_UNKNOWN)
+		return 0;
+#endif
+	if (strncmp(d->d_name, "node", 4) != 0)
+		return 0;
+
+	/* everything after the prefix has to be a number */
+	return isdigit_string(d->d_name + 4) != 0;
+}
+
+/* qsort() comparator for int node numbers, ascending order. */
+static int
+nodecmp(const void *ap, const void *bp)
+{
+	const int lhs = *(const int *) ap;
+	const int rhs = *(const int *) bp;
+
+	return lhs - rhs;
+}
+
+/*
+ * Scan _PATH_SYS_NODE (below desc->prefix) for "node<N>" directories and
+ * fill desc->nnodes, desc->idx2nodenum (sorted node numbers) and
+ * desc->nodemaps (the "cpumap" of every node, in the same order).
+ * desc->nnodes stays 0 when the directory is missing or has no nodes.
+ */
+static void
+read_nodes(struct lscpu_desc *desc)
+{
+	struct path_cxt *sysnode;
+	struct dirent *ent;
+	DIR *dir;
+	int n;
+
+	desc->nnodes = 0;
+
+	sysnode = ul_new_path(_PATH_SYS_NODE);
+	if (!sysnode)
+		err(EXIT_FAILURE, _("failed to initialize %s handler"), _PATH_SYS_NODE);
+	ul_path_set_prefix(sysnode, desc->prefix);
+
+	dir = ul_path_opendir(sysnode, NULL);
+	if (dir != NULL) {
+		/* first pass: count the nodes */
+		for (ent = readdir(dir); ent != NULL; ent = readdir(dir)) {
+			if (is_node_dirent(ent))
+				desc->nnodes++;
+		}
+
+		if (desc->nnodes) {
+			desc->nodemaps = xcalloc(desc->nnodes, sizeof(cpu_set_t *));
+			desc->idx2nodenum = xmalloc(desc->nnodes * sizeof(int));
+
+			/* second pass: remember the node numbers */
+			rewinddir(dir);
+			n = 0;
+			while ((ent = readdir(dir)) && n < desc->nnodes) {
+				if (!is_node_dirent(ent))
+					continue;
+				desc->idx2nodenum[n++] = strtol_or_err(((ent->d_name) + 4),
+						_("Failed to extract the node number"));
+			}
+		}
+		closedir(dir);
+	}
+
+	/* nnodes is non-zero only if the directory has been scanned */
+	if (desc->nnodes) {
+		qsort(desc->idx2nodenum, desc->nnodes, sizeof(int), nodecmp);
+
+		/* information about how nodes share different CPUs */
+		for (n = 0; n < desc->nnodes; n++)
+			ul_path_readf_cpuset(sysnode, &desc->nodemaps[n], maxcpus,
+					"node%d/cpumap", desc->idx2nodenum[n]);
+	}
+
+	ul_unref_path(sysnode);
+}
+
+/*
+ * Format one topology cell (core, socket, book or drawer) into @buf.
+ *
+ * In physical mode the ID from @ids[@idx] is printed, or "-" when the ID
+ * is -1 or the @ids array does not exist at all. Otherwise the index of
+ * the first map in @maps containing @cpu is printed; @buf is left
+ * untouched when there is no such map.
+ */
+static void
+format_topology_cell(char *buf, size_t bufsz, int physical,
+		     const int *ids, int idx,
+		     cpu_set_t **maps, int nmaps,
+		     int cpu, size_t setsize)
+{
+	size_t pos;
+
+	if (physical) {
+		if (!ids || ids[idx] == -1)
+			snprintf(buf, bufsz, "-");
+		else
+			snprintf(buf, bufsz, "%d", ids[idx]);
+	} else if (maps &&
+		   cpuset_ary_isset(cpu, maps, nmaps, setsize, &pos) == 0)
+		snprintf(buf, bufsz, "%zu", pos);
+}
+
+/*
+ * Write the text for column @col of the CPU at position @idx into @buf
+ * (@bufsz bytes) and return @buf. The buffer holds an empty string when no
+ * data is available for the column. Returns NULL only when the COL_CACHE
+ * text does not fit into @buf.
+ */
+static char *
+get_cell_data(struct lscpu_desc *desc, int idx, int col,
+	      struct lscpu_modifier *mod,
+	      char *buf, size_t bufsz)
+{
+	size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+	size_t i;
+	int cpu = real_cpu_num(desc, idx);
+
+	*buf = '\0';
+
+	switch (col) {
+	case COL_CPU:
+		snprintf(buf, bufsz, "%d", cpu);
+		break;
+	case COL_CORE:
+		format_topology_cell(buf, bufsz, mod->physical,
+				     desc->coreids, idx,
+				     desc->coremaps, desc->ncores,
+				     cpu, setsize);
+		break;
+	case COL_SOCKET:
+		format_topology_cell(buf, bufsz, mod->physical,
+				     desc->socketids, idx,
+				     desc->socketmaps, desc->nsockets,
+				     cpu, setsize);
+		break;
+	case COL_NODE:
+		if (cpuset_ary_isset(cpu, desc->nodemaps,
+				     desc->nnodes, setsize, &i) == 0)
+			snprintf(buf, bufsz, "%d", desc->idx2nodenum[i]);
+		break;
+	case COL_DRAWER:
+		/* drawer arrays exist only if the kernel exports drawers */
+		format_topology_cell(buf, bufsz, mod->physical,
+				     desc->drawerids, idx,
+				     desc->drawermaps, desc->ndrawers,
+				     cpu, setsize);
+		break;
+	case COL_BOOK:
+		/* book arrays exist only if the kernel exports books */
+		format_topology_cell(buf, bufsz, mod->physical,
+				     desc->bookids, idx,
+				     desc->bookmaps, desc->nbooks,
+				     cpu, setsize);
+		break;
+	case COL_CACHE:
+	{
+		char *p = buf;
+		size_t sz = bufsz;
+		int j;
+
+		/* one index per cache, last cache first, joined by a
+		 * separator (',' in compat mode, ':' otherwise) */
+		for (j = desc->ncaches - 1; j >= 0; j--) {
+			struct cpu_cache *ca = &desc->caches[j];
+
+			if (cpuset_ary_isset(cpu, ca->sharedmaps,
+					     ca->nsharedmaps, setsize, &i) == 0) {
+				int x = snprintf(p, sz, "%zu", i);
+				if (x < 0 || (size_t) x >= sz)
+					return NULL;
+				p += x;
+				sz -= x;
+			}
+			if (j != 0) {
+				if (sz < 2)
+					return NULL;
+				*p++ = mod->compat ? ',' : ':';
+				*p = '\0';
+				sz--;
+			}
+		}
+		break;
+	}
+	case COL_POLARIZATION:
+		if (desc->polarization) {
+			int x = desc->polarization[idx];
+
+			snprintf(buf, bufsz, "%s",
+				 mod->mode == OUTPUT_PARSABLE ?
+						polar_modes[x].parsable :
+						polar_modes[x].readable);
+		}
+		break;
+	case COL_ADDRESS:
+		if (desc->addresses)
+			snprintf(buf, bufsz, "%d", desc->addresses[idx]);
+		break;
+	case COL_CONFIGURED:
+		if (!desc->configured)
+			break;
+		if (mod->mode == OUTPUT_PARSABLE)
+			snprintf(buf, bufsz, "%s",
+				 desc->configured[idx] ? _("Y") : _("N"));
+		else
+			snprintf(buf, bufsz, "%s",
+				 desc->configured[idx] ? _("yes") : _("no"));
+		break;
+	case COL_ONLINE:
+		if (!desc->online)
+			break;
+		if (mod->mode == OUTPUT_PARSABLE)
+			snprintf(buf, bufsz, "%s",
+				 is_cpu_online(desc, cpu) ? _("Y") : _("N"));
+		else
+			snprintf(buf, bufsz, "%s",
+				 is_cpu_online(desc, cpu) ? _("yes") : _("no"));
+		break;
+	case COL_MAXMHZ:
+		if (desc->maxmhz && desc->maxmhz[idx])
+			xstrncpy(buf, desc->maxmhz[idx], bufsz);
+		break;
+	case COL_MINMHZ:
+		if (desc->minmhz && desc->minmhz[idx])
+			xstrncpy(buf, desc->minmhz[idx], bufsz);
+		break;
+	}
+	return buf;
+}
+
+/*
+ * Write the header text for column @col into @buf and return @buf.
+ *
+ * For COL_CACHE with at least one known cache the header is the list of
+ * cache names (last cache first) joined by ',' in compat mode or ':'
+ * otherwise; NULL is returned when that list does not fit. Every other
+ * case uses the static name from coldescs[].
+ */
+static char *
+get_cell_header(struct lscpu_desc *desc, int col,
+		struct lscpu_modifier *mod,
+	        char *buf, size_t bufsz)
+{
+	char *out = buf;
+	size_t room = bufsz;
+	int n;
+
+	*buf = '\0';
+
+	if (col != COL_CACHE || !desc->ncaches) {
+		snprintf(buf, bufsz, "%s", coldescs[col].name);
+		return buf;
+	}
+
+	for (n = desc->ncaches - 1; n >= 0; n--) {
+		const int len = snprintf(out, room, "%s", desc->caches[n].name);
+
+		if (len < 0 || (size_t) len >= room)
+			return NULL;
+		out += len;
+		room -= len;
+
+		/* no separator after the last name */
+		if (n == 0)
+			continue;
+		if (room < 2)
+			return NULL;
+		*out++ = mod->compat ? ',' : ':';
+		*out = '\0';
+		room--;
+	}
+	return buf;
+}
+
+/*
+ * [-p] backend, we support two parsable formats:
+ *
+ * 1) "compatible" -- this format is compatible with the original lscpu(1)
+ * output and it contains fixed set of the columns. The CACHE columns are at
+ * the end of the line and the CACHE is not printed if the number of the caches
+ * is zero. The CACHE columns are separated by two commas, for example:
+ *
+ *    $ lscpu --parse
+ *    # CPU,Core,Socket,Node,,L1d,L1i,L2
+ *    0,0,0,0,,0,0,0
+ *    1,1,0,0,,1,1,0
+ *
+ * 2) "user defined output" -- this format prints always all columns without
+ * special prefix for CACHE column. If there are not CACHEs then the column is
+ * empty and the header "Cache" is printed rather than a real name of the cache.
+ * The CACHE columns are separated by ':'.
+ *
+ *	$ lscpu --parse=CPU,CORE,SOCKET,NODE,CACHE
+ *	# CPU,Core,Socket,Node,L1d:L1i:L2
+ *	0,0,0,0,0:0:0
+ *	1,1,0,0,1:1:0
+ *
+ * @cols holds @ncols column IDs (COL_*) in output order. CPUs are filtered
+ * by mod->online / mod->offline and by the present mask.
+ */
+static void
+print_parsable(struct lscpu_desc *desc, int cols[], int ncols,
+	       struct lscpu_modifier *mod)
+{
+	char buf[BUFSIZ], *data;
+	int i;
+
+	/*
+	 * Header
+	 */
+	printf(_(
+	"# The following is the parsable format, which can be fed to other\n"
+	"# programs. Each different item in every column has an unique ID\n"
+	"# starting from zero.\n"));
+
+	fputs("# ", stdout);
+	for (i = 0; i < ncols; i++) {
+		int col = cols[i];
+
+		if (col == COL_CACHE) {
+			if (mod->compat && !desc->ncaches)
+				continue;
+			/* compat mode: extra comma in front of the caches */
+			if (mod->compat && i != 0)
+				putchar(',');
+		}
+		if (i > 0)
+			putchar(',');
+
+		data = get_cell_header(desc, col, mod, buf, sizeof(buf));
+
+		if (data && * data && col != COL_CACHE &&
+		    !coldescs[col].is_abbr) {
+			/*
+			 * For normal column names use mixed case (e.g. "Socket")
+			 */
+			char *p = data + 1;
+
+			/* tolower() requires an unsigned char value */
+			while (*p != '\0') {
+				*p = tolower((unsigned char) *p);
+				p++;
+			}
+		}
+		fputs(data && *data ? data : "", stdout);
+	}
+	putchar('\n');
+
+	/*
+	 * Data
+	 */
+	for (i = 0; i < desc->ncpuspos; i++) {
+		int c;
+		int cpu = real_cpu_num(desc, i);
+
+		if (!mod->offline && desc->online && !is_cpu_online(desc, cpu))
+			continue;
+		if (!mod->online && desc->online && is_cpu_online(desc, cpu))
+			continue;
+		if (desc->present && !is_cpu_present(desc, cpu))
+			continue;
+		for (c = 0; c < ncols; c++) {
+			if (mod->compat && cols[c] == COL_CACHE) {
+				if (!desc->ncaches)
+					continue;
+				/* same double comma as in the header */
+				if (c > 0)
+					putchar(',');
+			}
+			if (c > 0)
+				putchar(',');
+
+			data = get_cell_data(desc, i, cols[c], mod,
+					     buf, sizeof(buf));
+			fputs(data && *data ? data : "", stdout);
+		}
+		putchar('\n');
+	}
+}
+
+/*
+ * [-e] backend
+ *
+ * Prints one libsmartcols table row per CPU with the @ncols columns listed
+ * in @cols; empty cells are shown as "-". CPUs are filtered by
+ * mod->online / mod->offline and by the present mask. JSON output is
+ * enabled by mod->json.
+ */
+static void
+print_readable(struct lscpu_desc *desc, int cols[], int ncols,
+	       struct lscpu_modifier *mod)
+{
+	struct libscols_table *table;
+	char cell[BUFSIZ];
+	int n;
+
+	scols_init_debug(0);
+
+	table = scols_new_table();
+	if (table == NULL)
+		 err(EXIT_FAILURE, _("failed to allocate output table"));
+	if (mod->json) {
+		scols_table_enable_json(table, 1);
+		scols_table_set_name(table, "cpus");
+	}
+
+	/* one table column per requested field */
+	for (n = 0; n < ncols; n++) {
+		const char *title = get_cell_header(desc, cols[n], mod,
+						    cell, sizeof(cell));
+
+		if (scols_table_new_column(table, title, 0, 0) == NULL)
+			err(EXIT_FAILURE, _("failed to allocate output column"));
+	}
+
+	/* one table line per CPU which passes the filters */
+	for (n = 0; n < desc->ncpuspos; n++) {
+		const int cpu = real_cpu_num(desc, n);
+		struct libscols_line *row;
+		int c;
+
+		if (!mod->offline && desc->online && !is_cpu_online(desc, cpu))
+			continue;
+		if (!mod->online && desc->online && is_cpu_online(desc, cpu))
+			continue;
+		if (desc->present && !is_cpu_present(desc, cpu))
+			continue;
+
+		row = scols_table_new_line(table, NULL);
+		if (row == NULL)
+			err(EXIT_FAILURE, _("failed to allocate output line"));
+
+		for (c = 0; c < ncols; c++) {
+			const char *text = get_cell_data(desc, n, cols[c], mod,
+							 cell, sizeof(cell));
+
+			if (text == NULL || *text == '\0')
+				text = "-";
+			if (scols_line_set_data(row, c, text) != 0)
+				err(EXIT_FAILURE, _("failed to add output data"));
+		}
+	}
+
+	scols_print_table(table);
+	scols_unref_table(table);
+}
+
+
+/*
+ * Append a line to the summary table @tb: column 0 gets @txt, column 1
+ * gets the printf-like formatted @fmt string. The formatted string is
+ * handed over to the line by reference.
+ */
+static void __attribute__ ((__format__(printf, 3, 4)))
+	add_summary_sprint(struct libscols_table *tb,
+			const char *txt,
+			const char *fmt,
+			...)
+{
+	struct libscols_line *line;
+	char *value;
+	va_list ap;
+
+	line = scols_table_new_line(tb, NULL);
+	if (line == NULL)
+		err(EXIT_FAILURE, _("failed to allocate output line"));
+
+	/* description column */
+	scols_line_set_data(line, 0, txt);
+
+	/* data column */
+	va_start(ap, fmt);
+	xvasprintf(&value, fmt, ap);
+	va_end(ap);
+
+	if (value != NULL && scols_line_refer_data(line, 1, value) != 0)
+		 err(EXIT_FAILURE, _("failed to add output data"));
+}
+
+/* shortcuts for a plain number and a plain string in the data column */
+#define add_summary_n(tb, txt, num)	add_summary_sprint(tb, txt, "%d", num)
+#define add_summary_s(tb, txt, str)	add_summary_sprint(tb, txt, "%s", str)
+
+/*
+ * Add a summary line named @key with the CPUs of @set, printed as a hex
+ * mask when @hex is non-zero and as a CPU list otherwise.
+ */
+static void
+print_cpuset(struct libscols_table *tb,
+	     const char *key, cpu_set_t *set, int hex)
+{
+	const size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+	const size_t buflen = 7 * maxcpus;
+	char text[buflen];
+	char *res;
+
+	res = hex ? cpumask_create(text, buflen, set, setsize)
+		  : cpulist_create(text, buflen, set, setsize);
+
+	add_summary_s(tb, key, res);
+}
+
+/*
+ * default output
+ */
+static void
+print_summary(struct lscpu_desc *desc, struct lscpu_modifier *mod)
+{
+	char buf[BUFSIZ];
+	int i = 0;
+	size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+	struct libscols_table *tb;
+
+	scols_init_debug(0);
+
+	tb = scols_new_table();
+	if (!tb)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+
+	scols_table_enable_noheadings(tb, 1);
+	if (mod->json) {
+		scols_table_enable_json(tb, 1);
+		scols_table_set_name(tb, "lscpu");
+	}
+
+	if (scols_table_new_column(tb, "field", 0, 0) == NULL ||
+	    scols_table_new_column(tb, "data", 0, SCOLS_FL_NOEXTREMES) == NULL)
+		err(EXIT_FAILURE, _("failed to initialize output column"));
+
+	add_summary_s(tb, _("Architecture:"), desc->arch);
+	if (desc->mode) {
+		char *p = buf;
+
+		if (desc->mode & MODE_32BIT) {
+			strcpy(p, "32-bit, ");
+			p += 8;
+		}
+		if (desc->mode & MODE_64BIT) {
+			strcpy(p, "64-bit, ");
+			p += 8;
+		}
+		*(p - 2) = '\0';
+		add_summary_s(tb, _("CPU op-mode(s):"), buf);
+	}
+#if !defined(WORDS_BIGENDIAN)
+	add_summary_s(tb, _("Byte Order:"), "Little Endian");
+#else
+	add_summary_s(tb, _("Byte Order:"), "Big Endian");
+#endif
+
+	if (desc->addrsz)
+		add_summary_s(tb, _("Address sizes:"), desc->addrsz);
+
+	add_summary_n(tb, _("CPU(s):"), desc->ncpus);
+
+	if (desc->online)
+		print_cpuset(tb, mod->hex ? _("On-line CPU(s) mask:") :
+					    _("On-line CPU(s) list:"),
+				desc->online, mod->hex);
+
+	if (desc->online && CPU_COUNT_S(setsize, desc->online) != desc->ncpus) {
+		cpu_set_t *set;
+
+		/* Linux kernel provides cpuset of off-line CPUs that contains
+		 * all configured CPUs (see /sys/devices/system/cpu/offline),
+		 * but want to print real (present in system) off-line CPUs only.
+		 */
+		set = cpuset_alloc(maxcpus, NULL, NULL);
+		if (!set)
+			err(EXIT_FAILURE, _("failed to callocate cpu set"));
+		CPU_ZERO_S(setsize, set);
+		for (i = 0; i < desc->ncpuspos; i++) {
+			int cpu = real_cpu_num(desc, i);
+			if (!is_cpu_online(desc, cpu) && is_cpu_present(desc, cpu))
+				CPU_SET_S(cpu, setsize, set);
+		}
+		print_cpuset(tb, mod->hex ? _("Off-line CPU(s) mask:") :
+					    _("Off-line CPU(s) list:"),
+			     set, mod->hex);
+		cpuset_free(set);
+	}
+
+	if (desc->nsockets) {
+		int threads_per_core, cores_per_socket, sockets_per_book;
+		int books_per_drawer, drawers;
+		FILE *fd;
+
+		threads_per_core = cores_per_socket = sockets_per_book = 0;
+		books_per_drawer = drawers = 0;
+		/* s390 detects its cpu topology via /proc/sysinfo, if present.
+		 * Using simply the cpu topology masks in sysfs will not give
+		 * usable results since everything is virtualized. E.g.
+		 * virtual core 0 may have only 1 cpu, but virtual core 2 may
+		 * five cpus.
+		 * If the cpu topology is not exported (e.g. 2nd level guest)
+		 * fall back to old calculation scheme.
+		 */
+		if ((fd = ul_path_fopen(desc->procfs, "r", "sysinfo"))) {
+			int t0, t1;
+
+			while (fd && fgets(buf, sizeof(buf), fd) != NULL) {
+				if (sscanf(buf, "CPU Topology SW:%d%d%d%d%d%d",
+					   &t0, &t1, &drawers, &books_per_drawer,
+					   &sockets_per_book,
+					   &cores_per_socket) == 6)
+					break;
+			}
+			if (fd)
+				fclose(fd);
+		}
+		if (desc->mtid)
+			threads_per_core = atoi(desc->mtid) + 1;
+		add_summary_n(tb, _("Thread(s) per core:"),
+			threads_per_core ?: desc->nthreads / desc->ncores);
+		add_summary_n(tb, _("Core(s) per socket:"),
+			cores_per_socket ?: desc->ncores / desc->nsockets);
+		if (desc->nbooks) {
+			add_summary_n(tb, _("Socket(s) per book:"),
+				sockets_per_book ?: desc->nsockets / desc->nbooks);
+			if (desc->ndrawers) {
+				add_summary_n(tb, _("Book(s) per drawer:"),
+					books_per_drawer ?: desc->nbooks / desc->ndrawers);
+				add_summary_n(tb, _("Drawer(s):"), drawers ?: desc->ndrawers);
+			} else {
+				add_summary_n(tb, _("Book(s):"), books_per_drawer ?: desc->nbooks);
+			}
+		} else {
+			add_summary_n(tb, _("Socket(s):"), sockets_per_book ?: desc->nsockets);
+		}
+	}
+	if (desc->nnodes)
+		add_summary_n(tb, _("NUMA node(s):"), desc->nnodes);
+	if (desc->vendor)
+		add_summary_s(tb, _("Vendor ID:"), desc->vendor);
+	if (desc->machinetype)
+		add_summary_s(tb, _("Machine type:"), desc->machinetype);
+	if (desc->family)
+		add_summary_s(tb, _("CPU family:"), desc->family);
+	if (desc->model || desc->revision)
+		add_summary_s(tb, _("Model:"), desc->revision ? desc->revision : desc->model);
+	if (desc->modelname || desc->cpu)
+		add_summary_s(tb, _("Model name:"), desc->cpu ? desc->cpu : desc->modelname);
+	if (desc->stepping)
+		add_summary_s(tb, _("Stepping:"), desc->stepping);
+	if (desc->mhz)
+		add_summary_s(tb, _("CPU MHz:"), desc->mhz);
+	if (desc->dynamic_mhz)
+		add_summary_s(tb, _("CPU dynamic MHz:"), desc->dynamic_mhz);
+	if (desc->static_mhz)
+		add_summary_s(tb, _("CPU static MHz:"), desc->static_mhz);
+	if (desc->maxmhz)
+		add_summary_s(tb, _("CPU max MHz:"), cpu_max_mhz(desc, buf, sizeof(buf)));
+	if (desc->minmhz)
+		add_summary_s(tb, _("CPU min MHz:"), cpu_min_mhz(desc, buf, sizeof(buf)));
+	if (desc->bogomips)
+		add_summary_s(tb, _("BogoMIPS:"), desc->bogomips);
+	if (desc->virtflag) {
+		if (!strcmp(desc->virtflag, "svm"))
+			add_summary_s(tb, _("Virtualization:"), "AMD-V");
+		else if (!strcmp(desc->virtflag, "vmx"))
+			add_summary_s(tb, _("Virtualization:"), "VT-x");
+	}
+	if (desc->hypervisor)
+		add_summary_s(tb, _("Hypervisor:"), desc->hypervisor);
+	if (desc->hyper) {
+		add_summary_s(tb, _("Hypervisor vendor:"), hv_vendors[desc->hyper]);
+		add_summary_s(tb, _("Virtualization type:"), _(virt_types[desc->virtype]));
+	}
+	if (desc->dispatching >= 0)
+		add_summary_s(tb, _("Dispatching mode:"), _(disp_modes[desc->dispatching]));
+	if (desc->ncaches) {
+		for (i = desc->ncaches - 1; i >= 0; i--) {
+			snprintf(buf, sizeof(buf),
+					_("%s cache:"), desc->caches[i].name);
+			add_summary_s(tb, buf, desc->caches[i].size);
+		}
+	}
+	if (desc->necaches) {
+		for (i = desc->necaches - 1; i >= 0; i--) {
+			snprintf(buf, sizeof(buf),
+					_("%s cache:"), desc->ecaches[i].name);
+			add_summary_s(tb, buf, desc->ecaches[i].size);
+		}
+	}
+
+	for (i = 0; i < desc->nnodes; i++) {
+		snprintf(buf, sizeof(buf), _("NUMA node%d CPU(s):"), desc->idx2nodenum[i]);
+		print_cpuset(tb, buf, desc->nodemaps[i], mod->hex);
+	}
+
+	if (desc->physsockets) {
+		add_summary_n(tb, _("Physical sockets:"), desc->physsockets);
+		add_summary_n(tb, _("Physical chips:"), desc->physchips);
+		add_summary_n(tb, _("Physical cores/chip:"), desc->physcoresperchip);
+	}
+
+	if (desc->flags)
+		add_summary_s(tb, _("Flags:"), desc->flags);
+
+	scols_print_table(tb);
+	scols_unref_table(tb);
+}
+
+/*
+ * Print the help text (synopsis, options and the list of available output
+ * columns) to stdout and terminate with EXIT_SUCCESS; never returns.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	size_t i;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Display information about the CPU architecture.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -a, --all               print both online and offline CPUs (default for -e)\n"), out);
+	fputs(_(" -b, --online            print online CPUs only (default for -p)\n"), out);
+	fputs(_(" -c, --offline           print offline CPUs only\n"), out);
+	fputs(_(" -J, --json              use JSON for default or extended format\n"), out);
+	fputs(_(" -e, --extended[=<list>] print out an extended readable format\n"), out);
+	fputs(_(" -p, --parse[=<list>]    print out a parsable format\n"), out);
+	fputs(_(" -s, --sysroot <dir>     use specified directory as system root\n"), out);
+	fputs(_(" -x, --hex               print hexadecimal masks rather than lists of CPUs\n"), out);
+	fputs(_(" -y, --physical          print physical instead of logical IDs\n"), out);
+	/* --output-all is accepted by main() (long option only), so list it too */
+	fputs(_("     --output-all        print all available columns for -e or -p\n"), out);
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(25));
+
+	fputs(USAGE_COLUMNS, out);
+	/* one line per known column: right-aligned name, then translated help */
+	for (i = 0; i < ARRAY_SIZE(coldescs); i++)
+		fprintf(out, " %13s  %s\n", coldescs[i].name, _(coldescs[i].help));
+
+	printf(USAGE_MAN_TAIL("lscpu(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * lscpu entry point: parse the command line, read the CPU description from
+ * sysfs/procfs (optionally below the --sysroot prefix) and print it in the
+ * summary, parsable (-p) or extended readable (-e) format.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE on bad usage; initialization errors
+ * terminate the program through err().
+ */
+int main(int argc, char *argv[])
+{
+	struct lscpu_modifier _mod = { .mode = OUTPUT_SUMMARY }, *mod = &_mod;
+	struct lscpu_desc _desc = { .flags = NULL }, *desc = &_desc;
+	int c, i;
+	int columns[ARRAY_SIZE(coldescs)], ncolumns = 0;
+	int cpu_modifier_specified = 0;
+	size_t setsize;
+
+	enum {
+		OPT_OUTPUT_ALL = CHAR_MAX + 1,
+	};
+	static const struct option longopts[] = {
+		{ "all",        no_argument,       NULL, 'a' },
+		{ "online",     no_argument,       NULL, 'b' },
+		{ "offline",    no_argument,       NULL, 'c' },
+		{ "help",	no_argument,       NULL, 'h' },
+		{ "extended",	optional_argument, NULL, 'e' },
+		{ "json",       no_argument,       NULL, 'J' },
+		{ "parse",	optional_argument, NULL, 'p' },
+		{ "sysroot",	required_argument, NULL, 's' },
+		{ "physical",	no_argument,	   NULL, 'y' },
+		{ "hex",	no_argument,	   NULL, 'x' },
+		{ "version",	no_argument,	   NULL, 'V' },
+		{ "output-all",	no_argument,	   NULL, OPT_OUTPUT_ALL },
+		{ NULL,		0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'a','b','c' },
+		{ 'e','p' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv, "abce::hJp::s:xyV", longopts, NULL)) != -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch (c) {
+		case 'a':
+			mod->online = mod->offline = 1;
+			cpu_modifier_specified = 1;
+			break;
+		case 'b':
+			mod->online = 1;
+			cpu_modifier_specified = 1;
+			break;
+		case 'c':
+			mod->offline = 1;
+			cpu_modifier_specified = 1;
+			break;
+		case 'h':
+			usage();
+		case 'J':
+			mod->json = 1;
+			break;
+		case 'p':
+		case 'e':
+			if (optarg) {
+				/* "-e=list" leaves the '=' in optarg; skip it */
+				if (*optarg == '=')
+					optarg++;
+				ncolumns = string_to_idarray(optarg,
+						columns, ARRAY_SIZE(columns),
+						column_name_to_id);
+				if (ncolumns < 0)
+					return EXIT_FAILURE;
+			}
+			mod->mode = c == 'p' ? OUTPUT_PARSABLE : OUTPUT_READABLE;
+			break;
+		case 's':
+			desc->prefix = optarg;
+			mod->system = SYSTEM_SNAPSHOT;
+			break;
+		case 'x':
+			mod->hex = 1;
+			break;
+		case 'y':
+			mod->physical = 1;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case OPT_OUTPUT_ALL:
+		{
+			size_t sz;
+
+			/*
+			 * Select every known column.  columns[] holds column
+			 * IDs and ncolumns says how many of them are valid,
+			 * so both have to be set here; restart from zero to
+			 * never write past the end of columns[].
+			 * NOTE(review): assumes column IDs are indexes into
+			 * coldescs[] -- confirm against column_name_to_id().
+			 */
+			ncolumns = 0;
+			for (sz = 0; sz < ARRAY_SIZE(coldescs); sz++)
+				columns[ncolumns++] = (int) sz;
+			break;
+		}
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (cpu_modifier_specified && mod->mode == OUTPUT_SUMMARY) {
+		fprintf(stderr,
+			_("%s: options --all, --online and --offline may only "
+			  "be used with options --extended or --parse.\n"),
+			program_invocation_short_name);
+		return EXIT_FAILURE;
+	}
+
+	/* lscpu takes no positional arguments */
+	if (argc != optind) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	/* set default cpu display mode if none was specified */
+	if (!mod->online && !mod->offline) {
+		mod->online = 1;
+		mod->offline = mod->mode == OUTPUT_READABLE ? 1 : 0;
+	}
+
+	ul_path_init_debug();
+
+	/* /sys/devices/system/cpu */
+	desc->syscpu = ul_new_path(_PATH_SYS_CPU);
+	if (!desc->syscpu)
+		err(EXIT_FAILURE, _("failed to initialize CPUs sysfs handler"));
+	if (desc->prefix)
+		ul_path_set_prefix(desc->syscpu, desc->prefix);
+
+	/* /proc */
+	desc->procfs = ul_new_path("/proc");
+	if (!desc->procfs)
+		err(EXIT_FAILURE, _("failed to initialize procfs handler"));
+	if (desc->prefix)
+		ul_path_set_prefix(desc->procfs, desc->prefix);
+
+	read_basicinfo(desc, mod);
+
+	setsize = CPU_ALLOC_SIZE(maxcpus);
+
+	/* collect the per-CPU details */
+	for (i = 0; i < desc->ncpuspos; i++) {
+		/* only consider present CPUs */
+		if (desc->present &&
+		    !CPU_ISSET_S(real_cpu_num(desc, i), setsize, desc->present))
+			continue;
+		read_topology(desc, i);
+		read_cache(desc, i);
+		read_polarization(desc, i);
+		read_address(desc, i);
+		read_configured(desc, i);
+		read_max_mhz(desc, i);
+		read_min_mhz(desc, i);
+	}
+
+	if (desc->caches)
+		qsort(desc->caches, desc->ncaches,
+				sizeof(struct cpu_cache), cachecmp);
+
+	if (desc->ecaches)
+		qsort(desc->ecaches, desc->necaches,
+				sizeof(struct cpu_cache), cachecmp);
+
+	read_nodes(desc);
+	read_hypervisor(desc, mod);
+	arm_cpu_decode(desc);
+
+	switch(mod->mode) {
+	case OUTPUT_SUMMARY:
+		print_summary(desc, mod);
+		break;
+	case OUTPUT_PARSABLE:
+		if (!ncolumns) {
+			/* no list given: fixed default set, flagged as compat */
+			columns[ncolumns++] = COL_CPU;
+			columns[ncolumns++] = COL_CORE;
+			columns[ncolumns++] = COL_SOCKET;
+			columns[ncolumns++] = COL_NODE;
+			columns[ncolumns++] = COL_CACHE;
+			mod->compat = 1;
+		}
+		print_parsable(desc, columns, ncolumns, mod);
+		break;
+	case OUTPUT_READABLE:
+		if (!ncolumns) {
+			/* No list was given. Just print whatever is there. */
+			columns[ncolumns++] = COL_CPU;
+			if (desc->nodemaps)
+				columns[ncolumns++] = COL_NODE;
+			if (desc->drawermaps)
+				columns[ncolumns++] = COL_DRAWER;
+			if (desc->bookmaps)
+				columns[ncolumns++] = COL_BOOK;
+			if (desc->socketmaps)
+				columns[ncolumns++] = COL_SOCKET;
+			if (desc->coremaps)
+				columns[ncolumns++] = COL_CORE;
+			if (desc->caches)
+				columns[ncolumns++] = COL_CACHE;
+			if (desc->online)
+				columns[ncolumns++] = COL_ONLINE;
+			if (desc->configured)
+				columns[ncolumns++] = COL_CONFIGURED;
+			if (desc->polarization)
+				columns[ncolumns++] = COL_POLARIZATION;
+			if (desc->addresses)
+				columns[ncolumns++] = COL_ADDRESS;
+			if (desc->maxmhz)
+				columns[ncolumns++] = COL_MAXMHZ;
+			if (desc->minmhz)
+				columns[ncolumns++] = COL_MINMHZ;
+		}
+		print_readable(desc, columns, ncolumns, mod);
+		break;
+	}
+
+	ul_unref_path(desc->syscpu);
+	ul_unref_path(desc->procfs);
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/lscpu.h b/sys-utils/lscpu.h
new file mode 100644
index 0000000..24bc11e
--- /dev/null
+++ b/sys-utils/lscpu.h
@@ -0,0 +1,194 @@
+#ifndef LSCPU_H
+#define LSCPU_H
+
+#include "c.h"
+#include "nls.h"
+#include "cpuset.h"
+#include "xalloc.h"
+#include "strutils.h"
+#include "bitops.h"
+#include "path.h"
+#include "pathnames.h"
+#include "all-io.h"
+
+/* virtualization types (value kept in lscpu_desc.virtype) */
+enum {
+	VIRT_NONE	= 0,
+	VIRT_PARA,
+	VIRT_FULL,
+	VIRT_CONT
+};
+
+/* hypervisor vendors (value kept in lscpu_desc.hyper) */
+enum {
+	HYPER_NONE	= 0,
+	HYPER_XEN,
+	HYPER_KVM,
+	HYPER_MSHV,
+	HYPER_VMWARE,
+	HYPER_IBM,		/* sys-z powervm */
+	HYPER_VSERVER,
+	HYPER_UML,
+	HYPER_INNOTEK,		/* VBOX */
+	HYPER_HITACHI,
+	HYPER_PARALLELS,	/* OpenVZ/Virtuozzo */
+	HYPER_VBOX,
+	HYPER_OS400,
+	HYPER_PHYP,
+	HYPER_SPAR,
+	HYPER_WSL,
+};
+
+/* CPU modes; every value is a distinct bit, so they can be OR-ed together */
+enum {
+	MODE_32BIT	= (1 << 1),
+	MODE_64BIT	= (1 << 2)
+};
+
+/* cache(s) description */
+struct cpu_cache {
+	char		*name;		/* cache name */
+	char		*size;		/* cache size, kept as a string */
+
+	int		nsharedmaps;	/* presumably number of items in sharedmaps[] -- TODO confirm */
+	cpu_set_t	**sharedmaps;	/* array of CPU masks; presumably the CPUs sharing this cache -- TODO confirm */
+};
+
+/* dispatching modes (value kept in lscpu_desc.dispatching) */
+enum {
+	DISP_HORIZONTAL = 0,
+	DISP_VERTICAL	= 1
+};
+
+/* cpu polarization (presumably the values stored in lscpu_desc.polarization[] -- TODO confirm) */
+enum {
+	POLAR_UNKNOWN	= 0,
+	POLAR_VLOW,
+	POLAR_VMEDIUM,
+	POLAR_VHIGH,
+	POLAR_HORIZONTAL
+};
+
+/* two strings describing one polarization mode */
+struct polarization_modes {
+	char *parsable;		/* presumably the text for the parsable (-p) output -- TODO confirm */
+	char *readable;		/* presumably the text for the readable (-e) output -- TODO confirm */
+};
+
+
+/* global description */
+struct lscpu_desc {
+	const char *prefix;	 /* path to /sys and /proc snapshot or NULL */
+
+	struct path_cxt	*syscpu; /* _PATH_SYS_CPU path handler */
+	struct path_cxt *procfs; /* /proc path handler */
+
+	char	*arch;
+	char	*vendor;
+	char	*machinetype;	/* s390 */
+	char	*family;
+	char	*model;
+	char	*modelname;
+	char	*revision;  /* alternative for model (ppc) */
+	char	*cpu;       /* alternative for modelname (ppc, sparc) */
+	char	*virtflag;	/* virtualization flag (vmx, svm) */
+	char	*hypervisor;	/* hypervisor software */
+	int	hyper;		/* hypervisor vendor ID */
+	int	virtype;	/* VIRT_PARA|FULL|NONE ? */
+	char	*mhz;
+	char	*dynamic_mhz;	/* dynamic mega hertz (s390) */
+	char	*static_mhz;	/* static mega hertz (s390) */
+	char	**maxmhz;	/* maximum mega hertz */
+	char	**minmhz;	/* minimum mega hertz */
+	char	*stepping;
+	char    *bogomips;
+	char	*flags;
+	char	*mtid;		/* maximum thread id (s390) */
+	char	*addrsz;	/* address sizes */
+	int	dispatching;	/* none, horizontal or vertical */
+	int	mode;		/* rm, lm or/and tm */
+
+	int		ncpuspos;	/* maximal possible CPUs */
+	int		ncpus;		/* number of present CPUs */
+	cpu_set_t	*present;	/* mask with present CPUs */
+	cpu_set_t	*online;	/* mask with online CPUs */
+
+	int		nthreads;	/* number of online threads */
+
+	int		ncaches;
+	struct cpu_cache *caches;
+
+	int		necaches;	/* extra caches (s390) */
+	struct cpu_cache *ecaches;
+
+	/*
+	 * All maps are sequentially indexed (0..ncpuspos-1), the array index
+	 * does not have to match the cpuX number as presented by the kernel.
+	 * You have to use real_cpu_num() to get the real cpuX number.
+	 *
+	 * For example, if the possible system CPUs are 1,3,5, then
+	 * ncpuspos=3 and all arrays are indexed in the range 0..2.
+	 */
+	int		*idx2cpunum;	/* mapping index to CPU num */
+
+	int		nnodes;		/* number of NUMA nodes */
+	int		*idx2nodenum;	/* Support for discontinuous nodes */
+	cpu_set_t	**nodemaps;	/* array with NUMA nodes */
+
+	/* drawers -- based on drawer_siblings (internal kernel map of cpuX's
+	 * hardware threads within the same drawer */
+	int		ndrawers;	/* number of all online drawers */
+	cpu_set_t	**drawermaps;	/* unique drawer_siblings */
+	int		*drawerids;	/* physical drawer ids */
+
+	/* books -- based on book_siblings (internal kernel map of cpuX's
+	 * hardware threads within the same book */
+	int		nbooks;		/* number of all online books */
+	cpu_set_t	**bookmaps;	/* unique book_siblings */
+	int		*bookids;	/* physical book ids */
+
+	/* sockets -- based on core_siblings (internal kernel map of cpuX's
+	 * hardware threads within the same physical_package_id (socket)) */
+	int		nsockets;	/* number of all online sockets */
+	cpu_set_t	**socketmaps;	/* unique core_siblings */
+	int		*socketids;	/* physical socket ids */
+
+	/* cores -- based on thread_siblings (internal kernel map of cpuX's
+	 * hardware threads within the same core as cpuX) */
+	int		ncores;		/* number of all online cores */
+	cpu_set_t	**coremaps;	/* unique thread_siblings */
+	int		*coreids;	/* physical core ids */
+
+	int		*polarization;	/* cpu polarization */
+	int		*addresses;	/* physical cpu addresses */
+	int		*configured;	/* cpu configured */
+	int		physsockets;	/* Physical sockets (modules) */
+	int		physchips;	/* Physical chips */
+	int		physcoresperchip;	/* Physical cores per chip */
+};
+
+/* output formats (value kept in lscpu_modifier.mode) */
+enum {
+	OUTPUT_SUMMARY	= 0,	/* default */
+	OUTPUT_PARSABLE,	/* -p */
+	OUTPUT_READABLE,	/* -e */
+};
+
+/* where the data comes from (value kept in lscpu_modifier.system) */
+enum {
+	SYSTEM_LIVE = 0,	/* analyzing a live system */
+	SYSTEM_SNAPSHOT,	/* analyzing a snapshot of a different system */
+};
+
+/* settings that control what is printed and how */
+struct lscpu_modifier {
+	int		mode;		/* OUTPUT_* */
+	int		system;		/* SYSTEM_* */
+	unsigned int	hex:1,		/* print CPU masks rather than CPU lists */
+			compat:1,	/* use backwardly compatible format */
+			online:1,	/* print online CPUs */
+			offline:1,	/* print offline CPUs */
+			json:1,		/* JSON output format */
+			physical:1;	/* use physical numbers */
+};
+
+/* helpers implemented outside of lscpu.c (definitions not visible in this header) */
+extern int read_hypervisor_dmi(void);
+extern void arm_cpu_decode(struct lscpu_desc *desc);
+
+#endif /* LSCPU_H */
diff --git a/sys-utils/lsipc.1 b/sys-utils/lsipc.1
new file mode 100644
index 0000000..9bb1dce
--- /dev/null
+++ b/sys-utils/lsipc.1
@@ -0,0 +1,139 @@
+.\" Copyright 2015 Ondrej Oprala(ooprala@redhat.com)
+.\" May be distributed under the GNU General Public License
+.TH LSIPC "1" "November 2015" "util-linux" "User Commands"
+.SH NAME
+lsipc \- show information on IPC facilities currently employed in the system
+.SH SYNOPSIS
+.B lsipc
+[options]
+.SH DESCRIPTION
+.B lsipc
+shows information on the inter-process communication facilities
+for which the calling process has read access.
+.SH OPTIONS
+.TP
+\fB\-i\fR, \fB\-\-id\fR \fIid\fR
+Show full details on just the one resource element identified by
+.IR id .
+This option needs to be combined with one of the three resource options:
+.BR \-m ,
+.BR \-q " or"
+.BR \-s .
+It is possible to override the default output format for this option with the
+\fB\-\-list\fR, \fB\-\-raw\fR, \fB\-\-json\fR or \fB\-\-export\fR option.
+.TP
+\fB\-g\fR, \fB\-\-global\fR
+Show system-wide usage and limits of IPC resources.
+This option may be combined with one of the three resource options:
+.BR \-m ,
+.BR \-q " or"
+.BR \-s .
+The default is to show information about all resources.
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Display help text and exit.
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Display version information and exit.
+.SS "Resource options"
+.TP
+\fB\-m\fR, \fB\-\-shmems\fR
+Write information about active shared memory segments.
+.TP
+\fB\-q\fR, \fB\-\-queues\fR
+Write information about active message queues.
+.TP
+\fB\-s\fR, \fB\-\-semaphores\fR
+Write information about active semaphore sets.
+.SS "Output formatting"
+.TP
+\fB\-c\fR, \fB\-\-creator\fR
+Show creator and owner.
+.TP
+\fB\-e\fR, \fB\-\-export\fR
+Output data in the format of NAME=VALUE.
+.TP
+\fB\-J\fR, \fB\-\-json\fR
+Use the JSON output format.
+.TP
+\fB\-l\fR, \fB\-\-list\fR
+Use the list output format.  This is the default, except when \fB\-\-id\fR
+is used.
+.TP
+\fB\-n\fR, \fB\-\-newline\fR
+Display each piece of information on a separate line.
+.TP
+\fB\-\-noheadings\fR
+Do not print a header line.
+.TP
+\fB\-\-notruncate\fR
+Don't truncate output.
+.TP
+\fB\-o\fR, \fB\-\-output\fR \fIlist\fR
+Specify which output columns to print.  Use
+.B \-\-help
+to get a list of all supported columns.
+.TP
+\fB\-b\fR, \fB\-\-bytes\fR
+Print size in bytes rather than in human readable format.
+.TP
+\fB\-r\fR, \fB\-\-raw\fR
+Raw output (no columnation).
+.TP
+\fB\-t\fR, \fB\-\-time\fR
+Write time information.  The time of the last control operation that changed
+the access permissions for all facilities, the time of the last
+.BR msgsnd (2)
+and
+.BR msgrcv (2)
+operations on message queues, the time of the last
+.BR shmat (2)
+and
+.BR shmdt (2)
+operations on shared memory, and the time of the last
+.BR semop (2)
+operation on semaphores.
+.TP
+\fB\-\-time\-format\fR \fItype\fP
+Display dates in short, full or iso format.  The default is short; this time
+format is designed to be space efficient and human readable.
+.TP
+\fB\-P\fR, \fB\-\-numeric\-perms\fR
+Print numeric permissions in PERMS column.
+
+.SH EXIT STATUS
+.TP
+0
+if OK,
+.TP
+1
+if incorrect arguments specified,
+.TP
+2
+if a serious error occurs.
+.SH SEE ALSO
+.BR ipcmk (1),
+.BR ipcrm (1),
+.BR msgrcv (2),
+.BR msgsnd (2),
+.BR semget (2),
+.BR semop (2),
+.BR shmat (2),
+.BR shmdt (2),
+.BR shmget (2)
+.SH HISTORY
+The \fBlsipc\fP utility is inspired by the \fBipcs\fP utility.
+.SH AUTHORS
+.MT ooprala@redhat.com
+Ondrej Oprala
+.ME
+.br
+.MT kzak@redhat.com
+Karel Zak
+.ME
+
+.SH AVAILABILITY
+The lsipc command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/lsipc.c b/sys-utils/lsipc.c
new file mode 100644
index 0000000..e8ada57
--- /dev/null
+++ b/sys-utils/lsipc.c
@@ -0,0 +1,1338 @@
+/*
+ * lsipc - List information about IPC instances employed in the system
+ *
+ * Copyright (C) 2015 Ondrej Oprala <ooprala@redhat.com>
+ * Copyright (C) 2015 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *
+ * lsipc is inspired by the ipcs utility. The aim is to create
+ * a utility unencumbered by a standard to provide more flexible
+ * means of controlling the output.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <libsmartcols.h>
+
+#include "c.h"
+#include "nls.h"
+#include "closestream.h"
+#include "strutils.h"
+#include "optutils.h"
+#include "xalloc.h"
+#include "procutils.h"
+#include "ipcutils.h"
+#include "timeutils.h"
+
+/*
+ * Time output modes (value kept in lsipc_control.time_mode)
+ */
+enum {
+	TIME_INVALID = 0,
+	TIME_SHORT,
+	TIME_FULL,
+	TIME_ISO
+};
+
+/*
+ * Column IDs
+ *
+ * Every COL_* value is an index into coldescs[]. The columns are grouped by
+ * the resource they belong to; the COLDESC_IDX_<group>_FIRST and
+ * COLDESC_IDX_<group>_LAST aliases mark the first and the last column of
+ * each group (both inclusive).
+ */
+enum {
+	/* generic */
+	COLDESC_IDX_GEN_FIRST = 0,
+		COL_KEY = COLDESC_IDX_GEN_FIRST,
+		COL_ID,
+		COL_OWNER,
+		COL_PERMS,
+		COL_CUID,
+		COL_CUSER,
+		COL_CGID,
+		COL_CGROUP,
+		COL_UID,
+		COL_USER,
+		COL_GID,
+		COL_GROUP,
+		COL_CTIME,
+	COLDESC_IDX_GEN_LAST = COL_CTIME,
+
+	/* msgq-specific */
+	COLDESC_IDX_MSG_FIRST,
+		COL_USEDBYTES = COLDESC_IDX_MSG_FIRST,
+		COL_MSGS,
+		COL_SEND,
+		COL_RECV,
+		COL_LSPID,
+		COL_LRPID,
+	COLDESC_IDX_MSG_LAST = COL_LRPID,
+
+	/* shm-specific */
+	COLDESC_IDX_SHM_FIRST,
+		COL_SIZE = COLDESC_IDX_SHM_FIRST,
+		COL_NATTCH,
+		COL_STATUS,
+		COL_ATTACH,
+		COL_DETACH,
+		COL_COMMAND,
+		COL_CPID,
+		COL_LPID,
+	COLDESC_IDX_SHM_LAST = COL_LPID,
+
+	/* sem-specific */
+	COLDESC_IDX_SEM_FIRST,
+		COL_NSEMS = COLDESC_IDX_SEM_FIRST,
+		COL_OTIME,
+	COLDESC_IDX_SEM_LAST = COL_OTIME,
+
+	/* summary (--global) */
+	COLDESC_IDX_SUM_FIRST,
+		COL_RESOURCE = COLDESC_IDX_SUM_FIRST,
+		COL_DESC,
+		COL_LIMIT,
+		COL_USED,
+		COL_USEPERC,
+	COLDESC_IDX_SUM_LAST = COL_USEPERC
+};
+
+/* not all columns apply to all options, so we specify a legal range for each;
+ * both bounds are inclusive column IDs, checked by column_name_to_id() for
+ * every non-generic column */
+static size_t LOWER, UPPER;
+
+/*
+ * output modes (value kept in lsipc_control.outmode)
+ */
+enum {
+	OUT_EXPORT = 1,
+	OUT_NEWLINE,
+	OUT_RAW,
+	OUT_JSON,
+	OUT_PRETTY,
+	OUT_LIST
+};
+
+/* output settings */
+struct lsipc_control {
+	int outmode;				/* OUT_* */
+	unsigned int noheadings : 1,		/* don't print header line */
+		     notrunc : 1,		/* don't truncate columns */
+		     bytes : 1,			/* SIZE in bytes */
+		     numperms : 1,		/* numeric permissions */
+		     time_mode : 2;		/* TIME_*; two bits hold 0..3, i.e. up to TIME_ISO */
+};
+
+/* description of one output column */
+struct lsipc_coldesc {
+	const char *name;		/* column name (table header, name accepted in column lists) */
+	const char *help;		/* description printed by usage() */
+	const char *pretty_name;	/* label used by the pretty (OUT_PRETTY) output */
+
+	double whint;	/* width hint */
+	long flag;	/* SCOLS_FL_* flags handed to scols_table_new_column() */
+};
+
+/*
+ * Descriptions of all columns, indexed by the COL_* IDs. The help and
+ * pretty_name strings are only marked for translation here (N_()).
+ */
+static const struct lsipc_coldesc coldescs[] =
+{
+	/* common */
+	[COL_KEY]	= { "KEY",	N_("Resource key"), N_("Key"), 1},
+	[COL_ID]	= { "ID",	N_("Resource ID"), N_("ID"), 1},
+	[COL_OWNER]	= { "OWNER",	N_("Owner's username or UID"), N_("Owner"), 1, SCOLS_FL_RIGHT},
+	[COL_PERMS]	= { "PERMS",	N_("Permissions"), N_("Permissions"), 1, SCOLS_FL_RIGHT},
+	[COL_CUID]	= { "CUID",	N_("Creator UID"), N_("Creator UID"), 1, SCOLS_FL_RIGHT},
+	[COL_CUSER]     = { "CUSER",    N_("Creator user"), N_("Creator user"), 1 },
+	[COL_CGID]	= { "CGID",	N_("Creator GID"), N_("Creator GID"), 1, SCOLS_FL_RIGHT},
+	[COL_CGROUP]    = { "CGROUP",   N_("Creator group"), N_("Creator group"), 1 },
+	[COL_UID]	= { "UID",	N_("User ID"), N_("UID"), 1, SCOLS_FL_RIGHT},
+	[COL_USER]	= { "USER",	N_("User name"), N_("User name"), 1},
+	[COL_GID]	= { "GID",	N_("Group ID"), N_("GID"), 1, SCOLS_FL_RIGHT},
+	[COL_GROUP]	= { "GROUP",	N_("Group name"), N_("Group name"), 1},
+	[COL_CTIME]	= { "CTIME",	N_("Time of the last change"), N_("Last change"), 1, SCOLS_FL_RIGHT},
+
+	/* msgq-specific */
+	[COL_USEDBYTES]	= { "USEDBYTES",N_("Bytes used"), N_("Bytes used"), 1, SCOLS_FL_RIGHT},
+	[COL_MSGS]	= { "MSGS",	N_("Number of messages"), N_("Messages"), 1},
+	[COL_SEND]	= { "SEND",	N_("Time of last msg sent"), N_("Msg sent"), 1, SCOLS_FL_RIGHT},
+	[COL_RECV]	= { "RECV",	N_("Time of last msg received"), N_("Msg received"), 1, SCOLS_FL_RIGHT},
+	[COL_LSPID]	= { "LSPID",	N_("PID of the last msg sender"), N_("Msg sender"), 1, SCOLS_FL_RIGHT},
+	[COL_LRPID]	= { "LRPID",	N_("PID of the last msg receiver"), N_("Msg receiver"), 1, SCOLS_FL_RIGHT},
+
+	/* shm-specific */
+	[COL_SIZE]	= { "SIZE",	N_("Segment size"), N_("Segment size"), 1, SCOLS_FL_RIGHT},
+	[COL_NATTCH]	= { "NATTCH",	N_("Number of attached processes"), N_("Attached processes"), 1, SCOLS_FL_RIGHT},
+	[COL_STATUS]	= { "STATUS",	N_("Status"), N_("Status"), 1, SCOLS_FL_NOEXTREMES},
+	[COL_ATTACH]	= { "ATTACH",	N_("Attach time"), N_("Attach time"), 1, SCOLS_FL_RIGHT},
+	[COL_DETACH]	= { "DETACH",	N_("Detach time"), N_("Detach time"), 1, SCOLS_FL_RIGHT},
+	[COL_COMMAND]	= { "COMMAND",  N_("Creator command line"), N_("Creator command"), 0, SCOLS_FL_TRUNC},
+	[COL_CPID]	= { "CPID",	N_("PID of the creator"), N_("Creator PID"), 1, SCOLS_FL_RIGHT},
+	[COL_LPID]	= { "LPID",	N_("PID of last user"), N_("Last user PID"), 1, SCOLS_FL_RIGHT},
+
+	/* sem-specific */
+	[COL_NSEMS]	= { "NSEMS",	N_("Number of semaphores"), N_("Semaphores"), 1, SCOLS_FL_RIGHT},
+	[COL_OTIME]	= { "OTIME",	N_("Time of the last operation"), N_("Last operation"), 1, SCOLS_FL_RIGHT},
+
+	/* cols for summarized information */
+	[COL_RESOURCE]  = { "RESOURCE", N_("Resource name"), N_("Resource"), 1 },
+	[COL_DESC]      = { "DESCRIPTION",N_("Resource description"), N_("Description"), 1 },
+	[COL_USED]      = { "USED",     N_("Currently used"), N_("Used"), 1, SCOLS_FL_RIGHT },
+	[COL_USEPERC]	= { "USE%",     N_("Currently use percentage"), N_("Use"), 1, SCOLS_FL_RIGHT },
+	[COL_LIMIT]     = { "LIMIT",    N_("System-wide limit"), N_("Limit"), 1, SCOLS_FL_RIGHT },
+};
+
+
+/* The columns[] array lists the IDs of all currently wanted output columns,
+ * in output order; ncolumns is the number of valid items. The columns are
+ * defined by the coldescs[] array, and the array is large enough to specify
+ * (on the command line) each column twice. That's enough; a dynamically
+ * allocated array would be unnecessary over-engineering in this case. */
+static int columns[ARRAY_SIZE(coldescs) * 2];
+static size_t ncolumns;
+
+/*
+ * Bounds check for a columns array: returns @idx when it is a valid index
+ * into an array of @arysz items, otherwise terminates the program with an
+ * error message.
+ */
+static inline size_t err_columns_index(size_t arysz, size_t idx)
+{
+	/*
+	 * Indexes 0..arysz-1 are usable, so the array holds up to arysz
+	 * columns -- report that number rather than arysz - 1.
+	 */
+	if (idx >= arysz)
+		errx(EXIT_FAILURE, _("too many columns specified, "
+				     "the limit is %zu columns"),
+				arysz);
+	return idx;
+}
+
+/*
+ * Store column ID @id at index @n of the array @ary. @ary has to be a real
+ * array (not a pointer) because its size is taken by ARRAY_SIZE(); an
+ * out-of-range @n terminates the program in err_columns_index().
+ */
+#define add_column(ary, n, id)	\
+		((ary)[ err_columns_index(ARRAY_SIZE(ary), (n)) ] = (id))
+
+/*
+ * Translate a column name (the first @namesz bytes of @name, compared
+ * case-insensitively) to its column ID.
+ *
+ * Generic columns are always accepted; all other columns are accepted only
+ * when their ID lies within the currently legal LOWER..UPPER range.
+ *
+ * Returns the ID, or -1 (after printing a warning) when the name is unknown
+ * or the column does not apply.
+ */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(coldescs)) {
+		const char *candidate = coldescs[idx].name;
+
+		/* need a match on the whole candidate, not just on a prefix */
+		if (strncasecmp(name, candidate, namesz) != 0
+		    || candidate[namesz] != '\0') {
+			idx++;
+			continue;
+		}
+
+		if (idx <= COL_CTIME || (idx >= LOWER && idx <= UPPER))
+			return idx;
+
+		warnx(_("column %s does not apply to the specified IPC"), name);
+		return -1;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/*
+ * Return the column ID stored at position @num of columns[].
+ *
+ * The asserts verify that @num addresses one of the ncolumns selected
+ * columns and that the stored ID is a valid coldescs[] index.
+ */
+static int get_column_id(int num)
+{
+	assert(num >= 0);
+	assert((size_t) num < ncolumns);
+	assert((size_t) columns[num] < ARRAY_SIZE(coldescs));
+	return columns[num];
+}
+
+/* Return the description of the column selected at position @num of columns[]. */
+static const struct lsipc_coldesc *get_column_desc(int num)
+{
+	const int id = get_column_id(num);
+
+	return coldescs + id;
+}
+
+/*
+ * Return a newly allocated copy of the user name for UID @id, or NULL when
+ * the lookup finds nothing.
+ *
+ * @pw is a one-entry cache owned by the caller: getpwuid() is called only
+ * when the cache is empty or holds a different UID, and the cache is updated
+ * with the result of that call.
+ */
+static char *get_username(struct passwd **pw, uid_t id)
+{
+	struct passwd *entry = *pw;
+
+	if (entry == NULL || entry->pw_uid != id) {
+		entry = getpwuid(id);
+		*pw = entry;
+	}
+
+	if (entry == NULL)
+		return NULL;
+
+	return xstrdup(entry->pw_name);
+}
+
+/*
+ * Return a newly allocated copy of the group name for GID @id, or NULL when
+ * the lookup finds nothing.
+ *
+ * @gr is a one-entry cache owned by the caller: getgrgid() is called only
+ * when the cache is empty or holds a different GID, and the cache is updated
+ * with the result of that call.
+ */
+static char *get_groupname(struct group **gr, gid_t id)
+{
+	struct group *entry = *gr;
+
+	if (entry == NULL || entry->gr_gid != id) {
+		entry = getgrgid(id);
+		*gr = entry;
+	}
+
+	if (entry == NULL)
+		return NULL;
+
+	return xstrdup(entry->gr_name);
+}
+
+/*
+ * Translate a time format name ("iso", "full" or "short"; the comparison is
+ * case-sensitive) to the matching TIME_* value. An unknown name terminates
+ * the program with an error message.
+ */
+static int parse_time_mode(const char *s)
+{
+	static const struct {
+		const char *name;
+		int val;
+	} known[] = {
+		{ "iso",   TIME_ISO   },
+		{ "full",  TIME_FULL  },
+		{ "short", TIME_SHORT },
+	};
+	size_t k = 0;
+
+	while (k < ARRAY_SIZE(known)) {
+		if (!strcmp(known[k].name, s))
+			return known[k].val;
+		k++;
+	}
+
+	errx(EXIT_FAILURE, _("unknown time format: %s"), s);
+}
+
+/* usage() helper: print name and help of the columns @first..@last (inclusive) */
+static void usage_column_range(FILE *out, size_t first, size_t last)
+{
+	size_t id = first;
+
+	while (id <= last) {
+		fprintf(out, " %14s  %s\n", coldescs[id].name, _(coldescs[id].help));
+		id++;
+	}
+}
+
+/*
+ * Print the help text (synopsis, resource options, output options and the
+ * available columns grouped by resource) to stdout and terminate with
+ * EXIT_SUCCESS; never returns.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Show information on IPC facilities.\n"), out);
+
+	/* what to show */
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Resource options:\n"), out);
+	fputs(_(" -m, --shmems      shared memory segments\n"), out);
+	fputs(_(" -q, --queues      message queues\n"), out);
+	fputs(_(" -s, --semaphores  semaphores\n"), out);
+	fputs(_(" -g, --global      info about system-wide usage (may be used with -m, -q and -s)\n"), out);
+	fputs(_(" -i, --id <id>     print details on resource identified by <id>\n"), out);
+
+	/* how to show it */
+	fputs(USAGE_OPTIONS, out);
+	fputs(_("     --noheadings         don't print headings\n"), out);
+	fputs(_("     --notruncate         don't truncate output\n"), out);
+	fputs(_("     --time-format=<type> display dates in short, full or iso format\n"), out);
+	fputs(_(" -b, --bytes              print SIZE in bytes rather than in human readable format\n"), out);
+	fputs(_(" -c, --creator            show creator and owner\n"), out);
+	fputs(_(" -e, --export             display in an export-able output format\n"), out);
+	fputs(_(" -J, --json               use the JSON output format\n"), out);
+	fputs(_(" -n, --newline            display each piece of information on a new line\n"), out);
+	fputs(_(" -l, --list               force list output format (for example with --id)\n"), out);
+	fputs(_(" -o, --output[=<list>]    define the columns to output\n"), out);
+	fputs(_(" -P, --numeric-perms      print numeric permissions (PERMS column)\n"), out);
+	fputs(_(" -r, --raw                display in raw mode\n"), out);
+	fputs(_(" -t, --time               show attach, detach and change times\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(26));
+
+	/* the columns, one section per group */
+	fprintf(out, _("\nGeneric columns:\n"));
+	usage_column_range(out, COLDESC_IDX_GEN_FIRST, COLDESC_IDX_GEN_LAST);
+
+	fprintf(out, _("\nShared-memory columns (--shmems):\n"));
+	usage_column_range(out, COLDESC_IDX_SHM_FIRST, COLDESC_IDX_SHM_LAST);
+
+	fprintf(out, _("\nMessage-queue columns (--queues):\n"));
+	usage_column_range(out, COLDESC_IDX_MSG_FIRST, COLDESC_IDX_MSG_LAST);
+
+	fprintf(out, _("\nSemaphore columns (--semaphores):\n"));
+	usage_column_range(out, COLDESC_IDX_SEM_FIRST, COLDESC_IDX_SEM_LAST);
+
+	fprintf(out, _("\nSummary columns (--global):\n"));
+	usage_column_range(out, COLDESC_IDX_SUM_FIRST, COLDESC_IDX_SUM_LAST);
+
+	printf(USAGE_MAN_TAIL("lsipc(1)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Allocate an empty output table and configure it according to @ctl
+ * (headings and output mode). Allocation failure terminates the program.
+ */
+static struct libscols_table *new_table(struct lsipc_control *ctl)
+{
+	struct libscols_table *tb = scols_new_table();
+	int mode;
+
+	if (tb == NULL)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+
+	if (ctl->noheadings)
+		scols_table_enable_noheadings(tb, 1);
+
+	mode = ctl->outmode;
+
+	if (mode == OUT_NEWLINE || mode == OUT_EXPORT) {
+		/* the newline mode is the export format with "\n" between columns */
+		if (mode == OUT_NEWLINE)
+			scols_table_set_column_separator(tb, "\n");
+		scols_table_enable_export(tb, 1);
+	} else if (mode == OUT_RAW) {
+		scols_table_enable_raw(tb, 1);
+	} else if (mode == OUT_PRETTY) {
+		scols_table_enable_noheadings(tb, 1);
+	} else if (mode == OUT_JSON) {
+		scols_table_enable_json(tb, 1);
+	}
+
+	return tb;
+}
+
+/*
+ * Create an output table (see new_table()) holding one column for every
+ * entry of columns[]. The SCOLS_FL_TRUNC flag is dropped from the column
+ * flags when truncation is disabled in @ctl.
+ *
+ * Returns the table, or NULL when a column cannot be added; the table is
+ * released in that case.
+ */
+static struct libscols_table *setup_table(struct lsipc_control *ctl)
+{
+	struct libscols_table *tb = new_table(ctl);
+	size_t pos = 0;
+
+	while (pos < ncolumns) {
+		const struct lsipc_coldesc *cd = get_column_desc(pos);
+		int fl = cd->flag;
+
+		if (ctl->notrunc)
+			fl &= ~SCOLS_FL_TRUNC;
+
+		if (scols_table_new_column(tb, cd->name, cd->whint, fl) == NULL) {
+			scols_unref_table(tb);
+			return NULL;
+		}
+		pos++;
+	}
+
+	return tb;
+}
+
+/*
+ * Print the first line of @table in the "pretty" format: one
+ * "<label>: <value>" row per column, where the label is the translated
+ * pretty_name of the column. Columns without data are skipped.
+ *
+ * When the line carries a table as userdata, that table is printed below
+ * under the "Elements:" title.
+ *
+ * Always returns 0.
+ */
+static int print_pretty(struct libscols_table *table)
+{
+	struct libscols_iter *itr = scols_new_iter(SCOLS_ITER_FORWARD);
+	struct libscols_column *col;
+	struct libscols_cell *data;
+	struct libscols_line *ln;
+	const char *hstr, *dstr;
+	int n = 0;
+
+	/* NOTE(review): ln is NULL-checked only further below; the loop relies
+	 * on the libsmartcols accessors tolerating a NULL line/cell -- confirm */
+	ln = scols_table_get_line(table, 0);
+	while (!scols_table_next_column(table, itr, &col)) {
+
+		data = scols_line_get_cell(ln, n);
+
+		/* pretty_name is only marked by N_() in coldescs[]; the
+		 * translation has to be looked up here by _() */
+		hstr = _(get_column_desc(n)->pretty_name);
+		dstr = scols_cell_get_data(data);
+
+		if (dstr) {
+			/* a negative width would make printf() left-justify and
+			 * pad by the absolute value, so keep at least one blank */
+			int pad = 35 - (int) strlen(hstr);
+
+			if (pad < 1)
+				pad = 1;
+			printf("%s:%*c%-36s\n", hstr, pad, ' ', dstr);
+		}
+		++n;
+	}
+
+	/* this is used to pretty-print detailed info about a semaphore array */
+	if (ln) {
+		struct libscols_table *subtab = scols_line_get_userdata(ln);
+		if (subtab) {
+			printf(_("Elements:\n\n"));
+			scols_print_table(subtab);
+		}
+	}
+
+	scols_free_iter(itr);
+	return 0;
+
+}
+
+/* Emit @tb via libsmartcols or, in pretty mode, print_pretty(); returns 0. */
+static int print_table(struct lsipc_control *ctl, struct libscols_table *tb)
+{
+	if (ctl->outmode != OUT_PRETTY)
+		scols_print_table(tb);
+	else
+		print_pretty(tb);
+	return 0;
+}
+static struct timeval now;	/* handed to strtime_short() by make_time() */
+
+/* Format @time according to @mode (TIME_*); returns a newly allocated string. */
+static char *make_time(int mode, time_t time)
+{
+	char buf[64] = {0};
+
+	switch(mode) {
+	case TIME_FULL:
+	{
+		struct tm tm;
+
+		/* a failed conversion yields "" instead of formatting garbage */
+		if (!localtime_r(&time, &tm) || !asctime_r(&tm, buf))
+			buf[0] = '\0';
+		buf[strcspn(buf, "\n")] = '\0';	/* drop asctime's trailing newline */
+		break;
+	}
+	case TIME_SHORT:
+		strtime_short(&time, &now, 0, buf, sizeof(buf));
+		break;
+	case TIME_ISO:
+		strtime_iso(&time, ISO_TIMESTAMP_T, buf, sizeof(buf));
+		break;
+	default:
+		errx(EXIT_FAILURE, _("unsupported time type"));
+	}
+	return xstrdup(buf);
+}
+
+/* Add one summary row for @resource; usage cells show "-" unless @usage is set. */
+static void global_set_data(struct libscols_table *tb, const char *resource,
+			    const char *desc, uintmax_t used, uintmax_t limit, int usage)
+{
+	struct libscols_line *ln = scols_table_new_line(tb, NULL);
+	size_t n;
+
+	if (!ln)
+		err(EXIT_FAILURE, _("failed to allocate output line"));
+
+	for (n = 0; n < ncolumns; n++) {
+		int rc = 0;
+		char *arg = NULL;
+
+		switch (get_column_id(n)) {
+		case COL_RESOURCE:
+			rc = scols_line_set_data(ln, n, resource);
+			break;
+		case COL_DESC:
+			rc = scols_line_set_data(ln, n, desc);
+			break;
+		case COL_USED:
+			if (usage) {
+				xasprintf(&arg, "%ju", used);
+				rc = scols_line_refer_data(ln, n, arg);
+			} else
+				rc = scols_line_set_data(ln, n, "-");
+			break;
+		case COL_USEPERC:
+			if (usage && limit) {	/* a zero limit has no meaningful percentage */
+				xasprintf(&arg, "%2.2f%%", (double) used / limit * 100);
+				rc = scols_line_refer_data(ln, n, arg);
+			} else
+				rc = scols_line_set_data(ln, n, "-");
+			break;
+		case COL_LIMIT:
+			xasprintf(&arg, "%ju", limit);
+			rc = scols_line_refer_data(ln, n, arg);
+			break;
+		}
+
+		if (rc != 0)
+			err(EXIT_FAILURE, _("failed to add output data"));
+	}
+}
+
+/* Name @tb "elements" and add its six right-aligned per-semaphore columns. */
+static void setup_sem_elements_columns(struct libscols_table *tb)
+{
+	static const char *const names[] = {
+		"SEMNUM", "VALUE", "NCOUNT", "ZCOUNT", "PID", "COMMAND"
+	};
+	size_t k;
+
+	scols_table_set_name(tb, "elements");
+
+	/* columns are created in array order; a failed allocation goes to err_oom() */
+	for (k = 0; k < ARRAY_SIZE(names); k++) {
+		if (!scols_table_new_column(tb, names[k], 0, SCOLS_FL_RIGHT))
+			err_oom();
+	}
+}
+
+/* Fill @tb with one row per semaphore set, or only set @id when id > -1. */
+static void do_sem(int id, struct lsipc_control *ctl, struct libscols_table *tb)
+{
+	struct libscols_line *ln;
+	struct passwd *pw = NULL, *cpw = NULL;
+	struct group *gr = NULL, *cgr = NULL;
+	struct sem_data *semds, *semdsp;
+	char *arg = NULL;
+
+	scols_table_set_name(tb, "semaphores");
+
+	if (ipc_sem_get_info(id, &semds) < 1) {
+		if (id > -1)
+			warnx(_("id %d not found"), id);
+		return;
+	}
+	for (semdsp = semds;  semdsp->next != NULL || id > -1; semdsp = semdsp->next) {
+		size_t n;
+
+		ln = scols_table_new_line(tb, NULL);
+		if (!ln)
+			err(EXIT_FAILURE, _("failed to allocate output line"));
+
+		for (n = 0; n < ncolumns; n++) {
+			int rc = 0;
+			switch (get_column_id(n)) {
+			case COL_KEY:
+				xasprintf(&arg, "0x%08x",semdsp->sem_perm.key);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_ID:
+				xasprintf(&arg, "%d",semdsp->sem_perm.id);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_OWNER:
+				arg = get_username(&pw, semdsp->sem_perm.uid);
+				if (!arg)
+					xasprintf(&arg, "%u", semdsp->sem_perm.uid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_PERMS:
+				if (ctl->numperms)
+					xasprintf(&arg, "%#o", semdsp->sem_perm.mode & 0777);
+				else {
+					arg = xmalloc(11);
+					xstrmode(semdsp->sem_perm.mode & 0777, arg);
+				}
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CUID:
+				xasprintf(&arg, "%u", semdsp->sem_perm.cuid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CUSER:
+				arg = get_username(&cpw, semdsp->sem_perm.cuid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CGID:
+				xasprintf(&arg, "%u", semdsp->sem_perm.cgid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CGROUP:
+				arg = get_groupname(&cgr, semdsp->sem_perm.cgid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_UID:
+				xasprintf(&arg, "%u", semdsp->sem_perm.uid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_USER:
+				arg = get_username(&pw, semdsp->sem_perm.uid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_GID:
+				xasprintf(&arg, "%u", semdsp->sem_perm.gid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_GROUP:
+				arg = get_groupname(&gr, semdsp->sem_perm.gid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CTIME:
+				if (semdsp->sem_ctime != 0) {
+					rc = scols_line_refer_data(ln, n,
+							make_time(ctl->time_mode,
+							  (time_t)semdsp->sem_ctime));
+				}
+				break;
+			case COL_NSEMS:
+				xasprintf(&arg, "%ju", semdsp->sem_nsems);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_OTIME:
+				if (semdsp->sem_otime != 0) {
+					rc = scols_line_refer_data(ln, n,
+							make_time(ctl->time_mode,
+							  (time_t)semdsp->sem_otime));
+				}
+				break;
+			}
+			if (rc != 0)
+				err(EXIT_FAILURE, _("failed to add output data"));
+			arg = NULL;
+		}
+
+		if (id > -1 && semdsp->sem_nsems) {
+			/* Create extra table with ID specific semaphore elements */
+			struct libscols_table *sub = new_table(ctl);
+			size_t i;
+			int rc = 0;
+
+			scols_table_enable_noheadings(sub, 0);
+			setup_sem_elements_columns(sub);
+
+			for (i = 0; i < semdsp->sem_nsems; i++) {
+				struct sem_elem *e = &semdsp->elements[i];
+				struct libscols_line *sln = scols_table_new_line(sub, NULL);
+
+				if (!sln)
+					err(EXIT_FAILURE, _("failed to allocate output line"));
+				/* SEMNUM */
+				xasprintf(&arg, "%zu", i);
+				rc = scols_line_refer_data(sln, 0, arg);
+				if (rc)
+					break;
+
+				/* VALUE */
+				xasprintf(&arg, "%d", e->semval);
+				rc = scols_line_refer_data(sln, 1, arg);
+				if (rc)
+					break;
+
+				/* NCOUNT */
+				xasprintf(&arg, "%d", e->ncount);
+				rc = scols_line_refer_data(sln, 2, arg);
+				if (rc)
+					break;
+
+				/* ZCOUNT */
+				xasprintf(&arg, "%d", e->zcount);
+				rc = scols_line_refer_data(sln, 3, arg);
+				if (rc)
+					break;
+
+				/* PID */
+				xasprintf(&arg, "%d", e->pid);
+				rc = scols_line_refer_data(sln, 4, arg);
+				if (rc)
+					break;
+
+				/* COMMAND */
+				arg = proc_get_command(e->pid);
+				rc = scols_line_refer_data(sln, 5, arg);
+				if (rc)
+					break;
+			}
+			if (rc != 0)
+				err(EXIT_FAILURE, _("failed to set data"));
+
+			scols_line_set_userdata(ln, (void *)sub);
+		}
+		if (id > -1)
+			break;	/* id lookup: stop even when the set reports no elements */
+	}
+	ipc_sem_free_info(semds);
+}
+
+/* Add the semaphore limit rows (with current set/semaphore usage) to @tb. */
+static void do_sem_global(struct libscols_table *tb)
+{
+	struct sem_data *semds, *semdsp;
+	struct ipc_limits lim;
+	uintmax_t nsems = 0, nsets = 0;
+
+	ipc_sem_get_limits(&lim);
+	if (ipc_sem_get_info(-1, &semds) > 0) {
+		for (semdsp = semds; semdsp->next != NULL; semdsp = semdsp->next) {
+			++nsets;
+			nsems += semdsp->sem_nsems;	/* this set, not the list head */
+		}
+		ipc_sem_free_info(semds);
+	}
+
+	global_set_data(tb, "SEMMNI", _("Number of semaphore identifiers"), nsets, lim.semmni, 1);
+	global_set_data(tb, "SEMMNS", _("Total number of semaphores"), nsems, lim.semmns, 1);
+	global_set_data(tb, "SEMMSL", _("Max semaphores per semaphore set."), 0, lim.semmsl, 0);
+	global_set_data(tb, "SEMOPM", _("Max number of operations per semop(2)"), 0, lim.semopm, 0);
+	global_set_data(tb, "SEMVMX", _("Semaphore max value"), 0, lim.semvmx, 0);
+}
+
+/*
+ * Fill @tb with one row per message queue, or only queue @id when id > -1
+ * (a warning is printed and nothing is added when that id is not found).
+ * Only columns present in the selected column list get data; times that
+ * are zero are left unset.
+ */
+static void do_msg(int id, struct lsipc_control *ctl, struct libscols_table *tb)
+{
+	struct libscols_line *ln;
+	struct passwd *pw = NULL;
+	struct group *gr = NULL;
+	struct msg_data *msgds, *msgdsp;
+	char *arg = NULL;
+
+	if (ipc_msg_get_info(id, &msgds) < 1) {
+		if (id > -1)
+			warnx(_("id %d not found"), id);
+		return;
+	}
+	scols_table_set_name(tb, "messages");
+
+	for (msgdsp = msgds; msgdsp->next != NULL || id > -1 ; msgdsp = msgdsp->next) {
+		size_t n;
+		ln = scols_table_new_line(tb, NULL);
+
+		if (!ln)
+			err(EXIT_FAILURE, _("failed to allocate output line"));
+
+		/* pw and gr carry the previous lookup between rows; they are handed
+		 * by reference to get_username() and get_groupname() below */
+		for (n = 0; n < ncolumns; n++) {
+			int rc = 0;
+
+			switch (get_column_id(n)) {
+			case COL_KEY:
+				xasprintf(&arg, "0x%08x",msgdsp->msg_perm.key);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_ID:
+				xasprintf(&arg, "%d",msgdsp->msg_perm.id);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_OWNER:
+				arg = get_username(&pw, msgdsp->msg_perm.uid);
+				if (!arg)
+					xasprintf(&arg, "%u", msgdsp->msg_perm.uid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_PERMS:
+				if (ctl->numperms)
+					xasprintf(&arg, "%#o", msgdsp->msg_perm.mode & 0777);
+				else {
+					arg = xmalloc(11);
+					xstrmode(msgdsp->msg_perm.mode & 0777, arg);
+				}
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CUID:
+				xasprintf(&arg, "%u", msgdsp->msg_perm.cuid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CUSER:
+				arg = get_username(&pw, msgdsp->msg_perm.cuid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CGID:
+				xasprintf(&arg, "%u", msgdsp->msg_perm.cgid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CGROUP:
+				arg = get_groupname(&gr, msgdsp->msg_perm.cgid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_UID:
+				xasprintf(&arg, "%u", msgdsp->msg_perm.uid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_USER:
+				arg = get_username(&pw, msgdsp->msg_perm.uid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_GID:
+				xasprintf(&arg, "%u", msgdsp->msg_perm.gid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_GROUP:
+				arg = get_groupname(&gr,msgdsp->msg_perm.gid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CTIME:
+				if (msgdsp->q_ctime != 0)
+					rc = scols_line_refer_data(ln, n,
+						make_time(ctl->time_mode,
+							  (time_t)msgdsp->q_ctime));
+				break;
+			case COL_USEDBYTES:
+				xasprintf(&arg, "%ju", msgdsp->q_cbytes);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_MSGS:
+				xasprintf(&arg, "%ju", msgdsp->q_qnum);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_SEND:
+				if (msgdsp->q_stime != 0)
+					rc = scols_line_refer_data(ln, n,
+						make_time(ctl->time_mode,
+							  (time_t)msgdsp->q_stime));
+				break;
+			case COL_RECV:
+				if (msgdsp->q_rtime != 0)
+					rc = scols_line_refer_data(ln, n,
+						make_time(ctl->time_mode,
+							  (time_t)msgdsp->q_rtime));
+				break;
+			case COL_LSPID:
+				xasprintf(&arg, "%u", msgdsp->q_lspid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_LRPID:
+				xasprintf(&arg, "%u", msgdsp->q_lrpid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			}
+			if (rc != 0)
+				err(EXIT_FAILURE, _("failed to set data"));
+			arg = NULL;
+		}
+		if (id > -1)
+			break;
+	}
+	ipc_msg_free_info(msgds);
+}
+
+
+/* Append the message-queue limit rows (MSGMNI, MSGMAX, MSGMNB) to @tb. */
+static void do_msg_global(struct libscols_table *tb)
+{
+	struct ipc_limits lim;
+	struct msg_data *head, *cur;
+	int in_use = 0;
+
+	ipc_msg_get_limits(&lim);
+	if (ipc_msg_get_info(-1, &head) >= 1) {
+		/* tally the queues in use; the list's last node is not counted */
+		for (cur = head; cur->next; cur = cur->next)
+			in_use++;
+		ipc_msg_free_info(head);
+	}
+
+	global_set_data(tb, "MSGMNI", _("Number of message queues"), in_use, lim.msgmni, 1);
+	global_set_data(tb, "MSGMAX", _("Max size of message (bytes)"),	0, lim.msgmax, 0);
+	global_set_data(tb, "MSGMNB", _("Default max size of queue (bytes)"), 0, lim.msgmnb, 0);
+}
+
+
+/* Fill @tb with one row per shared memory segment, or only segment @id
+ * when id > -1 (a warning is printed when that id is not found). */
+static void do_shm(int id, struct lsipc_control *ctl, struct libscols_table *tb)
+{
+	struct libscols_line *ln;
+	struct passwd *pw = NULL;
+	struct group *gr = NULL;
+	struct shm_data *shmds, *shmdsp;
+	char *arg = NULL;
+
+	if (ipc_shm_get_info(id, &shmds) < 1) {
+		if (id > -1)
+			warnx(_("id %d not found"), id);
+		return;
+	}
+
+	scols_table_set_name(tb, "sharedmemory");
+
+	for (shmdsp = shmds; shmdsp->next != NULL || id > -1 ; shmdsp = shmdsp->next) {
+		size_t n;
+		ln = scols_table_new_line(tb, NULL);
+
+		if (!ln)
+			err(EXIT_FAILURE, _("failed to allocate output line"));
+
+		for (n = 0; n < ncolumns; n++) {
+			int rc = 0;
+
+			switch (get_column_id(n)) {
+			case COL_KEY:
+				xasprintf(&arg, "0x%08x",shmdsp->shm_perm.key);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_ID:
+				xasprintf(&arg, "%d",shmdsp->shm_perm.id);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_OWNER:
+				arg = get_username(&pw, shmdsp->shm_perm.uid);
+				if (!arg)
+					xasprintf(&arg, "%u", shmdsp->shm_perm.uid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_PERMS:
+				if (ctl->numperms)
+					xasprintf(&arg, "%#o", shmdsp->shm_perm.mode & 0777);
+				else {
+					arg = xmalloc(11);
+					xstrmode(shmdsp->shm_perm.mode & 0777, arg);
+				}
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CUID:
+				xasprintf(&arg, "%u", shmdsp->shm_perm.cuid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CUSER:
+				arg = get_username(&pw, shmdsp->shm_perm.cuid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CGID:
+				xasprintf(&arg, "%u", shmdsp->shm_perm.cgid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CGROUP:
+				arg = get_groupname(&gr, shmdsp->shm_perm.cgid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_UID:
+				xasprintf(&arg, "%u", shmdsp->shm_perm.uid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_USER:
+				arg = get_username(&pw, shmdsp->shm_perm.uid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_GID:
+				xasprintf(&arg, "%u", shmdsp->shm_perm.gid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_GROUP:
+				arg = get_groupname(&gr, shmdsp->shm_perm.gid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CTIME:
+				if (shmdsp->shm_ctim != 0)
+					rc = scols_line_refer_data(ln, n,
+						make_time(ctl->time_mode,
+							  (time_t)shmdsp->shm_ctim));
+				break;
+			case COL_SIZE:
+				if (ctl->bytes)
+					xasprintf(&arg, "%ju", shmdsp->shm_segsz);
+				else
+					arg = size_to_human_string(SIZE_SUFFIX_1LETTER, shmdsp->shm_segsz);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_NATTCH:
+				xasprintf(&arg, "%ju", shmdsp->shm_nattch);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_STATUS: {
+					size_t offt = 0;
+
+					/* comma-separated names of the set flags; the buffer
+					 * fits all four names, three commas and the NUL */
+					arg = xcalloc(1, sizeof(char) * strlen(_("dest"))
+							+ strlen(_("locked"))
+							+ strlen(_("hugetlb"))
+							+ strlen(_("noreserve")) + 4);
+#ifdef SHM_DEST
+					if (shmdsp->shm_perm.mode & SHM_DEST)
+						offt += sprintf(arg, "%s", _("dest"));
+#endif
+#ifdef SHM_LOCKED
+					if (shmdsp->shm_perm.mode & SHM_LOCKED) {
+						if (offt)
+							arg[offt++] = ',';
+						offt += sprintf(arg + offt, "%s", _("locked"));
+					}
+#endif
+#ifdef SHM_HUGETLB
+					if (shmdsp->shm_perm.mode & SHM_HUGETLB) {
+						if (offt)
+							arg[offt++] = ',';
+						offt += sprintf(arg + offt, "%s", _("hugetlb"));
+					}
+#endif
+#ifdef SHM_NORESERVE
+					if (shmdsp->shm_perm.mode & SHM_NORESERVE) {
+						if (offt)
+							arg[offt++] = ',';
+						sprintf(arg + offt, "%s", _("noreserve"));
+					}
+#endif
+					rc = scols_line_refer_data(ln, n, arg);
+				}
+				break;
+			case COL_ATTACH:
+				if (shmdsp->shm_atim != 0)
+					rc = scols_line_refer_data(ln, n,
+							make_time(ctl->time_mode,
+							  (time_t)shmdsp->shm_atim));
+				break;
+			case COL_DETACH:
+				if (shmdsp->shm_dtim != 0)
+					rc = scols_line_refer_data(ln, n,
+							make_time(ctl->time_mode,
+							  (time_t)shmdsp->shm_dtim));
+				break;
+			case COL_CPID:
+				xasprintf(&arg, "%u", shmdsp->shm_cprid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_LPID:
+				xasprintf(&arg, "%u", shmdsp->shm_lprid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_COMMAND:
+				arg = proc_get_command(shmdsp->shm_cprid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			}
+			if (rc != 0)
+				err(EXIT_FAILURE, _("failed to set data"));
+			arg = NULL;
+		}
+		if (id > -1)
+			break;
+	}
+	ipc_shm_free_info(shmds);
+}
+
+/* Append the shared-memory limit rows (SHMMNI, SHMALL, SHMMAX, SHMMIN) to @tb. */
+static void do_shm_global(struct libscols_table *tb)
+{
+	struct ipc_limits lim;
+	struct shm_data *head, *seg;
+	uint64_t count = 0, bytes = 0;
+
+	ipc_shm_get_limits(&lim);
+
+	if (ipc_shm_get_info(-1, &head) >= 1) {
+		/* total up the segments; the list's last node is not counted */
+		for (seg = head; seg->next; seg = seg->next, count++)
+			bytes += seg->shm_segsz;
+		ipc_shm_free_info(head);
+	}
+
+	global_set_data(tb, "SHMMNI", _("Shared memory segments"), count, lim.shmmni, 1);
+	global_set_data(tb, "SHMALL", _("Shared memory pages"), bytes / getpagesize(), lim.shmall, 1);
+	global_set_data(tb, "SHMMAX", _("Max size of shared memory segment (bytes)"), 0, lim.shmmax, 0);
+	global_set_data(tb, "SHMMIN", _("Min size of shared memory segment (bytes)"), 0, lim.shmmin, 0);
+}
+
+/* lsipc entry point: parse options, choose the columns, fill and print the table. */
+int main(int argc, char *argv[])
+{
+	int opt, msg = 0, sem = 0, shm = 0, id = -1;
+	int show_time = 0, show_creat = 0, global = 0;
+	size_t i;
+	struct lsipc_control *ctl = xcalloc(1, sizeof(struct lsipc_control));
+	static struct libscols_table *tb;
+	char *outarg = NULL;
+
+	/* long only options. */
+	enum {
+		OPT_NOTRUNC = CHAR_MAX + 1,
+		OPT_NOHEAD,
+		OPT_TIME_FMT
+	};
+
+	static const struct option longopts[] = {
+		{ "bytes",          no_argument,        NULL, 'b' },
+		{ "creator",        no_argument,	NULL, 'c' },
+		{ "export",         no_argument,	NULL, 'e' },
+		{ "global",         no_argument,	NULL, 'g' },
+		{ "help",           no_argument,	NULL, 'h' },
+		{ "id",             required_argument,	NULL, 'i' },
+		{ "json",           no_argument,	NULL, 'J' },
+		{ "list",           no_argument,        NULL, 'l' },
+		{ "newline",        no_argument,	NULL, 'n' },
+		{ "noheadings",     no_argument,	NULL, OPT_NOHEAD },
+		{ "notruncate",     no_argument,	NULL, OPT_NOTRUNC },
+		{ "numeric-perms",  no_argument,	NULL, 'P' },
+		{ "output",         required_argument,	NULL, 'o' },
+		{ "queues",         no_argument,	NULL, 'q' },
+		{ "raw",            no_argument,	NULL, 'r' },
+		{ "semaphores",     no_argument,	NULL, 's' },
+		{ "shmems",         no_argument,	NULL, 'm' },
+		{ "time",           no_argument,	NULL, 't' },
+		{ "time-format",    required_argument,	NULL, OPT_TIME_FMT },
+		{ "version",        no_argument,	NULL, 'V' },
+		{NULL, 0, NULL, 0}
+	};
+
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'J', 'e', 'l', 'n', 'r' },
+		{ 'g', 'i' },
+		{ 'c', 'o', 't' },	/* NOTE(review): 'c' sorts before 'g', so this row breaks the stated row order -- confirm */
+		{ 'm', 'q', 's' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	ctl->time_mode = 0;
+
+	scols_init_debug(0);
+
+	while ((opt = getopt_long(argc, argv, "bceghi:Jlmno:PqrstV", longopts, NULL)) != -1) {
+		err_exclusive_options(opt, longopts, excl, excl_st);
+
+		switch (opt) {
+		case 'b':
+			ctl->bytes = 1;
+			break;
+		case 'i':
+			id = strtos32_or_err(optarg, _("failed to parse IPC identifier"));
+			break;
+		case 'e':
+			ctl->outmode = OUT_EXPORT;
+			break;
+		case 'r':
+			ctl->outmode = OUT_RAW;
+			break;
+		case 'o':
+			outarg = optarg;
+			break;
+		case 'g':
+			global = 1;
+			break;
+		case 'q':
+			msg = 1;
+			add_column(columns, ncolumns++, COL_KEY);
+			add_column(columns, ncolumns++, COL_ID);
+			add_column(columns, ncolumns++, COL_PERMS);
+			add_column(columns, ncolumns++, COL_OWNER);
+			add_column(columns, ncolumns++, COL_USEDBYTES);
+			add_column(columns, ncolumns++, COL_MSGS);
+			add_column(columns, ncolumns++, COL_LSPID);
+			add_column(columns, ncolumns++, COL_LRPID);
+			LOWER = COLDESC_IDX_MSG_FIRST;
+			UPPER = COLDESC_IDX_MSG_LAST;
+			break;
+		case 'l':
+			ctl->outmode = OUT_LIST;
+			break;
+		case 'm':
+			shm = 1;
+			add_column(columns, ncolumns++, COL_KEY);
+			add_column(columns, ncolumns++, COL_ID);
+			add_column(columns, ncolumns++, COL_PERMS);
+			add_column(columns, ncolumns++, COL_OWNER);
+			add_column(columns, ncolumns++, COL_SIZE);
+			add_column(columns, ncolumns++, COL_NATTCH);
+			add_column(columns, ncolumns++, COL_STATUS);
+			add_column(columns, ncolumns++, COL_CTIME);
+			add_column(columns, ncolumns++, COL_CPID);
+			add_column(columns, ncolumns++, COL_LPID);
+			add_column(columns, ncolumns++, COL_COMMAND);
+			LOWER = COLDESC_IDX_SHM_FIRST;
+			UPPER = COLDESC_IDX_SHM_LAST;
+			break;
+		case 'n':
+			ctl->outmode = OUT_NEWLINE;
+			break;
+		case 'P':
+			ctl->numperms = 1;
+			break;
+		case 's':
+			sem = 1;
+			add_column(columns, ncolumns++, COL_KEY);
+			add_column(columns, ncolumns++, COL_ID);
+			add_column(columns, ncolumns++, COL_PERMS);
+			add_column(columns, ncolumns++, COL_OWNER);
+			add_column(columns, ncolumns++, COL_NSEMS);
+			LOWER = COLDESC_IDX_SEM_FIRST;
+			UPPER = COLDESC_IDX_SEM_LAST;
+			break;
+		case OPT_NOTRUNC:
+			ctl->notrunc = 1;
+			break;
+		case OPT_NOHEAD:
+			ctl->noheadings = 1;
+			break;
+		case OPT_TIME_FMT:
+			ctl->time_mode = parse_time_mode(optarg);
+			break;
+		case 'J':
+			ctl->outmode = OUT_JSON;
+			break;
+		case 't':
+			show_time = 1;
+			break;
+		case 'c':
+			show_creat = 1;
+			break;
+		case 'h':
+			usage();
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* default is global */
+	if (msg + shm + sem == 0) {
+		msg = shm = sem = global = 1;
+		if (show_time || show_creat || id != -1)
+			errx(EXIT_FAILURE, _("--global is mutually exclusive with --creator, --id and --time"));
+	}
+	if (global) {
+		add_column(columns, ncolumns++, COL_RESOURCE);
+		add_column(columns, ncolumns++, COL_DESC);
+		add_column(columns, ncolumns++, COL_LIMIT);
+		add_column(columns, ncolumns++, COL_USED);
+		add_column(columns, ncolumns++, COL_USEPERC);
+		LOWER = COLDESC_IDX_SUM_FIRST;
+		UPPER = COLDESC_IDX_SUM_LAST;
+	}
+
+	/* default to pretty-print if --id specified */
+	if (id != -1 && !ctl->outmode)
+		ctl->outmode = OUT_PRETTY;
+
+	if (!ctl->time_mode)
+		ctl->time_mode = ctl->outmode == OUT_PRETTY ? TIME_FULL : TIME_SHORT;
+
+	if (ctl->outmode == OUT_PRETTY && !(outarg || show_creat || show_time)) {
+		/* all columns for lsipc --<RESOURCE> --id <ID> without -o, -c or -t */
+		for (ncolumns = 0, i = 0; i < ARRAY_SIZE(coldescs); i++)
+			 columns[ncolumns++] = i;
+	} else {
+		if (show_creat) {
+			add_column(columns, ncolumns++, COL_CUID);
+			add_column(columns, ncolumns++, COL_CGID);
+			add_column(columns, ncolumns++, COL_UID);
+			add_column(columns, ncolumns++, COL_GID);
+		}
+		if (msg && show_time) {
+			add_column(columns, ncolumns++, COL_SEND);
+			add_column(columns, ncolumns++, COL_RECV);
+			add_column(columns, ncolumns++, COL_CTIME);
+		}
+		if (shm && show_time) {
+			/* keep "COMMAND" as last column */
+			size_t cmd = columns[ncolumns - 1] == COL_COMMAND;
+
+			if (cmd)
+				ncolumns--;
+			add_column(columns, ncolumns++, COL_ATTACH);
+			add_column(columns, ncolumns++, COL_DETACH);
+			if (cmd)
+				add_column(columns, ncolumns++, COL_COMMAND);
+		}
+		if (sem && show_time) {
+			add_column(columns, ncolumns++, COL_OTIME);
+			add_column(columns, ncolumns++, COL_CTIME);
+		}
+	}
+
+	if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns),
+					 &ncolumns, column_name_to_id) < 0)
+		return EXIT_FAILURE;
+
+	tb = setup_table(ctl);
+	if (!tb)
+		return EXIT_FAILURE;
+
+	if (global)
+		scols_table_set_name(tb, "ipclimits");
+
+	if (msg) {
+		if (global)
+			do_msg_global(tb);
+		else
+			do_msg(id, ctl, tb);
+	}
+	if (shm) {
+		if (global)
+			do_shm_global(tb);
+		else
+			do_shm(id, ctl, tb);
+	}
+	if (sem) {
+		if (global)
+			do_sem_global(tb);
+		else
+			do_sem(id, ctl, tb);
+	}
+
+	print_table(ctl, tb);
+
+	scols_unref_table(tb);
+	free(ctl);
+
+	return EXIT_SUCCESS;
+}
+
diff --git a/sys-utils/lsmem.1 b/sys-utils/lsmem.1
new file mode 100644
index 0000000..4476d3e
--- /dev/null
+++ b/sys-utils/lsmem.1
@@ -0,0 +1,99 @@
+.TH LSMEM 1 "October 2016" "util-linux" "User Commands"
+.SH NAME
+lsmem \- list the ranges of available memory with their online status
+.SH SYNOPSIS
+.B lsmem
+[options]
+.SH DESCRIPTION
+The \fBlsmem\fP command lists the ranges of available memory with their online
+status. The listed memory blocks correspond to the memory block representation
+in sysfs. The command also shows the memory block size and the amount of memory
+in online and offline state.
+
+The default output is compatible with the original implementation from s390-tools,
+but it's strongly recommended to avoid using default outputs in your scripts.
+Always explicitly define expected columns by using the \fB\-\-output\fR option
+together with a columns list in environments where a stable output is required.
+
+The \fBlsmem\fP command always starts a new memory range when the current memory
+block differs from the previous block in some output column.  This default
+behavior can be overridden with the \fB\-\-split\fR option (e.g. \fBlsmem
+\-\-split=ZONES\fR).  The special word "none" may be used to ignore all
+differences between memory blocks and to create as large as possible continuous
+ranges.  The opposite is \fB\-\-all\fR, which lists individual memory
+blocks.
+
+Note that some output columns may provide inaccurate information if a split policy
+forces \fBlsmem\fP to ignore differences in some attributes. For example, if you
+merge removable and non-removable memory blocks into one range, then the whole
+range will be marked as non-removable in the \fBlsmem\fP output.
+
+Not all columns are supported on all systems.  If an unsupported column is
+specified, \fBlsmem\fP prints the column but does not provide any data for it.
+
+Use the \fB\-\-help\fR option to see the columns description.
+
+.SH OPTIONS
+.TP
+.BR \-a ", " \-\-all
+List each individual memory block, instead of combining memory blocks with
+similar attributes.
+.TP
+.BR \-b , " \-\-bytes"
+Print the SIZE column in bytes rather than in a human-readable format.
+.TP
+.BR \-h ", " \-\-help
+Display help text and exit.
+.TP
+.BR \-J , " \-\-json"
+Use JSON output format.
+.TP
+.BR \-n , " \-\-noheadings"
+Do not print a header line.
+.TP
+.BR \-o , " \-\-output " \fIlist\fP
+Specify which output columns to print.  Use \fB\-\-help\fR
+to get a list of all supported columns.
+The default list of columns may be extended if \fIlist\fP is
+specified in the format \fB+\fIlist\fP (e.g. \fBlsmem \-o +NODE\fP).
+.TP
+.B \-\-output\-all
+Output all available columns.
+.TP
+.BR \-P , " \-\-pairs"
+Produce output in the form of key="value" pairs.
+All potentially unsafe characters are hex-escaped (\\x<code>).
+.TP
+.BR \-r , " \-\-raw"
+Produce output in raw format.  All potentially unsafe characters are hex-escaped
+(\\x<code>).
+.TP
+.BR \-S , " \-\-split " \fIlist\fP
+Specify which columns (attributes) to use to split memory blocks into ranges.  The
+supported columns are STATE, REMOVABLE, NODE and ZONES, or "none". Other columns are
+silently ignored. For more details see DESCRIPTION above.
+.TP
+.BR \-s , " \-\-sysroot " \fIdirectory\fP
+Gather memory data for a Linux instance other than the instance from which the
+\fBlsmem\fP command is issued.  The specified \fIdirectory\fP is the system
+root of the Linux instance to be inspected.
+.TP
+.BR \-V ", " \-\-version
+Display version information and exit.
+.TP
+\fB\-\-summary\fR[=\fIwhen\fR]
+This option controls summary lines output.  The optional argument \fIwhen\fP can be
+\fBnever\fR, \fBalways\fR or \fBonly\fR.  If the \fIwhen\fR argument is
+omitted, it defaults to \fB"only"\fR. The summary output is suppressed for
+\fB\-\-raw\fR, \fB\-\-pairs\fR and \fB\-\-json\fR.
+.SH AUTHOR
+.B lsmem
+was originally written by Gerald Schaefer for s390-tools in Perl. The C version
+for util-linux was written by Clemens von Mann, Heiko Carstens and Karel Zak.
+.SH SEE ALSO
+.BR chmem (8)
+.SH AVAILABILITY
+The \fBlsmem\fP command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/lsmem.c b/sys-utils/lsmem.c
new file mode 100644
index 0000000..8638336
--- /dev/null
+++ b/sys-utils/lsmem.c
@@ -0,0 +1,747 @@
+/*
+ * lsmem - Show memory configuration
+ *
+ * Copyright IBM Corp. 2016
+ * Copyright (C) 2016 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <c.h>
+#include <nls.h>
+#include <path.h>
+#include <strutils.h>
+#include <closestream.h>
+#include <xalloc.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <assert.h>
+#include <optutils.h>
+#include <libsmartcols.h>
+
+#define _PATH_SYS_MEMORY		"/sys/devices/system/memory"	/* directory holding the "memory<num>" block directories */
+/* Values of memory_block.state, derived from the per-block "state" attribute */
+#define MEMORY_STATE_ONLINE		0
+#define MEMORY_STATE_OFFLINE		1
+#define MEMORY_STATE_GOING_OFFLINE	2
+#define MEMORY_STATE_UNKNOWN		3	/* attribute unreadable or its text not recognized */
+
+enum zone_id {	/* ids of the names found in the "valid_zones" attribute; index into zone_names[] */
+	ZONE_DMA = 0,
+	ZONE_DMA32,
+	ZONE_NORMAL,
+	ZONE_HIGHMEM,
+	ZONE_MOVABLE,
+	ZONE_DEVICE,
+	ZONE_NONE,
+	ZONE_UNKNOWN,	/* returned by zone_name_to_id() for names it does not know */
+	MAX_NR_ZONES,	/* number of ids above; sizes memory_block.zones[] */
+};
+
+struct memory_block {	/* one memory block, or a range of merged consecutive blocks */
+	uint64_t	index;		/* <num> of the first "memory<num>" directory of the range */
+	uint64_t	count;		/* number of blocks in the range */
+	int		state;		/* MEMORY_STATE_* */
+	int		node;		/* number of the "node<num>" entry, -1 if none (set only with lsmem.have_nodes) */
+	int		nr_zones;	/* used entries in zones[] */
+	int		zones[MAX_NR_ZONES];	/* enum zone_id values parsed from "valid_zones" */
+	unsigned int	removable:1;	/* the "removable" attribute reads as 1 */
+};
+
+struct lsmem {	/* global state of one lsmem run */
+	struct path_cxt		*sysmem;		/* _PATH_SYS_MEMORY directory handler */
+	struct dirent		**dirs;		/* "memory<num>" entries returned by scandir() */
+	int			ndirs;		/* number of entries in dirs */
+	struct memory_block	*blocks;	/* collected (merged) ranges, grown with xrealloc() */
+	int			nblocks;	/* number of entries in blocks */
+	uint64_t		block_size;	/* parsed as hex from "block_size_bytes" */
+	uint64_t		mem_online;	/* block_size summed over all online blocks */
+	uint64_t		mem_offline;	/* block_size summed over all other blocks */
+
+	struct libscols_table	*table;		/* output table */
+	unsigned int		have_nodes : 1,	/* memory blocks carry a "node<num>" entry */
+				raw : 1,	/* --raw */
+				export : 1,	/* --pairs */
+				json : 1,	/* --json */
+				noheadings : 1,	/* --noheadings */
+				summary : 1,	/* NOTE(review): not referenced anywhere in lsmem.c */
+				list_all : 1,	/* --all: never merge blocks */
+				bytes : 1,	/* --bytes */
+				want_summary : 1,	/* print the summary lines */
+				want_table : 1,	/* print the table */
+				split_by_node : 1,	/* split_by_*: attributes that must match to merge blocks */
+				split_by_state : 1,
+				split_by_removable : 1,
+				split_by_zones : 1,
+				have_zones : 1;	/* the "valid_zones" attribute is available */
+};
+
+
+enum {	/* column ids; each one is an index into coldescs[] */
+	COL_RANGE,
+	COL_SIZE,
+	COL_STATE,
+	COL_REMOVABLE,
+	COL_BLOCK,
+	COL_NODE,
+	COL_ZONES,
+};
+
+static char *zone_names[] = {	/* zone names, indexed by enum zone_id; matched case-insensitively and printed in ZONES */
+	[ZONE_DMA]	= "DMA",
+	[ZONE_DMA32]	= "DMA32",
+	[ZONE_NORMAL]	= "Normal",
+	[ZONE_HIGHMEM]	= "Highmem",
+	[ZONE_MOVABLE]	= "Movable",
+	[ZONE_DEVICE]	= "Device",
+	[ZONE_NONE]	= "None",	/* block contains more than one zone, can't be offlined */
+	[ZONE_UNKNOWN]	= "Unknown",
+};
+
+/* description of one output column */
+struct coldesc {
+	const char	*name;		/* header */
+	double		whint;		/* width hint (N < 1 is in percent of termwidth) */
+	int		flags;		/* SCOLS_FL_* */
+	const char      *help;		/* untranslated description, listed by usage() */
+};
+
+/* descriptions of all supported columns, indexed by the COL_* ids */
+static struct coldesc coldescs[] = {
+	[COL_RANGE]	= { "RANGE", 0, 0, N_("start and end address of the memory range")},
+	[COL_SIZE]	= { "SIZE", 5, SCOLS_FL_RIGHT, N_("size of the memory range")},
+	[COL_STATE]	= { "STATE", 0, SCOLS_FL_RIGHT, N_("online status of the memory range")},
+	[COL_REMOVABLE]	= { "REMOVABLE", 0, SCOLS_FL_RIGHT, N_("memory is removable")},
+	[COL_BLOCK]	= { "BLOCK", 0, SCOLS_FL_RIGHT, N_("memory block number or blocks range")},
+	[COL_NODE]	= { "NODE", 0, SCOLS_FL_RIGHT, N_("numa node of memory")},
+	[COL_ZONES]	= { "ZONES", 0, SCOLS_FL_RIGHT, N_("valid zones for the memory range")},
+};
+
+/* The columns[] array holds the ids of all currently wanted output columns,
+ * in output order. The columns are defined by the coldescs[] array and each of
+ * them may be specified (on the command line) twice. That's enough; a
+ * dynamically allocated array would be unnecessary over-engineering here. */
+static int columns[ARRAY_SIZE(coldescs) * 2];
+static size_t ncolumns;	/* number of used entries in columns[] */
+
+static inline size_t err_columns_index(size_t arysz, size_t idx)
+{	/* Returns @idx if it is a valid index into an array of @arysz items, exits otherwise. */
+	if (idx >= arysz)
+		errx(EXIT_FAILURE, _("too many columns specified, "
+				     "the limit is %zu columns"),
+				arysz);	/* indexes 0..arysz-1 are usable, so arysz columns fit */
+	return idx;
+}
+
+/*
+ * Translate a zone name (NUL-terminated, compared case-insensitively) to its
+ * enum zone_id value; names missing in zone_names[] give ZONE_UNKNOWN.
+ */
+static int zone_name_to_id(const char *name)
+{
+	size_t id = 0;
+
+	while (id < ARRAY_SIZE(zone_names) && strcasecmp(name, zone_names[id]) != 0)
+		id++;
+
+	return id < ARRAY_SIZE(zone_names) ? (int) id : ZONE_UNKNOWN;
+}
+/* Store @id at index @n of array @ary; err_columns_index() exits when @n is out of range. */
+#define add_column(ary, n, id)	\
+		((ary)[ err_columns_index(ARRAY_SIZE(ary), (n)) ] = (id))
+
+static int column_name_to_id(const char *name, size_t namesz)
+{	/* Resolve the first @namesz chars of @name (any case) to a COL_* id. */
+	size_t id;
+
+	for (id = 0; id < ARRAY_SIZE(coldescs); id++) {
+		const char *known = coldescs[id].name;
+		/* a hit needs @namesz equal chars and the known name to end there */
+		if (strncasecmp(name, known, namesz) == 0 && known[namesz] == '\0')
+			return id;
+	}
+	warnx(_("unknown column: %s"), name);
+	return -1;	/* no such column, already reported */
+}
+
+static inline int get_column_id(int num)
+{	/* Return the column id (COL_*) stored at output position @num of columns[]. */
+	assert(num >= 0);
+	assert((size_t) num < ncolumns);
+	assert(columns[num] < (int) ARRAY_SIZE(coldescs));
+
+	return columns[num];
+}
+
+static inline struct coldesc *get_column_desc(int num)
+{	/* Description (coldescs[] entry) of the column at output position @num. */
+	return coldescs + get_column_id(num);
+}
+
+static inline void reset_split_policy(struct lsmem *l, int enable)
+{	/* Set all four split criteria of @l to @enable (stored in 1-bit fields: pass 0 or 1). */
+	l->split_by_zones = enable;
+	l->split_by_removable = enable;
+	l->split_by_node = enable;
+	l->split_by_state = enable;
+}
+
+static void set_split_policy(struct lsmem *l, int cols[], size_t ncols)
+{
+	/*
+	 * Enable splitting for exactly those criteria whose column id occurs
+	 * in @cols (array of @ncols COL_* ids); ids of columns that cannot
+	 * be used for splitting are ignored.
+	 */
+	size_t n;
+
+	/* start from "no splitting at all" */
+	reset_split_policy(l, 0);
+
+	for (n = 0; n < ncols; n++) {
+		const int id = cols[n];
+
+		if (id == COL_STATE)
+			l->split_by_state = 1;
+		else if (id == COL_NODE)
+			l->split_by_node = 1;
+		else if (id == COL_REMOVABLE)
+			l->split_by_removable = 1;
+		else if (id == COL_ZONES)
+			l->split_by_zones = 1;
+	}
+}
+
+static void add_scols_line(struct lsmem *lsmem, struct memory_block *blk)
+{
+	/*
+	 * Append a line for the range @blk to lsmem->table with one cell per
+	 * entry of columns[]; values that are not available leave the cell
+	 * unset. Block numbers and sizes are uint64_t, hence PRIu64/PRIx64.
+	 */
+	const uint64_t start = blk->index * lsmem->block_size;
+	const uint64_t size = blk->count * lsmem->block_size;
+	struct libscols_line *line = scols_table_new_line(lsmem->table, NULL);
+	size_t i;
+
+	if (!line)
+		err_oom();
+
+	for (i = 0; i < ncolumns; i++) {
+		char *str = NULL;
+
+		switch (get_column_id(i)) {
+		case COL_RANGE:
+			xasprintf(&str, "0x%016"PRIx64"-0x%016"PRIx64, start, start + size - 1);
+			break;
+		case COL_SIZE:
+			if (lsmem->bytes)
+				xasprintf(&str, "%"PRIu64, size);
+			else
+				str = size_to_human_string(SIZE_SUFFIX_1LETTER, size);
+			break;
+		case COL_STATE:
+			str = xstrdup(
+				blk->state == MEMORY_STATE_ONLINE ? _("online") :
+				blk->state == MEMORY_STATE_OFFLINE ? _("offline") :
+				blk->state == MEMORY_STATE_GOING_OFFLINE ? _("on->off") :
+				"?");
+			break;
+		case COL_REMOVABLE:
+			if (blk->state == MEMORY_STATE_ONLINE)
+				str = xstrdup(blk->removable ? _("yes") : _("no"));
+			break;
+		case COL_BLOCK:
+			if (blk->count == 1)
+				xasprintf(&str, "%"PRIu64, blk->index);
+			else
+				xasprintf(&str, "%"PRIu64"-%"PRIu64, blk->index, blk->index + blk->count - 1);
+			break;
+		case COL_NODE:
+			if (lsmem->have_nodes)
+				xasprintf(&str, "%d", blk->node);
+			break;
+		case COL_ZONES:
+			if (lsmem->have_zones) {
+				char valid_zones[BUFSIZ];
+				int j, zone_id;
+
+				valid_zones[0] = '\0';
+				for (j = 0; j < blk->nr_zones; j++) {
+					zone_id = blk->zones[j];
+					if (strlen(valid_zones) +
+					    strlen(zone_names[zone_id]) > BUFSIZ - 2)
+						break;
+					strcat(valid_zones, zone_names[zone_id]);
+					if (j + 1 < blk->nr_zones)
+						strcat(valid_zones, "/");
+				}
+				str = xstrdup(valid_zones);
+			}
+			break;
+		}
+
+		if (str && scols_line_refer_data(line, i, str) != 0)
+			err_oom();
+	}
+}
+
+static void fill_scols_table(struct lsmem *lsmem)
+{	/* Emit one table line for each (possibly merged) range in lsmem->blocks. */
+	int n = 0;
+
+	while (n < lsmem->nblocks)
+		add_scols_line(lsmem, lsmem->blocks + n++);
+}
+
+static void print_summary(struct lsmem *lsmem)
+{
+	/* Print the memory block size and the online/offline totals to stdout:
+	 * exact byte counts with --bytes, human readable sizes otherwise. The
+	 * values are uint64_t, so PRIu64 (not PRId64) is the matching format. */
+	const struct { const char *label; uint64_t value; } rows[] = {
+		{ _("Memory block size:"),    lsmem->block_size },
+		{ _("Total online memory:"),  lsmem->mem_online },
+		{ _("Total offline memory:"), lsmem->mem_offline },
+	};
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(rows); i++) {
+		char *p = NULL;
+
+		if (lsmem->bytes)
+			printf("%-23s %15"PRIu64"\n", rows[i].label, rows[i].value);
+		else if ((p = size_to_human_string(SIZE_SUFFIX_1LETTER, rows[i].value)))
+			printf("%-23s %5s\n", rows[i].label, p);
+		free(p);
+	}
+}
+
+static int memory_block_get_node(struct lsmem *lsmem, char *name)
+{
+	/* Scan the block directory @name (relative to lsmem->sysmem) for the first
+	 * entry named "node<digits>" and return that number, or -1 when there is
+	 * no such entry. Exits if the directory cannot be opened. */
+	int node = -1;
+	struct dirent *entry;
+	DIR *dir = ul_path_opendir(lsmem->sysmem, name);
+
+	if (!dir)
+		err(EXIT_FAILURE, _("Failed to open %s"), name);
+
+	for (entry = readdir(dir); entry != NULL; entry = readdir(dir)) {
+		if (strncmp("node", entry->d_name, 4) == 0
+		    && isdigit_string(entry->d_name + 4)) {
+			node = strtol(entry->d_name + 4, NULL, 10);
+			break;
+		}
+	}
+	closedir(dir);
+	return node;
+}
+
+static void memory_block_read_attrs(struct lsmem *lsmem, char *name,
+				    struct memory_block *blk)
+{
+	/* Fill @blk from the attributes of the sysfs block directory @name
+	 * ("memory<num>"); attributes that cannot be read keep their defaults. */
+	char *text = NULL;
+	int removable = 0;
+
+	memset(blk, 0, sizeof(*blk));
+	blk->index = strtoumax(name + 6, NULL, 10); /* skip the "memory" prefix */
+	blk->count = 1;
+	blk->state = MEMORY_STATE_UNKNOWN;
+
+	if (ul_path_readf_s32(lsmem->sysmem, &removable, "%s/removable", name) == 0)
+		blk->removable = removable == 1;
+
+	if (ul_path_readf_string(lsmem->sysmem, &text, "%s/state", name) > 0) {
+		if (!strcmp(text, "online"))
+			blk->state = MEMORY_STATE_ONLINE;
+		else if (!strcmp(text, "offline"))
+			blk->state = MEMORY_STATE_OFFLINE;
+		else if (!strcmp(text, "going-offline"))
+			blk->state = MEMORY_STATE_GOING_OFFLINE;
+		free(text);
+	}
+
+	if (lsmem->have_nodes)
+		blk->node = memory_block_get_node(lsmem, name);
+
+	/* nr_zones is already 0 thanks to the memset() above */
+	if (lsmem->have_zones &&
+	    ul_path_readf_string(lsmem->sysmem, &text, "%s/valid_zones", name) > 0) {
+		char *zone;
+
+		/* space separated zone names; anything beyond MAX_NR_ZONES is dropped */
+		for (zone = strtok(text, " ");
+		     zone && blk->nr_zones < MAX_NR_ZONES;
+		     zone = strtok(NULL, " "))
+			blk->zones[blk->nr_zones++] = zone_name_to_id(zone);
+
+		free(text);
+	}
+}
+
+static int is_mergeable(struct lsmem *lsmem, struct memory_block *blk)
+{
+	/* Returns 1 when @blk may be merged into the last range of lsmem->blocks,
+	 * i.e. it directly follows that range and matches it in every attribute
+	 * selected by the split policy; returns 0 otherwise. */
+	const struct memory_block *last;
+	int z;
+
+	if (lsmem->nblocks == 0 || lsmem->list_all)
+		return 0;
+
+	last = lsmem->blocks + (lsmem->nblocks - 1);
+	if (last->index + last->count != blk->index)
+		return 0;
+	if (lsmem->split_by_state && last->state != blk->state)
+		return 0;
+	if (lsmem->split_by_removable && last->removable != blk->removable)
+		return 0;
+	if (lsmem->split_by_node && lsmem->have_nodes && last->node != blk->node)
+		return 0;
+	if (!lsmem->split_by_zones || !lsmem->have_zones)
+		return 1;
+	if (last->nr_zones != blk->nr_zones)
+		return 0;
+	for (z = 0; z < last->nr_zones; z++) {
+		/* ranges with an unknown zone are never merged */
+		if (last->zones[z] == ZONE_UNKNOWN || last->zones[z] != blk->zones[z])
+			return 0;
+	}
+	return 1;
+}
+
+static void read_info(struct lsmem *lsmem)
+{	/* Read block size and all memory blocks; fill the totals and lsmem->blocks. */
+	struct memory_block blk;
+	char buf[128];
+	int i;
+
+	if (ul_path_read_buffer(lsmem->sysmem, buf, sizeof(buf), "block_size_bytes") <= 0)
+		err(EXIT_FAILURE, _("failed to read memory block size"));
+	lsmem->block_size = strtoumax(buf, NULL, 16);
+	if (lsmem->block_size == 0)	/* not a number or zero: every size would be bogus */
+		errx(EXIT_FAILURE, _("failed to read memory block size"));
+	for (i = 0; i < lsmem->ndirs; i++) {
+		memory_block_read_attrs(lsmem, lsmem->dirs[i]->d_name, &blk);
+		if (blk.state == MEMORY_STATE_ONLINE)
+			lsmem->mem_online += lsmem->block_size;
+		else
+			lsmem->mem_offline += lsmem->block_size;
+		if (!is_mergeable(lsmem, &blk)) {	/* start a new range */
+			lsmem->nblocks++;
+			lsmem->blocks = xrealloc(lsmem->blocks, lsmem->nblocks * sizeof(blk));
+			lsmem->blocks[lsmem->nblocks - 1] = blk;
+		} else					/* extend the last range */
+			lsmem->blocks[lsmem->nblocks - 1].count++;
+	}
+}
+
+static int memory_block_filter(const struct dirent *de)
+{	/* scandir() filter: non-zero only for entries named "memory<digits>" */
+	const char *name = de->d_name;
+
+	return strncmp("memory", name, 6) == 0 ? isdigit_string(name + 6) : 0;
+}
+
+static void read_basic_info(struct lsmem *lsmem)
+{	/* Collect the memory block directories and probe for node/zone support. */
+	char dir[PATH_MAX], attr[PATH_MAX];
+
+	if (ul_path_access(lsmem->sysmem, F_OK, "block_size_bytes") != 0)
+		errx(EXIT_FAILURE, _("This system does not support memory blocks"));
+
+	ul_path_get_abspath(lsmem->sysmem, dir, sizeof(dir), NULL);
+	lsmem->ndirs = scandir(dir, &lsmem->dirs, memory_block_filter, versionsort);
+	if (lsmem->ndirs <= 0)
+		err(EXIT_FAILURE, _("Failed to read %s"), dir);
+	if (memory_block_get_node(lsmem, lsmem->dirs[0]->d_name) != -1)
+		lsmem->have_nodes = 1;
+
+	/* valid_zones was introduced with kernel 3.18; probe the first scanned
+	 * block, as memory0 is not guaranteed to be among the scanned entries */
+	snprintf(attr, sizeof(attr), "%s/valid_zones", lsmem->dirs[0]->d_name);
+	if (ul_path_access(lsmem->sysmem, F_OK, attr) == 0)
+		lsmem->have_zones = 1;
+}
+
+static void __attribute__((__noreturn__)) usage(void)
+{	/* Print the help text, including the list of supported columns, and exit. */
+	FILE *out = stdout;
+	size_t i;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("List the ranges of available memory with their online status.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -J, --json           use JSON output format\n"), out);
+	fputs(_(" -P, --pairs          use key=\"value\" output format\n"), out);
+	fputs(_(" -a, --all            list each individual memory block\n"), out);
+	fputs(_(" -b, --bytes          print SIZE in bytes rather than in human readable format\n"), out);
+	fputs(_(" -n, --noheadings     don't print headings\n"), out);
+	fputs(_(" -o, --output <list>  output columns\n"), out);
+	fputs(_("     --output-all     output all columns\n"), out);
+	fputs(_(" -r, --raw            use raw output format\n"), out);
+	fputs(_(" -S, --split <list>   split ranges by specified columns\n"), out);
+	fputs(_(" -s, --sysroot <dir>  use the specified directory as system root\n"), out);
+	fputs(_("     --summary[=when] print summary information (never,always or only)\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(22));
+
+	fputs(USAGE_COLUMNS, out);
+	for (i = 0; i < ARRAY_SIZE(coldescs); i++)	/* one line per column: name and translated help */
+		fprintf(out, " %10s  %s\n", coldescs[i].name, _(coldescs[i].help));
+
+	printf(USAGE_MAN_TAIL("lsmem(1)"));
+
+	exit(out == stderr ? EXIT_FAILURE : EXIT_SUCCESS);	/* out is always stdout here, so EXIT_SUCCESS */
+}
+
+int main(int argc, char **argv)
+{	/* Parse options, read the sysfs memory directory, then print table and/or summary. */
+	struct lsmem _lsmem = {
+			.want_table = 1,
+			.want_summary = 1
+		}, *lsmem = &_lsmem;
+
+	const char *outarg = NULL, *splitarg = NULL, *prefix = NULL;
+	int c;
+	size_t i;
+
+	enum {	/* getopt_long() return values of the long-only options */
+		LSMEM_OPT_SUMARRY = CHAR_MAX + 1,
+		OPT_OUTPUT_ALL
+	};
+
+	static const struct option longopts[] = {
+		{"all",		no_argument,		NULL, 'a'},
+		{"bytes",	no_argument,		NULL, 'b'},
+		{"help",	no_argument,		NULL, 'h'},
+		{"json",	no_argument,		NULL, 'J'},
+		{"noheadings",	no_argument,		NULL, 'n'},
+		{"output",	required_argument,	NULL, 'o'},
+		{"output-all",	no_argument,		NULL, OPT_OUTPUT_ALL},
+		{"pairs",	no_argument,		NULL, 'P'},
+		{"raw",		no_argument,		NULL, 'r'},
+		{"sysroot",	required_argument,	NULL, 's'},
+		{"split",       required_argument,      NULL, 'S'},
+		{"version",	no_argument,		NULL, 'V'},
+		{"summary",     optional_argument,	NULL, LSMEM_OPT_SUMARRY },
+		{NULL,		0,			NULL, 0}
+	};
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'J', 'P', 'r' },
+		{ 'S', 'a' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv, "abhJno:PrS:s:V", longopts, NULL)) != -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch (c) {
+		case 'a':
+			lsmem->list_all = 1;
+			break;
+		case 'b':
+			lsmem->bytes = 1;
+			break;
+		case 'h':
+			usage();
+			break;
+		case 'J':
+			lsmem->json = 1;
+			lsmem->want_summary = 0;	/* a later --summary=always switches it on again */
+			break;
+		case 'n':
+			lsmem->noheadings = 1;
+			break;
+		case 'o':
+			outarg = optarg;	/* parsed after the option loop */
+			break;
+		case OPT_OUTPUT_ALL:
+			for (ncolumns = 0; (size_t)ncolumns < ARRAY_SIZE(coldescs); ncolumns++)
+				columns[ncolumns] = ncolumns;
+			break;
+		case 'P':
+			lsmem->export = 1;
+			lsmem->want_summary = 0;
+			break;
+		case 'r':
+			lsmem->raw = 1;
+			lsmem->want_summary = 0;
+			break;
+		case 's':
+			prefix = optarg;
+			break;
+		case 'S':
+			splitarg = optarg;	/* parsed after the output columns are known */
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return 0;
+		case LSMEM_OPT_SUMARRY:
+			if (optarg) {
+				if (strcmp(optarg, "never") == 0)
+					lsmem->want_summary = 0;
+				else if (strcmp(optarg, "only") == 0)
+					lsmem->want_table = 0;
+				else if (strcmp(optarg, "always") == 0)
+					lsmem->want_summary = 1;
+				else
+					errx(EXIT_FAILURE, _("unsupported --summary argument"));
+			} else
+				lsmem->want_table = 0;	/* plain --summary behaves like --summary=only */
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (argc != optind) {	/* lsmem takes no positional arguments */
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	if (lsmem->want_table + lsmem->want_summary == 0)	/* nothing would be printed */
+		errx(EXIT_FAILURE, _("options --{raw,json,pairs} and --summary=only are mutually exclusive"));
+
+	ul_path_init_debug();
+
+	lsmem->sysmem = ul_new_path(_PATH_SYS_MEMORY);
+	if (!lsmem->sysmem)
+		err(EXIT_FAILURE, _("failed to initialize %s handler"), _PATH_SYS_MEMORY);
+	if (prefix && ul_path_set_prefix(lsmem->sysmem, prefix) != 0)
+		err(EXIT_FAILURE, _("invalid argument to --sysroot"));
+
+	/* Shortcut to avoid scols machinery on --summary=only */
+	if (lsmem->want_table == 0 && lsmem->want_summary) {
+		read_basic_info(lsmem);
+		read_info(lsmem);
+		print_summary(lsmem);
+		return EXIT_SUCCESS;
+	}
+
+	/*
+	 * Default columns
+	 */
+	if (!ncolumns) {	/* --output-all was not given */
+		add_column(columns, ncolumns++, COL_RANGE);
+		add_column(columns, ncolumns++, COL_SIZE);
+		add_column(columns, ncolumns++, COL_STATE);
+		add_column(columns, ncolumns++, COL_REMOVABLE);
+		add_column(columns, ncolumns++, COL_BLOCK);
+	}
+
+	if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns),
+					 &ncolumns, column_name_to_id) < 0)
+		return EXIT_FAILURE;
+
+	/*
+	 * Initialize output
+	 */
+	scols_init_debug(0);
+
+	if (!(lsmem->table = scols_new_table()))
+		errx(EXIT_FAILURE, _("failed to initialize output table"));
+	scols_table_enable_raw(lsmem->table, lsmem->raw);
+	scols_table_enable_export(lsmem->table, lsmem->export);
+	scols_table_enable_json(lsmem->table, lsmem->json);
+	scols_table_enable_noheadings(lsmem->table, lsmem->noheadings);
+
+	if (lsmem->json)
+		scols_table_set_name(lsmem->table, "memory");
+
+	for (i = 0; i < ncolumns; i++) {	/* create the table columns in output order */
+		struct coldesc *ci = get_column_desc(i);
+		struct libscols_column *cl;
+
+		cl = scols_table_new_column(lsmem->table, ci->name, ci->whint, ci->flags);
+		if (!cl)
+			err(EXIT_FAILURE, _("Failed to initialize output column"));
+
+		if (lsmem->json) {
+			int id = get_column_id(i);
+
+			switch (id) {
+			case COL_SIZE:
+				if (!lsmem->bytes)	/* human readable sizes keep the default JSON type */
+					break;
+				/* fallthrough */
+			case COL_NODE:
+				scols_column_set_json_type(cl, SCOLS_JSON_NUMBER);
+				break;
+			case COL_REMOVABLE:
+				scols_column_set_json_type(cl, SCOLS_JSON_BOOLEAN);
+				break;
+			}
+		}
+	}
+
+	if (splitarg) {
+		int split[ARRAY_SIZE(coldescs)] = { 0 };
+		static size_t nsplits = 0;
+
+		if (strcasecmp(splitarg, "none") == 0)
+			;	/* empty list: set_split_policy() then leaves every criterion off */
+		else if (string_add_to_idarray(splitarg, split, ARRAY_SIZE(split),
+					&nsplits, column_name_to_id) < 0)
+			return EXIT_FAILURE;
+
+		set_split_policy(lsmem, split, nsplits);
+
+	} else
+		/* follow output columns */
+		set_split_policy(lsmem, columns, ncolumns);
+
+	/*
+	 * Read data and print output
+	 */
+	read_basic_info(lsmem);
+	read_info(lsmem);
+
+	if (lsmem->want_table) {
+		fill_scols_table(lsmem);
+		scols_print_table(lsmem->table);
+
+		if (lsmem->want_summary)	/* empty line between table and summary */
+			fputc('\n', stdout);
+	}
+
+	if (lsmem->want_summary)
+		print_summary(lsmem);
+
+	scols_unref_table(lsmem->table);
+	ul_unref_path(lsmem->sysmem);
+	return 0;
+}
diff --git a/sys-utils/lsns.8 b/sys-utils/lsns.8
new file mode 100644
index 0000000..aba3726
--- /dev/null
+++ b/sys-utils/lsns.8
@@ -0,0 +1,93 @@
+.\" Man page for the lsns command.
+.\" Copyright 2015 Karel Zak <kzak@redhat.com>
+.\" May be distributed under the GNU General Public License
+
+.TH LSNS 8 "December 2015" "util-linux" "System Administration"
+.SH NAME
+lsns \- list namespaces
+.SH SYNOPSIS
+.B lsns
+[options]
+.RI [ namespace ]
+
+.SH DESCRIPTION
+.B lsns
+lists information about all the currently accessible namespaces or about the
+given \fInamespace\fP.  The \fInamespace\fP identifier is an inode number.
+
+The default output is subject to change.  So whenever possible, you should
+avoid using default outputs in your scripts.  Always explicitly define expected
+columns by using the \fB\-\-output\fR option together with a columns list in
+environments where a stable output is required.
+
+The \fBNSFS\fP column, printed when \fBnet\fP is specified for the
+\fB\-\-type\fR option, is special; it uses multi-line cells.
+Use the option \fB\-\-nowrap\fR to switch to a "," separated single-line
+representation.
+
+Note that \fBlsns\fR reads information directly from the /proc filesystem and
+for non-root users it may return incomplete information.  The current /proc
+filesystem may be unshared and affected by a PID namespace
+(see \fBunshare \-\-mount\-proc\fP for more details).
+.B lsns
+is not able to see persistent namespaces without processes where the namespace
+instance is held by a bind mount to /proc/\fIpid\fR/ns/\fItype\fR.
+
+.SH OPTIONS
+.TP
+.BR \-J , " \-\-json"
+Use JSON output format.
+.TP
+.BR \-l , " \-\-list"
+Use list output format.
+.TP
+.BR \-n , " \-\-noheadings"
+Do not print a header line.
+.TP
+.BR \-o , " \-\-output " \fIlist\fP
+Specify which output columns to print.  Use \fB\-\-help\fR
+to get a list of all supported columns.
+
+The default list of columns may be extended if \fIlist\fP is
+specified in the format \fB+\fIlist\fP (e.g. \fBlsns \-o +PATH\fP).
+.TP
+.B \-\-output\-all
+Output all available columns.
+.TP
+.BR \-p , " \-\-task " \fIpid\fP
+Display only the namespaces held by the process with this \fIpid\fR.
+.TP
+.BR \-r , " \-\-raw"
+Use the raw output format.
+.TP
+.BR \-t , " \-\-type " \fItype\fP
+Display the specified \fItype\fP of namespaces only.  The supported types are
+\fBmnt\fP, \fBnet\fP, \fBipc\fP, \fBuser\fP, \fBpid\fP, \fButs\fP and
+\fBcgroup\fP.  This option may be given more than once.
+.TP
+.BR \-u , " \-\-notruncate"
+Do not truncate text in columns.
+.TP
+.BR \-W , " \-\-nowrap"
+Do not use multi-line text in columns.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+
+.SH AUTHORS
+.nf
+Karel Zak <kzak@redhat.com>
+.fi
+
+.SH "SEE ALSO"
+.BR nsenter (1),
+.BR unshare (1),
+.BR clone (2),
+.BR namespaces (7)
+
+.SH AVAILABILITY
+The lsns command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/lsns.c b/sys-utils/lsns.c
new file mode 100644
index 0000000..38ea2e0
--- /dev/null
+++ b/sys-utils/lsns.c
@@ -0,0 +1,1100 @@
+/*
+ * lsns(8) - list system namespaces
+ *
+ * Copyright (C) 2015 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <stdio.h>
+#include <string.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <wchar.h>
+#include <libsmartcols.h>
+#include <libmount.h>
+
+#ifdef HAVE_LINUX_NET_NAMESPACE_H
+#include <stdbool.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/net_namespace.h>
+#endif
+
+#include "pathnames.h"
+#include "nls.h"
+#include "xalloc.h"
+#include "c.h"
+#include "list.h"
+#include "closestream.h"
+#include "optutils.h"
+#include "procutils.h"
+#include "strutils.h"
+#include "namespace.h"
+#include "idcache.h"
+
+#include "debug.h"
+
+static UL_DEBUG_DEFINE_MASK(lsns);
+UL_DEBUG_DEFINE_MASKNAMES(lsns) = UL_DEBUG_EMPTY_MASKNAMES;
+/* debug mask bits; presumably DBG(PROC, ...) selects LSNS_DEBUG_PROC via the LSNS_DEBUG_ prefix */
+#define LSNS_DEBUG_INIT		(1 << 1)
+#define LSNS_DEBUG_PROC		(1 << 2)
+#define LSNS_DEBUG_NS		(1 << 3)
+#define LSNS_DEBUG_ALL		0xFFFF
+
+#define LSNS_NETNS_UNUSABLE -2	/* netnsid value used when the id could not be obtained */
+
+#define DBG(m, x)       __UL_DBG(lsns, LSNS_DEBUG_, m, x)
+#define ON_DBG(m, x)    __UL_DBG_CALL(lsns, LSNS_DEBUG_, m, x)
+
+#define UL_DEBUG_CURRENT_MASK	UL_DEBUG_MASK(lsns)
+#include "debugobj.h"
+
+static struct idcache *uid_cache = NULL;	/* read_process() adds every process owner UID here */
+
+/* column IDs; each one is an index into infos[] */
+enum {
+	COL_NS = 0,
+	COL_TYPE,
+	COL_PATH,
+	COL_NPROCS,
+	COL_PID,
+	COL_PPID,
+	COL_COMMAND,
+	COL_UID,
+	COL_USER,
+	COL_NETNSID,
+	COL_NSFS,
+};
+
+/* description of one output column */
+struct colinfo {
+	const char *name; /* header */
+	double	   whint; /* width hint (N < 1 is in percent of termwidth) */
+	int	   flags; /* SCOLS_FL_* */
+	const char *help; /* untranslated description (wrapped in N_() in infos[]) */
+	int        json_type; /* SCOLS_JSON_*; left at 0 by entries that do not set it */
+};
+
+/* descriptions of all supported columns, indexed by the COL_* ids */
+static const struct colinfo infos[] = {
+	[COL_NS]      = { "NS",     10, SCOLS_FL_RIGHT, N_("namespace identifier (inode number)"), SCOLS_JSON_NUMBER },
+	[COL_TYPE]    = { "TYPE",    5, 0, N_("kind of namespace") },
+	[COL_PATH]    = { "PATH",    0, 0, N_("path to the namespace")},
+	[COL_NPROCS]  = { "NPROCS",  5, SCOLS_FL_RIGHT, N_("number of processes in the namespace"), SCOLS_JSON_NUMBER },
+	[COL_PID]     = { "PID",     5, SCOLS_FL_RIGHT, N_("lowest PID in the namespace"), SCOLS_JSON_NUMBER },
+	[COL_PPID]    = { "PPID",    5, SCOLS_FL_RIGHT, N_("PPID of the PID"), SCOLS_JSON_NUMBER },
+	[COL_COMMAND] = { "COMMAND", 0, SCOLS_FL_TRUNC, N_("command line of the PID")},
+	[COL_UID]     = { "UID",     0, SCOLS_FL_RIGHT, N_("UID of the PID"), SCOLS_JSON_NUMBER},
+	[COL_USER]    = { "USER",    0, 0, N_("username of the PID")},
+	[COL_NETNSID] = { "NETNSID", 0, SCOLS_FL_RIGHT, N_("namespace ID as used by network subsystem")},
+	[COL_NSFS]    = { "NSFS",    0, SCOLS_FL_WRAP, N_("nsfs mountpoint (usually used network subsystem)")}
+};
+/* ids (COL_*) of the wanted output columns in output order; room for each column twice */
+static int columns[ARRAY_SIZE(infos) * 2];
+static size_t ncolumns;	/* number of used entries in columns[] */
+
+enum {	/* namespace type ids; each one is an index into ns_names[] */
+	LSNS_ID_MNT = 0,
+	LSNS_ID_NET,
+	LSNS_ID_PID,
+	LSNS_ID_UTS,
+	LSNS_ID_IPC,
+	LSNS_ID_USER,
+	LSNS_ID_CGROUP
+};
+
+static char *ns_names[] = {	/* names matched by ns_name2type(); also used as file names below "ns/" */
+	[LSNS_ID_MNT] = "mnt",
+	[LSNS_ID_NET] = "net",
+	[LSNS_ID_PID] = "pid",
+	[LSNS_ID_UTS] = "uts",
+	[LSNS_ID_IPC] = "ipc",
+	[LSNS_ID_USER] = "user",
+	[LSNS_ID_CGROUP] = "cgroup"
+};
+
+struct lsns_namespace {
+	ino_t id;			/* presumably the namespace inode number (cf. the NS column help) */
+	int type;			/* LSNS_* */
+	int nprocs;			/* presumably what the NPROCS column prints */
+	int netnsid;
+
+	struct lsns_process *proc;
+
+	struct list_head namespaces;	/* lsns->processes member; NOTE(review): probably meant lsns->namespaces, confirm */
+	struct list_head processes;	/* head of lsns_process *siblings */
+};
+
+struct lsns_process {
+	pid_t pid;		/* process PID */
+	pid_t ppid;		/* parent's PID */
+	pid_t tpid;		/* thread group */
+	char state;		/* state character parsed from the process "stat" file */
+	uid_t uid;		/* owner of the /proc/<pid> directory */
+
+	ino_t            ns_ids[ARRAY_SIZE(ns_names)];	/* one slot per namespace type (LSNS_ID_*) */
+	struct list_head ns_siblings[ARRAY_SIZE(ns_names)];	/* one list node per namespace type */
+
+	struct list_head processes;	/* list of processes */
+
+	struct libscols_line *outline;
+	struct lsns_process *parent;
+
+	int netnsid;	/* initialized to LSNS_NETNS_UNUSABLE by read_process() */
+};
+
+struct lsns {	/* global state of one lsns run */
+	struct list_head processes;
+	struct list_head namespaces;
+
+	pid_t	fltr_pid;	/* filter out by PID */
+	ino_t	fltr_ns;	/* filter out by namespace */
+	int	fltr_types[ARRAY_SIZE(ns_names)];	/* indexed by LSNS_ID_*; read_process() skips types set to 0 */
+	int	fltr_ntypes;
+
+	unsigned int raw	: 1,
+		     json	: 1,
+		     tree	: 1,
+		     list	: 1,
+		     no_trunc	: 1,
+		     no_headings: 1,
+		     no_wrap    : 1;
+
+	struct libmnt_table *tab;
+};
+
+struct netnsid_cache {	/* one cached "net namespace inode -> netnsid" answer */
+	ino_t ino;	/* inode number of the net namespace */
+	int   id;	/* id obtained for it (may be LSNS_NETNS_UNUSABLE) */
+	struct list_head netnsids;	/* link in netnsids_cache */
+};
+
+static struct list_head netnsids_cache;	/* list of struct netnsid_cache entries */
+
+static int netlink_fd = -1;	/* socket used for the netnsid queries; < 0 means netlink is not usable */
+
+static void lsns_init_debug(void)
+{	/* Set up the lsns debug mask; LSNS_DEBUG is presumably the environment variable consulted. */
+	__UL_INIT_DEBUG_FROM_ENV(lsns, LSNS_DEBUG_, 0, LSNS_DEBUG);
+}
+
+static int ns_name2type(const char *name)
+{	/* Map a namespace name ("mnt", "net", ...) to its LSNS_ID_* value, -1 if unknown. */
+	int type = ARRAY_SIZE(ns_names);
+
+	while (--type >= 0) {
+		if (!strcmp(ns_names[type], name))
+			break;
+	}
+	return type;
+}
+
+static int column_name_to_id(const char *name, size_t namesz)
+{	/* Resolve the first @namesz chars of @name (any case) to a COL_* id, or warn and return -1. */
+	size_t id = 0;
+
+	assert(name);
+
+	while (id < ARRAY_SIZE(infos)) {
+		const char *known = infos[id].name;
+
+		if (strncasecmp(name, known, namesz) == 0 && known[namesz] == '\0')
+			return id;
+		id++;
+	}
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+static int has_column(int id)
+{	/* 1 if column @id (COL_*) is among the requested output columns, else 0. */
+	size_t n = ncolumns;
+
+	while (n > 0) {
+		if (columns[--n] == id)
+			return 1;
+	}
+	return 0;
+}
+
+static inline int get_column_id(int num)
+{	/* Return the column id (COL_*) stored at output position @num of columns[]. */
+	assert(num >= 0);
+	assert((size_t) num < ncolumns);
+	assert(columns[num] < (int) ARRAY_SIZE(infos));
+
+	return columns[num];
+}
+
+static inline const struct colinfo *get_column_info(unsigned num)
+{	/* Description (infos[] entry) of the column at output position @num. */
+	return infos + get_column_id(num);
+}
+
+static int get_ns_ino(int dir, const char *nsname, ino_t *ino)
+{	/* Store the inode of "ns/<nsname>" below directory fd @dir in @ino; returns 0 or -errno. */
+	char relpath[16];
+	struct stat sb;
+
+	snprintf(relpath, sizeof(relpath), "ns/%s", nsname);
+	if (fstatat(dir, relpath, &sb, 0) == 0) {
+		*ino = sb.st_ino;
+		return 0;
+	}
+	return -errno;
+}
+
+static int parse_proc_stat(FILE *fp, pid_t *pid, char *state, pid_t *ppid)
+{
+	/*
+	 * Parse "<pid> (<comm>) <state> <ppid> ..." from the first line of @fp.
+	 * <comm> may itself contain ')' so the last ')' of the line is used.
+	 * Returns 0 on success and a negative errno-style code otherwise.
+	 */
+	char *line = NULL, *p;
+	size_t len = 0;
+	int rc = -EINVAL;
+
+	errno = 0;
+	if (getline(&line, &len, fp) < 0) {
+		if (errno)	/* a plain EOF leaves errno at 0: keep -EINVAL */
+			rc = -errno;
+	} else if ((p = strrchr(line, ')')) != NULL &&
+		   sscanf(line, "%d (", pid) == 1 &&
+		   sscanf(p, ") %c %d", state, ppid) == 2) {
+		rc = 0;
+	}
+
+	free(line);
+	return rc;
+}
+
+#ifdef HAVE_LINUX_NET_NAMESPACE_H
+static int netnsid_cache_find(ino_t netino, int *netnsid)
+{	/* Cached id for net namespace inode @netino: returns 1 and sets *netnsid on a hit, else 0. */
+	struct netnsid_cache *hit = NULL;
+	struct list_head *pos;
+
+	list_for_each(pos, &netnsids_cache) {
+		hit = list_entry(pos, struct netnsid_cache, netnsids);
+		if (hit->ino == netino)
+			break;
+		hit = NULL;
+	}
+
+	if (hit)
+		*netnsid = hit->id;
+	return hit != NULL;
+}
+
+static void netnsid_cache_add(ino_t netino, int netnsid)
+{	/* Remember that the net namespace with inode @netino has the id @netnsid. */
+	struct netnsid_cache *entry = xcalloc(1, sizeof(struct netnsid_cache));
+
+	INIT_LIST_HEAD(&entry->netnsids);
+	entry->id = netnsid;
+	entry->ino = netino;
+
+	list_add(&entry->netnsids, &netnsids_cache);
+}
+
+static int get_netnsid_via_netlink_send_request(int target_fd)
+{	/* Send an RTM_GETNSID request for the namespace open at @target_fd; 0 on success, -1 on error. */
+	unsigned char req[NLMSG_SPACE(sizeof(struct rtgenmsg))
+			  + RTA_SPACE(sizeof(int32_t))];
+
+	struct nlmsghdr *nlh = (struct nlmsghdr *)req;
+	struct rtgenmsg *rt = NLMSG_DATA(req);
+	struct rtattr *rta = (struct rtattr *)
+		(req + NLMSG_SPACE(sizeof(struct rtgenmsg)));
+	int32_t *fd = RTA_DATA(rta);
+
+	/* don't hand uninitialized stack bytes (nlmsg_seq, nlmsg_pid, padding) to the kernel */
+	memset(req, 0, sizeof(req));
+	nlh->nlmsg_len = sizeof(req);
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	nlh->nlmsg_type = RTM_GETNSID;
+	rt->rtgen_family = AF_UNSPEC;
+	rta->rta_type = NETNSA_FD;
+	rta->rta_len = RTA_SPACE(sizeof(int32_t));
+	*fd = target_fd;
+
+	return send(netlink_fd, req, sizeof(req), 0) < 0 ? -1 : 0;
+}
+
+static int get_netnsid_via_netlink_recv_response(int *netnsid)
+{	/* Receive one message from netlink_fd; 0 and *netnsid set if it is a usable RTM_NEWNSID, else -1. */
+	unsigned char res[NLMSG_SPACE(sizeof(struct rtgenmsg))
+			  + ((RTA_SPACE(sizeof(int32_t))
+			      < RTA_SPACE(sizeof(struct nlmsgerr)))
+			     ? RTA_SPACE(sizeof(struct nlmsgerr))
+			     : RTA_SPACE(sizeof(int32_t)))];	/* sized for the larger of an int32 attribute and a nlmsgerr */
+	int rtalen;
+	ssize_t reslen;
+
+	struct nlmsghdr *nlh;
+	struct rtattr *rta;
+
+	reslen = recv(netlink_fd, res, sizeof(res), 0);
+	if (reslen < 0)
+		return -1;
+
+	nlh = (struct nlmsghdr *)res;
+	if (!(NLMSG_OK(nlh, (size_t)reslen)
+	      && nlh->nlmsg_type == RTM_NEWNSID))
+		return -1;	/* incomplete message or not the expected message type */
+
+	rtalen = NLMSG_PAYLOAD(nlh, sizeof(struct rtgenmsg));
+	rta = (struct rtattr *)(res + NLMSG_SPACE(sizeof(struct rtgenmsg)));
+	if (!(RTA_OK(rta, rtalen)
+	      && rta->rta_type == NETNSA_NSID))
+		return -1;	/* the first attribute is missing or is not NETNSA_NSID */
+
+	*netnsid = *(int *)RTA_DATA(rta);
+
+	return 0;
+}
+
+/*
+ * Ask the kernel, via netlink_fd, for the id of the network namespace
+ * found at @path relative to the directory descriptor @dir.
+ *
+ * Returns the id reported in the reply, or LSNS_NETNS_UNUSABLE when the
+ * netlink socket is not open, @path cannot be opened, or the request or
+ * its reply fails.
+ */
+static int get_netnsid_via_netlink(int dir, const char *path)
+{
+	int netnsid = LSNS_NETNS_UNUSABLE;
+	int nsfd;
+
+	if (netlink_fd < 0)
+		return LSNS_NETNS_UNUSABLE;
+
+	nsfd = openat(dir, path, O_RDONLY);
+	if (nsfd < 0)
+		return LSNS_NETNS_UNUSABLE;
+
+	/* the receive step stores into netnsid only when it succeeds */
+	if (get_netnsid_via_netlink_send_request(nsfd) < 0
+	    || get_netnsid_via_netlink_recv_response(&netnsid) < 0)
+		netnsid = LSNS_NETNS_UNUSABLE;
+
+	close(nsfd);
+	return netnsid;
+}
+
+/*
+ * Return the netnsid for the network namespace with inode @netino.
+ *
+ * The cache is consulted first; on a miss the id is queried through
+ * "ns/net" below the directory descriptor @dir and the answer (including
+ * LSNS_NETNS_UNUSABLE) is cached for later calls.
+ */
+static int get_netnsid(int dir, ino_t netino)
+{
+	int netnsid;
+
+	if (netnsid_cache_find(netino, &netnsid))
+		return netnsid;
+
+	netnsid = get_netnsid_via_netlink(dir, "ns/net");
+	netnsid_cache_add(netino, netnsid);
+
+	return netnsid;
+}
+#else
+/*
+ * Fallback used when HAVE_LINUX_NET_NAMESPACE_H is not defined: no lookup
+ * is attempted and every namespace is reported as LSNS_NETNS_UNUSABLE.
+ */
+static int get_netnsid(int dir __attribute__((__unused__)),
+		       ino_t netino __attribute__((__unused__)))
+{
+	const int netnsid = LSNS_NETNS_UNUSABLE;
+
+	return netnsid;
+}
+#endif /* HAVE_LINUX_NET_NAMESPACE_H */
+
+/*
+ * Read /proc/<pid> and append a new lsns_process to ls->processes.
+ *
+ * Collected data: owner UID of the /proc/<pid> directory (also added to
+ * uid_cache), pid/state/ppid parsed from "stat", the namespace inode for
+ * every type enabled in ls->fltr_types and, for the net namespace, its
+ * netnsid.
+ *
+ * Returns 0 on success or a negative errno-style code.  -EACCES and -ENOENT
+ * from get_ns_ino() are tolerated; the corresponding ns_ids[] slot is then
+ * left as get_ns_ino() left it.  On failure nothing is added to the list.
+ */
+static int read_process(struct lsns *ls, pid_t pid)
+{
+	struct lsns_process *p = NULL;
+	char buf[BUFSIZ];
+	DIR *dir;
+	int rc = 0, fd = -1;
+	FILE *f = NULL;
+	size_t i;
+	struct stat st;
+
+	DBG(PROC, ul_debug("reading %d", (int) pid));
+
+	snprintf(buf, sizeof(buf), "/proc/%d", pid);
+	dir = opendir(buf);
+	if (!dir)
+		return -errno;
+
+	p = xcalloc(1, sizeof(*p));
+	p->netnsid = LSNS_NETNS_UNUSABLE;
+
+	if (fstat(dirfd(dir), &st) == 0) {
+		p->uid = st.st_uid;
+		add_uid(uid_cache, st.st_uid);
+	}
+
+	fd = openat(dirfd(dir), "stat", O_RDONLY);
+	if (fd < 0) {
+		rc = -errno;
+		goto done;
+	}
+	if (!(f = fdopen(fd, "r"))) {
+		/* fd is still ours here; it is closed at "done" */
+		rc = -errno;
+		goto done;
+	}
+	rc = parse_proc_stat(f, &p->pid, &p->state, &p->ppid);
+	if (rc < 0)
+		goto done;
+	rc = 0;
+
+	for (i = 0; i < ARRAY_SIZE(p->ns_ids); i++) {
+		INIT_LIST_HEAD(&p->ns_siblings[i]);
+
+		if (!ls->fltr_types[i])
+			continue;
+
+		rc = get_ns_ino(dirfd(dir), ns_names[i], &p->ns_ids[i]);
+		if (rc && rc != -EACCES && rc != -ENOENT)
+			goto done;
+		if (i == LSNS_ID_NET)
+			p->netnsid = get_netnsid(dirfd(dir), p->ns_ids[i]);
+		rc = 0;
+	}
+
+	INIT_LIST_HEAD(&p->processes);
+
+	DBG(PROC, ul_debugobj(p, "new pid=%d", p->pid));
+	list_add_tail(&p->processes, &ls->processes);
+done:
+	if (f)
+		fclose(f);		/* also closes fd */
+	else if (fd >= 0)
+		close(fd);		/* fdopen() failed, avoid leaking fd */
+	closedir(dir);
+	if (rc)
+		free(p);
+	return rc;
+}
+
+/*
+ * Iterate over all PIDs delivered by proc_next_pid() and read each one
+ * with read_process().
+ *
+ * -EACCES and -ENOENT from read_process() are ignored; any other error
+ * stops the scan.  Returns 0 on success or a negative errno-style code.
+ */
+static int read_processes(struct lsns *ls)
+{
+	struct proc_processes *proc;
+	pid_t pid;
+	int rc = 0;
+
+	DBG(PROC, ul_debug("opening /proc"));
+
+	proc = proc_open_processes();
+	if (proc == NULL) {
+		rc = -errno;
+	} else {
+		while (proc_next_pid(proc, &pid) == 0) {
+			rc = read_process(ls, pid);
+			if (rc != 0 && rc != -EACCES && rc != -ENOENT)
+				break;
+			rc = 0;
+		}
+	}
+
+	DBG(PROC, ul_debug("closing /proc"));
+	proc_close_processes(proc);
+	return rc;
+}
+
+/*
+ * Return the entry of ls->namespaces whose id equals @ino, or NULL when
+ * there is none.  The namespace type is not part of the comparison.
+ */
+static struct lsns_namespace *get_namespace(struct lsns *ls, ino_t ino)
+{
+	struct list_head *pos;
+
+	list_for_each(pos, &ls->namespaces) {
+		struct lsns_namespace *cur;
+
+		cur = list_entry(pos, struct lsns_namespace, namespaces);
+		if (cur->id != ino)
+			continue;
+		return cur;
+	}
+	return NULL;
+}
+
+/*
+ * Return 1 if a process with @pid is linked into @ns (through its
+ * ns_siblings[] slot for the type of @ns), 0 otherwise.
+ */
+static int namespace_has_process(struct lsns_namespace *ns, pid_t pid)
+{
+	struct list_head *pos;
+
+	list_for_each(pos, &ns->processes) {
+		struct lsns_process *member;
+
+		member = list_entry(pos, struct lsns_process,
+				    ns_siblings[ns->type]);
+		if (member->pid == pid)
+			return 1;
+	}
+	return 0;
+}
+
+/*
+ * Allocate a namespace descriptor of the given @type with id @ino and
+ * append it to ls->namespaces.
+ *
+ * Returns the new descriptor, or NULL if the allocation yields NULL.
+ */
+static struct lsns_namespace *add_namespace(struct lsns *ls, int type, ino_t ino)
+{
+	struct lsns_namespace *created;
+
+	created = xcalloc(1, sizeof(struct lsns_namespace));
+	if (created == NULL)
+		return NULL;
+
+	DBG(NS, ul_debugobj(created, "new %s[%ju]", ns_names[type], (uintmax_t)ino));
+
+	created->id = ino;
+	created->type = type;
+
+	INIT_LIST_HEAD(&created->processes);
+	INIT_LIST_HEAD(&created->namespaces);
+	list_add_tail(&created->namespaces, &ls->namespaces);
+
+	return created;
+}
+
+/*
+ * Attach @proc to namespace @ns.
+ *
+ * Parent/child links are (re)established against every process in
+ * ls->processes, @proc is appended to the member list of @ns, the member
+ * counter is increased, and ns->proc is updated so that it refers to the
+ * member with the lowest PID seen so far.
+ *
+ * Always returns 0.
+ */
+static int add_process_to_namespace(struct lsns *ls, struct lsns_namespace *ns, struct lsns_process *proc)
+{
+	struct list_head *pos;
+
+	DBG(NS, ul_debugobj(ns, "add process [%p] pid=%d to %s[%ju]",
+		proc, proc->pid, ns_names[ns->type], (uintmax_t)ns->id));
+
+	list_for_each(pos, &ls->processes) {
+		struct lsns_process *other;
+
+		other = list_entry(pos, struct lsns_process, processes);
+
+		if (other->pid == proc->ppid) {
+			/* "other" is the parent of proc */
+			proc->parent = other;
+			continue;
+		}
+		if (other->ppid == proc->pid)
+			/* "other" is a child of proc */
+			other->parent = proc;
+	}
+
+	list_add_tail(&proc->ns_siblings[ns->type], &ns->processes);
+	ns->nprocs++;
+
+	if (ns->proc == NULL || proc->pid < ns->proc->pid)
+		ns->proc = proc;
+
+	return 0;
+}
+
+/*
+ * list_sort() comparator: orders two namespace list nodes by their id
+ * using cmp_numbers().  The @data argument is unused.
+ */
+static int cmp_namespaces(struct list_head *a, struct list_head *b,
+			  __attribute__((__unused__)) void *data)
+{
+	struct lsns_namespace *first;
+	struct lsns_namespace *second;
+
+	first = list_entry(a, struct lsns_namespace, namespaces);
+	second = list_entry(b, struct lsns_namespace, namespaces);
+
+	return cmp_numbers(first->id, second->id);
+}
+
+/*
+ * Format @netnsid for output into a newly allocated string at @str.
+ *
+ * Non-negative ids are printed as decimal numbers; when the macro is
+ * available, NETNSA_NSID_NOT_ASSIGNED is printed as "unassigned".  For any
+ * other value @str is not touched and 0 is returned; otherwise the
+ * xasprintf() result is returned.
+ */
+static int netnsid_xasputs(char **str, int netnsid)
+{
+	if (netnsid >= 0)
+		return xasprintf(str, "%d", netnsid);
+
+#ifdef NETNSA_NSID_NOT_ASSIGNED
+	if (netnsid == NETNSA_NSID_NOT_ASSIGNED)
+		return xasprintf(str, "%s", "unassigned");
+#endif
+
+	return 0;
+}
+
+/*
+ * Build ls->namespaces from the already collected ls->processes.
+ *
+ * For every non-zero namespace inode of every process the matching
+ * namespace descriptor is looked up (or created) and the process is
+ * attached to it.  Finally the namespace list is sorted by id.
+ *
+ * Returns 0 on success, -ENOMEM if a descriptor cannot be created.
+ */
+static int read_namespaces(struct lsns *ls)
+{
+	struct list_head *pos;
+
+	DBG(NS, ul_debug("reading namespace"));
+
+	list_for_each(pos, &ls->processes) {
+		struct lsns_process *proc;
+		size_t type;
+
+		proc = list_entry(pos, struct lsns_process, processes);
+
+		for (type = 0; type < ARRAY_SIZE(proc->ns_ids); type++) {
+			struct lsns_namespace *ns;
+
+			/* zero means the inode is unknown for this type */
+			if (!proc->ns_ids[type])
+				continue;
+
+			ns = get_namespace(ls, proc->ns_ids[type]);
+			if (ns == NULL)
+				ns = add_namespace(ls, type, proc->ns_ids[type]);
+			if (ns == NULL)
+				return -ENOMEM;
+
+			add_process_to_namespace(ls, ns, proc);
+		}
+	}
+
+	list_sort(&ls->namespaces, cmp_namespaces, NULL);
+
+	return 0;
+}
+
+/*
+ * Match callback for mnt_table_find_next_fs(): returns non-zero when @fs
+ * is of type "nsfs" and its root equals the string passed in @data.
+ */
+static int is_nsfs_root(struct libmnt_fs *fs, void *data)
+{
+	const char *root;
+
+	if (!mnt_fs_match_fstype(fs, "nsfs"))
+		return 0;
+
+	root = mnt_fs_get_root(fs);
+	if (root == NULL)
+		return 0;
+
+	return strcmp(root, (char *)data) == 0;
+}
+
+/*
+ * Test whether @elt is one of the elements of @path_set, a list of paths
+ * separated by the character @sep.
+ *
+ * An occurrence of @elt counts only when it is a complete element, i.e.
+ * it starts at the beginning of @path_set or right after a separator, and
+ * it is followed by a separator or the end of the string.  Every
+ * occurrence is examined, so an earlier partial match (for example "/bar"
+ * inside "/foo/bar") does not hide a later complete one.
+ *
+ * Returns 1 if @elt is included, 0 otherwise.
+ */
+static int is_path_included(const char *path_set, const char *elt,
+			      const char sep)
+{
+	size_t elt_len = strlen(elt);
+	const char *p;
+
+	for (p = strstr(path_set, elt); p; p = strstr(p + 1, elt)) {
+		/* never look at p[-1] when p is the start of the buffer */
+		int at_start = (p == path_set) || (*(p - 1) == sep);
+		int at_end = (p[elt_len] == sep) || (p[elt_len] == '\0');
+
+		if (at_start && at_end)
+			return 1;
+
+		/* only reachable for an empty @elt: do not step past NUL */
+		if (*p == '\0')
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Collect the mount targets of all nsfs mounts in @tab whose root is
+ * "<type>:[<inode>]" for namespace @ns, and join them with @sep into a
+ * newly allocated string stored at @str.  Targets already present in the
+ * result are not added again.  @str is NULL when nothing matches.
+ *
+ * Always returns 1.
+ */
+static int nsfs_xasputs(char **str,
+			struct lsns_namespace *ns,
+			struct libmnt_table *tab,
+			char sep)
+{
+	struct libmnt_iter *itr = mnt_new_iter(MNT_ITER_FORWARD);
+	struct libmnt_fs *fs = NULL;
+	char *wanted_root;
+
+	*str = NULL;
+	xasprintf(&wanted_root, "%s:[%ju]", ns_names[ns->type], (uintmax_t)ns->id);
+
+	while (mnt_table_find_next_fs(tab, itr, is_nsfs_root,
+				      wanted_root, &fs) == 0) {
+		const char *target = mnt_fs_get_target(fs);
+		char *joined = NULL;
+
+		if (*str == NULL) {
+			/* first match starts the list */
+			xasprintf(str, "%s", target);
+			continue;
+		}
+		if (is_path_included(*str, target, sep))
+			continue;
+
+		xasprintf(&joined, "%s%c%s", *str, sep, target);
+		free(*str);
+		*str = joined;
+	}
+
+	mnt_free_iter(itr);
+	free(wanted_root);
+
+	return 1;
+}
+/*
+ * Add one line to @table describing process @proc in namespace @ns.
+ *
+ * In tree mode the line becomes a child of the parent's line, if the
+ * parent has one.  Each enabled column is filled from @ns or @proc; cells
+ * for which no string is produced stay empty.  The new line is remembered
+ * in proc->outline.
+ *
+ * If the line cannot be allocated a warning is printed and nothing is
+ * added; a failure to attach cell data is fatal (err_oom()).
+ */
+static void add_scols_line(struct lsns *ls, struct libscols_table *table,
+			   struct lsns_namespace *ns, struct lsns_process *proc)
+{
+	size_t i;
+	struct libscols_line *line;
+
+	assert(ns);
+	assert(table);
+	assert(proc);
+
+	line = scols_table_new_line(table,
+			ls->tree && proc->parent ? proc->parent->outline : NULL);
+	if (!line) {
+		warn(_("failed to add line to output"));
+		return;
+	}
+
+	for (i = 0; i < ncolumns; i++) {
+		char *str = NULL;
+
+		switch (get_column_id(i)) {
+		case COL_NS:
+			xasprintf(&str, "%ju", (uintmax_t)ns->id);
+			break;
+		case COL_PID:
+			xasprintf(&str, "%d", (int) proc->pid);
+			break;
+		case COL_PPID:
+			xasprintf(&str, "%d", (int) proc->ppid);
+			break;
+		case COL_TYPE:
+			xasprintf(&str, "%s", ns_names[ns->type]);
+			break;
+		case COL_NPROCS:
+			xasprintf(&str, "%d", ns->nprocs);
+			break;
+		case COL_COMMAND:
+			str = proc_get_command(proc->pid);
+			if (!str)
+				str = proc_get_command_name(proc->pid);
+			break;
+		case COL_PATH:
+			xasprintf(&str, "/proc/%d/ns/%s", (int) proc->pid, ns_names[ns->type]);
+			break;
+		case COL_UID:
+			/* UIDs are unsigned; "%d" would print large ones negative */
+			xasprintf(&str, "%u", (unsigned int) proc->uid);
+			break;
+		case COL_USER:
+		{
+			/*
+			 * The UID is added to uid_cache only when fstat()
+			 * succeeded in read_process(), so the lookup may
+			 * presumably come back empty; leave the cell blank
+			 * then instead of dereferencing NULL.
+			 */
+			struct identry *id = get_id(uid_cache, proc->uid);
+
+			if (id)
+				xasprintf(&str, "%s", id->name);
+			break;
+		}
+		case COL_NETNSID:
+			if (ns->type == LSNS_ID_NET)
+				netnsid_xasputs(&str, proc->netnsid);
+			break;
+		case COL_NSFS:
+			nsfs_xasputs(&str, ns, ls->tab, ls->no_wrap ? ',' : '\n');
+			break;
+		default:
+			break;
+		}
+
+		if (str && scols_line_refer_data(line, i, str) != 0)
+			err_oom();
+	}
+
+	proc->outline = line;
+}
+
+/*
+ * Create the libsmartcols output table and its columns according to the
+ * output settings in @ls (raw/JSON/headings, truncation, tree and wrap
+ * modes).  Unless wrapping is disabled, the NSFS column is set up to wrap
+ * its content at newlines.
+ *
+ * Returns the new table, or NULL (after printing a warning) on failure.
+ */
+static struct libscols_table *init_scols_table(struct lsns *ls)
+{
+	struct libscols_table *table = scols_new_table();
+	size_t n;
+
+	if (table == NULL) {
+		warn(_("failed to initialize output table"));
+		return NULL;
+	}
+
+	scols_table_enable_raw(table, ls->raw);
+	scols_table_enable_json(table, ls->json);
+	scols_table_enable_noheadings(table, ls->no_headings);
+
+	if (ls->json)
+		scols_table_set_name(table, "namespaces");
+
+	for (n = 0; n < ncolumns; n++) {
+		const struct colinfo *info = get_column_info(n);
+		struct libscols_column *column;
+		int flags = info->flags;
+
+		if (ls->no_trunc)
+			flags &= ~SCOLS_FL_TRUNC;
+		if (ls->tree && get_column_id(n) == COL_COMMAND)
+			flags |= SCOLS_FL_TREE;
+		if (ls->no_wrap)
+			flags &= ~SCOLS_FL_WRAP;
+
+		column = scols_table_new_column(table, info->name, info->whint, flags);
+		if (column == NULL) {
+			warnx(_("failed to initialize output column"));
+			scols_unref_table(table);
+			return NULL;
+		}
+		if (ls->json)
+			scols_column_set_json_type(column, info->json_type);
+
+		/* only the NSFS column gets the newline wrapping setup */
+		if (ls->no_wrap || get_column_id(n) != COL_NSFS)
+			continue;
+
+		scols_column_set_wrapfunc(column,
+					  scols_wrapnl_chunksize,
+					  scols_wrapnl_nextchunk,
+					  NULL);
+		scols_column_set_safechars(column, "\n");
+	}
+
+	return table;
+}
+
+/*
+ * Print one line per namespace in ls->namespaces, each described through
+ * the process stored in ns->proc.  When ls->fltr_pid is set, namespaces
+ * that do not contain that PID are skipped.
+ *
+ * Returns 0 on success, -ENOMEM if the output table cannot be created.
+ */
+static int show_namespaces(struct lsns *ls)
+{
+	struct libscols_table *table = init_scols_table(ls);
+	struct list_head *pos;
+
+	if (table == NULL)
+		return -ENOMEM;
+
+	list_for_each(pos, &ls->namespaces) {
+		struct lsns_namespace *ns;
+
+		ns = list_entry(pos, struct lsns_namespace, namespaces);
+
+		if (ls->fltr_pid == 0 || namespace_has_process(ns, ls->fltr_pid))
+			add_scols_line(ls, table, ns, ns->proc);
+	}
+
+	scols_print_table(table);
+	scols_unref_table(table);
+	return 0;
+}
+
+/*
+ * Add the output line for @proc in namespace @ns.
+ *
+ * In tree mode the parent is emitted first (recursively) when it has no
+ * output line yet and lives in the same namespace of this type, so that
+ * the parent->child relation can be rendered as a tree.
+ */
+static void show_process(struct lsns *ls, struct libscols_table *tab,
+			 struct lsns_process *proc, struct lsns_namespace *ns)
+{
+	struct lsns_process *parent = proc->parent;
+
+	if (ls->tree && parent && !parent->outline
+	    && parent->ns_ids[ns->type] == proc->ns_ids[ns->type])
+		show_process(ls, tab, parent, ns);
+
+	add_scols_line(ls, tab, ns, proc);
+}
+
+
+/*
+ * Print all processes that belong to namespace @ns; processes which
+ * already have an output line are not added twice.
+ *
+ * Returns 0 on success, -ENOMEM if the output table cannot be created.
+ */
+static int show_namespace_processes(struct lsns *ls, struct lsns_namespace *ns)
+{
+	struct libscols_table *table = init_scols_table(ls);
+	struct list_head *pos;
+
+	if (table == NULL)
+		return -ENOMEM;
+
+	list_for_each(pos, &ns->processes) {
+		struct lsns_process *member;
+
+		member = list_entry(pos, struct lsns_process,
+				    ns_siblings[ns->type]);
+		if (member->outline)
+			continue;
+
+		show_process(ls, table, member, ns);
+	}
+
+	scols_print_table(table);
+	scols_unref_table(table);
+	return 0;
+}
+
+/*
+ * Print the help text (synopsis, option list, available output columns and
+ * the man page reference) to stdout and terminate with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	size_t i;
+
+	fputs(USAGE_HEADER, out);
+
+	fprintf(out,
+		_(" %s [options] [<namespace>]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("List system namespaces.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -J, --json             use JSON output format\n"), out);
+	fputs(_(" -l, --list             use list format output\n"), out);
+	fputs(_(" -n, --noheadings       don't print headings\n"), out);
+	fputs(_(" -o, --output <list>    define which output columns to use\n"), out);
+	fputs(_("     --output-all       output all columns\n"), out);
+	fputs(_(" -p, --task <pid>       print process namespaces\n"), out);
+	fputs(_(" -r, --raw              use the raw output format\n"), out);
+	fputs(_(" -u, --notruncate       don't truncate text in columns\n"), out);
+	fputs(_(" -W, --nowrap           don't use multi-line representation\n"), out);
+	fputs(_(" -t, --type <name>      namespace type (mnt, net, ipc, user, pid, uts, cgroup)\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(24));
+
+	/* one line per known output column: name and translated description */
+	fputs(USAGE_COLUMNS, out);
+	for (i = 0; i < ARRAY_SIZE(infos); i++)
+		fprintf(out, " %11s  %s\n", infos[i].name, _(infos[i].help));
+
+	printf(USAGE_MAN_TAIL("lsns(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+
+/*
+ * Program entry point: parse the command line, select the output columns,
+ * scan processes and namespaces, then print either the namespace list or
+ * (when a <namespace> argument is given) the processes of that namespace.
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+int main(int argc, char *argv[])
+{
+	struct lsns ls;
+	int c;
+	int r = 0;
+	char *outarg = NULL;
+	enum {
+		OPT_OUTPUT_ALL = CHAR_MAX + 1
+	};
+	static const struct option long_opts[] = {
+		{ "json",       no_argument,       NULL, 'J' },
+		{ "task",       required_argument, NULL, 'p' },
+		{ "help",	no_argument,       NULL, 'h' },
+		{ "output",     required_argument, NULL, 'o' },
+		{ "output-all", no_argument,       NULL, OPT_OUTPUT_ALL },
+		{ "notruncate", no_argument,       NULL, 'u' },
+		{ "version",    no_argument,       NULL, 'V' },
+		{ "noheadings", no_argument,       NULL, 'n' },
+		{ "nowrap",     no_argument,       NULL, 'W' },
+		{ "list",       no_argument,       NULL, 'l' },
+		{ "raw",        no_argument,       NULL, 'r' },
+		{ "type",       required_argument, NULL, 't' },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'J','r' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+	int is_net = 0;		/* set when "-t net" was requested */
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	lsns_init_debug();
+	memset(&ls, 0, sizeof(ls));
+
+	INIT_LIST_HEAD(&ls.processes);
+	INIT_LIST_HEAD(&ls.namespaces);
+	INIT_LIST_HEAD(&netnsids_cache);
+
+	while ((c = getopt_long(argc, argv,
+				"Jlp:o:nruhVt:W", long_opts, NULL)) != -1) {
+
+		err_exclusive_options(c, long_opts, excl, excl_st);
+
+		switch(c) {
+		case 'J':
+			ls.json = 1;
+			break;
+		case 'l':
+			ls.list = 1;
+			break;
+		case 'o':
+			/* parsed later, after the default columns are known */
+			outarg = optarg;
+			break;
+		case OPT_OUTPUT_ALL:
+			for (ncolumns = 0; ncolumns < ARRAY_SIZE(infos); ncolumns++)
+				columns[ncolumns] = ncolumns;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'p':
+			ls.fltr_pid = strtos32_or_err(optarg, _("invalid PID argument"));
+			break;
+		case 'h':
+			/* usage() is declared noreturn, so no break is needed */
+			usage();
+		case 'n':
+			ls.no_headings = 1;
+			break;
+		case 'r':
+			/* raw output also disables the multi-line representation */
+			ls.no_wrap = ls.raw = 1;
+			break;
+		case 'u':
+			ls.no_trunc = 1;
+			break;
+		case 't':
+		{
+			int type = ns_name2type(optarg);
+			if (type < 0)
+				errx(EXIT_FAILURE, _("unknown namespace type: %s"), optarg);
+			ls.fltr_types[type] = 1;
+			ls.fltr_ntypes++;
+			if (type == LSNS_ID_NET)
+				is_net = 1;
+			break;
+		}
+		case 'W':
+			ls.no_wrap = 1;
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* no -t given: enable all namespace types */
+	if (!ls.fltr_ntypes) {
+		size_t i;
+
+		for (i = 0; i < ARRAY_SIZE(ns_names); i++)
+			ls.fltr_types[i] = 1;
+	}
+
+	/* <namespace> argument: list the processes of that namespace */
+	if (optind < argc) {
+		if (ls.fltr_pid)
+			errx(EXIT_FAILURE, _("--task is mutually exclusive with <namespace>"));
+		ls.fltr_ns = strtou64_or_err(argv[optind], _("invalid namespace argument"));
+		ls.tree = ls.list ? 0 : 1;
+
+		if (!ncolumns) {
+			columns[ncolumns++] = COL_PID;
+			columns[ncolumns++] = COL_PPID;
+			columns[ncolumns++] = COL_USER;
+			columns[ncolumns++] = COL_COMMAND;
+		}
+	}
+
+	/* default columns for the namespace overview */
+	if (!ncolumns) {
+		columns[ncolumns++] = COL_NS;
+		columns[ncolumns++] = COL_TYPE;
+		columns[ncolumns++] = COL_NPROCS;
+		columns[ncolumns++] = COL_PID;
+		columns[ncolumns++] = COL_USER;
+		if (is_net) {
+			columns[ncolumns++] = COL_NETNSID;
+			columns[ncolumns++] = COL_NSFS;
+		}
+		columns[ncolumns++] = COL_COMMAND;
+	}
+
+	if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns),
+				  &ncolumns, column_name_to_id) < 0)
+		return EXIT_FAILURE;
+
+	scols_init_debug(0);
+
+	uid_cache = new_idcache();
+	if (!uid_cache)
+		err(EXIT_FAILURE, _("failed to allocate UID cache"));
+
+#ifdef HAVE_LINUX_NET_NAMESPACE_H
+	/* the netlink socket is needed only when NETNSID is displayed */
+	if (has_column(COL_NETNSID))
+		netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+#endif
+	/* the mount table is needed only when NSFS is displayed */
+	if (has_column(COL_NSFS)) {
+		ls.tab = mnt_new_table_from_file(_PATH_PROC_MOUNTINFO);
+		if (!ls.tab)
+			err(MNT_EX_FAIL, _("failed to parse %s"), _PATH_PROC_MOUNTINFO);
+	}
+
+	r = read_processes(&ls);
+	if (!r)
+		r = read_namespaces(&ls);
+	if (!r) {
+		if (ls.fltr_ns) {
+			struct lsns_namespace *ns = get_namespace(&ls, ls.fltr_ns);
+
+			if (!ns)
+				errx(EXIT_FAILURE, _("not found namespace: %ju"), (uintmax_t) ls.fltr_ns);
+			r = show_namespace_processes(&ls, ns);
+		} else
+			r = show_namespaces(&ls);
+	}
+
+	mnt_free_table(ls.tab);
+	if (netlink_fd >= 0)
+		close(netlink_fd);
+	free_idcache(uid_cache);
+	return r == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/sys-utils/mount.8 b/sys-utils/mount.8
new file mode 100644
index 0000000..da0ac5b
--- /dev/null
+++ b/sys-utils/mount.8
@@ -0,0 +1,2589 @@
+.\" Copyright (c) 1996-2004 Andries Brouwer
+.\" Copyright (C) 2006-2012 Karel Zak <kzak@redhat.com>
+.\"
+.\" This page is somewhat derived from a page that was
+.\" (c) 1980, 1989, 1991 The Regents of the University of California
+.\" and had been heavily modified by Rik Faith and myself.
+.\" (Probably no BSD text remains.)
+.\" Fragments of text were written by Werner Almesberger, Remy Card,
+.\" Stephen Tweedie and Eric Youngdale.
+.\"
+.\" This is free documentation; you can redistribute it and/or
+.\" modify it under the terms of the GNU General Public License as
+.\" published by the Free Software Foundation; either version 2 of
+.\" the License, or (at your option) any later version.
+.\"
+.\" The GNU General Public License's references to "object code"
+.\" and "executables" are to be interpreted as the output of any
+.\" document formatting or typesetting system, including
+.\" intermediate and printed output.
+.\"
+.\" This manual is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License along
+.\" with this program; if not, write to the Free Software Foundation, Inc.,
+.\" 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+.\"
+.TH MOUNT 8 "August 2015" "util-linux" "System Administration"
+.SH NAME
+mount \- mount a filesystem
+.SH SYNOPSIS
+.B mount
+.RB [ \-l | \-h | \-V ]
+.LP
+.B mount \-a
+.RB [ \-fFnrsvw ]
+.RB [ \-t
+.IR fstype ]
+.RB [ \-O
+.IR optlist ]
+.LP
+.B mount
+.RB [ \-fnrsvw ]
+.RB [ \-o
+.IR options ]
+.IR device | dir
+.LP
+.B mount
+.RB [ \-fnrsvw ]
+.RB [ \-t
+.IR fstype ]
+.RB [ \-o
+.IR options ]
+.I device dir
+.SH DESCRIPTION
+All files accessible in a Unix system are arranged in one big
+tree, the file hierarchy, rooted at
+.IR / .
+These files can be spread out over several devices.  The
+.B mount
+command serves to attach the filesystem found on some device
+to the big file tree.  Conversely, the
+.BR umount (8)
+command will detach it again.  The filesystem is used to control how data is
+stored on the device or provided in a virtual way by network or other services.
+
+The standard form of the
+.B mount
+command is:
+.RS
+
+.br
+.BI "mount \-t" " type device dir"
+.br
+
+.RE
+This tells the kernel to attach the filesystem found on
+.I device
+(which is of type
+.IR type )
+at the directory
+.IR dir .
+The option \fB\-t \fItype\fR is optional.  The
+.B mount
+command is usually able to detect a filesystem.  The root permissions are necessary
+to mount a filesystem by default.  See section "Non-superuser mounts" below for more details.
+The previous contents (if any) and owner and mode of
+.I dir
+become invisible, and as long as this filesystem remains mounted,
+the pathname
+.I dir
+refers to the root of the filesystem on
+.IR device .
+
+If only the directory or the device is given, for example:
+.RS
+.sp
+.B mount /dir
+.sp
+.RE
+then \fBmount\fR looks for a mountpoint (and if not found then for a device) in the
+.I /etc/fstab
+file.  It's possible to use the
+.B \-\-target
+or
+.B \-\-source
+options to avoid ambiguous interpretation of the given argument.  For example:
+.RS
+.sp
+.B mount \-\-target /mountpoint
+.sp
+.RE
+
+The same filesystem may be mounted more than once, and in some cases (e.g.
+network filesystems) the same filesystem may be mounted on the same
+mountpoint multiple times. The mount command does not implement any policy to
+control this behavior. All behavior is controlled by kernel and it is usually
+specific to filesystem driver. The exception is \fB\-\-all\fR, in this case
+already mounted filesystems are ignored (see \fB\-\-all\fR below for more details).
+
+.SS Listing the mounts
+The listing mode is maintained for backward compatibility only.
+
+For more robust and customizable output use
+.BR findmnt (8),
+\fBespecially in your scripts\fP.  Note that control characters in the
+mountpoint name are replaced with '?'.
+
+The following command lists all mounted filesystems (of type
+.IR type ):
+.RS
+.sp
+.BR "mount " [ \-l "] [" "\-t \fItype\/\fP" ]
+.sp
+.RE
+The option \fB\-l\fR adds labels to this listing.  See below.
+
+.SS Indicating the device and filesystem
+Most devices are indicated by a filename (of a block special device), like
+.IR /dev/sda1 ,
+but there are other possibilities.  For example, in the case of an NFS mount,
+.I device
+may look like
+.IR knuth.cwi.nl:/dir .
+It is also possible to indicate a block special device using its filesystem label
+or UUID (see the \fB\-L\fR and \fB\-U\fR options below), or its partition label
+or UUID.  Partition identifiers are supported for example for GUID Partition
+Tables (GPT).
+
+The device names of disk partitions are unstable; hardware reconfiguration,
+adding or removing a device can cause changes in names. This is the reason why it's
+strongly recommended to use filesystem or partition identifiers like UUID or
+LABEL.
+
+The command \fBlsblk \-\-fs\fR provides an overview of filesystems, LABELs and UUIDs
+on available block devices.  The command \fBblkid -p <device>\fR provides details about
+a filesystem on the specified device.
+
+Don't forget that there is no guarantee that UUIDs and labels are really
+unique, especially if you move, share or copy the device.  Use
+.B "lsblk \-o +UUID,PARTUUID"
+to verify that the UUIDs are really unique in your system.
+
+The recommended setup is to use tags (e.g.\& \fBUUID=\fIuuid\fR) rather than
+.I /dev/disk/by-{label,uuid,partuuid,partlabel}
+udev symlinks in the
+.I /etc/fstab
+file.  Tags are
+more readable, robust and portable.  The
+.BR mount (8)
+command internally uses udev
+symlinks, so the use of symlinks in /etc/fstab has no advantage over tags.
+For more details see
+.BR libblkid (3).
+
+Note that
+.BR mount (8)
+uses UUIDs as strings.  The UUIDs from the command line or from
+.BR fstab (5)
+are not converted to internal binary representation.  The string representation
+of the UUID should be based on lower case characters.
+
+The
+.I proc
+filesystem is not associated with a special device, and when
+mounting it, an arbitrary keyword, such as
+.I proc
+can be used instead of a device specification.
+(The customary choice
+.I none
+is less fortunate: the error message `none already mounted' from
+.B mount
+can be confusing.)
+
+.SS The files /etc/fstab, /etc/mtab and /proc/mounts
+The file
+.I /etc/fstab
+(see
+.BR fstab (5)),
+may contain lines describing what devices are usually
+mounted where, using which options.  The default location of the
+.BR fstab (5)
+file can be overridden with the
+.BI \-\-fstab " path"
+command-line option (see below for more details).
+.LP
+The command
+.RS
+.sp
+.B mount \-a
+.RB [ \-t
+.IR type ]
+.RB [ \-O
+.IR optlist ]
+.sp
+.RE
+(usually given in a bootscript) causes all filesystems mentioned in
+.I fstab
+(of the proper type and/or having or not having the proper options)
+to be mounted as indicated, except for those whose line contains the
+.B noauto
+keyword.  Adding the
+.B \-F
+option will make \fBmount\fR fork, so that the
+filesystems are mounted simultaneously.
+.LP
+When mounting a filesystem mentioned in
+.I fstab
+or
+.IR mtab ,
+it suffices to specify on the command line only the device, or only the mount point.
+.sp
+The programs
+.B mount
+and
+.B umount
+traditionally maintained a list of currently mounted filesystems in the file
+.IR /etc/mtab .
+The support for regular classic
+.I /etc/mtab
+is completely disabled at compile time by default, because on current Linux
+systems it is better to make it a symlink to
+.I /proc/mounts
+instead. The regular mtab file maintained in userspace cannot reliably
+work with namespaces, containers and other advanced Linux features.
+If the regular mtab support is enabled then it's possible to
+use the file as well as the symlink.
+.sp
+If no arguments are given to
+.BR mount ,
+the list of mounted filesystems is printed.
+.sp
+If you want to override mount options from
+.I /etc/fstab
+you have to use the \fB\-o\fR option:
+.RS
+.sp
+.BI mount " device" \fR| "dir " \-o " options"
+.sp
+.RE
+and then the mount options from the command line will be appended to
+the list of options from
+.IR /etc/fstab .
+This default behaviour can be changed with the command-line
+option \fB\-\-options\-mode\fR.
+The usual behavior is that the last option wins if there are conflicting
+ones.
+.sp
+The
+.B mount
+program does not read the
+.I /etc/fstab
+file if both
+.I device
+(or LABEL, UUID, PARTUUID or PARTLABEL) and
+.I dir
+are specified.  For example, to mount device
+.BR foo " at " /dir :
+.RS
+.sp
+.B "mount /dev/foo /dir"
+.sp
+.RE
+This default behaviour can be changed with the command-line option
+\fB\-\-options\-source\-force\fR to always read configuration from fstab. For
+non-root users
+.B mount
+always reads the fstab configuration.
+
+.SS Non-superuser mounts
+Normally, only the superuser can mount filesystems.
+However, when
+.I fstab
+contains the
+.B user
+option on a line, anybody can mount the corresponding filesystem.
+.LP
+Thus, given a line
+.RS
+.sp
+.B "/dev/cdrom  /cd  iso9660  ro,user,noauto,unhide"
+.sp
+.RE
+any user can mount the iso9660 filesystem found on an inserted CDROM
+using the command:
+.RS
+.B "mount /cd"
+.sp
+.RE
+Note that \fBmount\fR is very strict about non-root users and all paths
+specified on command line are verified before fstab is parsed or a helper
+program is executed. It's strongly recommended to use a valid mountpoint to
+specify filesystem, otherwise \fBmount\fR may fail. For example it's a bad idea
+to use an NFS or CIFS source on the command line.
+.PP
+For more details, see
+.BR fstab (5).
+Only the user that mounted a filesystem can unmount it again.
+If any user should be able to unmount it, then use
+.B users
+instead of
+.B user
+in the
+.I fstab
+line.
+The
+.B owner
+option is similar to the
+.B user
+option, with the restriction that the user must be the owner
+of the special file.  This may be useful e.g.\& for
+.I /dev/fd
+if a login script makes the console user owner of this device.
+The
+.B group
+option is similar, with the restriction that the user must be
+member of the group of the special file.
+
+.SS Bind mount operation
+Remount part of the file hierarchy somewhere else.  The call is:
+
+.RS
+.br
+.B mount \-\-bind
+.I olddir newdir
+.RE
+
+or by using this fstab entry:
+
+.RS
+.br
+.BI / olddir
+.BI / newdir
+.B none bind
+.RE
+
+After this call the same contents are accessible in two places.
+
+It is important to understand that "bind" does not create any second-class
+or special node in the kernel VFS. The "bind" is just another operation to
+attach a filesystem. No information is stored anywhere that the filesystem
+has been attached by a "bind" operation. The \fIolddir\fR and \fInewdir\fR are
+independent and the \fIolddir\fR may be unmounted.
+
+One can also remount a single file (on a single file).  It's also
+possible to use the bind mount to create a mountpoint from a regular
+directory, for example:
+
+.RS
+.br
+.B mount \-\-bind foo foo
+.RE
+
+The bind mount call attaches only (part of) a single filesystem, not possible
+submounts.  The entire file hierarchy including submounts is attached
+to a second place by using:
+
+.RS
+.br
+.B mount \-\-rbind
+.I olddir newdir
+.RE
+
+Note that the filesystem mount options maintained by kernel will remain the same as those
+on the original mount point.  The userspace mount options (e.g. _netdev) will not be copied
+by
+.BR mount (8)
+and it's necessary to explicitly specify the options on the mount command line.
+
+.BR mount (8)
+since v2.27 allows changing the mount options by passing the
+relevant options along with
+.BR \-\-bind .
+For example:
+
+.RS
+.br
+.B mount -o bind,ro foo foo
+.RE
+
+This feature is not supported by the Linux kernel; it is implemented in userspace
+by an additional \fBmount\fR(2) remounting system call.
+This solution is not atomic.
+
+The alternative (classic) way to create a read-only bind mount is to use the remount
+operation, for example:
+
+.RS
+.br
+.B mount \-\-bind
+.I olddir newdir
+.br
+.B mount \-o remount,bind,ro
+.I olddir newdir
+.RE
+
+Note that a read-only bind will create a read-only mountpoint (VFS entry),
+but the original filesystem superblock will still be writable, meaning that the
+.I olddir
+will be writable, but the
+.I newdir
+will be read-only.
+
+It's also possible to change nosuid, nodev, noexec, noatime, nodiratime and
+relatime VFS entry flags by "remount,bind" operation.  The other flags (for example
+filesystem-specific flags) are silently ignored.  It's impossible to change mount
+options recursively (for example with \fB-o rbind,ro\fR).
+
+.BR mount (8)
+since v2.31 ignores the \fBbind\fR flag from
+.I /etc/fstab
+on
+.B remount operation
+(if "-o remount" specified on command line). This is necessary to fully control
+mount options on remount by command line. In the previous versions the bind
+flag was always applied and it was impossible to re-define mount options
+without interaction with the bind semantic. This
+.BR mount (8)
+behavior does not affect situations when "remount,bind" is specified in the
+.I /etc/fstab
+file.
+
+.SS The move operation
+Move a
+.B mounted tree
+to another place (atomically).  The call is:
+
+.RS
+.br
+.B mount \-\-move
+.I olddir newdir
+.RE
+
+This will cause the contents which previously appeared under
+.I olddir
+to now be accessible under
+.IR newdir .
+The physical location of the files is not changed.
+Note that
+.I olddir
+has to be a mountpoint.
+
+Note also that moving a mount residing under a shared mount is invalid and
+unsupported.  Use
+.B findmnt \-o TARGET,PROPAGATION
+to see the current propagation flags.
+
+.SS Shared subtree operations
+Since Linux 2.6.15 it is possible to mark a mount and its submounts as shared,
+private, slave or unbindable.  A shared mount provides the ability to create mirrors
+of that mount such that mounts and unmounts within any of the mirrors propagate
+to the other mirror.  A slave mount receives propagation from its master, but
+not vice versa.  A private mount carries no propagation abilities.  An
+unbindable mount is a private mount which cannot be cloned through a bind
+operation.  The detailed semantics are documented in
+.I Documentation/filesystems/sharedsubtree.txt
+file in the kernel source tree.
+
+Supported operations are:
+
+.RS
+.nf
+.BI "mount \-\-make\-shared " mountpoint
+.BI "mount \-\-make\-slave " mountpoint
+.BI "mount \-\-make\-private " mountpoint
+.BI "mount \-\-make\-unbindable " mountpoint
+.fi
+.RE
+
+The following commands allow one to recursively change the type of all the
+mounts under a given mountpoint.
+
+.RS
+.nf
+.BI "mount \-\-make\-rshared " mountpoint
+.BI "mount \-\-make\-rslave " mountpoint
+.BI "mount \-\-make\-rprivate " mountpoint
+.BI "mount \-\-make\-runbindable " mountpoint
+.fi
+.RE
+
+.BR mount (8)
+.B does not read
+.BR fstab (5)
+when a \fB\-\-make-\fR* operation is requested.  All necessary information has to be
+specified on the command line.
+
+Note that the Linux kernel does not allow changing multiple propagation flags
+with a single
+.BR mount (2)
+system call, and the flags cannot be mixed with other mount options and operations.
+
+Since util-linux 2.23 the \fBmount\fR command can perform several propagation
+(topology) changes in one mount(8) call, and can also combine them with other
+mount operations.  This feature is EXPERIMENTAL.  The propagation flags are applied
+by additional \fBmount\fR(2) system calls when the preceding mount operations
+were successful.  Note that this use case is not atomic.  It is possible to
+specify the propagation flags in
+.BR fstab (5)
+as mount options
+.RB ( private ,
+.BR slave ,
+.BR shared ,
+.BR unbindable ,
+.BR rprivate ,
+.BR rslave ,
+.BR rshared ,
+.BR runbindable ).
+
+For example:
+
+.RS
+.nf
+.B mount \-\-make\-private \-\-make\-unbindable /dev/sda1 /foo
+.fi
+.RE
+
+is the same as:
+
+.RS
+.nf
+.B mount /dev/sda1 /foo
+.B mount \-\-make\-private /foo
+.B mount \-\-make\-unbindable /foo
+.fi
+.RE
+
+.SH COMMAND-LINE OPTIONS
+The full set of mount options used by an invocation of
+.B mount
+is determined by first extracting the
+mount options for the filesystem from the
+.I fstab
+table, then applying any options specified by the
+.B \-o
+argument, and finally applying a
+.BR \-r " or " \-w
+option, when present.
+
+The command \fBmount\fR does not pass all command-line options to the
+\fB/sbin/mount.\fIsuffix\fR mount helpers.  The interface between \fBmount\fR
+and the mount helpers is described below in the section \fBEXTERNAL HELPERS\fR.
+.sp
+Command-line options available for the
+.B mount
+command are:
+.TP
+.BR \-a , " \-\-all"
+Mount all filesystems (of the given types) mentioned in
+.I fstab
+(except for those whose line contains the
+.B noauto
+keyword).  The filesystems are mounted following their order in
+.IR fstab .
+The mount command compares filesystem source, target (and fs root for bind
+mount or btrfs) to detect already mounted filesystems. The kernel table with
+already mounted filesystems is cached during \fBmount \-\-all\fR. It means
+that all duplicated fstab entries will be mounted.
+.sp
+Note that it is a bad practice to use \fBmount \-a\fR for
+.I fstab
+checking. The recommended solution is \fBfindmnt \-\-verify\fR.
+.TP
+.BR \-B , " \-\-bind"
+Remount a subtree somewhere else (so that its contents are available
+in both places).  See above, under \fBBind mounts\fR.
+.TP
+.BR \-c , " \-\-no\-canonicalize"
+Don't canonicalize paths.  The mount command canonicalizes all paths
+(from command line or fstab) by default.  This option can be used
+together with the
+.B \-f
+flag for already canonicalized absolute paths.  The option is designed for mount
+helpers which call \fBmount -i\fR.  It is strongly recommended to not use this
+command-line option for normal mount operations.
+.sp
+Note that \fBmount\fR(8) does not pass this option to the
+\fB/sbin/mount.\fItype\fR helpers.
+.TP
+.BR \-F , " \-\-fork"
+(Used in conjunction with
+.BR \-a .)
+Fork off a new incarnation of \fBmount\fR for each device.
+This will do the mounts on different devices or different NFS servers
+in parallel.
+This has the advantage that it is faster; also NFS timeouts go in
+parallel.  A disadvantage is that the mounts are done in undefined order.
+Thus, you cannot use this option if you want to mount both
+.I /usr
+and
+.IR /usr/spool .
+.IP "\fB\-f, \-\-fake\fP"
+Causes everything to be done except for the actual system call; if it's not
+obvious, this ``fakes'' mounting the filesystem.  This option is useful in
+conjunction with the
+.B \-v
+flag to determine what the
+.B mount
+command is trying to do.  It can also be used to add entries for devices
+that were mounted earlier with the \fB\-n\fR option.  The \fB\-f\fR option
+checks for an existing record in /etc/mtab and fails when the record already
+exists (with a regular non-fake mount, this check is done by the kernel).
+.IP "\fB\-i, \-\-internal\-only\fP"
+Don't call the \fB/sbin/mount.\fIfilesystem\fR helper even if it exists.
+.TP
+.BR \-L , " \-\-label " \fIlabel
+Mount the partition that has the specified
+.IR label .
+.TP
+.BR \-l , " \-\-show\-labels"
+Add the labels in the mount output.  \fBmount\fR must have
+permission to read the disk device (e.g.\& be set-user-ID root) for this to work.
+One can set such a label for ext2, ext3 or ext4 using the
+.BR e2label (8)
+utility, or for XFS using
+.BR xfs_admin (8),
+or for reiserfs using
+.BR reiserfstune (8).
+.TP
+.BR \-M , " \-\-move"
+Move a subtree to some other place.  See above, the subsection
+\fBThe move operation\fR.
+.TP
+.BR \-n , " \-\-no\-mtab"
+Mount without writing in
+.IR /etc/mtab .
+This is necessary for example when
+.I /etc
+is on a read-only filesystem.
+.TP
+.BR \-N , " \-\-namespace " \fIns
+Perform mount in namespace specified by \fIns\fR.
+\fIns\fR is either PID of process running in that namespace
+or special file representing that namespace.
+.sp
+.BR mount (8)
+switches to the namespace when it reads /etc/fstab, writes /etc/mtab (or writes to /run/mount) and calls
+.BR mount (2)
+system call, otherwise it runs in the original namespace. It means that the target namespace does not have
+to contain any libraries or other requirements necessary to execute the
+.BR mount (2)
+call.
+.sp
+See \fBnamespaces\fR(7) for more information.
+.TP
+.BR \-O , " \-\-test\-opts " \fIopts
+Limit the set of filesystems to which the
+.B \-a
+option applies.  In this regard it is like the
+.B \-t
+option except that
+.B \-O
+is useless without
+.BR \-a .
+For example, the command:
+.RS
+.RS
+.sp
+.B "mount \-a \-O no_netdev"
+.sp
+.RE
+mounts all filesystems except those which have the option
+.I _netdev
+specified in the options field in the
+.I /etc/fstab
+file.
+
+It is different from
+.B \-t
+in that each option is matched exactly; a leading
+.B no
+at the beginning of one option does not negate the rest.
+
+The
+.B \-t
+and
+.B \-O
+options are cumulative in effect; that is, the command
+.RS
+.sp
+.B "mount \-a \-t ext2 \-O _netdev"
+.sp
+.RE
+mounts all ext2 filesystems with the _netdev option, not all filesystems
+that are either ext2 or have the _netdev option specified.
+.RE
+.TP
+.BR \-o , " \-\-options " \fIopts
+Use the specified mount options.  The \fIopts\fR argument is
+a comma-separated list.  For example:
+.RS
+.RS
+.sp
+.B "mount LABEL=mydisk \-o noatime,nodev,nosuid"
+.sp
+.RE
+
+For more details, see the
+.B FILESYSTEM-INDEPENDENT MOUNT OPTIONS
+and
+.B FILESYSTEM-SPECIFIC MOUNT OPTIONS
+sections.
+.RE
+
+.TP
+.BR "\-\-options\-mode " \fImode
+Controls how to combine options from fstab/mtab with options from command line.
+\fImode\fR can be one of
+.BR ignore ", " append ", " prepend " or " replace .
+For example \fBappend\fR means that options from fstab are appended to options from command line.
+Default value is \fBprepend\fR -- it means command line options are evaluated after fstab options.
+Note that the last option wins if there are conflicting ones.
+
+.TP
+.BR "\-\-options\-source " \fIsource
+Source of default options.
+\fIsource\fR is a comma-separated list of
+.BR fstab ", " mtab " and " disable .
+\fBdisable\fR disables
+.BR fstab " and " mtab
+and disables \fB\-\-options\-source\-force\fR.
+Default value is \fBfstab,mtab\fR.
+
+.TP
+.B \-\-options\-source\-force
+Use options from fstab/mtab even if both \fIdevice\fR and \fIdir\fR are specified.
+
+.TP
+.BR \-R , " \-\-rbind"
+Remount a subtree and all possible submounts somewhere else (so that its
+contents are available in both places).  See above, the subsection
+\fBBind mounts\fR.
+.TP
+.BR \-r , " \-\-read\-only"
+Mount the filesystem read-only.  A synonym is
+.BR "\-o ro" .
+.sp
+Note that, depending on the filesystem type, state and kernel behavior, the
+system may still write to the device.  For example, ext3 and ext4 will replay the
+journal if the filesystem is dirty.  To prevent this kind of write access, you
+may want to mount an ext3 or ext4 filesystem with the \fBro,noload\fR mount
+options or set the block device itself to read-only mode, see the
+.BR blockdev (8)
+command.
+.TP
+.B \-s
+Tolerate sloppy mount options rather than failing.  This will ignore mount
+options not supported by a filesystem type.  Not all filesystems support this
+option.  Currently it's supported by the \fBmount.nfs\fR mount helper only.
+.TP
+.BI \-\-source " device"
+If only one argument for the mount command is given then the argument might be
+interpreted as target (mountpoint) or source (device).  This option allows one to
+explicitly define that the argument is the mount source.
+.TP
+.BI \-\-target " directory"
+If only one argument for the mount command is given then the argument might be
+interpreted as target (mountpoint) or source (device).  This option allows one to
+explicitly define that the argument is the mount target.
+.TP
+.BR \-T , " \-\-fstab " \fIpath
+Specifies an alternative fstab file.  If \fIpath\fP is a directory then the files
+in the directory are sorted by
+.BR strverscmp (3);
+files that start with "."\& or without an \&.fstab extension are ignored.  The option
+can be specified more than once.  This option is mostly designed for initramfs
+or chroot scripts where additional configuration is specified beyond standard
+system configuration.
+.sp
+Note that \fBmount\fR(8) does not pass the option \fB\-\-fstab\fP to the
+\fB/sbin/mount.\fItype\fR helpers, meaning that the alternative fstab files will be
+invisible for the helpers.  This is no problem for normal mounts, but user
+(non-root) mounts always require fstab to verify the user's rights.
+.TP
+.BR \-t , " \-\-types " \fIfstype
+The argument following the
+.B \-t
+is used to indicate the filesystem type.  The filesystem types which are
+currently supported depend on the running kernel.  See
+.I /proc/filesystems
+and
+.I /lib/modules/$(uname -r)/kernel/fs
+for a complete list of the filesystems.  The most common are ext2, ext3, ext4,
+xfs, btrfs, vfat, sysfs, proc, nfs and cifs.
+.sp
+The programs
+.B mount
+and
+.B umount
+support filesystem subtypes.  The subtype is defined by a '.subtype' suffix.  For
+example  'fuse.sshfs'.  It's recommended to use subtype notation rather than add
+any prefix to the mount source (for example 'sshfs#example.com' is
+deprecated).
+
+If no
+.B \-t
+option is given, or if the
+.B auto
+type is specified, mount will try to guess the desired type.
+Mount uses the blkid library for guessing the filesystem
+type; if that does not turn up anything that looks familiar,
+mount will try to read the file
+.IR /etc/filesystems ,
+or, if that does not exist,
+.IR /proc/filesystems .
+All of the filesystem types listed there will be tried,
+except for those that are labeled "nodev" (e.g.\&
+.IR devpts ,
+.I proc
+and
+.IR nfs ).
+If
+.I /etc/filesystems
+ends in a line with a single *, mount will read
+.I /proc/filesystems
+afterwards.  While trying, all filesystem types will be
+mounted with the mount option \fBsilent\fR.
+.sp
+The
+.B auto
+type may be useful for user-mounted floppies.
+Creating a file
+.I /etc/filesystems
+can be useful to change the probe order (e.g., to try vfat before msdos
+or ext3 before ext2) or if you use a kernel module autoloader.
+.sp
+More than one type may be specified in a comma-separated
+list, for option
+.B \-t
+as well as in an
+.I /etc/fstab
+entry.  The list of filesystem types for option
+.B \-t
+can be prefixed with
+.B no
+to specify the filesystem types on which no action should be taken.
+The prefix
+.B no
+has no effect when specified in an
+.I /etc/fstab
+entry.
+.sp
+The prefix
+.B no
+can be meaningful with the
+.B \-a
+option.  For example, the command
+.RS
+.RS
+.sp
+.B "mount \-a \-t nomsdos,smbfs"
+.sp
+.RE
+mounts all filesystems except those of type
+.I msdos
+and
+.IR smbfs .
+.sp
+For most types all the
+.B mount
+program has to do is issue a simple
+.BR mount (2)
+system call, and no detailed knowledge of the filesystem type is required.
+For a few types however (like nfs, nfs4, cifs, smbfs, ncpfs) ad hoc code is
+necessary.  The nfs, nfs4, cifs, smbfs, and ncpfs filesystems
+have a separate mount program.  In order to make it possible to
+treat all types in a uniform way, \fBmount\fR will execute the program
+.BI /sbin/mount. type
+(if that exists) when called with type
+.IR type .
+Since different versions of the
+.B smbmount
+program have different calling conventions,
+.B /sbin/mount.smbfs
+may have to be a shell script that sets up the desired call.
+.RE
+.TP
+.BR \-U , " \-\-uuid " \fIuuid
+Mount the partition that has the specified
+.IR uuid .
+.TP
+.BR \-v , " \-\-verbose"
+Verbose mode.
+.TP
+.BR \-w , " \-\-rw" , " \-\-read\-write"
+Mount the filesystem read/write.  Read-write is the kernel default.  A synonym is
+.BR "\-o rw" .
+
+Note that specifying \fB\-w\fR on the command line forces the \fBmount\fR command
+to never try a read-only mount on write-protected devices. The default is to
+try read-only if the previous mount syscall with read-write flags failed.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+
+.SH FILESYSTEM-INDEPENDENT MOUNT OPTIONS
+Some of these options are only useful when they appear in the
+.I /etc/fstab
+file.
+
+Some of these options could be enabled or disabled by default
+in the system kernel.  To check the current setting see the options
+in /proc/mounts.  Note that filesystems also have per-filesystem
+specific default mount options (see for example \fBtune2fs \-l\fP
+output for extN filesystems).
+
+The following options apply to any filesystem that is being
+mounted (but not every filesystem actually honors them \(en e.g.\&, the
+.B sync
+option today has an effect only for ext2, ext3, ext4, fat, vfat and ufs):
+
+.TP
+.B async
+All I/O to the filesystem should be done asynchronously.  (See also the
+.B sync
+option.)
+.TP
+.B atime
+Do not use the \fBnoatime\fR feature, so the inode access time is controlled
+by kernel defaults.  See also the descriptions of the \fB\%relatime\fR and
+.B strictatime
+mount options.
+.TP
+.B noatime
+Do not update inode access times on this filesystem (e.g.\& for faster
+access on the news spool to speed up news servers).  This works for all
+inode types (directories too), so it implies \fB\%nodiratime\fR.
+.TP
+.B auto
+Can be mounted with the
+.B \-a
+option.
+.TP
+.B noauto
+Can only be mounted explicitly (i.e., the
+.B \-a
+option will not cause the filesystem to be mounted).
+.TP
+.na
+.BR context=\fIcontext ", " fscontext=\fIcontext ", " defcontext=\fIcontext ", and " \%rootcontext=\fIcontext
+.ad
+The
+.B context=
+option is useful when mounting filesystems that do not support
+extended attributes, such as a floppy or hard disk formatted with VFAT, or
+systems that are not normally running under SELinux, such as an ext3 or ext4 formatted
+disk from a non-SELinux workstation.  You can also use
+.B context=
+on filesystems you do not trust, such as a floppy.  It also helps in compatibility with
+xattr-supporting filesystems on earlier 2.4.<x> kernel versions.  Even where
+xattrs are supported, you can save time not having to label every file by
+assigning the entire disk one security context.
+
+A commonly used option for removable media is
+.BR \%context="system_u:object_r:removable_t" .
+
+Two other options are
+.B fscontext=
+and
+.BR defcontext= ,
+both of which are mutually exclusive of the context option.  This means you
+can use fscontext and defcontext with each other, but neither can be used with
+context.
+
+The
+.B fscontext=
+option works for all filesystems, regardless of their xattr
+support.  The fscontext option sets the overarching filesystem label to a
+specific security context.  This filesystem label is separate from the
+individual labels on the files.  It represents the entire filesystem for
+certain kinds of permission checks, such as during mount or file creation.
+Individual file labels are still obtained from the xattrs on the files
+themselves.  The context option actually sets the aggregate context that
+fscontext provides, in addition to supplying the same label for individual
+files.
+
+You can set the default security context for unlabeled files using
+.B defcontext=
+option.  This overrides the value set for unlabeled files in the policy and requires a
+filesystem that supports xattr labeling.
+
+The
+.B rootcontext=
+option allows you to explicitly label the root inode of a FS being mounted
+before that FS or inode becomes visible to userspace.  This was found to be
+useful for things like stateless linux.
+
+Note that the kernel rejects any remount request that includes the context
+option, \fBeven\fP when unchanged from the current context.
+
+.BR "Warning: the \fIcontext\fP value might contain commas" ,
+in which case the value has to be properly quoted, otherwise
+.BR mount (8)
+will interpret the comma as a separator between mount options.  Don't forget that
+the shell strips off quotes and thus
+.BR "double quoting is required" .
+For example:
+.RS
+.RS
+.sp
+.nf
+.B mount \-t tmpfs none /mnt \-o \e
+.B \ \ 'context="system_u:object_r:tmp_t:s0:c127,c456",noexec'
+.fi
+.sp
+.RE
+For more details, see
+.BR selinux (8).
+.RE
+
+.TP
+.B defaults
+Use the default options:
+.BR rw ", " suid ", " dev ", " exec ", " auto ", " nouser ", and " async .
+
+Note that the real set of all default mount options depends on kernel
+and filesystem type.  See the beginning of this section for more details.
+.TP
+.B dev
+Interpret character or block special devices on the filesystem.
+.TP
+.B nodev
+Do not interpret character or block special devices on the filesystem.
+.TP
+.B diratime
+Update directory inode access times on this filesystem.  This is the default.
+(This option is ignored when \fBnoatime\fR is set.)
+.TP
+.B nodiratime
+Do not update directory inode access times on this filesystem.
+(This option is implied when \fBnoatime\fR is set.)
+.TP
+.B dirsync
+All directory updates within the filesystem should be done synchronously.
+This affects the following system calls: creat, link, unlink, symlink,
+mkdir, rmdir, mknod and rename.
+.TP
+.B exec
+Permit execution of binaries.
+.TP
+.B noexec
+Do not permit direct execution of any binaries on the mounted filesystem.
+.TP
+.B group
+Allow an ordinary user to mount the filesystem if one
+of that user's groups matches the group of the device.
+This option implies the options
+.BR nosuid " and " nodev
+(unless overridden by subsequent options, as in the option line
+.BR group,dev,suid ).
+.TP
+.B iversion
+Every time the inode is modified, the i_version field will be incremented.
+.TP
+.B noiversion
+Do not increment the i_version inode field.
+.TP
+.B mand
+Allow mandatory locks on this filesystem.  See
+.BR fcntl (2).
+.TP
+.B nomand
+Do not allow mandatory locks on this filesystem.
+.TP
+.B _netdev
+The filesystem resides on a device that requires network access
+(used to prevent the system from attempting to mount these filesystems
+until the network has been enabled on the system).
+.TP
+.B nofail
+Do not report errors for this device if it does not exist.
+.TP
+.B relatime
+Update inode access times relative to modify or change time.  Access
+time is only updated if the previous access time was earlier than the
+current modify or change time.  (Similar to \fB\%noatime\fR, but it doesn't
+break \fBmutt\fR or other applications that need to know if a file has been
+read since the last time it was modified.)
+
+Since Linux 2.6.30, the kernel defaults to the behavior provided by this
+option (unless
+.B \%noatime
+was specified), and the
+.B \%strictatime
+option is required to obtain traditional semantics.  In addition, since Linux
+2.6.30, the file's last access time is always updated if it is more than 1
+day old.
+.TP
+.B norelatime
+Do not use the
+.B relatime
+feature.  See also the
+.B strictatime
+mount option.
+.TP
+.B strictatime
+Explicitly request full atime updates.  This makes it
+possible for the kernel to default to
+.B \%relatime
+or
+.B \%noatime
+but still allow userspace to override it.  For more details about the default
+system mount options see /proc/mounts.
+.TP
+.B nostrictatime
+Use the kernel's default behavior for inode access time updates.
+.TP
+.B lazytime
+Only update times (atime, mtime, ctime) on the in-memory version of the file inode.
+
+This mount option significantly reduces writes to the inode table for
+workloads that perform frequent random writes to preallocated files.
+
+The on-disk timestamps are updated only when:
+.sp
+.RS
+- the inode needs to be updated for some change unrelated to file timestamps
+.sp
+- the application employs
+.BR fsync (2),
+.BR syncfs (2),
+or
+.BR sync (2)
+.sp
+- an undeleted inode is evicted from memory
+.sp
+- more than 24 hours have passed since the inode was written to disk.
+.RE
+.sp
+.TP
+.B nolazytime
+Do not use the lazytime feature.
+.TP
+.B suid
+Allow set-user-ID or set-group-ID bits to take
+effect.
+.TP
+.B nosuid
+Do not allow set-user-ID or set-group-ID bits to take
+effect.
+.TP
+.B silent
+Turn on the silent flag.
+.TP
+.B loud
+Turn off the silent flag.
+.TP
+.B owner
+Allow an ordinary user to mount the filesystem if that
+user is the owner of the device.
+This option implies the options
+.BR nosuid " and " nodev
+(unless overridden by subsequent options, as in the option line
+.BR owner,dev,suid ).
+.TP
+.B remount
+Attempt to remount an already-mounted filesystem.  This is commonly
+used to change the mount flags for a filesystem, especially to make a
+read-only filesystem writable.  It does not change device or mount point.
+
+The remount operation together with the
+.B bind
+flag has special semantics. See above, the subsection \fBBind mounts\fR.
+
+The remount functionality follows the standard way the mount command works
+with options from fstab.  This means that \fBmount\fR does not
+read fstab (or mtab) only when both
+.I device
+and
+.I dir
+are specified.
+.sp
+.in +4
+.B "mount \-o remount,rw /dev/foo /dir"
+.in
+.sp
+After this call all old mount options are replaced and arbitrary stuff from
+fstab (or mtab) is ignored, except the loop= option which is internally
+generated and maintained by the mount command.
+.sp
+.in +4
+.B "mount \-o remount,rw  /dir"
+.in
+.sp
+After this call, mount reads fstab and merges these options with
+the options from the command line (\fB\-o\fR).
+If no mountpoint is found in fstab, then a remount with unspecified source is
+allowed.
+.TP
+.B ro
+Mount the filesystem read-only.
+.TP
+.B rw
+Mount the filesystem read-write.
+.TP
+.B sync
+All I/O to the filesystem should be done synchronously.  In the case of
+media with a limited number of write cycles
+(e.g.\& some flash drives), \fBsync\fR may cause life-cycle shortening.
+.TP
+.B user
+Allow an ordinary user to mount the filesystem.
+The name of the mounting user is written to the mtab file (or to the private
+libmount file in /run/mount on systems without a regular mtab) so that this
+same user can unmount the filesystem again.
+This option implies the options
+.BR noexec ", " nosuid ", and " nodev
+(unless overridden by subsequent options, as in the option line
+.BR user,exec,dev,suid ).
+.TP
+.B nouser
+Forbid an ordinary user to mount the filesystem.
+This is the default; it does not imply any other options.
+.TP
+.B users
+Allow any user to mount and to unmount the filesystem, even
+when some other ordinary user mounted it.
+This option implies the options
+.BR noexec ", " nosuid ", and " nodev
+(unless overridden by subsequent options, as in the option line
+.BR users,exec,dev,suid ).
+.TP
+.B X-*
+All options prefixed with "X-" are interpreted as comments or as userspace
+application-specific options.  These options are not stored in user space (e.g.\& the mtab file),
+nor sent to the mount.\fItype\fR helpers nor to the
+.BR mount (2)
+system call.  The suggested format is \fBX-\fIappname\fR.\fIoption\fR.
+.TP
+.B x-*
+The same as \fBX-*\fR options, but stored permanently in user space.  This
+means the options are also available for umount or other operations.  Note
+that maintaining mount options in user space is tricky, because it is necessary to use
+libmount-based tools and there is no guarantee that the options will always be
+available (for example after a move mount operation or in an unshared namespace).
+
+Note that before util-linux v2.30 the x-* options were not maintained by
+libmount and were not stored in user space (the functionality was the same as X-* has now),
+but due to the growing number of use-cases (in initrd, systemd etc.) the
+functionality has been extended to keep existing fstab configurations usable
+without a change.
+.TP
+.BR X-mount.mkdir [ = \fImode\fR ]
+Allow creating the target directory (mountpoint).  The optional argument
+.I mode
+specifies the filesystem access mode used for
+.BR mkdir (2)
+in octal notation.  The default mode is 0755.  This functionality is supported
+only for root users.  The option is also supported as x-mount.mkdir, but this notation
+is deprecated for mount.mkdir since v2.30.
+
+.SH "FILESYSTEM-SPECIFIC MOUNT OPTIONS"
+You should consult the respective man page for the filesystem first.
+If you want to know what options the ext4 filesystem supports, then check the
+.BR ext4 (5)
+man page.
+If that doesn't exist, you can also check the corresponding mount page like
+.BR mount.cifs (8).
+Note that you might have to install the respective userland tools.
+.sp
+The following options apply only to certain filesystems.
+We sort them by filesystem.  They all follow the
+.B \-o
+flag.
+.sp
+What options are supported depends a bit on the running kernel.
+More info may be found in the kernel source subdirectory
+.IR Documentation/filesystems .
+
+.SS "Mount options for adfs"
+.TP
+\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP
+Set the owner and group of the files in the filesystem (default: uid=gid=0).
+.TP
+\fBownmask=\fP\,\fIvalue\fP and \fBothmask=\fP\,\fIvalue\fP
+Set the permission mask for ADFS 'owner' permissions and 'other' permissions,
+respectively (default: 0700 and 0077, respectively).
+See also
+.IR /usr/src/linux/Documentation/filesystems/adfs.txt .
+
+.SS "Mount options for affs"
+.TP
+\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP
+Set the owner and group of the root of the filesystem (default: uid=gid=0,
+but with option
+.B uid
+or
+.B gid
+without specified value, the UID and GID of the current process are taken).
+.TP
+\fBsetuid=\fP\,\fIvalue\fP and \fBsetgid=\fP\,\fIvalue\fP
+Set the owner and group of all files.
+.TP
+.BI mode= value
+Set the mode of all files to
+.IR value " & 0777"
+disregarding the original permissions.
+Add search permission to directories that have read permission.
+The value is given in octal.
+.TP
+.B protect
+Do not allow any changes to the protection bits on the filesystem.
+.TP
+.B usemp
+Set UID and GID of the root of the filesystem to the UID and GID
+of the mount point upon the first sync or umount, and then
+clear this option.  Strange...
+.TP
+.B verbose
+Print an informational message for each successful mount.
+.TP
+.BI prefix= string
+Prefix used before volume name, when following a link.
+.TP
+.BI volume= string
+Prefix (of length at most 30) used before '/' when following a symbolic link.
+.TP
+.BI reserved= value
+(Default: 2.) Number of unused blocks at the start of the device.
+.TP
+.BI root= value
+Give explicitly the location of the root block.
+.TP
+.BI bs= value
+Give blocksize.  Allowed values are 512, 1024, 2048, 4096.
+.TP
+.BR grpquota | noquota | quota | usrquota
+These options are accepted but ignored.
+(However, quota utilities may react to such strings in
+.IR /etc/fstab .)
+
+.SS "Mount options for debugfs"
+The debugfs filesystem is a pseudo filesystem, traditionally mounted on
+.IR /sys/kernel/debug .
+.\" or just /debug
+.\" present since 2.6.11
+As of kernel version 3.4, debugfs has the following options:
+.TP
+.BI uid= n ", gid=" n
+Set the owner and group of the mountpoint.
+.TP
+.BI mode= value
+Sets the mode of the mountpoint.
+
+.SS "Mount options for devpts"
+The devpts filesystem is a pseudo filesystem, traditionally mounted on
+.IR /dev/pts .
+In order to acquire a pseudo terminal, a process opens
+.IR /dev/ptmx ;
+the number of the pseudo terminal is then made available to the process
+and the pseudo terminal slave can be accessed as
+.IR /dev/pts/ <number>.
+.TP
+\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP
+This sets the owner or the group of newly created PTYs to
+the specified values.  When nothing is specified, they will
+be set to the UID and GID of the creating process.
+For example, if there is a tty group with GID 5, then
+.B gid=5
+will cause newly created PTYs to belong to the tty group.
+.TP
+.BI mode= value
+Set the mode of newly created PTYs to the specified value.
+The default is 0600.
+A value of
+.B mode=620
+and
+.B gid=5
+makes "mesg y" the default on newly created PTYs.
+.TP
+.B newinstance
+Create a private instance of devpts filesystem, such that
+indices of ptys allocated in this new instance are
+independent of indices created in other instances of devpts.
+
+All mounts of devpts without this
+.B newinstance
+option share the same set of pty indices (i.e. legacy mode).
+Each mount of devpts with the
+.B newinstance
+option has a private set of pty indices.
+
+This option is mainly used to support containers in the
+linux kernel.  It is implemented in linux kernel versions
+starting with 2.6.29.  Further, this mount option is valid
+only if CONFIG_DEVPTS_MULTIPLE_INSTANCES is enabled in the
+kernel configuration.
+
+To use this option effectively,
+.I /dev/ptmx
+must be a symbolic link to
+.IR pts/ptmx .
+See
+.I Documentation/filesystems/devpts.txt
+in the linux kernel source tree for details.
+.TP
+.BI ptmxmode= value
+
+Set the mode for the new
+.I ptmx
+device node in the devpts filesystem.
+
+With the support for multiple instances of devpts (see
+.B newinstance
+option above), each instance has a private
+.I ptmx
+node in the root of the devpts filesystem (typically
+.IR /dev/pts/ptmx ).
+
+For compatibility with older versions of the kernel, the
+default mode of the new
+.I ptmx
+node is 0000.
+.BI ptmxmode= value
+specifies a more useful mode for the
+.I ptmx
+node and is highly recommended when the
+.B newinstance
+option is specified.
+
+This option is only implemented in linux kernel versions
+starting with 2.6.29.  Further, this option is valid only if
+CONFIG_DEVPTS_MULTIPLE_INSTANCES is enabled in the kernel
+configuration.
+
+.SS "Mount options for fat"
+(Note:
+.I fat
+is not a separate filesystem, but a common part of the
+.IR msdos ,
+.I umsdos
+and
+.I vfat
+filesystems.)
+.TP
+.BR blocksize= { 512 | 1024 | 2048 }
+Set blocksize (default 512).  This option is obsolete.
+.TP
+\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP
+Set the owner and group of all files.
+(Default: the UID and GID of the current process.)
+.TP
+.BI umask= value
+Set the umask (the bitmask of the permissions that are
+.B not
+present).  The default is the umask of the current process.
+The value is given in octal.
+.TP
+.BI dmask= value
+Set the umask applied to directories only.
+The default is the umask of the current process.
+The value is given in octal.
+.TP
+.BI fmask= value
+Set the umask applied to regular files only.
+The default is the umask of the current process.
+The value is given in octal.
+.TP
+.BI allow_utime= value
+This option controls the permission check of mtime/atime.
+.RS
+.TP
+.B 20
+If the current process is in the group of the file's group ID, you can change the timestamp.
+.TP
+.B 2
+Other users can change timestamp.
+.PP
+The default is set from the `dmask' option. (If the directory is writable,
+.BR utime (2)
+is also allowed.  I.e.\& \s+3~\s0dmask & 022)
+
+Normally
+.BR utime (2)
+checks that the current process is the owner of the file, or that it has the
+CAP_FOWNER capability.  But the FAT filesystem doesn't have UID/GID on disk, so
+the normal check is too inflexible.  With this option you can relax it.
+.RE
+.TP
+.BI check= value
+Three different levels of pickiness can be chosen:
+.RS
+.TP
+.BR r [ elaxed ]
+Upper and lower case are accepted and equivalent, long name parts are
+truncated (e.g.\&
+.I verylongname.foobar
+becomes
+.IR verylong.foo ),
+leading and embedded spaces are accepted in each name part (name and extension).
+.TP
+.BR n [ ormal ]
+Like "relaxed", but many special characters (*, ?, <, spaces, etc.) are
+rejected.  This is the default.
+.TP
+.BR s [ trict ]
+Like "normal", but names that contain long parts or special characters
+that are sometimes used on Linux but are not accepted by MS-DOS
+(+, =, etc.) are rejected.
+.RE
+.TP
+.BI codepage= value
+Sets the codepage for converting to shortname characters on FAT
+and VFAT filesystems.  By default, codepage 437 is used.
+.TP
+.BI conv= mode
+This option is obsolete and may fail or be ignored.
+.TP
+.BI cvf_format= module
+Forces the driver to use the CVF (Compressed Volume File) module
+.RI cvf_ module
+instead of auto-detection.  If the kernel supports kmod, the
+cvf_format=xxx option also controls on-demand CVF module loading.
+This option is obsolete.
+.TP
+.BI cvf_option= option
+Option passed to the CVF module.  This option is obsolete.
+.TP
+.B debug
+Turn on the
+.I debug
+flag.  A version string and a list of filesystem parameters will be
+printed (these data are also printed if the parameters appear to be
+inconsistent).
+.TP
+.B discard
+If set, causes discard/TRIM commands to be issued to the block device
+when blocks are freed.  This is useful for SSD devices and
+sparse/thinly-provisioned LUNs.
+.TP
+.B dos1xfloppy
+If set, use a fallback default BIOS Parameter Block configuration, determined
+by backing device size.  These static parameters match defaults assumed by DOS
+1.x for 160 kiB, 180 kiB, 320 kiB, and 360 kiB floppies and floppy images.
+.TP
+.BR errors= { panic | continue | remount-ro }
+Specify FAT behavior on critical errors: panic, continue without doing
+anything, or remount the partition in read-only mode (default behavior).
+.TP
+.BR fat= { 12 | 16 | 32 }
+Specify a 12, 16 or 32 bit fat.  This overrides
+the automatic FAT type detection routine.  Use with caution!
+.TP
+.BI iocharset= value
+Character set to use for converting between 8 bit characters
+and 16 bit Unicode characters.  The default is iso8859-1.
+Long filenames are stored on disk in Unicode format.
+.TP
+.BR nfs= { stale_rw | nostale_ro }
+Enable this only if you want to export the FAT filesystem over NFS.
+
+.BR stale_rw :
+This option maintains an index (cache) of directory inodes which is used by the
+nfs-related code to improve look-ups.  Full file operations (read/write) over
+NFS are supported but with cache eviction at NFS server, this could result in
+spurious
+.B ESTALE
+errors.
+
+.BR nostale_ro :
+This option bases the inode number and file handle
+on the on-disk location of a file in the FAT directory entry.
+This ensures that
+.B ESTALE
+will not be returned after a file is
+evicted from the inode cache.  However, it means that operations
+such as rename, create and unlink could cause file handles that
+previously pointed at one file to point at a different file,
+potentially causing data corruption.  For this reason, this
+option also mounts the filesystem readonly.
+
+To maintain backward compatibility, '-o nfs' is also accepted,
+defaulting to
+.BR stale_rw .
+.TP
+.B tz=UTC
+This option disables the conversion of timestamps
+between local time (as used by Windows on FAT) and UTC
+(which Linux uses internally).  This is particularly
+useful when mounting devices (like digital cameras)
+that are set to UTC in order to avoid the pitfalls of
+local time.
+.TP
+.BI time_offset= minutes
+Set offset for conversion of timestamps from local time used by FAT to UTC.
+I.e.,
+.I minutes
+will be subtracted from each timestamp to convert it to UTC used
+internally by Linux.  This is useful when the time zone set in the kernel via
+.BR settimeofday (2)
+is not the time zone used by the filesystem.  Note
+that this option still does not provide correct time stamps in all cases in
+presence of DST - time stamps in a different DST setting will be off by one
+hour.
+.TP
+.B quiet
+Turn on the
+.I quiet
+flag.  Attempts to chown or chmod files do not return errors,
+although they fail.  Use with caution!
+.TP
+.B rodir
+FAT has the ATTR_RO (read-only) attribute.  On Windows, the ATTR_RO of the
+directory will just be ignored, and is used only by applications as a flag
+(e.g.\& it's set for the customized folder).
+
+If you want to use ATTR_RO as read-only flag even for the directory, set this
+option.
+.TP
+.B showexec
+If set, the execute permission bits of the file will be allowed only if
+the extension part of the name is \&.EXE, \&.COM, or \&.BAT.  Not set by default.
+.TP
+.B sys_immutable
+If set, ATTR_SYS attribute on FAT is handled as IMMUTABLE flag on Linux.
+Not set by default.
+.TP
+.B flush
+If set, the filesystem will try to flush to disk earlier than normal.
+Not set by default.
+.TP
+.B usefree
+Use the "free clusters" value stored on FSINFO.  It will
+be used to determine the number of free clusters without
+scanning the disk.  But it's not used by default, because
+recent Windows versions don't update it correctly in some
+cases.  If you are sure the "free clusters" value on FSINFO is
+correct, you can avoid scanning the disk with this option.
+.TP
+.BR dots ", " nodots ", " dotsOK= [ yes | no ]
+Various misguided attempts to force Unix or DOS conventions
+onto a FAT filesystem.
+
+.SS "Mount options for hfs"
+.TP
+.BI creator= cccc ", type=" cccc
+Set the creator/type values as shown by the MacOS finder
+used for creating new files.  Default values: '????'.
+.TP
+.BI uid= n ", gid=" n
+Set the owner and group of all files.
+(Default: the UID and GID of the current process.)
+.TP
+.BI dir_umask= n ", file_umask=" n ", umask=" n
+Set the umask used for all directories, all regular files, or all
+files and directories.  Defaults to the umask of the current process.
+.TP
+.BI session= n
+Select the CDROM session to mount.
+Defaults to leaving that decision to the CDROM driver.
+This option will fail with anything but a CDROM as underlying device.
+.TP
+.BI part= n
+Select partition number n from the device.
+Only makes sense for CDROMs.
+Defaults to not parsing the partition table at all.
+.TP
+.B quiet
+Don't complain about invalid mount options.
+
+.SS "Mount options for hpfs"
+.TP
+\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP
+Set the owner and group of all files. (Default: the UID and GID
+of the current process.)
+.TP
+.BI umask= value
+Set the umask (the bitmask of the permissions that are
+.B not
+present).  The default is the umask of the current process.
+The value is given in octal.
+.TP
+.BR case= { lower | asis }
+Convert all file names to lower case, or leave them.
+(Default:
+.BR case=lower .)
+.TP
+.BI conv= mode
+This option is obsolete and may fail or be ignored.
+.TP
+.B nocheck
+Do not abort mounting when certain consistency checks fail.
+
+.SS "Mount options for iso9660"
+ISO 9660 is a standard describing a filesystem structure to be used
+on CD-ROMs. (This filesystem type is also seen on some DVDs.  See also the
+.I udf
+filesystem.)
+
+Normal
+.I iso9660
+filenames appear in an 8.3 format (i.e., DOS-like restrictions on filename
+length), and in addition all characters are in upper case.  Also there is
+no field for file ownership, protection, number of links, provision for
+block/character devices, etc.
+
+Rock Ridge is an extension to iso9660 that provides all of these UNIX-like
+features.  Basically there are extensions to each directory record that
+supply all of the additional information, and when Rock Ridge is in use,
+the filesystem is indistinguishable from a normal UNIX filesystem (except
+that it is read-only, of course).
+.TP
+.B norock
+Disable the use of Rock Ridge extensions, even if available.  Cf.\&
+.BR map .
+.TP
+.B nojoliet
+Disable the use of Microsoft Joliet extensions, even if available.  Cf.\&
+.BR map .
+.TP
+.BR check= { r [ elaxed ]| s [ trict ]}
+With
+.BR check=relaxed ,
+a filename is first converted to lower case before doing the lookup.
+This is probably only meaningful together with
+.B norock
+and
+.BR map=normal .
+(Default:
+.BR check=strict .)
+.TP
+\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP
+Give all files in the filesystem the indicated user or group id,
+possibly overriding the information found in the Rock Ridge extensions.
+(Default:
+.BR uid=0,gid=0 .)
+.TP
+.BR map= { n [ ormal ]| o [ ff ]| a [ corn ]}
+For non-Rock Ridge volumes, normal name translation maps upper
+to lower case ASCII, drops a trailing `;1', and converts `;' to `.'.
+With
+.B map=off
+no name translation is done.  See
+.BR norock .
+(Default:
+.BR map=normal .)
+.B map=acorn
+is like
+.B map=normal
+but also apply Acorn extensions if present.
+.TP
+.BI mode= value
+For non-Rock Ridge volumes, give all files the indicated mode.
+(Default: read and execute permission for everybody.)
+Octal mode values require a leading 0.
+.TP
+.B unhide
+Also show hidden and associated files.
+(If the ordinary files and the associated or hidden files have
+the same filenames, this may make the ordinary files inaccessible.)
+.TP
+.BR block= { 512 | 1024 | 2048 }
+Set the block size to the indicated value.
+(Default:
+.BR block=1024 .)
+.TP
+.BI conv= mode
+This option is obsolete and may fail or be ignored.
+.TP
+.B cruft
+If the high byte of the file length contains other garbage,
+set this mount option to ignore the high order bits of the file length.
+This implies that a file cannot be larger than 16\ MB.
+.TP
+.BI session= x
+Select the number of the session on a multisession CD.
+.TP
+.BI sbsector= xxx
+Session begins from sector xxx.
+.LP
+The following options are the same as for vfat and specifying them only makes
+sense when using discs encoded using Microsoft's Joliet extensions.
+.TP
+.BI iocharset= value
+Character set to use for converting 16 bit Unicode characters on CD
+to 8 bit characters.  The default is iso8859-1.
+.TP
+.B utf8
+Convert 16 bit Unicode characters on CD to UTF-8.
+
+.SS "Mount options for jfs"
+.TP
+.BI iocharset= name
+Character set to use for converting from Unicode to ASCII.  The default is
+to do no conversion.  Use
+.B iocharset=utf8
+for UTF8 translations.  This requires CONFIG_NLS_UTF8 to be set in
+the kernel
+.I ".config"
+file.
+.TP
+.BI resize= value
+Resize the volume to
+.I value
+blocks.  JFS only supports growing a volume, not shrinking it.  This option
+is only valid during a remount, when the volume is mounted read-write.  The
+.B resize
+keyword with no value will grow the volume to the full size of the partition.
+.TP
+.B nointegrity
+Do not write to the journal.  The primary use of this option is to allow
+for higher performance when restoring a volume from backup media.  The
+integrity of the volume is not guaranteed if the system abnormally ends.
+.TP
+.B integrity
+Default.  Commit metadata changes to the journal.  Use this option to remount
+a volume where the
+.B nointegrity
+option was previously specified in order to restore normal behavior.
+.TP
+.BR errors= { continue | remount-ro | panic }
+Define the behavior when an error is encountered.
+(Either ignore errors and just mark the filesystem erroneous and continue,
+or remount the filesystem read-only, or panic and halt the system.)
+.TP
+.BR noquota | quota | usrquota | grpquota
+These options are accepted but ignored.
+
+.SS "Mount options for msdos"
+See mount options for fat.
+If the
+.I msdos
+filesystem detects an inconsistency, it reports an error and sets the file
+system read-only.  The filesystem can be made writable again by remounting
+it.
+
+.SS "Mount options for ncpfs"
+Just like
+.IR nfs ", the " ncpfs
+implementation expects a binary argument (a
+.IR "struct ncp_mount_data" )
+to the mount system call.  This argument is constructed by
+.BR ncpmount (8)
+and the current version of
+.B mount
+(2.12) does not know anything about ncpfs.
+
+.SS "Mount options for ntfs"
+.TP
+.BI iocharset= name
+Character set to use when returning file names.
+Unlike VFAT, NTFS suppresses names that contain
+nonconvertible characters.  Deprecated.
+.TP
+.BI nls= name
+New name for the option earlier called
+.IR iocharset .
+.TP
+.B utf8
+Use UTF-8 for converting file names.
+.TP
+.BR uni_xlate= { 0 | 1 | 2 }
+For 0 (or `no' or `false'), do not use escape sequences
+for unknown Unicode characters.
+For 1 (or `yes' or `true') or 2, use vfat-style 4-byte escape sequences
+starting with ":".  Here 2 gives a little-endian encoding
+and 1 a byteswapped big-endian encoding.
+.TP
+.B posix=[0|1]
+If enabled (posix=1), the filesystem distinguishes between
+upper and lower case.  The 8.3 alias names are presented as
+hard links instead of being suppressed.  This option is obsolete.
+.TP
+\fBuid=\fP\,\fIvalue\fP, \fBgid=\fP\,\fIvalue\fP and \fBumask=\fP\,\fIvalue\fP
+Set the file permission on the filesystem.
+The umask value is given in octal.
+By default, the files are owned by root and not readable by anybody else.
+
+.SS "Mount options for overlay"
+Since Linux 3.18 the overlay pseudo filesystem implements a union mount for
+other filesystems.
+
+An overlay filesystem combines two filesystems - an \fBupper\fR filesystem and
+a \fBlower\fR filesystem.  When a name exists in both filesystems, the object
+in the upper filesystem is visible while the object in the lower filesystem is
+either hidden or, in the case of directories, merged with the upper object.
+
+The lower filesystem can be any filesystem supported by Linux and does not need
+to be writable.  The lower filesystem can even be another overlayfs.  The upper
+filesystem will normally be writable and if it is it must support the creation
+of trusted.* extended attributes, and must provide a valid d_type in readdir
+responses, so NFS is not suitable.
+
+A read-only overlay of two read-only filesystems may use any filesystem type.
+The options \fBlowerdir\fR and \fBupperdir\fR are combined into a merged
+directory by using:
+
+.RS
+.br
+.nf
+.B "mount \-t overlay  overlay  \e"
+.B "  \-olowerdir=/lower,upperdir=/upper,workdir=/work  /merged"
+.fi
+.br
+.RE
+
+.TP
+.BI lowerdir= directory
+Any filesystem, does not need to be on a writable filesystem.
+.TP
+.BI upperdir= directory
+The upperdir is normally on a writable filesystem.
+.TP
+.BI workdir= directory
+The workdir needs to be an empty directory on the same filesystem as upperdir.
+
+.SS "Mount options for reiserfs"
+Reiserfs is a journaling filesystem.
+.TP
+.B conv
+Instructs version 3.6 reiserfs software to mount a version 3.5 filesystem,
+using the 3.6 format for newly created objects.  This filesystem will no
+longer be compatible with reiserfs 3.5 tools.
+.TP
+.BR hash= { rupasov | tea | r5 | detect }
+Choose which hash function reiserfs will use to find files within directories.
+.RS
+.TP
+.B rupasov
+A hash invented by Yury Yu.\& Rupasov.  It is fast and preserves locality,
+mapping lexicographically close file names to close hash values.
+This option should not be used, as it causes a high probability of hash
+collisions.
+.TP
+.B tea
+A Davies-Meyer function implemented by Jeremy Fitzhardinge.
+It uses hash permuting bits in the name.  It gets high randomness
+and, therefore, low probability of hash collisions at some CPU cost.
+This may be used if EHASHCOLLISION errors are experienced with the r5 hash.
+.TP
+.B r5
+A modified version of the rupasov hash.  It is used by default and is
+the best choice unless the filesystem has huge directories and
+unusual file-name patterns.
+.TP
+.B detect
+Instructs
+.I mount
+to detect which hash function is in use by examining
+the filesystem being mounted, and to write this information into
+the reiserfs superblock.  This is only useful on the first mount of
+an old format filesystem.
+.RE
+.TP
+.B hashed_relocation
+Tunes the block allocator.  This may provide performance improvements
+in some situations.
+.TP
+.B no_unhashed_relocation
+Tunes the block allocator.  This may provide performance improvements
+in some situations.
+.TP
+.B noborder
+Disable the border allocator algorithm invented by Yury Yu.\& Rupasov.
+This may provide performance improvements in some situations.
+.TP
+.B nolog
+Disable journaling.  This will provide slight performance improvements in
+some situations at the cost of losing reiserfs's fast recovery from crashes.
+Even with this option turned on, reiserfs still performs all journaling
+operations, save for actual writes into its journaling area.  Implementation
+of
+.I nolog
+is a work in progress.
+.TP
+.B notail
+By default, reiserfs stores small files and `file tails' directly into its
+tree.  This confuses some utilities such as
+.BR LILO (8).
+This option is used to disable packing of files into the tree.
+.TP
+.B replayonly
+Replay the transactions which are in the journal, but do not actually
+mount the filesystem.  Mainly used by
+.IR reiserfsck .
+.TP
+.BI resize= number
+A remount option which permits online expansion of reiserfs partitions.
+Instructs reiserfs to assume that the device has
+.I number
+blocks.
+This option is designed for use with devices which are under logical
+volume management (LVM).
+There is a special
+.I resizer
+utility which can be obtained from
+.IR ftp://ftp.namesys.com/pub/reiserfsprogs .
+.TP
+.B user_xattr
+Enable Extended User Attributes.  See the
+.BR attr (5)
+manual page.
+.TP
+.B acl
+Enable POSIX Access Control Lists.  See the
+.BR acl (5)
+manual page.
+.TP
+.BR barrier=none " / " barrier=flush
+This disables / enables the use of write barriers in the journaling code.
+barrier=none disables, barrier=flush enables (default).  This also requires an
+IO stack which can support barriers, and if reiserfs gets an error on a barrier
+write, it will disable barriers again with a warning.  Write barriers enforce
+proper on-disk ordering of journal commits, making volatile disk write caches
+safe to use, at some performance penalty.  If your disks are battery-backed in
+one way or another, disabling barriers may safely improve performance.
+
+.SS "Mount options for ubifs"
+UBIFS is a flash filesystem which works on top of UBI volumes.  Note that
+\fBatime\fR is not supported and is always turned off.
+The device name may be specified as
+.RS
+.TP
+.B ubiX_Y
+UBI device number
+.BR X ,
+volume number
+.B Y
+.TP
+.B ubiY
+UBI device number
+.BR 0 ,
+volume number
+.B Y
+.TP
+.B ubiX:NAME
+UBI device number
+.BR X ,
+volume with name
+.B NAME
+.TP
+.B ubi:NAME
+UBI device number
+.BR 0 ,
+volume with name
+.B NAME
+.RE
+Alternative
+.B !
+separator may be used instead of
+.BR : .
+.PP
+The following mount options are available:
+.TP
+.B bulk_read
+Enable bulk-read.  VFS read-ahead is disabled because it slows down the file
+system.  Bulk-Read is an internal optimization.  Some flashes may read faster if
+the data are read at one go, rather than at several read requests.  For
+example, OneNAND can do "read-while-load" if it reads more than one NAND page.
+.TP
+.B no_bulk_read
+Do not bulk-read.  This is the default.
+.TP
+.B chk_data_crc
+Check data CRC-32 checksums.  This is the default.
+.TP
+.B no_chk_data_crc
+Do not check data CRC-32 checksums.  With this option, the filesystem does not
+check CRC-32 checksum for data, but it does check it for the internal indexing
+information.  This option only affects reading, not writing.  CRC-32 is always
+calculated when writing the data.
+.TP
+.BR compr= { none | lzo | zlib }
+Select the default compressor which is used when new files are written.  It is
+still possible to read compressed files if mounted with the
+.B none
+option.
+
+.SS "Mount options for udf"
+UDF is the "Universal Disk Format" filesystem defined by OSTA, the Optical
+Storage Technology Association, and is often used for DVD-ROM, frequently
+in the form of a hybrid UDF/ISO-9660 filesystem. It is, however,
+perfectly usable by itself on disk drives, flash drives and other block devices.
+See also
+.IR iso9660 .
+.TP
+.B uid=
+Make all files in the filesystem belong to the given user.
+uid=forget can be specified independently of (or usually in
+addition to) uid=<user> and results in UDF
+not storing uids to the media. In fact the recorded uid
+is the 32-bit overflow uid -1 as defined by the UDF standard.
+The value is given as either <user> which is a valid user name or the corresponding
+decimal user id, or the special string "forget".
+.TP
+.B gid=
+Make all files in the filesystem belong to the given group.
+gid=forget can be specified independently of (or usually in
+addition to) gid=<group> and results in UDF
+not storing gids to the media. In fact the recorded gid
+is the 32-bit overflow gid -1 as defined by the UDF standard.
+The value is given as either <group> which is a valid group name or the corresponding
+decimal group id, or the special string "forget".
+.TP
+.B umask=
+Mask out the given permissions from all inodes read from the filesystem.
+The value is given in octal.
+.TP
+.B mode=
+If mode= is set the permissions of all non-directory inodes read from the
+filesystem will be set to the given mode. The value is given in octal.
+.TP
+.B dmode=
+If dmode= is set the permissions of all directory inodes read from the
+filesystem will be set to the given dmode. The value is given in octal.
+.TP
+.B bs=
+Set the block size. Default value prior to kernel version 2.6.30 was
+2048. Since 2.6.30 and prior to 4.11 it was logical device block size with
+fallback to 2048. Since 4.11 it is logical block size with fallback to
+any valid block size between logical device block size and 4096.
+
+For other details see the \fBmkudffs\fP(8) 2.0+ manpage, sections
+\fBCOMPATIBILITY\fP and \fBBLOCK SIZE\fP.
+.TP
+.B unhide
+Show otherwise hidden files.
+.TP
+.B undelete
+Show deleted files in lists.
+.TP
+.B adinicb
+Embed data in the inode. (default)
+.TP
+.B noadinicb
+Don't embed data in the inode.
+.TP
+.B shortad
+Use short UDF address descriptors.
+.TP
+.B longad
+Use long UDF address descriptors. (default)
+.TP
+.B nostrict
+Unset strict conformance.
+.TP
+.B iocharset=
+Set the NLS character set. This requires a kernel compiled with the CONFIG_UDF_NLS option.
+.TP
+.B utf8
+Set the UTF-8 character set.
+.SS Mount options for debugging and disaster recovery
+.TP
+.B novrs
+Ignore the Volume Recognition Sequence and attempt to mount anyway.
+.TP
+.B session=
+Select the session number for multi-session recorded optical media. (default= last session)
+.TP
+.B anchor=
+Override standard anchor location. (default= 256)
+.TP
+.B lastblock=
+Set the last block of the filesystem.
+.SS Unused historical mount options that may be encountered and should be removed
+.TP
+.B uid=ignore
+Ignored, use uid=<user> instead.
+.TP
+.B gid=ignore
+Ignored, use gid=<group> instead.
+.TP
+.B volume=
+Unimplemented and ignored.
+.TP
+.B partition=
+Unimplemented and ignored.
+.TP
+.B fileset=
+Unimplemented and ignored.
+.TP
+.B rootdir=
+Unimplemented and ignored.
+
+.SS "Mount options for ufs"
+.TP
+.BI ufstype= value
+UFS is a filesystem widely used in different operating systems.
+The problem is the differences among implementations.  Features of some
+implementations are undocumented, so it's hard to recognize the
+type of ufs automatically.
+That's why the user must specify the type of ufs by mount option.
+Possible values are:
+.RS
+.TP
+.B old
+Old format of ufs, this is the default, read only.
+(Don't forget to give the \-r option.)
+.TP
+.B 44bsd
+For filesystems created by a BSD-like system (NetBSD, FreeBSD, OpenBSD).
+.TP
+.B ufs2
+Used in FreeBSD 5.x supported as read-write.
+.TP
+.B 5xbsd
+Synonym for ufs2.
+.TP
+.B sun
+For filesystems created by SunOS or Solaris on Sparc.
+.TP
+.B sunx86
+For filesystems created by Solaris on x86.
+.TP
+.B hp
+For filesystems created by HP-UX, read-only.
+.TP
+.B nextstep
+For filesystems created by NeXTStep (on NeXT station) (currently read only).
+.TP
+.B nextstep-cd
+For NextStep CDROMs (block_size == 2048), read-only.
+.TP
+.B openstep
+For filesystems created by OpenStep (currently read only).
+The same filesystem type is also used by Mac OS X.
+.RE
+
+.TP
+.BI onerror= value
+Set behavior on error:
+.RS
+.TP
+.B panic
+If an error is encountered, cause a kernel panic.
+.TP
+.RB [ lock | umount | repair ]
+These mount options don't do anything at present;
+when an error is encountered only a console message is printed.
+.RE
+
+.SS "Mount options for umsdos"
+See mount options for msdos.
+The
+.B dotsOK
+option is explicitly killed by
+.IR umsdos .
+
+.SS "Mount options for vfat"
+First of all, the mount options for
+.I fat
+are recognized.
+The
+.B dotsOK
+option is explicitly killed by
+.IR vfat .
+Furthermore, there are
+.TP
+.B uni_xlate
+Translate unhandled Unicode characters to special escaped sequences.
+This lets you backup and restore filenames that are created with any
+Unicode characters.  Without this option, a '?' is used when no
+translation is possible.  The escape character is ':' because it is
+otherwise invalid on the vfat filesystem.  The escape sequence
+that gets used, where u is the Unicode character,
+is: ':', (u & 0x3f), ((u>>6) & 0x3f), (u>>12).
+.TP
+.B posix
+Allow two files with names that only differ in case.
+This option is obsolete.
+.TP
+.B nonumtail
+First try to make a short name without sequence number,
+before trying
+.IR name\s+3~\s0num.ext .
+.TP
+.B utf8
+UTF8 is the filesystem safe 8-bit encoding of Unicode that is used by the
+console.  It can be enabled for the filesystem with this option or disabled
+with utf8=0, utf8=no or utf8=false.  If `uni_xlate' gets set, UTF8 gets
+disabled.
+.TP
+.BI shortname= mode
+Defines the behavior for creation and display of filenames which fit into
+8.3 characters.  If a long name for a file exists, it will always be the
+preferred one for display.  There are four \fImode\fRs:
+.RS
+.TP
+.B lower
+Force the short name to lower case upon display; store a long name when
+the short name is not all upper case.
+.TP
+.B win95
+Force the short name to upper case upon display; store a long name when
+the short name is not all upper case.
+.TP
+.B winnt
+Display the short name as is; store a long name when the short name is
+not all lower case or all upper case.
+.TP
+.B mixed
+Display the short name as is; store a long name when the short name is not
+all upper case.  This mode is the default since Linux 2.6.32.
+.RE
+
+.SS "Mount options for usbfs"
+.TP
+\fBdevuid=\fP\,\fIuid\fP and \fBdevgid=\fP\,\fIgid\fP and \fBdevmode=\fP\,\fImode\fP
+Set the owner and group and mode of the device files in the usbfs filesystem
+(default: uid=gid=0, mode=0644).  The mode is given in octal.
+.TP
+\fBbusuid=\fP\,\fIuid\fP and \fBbusgid=\fP\,\fIgid\fP and \fBbusmode=\fP\,\fImode\fP
+Set the owner and group and mode of the bus directories in the usbfs
+filesystem (default: uid=gid=0, mode=0555).  The mode is given in octal.
+.TP
+\fBlistuid=\fP\,\fIuid\fP and \fBlistgid=\fP\,\fIgid\fP and \fBlistmode=\fP\,\fImode\fP
+Set the owner and group and mode of the file
+.I devices
+(default: uid=gid=0, mode=0444).  The mode is given in octal.
+
+.SH "THE LOOP DEVICE"
+One further possible type is a mount via the loop device.  For example,
+the command
+.RS
+.sp
+.B "mount /tmp/disk.img /mnt \-t vfat \-o loop=/dev/loop3"
+.sp
+.RE
+will set up the loop device
+.I /dev/loop3
+to correspond to the file
+.IR /tmp/disk.img ,
+and then mount this device on
+.IR /mnt .
+
+If no explicit loop device is mentioned
+(but just an option `\fB\-o loop\fP' is given), then
+.B mount
+will try to find some unused loop device and use that, for example
+.RS
+.sp
+.B "mount /tmp/disk.img /mnt \-o loop"
+.sp
+.RE
+The mount command
+.B automatically
+creates a loop device from a regular file if a filesystem type is
+not specified or the filesystem is known to libblkid, for example:
+.RS
+.sp
+.B "mount /tmp/disk.img /mnt"
+.sp
+.B "mount \-t ext4 /tmp/disk.img /mnt"
+.sp
+.RE
+This type of mount knows about three options, namely
+.BR loop ", " offset " and " sizelimit ,
+that are really options to
+.BR \%losetup (8).
+(These options can be used in addition to those specific
+to the filesystem type.)
+
+Since Linux 2.6.25 auto-destruction of loop devices is supported,
+meaning that any loop device allocated by
+.B mount
+will be freed by
+.B umount
+independently of
+.IR /etc/mtab .
+
+You can also free a loop device by hand, using
+.BR "losetup \-d " or " umount \-d" .
+
+Since util-linux v2.29, the mount command re-uses the loop device rather than
+initializing a new device if the same backing file is already used for some loop
+device with the same offset and sizelimit.  This is necessary to avoid
+filesystem corruption.
+
+.SH RETURN CODES
+.B mount
+has the following return codes (the bits can be ORed):
+.TP
+.B 0
+success
+.TP
+.B 1
+incorrect invocation or permissions
+.TP
+.B 2
+system error (out of memory, cannot fork, no more loop devices)
+.TP
+.B 4
+internal
+.B mount
+bug
+.TP
+.B 8
+user interrupt
+.TP
+.B 16
+problems writing or locking /etc/mtab
+.TP
+.B 32
+mount failure
+.TP
+.B 64
+some mounts succeeded
+.RE
+
+The command \fBmount \-a\fR returns 0 (all succeeded), 32 (all failed), or 64 (some
+failed, some succeeded).
+
+.SH "EXTERNAL HELPERS"
+The syntax of external mount helpers is:
+.sp
+.in +4
+.BI /sbin/mount. suffix
+.I spec dir
+.RB [ \-sfnv ]
+.RB [ \-N
+.IR namespace ]
+.RB [ \-o
+.IR options ]
+.RB [ \-t
+.IR type \fB. subtype ]
+.in
+.sp
+where the \fIsuffix\fR is the filesystem type and the \fB\-sfnvoN\fR options have
+the same meaning as the normal mount options.  The \fB\-t\fR option is used for
+filesystems with subtypes support (for example
+.BR "/sbin/mount.fuse \-t fuse.sshfs" ).
+
+The command \fBmount\fR does not pass the mount options
+.BR unbindable ,
+.BR runbindable ,
+.BR private ,
+.BR rprivate ,
+.BR slave ,
+.BR rslave ,
+.BR shared ,
+.BR rshared ,
+.BR auto ,
+.BR noauto ,
+.BR comment ,
+.BR x-* ,
+.BR loop ,
+.B offset
+and
+.B sizelimit
+to the mount.<suffix> helpers.  All other options are used in a
+comma-separated list as argument to the \fB\-o\fR option.
+
+.SH FILES
+See also "\fBThe files /etc/fstab, /etc/mtab and /proc/mounts\fR" section above.
+.TP 18n
+.I /etc/fstab
+filesystem table
+.TP
+.I /run/mount
+libmount private runtime directory
+.TP
+.I /etc/mtab
+table of mounted filesystems or symlink to /proc/mounts
+.TP
+.I /etc/mtab\s+3~\s0
+lock file (unused on systems with mtab symlink)
+.TP
+.I /etc/mtab.tmp
+temporary file (unused on systems with mtab symlink)
+.TP
+.I /etc/filesystems
+a list of filesystem types to try
+.SH ENVIRONMENT
+.IP LIBMOUNT_FSTAB=<path>
+overrides the default location of the fstab file (ignored for suid)
+.IP LIBMOUNT_MTAB=<path>
+overrides the default location of the mtab file (ignored for suid)
+.IP LIBMOUNT_DEBUG=all
+enables libmount debug output
+.IP LIBBLKID_DEBUG=all
+enables libblkid debug output
+.IP LOOPDEV_DEBUG=all
+enables loop device setup debug output
+.SH "SEE ALSO"
+.na
+.BR mount (2),
+.BR umount (2),
+.BR umount (8),
+.BR fstab (5),
+.BR nfs (5),
+.BR xfs (5),
+.BR e2label (8),
+.BR findmnt (8),
+.BR losetup (8),
+.BR mke2fs (8),
+.BR mountd (8),
+.BR nfsd (8),
+.BR swapon (8),
+.BR tune2fs (8),
+.BR xfs_admin (8)
+.ad
+.SH BUGS
+It is possible for a corrupted filesystem to cause a crash.
+.PP
+Some Linux filesystems don't support
+.BR "\-o sync " nor " \-o dirsync"
+(the ext2, ext3, ext4, fat and vfat filesystems
+.I do
+support synchronous updates (a la BSD) when mounted with the
+.B sync
+option).
+.PP
+The
+.B "\-o remount"
+may not be able to change mount parameters (all
+.IR ext2fs -specific
+parameters, except
+.BR sb ,
+are changeable with a remount, for example, but you can't change
+.B gid
+or
+.B umask
+for the
+.IR fatfs ).
+.PP
+It is possible that the files
+.I /etc/mtab
+and
+.I /proc/mounts
+don't match on systems with a regular mtab file.  The first file is based only on
+the mount command options, but the content of the second file also depends on
+the kernel and other settings (e.g.\& on a remote NFS server -- in certain cases
+the mount command may report unreliable information about an NFS mount point
+and the /proc/mounts file usually contains more reliable information.)  This is
+another reason to replace the mtab file with a symlink to the
+.I /proc/mounts
+file.
+.PP
+Checking files on NFS filesystems referenced by file descriptors (i.e.\& the
+.B fcntl
+and
+.B ioctl
+families of functions) may lead to inconsistent results due to the lack of
+a consistency check in the kernel even if noac is used.
+.PP
+The
+.B loop
+option with the
+.B offset
+or
+.B sizelimit
+options used may fail when using older kernels if the
+.B mount
+command can't confirm that the size of the block device has been configured
+as requested.  This situation can be worked around by using
+the
+.B losetup
+command manually before calling
+.B mount
+with the configured loop device.
+.SH HISTORY
+A
+.B mount
+command existed in Version 5 AT&T UNIX.
+.SH AUTHORS
+.nf
+Karel Zak <kzak@redhat.com>
+.fi
+.SH AVAILABILITY
+The mount command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/mount.c b/sys-utils/mount.c
new file mode 100644
index 0000000..5e139e8
--- /dev/null
+++ b/sys-utils/mount.c
@@ -0,0 +1,918 @@
+/*
+ * mount(8) -- mount a filesystem
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Written by Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <stdarg.h>
+#include <libmount.h>
+#include <ctype.h>
+
+#include "nls.h"
+#include "c.h"
+#include "env.h"
+#include "strutils.h"
+#include "closestream.h"
+#include "canonicalize.h"
+
+#define XALLOC_EXIT_CODE MNT_EX_SYSERR
+#include "xalloc.h"
+
+#define OPTUTILS_EXIT_CODE MNT_EX_USAGE
+#include "optutils.h"
+
+/*** TODO: DOCS:
+ *
+ *  --options-mode={ignore,append,prepend,replace}	MNT_OMODE_{IGNORE, ...}
+ *  --options-source={fstab,mtab,disable}		MNT_OMODE_{FSTAB,MTAB,NOTAB}
+ *  --options-source-force				MNT_OMODE_FORCE
+ */
+
+static int mk_exit_code(struct libmnt_context *cxt, int rc);
+
+static void __attribute__((__noreturn__)) exit_non_root(const char *option)
+{
+	/* Die with MNT_EX_USAGE; real root running with a non-root euid gets the euid reported. */
+	const uid_t real = getuid(), effective = geteuid();
+	const int dropped_root = (real == 0 && effective != 0);
+
+	if (dropped_root && option)
+		errx(MNT_EX_USAGE, _("only root can use \"--%s\" option "
+				 "(effective UID is %u)"),
+				option, effective);
+	else if (dropped_root)
+		errx(MNT_EX_USAGE, _("only root can do that "
+				 "(effective UID is %u)"), effective);
+	else if (option)
+		errx(MNT_EX_USAGE, _("only root can use \"--%s\" option"), option);
+
+	errx(MNT_EX_USAGE, _("only root can do that"));
+}
+
+static void __attribute__((__noreturn__)) print_version(void)
+{
+	const char *libver = NULL;
+	const char **feats = NULL, **f;
+
+	mnt_get_library_version(&libver);
+	mnt_get_library_features(&feats);
+
+	/* output: "<prog> from <package> (libmount <ver>[: feat1, feat2, ...])" */
+	printf(_("%s from %s (libmount %s"),
+			program_invocation_short_name,
+			PACKAGE_STRING,
+			libver);
+	for (f = feats; f && *f; f++) {
+		fputs(f == feats ? ": " : ", ", stdout);
+		fputs(*f, stdout);
+	}
+	fputs(")\n", stdout);
+	exit(MNT_EX_SUCCESS);
+}
+
+static int table_parser_errcb(struct libmnt_table *tb __attribute__((__unused__)),
+			const char *filename, int line)
+{
+	if (filename != NULL)	/* nothing to report without a file name */
+		warnx(_("%s: parse error at line %d -- ignored"), filename, line);
+	return 1;		/* always 1, whether or not a warning was printed */
+}
+
+/*
+ * Write @data to stdout with every control character replaced by '?' (this
+ * matches coreutils). findmnt(1) is more robust: it uses \x?? hex encoding.
+ */
+static void safe_fputs(const char *data)
+{
+	if (!data)
+		return;
+
+	while (*data) {
+		const unsigned char ch = (unsigned char) *data++;
+
+		fputc(iscntrl(ch) ? '?' : ch, stdout);
+	}
+}
+
+static void print_all(struct libmnt_context *cxt, char *pattern, int show_label)
+{
+	/* List mtab as "<src> on <target> [type <t>] [(<opts>)] [[<label>]]". */
+	struct libmnt_table *mtab;
+	struct libmnt_fs *ent;
+	struct libmnt_cache *tagcache = NULL;
+	struct libmnt_iter *it;
+
+	if (mnt_context_get_mtab(cxt, &mtab) != 0)
+		err(MNT_EX_SYSERR, _("failed to read mtab"));
+
+	it = mnt_new_iter(MNT_ITER_FORWARD);
+	if (it == NULL)
+		err(MNT_EX_SYSERR, _("failed to initialize libmount iterator"));
+	if (show_label)
+		tagcache = mnt_new_cache();
+
+	while (mnt_table_next_fs(mtab, it, &ent) == 0) {
+		const char *fstype = mnt_fs_get_fstype(ent);
+		const char *source = mnt_fs_get_source(ent);
+		const char *opts = mnt_fs_get_options(ent);
+		char *pretty = NULL, *label = NULL;
+
+		/* @pattern filters by filesystem type; entries without a type pass */
+		if (fstype && pattern && !mnt_match_fstype(fstype, pattern))
+			continue;
+
+		if (!mnt_fs_is_pseudofs(ent))
+			pretty = mnt_pretty_path(source, tagcache);
+		printf ("%s on ", pretty ? pretty : source);
+		safe_fputs(mnt_fs_get_target(ent));
+		if (fstype)
+			printf (" type %s", fstype);
+		if (opts)
+			printf (" (%s)", opts);
+		if (show_label && source)
+			label = mnt_cache_find_tag_value(tagcache, source, "LABEL");
+		if (label)
+			printf (" [%s]", label);
+		fputc('\n', stdout);
+		free(pretty);
+	}
+
+	mnt_unref_cache(tagcache);
+	mnt_free_iter(it);
+}
+
+/*
+ * mount -a [-F]
+ *
+ * Tries the entries offered by mnt_context_next_mount(); the result is
+ * MNT_EX_SUCCESS, MNT_EX_FAIL or MNT_EX_SOMEOK (MNT_EX_SYSERR: no iterator).
+ */
+static int mount_all(struct libmnt_context *cxt)
+{
+	struct libmnt_iter *it = mnt_new_iter(MNT_ITER_FORWARD);
+	struct libmnt_fs *ent;
+	int mount_rc, skipped;
+	int ok = 0, failed = 0;
+
+	if (it == NULL) {
+		warn(_("failed to initialize libmount iterator"));
+		return MNT_EX_SYSERR;
+	}
+
+	while (mnt_context_next_mount(cxt, it, &ent, &mount_rc, &skipped) == 0) {
+		const char *where = mnt_fs_get_target(ent);
+
+		if (skipped) {
+			/* 1 is reported as ignored, any other value as already mounted */
+			if (mnt_context_is_verbose(cxt))
+				printf(skipped == 1 ? _("%-25s: ignored\n") :
+						      _("%-25s: already mounted\n"),
+						where);
+			continue;
+		}
+
+		if (mnt_context_is_fork(cxt)) {
+			if (mnt_context_is_verbose(cxt))
+				printf("%-25s: mount successfully forked\n", where);
+			continue;
+		}
+
+		if (mk_exit_code(cxt, mount_rc) != MNT_EX_SUCCESS) {
+			failed++;
+			continue;
+		}
+		ok++;
+
+		/* MNT_EX_SUCCESS does not mean that the FS has been really
+		 * mounted (e.g. nofail option), hence the status check */
+		if (mnt_context_get_status(cxt) && mnt_context_is_verbose(cxt))
+			printf("%-25s: successfully mounted\n", where);
+	}
+
+	if (mnt_context_is_parent(cxt)) {
+		/* mount --fork: the results of the children replace our counters */
+		int children = 0;
+
+		ok = failed = 0;
+		if (mnt_context_wait_for_children(cxt, &children, &failed) == 0
+		    && children)
+			ok = children - failed;
+	}
+
+	mnt_free_iter(it);
+
+	if (failed == 0)
+		return MNT_EX_SUCCESS;		/* all success */
+	return ok == 0 ? MNT_EX_FAIL		/* all failed */
+		       : MNT_EX_SOMEOK;		/* some success, some failed */
+}
+
+static void success_message(struct libmnt_context *cxt)
+{
+	const char *source, *target, *prog = program_invocation_short_name;
+	unsigned long flags = 0;
+
+	/* Report what was done; silent if a helper ran or the status is not 1. */
+	if (mnt_context_helper_executed(cxt) || mnt_context_get_status(cxt) != 1)
+		return;
+
+	mnt_context_get_mflags(cxt, &flags);
+	target = mnt_context_get_target(cxt);
+	source = mnt_context_get_source(cxt);
+	if (flags & MS_MOVE)
+		printf(_("%s: %s moved to %s.\n"), prog, source, target);
+	else if (flags & MS_BIND)
+		printf(_("%s: %s bound on %s.\n"), prog, source, target);
+	else {
+		const int propagation = (flags & MS_PROPAGATION) != 0;
+		/* a propagation change reports the mount only for a real source */
+		if (!propagation || (source && strcmp(source, "none") != 0 && target))
+			printf(_("%s: %s mounted on %s.\n"), prog, source, target);
+		if (propagation)
+			printf(_("%s: %s propagation flags changed.\n"), prog, target);
+	}
+}
+
+#if defined(HAVE_LIBSELINUX) && defined(HAVE_SECURITY_GET_INITIAL_CONTEXT)
+#include <selinux/selinux.h>
+#include <selinux/context.h>
+
+static void selinux_warning(struct libmnt_context *cxt, const char *tgt)
+{
+	/* Verbose mode only: warn if the mounted @tgt seems to carry no SELinux labels. */
+	if (tgt && mnt_context_is_verbose(cxt) && is_selinux_enabled() > 0) {
+		security_context_t raw = NULL, def = NULL;
+
+		if (getfilecon(tgt, &raw) > 0
+		    && security_get_initial_context("file", &def) == 0) {
+		/* NOTE(review): presumably selinux_file_context_cmp() is 0 for equal contexts -- confirm */
+		if (!selinux_file_context_cmp(raw, def))
+			printf(_(
+	"mount: %s does not contain SELinux labels.\n"
+	"       You just mounted an file system that supports labels which does not\n"
+	"       contain labels, onto an SELinux box. It is likely that confined\n"
+	"       applications will generate AVC messages and not be allowed access to\n"
+	"       this file system.  For more details see restorecon(8) and mount(8).\n"),
+				tgt);
+		}
+		freecon(raw);
+		freecon(def);
+	}
+}
+#else
+# define selinux_warning(_x, _y)	/* expands to nothing without SELinux support */
+#endif
+
+/*
+ * Turns the libmount result @rc into an exit status (MNT_EX_*) and prints the
+ * message mnt_context_get_excode() has left in the buffer, if there is any.
+ */
+static int mk_exit_code(struct libmnt_context *cxt, int rc)
+{
+	char msg[BUFSIZ] = { 0 };
+	const char *target;
+
+	rc = mnt_context_get_excode(cxt, rc, msg, sizeof(msg));
+	target = mnt_context_get_target(cxt);
+
+	if (msg[0] != '\0') {
+		/* the message is labelled by the target, else the source, else "???" */
+		const char *what = target ? target : mnt_context_get_source(cxt);
+
+		warnx("%s: %s.", what ? what : "???", msg);
+	}
+
+	if (rc == MNT_EX_SUCCESS && mnt_context_get_status(cxt) == 1) {
+		selinux_warning(cxt, target);
+	}
+
+	return rc;
+}
+
+static struct libmnt_table *append_fstab(struct libmnt_context *cxt,
+					 struct libmnt_table *fstab,
+					 const char *path)
+{
+	/* Parse @path into @fstab; a missing table is created and handed over
+	 * to @cxt first. Returns the table, exits on any error. */
+	struct libmnt_table *tab = fstab;
+
+	if (tab == NULL) {
+		tab = mnt_new_table();
+		if (tab == NULL)
+			err(MNT_EX_SYSERR, _("failed to initialize libmount table"));
+		mnt_table_set_parser_errcb(tab, table_parser_errcb);
+		mnt_context_set_fstab(cxt, tab);
+		mnt_unref_table(tab);	/* from now on the reference is handled by @cxt */
+	}
+
+	if (mnt_table_parse_fstab(tab, path) != 0)
+		errx(MNT_EX_USAGE,_("%s: failed to parse"), path);
+	return tab;
+}
+
+/*
+ * Replaces the target and the source path by what canonicalize_path_restricted()
+ * returns -- a non-root user must not resolve paths he is unable to read.
+ */
+static void sanitize_paths(struct libmnt_context *cxt)
+{
+	struct libmnt_fs *fs = mnt_context_get_fs(cxt);
+	const char *path;
+
+	if (fs == NULL)
+		return;
+
+	path = mnt_fs_get_target(fs);
+	if (path != NULL) {
+		char *canon = canonicalize_path_restricted(path);
+		if (canon == NULL)
+			err(MNT_EX_USAGE, "%s", path);
+		mnt_fs_set_target(fs, canon);
+		free(canon);
+	}
+
+	path = mnt_fs_get_srcpath(fs);
+	if (path != NULL) {
+		char *canon = canonicalize_path_restricted(path);
+		if (canon == NULL)
+			err(MNT_EX_USAGE, "%s", path);
+		mnt_fs_set_source(fs, canon);
+		free(canon);
+	}
+}
+/* Append @opt to @cxt's options or exit; the cast keeps isblank() well-defined for any byte. */
+static void append_option(struct libmnt_context *cxt, const char *opt)
+{
+	if (opt && (*opt == '=' || *opt == '\'' || *opt == '\"' || isblank((unsigned char) *opt)))
+		errx(MNT_EX_USAGE, _("unsupported option format: %s"), opt);
+	if (mnt_context_append_options(cxt, opt))
+		err(MNT_EX_SYSERR, _("failed to append option '%s'"), opt);
+}
+
+static int has_remount_flag(struct libmnt_context *cxt)
+{
+	unsigned long flags = 0;
+	const int failed = mnt_context_get_mflags(cxt, &flags);
+
+	/* non-zero only if the mount flags are available
+	 * and MS_REMOUNT is set in them */
+	return failed ? 0 : flags & MS_REMOUNT;
+}
+/* Print the complete help text to stdout and exit with MNT_EX_SUCCESS. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(
+		" %1$s [-lhV]\n"
+		" %1$s -a [options]\n"
+		" %1$s [options] [--source] <source> | [--target] <directory>\n"
+		" %1$s [options] <source> <directory>\n"
+		" %1$s <operation> <mountpoint> [<target>]\n"),
+		program_invocation_short_name);	/* the one argument every %1$s refers to */
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Mount a filesystem.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fprintf(out, _(
+	" -a, --all               mount all filesystems mentioned in fstab\n"
+	" -c, --no-canonicalize   don't canonicalize paths\n"
+	" -f, --fake              dry run; skip the mount(2) syscall\n"
+	" -F, --fork              fork off for each device (use with -a)\n"
+	" -T, --fstab <path>      alternative file to /etc/fstab\n"));
+	fprintf(out, _(
+	" -i, --internal-only     don't call the mount.<type> helpers\n"));
+	fprintf(out, _(
+	" -l, --show-labels       show also filesystem labels\n"));
+	fprintf(out, _(
+	" -n, --no-mtab           don't write to /etc/mtab\n"));
+	fprintf(out, _(
+	"     --options-mode <mode>\n"
+	"                         what to do with options loaded from fstab\n"
+	"     --options-source <source>\n"
+	"                         mount options source\n"
+	"     --options-source-force\n"
+	"                         force use of options from fstab/mtab\n"));
+	fprintf(out, _(
+	" -o, --options <list>    comma-separated list of mount options\n"
+	" -O, --test-opts <list>  limit the set of filesystems (use with -a)\n"
+	" -r, --read-only         mount the filesystem read-only (same as -o ro)\n"
+	" -t, --types <list>      limit the set of filesystem types\n"));
+	fprintf(out, _(
+	"     --source <src>      explicitly specifies source (path, label, uuid)\n"
+	"     --target <target>   explicitly specifies mountpoint\n"));
+	fprintf(out, _(
+	" -v, --verbose           say what is being done\n"));
+	fprintf(out, _(
+	" -w, --rw, --read-write  mount the filesystem read-write (default)\n"));
+	fprintf(out, _(
+	" -N, --namespace <ns>    perform mount in another namespace\n"));
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(25));
+
+	fprintf(out, _(
+	"\nSource:\n"
+	" -L, --label <label>     synonym for LABEL=<label>\n"
+	" -U, --uuid <uuid>       synonym for UUID=<uuid>\n"
+	" LABEL=<label>           specifies device by filesystem label\n"
+	" UUID=<uuid>             specifies device by filesystem UUID\n"
+	" PARTLABEL=<label>       specifies device by partition label\n"
+	" PARTUUID=<uuid>         specifies device by partition UUID\n"));
+
+	fprintf(out, _(
+	" <device>                specifies device by path\n"
+	" <directory>             mountpoint for bind mounts (see --bind/rbind)\n"
+	" <file>                  regular file for loopdev setup\n"));
+
+	fprintf(out, _(
+	"\nOperations:\n"
+	" -B, --bind              mount a subtree somewhere else (same as -o bind)\n"
+	" -M, --move              move a subtree to some other place\n"
+	" -R, --rbind             mount a subtree and all submounts somewhere else\n"));
+	fprintf(out, _(
+	" --make-shared           mark a subtree as shared\n"
+	" --make-slave            mark a subtree as slave\n"
+	" --make-private          mark a subtree as private\n"
+	" --make-unbindable       mark a subtree as unbindable\n"));
+	fprintf(out, _(
+	" --make-rshared          recursively mark a whole subtree as shared\n"
+	" --make-rslave           recursively mark a whole subtree as slave\n"
+	" --make-rprivate         recursively mark a whole subtree as private\n"
+	" --make-runbindable      recursively mark a whole subtree as unbindable\n"));
+
+	printf(USAGE_MAN_TAIL("mount(8)"));
+
+	exit(MNT_EX_SUCCESS);
+}
+
+struct flag_str {
+	int value;		/* MNT_OMODE_* value the keyword stands for */
+	const char *str;	/* keyword; points to a string literal, only ever read */
+};
+
+static int omode2mask(const char *str)
+{
+	/* --options-mode keywords; @str has to match one of them exactly */
+	static const struct flag_str modes[] = {
+		{ MNT_OMODE_IGNORE, "ignore" },
+		{ MNT_OMODE_APPEND, "append" },
+		{ MNT_OMODE_PREPEND, "prepend" },
+		{ MNT_OMODE_REPLACE, "replace" },
+	};
+	const struct flag_str *m, *end = modes + ARRAY_SIZE(modes);
+
+	for (m = modes; m < end; m++) {
+		if (strcmp(str, m->str) == 0)
+			return m->value;
+	}
+	return -EINVAL;		/* unknown keyword */
+}
+
+static long osrc2mask(const char *str, size_t len)
+{
+	/* --options-source keywords; a keyword has to equal the first @len bytes of @str */
+	static const struct flag_str sources[] = {
+		{ MNT_OMODE_FSTAB, "fstab" },
+		{ MNT_OMODE_MTAB, "mtab" },
+		{ MNT_OMODE_NOTAB, "disable" },
+	};
+	const struct flag_str *s, *end = sources + ARRAY_SIZE(sources);
+
+	for (s = sources; s < end; s++) {
+		if (strncmp(str, s->str, len) == 0 && s->str[len] == '\0')
+			return s->value;
+	}
+	return -EINVAL;		/* unknown keyword */
+}
+
+static pid_t parse_pid(const char *str)
+{
+	/* @str as a PID; 0 if it is not a positive decimal number that fits into pid_t. */
+	char *end;
+	long num;	/* wider than pid_t on LP64, so a truncating cast can be detected */
+
+	errno = 0;
+	num = strtol(str, &end, 10);
+	if (errno || end == str || *end || num <= 0 || num != (pid_t) num)
+		return 0;	/* not a number, trailing junk, not positive or out of range */
+	return (pid_t) num;
+}
+/* mount(8) entry point: parse the command line, then list mounts, mount all or mount one filesystem. */
+int main(int argc, char **argv)
+{
+	int c, rc = MNT_EX_SUCCESS, all = 0, show_labels = 0;
+	struct libmnt_context *cxt;
+	struct libmnt_table *fstab = NULL;
+	char *srcbuf = NULL;
+	char *types = NULL;
+	int oper = 0, is_move = 0;
+	int propa = 0;
+	int optmode = 0, optmode_mode = 0, optmode_src = 0;
+
+	enum {
+		MOUNT_OPT_SHARED = CHAR_MAX + 1,
+		MOUNT_OPT_SLAVE,
+		MOUNT_OPT_PRIVATE,
+		MOUNT_OPT_UNBINDABLE,
+		MOUNT_OPT_RSHARED,
+		MOUNT_OPT_RSLAVE,
+		MOUNT_OPT_RPRIVATE,
+		MOUNT_OPT_RUNBINDABLE,
+		MOUNT_OPT_TARGET,
+		MOUNT_OPT_SOURCE,
+		MOUNT_OPT_OPTMODE,
+		MOUNT_OPT_OPTSRC,
+		MOUNT_OPT_OPTSRC_FORCE
+	};
+
+	static const struct option longopts[] = {
+		{ "all",              no_argument,       NULL, 'a'                   },
+		{ "fake",             no_argument,       NULL, 'f'                   },
+		{ "fstab",            required_argument, NULL, 'T'                   },
+		{ "fork",             no_argument,       NULL, 'F'                   },
+		{ "help",             no_argument,       NULL, 'h'                   },
+		{ "no-mtab",          no_argument,       NULL, 'n'                   },
+		{ "read-only",        no_argument,       NULL, 'r'                   },
+		{ "ro",               no_argument,       NULL, 'r'                   },
+		{ "verbose",          no_argument,       NULL, 'v'                   },
+		{ "version",          no_argument,       NULL, 'V'                   },
+		{ "read-write",       no_argument,       NULL, 'w'                   },
+		{ "rw",               no_argument,       NULL, 'w'                   },
+		{ "options",          required_argument, NULL, 'o'                   },
+		{ "test-opts",        required_argument, NULL, 'O'                   },
+		{ "types",            required_argument, NULL, 't'                   },
+		{ "uuid",             required_argument, NULL, 'U'                   },
+		{ "label",            required_argument, NULL, 'L'                   },
+		{ "bind",             no_argument,       NULL, 'B'                   },
+		{ "move",             no_argument,       NULL, 'M'                   },
+		{ "rbind",            no_argument,       NULL, 'R'                   },
+		{ "make-shared",      no_argument,       NULL, MOUNT_OPT_SHARED      },
+		{ "make-slave",       no_argument,       NULL, MOUNT_OPT_SLAVE       },
+		{ "make-private",     no_argument,       NULL, MOUNT_OPT_PRIVATE     },
+		{ "make-unbindable",  no_argument,       NULL, MOUNT_OPT_UNBINDABLE  },
+		{ "make-rshared",     no_argument,       NULL, MOUNT_OPT_RSHARED     },
+		{ "make-rslave",      no_argument,       NULL, MOUNT_OPT_RSLAVE      },
+		{ "make-rprivate",    no_argument,       NULL, MOUNT_OPT_RPRIVATE    },
+		{ "make-runbindable", no_argument,       NULL, MOUNT_OPT_RUNBINDABLE },
+		{ "no-canonicalize",  no_argument,       NULL, 'c'                   },
+		{ "internal-only",    no_argument,       NULL, 'i'                   },
+		{ "show-labels",      no_argument,       NULL, 'l'                   },
+		{ "target",           required_argument, NULL, MOUNT_OPT_TARGET      },
+		{ "source",           required_argument, NULL, MOUNT_OPT_SOURCE      },
+		{ "options-mode",     required_argument, NULL, MOUNT_OPT_OPTMODE     },
+		{ "options-source",   required_argument, NULL, MOUNT_OPT_OPTSRC      },
+		{ "options-source-force",   no_argument, NULL, MOUNT_OPT_OPTSRC_FORCE},
+		{ "namespace",        required_argument, NULL, 'N'                   },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {       /* rows and cols in ASCII order */
+		{ 'B','M','R' },			/* bind,move,rbind */
+		{ 'L','U', MOUNT_OPT_SOURCE },	/* label,uuid,source */
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	sanitize_env();
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	strutils_set_exitcode(MNT_EX_USAGE);
+
+	mnt_init_debug(0);
+	cxt = mnt_new_context();
+	if (!cxt)
+		err(MNT_EX_SYSERR, _("libmount context allocation failed"));
+
+	mnt_context_set_tables_errcb(cxt, table_parser_errcb);
+
+	while ((c = getopt_long(argc, argv, "aBcfFhilL:Mno:O:rRsU:vVwt:T:N:",
+					longopts, NULL)) != -1) {
+
+		/* only few options are allowed for non-root users */
+		if (mnt_context_is_restricted(cxt) &&
+		    !strchr("hlLUVvrist", c) &&		/* short options open to restricted users */
+		    c != MOUNT_OPT_TARGET &&
+		    c != MOUNT_OPT_SOURCE)
+			exit_non_root(option_to_longopt(c, longopts));
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch(c) {
+		case 'a':
+			all = 1;
+			break;
+		case 'c':
+			mnt_context_disable_canonicalize(cxt, TRUE);
+			break;
+		case 'f':
+			mnt_context_enable_fake(cxt, TRUE);
+			break;
+		case 'F':
+			mnt_context_enable_fork(cxt, TRUE);
+			break;
+		case 'h':
+			usage();
+			break;
+		case 'i':
+			mnt_context_disable_helpers(cxt, TRUE);
+			break;
+		case 'n':
+			mnt_context_disable_mtab(cxt, TRUE);
+			break;
+		case 'r':
+			append_option(cxt, "ro");
+			mnt_context_enable_rwonly_mount(cxt, FALSE);
+			break;
+		case 'v':
+			mnt_context_enable_verbose(cxt, TRUE);
+			break;
+		case 'V':
+			print_version();
+			break;
+		case 'w':
+			append_option(cxt, "rw");
+			mnt_context_enable_rwonly_mount(cxt, TRUE);
+			break;
+		case 'o':
+			append_option(cxt, optarg);
+			break;
+		case 'O':
+			if (mnt_context_set_options_pattern(cxt, optarg))
+				err(MNT_EX_SYSERR, _("failed to set options pattern"));
+			break;
+		case 'L':
+			xasprintf(&srcbuf, "LABEL=\"%s\"", optarg);
+			mnt_context_disable_swapmatch(cxt, 1);
+			mnt_context_set_source(cxt, srcbuf);
+			free(srcbuf);
+			break;
+		case 'U':
+			xasprintf(&srcbuf, "UUID=\"%s\"", optarg);
+			mnt_context_disable_swapmatch(cxt, 1);
+			mnt_context_set_source(cxt, srcbuf);
+			free(srcbuf);
+			break;
+		case 'l':
+			show_labels = 1;
+			break;
+		case 't':
+			types = optarg;
+			break;
+		case 'T':
+			fstab = append_fstab(cxt, fstab, optarg);
+			break;
+		case 's':
+			mnt_context_enable_sloppy(cxt, TRUE);
+			break;
+		case 'B':
+			oper = 1;
+			append_option(cxt, "bind");
+			break;
+		case 'M':
+			oper = 1;
+			is_move = 1;
+			break;
+		case 'R':
+			oper = 1;
+			append_option(cxt, "rbind");
+			break;
+		case 'N':
+		{
+			char path[PATH_MAX];
+			pid_t pid = parse_pid(optarg);	/* 0: not a PID, @optarg itself is used as the path */
+
+			if (pid)
+				snprintf(path, sizeof(path), "/proc/%i/ns/mnt", pid);
+
+			if (mnt_context_set_target_ns(cxt, pid ? path : optarg))
+				err(MNT_EX_SYSERR, _("failed to set target namespace to %s"), pid ? path : optarg);
+			break;
+		}
+		case MOUNT_OPT_SHARED:
+			append_option(cxt, "shared");
+			propa = 1;
+			break;
+		case MOUNT_OPT_SLAVE:
+			append_option(cxt, "slave");
+			propa = 1;
+			break;
+		case MOUNT_OPT_PRIVATE:
+			append_option(cxt, "private");
+			propa = 1;
+			break;
+		case MOUNT_OPT_UNBINDABLE:
+			append_option(cxt, "unbindable");
+			propa = 1;
+			break;
+		case MOUNT_OPT_RSHARED:
+			append_option(cxt, "rshared");
+			propa = 1;
+			break;
+		case MOUNT_OPT_RSLAVE:
+			append_option(cxt, "rslave");
+			propa = 1;
+			break;
+		case MOUNT_OPT_RPRIVATE:
+			append_option(cxt, "rprivate");
+			propa = 1;
+			break;
+		case MOUNT_OPT_RUNBINDABLE:
+			append_option(cxt, "runbindable");
+			propa = 1;
+			break;
+		case MOUNT_OPT_TARGET:
+			mnt_context_disable_swapmatch(cxt, 1);
+			mnt_context_set_target(cxt, optarg);
+			break;
+		case MOUNT_OPT_SOURCE:
+			mnt_context_disable_swapmatch(cxt, 1);
+			mnt_context_set_source(cxt, optarg);
+			break;
+		case MOUNT_OPT_OPTMODE:
+			optmode_mode = omode2mask(optarg);
+			if (optmode_mode == -EINVAL) {
+				warnx(_("bad usage"));
+				errtryhelp(MNT_EX_USAGE);
+			}
+			break;
+		case MOUNT_OPT_OPTSRC:
+		{
+			unsigned long tmp = 0;
+			if (string_to_bitmask(optarg, &tmp, osrc2mask)) {
+				warnx(_("bad usage"));
+				errtryhelp(MNT_EX_USAGE);
+			}
+			optmode_src = tmp;
+			break;
+		}
+		case MOUNT_OPT_OPTSRC_FORCE:
+			optmode |= MNT_OMODE_FORCE;
+			break;
+		default:
+			errtryhelp(MNT_EX_USAGE);
+		}
+	}
+
+	argc -= optind;
+	argv += optind;
+
+	optmode |= optmode_mode | optmode_src;
+	if (optmode) {
+		if (!optmode_mode)
+			optmode |= MNT_OMODE_PREPEND;	/* no --options-mode given */
+		if (!optmode_src)
+			optmode |= MNT_OMODE_FSTAB | MNT_OMODE_MTAB;	/* no --options-source given */
+		mnt_context_set_optsmode(cxt, optmode);
+	}
+
+	if (fstab && !mnt_context_is_nocanonicalize(cxt)) {
+		/*
+		 * We have external (context independent) fstab instance, let's
+		 * make a connection between the fstab and the canonicalization
+		 * cache.
+		 */
+		mnt_table_set_cache(fstab, mnt_context_get_cache(cxt));
+	}
+	/* Neither source, target, argument nor -a: only print the mounted filesystems. */
+	if (!mnt_context_get_source(cxt) &&
+	    !mnt_context_get_target(cxt) &&
+	    !argc &&
+	    !all) {
+		if (oper || mnt_context_get_options(cxt)) {
+			warnx(_("bad usage"));
+			errtryhelp(MNT_EX_USAGE);
+		}
+		print_all(cxt, types, show_labels);
+		goto done;
+	}
+
+	/* Non-root users are allowed to use -t to print_all(),
+	   but not to mount */
+	if (mnt_context_is_restricted(cxt) && types)
+		exit_non_root("types");
+
+	if (oper && (types || all || mnt_context_get_source(cxt))) {
+		warnx(_("bad usage"));
+		errtryhelp(MNT_EX_USAGE);
+	}
+
+	if (types && (all || strchr(types, ',') ||
+			     strncmp(types, "no", 2) == 0))
+		mnt_context_set_fstype_pattern(cxt, types);
+	else if (types)
+		mnt_context_set_fstype(cxt, types);
+
+	if (all) {
+		/*
+		 * A) Mount all
+		 */
+		rc = mount_all(cxt);
+		goto done;
+
+	} else if (argc == 0 && (mnt_context_get_source(cxt) ||
+				 mnt_context_get_target(cxt))) {
+		/*
+		 * B) mount -L|-U|--source|--target
+		 *
+		 * non-root may specify source *or* target, but not both
+		 */
+		if (mnt_context_is_restricted(cxt) &&
+		    mnt_context_get_source(cxt) &&
+		    mnt_context_get_target(cxt))
+			exit_non_root(NULL);
+
+	} else if (argc == 1 && (!mnt_context_get_source(cxt) ||
+				 !mnt_context_get_target(cxt))) {
+		/*
+		 * C) mount [-L|-U|--source] <target>
+		 *    mount [--target <dir>] <source>
+		 *    mount <source|target>
+		 *
+		 * non-root may specify source *or* target, but not both
+		 *
+		 * It does not matter for libmount if we set source or target
+		 * here (the library is able to swap it), but it matters for
+		 * sanitize_paths().
+		 */
+		int istag = mnt_tag_is_valid(argv[0]);
+
+		if (istag && mnt_context_get_source(cxt))
+			/* -L, -U or --source together with LABEL= or UUID= */
+			errx(MNT_EX_USAGE, _("source specified more than once"));
+		else if (istag || mnt_context_get_target(cxt))
+			mnt_context_set_source(cxt, argv[0]);
+		else
+			mnt_context_set_target(cxt, argv[0]);
+
+		if (mnt_context_is_restricted(cxt) &&
+		    mnt_context_get_source(cxt) &&
+		    mnt_context_get_target(cxt))
+			exit_non_root(NULL);
+
+	} else if (argc == 2 && !mnt_context_get_source(cxt)
+			     && !mnt_context_get_target(cxt)) {
+		/*
+		 * D) mount <source> <target>
+		 */
+		if (mnt_context_is_restricted(cxt))
+			exit_non_root(NULL);
+
+		mnt_context_set_source(cxt, argv[0]);
+		mnt_context_set_target(cxt, argv[1]);
+
+	} else {
+		warnx(_("bad usage"));
+		errtryhelp(MNT_EX_USAGE);
+	}
+
+	if (mnt_context_is_restricted(cxt))
+		sanitize_paths(cxt);
+
+	if (is_move)
+		/* "move" as option string is not supported by libmount */
+		mnt_context_set_mflags(cxt, MS_MOVE);
+
+	if ((oper && !has_remount_flag(cxt)) || propa)
+		/* For --make-* or --bind is fstab/mtab unnecessary */
+		mnt_context_set_optsmode(cxt, MNT_OMODE_NOTAB);
+
+	rc = mnt_context_mount(cxt);
+	rc = mk_exit_code(cxt, rc);
+
+	if (rc == MNT_EX_SUCCESS && mnt_context_is_verbose(cxt))
+		success_message(cxt);
+done:
+	mnt_free_context(cxt);
+	return rc;
+}
+
diff --git a/sys-utils/mountpoint.1 b/sys-utils/mountpoint.1
new file mode 100644
index 0000000..afc469e
--- /dev/null
+++ b/sys-utils/mountpoint.1
@@ -0,0 +1,58 @@
+.TH MOUNTPOINT 1 "July 2014" "util-linux" "User Commands"
+.SH NAME
+mountpoint \- see if a directory or file is a mountpoint
+.SH SYNOPSIS
+.B mountpoint
+.RB [ \-d | \-q ]
+.I directory
+|
+.I file
+.sp
+.B mountpoint
+.B \-x
+.I device
+
+.SH DESCRIPTION
+.B mountpoint
+checks whether the given
+.I directory
+or
+.I file
+is mentioned in the /proc/self/mountinfo file.
+.SH OPTIONS
+.TP
+.BR \-d , " \-\-fs\-devno"
+Show the major/minor numbers of the device that is mounted on the given
+directory.
+.TP
+.BR \-q , " \-\-quiet"
+Be quiet - don't print anything.
+.TP
+.BR \-x , " \-\-devno"
+Show the major/minor numbers of the given block device on standard output.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH EXIT STATUS
+Zero if the directory or file is a mountpoint, non-zero if not.
+.SH AUTHOR
+.PP
+Karel Zak <kzak@redhat.com>
+.SH ENVIRONMENT
+.IP LIBMOUNT_DEBUG=all
+enables libmount debug output.
+.SH NOTES
+.PP
+The util-linux
+.B mountpoint
+implementation was written from scratch for libmount.  The original version
+for the sysvinit suite was written by Miquel van Smoorenburg.
+
+.SH SEE ALSO
+.BR mount (8)
+.SH AVAILABILITY
+The mountpoint command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/mountpoint.c b/sys-utils/mountpoint.c
new file mode 100644
index 0000000..00a74da
--- /dev/null
+++ b/sys-utils/mountpoint.c
@@ -0,0 +1,203 @@
+/*
+ * mountpoint(1) - see if a directory is a mountpoint
+ *
+ * This is libmount based reimplementation of the mountpoint(1)
+ * from sysvinit project.
+ *
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Written by Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <libmount.h>
+
+#include "nls.h"
+#include "xalloc.h"
+#include "c.h"
+#include "closestream.h"
+#include "pathnames.h"
+
+struct mountpoint_control {
+	char *path;		/* the path given on the command line */
+	dev_t dev;		/* device number, set by dir_to_device() */
+	struct stat st;		/* stat(2) result for @path, filled in main() */
+	unsigned int
+		dev_devno:1,	/* -x, --devno */
+		fs_devno:1,	/* -d, --fs-devno */
+		quiet:1;	/* -q, --quiet */
+};
+
+static int dir_to_device(struct mountpoint_control *ctl)
+{
+	/* Stores the device number of the mountpoint ctl->path in ctl->dev;
+	 * returns 0 on success and -1 if no mountpoint has been detected. */
+	struct libmnt_table *mountinfo = mnt_new_table_from_file(_PATH_PROC_MOUNTINFO);
+	struct libmnt_cache *paths;
+	struct libmnt_fs *ent;
+	int rc = -1;
+
+	if (mountinfo != NULL) {
+		/* the cache is here to canonicalize all necessary paths */
+		paths = mnt_new_cache();
+		mnt_table_set_cache(mountinfo, paths);
+		mnt_unref_cache(paths);
+
+		ent = mnt_table_find_target(mountinfo, ctl->path, MNT_ITER_BACKWARD);
+		if (ent && mnt_fs_get_target(ent)) {
+			ctl->dev = mnt_fs_get_devno(ent);
+			rc = 0;
+		}
+		mnt_unref_table(mountinfo);
+	} else {
+		/*
+		 * Fallback. Traditional way to detect mountpoints. This way
+		 * is independent on /proc, but not able to detect bind mounts.
+		 */
+		struct stat parent;
+		char dotdot[PATH_MAX], *canon;
+		int n;
+
+		canon = mnt_resolve_path(ctl->path, NULL);	/* canonicalize */
+		n = snprintf(dotdot, sizeof(dotdot), "%s/..", canon ? canon : ctl->path);
+		free(canon);
+
+		if (n < 0 || (size_t) n >= sizeof(dotdot))
+			return -1;
+		if (stat(dotdot, &parent) != 0)
+			return -1;
+
+		/* other device than "<path>/..", or the very same inode as it */
+		if (ctl->st.st_dev != parent.st_dev || ctl->st.st_ino == parent.st_ino) {
+			ctl->dev = ctl->st.st_dev;
+			rc = 0;
+		}
+	}
+
+	return rc;
+}
+
+static int print_devno(const struct mountpoint_control *ctl)
+{
+	/* Print "maj:min" of the block device ctl->path; -1 if it is no block device. */
+	const int blockdev = S_ISBLK(ctl->st.st_mode);
+	if (blockdev)
+		printf("%u:%u\n", major(ctl->st.st_rdev), minor(ctl->st.st_rdev));
+	else if (!ctl->quiet)
+		warnx(_("%s: not a block device"), ctl->path);
+	return blockdev ? 0 : -1;
+}
+/* Print the help text to stdout and exit with EXIT_SUCCESS. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	fputs(USAGE_HEADER, out);
+	fprintf(out,
+	      _(" %1$s [-qd] /path/to/directory\n"
+		" %1$s -x /dev/device\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Check whether a directory or file is a mountpoint.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -q, --quiet        quiet mode - don't print anything\n"
+		" -d, --fs-devno     print maj:min device number of the filesystem\n"
+		" -x, --devno        print maj:min device number of the block device\n"), out);
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(20));
+	printf(USAGE_MAN_TAIL("mountpoint(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+/* mountpoint(1) entry point; EXIT_SUCCESS if the path is a mountpoint (with -x: a block device). */
+int main(int argc, char **argv)
+{
+	int c;
+	struct mountpoint_control ctl = { NULL };
+
+	static const struct option longopts[] = {
+		{ "quiet",    no_argument, NULL, 'q' },
+		{ "fs-devno", no_argument, NULL, 'd' },
+		{ "devno",    no_argument, NULL, 'x' },
+		{ "help",     no_argument, NULL, 'h' },
+		{ "version",  no_argument, NULL, 'V' },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	mnt_init_debug(0);
+
+	while ((c = getopt_long(argc, argv, "qdxhV", longopts, NULL)) != -1) {
+
+		switch(c) {
+		case 'q':
+			ctl.quiet = 1;
+			break;
+		case 'd':
+			ctl.fs_devno = 1;
+			break;
+		case 'x':
+			ctl.dev_devno = 1;
+			break;
+		case 'h':
+			usage();
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (optind + 1 != argc) {	/* exactly one path argument is required */
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	ctl.path = argv[optind];
+
+	if (stat(ctl.path, &ctl.st)) {
+		if (!ctl.quiet)
+			err(EXIT_FAILURE, "%s", ctl.path);
+		return EXIT_FAILURE;
+	}
+	if (ctl.dev_devno)	/* -x: only the block device number is wanted */
+		return print_devno(&ctl) ? EXIT_FAILURE : EXIT_SUCCESS;
+	if (dir_to_device(&ctl)) {
+		if (!ctl.quiet)
+			printf(_("%s is not a mountpoint\n"), ctl.path);
+		return EXIT_FAILURE;
+	}
+	if (ctl.fs_devno)	/* -d is printed even if -q is given */
+		printf("%u:%u\n", major(ctl.dev), minor(ctl.dev));
+	else if (!ctl.quiet)
+		printf(_("%s is a mountpoint\n"), ctl.path);
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/nsenter.1 b/sys-utils/nsenter.1
new file mode 100644
index 0000000..aacae53
--- /dev/null
+++ b/sys-utils/nsenter.1
@@ -0,0 +1,269 @@
+.TH NSENTER 1 "June 2013" "util-linux" "User Commands"
+.SH NAME
+nsenter \- run program with namespaces of other processes
+.SH SYNOPSIS
+.B nsenter
+[options]
+.RI [ program
+.RI [ arguments ]]
+.SH DESCRIPTION
+Enters the namespaces of one or more other processes and then executes the specified
+\fIprogram\fP. If \fIprogram\fP is not given, then ``${SHELL}'' is run (default: /bin\:/sh).
+.PP
+Enterable namespaces are:
+.TP
+.B mount namespace
+Mounting and unmounting filesystems will not affect the rest of the system,
+except for filesystems which are explicitly marked as shared (with
+\fBmount --make-\:shared\fP; see \fI/proc\:/self\:/mountinfo\fP for the
+\fBshared\fP flag).
+For further details, see
+.BR mount_namespaces (7)
+and the discussion of the
+.B CLONE_NEWNS
+flag in
+.BR clone (2).
+.TP
+.B UTS namespace
+Setting hostname or domainname will not affect the rest of the system.
+For further details, see
+.BR namespaces (7)
+and the discussion of the
+.B CLONE_NEWUTS
+flag in
+.BR clone (2).
+.TP
+.B IPC namespace
+The process will have an independent namespace for POSIX message queues
+as well as System V message queues,
+semaphore sets and shared memory segments.
+For further details, see
+.BR namespaces (7)
+and the discussion of the
+.B CLONE_NEWIPC
+flag in
+.BR clone (2).
+.TP
+.B network namespace
+The process will have independent IPv4 and IPv6 stacks, IP routing tables,
+firewall rules, the
+.I /proc\:/net
+and
+.I /sys\:/class\:/net
+directory trees, sockets, etc.
+For further details, see
+.BR namespaces (7)
+and the discussion of the
+.B CLONE_NEWNET
+flag in
+.BR clone (2).
+.TP
+.B PID namespace
+Children will have a set of PID-to-process mappings separate from the
+.B nsenter
+process.
+For further details, see
+.BR pid_namespaces (7)
+and the discussion of the
+.B CLONE_NEWPID
+flag in
+.BR clone (2).
+.B nsenter
+will fork by default if changing the PID namespace, so that the new program
+and its children share the same PID namespace and are visible to each other.
+If \fB\-\-no\-fork\fP is used, the new program will be exec'ed without forking.
+.TP
+.B user namespace
+The process will have a distinct set of UIDs, GIDs and capabilities.
+For further details, see
+.BR user_namespaces (7)
+and the discussion of the
+.B CLONE_NEWUSER
+flag in
+.BR clone (2).
+.TP
+.B cgroup namespace
+The process will have a virtualized view of \fI/proc\:/self\:/cgroup\fP, and new
+cgroup mounts will be rooted at the namespace cgroup root.
+For further details, see
+.BR cgroup_namespaces (7)
+and the discussion of the
+.B CLONE_NEWCGROUP
+flag in
+.BR clone (2).
+.PP
+See \fBclone\fP(2) for the exact semantics of the flags.
+.SH OPTIONS
+Various of the options below that relate to namespaces take an optional
+.I file
+argument.
+This should be one of the
+.I /proc/[pid]/ns/*
+files described in
+.BR namespaces (7).
+.TP
+\fB\-a\fR, \fB\-\-all\fR
+Enter all namespaces of the target process by the default
+.I /proc/[pid]/ns/*
+namespace paths. The default paths to the target process namespaces may be
+overridden by namespace-specific options (e.g. \fB\-\-all \-\-mount\fR=[\fIpath\fR]).
+
+The user namespace will be ignored if the same as the caller's current user
+namespace. It prevents a caller that has dropped capabilities from regaining
+those capabilities via a call to setns().  See
+.BR setns (2)
+for more details.
+.TP
+\fB\-t\fR, \fB\-\-target\fR \fIpid\fP
+Specify a target process to get contexts from.  The paths to the contexts
+specified by
+.I pid
+are:
+.RS
+.PD 0
+.IP "" 20
+.TP
+/proc/\fIpid\fR/ns/mnt
+the mount namespace
+.TP
+/proc/\fIpid\fR/ns/uts
+the UTS namespace
+.TP
+/proc/\fIpid\fR/ns/ipc
+the IPC namespace
+.TP
+/proc/\fIpid\fR/ns/net
+the network namespace
+.TP
+/proc/\fIpid\fR/ns/pid
+the PID namespace
+.TP
+/proc/\fIpid\fR/ns/user
+the user namespace
+.TP
+/proc/\fIpid\fR/ns/cgroup
+the cgroup namespace
+.TP
+/proc/\fIpid\fR/root
+the root directory
+.TP
+/proc/\fIpid\fR/cwd
+the working directory respectively
+.PD
+.RE
+.TP
+\fB\-m\fR, \fB\-\-mount\fR[=\fIfile\fR]
+Enter the mount namespace.  If no file is specified, enter the mount namespace
+of the target process.
+If
+.I file
+is specified, enter the mount namespace
+specified by
+.IR file .
+.TP
+\fB\-u\fR, \fB\-\-uts\fR[=\fIfile\fR]
+Enter the UTS namespace.  If no file is specified, enter the UTS namespace of
+the target process.
+If
+.I file
+is specified, enter the UTS namespace specified by
+.IR file .
+.TP
+\fB\-i\fR, \fB\-\-ipc\fR[=\fIfile\fR]
+Enter the IPC namespace.  If no file is specified, enter the IPC namespace of
+the target process.
+If
+.I file
+is specified, enter the IPC namespace specified by
+.IR file .
+.TP
+\fB\-n\fR, \fB\-\-net\fR[=\fIfile\fR]
+Enter the network namespace.  If no file is specified, enter the network
+namespace of the target process.
+If
+.I file
+is specified, enter the network namespace specified by
+.IR file .
+.TP
+\fB\-p\fR, \fB\-\-pid\fR[=\fIfile\fR]
+Enter the PID namespace.  If no file is specified, enter the PID namespace of
+the target process.
+If
+.I file
+is specified, enter the PID namespace specified by
+.IR file .
+.TP
+\fB\-U\fR, \fB\-\-user\fR[=\fIfile\fR]
+Enter the user namespace.  If no file is specified, enter the user namespace of
+the target process.
+If
+.I file
+is specified, enter the user namespace specified by
+.IR file .
+See also the \fB\-\-setuid\fR and \fB\-\-setgid\fR options.
+.TP
+\fB\-C\fR, \fB\-\-cgroup\fR[=\fIfile\fR]
+Enter the cgroup namespace.  If no file is specified, enter the cgroup namespace of
+the target process.
+If
+.I file
+is specified, enter the cgroup namespace specified by
+.IR file .
+.TP
+\fB\-G\fR, \fB\-\-setgid\fR \fIgid\fR
+Set the group ID which will be used in the entered namespace and drop
+supplementary groups.
+.BR nsenter (1)
+always sets GID for user namespaces, the default is 0.
+.TP
+\fB\-S\fR, \fB\-\-setuid\fR \fIuid\fR
+Set the user ID which will be used in the entered namespace.
+.BR nsenter (1)
+always sets UID for user namespaces, the default is 0.
+.TP
+\fB\-\-preserve\-credentials\fR
+Don't modify UID and GID when entering a user namespace. The default is to
+drop supplementary groups and set GID and UID to 0.
+.TP
+\fB\-r\fR, \fB\-\-root\fR[=\fIdirectory\fR]
+Set the root directory.  If no directory is specified, set the root directory to
+the root directory of the target process.  If directory is specified, set the
+root directory to the specified directory.
+.TP
+\fB\-w\fR, \fB\-\-wd\fR[=\fIdirectory\fR]
+Set the working directory.  If no directory is specified, set the working
+directory to the working directory of the target process.  If directory is
+specified, set the working directory to the specified directory.
+.TP
+\fB\-F\fR, \fB\-\-no\-fork\fR
+Do not fork before exec'ing the specified program.  By default, when entering a
+PID namespace, \fBnsenter\fP calls \fBfork\fP before calling \fBexec\fP so that
+any children will also be in the newly entered PID namespace.
+.TP
+\fB\-Z\fR, \fB\-\-follow\-context\fR
+Set the SELinux security context used for executing a new process according to
+the already running process specified by \fB\-\-target\fR PID. (util-linux has
+to be compiled with SELinux support, otherwise the option is unavailable.)
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Display version information and exit.
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Display help text and exit.
+.SH SEE ALSO
+.BR clone (2),
+.BR setns (2),
+.BR namespaces (7)
+.SH AUTHORS
+.UR biederm@xmission.com
+Eric Biederman
+.UE
+.br
+.UR kzak@redhat.com
+Karel Zak
+.UE
+.SH AVAILABILITY
+The nsenter command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/nsenter.c b/sys-utils/nsenter.c
new file mode 100644
index 0000000..fbfcf98
--- /dev/null
+++ b/sys-utils/nsenter.c
@@ -0,0 +1,484 @@
+/*
+ * nsenter(1) - command-line interface for setns(2)
+ *
+ * Copyright (C) 2012-2013 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <getopt.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <grp.h>
+#include <sys/stat.h>
+
+#ifdef HAVE_LIBSELINUX
+# include <selinux/selinux.h>
+#endif
+
+#include "strutils.h"
+#include "nls.h"
+#include "c.h"
+#include "closestream.h"
+#include "namespace.h"
+#include "exec_shell.h"
+
+static struct namespace_file {
+	int nstype;		/* CLONE_NEW* flag, also passed to setns() */
+	const char *name;	/* file name relative to /proc/<pid>/ */
+	int fd;			/* open descriptor, -1 while not opened */
+} namespace_files[] = {
+	/* Careful: the order is significant in this array.
+	 *
+	 * The user namespace comes either first or last: first if
+	 * you're using it to increase your privilege and last if
+	 * you're using it to decrease.  We enter the namespaces in
+	 * two passes, starting initially from offset 1 (skipping the
+	 * user namespace) and then from offset 0 for whatever is left.
+	 */
+	{ .nstype = CLONE_NEWUSER,  .name = "ns/user", .fd = -1 },
+	{ .nstype = CLONE_NEWCGROUP,.name = "ns/cgroup", .fd = -1 },
+	{ .nstype = CLONE_NEWIPC,   .name = "ns/ipc",  .fd = -1 },
+	{ .nstype = CLONE_NEWUTS,   .name = "ns/uts",  .fd = -1 },
+	{ .nstype = CLONE_NEWNET,   .name = "ns/net",  .fd = -1 },
+	{ .nstype = CLONE_NEWPID,   .name = "ns/pid",  .fd = -1 },
+	{ .nstype = CLONE_NEWNS,    .name = "ns/mnt",  .fd = -1 },
+	{ .nstype = 0, .name = NULL, .fd = -1 }	/* terminator: nstype == 0 */
+};
+
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;	/* all help text goes to stdout */
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options] [<program> [<argument>...]]\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Run a program with namespaces of other processes.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -a, --all              enter all namespaces\n"), out);
+	fputs(_(" -t, --target <pid>     target process to get namespaces from\n"), out);
+	fputs(_(" -m, --mount[=<file>]   enter mount namespace\n"), out);
+	fputs(_(" -u, --uts[=<file>]     enter UTS namespace (hostname etc)\n"), out);
+	fputs(_(" -i, --ipc[=<file>]     enter System V IPC namespace\n"), out);
+	fputs(_(" -n, --net[=<file>]     enter network namespace\n"), out);
+	fputs(_(" -p, --pid[=<file>]     enter pid namespace\n"), out);
+	fputs(_(" -C, --cgroup[=<file>]  enter cgroup namespace\n"), out);
+	fputs(_(" -U, --user[=<file>]    enter user namespace\n"), out);
+	fputs(_(" -S, --setuid <uid>     set uid in entered namespace\n"), out);
+	fputs(_(" -G, --setgid <gid>     set gid in entered namespace\n"), out);
+	fputs(_("     --preserve-credentials do not touch uids or gids\n"), out);
+	fputs(_(" -r, --root[=<dir>]     set the root directory\n"), out);
+	fputs(_(" -w, --wd[=<dir>]       set the working directory\n"), out);
+	fputs(_(" -F, --no-fork          do not fork before exec'ing <program>\n"), out);
+#ifdef HAVE_LIBSELINUX	/* -Z is only listed when built with libselinux */
+	fputs(_(" -Z, --follow-context   set SELinux context according to --target PID\n"), out);
+#endif /* HAVE_LIBSELINUX */
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(24));
+	printf(USAGE_MAN_TAIL("nsenter(1)"));
+
+	exit(EXIT_SUCCESS);	/* printing the help is a successful run */
+}
+
+static pid_t namespace_target_pid = 0;	/* pid given with --target, 0 if none */
+static int root_fd = -1;		/* directory to chroot() into, -1 if unset */
+static int wd_fd = -1;			/* directory to fchdir() into, -1 if unset */
+
+/* Open @path read-only into *@fd, closing any descriptor already stored there.
+ * With a NULL @path the file /proc/<target pid>/@type is used instead; exits
+ * with an error when neither is available or when open(2) fails. */
+static void open_target_fd(int *fd, const char *type, const char *path)
+{
+	char procpath[PATH_MAX];
+
+	if (path == NULL) {
+		if (namespace_target_pid == 0)
+			errx(EXIT_FAILURE,
+			     _("neither filename nor target pid supplied for %s"),
+			     type);
+		snprintf(procpath, sizeof(procpath), "/proc/%u/%s",
+			 namespace_target_pid, type);
+		path = procpath;
+	}
+	if (*fd >= 0)
+		close(*fd);
+	if ((*fd = open(path, O_RDONLY)) < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), path);
+}
+
+/* Open the descriptor of the namespace_files[] entry whose type is @nstype;
+ * @path is handed to open_target_fd() unchanged (NULL selects the default). */
+static void open_namespace_fd(int nstype, const char *path)
+{
+	struct namespace_file *ent = namespace_files;
+
+	while (ent->nstype && ent->nstype != nstype)
+		ent++;
+
+	if (ent->nstype)
+		open_target_fd(&ent->fd, ent->name, path);
+	else
+		assert(ent->nstype);	/* This should never happen */
+}
+
+/* Store the inode number of @path in *@ino; returns 0, or -errno on failure. */
+static int get_ns_ino(const char *path, ino_t *ino)
+{
+	struct stat sb;
+	int failed = stat(path, &sb);
+	if (!failed)
+		*ino = sb.st_ino;
+	return failed ? -errno : 0;
+}
+
+/* True if /proc/<a>/@type and /proc/<b>/@type share an inode; dies on error */
+static int is_same_namespace(pid_t a, pid_t b, const char *type)
+{
+	const pid_t pids[2] = { a, b };
+	ino_t inodes[2] = { 0, 0 };
+	char path[PATH_MAX];
+	size_t i;
+
+	for (i = 0; i < 2; i++) {
+		snprintf(path, sizeof(path), "/proc/%u/%s", pids[i], type);
+		if (get_ns_ino(path, &inodes[i]) != 0)
+			err(EXIT_FAILURE, _("stat of %s failed"), path);
+	}
+	return inodes[0] == inodes[1];
+}
+
+/* Fork; only the child returns, the parent exits with the child's status. */
+static void continue_as_child(void)
+{
+	pid_t child, ret;
+	int status;
+
+	/* An inherited SIG_IGN for SIGCHLD would make waitpid() fail (ECHILD) */
+	signal(SIGCHLD, SIG_DFL);
+	child = fork();
+	if (child < 0)
+		err(EXIT_FAILURE, _("fork failed"));
+	if (child == 0)
+		return;		/* the child carries on in the caller */
+
+	for (;;) {
+		ret = waitpid(child, &status, WUNTRACED);
+		if (ret != child)	/* status is undefined, do not read it */
+			err(EXIT_FAILURE, _("waitpid failed"));
+		if (!WIFSTOPPED(status))
+			break;
+		/* The child suspended so suspend us as well */
+		kill(getpid(), SIGSTOP);
+		kill(child, SIGCONT);
+	}
+	/* Return the child's exit code if possible */
+	if (WIFEXITED(status))
+		exit(WEXITSTATUS(status));
+	if (WIFSIGNALED(status))
+		kill(getpid(), WTERMSIG(status));	/* die the same way */
+	exit(EXIT_FAILURE);
+}
+
+int main(int argc, char *argv[])
+{
+	enum {
+		OPT_PRESERVE_CRED = CHAR_MAX + 1	/* long-only option, above any char */
+	};
+	static const struct option longopts[] = {
+		{ "all", no_argument, NULL, 'a' },
+		{ "help", no_argument, NULL, 'h' },
+		{ "version", no_argument, NULL, 'V'},
+		{ "target", required_argument, NULL, 't' },
+		{ "mount", optional_argument, NULL, 'm' },
+		{ "uts", optional_argument, NULL, 'u' },
+		{ "ipc", optional_argument, NULL, 'i' },
+		{ "net", optional_argument, NULL, 'n' },
+		{ "pid", optional_argument, NULL, 'p' },
+		{ "user", optional_argument, NULL, 'U' },
+		{ "cgroup", optional_argument, NULL, 'C' },
+		{ "setuid", required_argument, NULL, 'S' },
+		{ "setgid", required_argument, NULL, 'G' },
+		{ "root", optional_argument, NULL, 'r' },
+		{ "wd", optional_argument, NULL, 'w' },
+		{ "no-fork", no_argument, NULL, 'F' },
+		{ "preserve-credentials", no_argument, NULL, OPT_PRESERVE_CRED },
+#ifdef HAVE_LIBSELINUX
+		{ "follow-context", no_argument, NULL, 'Z' },
+#endif
+		{ NULL, 0, NULL, 0 }
+	};
+
+	struct namespace_file *nsfile;
+	int c, pass, namespaces = 0, setgroups_nerrs = 0, preserve_cred = 0;
+	bool do_rd = false, do_wd = false, force_uid = false, force_gid = false;
+	bool do_all = false;
+	int do_fork = -1; /* -1: not decided yet, 0: --no-fork given, 1: fork */
+	uid_t uid = 0;
+	gid_t gid = 0;
+#ifdef HAVE_LIBSELINUX
+	bool selinux = 0;
+#endif
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+	/* Leading '+': stop at the first non-option so <program>'s own options are left alone */
+	while ((c =
+		getopt_long(argc, argv, "+ahVt:m::u::i::n::p::C::U::S:G:r::w::FZ",
+			    longopts, NULL)) != -1) {
+		switch (c) {
+		case 'h':
+			usage();	/* does not return */
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'a':
+			do_all = true;
+			break;
+		case 't':
+			namespace_target_pid =
+			    strtoul_or_err(optarg, _("failed to parse pid"));
+			break;
+		case 'm':
+			if (optarg)
+				open_namespace_fd(CLONE_NEWNS, optarg);
+			else
+				namespaces |= CLONE_NEWNS;
+			break;
+		case 'u':
+			if (optarg)
+				open_namespace_fd(CLONE_NEWUTS, optarg);
+			else
+				namespaces |= CLONE_NEWUTS;
+			break;
+		case 'i':
+			if (optarg)
+				open_namespace_fd(CLONE_NEWIPC, optarg);
+			else
+				namespaces |= CLONE_NEWIPC;
+			break;
+		case 'n':
+			if (optarg)
+				open_namespace_fd(CLONE_NEWNET, optarg);
+			else
+				namespaces |= CLONE_NEWNET;
+			break;
+		case 'p':
+			if (optarg)
+				open_namespace_fd(CLONE_NEWPID, optarg);
+			else
+				namespaces |= CLONE_NEWPID;
+			break;
+		case 'C':
+			if (optarg)
+				open_namespace_fd(CLONE_NEWCGROUP, optarg);
+			else
+				namespaces |= CLONE_NEWCGROUP;
+			break;
+		case 'U':
+			if (optarg)
+				open_namespace_fd(CLONE_NEWUSER, optarg);
+			else
+				namespaces |= CLONE_NEWUSER;
+			break;
+		case 'S':
+			uid = strtoul_or_err(optarg, _("failed to parse uid"));
+			force_uid = true;
+			break;
+		case 'G':
+			gid = strtoul_or_err(optarg, _("failed to parse gid"));
+			force_gid = true;
+			break;
+		case 'F':
+			do_fork = 0;
+			break;
+		case 'r':
+			if (optarg)
+				open_target_fd(&root_fd, "root", optarg);
+			else
+				do_rd = true;
+			break;
+		case 'w':
+			if (optarg)
+				open_target_fd(&wd_fd, "cwd", optarg);
+			else
+				do_wd = true;
+			break;
+		case OPT_PRESERVE_CRED:
+			preserve_cred = 1;
+			break;
+#ifdef HAVE_LIBSELINUX
+		case 'Z':
+			selinux = 1;
+			break;
+#endif
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+#ifdef HAVE_LIBSELINUX
+	if (selinux && is_selinux_enabled() > 0) {
+		char *scon = NULL;
+
+		if (!namespace_target_pid)
+			errx(EXIT_FAILURE, _("no target PID specified for --follow-context"));
+		if (getpidcon(namespace_target_pid, &scon) < 0)
+			errx(EXIT_FAILURE, _("failed to get %d SELinux context"),
+					(int) namespace_target_pid);
+		if (setexeccon(scon) < 0)
+			errx(EXIT_FAILURE, _("failed to set exec context to '%s'"), scon);
+		freecon(scon);
+	}
+#endif
+
+	if (do_all) {
+		if (!namespace_target_pid)
+			errx(EXIT_FAILURE, _("no target PID specified for --all"));
+		for (nsfile = namespace_files; nsfile->nstype; nsfile++) {
+			if (nsfile->fd >= 0)
+				continue;	/* namespace already specified */
+
+			/* It is not permitted to use setns(2) to reenter the caller's
+			 * current user namespace; see setns(2) man page for more details.
+			 */
+			if (nsfile->nstype & CLONE_NEWUSER
+			    && is_same_namespace(getpid(), namespace_target_pid, nsfile->name))
+				continue;
+
+			namespaces |= nsfile->nstype;
+		}
+	}
+
+	/*
+	 * Open remaining namespace and directory descriptors, i.e. those
+	 * requested without an explicit file (taken from the target pid).
+	 */
+	for (nsfile = namespace_files; nsfile->nstype; nsfile++)
+		if (nsfile->nstype & namespaces)
+			open_namespace_fd(nsfile->nstype, NULL);
+	if (do_rd)
+		open_target_fd(&root_fd, "root", NULL);
+	if (do_wd)
+		open_target_fd(&wd_fd, "cwd", NULL);
+
+	/*
+	 * Update namespaces variable to contain all requested namespaces
+	 */
+	for (nsfile = namespace_files; nsfile->nstype; nsfile++) {
+		if (nsfile->fd < 0)
+			continue;
+		namespaces |= nsfile->nstype;
+	}
+
+	/* for user namespaces we always set UID and GID (default is 0)
+	 * and clear root's groups if --preserve-credentials is not specified */
+	if ((namespaces & CLONE_NEWUSER) && !preserve_cred) {
+		force_uid = true, force_gid = true;
+
+		/* We call setgroups() before and after we enter user namespace,
+		 * let's complain only if both fail */
+		if (setgroups(0, NULL) != 0)
+			setgroups_nerrs++;
+	}
+
+	/*
+	 * Now that we know which namespaces we want to enter, enter
+	 * them.  Do this in two passes, not entering the user
+	 * namespace on the first pass.  So if we're deprivileging the
+	 * container we'll enter the user namespace last and if we're
+	 * privileging it then we enter the user namespace first
+	 * (because the initial setns will fail).
+	 */
+	for (pass = 0; pass < 2; pass ++) {
+		for (nsfile = namespace_files + 1 - pass; nsfile->nstype; nsfile++) {
+			if (nsfile->fd < 0)
+				continue;
+			if (nsfile->nstype == CLONE_NEWPID && do_fork == -1)
+				do_fork = 1;
+			if (setns(nsfile->fd, nsfile->nstype)) {
+				if (pass != 0)
+					err(EXIT_FAILURE,
+					    _("reassociate to namespace '%s' failed"),
+					    nsfile->name);
+				else
+					continue;	/* fd stays open, retried in pass 1 */
+			}
+
+			close(nsfile->fd);
+			nsfile->fd = -1;
+		}
+	}
+
+	/* Remember the current working directory if I'm not changing it */
+	if (root_fd >= 0 && wd_fd < 0) {
+		wd_fd = open(".", O_RDONLY);
+		if (wd_fd < 0)
+			err(EXIT_FAILURE,
+			    _("cannot open current working directory"));
+	}
+
+	/* Change the root directory */
+	if (root_fd >= 0) {
+		if (fchdir(root_fd) < 0)
+			err(EXIT_FAILURE,
+			    _("change directory by root file descriptor failed"));
+
+		if (chroot(".") < 0)
+			err(EXIT_FAILURE, _("chroot failed"));
+
+		close(root_fd);
+		root_fd = -1;
+	}
+
+	/* Change the working directory */
+	if (wd_fd >= 0) {
+		if (fchdir(wd_fd) < 0)
+			err(EXIT_FAILURE,
+			    _("change directory by working directory file descriptor failed"));
+
+		close(wd_fd);
+		wd_fd = -1;
+	}
+
+	if (do_fork == 1)
+		continue_as_child();	/* only the forked child returns from here */
+
+	if (force_uid || force_gid) {
+		if (force_gid && setgroups(0, NULL) != 0 && setgroups_nerrs)	/* drop supplementary groups */
+			err(EXIT_FAILURE, _("setgroups failed"));
+		if (force_gid && setgid(gid) < 0)		/* change GID */
+			err(EXIT_FAILURE, _("setgid failed"));
+		if (force_uid && setuid(uid) < 0)		/* change UID */
+			err(EXIT_FAILURE, _("setuid failed"));
+	}
+
+	if (optind < argc) {
+		execvp(argv[optind], argv + optind);
+		errexec(argv[optind]);
+	}
+	exec_shell();	/* no <program> given; presumably execs the user's shell */
+}
diff --git a/sys-utils/pivot_root.8 b/sys-utils/pivot_root.8
new file mode 100644
index 0000000..febedd0
--- /dev/null
+++ b/sys-utils/pivot_root.8
@@ -0,0 +1,75 @@
+.TH PIVOT_ROOT 8 "August 2011" "util-linux" "System Administration"
+.SH NAME
+pivot_root \- change the root filesystem
+.SH SYNOPSIS
+.B pivot_root
+.I new_root put_old
+.SH DESCRIPTION
+\fBpivot_root\fP moves the root file system of the current process to the
+directory \fIput_old\fP and makes \fInew_root\fP the new root file system.
+Since \fBpivot_root\fP(8) simply calls \fBpivot_root\fP(2), we refer to
+the man page of the latter for further details.
+
+Note that, depending on the implementation of \fBpivot_root\fP, root and
+cwd of the caller may or may not change. The following is a sequence for
+invoking \fBpivot_root\fP that works in either case, assuming that
+\fBpivot_root\fP and \fBchroot\fP are in the current \fBPATH\fP:
+.sp
+cd \fInew_root\fP
+.br
+pivot_root . \fIput_old\fP
+.br
+exec chroot . \fIcommand\fP
+.sp
+Note that \fBchroot\fP must be available under the old root and under the new
+root, because \fBpivot_root\fP may or may not have implicitly changed the
+root directory of the shell.
+
+Note that \fBexec chroot\fP changes the running executable, which is
+necessary if the old root directory should be unmounted afterwards.
+Also note that standard input, output, and error may still point to a
+device on the old root file system, keeping it busy. They can easily be
+changed when invoking \fBchroot\fP (see below; note the absence of
+leading slashes to make it work whether \fBpivot_root\fP has changed the
+shell's root or not).
+.SH OPTIONS
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Display version information and exit.
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Display help text and exit.
+.SH EXAMPLES
+Change the root file system to /dev/hda1 from an interactive shell:
+.sp
+.nf
+mount /dev/hda1 /new-root
+cd /new-root
+pivot_root . old-root
+exec chroot . sh <dev/console >dev/console 2>&1
+umount /old-root
+.fi
+.sp
+Mount the new root file system over NFS from 10.0.0.1:/my_root and run
+\fBinit\fP:
+.sp
+.nf
+ifconfig lo 127.0.0.1 up   # for portmap
+# configure Ethernet or such
+portmap   # for lockd (implicitly started by mount)
+mount -o ro 10.0.0.1:/my_root /mnt
+killall portmap   # portmap keeps old root busy
+cd /mnt
+pivot_root . old_root
+exec chroot . sh -c 'umount /old_root; exec /sbin/init' \\
+  <dev/console >dev/console 2>&1
+.fi
+.SH "SEE ALSO"
+.BR chroot (1),
+.BR pivot_root (2),
+.BR mount (8),
+.BR switch_root (8),
+.BR umount (8)
+.SH AVAILABILITY
+The pivot_root command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/pivot_root.c b/sys-utils/pivot_root.c
new file mode 100644
index 0000000..ea76d94
--- /dev/null
+++ b/sys-utils/pivot_root.c
@@ -0,0 +1,80 @@
+/*
+ * pivot_root.c - Change the root file system
+ *
+ * Copyright (C) 2000 Werner Almesberger
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#include <err.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "nls.h"
+#include "closestream.h"
+
+#define pivot_root(new_root,put_old) syscall(SYS_pivot_root,new_root,put_old) /* invoked via syscall(2) */
+
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;	/* all help text goes to stdout */
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options] new_root put_old\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Change the root filesystem.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	printf(USAGE_HELP_OPTIONS(16));
+	printf(USAGE_MAN_TAIL("pivot_root(8)"));
+	exit(EXIT_SUCCESS);	/* printing the help is a successful run */
+}
+
+/* pivot_root <new_root> <put_old>: command-line front end to pivot_root(2). */
+int main(int argc, char **argv)
+{
+	int ch;
+	static const struct option longopts[] = {
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((ch = getopt_long(argc, argv, "Vh", longopts, NULL)) != -1)
+		switch (ch) {
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();	/* does not return */
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+
+	if (argc - optind != 2) {	/* operands start at optind (e.g. after "--") */
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+	if (pivot_root(argv[optind], argv[optind + 1]) < 0)
+		err(EXIT_FAILURE, _("failed to change root from `%s' to `%s'"),
+		    argv[optind], argv[optind + 1]);
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/prlimit.1 b/sys-utils/prlimit.1
new file mode 100644
index 0000000..1230b3a
--- /dev/null
+++ b/sys-utils/prlimit.1
@@ -0,0 +1,120 @@
+.\" prlimit.1 --
+.\" Copyright 2011 Davidlohr Bueso <dave@gnu.org>
+.\" May be distributed under the GNU General Public License
+
+.TH PRLIMIT 1 "July 2014" "util-linux" "User Commands"
+.SH NAME
+prlimit \- get and set process resource limits
+.SH SYNOPSIS
+.BR prlimit " [options]"
+.RB [ \-\-\fIresource\fR [ =\fIlimits\fR]
+.RB [ \-\-pid\ \fIPID\fR]
+
+.BR prlimit " [options]"
+.RB [ \-\-\fIresource\fR [ =\fIlimits\fR]
+.IR "command " [ argument ...]
+
+.SH DESCRIPTION
+Given a process ID and one or more resources, \fBprlimit\fP tries to retrieve
+and/or modify the limits.
+
+When \fIcommand\fR is given,
+.B prlimit
+will run this command with the given arguments.
+
+The \fIlimits\fP parameter is composed of a soft and a hard value, separated
+by a colon (:), in order to modify the existing values.  If no \fIlimits\fR are
+given, \fBprlimit\fP will display the current values.  If one of the values
+is not given, then the existing one will be used.  To specify the unlimited or
+infinity limit (RLIM_INFINITY), the -1 or 'unlimited' string can be passed.
+
+Because of the nature of limits, the soft limit must be lower than or equal to
+the hard limit (also called the ceiling).  To see all available resource limits,
+refer to the RESOURCE OPTIONS section.
+
+.IP "\fIsoft\fP:\fIhard\fP    Specify both limits."
+.IP "\fIsoft\fP:        Specify only the soft limit."
+.IP ":\fIhard\fP        Specify only the hard limit."
+.IP "\fIvalue\fP        Specify both limits to the same value."
+
+.SH GENERAL OPTIONS
+.IP "\fB\-h, \-\-help\fP"
+Display help text and exit.
+.IP "\fB\-\-noheadings\fP"
+Do not print a header line.
+.IP "\fB\-o, \-\-output \fIlist\fP"
+Define the output columns to use.  If no output arrangement is specified,
+then a default set is used.
+Use \fB\-\-help\fP to get a list of all supported columns.
+.IP "\fB\-p, \-\-pid \fIpid\fP"
+Specify the process id; if none is given, the running process will be used.
+.IP "\fB\-\-raw\fP"
+Use the raw output format.
+.IP "\fB\-\-verbose\fP"
+Verbose mode.
+.IP "\fB\-V, \-\-version\fP"
+Display version information and exit.
+
+.SH RESOURCE OPTIONS
+.IP "\fB\-c, \-\-core\fP[=\fIlimits\fR]"
+Maximum size of a core file.
+.IP "\fB\-d, \-\-data\fP[=\fIlimits\fR]"
+Maximum data size.
+.IP "\fB\-e, \-\-nice\fP[=\fIlimits\fR]"
+Maximum nice priority allowed to raise.
+.IP "\fB\-f, \-\-fsize\fP[=\fIlimits\fR]"
+Maximum file size.
+.IP "\fB\-i, \-\-sigpending\fP[=\fIlimits\fR]"
+Maximum number of pending signals.
+.IP "\fB\-l, \-\-memlock\fP[=\fIlimits\fR]"
+Maximum locked-in-memory address space.
+.IP "\fB\-m, \-\-rss\fP[=\fIlimits\fR]"
+Maximum Resident Set Size (RSS).
+.IP "\fB\-n, \-\-nofile\fP[=\fIlimits\fR]"
+Maximum number of open files.
+.IP "\fB\-q, \-\-msgqueue\fP[=\fIlimits\fR]"
+Maximum number of bytes in POSIX message queues.
+.IP "\fB\-r, \-\-rtprio\fP[=\fIlimits\fR]"
+Maximum real-time priority.
+.IP "\fB\-s, \-\-stack\fP[=\fIlimits\fR]"
+Maximum size of the stack.
+.IP "\fB\-t, \-\-cpu\fP[=\fIlimits\fR]"
+CPU time, in seconds.
+.IP "\fB\-u, \-\-nproc\fP[=\fIlimits\fR]"
+Maximum number of processes.
+.IP "\fB\-v, \-\-as\fP[=\fIlimits\fR]"
+Address space limit.
+.IP "\fB\-x, \-\-locks\fP[=\fIlimits\fR]"
+Maximum number of file locks held.
+.IP "\fB\-y, \-\-rttime\fP[=\fIlimits\fR]"
+Timeout for real-time tasks.
+
+.SH EXAMPLES
+.IP "\fBprlimit \-\-pid 13134\fP"
+Display limit values for all current resources.
+.IP "\fBprlimit \-\-pid 13134 \-\-rss \-\-nofile=1024:4095\fP"
+Display the limits of the RSS, and set the soft and hard limits for the number
+of open files to 1024 and 4095, respectively.
+.IP "\fBprlimit \-\-pid 13134 --nproc=512:\fP"
+Modify only the soft limit for the number of processes.
+.IP "\fBprlimit \-\-pid $$ --nproc=unlimited\fP"
+Set for the current process both the soft and ceiling values for the number of
+processes to unlimited.
+.IP "\fBprlimit --cpu=10 sort -u hugefile\fP"
+Set both the soft and hard CPU time limit to ten seconds and run 'sort'.
+
+.SH "SEE ALSO"
+.BR ulimit (1),
+.BR prlimit (2)
+
+.SH NOTES
+The prlimit system call has been supported since Linux 2.6.36; this program
+does not work on older kernels.
+
+.SH AUTHORS
+.nf
+Davidlohr Bueso <dave@gnu.org> - In memory of Dennis M. Ritchie.
+.fi
+.SH AVAILABILITY
+The prlimit command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/prlimit.c b/sys-utils/prlimit.c
new file mode 100644
index 0000000..6f80636
--- /dev/null
+++ b/sys-utils/prlimit.c
@@ -0,0 +1,646 @@
+/*
+ *  prlimit - get/set process resource limits.
+ *
+ *  Copyright (C) 2011 Davidlohr Bueso <dave@gnu.org>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/resource.h>
+
+#include <libsmartcols.h>
+
+#include "c.h"
+#include "nls.h"
+#include "xalloc.h"
+#include "strutils.h"
+#include "list.h"
+#include "closestream.h"
+
+#ifndef RLIMIT_RTTIME
+# define RLIMIT_RTTIME 15
+#endif
+
+enum {	/* indexes into prlimit_desc[]; main() asserts that STACK is the last one */
+	AS,
+	CORE,
+	CPU,
+	DATA,
+	FSIZE,
+	LOCKS,
+	MEMLOCK,
+	MSGQUEUE,
+	NICE,
+	NOFILE,
+	NPROC,
+	RSS,
+	RTPRIO,
+	RTTIME,
+	SIGPENDING,
+	STACK
+};
+
+/* basic output flags */
+static int no_headings;
+static int raw;
+
+struct prlimit_desc {
+	const char *name;	/* resource name, shown in the RESOURCE column */
+	const char *help;	/* N_()-marked description */
+	const char *unit;	/* N_()-marked unit, NULL when there is none */
+	int resource;		/* RLIMIT_* id handed to prlimit() */
+};
+
+static struct prlimit_desc prlimit_desc[] =	/* indexed by the resource enum (AS ... STACK) */
+{
+	[AS]         = { "AS",         N_("address space limit"),                N_("bytes"),     RLIMIT_AS },
+	[CORE]       = { "CORE",       N_("max core file size"),                 N_("bytes"),     RLIMIT_CORE },
+	[CPU]        = { "CPU",        N_("CPU time"),                           N_("seconds"),   RLIMIT_CPU },
+	[DATA]       = { "DATA",       N_("max data size"),                      N_("bytes"),     RLIMIT_DATA },
+	[FSIZE]      = { "FSIZE",      N_("max file size"),                      N_("bytes"),     RLIMIT_FSIZE },
+	[LOCKS]      = { "LOCKS",      N_("max number of file locks held"),      N_("locks"),     RLIMIT_LOCKS },
+	[MEMLOCK]    = { "MEMLOCK",    N_("max locked-in-memory address space"), N_("bytes"),     RLIMIT_MEMLOCK },
+	[MSGQUEUE]   = { "MSGQUEUE",   N_("max bytes in POSIX mqueues"),         N_("bytes"),     RLIMIT_MSGQUEUE },
+	[NICE]       = { "NICE",       N_("max nice prio allowed to raise"),     NULL,            RLIMIT_NICE },
+	[NOFILE]     = { "NOFILE",     N_("max number of open files"),           N_("files"),     RLIMIT_NOFILE },
+	[NPROC]      = { "NPROC",      N_("max number of processes"),            N_("processes"), RLIMIT_NPROC },
+	[RSS]        = { "RSS",        N_("max resident set size"),              N_("bytes"),     RLIMIT_RSS },
+	[RTPRIO]     = { "RTPRIO",     N_("max real-time priority"),             NULL,            RLIMIT_RTPRIO },
+	[RTTIME]     = { "RTTIME",     N_("timeout for real-time tasks"),        N_("microsecs"), RLIMIT_RTTIME },
+	[SIGPENDING] = { "SIGPENDING", N_("max number of pending signals"),      N_("signals"),   RLIMIT_SIGPENDING },
+	[STACK]      = { "STACK",      N_("max stack size"),                     N_("bytes"),     RLIMIT_STACK }
+};
+
+#define MAX_RESOURCES ARRAY_SIZE(prlimit_desc)
+
+struct prlimit {
+	struct list_head lims;		/* node in the list of requested limits */
+
+	struct rlimit rlim;		/* values to set, or the values read back */
+	struct prlimit_desc *desc;	/* points into prlimit_desc[] */
+	int modify;			/* PRLIMIT_{SOFT,HARD} mask */
+};
+
+#define PRLIMIT_EMPTY_LIMIT	{{ 0, 0, }, NULL, 0 }
+
+enum {	/* column ids; they index infos[] and are stored in columns[] */
+	COL_HELP,
+	COL_RES,
+	COL_SOFT,
+	COL_HARD,
+	COL_UNITS,
+};
+
+/* column names */
+struct colinfo {
+	const char	*name;	/* header */
+	double		whint;	/* width hint (N < 1 is in percent of termwidth) */
+	int		flags;	/* SCOLS_FL_* */
+	const char      *help;	/* N_()-marked text, printed by usage() */
+};
+
+/* columns descriptions */
+static struct colinfo infos[] = {	/* indexed by COL_* */
+	[COL_RES]     = { "RESOURCE",    0.25, SCOLS_FL_TRUNC, N_("resource name") },
+	[COL_HELP]    = { "DESCRIPTION", 0.1,  SCOLS_FL_TRUNC, N_("resource description")},
+	[COL_SOFT]    = { "SOFT",        0.1,  SCOLS_FL_RIGHT, N_("soft limit")},
+	[COL_HARD]    = { "HARD",        1,    SCOLS_FL_RIGHT, N_("hard limit (ceiling)")},
+	[COL_UNITS]   = { "UNITS",       0.1,  SCOLS_FL_TRUNC, N_("units")},
+};
+
+static int columns[ARRAY_SIZE(infos) * 2];
+static int ncolumns;
+
+
+
+#define INFINITY_STR	"unlimited"
+#define INFINITY_STRLEN	(sizeof(INFINITY_STR) - 1)
+
+#define PRLIMIT_SOFT	(1 << 1)
+#define PRLIMIT_HARD	(1 << 2)
+
+static pid_t pid; /* calling process (default) */
+static int verbose;
+
+#ifndef HAVE_PRLIMIT
+# include <sys/syscall.h>
+static int prlimit(pid_t p, int resource,
+		   const struct rlimit *new_limit,
+		   struct rlimit *old_limit)
+{	/* compiled only when HAVE_PRLIMIT is not defined: issue SYS_prlimit64 directly */
+	return syscall(SYS_prlimit64, p, resource, new_limit, old_limit);
+}
+#endif
+
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;	/* help text goes to stdout; the function ends in exit(EXIT_SUCCESS) */
+	size_t i;
+
+	fputs(USAGE_HEADER, out);
+
+	fprintf(out,
+		_(" %s [options] [-p PID]\n"), program_invocation_short_name);
+	fprintf(out,
+		_(" %s [options] COMMAND\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Show or change the resource limits of a process.\n"), out);
+
+	fputs(_("\nGeneral Options:\n"), out);
+	fputs(_(" -p, --pid <pid>        process id\n"
+		" -o, --output <list>    define which output columns to use\n"
+		"     --noheadings       don't print headings\n"
+		"     --raw              use the raw output format\n"
+		"     --verbose          verbose output\n"
+		), out);
+	printf(USAGE_HELP_OPTIONS(24));
+
+	fputs(_("\nResources Options:\n"), out);
+	fputs(_(" -c, --core             maximum size of core files created\n"
+		" -d, --data             maximum size of a process's data segment\n"
+		" -e, --nice             maximum nice priority allowed to raise\n"
+		" -f, --fsize            maximum size of files written by the process\n"
+		" -i, --sigpending       maximum number of pending signals\n"
+		" -l, --memlock          maximum size a process may lock into memory\n"
+		" -m, --rss              maximum resident set size\n"
+		" -n, --nofile           maximum number of open files\n"
+		" -q, --msgqueue         maximum bytes in POSIX message queues\n"
+		" -r, --rtprio           maximum real-time scheduling priority\n"
+		" -s, --stack            maximum stack size\n"
+		" -t, --cpu              maximum amount of CPU time in seconds\n"
+		" -u, --nproc            maximum number of user processes\n"
+		" -v, --as               size of virtual memory\n"
+		" -x, --locks            maximum number of file locks\n"
+		" -y, --rttime           CPU time in microseconds a process scheduled\n"
+		"                        under real-time scheduling\n"), out);
+
+	fputs(USAGE_COLUMNS, out);	/* followed by one line per entry of infos[] */
+	for (i = 0; i < ARRAY_SIZE(infos); i++)
+		fprintf(out, " %11s  %s\n", infos[i].name, _(infos[i].help));
+
+	printf(USAGE_MAN_TAIL("prlimit(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+static inline int get_column_id(int num)	/* output position -> COL_* id */
+{
+	assert(num < ncolumns);
+	assert(columns[num] < (int) ARRAY_SIZE(infos));
+
+	return columns[num];
+}
+
+static inline struct colinfo *get_column_info(unsigned num)	/* infos[] entry for output position num */
+{
+	return &infos[ get_column_id(num) ];
+}
+
+/* Append one output row describing limit @l to @table. */
+static void add_scols_line(struct libscols_table *table, struct prlimit *l)
+{
+	int i;
+	struct libscols_line *line;
+
+	assert(table);
+	assert(l);
+
+	line = scols_table_new_line(table, NULL);
+	if (!line)
+		err(EXIT_FAILURE, _("failed to allocate output line"));
+
+	for (i = 0; i < ncolumns; i++) {
+		char *str = NULL;
+
+		switch (get_column_id(i)) {
+		case COL_RES:
+			str = xstrdup(l->desc->name);
+			break;
+		case COL_HELP:
+			/* help is only N_()-marked, translate it like the unit */
+			str = xstrdup(_(l->desc->help));
+			break;
+		case COL_SOFT:
+			if (l->rlim.rlim_cur == RLIM_INFINITY)
+				str = xstrdup(_("unlimited"));
+			else
+				xasprintf(&str, "%llu", (unsigned long long) l->rlim.rlim_cur);
+			break;
+		case COL_HARD:
+			if (l->rlim.rlim_max == RLIM_INFINITY)
+				str = xstrdup(_("unlimited"));
+			else
+				xasprintf(&str, "%llu", (unsigned long long) l->rlim.rlim_max);
+			break;
+		case COL_UNITS:
+			str = l->desc->unit ? xstrdup(_(l->desc->unit)) : NULL;
+			break;
+		}
+
+		if (str && scols_line_refer_data(line, i, str))
+			err(EXIT_FAILURE, _("failed to add output data"));
+	}
+}
+
+/* Map a column name of @namesz bytes to its COL_* id (case-insensitive); -1 if unknown. */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	size_t id;
+
+	assert(name);
+	for (id = 0; id < ARRAY_SIZE(infos); id++) {
+		const char *known = infos[id].name;
+
+		if (strncasecmp(name, known, namesz) == 0 && known[namesz] == '\0')
+			return id;
+	}
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+static void rem_prlim(struct prlimit *lim)	/* unlink and free; NULL is ignored */
+{
+	if (lim) {
+		list_del(&lim->lims);
+		free(lim);
+	}
+}
+
+/* Print every limit left on @lims as a table and free the entries; returns 0. */
+static int show_limits(struct list_head *lims)
+{
+	int n;
+	struct list_head *pos, *tmp;
+	struct libscols_table *tb = scols_new_table();
+
+	if (!tb)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+
+	scols_table_enable_raw(tb, raw);
+	scols_table_enable_noheadings(tb, no_headings);
+
+	for (n = 0; n < ncolumns; n++) {
+		struct colinfo *ci = get_column_info(n);
+
+		if (!scols_table_new_column(tb, ci->name, ci->whint, ci->flags))
+			err(EXIT_FAILURE, _("failed to allocate output column"));
+	}
+
+	list_for_each_safe(pos, tmp, lims) {
+		struct prlimit *lim = list_entry(pos, struct prlimit, lims);
+
+		add_scols_line(tb, lim);
+		rem_prlim(lim);
+	}
+
+	scols_print_table(tb);
+	scols_unref_table(tb);
+	return 0;
+}
+
+/*
+ * A <soft:> or <:hard> spec leaves the other half unset; fetch the current
+ * pair with prlimit(2) and fill in whichever half the user did not give.
+ */
+static void get_unknown_hardsoft(struct prlimit *lim)
+{
+	struct rlimit cur;
+	const int want_soft = lim->modify & PRLIMIT_SOFT;
+	const int want_hard = lim->modify & PRLIMIT_HARD;
+
+	if (prlimit(pid, lim->desc->resource, NULL, &cur) == -1)
+		err(EXIT_FAILURE, _("failed to get old %s limit"),
+				lim->desc->name);
+	if (!want_soft)
+		lim->rlim.rlim_cur = cur.rlim_cur;
+	else if (!want_hard)
+		lim->rlim.rlim_max = cur.rlim_max;
+}
+
+/*
+ * Issue one prlimit(2) call per entry of @lims.  Entries with new values are
+ * applied and removed; the others receive the current limits and stay listed.
+ */
+static void do_prlimit(struct list_head *lims)
+{
+	struct list_head *pos, *tmp;
+
+	list_for_each_safe(pos, tmp, lims) {
+		struct prlimit *lim = list_entry(pos, struct prlimit, lims);
+		struct rlimit *new = lim->modify ? &lim->rlim : NULL;
+		struct rlimit *old = lim->modify ? NULL : &lim->rlim;
+
+		if (new) {
+			if (lim->modify != (PRLIMIT_HARD | PRLIMIT_SOFT))
+				get_unknown_hardsoft(lim);
+
+			if (lim->rlim.rlim_cur > lim->rlim.rlim_max)
+				errx(EXIT_FAILURE, _("the soft limit %s cannot exceed the hard limit"),
+						lim->desc->name);
+		}
+
+		if (verbose && new) {
+			printf(_("New %s limit for pid %d: "), lim->desc->name,
+				pid ? pid : getpid());
+			if (new->rlim_cur == RLIM_INFINITY)
+				printf("<%s", _("unlimited"));
+			else
+				printf("<%ju", (uintmax_t)new->rlim_cur);
+			if (new->rlim_max == RLIM_INFINITY)
+				printf(":%s>\n", _("unlimited"));
+			else
+				printf(":%ju>\n", (uintmax_t)new->rlim_max);
+		}
+
+		if (prlimit(pid, lim->desc->resource, new, old) == -1)
+			err(EXIT_FAILURE, lim->modify ?
+				_("failed to set the %s resource limit") :
+				_("failed to get the %s resource limit"),
+				lim->desc->name);
+
+		if (lim->modify)
+			rem_prlim(lim);		/* modify only; don't show */
+	}
+}
+
+/*
+ * Parse <value>, <soft:>, <:hard> or <soft:hard> (numbers or "unlimited").
+ * Sets the PRLIMIT_{SOFT,HARD} bits in @found and returns 0; returns -1 on a
+ * malformed spec, including trailing garbage.  NULL @str: no-op, returns 0.
+ */
+static int get_range(char *str, rlim_t *soft, rlim_t *hard, int *found)
+{
+	char *end = NULL;
+
+	if (!str)
+		return 0;
+
+	*found = errno = 0;
+	*soft = *hard = RLIM_INFINITY;
+
+	if (!strcmp(str, INFINITY_STR)) {		/* <unlimited> */
+		*found |= PRLIMIT_SOFT | PRLIMIT_HARD;
+		return 0;
+	}
+	if (*str == ':') {				/* <:hard> */
+		str++;
+		if (strcmp(str, INFINITY_STR) != 0) {
+			*hard = strtoull(str, &end, 10);
+			if (errno || !end || *end || end == str)
+				return -1;
+		}
+		*found |= PRLIMIT_HARD;
+		return 0;
+	}
+
+	if (strncmp(str, INFINITY_STR, INFINITY_STRLEN) == 0) {
+		/* <unlimited> or <unlimited:> */
+		end = str + INFINITY_STRLEN;
+	} else {
+		/* <value> or <soft:> */
+		*hard = *soft = strtoull(str, &end, 10);
+		if (errno || !end || end == str)
+			return -1;
+	}
+
+	if (*end == ':' && !*(end + 1))			/* <soft:> */
+		*found |= PRLIMIT_SOFT;
+	else if (*end == ':') {				/* <soft:hard> */
+		str = end + 1;
+		if (!strcmp(str, INFINITY_STR))
+			*hard =  RLIM_INFINITY;
+		else {
+			end = NULL;
+			errno = 0;
+			*hard = strtoull(str, &end, 10);
+			if (errno || !end || *end || end == str)
+				return -1;
+		}
+		*found |= PRLIMIT_SOFT | PRLIMIT_HARD;
+	} else if (!*end)				/* <value> */
+		*found |= PRLIMIT_SOFT | PRLIMIT_HARD;
+	else						/* trailing garbage, e.g. "10x" */
+		return -1;
+
+	return 0;
+}
+
+
+static int parse_prlim(struct rlimit *lim, char *ops, size_t id)
+{
+	rlim_t soft, hard;
+	int found = 0;
+
+	if (get_range(ops, &soft, &hard, &found))
+		errx(EXIT_FAILURE, _("failed to parse %s limit"),
+		     prlimit_desc[id].name);
+
+	lim->rlim_cur = soft;
+	lim->rlim_max = hard;
+
+	return found;
+}
+
+/* Queue resource @id on @lims; a non-NULL @ops carries new values to set. */
+static int add_prlim(char *ops, struct list_head *lims, size_t id)
+{
+	struct prlimit *entry = xcalloc(1, sizeof(*entry));
+
+	INIT_LIST_HEAD(&entry->lims);
+	entry->desc = &prlimit_desc[id];
+	if (ops)
+		entry->modify = parse_prlim(&entry->rlim, ops, id);
+
+	list_add_tail(&entry->lims, lims);
+	return 0;
+}
+
+/* Parse options, apply or collect the requested limits, then print them or exec COMMAND. */
+int main(int argc, char **argv)
+{
+	int opt;
+	struct list_head lims;
+
+	enum {
+		VERBOSE_OPTION = CHAR_MAX + 1,
+		RAW_OPTION,
+		NOHEADINGS_OPTION
+	};
+
+	static const struct option longopts[] = {
+		{ "pid",	required_argument, NULL, 'p' },
+		{ "output",     required_argument, NULL, 'o' },
+		{ "as",         optional_argument, NULL, 'v' },
+		{ "core",       optional_argument, NULL, 'c' },
+		{ "cpu",        optional_argument, NULL, 't' },
+		{ "data",       optional_argument, NULL, 'd' },
+		{ "fsize",      optional_argument, NULL, 'f' },
+		{ "locks",      optional_argument, NULL, 'x' },
+		{ "memlock",    optional_argument, NULL, 'l' },
+		{ "msgqueue",   optional_argument, NULL, 'q' },
+		{ "nice",       optional_argument, NULL, 'e' },
+		{ "nofile",     optional_argument, NULL, 'n' },
+		{ "nproc",      optional_argument, NULL, 'u' },
+		{ "rss",        optional_argument, NULL, 'm' },
+		{ "rtprio",     optional_argument, NULL, 'r' },
+		{ "rttime",     optional_argument, NULL, 'y' },
+		{ "sigpending", optional_argument, NULL, 'i' },
+		{ "stack",      optional_argument, NULL, 's' },
+		{ "version",    no_argument, NULL, 'V' },
+		{ "help",       no_argument, NULL, 'h' },
+		{ "noheadings", no_argument, NULL, NOHEADINGS_OPTION },
+		{ "raw",        no_argument, NULL, RAW_OPTION },
+		{ "verbose",    no_argument, NULL, VERBOSE_OPTION },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	INIT_LIST_HEAD(&lims);
+
+	/* prlimit_desc[] must have one entry per resource id, STACK being the last */
+	assert(MAX_RESOURCES == STACK + 1);
+
+	/* leading '+': stop at the first non-option, so that the options of
+	 * COMMAND are left alone; every resource takes an optional argument */
+	while((opt = getopt_long(argc, argv,
+				 "+c::d::e::f::i::l::m::n::q::r::s::t::u::v::x::y::p:o:Vh",
+				 longopts, NULL)) != -1) {
+		switch(opt) {
+		case 'c':
+			add_prlim(optarg, &lims, CORE);
+			break;
+		case 'd':
+			add_prlim(optarg, &lims, DATA);
+			break;
+		case 'e':
+			add_prlim(optarg, &lims, NICE);
+			break;
+		case 'f':
+			add_prlim(optarg, &lims, FSIZE);
+			break;
+		case 'i':
+			add_prlim(optarg, &lims, SIGPENDING);
+			break;
+		case 'l':
+			add_prlim(optarg, &lims, MEMLOCK);
+			break;
+		case 'm':
+			add_prlim(optarg, &lims, RSS);
+			break;
+		case 'n':
+			add_prlim(optarg, &lims, NOFILE);
+			break;
+		case 'q':
+			add_prlim(optarg, &lims, MSGQUEUE);
+			break;
+		case 'r':
+			add_prlim(optarg, &lims, RTPRIO);
+			break;
+		case 's':
+			add_prlim(optarg, &lims, STACK);
+			break;
+		case 't':
+			add_prlim(optarg, &lims, CPU);
+			break;
+		case 'u':
+			add_prlim(optarg, &lims, NPROC);
+			break;
+		case 'v':
+			add_prlim(optarg, &lims, AS);
+			break;
+		case 'x':
+			add_prlim(optarg, &lims, LOCKS);
+			break;
+		case 'y':
+			add_prlim(optarg, &lims, RTTIME);
+			break;
+
+		case 'p':
+			if (pid)
+				errx(EXIT_FAILURE, _("option --pid may be specified only once"));
+			pid = strtos32_or_err(optarg, _("invalid PID argument"));
+			break;
+		case 'h':
+			usage();	/* does not return */
+		case 'o':
+			ncolumns = string_to_idarray(optarg,
+						     columns, ARRAY_SIZE(columns),
+						     column_name_to_id);
+			if (ncolumns < 0)
+				return EXIT_FAILURE;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+
+		case NOHEADINGS_OPTION:
+			no_headings = 1;
+			break;
+		case VERBOSE_OPTION:
+			verbose++;
+			break;
+		case RAW_OPTION:
+			raw = 1;
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+	if (argc > optind && pid)
+		errx(EXIT_FAILURE, _("options --pid and COMMAND are mutually exclusive"));
+	if (!ncolumns) {
+		/* default columns */
+		columns[ncolumns++] = COL_RES;
+		columns[ncolumns++] = COL_HELP;
+		columns[ncolumns++] = COL_SOFT;
+		columns[ncolumns++] = COL_HARD;
+		columns[ncolumns++] = COL_UNITS;
+	}
+
+	scols_init_debug(0);
+
+	if (list_empty(&lims)) {
+		/* default is to print all resources */
+		size_t n;
+
+		for (n = 0; n < MAX_RESOURCES; n++)
+			add_prlim(NULL, &lims, n);
+	}
+
+	do_prlimit(&lims);	/* entries that set a limit are applied and removed here */
+
+	if (!list_empty(&lims))	/* what is left was only queried: show it */
+		show_limits(&lims);
+
+	if (argc > optind) {
+		/* prlimit [options] COMMAND */
+		execvp(argv[optind], &argv[optind]);
+		errexec(argv[optind]);
+	}
+
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/readprofile.8 b/sys-utils/readprofile.8
new file mode 100644
index 0000000..a37aa0a
--- /dev/null
+++ b/sys-utils/readprofile.8
@@ -0,0 +1,153 @@
+.TH READPROFILE "8" "October 2011" "util-linux" "System Administration"
+.SH NAME
+readprofile \- read kernel profiling information
+.SH SYNOPSIS
+.B readprofile
+[options]
+.SH VERSION
+This manpage documents version 2.0 of the program.
+.SH DESCRIPTION
+.LP
+The
+.B readprofile
+command uses the
+.I /proc/profile
+information to print ascii data on standard output.  The output is
+organized in three columns: the first is the number of clock ticks,
+the second is the name of the C function in the kernel where those
+many ticks occurred, and the third is the normalized `load' of the
+procedure, calculated as a ratio between the number of ticks and the
+length of the procedure.  The output is filled with blanks to ease
+readability.
+.SH OPTIONS
+.TP
+\fB\-a\fR, \fB\-\-all\fR
+Print all symbols in the mapfile.  By default the procedures with
+0 reported ticks are not printed.
+.TP
+\fB\-b\fR, \fB\-\-histbin\fR
+Print individual histogram-bin counts.
+.TP
+\fB\-i\fR, \fB\-\-info\fR
+Info.  This makes
+.B readprofile
+only print the profiling step used by the kernel.  The profiling step
+is the resolution of the profiling buffer, and is chosen during
+kernel configuration (through `make config'), or in the kernel's
+command line.  If the
+.B \-t
+(terse) switch is used together with
+.B \-i
+only the decimal number is printed.
+.TP
+\fB\-m\fR, \fB\-\-mapfile\fR \fImapfile\fR
+Specify a mapfile, which by default is
+.IR /boot/System.map .
+You should specify the map file on cmdline if your current kernel
+isn't the last one you compiled, or if you keep System.map elsewhere.
+If the name of the map file ends with `.gz' it is decompressed on the
+fly.
+.TP
+\fB\-M\fR, \fB\-\-multiplier\fR \fImultiplier\fR
+On some architectures it is possible to alter the frequency at which
+the kernel delivers profiling interrupts to each CPU.  This option
+allows you to set the frequency, as a multiplier of the system clock
+frequency, HZ. Linux 2.6.16 dropped multiplier support for most systems.
+This option also resets the profiling buffer, and requires superuser
+privileges.
+.TP
+\fB\-p\fR, \fB\-\-profile\fR \fIpro-file\fR
+Specify a different profiling buffer, which by default is
+.IR /proc/profile .
+Using a different pro-file is useful if you want to `freeze' the
+kernel profiling at some time and read it later.  The
+.I /proc/profile
+file can be copied using `cat' or `cp'.  There is no more support for
+compressed profile buffers, like in
+.BR readprofile-1.1 ,
+because the program needs to know the size of the buffer in advance.
+.TP
+\fB\-r\fR, \fB\-\-reset\fR
+Reset the profiling buffer.  This can only be invoked by root,
+because
+.I /proc/profile
+is readable by everybody but writable only by the superuser.
+However, you can make
+.B readprofile
+set-user-ID 0, in order to reset the buffer without gaining privileges.
+.TP
+\fB\-s\fR, \fB\-\-counters\fR
+Print individual counters within functions.
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+Verbose.  The output is organized in four columns and filled with
+blanks.  The first column is the RAM address of a kernel function,
+the second is the name of the function, the third is the number of
+clock ticks and the last is the normalized load.
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Display version information and exit.
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Display help text and exit.
+.SH EXAMPLES
+Browse the profiling buffer ordering by clock ticks:
+.nf
+   readprofile | sort -nr | less
+
+.fi
+Print the 20 most loaded procedures:
+.nf
+   readprofile | sort -nr -k3 | head -20
+
+.fi
+Print only filesystem profile:
+.nf
+   readprofile | grep _ext2
+
+.fi
+Look at all the kernel information, with ram addresses:
+.nf
+   readprofile -av | less
+
+.fi
+Browse a `frozen' profile buffer for a non current kernel:
+.nf
+   readprofile -p ~/profile.freeze -m /zImage.map.gz
+
+.fi
+Request profiling at 2kHz per CPU, and reset the profiling buffer:
+.nf
+   sudo readprofile -M 20
+.fi
+.SH BUGS
+.LP
+.B readprofile
+only works with a 1.3.x or newer kernel, because
+.I /proc/profile
+changed in the step from 1.2 to 1.3.
+.LP
+This program only works with ELF kernels.  The change for a.out
+kernels is trivial, and left as an exercise to the a.out user.
+.LP
+To enable profiling, the kernel must be rebooted, because no
+profiling module is available, and it wouldn't be easy to build.  To
+enable profiling, you can specify "profile=2" (or another number) on
+the kernel commandline.  The number you specify is the two-exponent
+used as profiling step.
+.LP
+Profiling is disabled when interrupts are inhibited.  This means that
+many profiling ticks happen when interrupts are re-enabled.  Watch
+out for misleading information.
+.SH FILES
+.nf
+/proc/profile              A binary snapshot of the profiling buffer.
+/boot/System.map           The symbol table for the kernel.
+/usr/src/linux/*           The program being profiled :-)
+.fi
+.SH AVAILABILITY
+The readprofile command is part of the util-linux package and is
+available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/readprofile.c b/sys-utils/readprofile.c
new file mode 100644
index 0000000..0350738
--- /dev/null
+++ b/sys-utils/readprofile.c
@@ -0,0 +1,407 @@
+/*
+ *  readprofile.c - used to read /proc/profile
+ *
+ *  Copyright (C) 1994,1996 Alessandro Rubini (rubini@ipvvis.unipv.it)
+ *
+ *   This program is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This program is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License along
+ *   with this program; if not, write to the Free Software Foundation, Inc.,
+ *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/*
+ * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ * 1999-09-01 Stephane Eranian <eranian@cello.hpl.hp.com>
+ * - 64bit clean patch
+ * 3Feb2001 Andrew Morton <andrewm@uow.edu.au>
+ * - -M option to write profile multiplier.
+ * 2001-11-07 Werner Almesberger <wa@almesberger.net>
+ * - byte order auto-detection and -n option
+ * 2001-11-09 Werner Almesberger <wa@almesberger.net>
+ * - skip step size (index 0)
+ * 2002-03-09 John Levon <moz@compsoc.man.ac.uk>
+ * - make maplineno do something
+ * 2002-11-28 Mads Martin Joergensen +
+ * - also try /boot/System.map-`uname -r`
+ * 2003-04-09 Werner Almesberger <wa@almesberger.net>
+ * - fixed off-by eight error and improved heuristics in byte order detection
+ * 2003-08-12 Nikita Danilov <Nikita@Namesys.COM>
+ * - added -s option; example of use:
+ * "readprofile -s -m /boot/System.map-test | grep __d_lookup | sort -n -k3"
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "nls.h"
+#include "xalloc.h"
+#include "closestream.h"
+
+#define S_LEN 128
+
+/* These are the defaults */
+static char defaultmap[]="/boot/System.map";
+static char defaultpro[]="/proc/profile";
+
+/* fopen() @name, or popen() it through zcat if it ends in ".gz" (*flag = 1) */
+static FILE *myopen(char *name, char *mode, int *flag)
+{
+	size_t len = strlen(name);
+	char *cmdline;
+	FILE *res;
+
+	*flag = len >= 3 && !strcmp(name + len - 3, ".gz");	/* len >= 3: no read before name[0] */
+	if (!*flag)
+		return fopen(name, mode);
+	cmdline = xmalloc(len + 6);	/* "zcat " + name + NUL */
+	sprintf(cmdline, "zcat %s", name);	/* NOTE(review): name is not shell-quoted */
+	res = popen(cmdline, mode);
+	free(cmdline);
+	return res;
+}
+
+#ifndef BOOT_SYSTEM_MAP
+#define BOOT_SYSTEM_MAP "/boot/System.map-"
+#endif
+
+/* Return an xmalloc()ed BOOT_SYSTEM_MAP + uname release path, or "" if uname() fails. */
+static char *boot_uname_r_str(void)
+{
+	struct utsname un;
+	char *path;
+
+	if (uname(&un) != 0)
+		return "";
+
+	path = xmalloc(strlen(BOOT_SYSTEM_MAP) + strlen(un.release) + 1);
+	strcpy(path, BOOT_SYSTEM_MAP);
+	strcat(path, un.release);
+	return path;
+}
+
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;	/* help text goes to stdout; the function ends in exit(EXIT_SUCCESS) */
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Display kernel profiling information.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fprintf(out,
+	      _(" -m, --mapfile <mapfile>   (defaults: \"%s\" and\n"), defaultmap);
+	fprintf(out,
+	      _("                                      \"%s\")\n"), boot_uname_r_str());
+	fprintf(out,
+	      _(" -p, --profile <pro-file>  (default:  \"%s\")\n"), defaultpro);
+	fputs(_(" -M, --multiplier <mult>   set the profiling multiplier to <mult>\n"), out);
+	fputs(_(" -i, --info                print only info about the sampling step\n"), out);
+	fputs(_(" -v, --verbose             print verbose data\n"), out);
+	fputs(_(" -a, --all                 print all symbols, even if count is 0\n"), out);
+	fputs(_(" -b, --histbin             print individual histogram-bin counts\n"), out);
+	fputs(_(" -s, --counters            print individual counters within functions\n"), out);
+	fputs(_(" -r, --reset               reset all the counters (root only)\n"), out);
+	fputs(_(" -n, --no-auto             disable byte order auto-detection\n"), out);
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(27));
+	printf(USAGE_MAN_TAIL("readprofile(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+/* Print per-function tick counts from the profiling buffer, or reset it / set its multiplier. */
+int main(int argc, char **argv)
+{
+	FILE *map;
+	int proFd, c;
+	char *mapFile, *proFile, *mult = NULL;
+	size_t len = 0, indx = 1, nbins;	/* nbins: number of counters in buf[] */
+	unsigned long long add0 = 0;
+	unsigned int step, *buf, total, fn_len;
+	unsigned long long fn_add, next_add;	/* current and next address */
+	char fn_name[S_LEN], next_name[S_LEN];	/* current and next name */
+	char mode[8];
+	ssize_t rc;
+	int optAll = 0, optInfo = 0, optReset = 0, optVerbose = 0, optNative = 0;
+	int optBins = 0, optSub = 0;
+	char mapline[S_LEN];
+	int maplineno = 1, header_printed;
+	int popenMap;		/* flag to tell if popen() has been used */
+
+	static const struct option longopts[] = {
+		{"mapfile", required_argument, NULL, 'm'},
+		{"profile", required_argument, NULL, 'p'},
+		{"multiplier", required_argument, NULL, 'M'},
+		{"info", no_argument, NULL, 'i'},
+		{"verbose", no_argument, NULL, 'v'},
+		{"all", no_argument, NULL, 'a'},
+		{"histbin", no_argument, NULL, 'b'},
+		{"counters", no_argument, NULL, 's'},
+		{"reset", no_argument, NULL, 'r'},
+		{"no-auto", no_argument, NULL, 'n'},
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	proFile = defaultpro;
+	mapFile = defaultmap;
+
+	while ((c = getopt_long(argc, argv, "m:p:M:ivabsrnVh", longopts, NULL)) != -1) {
+		switch (c) {
+		case 'm':
+			mapFile = optarg;
+			break;
+		case 'n':
+			optNative++;
+			break;
+		case 'p':
+			proFile = optarg;
+			break;
+		case 'a':
+			optAll++;
+			break;
+		case 'b':
+			optBins++;
+			break;
+		case 's':
+			optSub++;
+			break;
+		case 'i':
+			optInfo++;
+			break;
+		case 'M':
+			mult = optarg;
+			break;
+		case 'r':
+			optReset++;
+			break;
+		case 'v':
+			optVerbose++;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (optReset || mult) {
+		int multiplier, fd, to_write;
+
+		/* When writing the multiplier, if the length of the
+		 * write is not sizeof(int), the multiplier is not
+		 * changed. */
+		if (mult) {
+			multiplier = strtoul(mult, NULL, 10);
+			to_write = sizeof(int);
+		} else {
+			multiplier = 0;
+			/* sth different from sizeof(int) */
+			to_write = 1;
+		}
+		/* try to become root, just in case */
+		ignore_result( setuid(0) );
+		fd = open(defaultpro, O_WRONLY);
+		if (fd < 0)
+			err(EXIT_FAILURE, "%s", defaultpro);
+		if (write(fd, &multiplier, to_write) != to_write)
+			err(EXIT_FAILURE, _("error writing %s"), defaultpro);
+		close(fd);
+		exit(EXIT_SUCCESS);
+	}
+
+	/* Use an fd for the profiling buffer, to skip stdio overhead */
+	if (((proFd = open(proFile, O_RDONLY)) < 0)
+	    || ((int)(len = lseek(proFd, 0, SEEK_END)) < 0)
+	    || (lseek(proFd, 0, SEEK_SET) < 0))
+		err(EXIT_FAILURE, "%s", proFile);
+	if (len < sizeof(*buf))		/* buf[0], the sampling step, must exist */
+		errx(EXIT_FAILURE, _("%s: profile buffer is empty or truncated"), proFile);
+	nbins = len / sizeof(*buf);
+
+	buf = xmalloc(len);
+
+	rc = read(proFd, buf, len);
+	if (rc < 0 || (size_t) rc != len)
+		err(EXIT_FAILURE, "%s", proFile);
+	close(proFd);
+
+	if (!optNative) {
+		int big = 0, small = 0;
+		unsigned *p;
+		size_t i;
+
+		for (p = buf + 1; p < buf + nbins; p++) {
+			if (*p & ~0U << (sizeof(*buf) * 4))
+				big++;
+			if (*p & ((1 << (sizeof(*buf) * 4)) - 1))
+				small++;
+		}
+		if (big > small) {
+			warnx(_("Assuming reversed byte order. "
+				"Use -n to force native byte order."));
+			for (p = buf; p < buf + nbins; p++)
+				for (i = 0; i < sizeof(*buf) / 2; i++) {
+					unsigned char *b = (unsigned char *)p;
+					unsigned char tmp;
+					tmp = b[i];
+					b[i] = b[sizeof(*buf) - i - 1];
+					b[sizeof(*buf) - i - 1] = tmp;
+				}
+		}
+	}
+
+	step = buf[0];
+	if (optInfo) {
+		printf(_("Sampling_step: %u\n"), step);
+		exit(EXIT_SUCCESS);
+	}
+	if (!step)		/* step is a divisor in the bin arithmetic below */
+		errx(EXIT_FAILURE, _("%s: sampling step is zero"), proFile);
+
+	total = 0;
+
+	map = myopen(mapFile, "r", &popenMap);
+	if (map == NULL && mapFile == defaultmap) {
+		mapFile = boot_uname_r_str();
+		map = myopen(mapFile, "r", &popenMap);
+	}
+	if (map == NULL)
+		err(EXIT_FAILURE, "%s", mapFile);
+
+	while (fgets(mapline, S_LEN, map)) {
+		if (sscanf(mapline, "%llx %7[^\n ] %127[^\n ]", &fn_add, mode, fn_name) != 3)
+			errx(EXIT_FAILURE, _("%s(%i): wrong map line"), mapFile,
+			     maplineno);
+		/* only elf works like this */
+		if (!strcmp(fn_name, "_stext") || !strcmp(fn_name, "__stext")) {
+			add0 = fn_add;
+			break;
+		}
+		maplineno++;
+	}
+
+	if (!add0)
+		errx(EXIT_FAILURE, _("can't find \"_stext\" in %s"), mapFile);
+
+	/*
+	 * Main loop.
+	 */
+	while (fgets(mapline, S_LEN, map)) {
+		unsigned int this = 0;
+		int done = 0;
+
+		maplineno++;	/* counted up front so skipped lines are included */
+		if (sscanf(mapline, "%llx %7[^\n ] %127[^\n ]", &next_add, mode, next_name) != 3)
+			errx(EXIT_FAILURE, _("%s(%i): wrong map line"), mapFile,
+			     maplineno);
+		header_printed = 0;
+
+		/* the kernel only profiles up to _etext */
+		if (!strcmp(next_name, "_etext") ||
+		    !strcmp(next_name, "__etext"))
+			done = 1;
+		else {
+			/* ignore any LEADING (before a '[tT]' symbol
+			 * is found) Absolute symbols and __init_end
+			 * because some architectures place it before
+			 * .text section */
+			if ((*mode == 'A' || *mode == '?')
+			    && (total == 0 || !strcmp(next_name, "__init_end")))
+				continue;
+			if (*mode != 'T' && *mode != 't' &&
+			    *mode != 'W' && *mode != 'w')
+				break;	/* only text is profiled */
+		}
+		/* the tick loop and the -s loop below read bins below this bound */
+		if (indx >= nbins || (next_add - add0) / step > nbins)
+			errx(EXIT_FAILURE,
+			     _("profile address out of range. Wrong map file?"));
+
+		while (indx < (next_add - add0) / step) {
+			if (optBins && (buf[indx] || optAll)) {
+				if (!header_printed) {
+					printf("%s:\n", fn_name);
+					header_printed = 1;
+				}
+				printf("\t%llx\t%u\n", (indx - 1) * step + add0,
+				       buf[indx]);
+			}
+			this += buf[indx++];
+		}
+		total += this;
+
+		if (optBins) {
+			if (optVerbose || this > 0)
+				printf("  total\t\t\t\t%u\n", this);
+		} else if ((this || optAll) &&
+			   (fn_len = next_add - fn_add) != 0) {
+			if (optVerbose)
+				printf("%016llx %-40s %6u %8.4f\n", fn_add,
+				       fn_name, this, this / (double)fn_len);
+			else
+				printf("%6u %-40s %8.4f\n",
+				       this, fn_name, this / (double)fn_len);
+			if (optSub) {
+				unsigned long long scan;
+
+				for (scan = (fn_add - add0) / step + 1;
+				     scan < (next_add - add0) / step;
+				     scan++) {
+					unsigned long long addr;
+					addr = (scan - 1) * step + add0;
+					printf("\t%#llx\t%s+%#llx\t%u\n",
+					       addr, fn_name, addr - fn_add,
+					       buf[scan]);
+				}
+			}
+		}
+
+		fn_add = next_add;
+		strcpy(fn_name, next_name);
+
+		if (done)
+			break;
+	}
+
+	/* clock ticks, out of kernel text - probably modules */
+	printf("%6u %s\n", buf[nbins - 1], "*unknown*");
+
+	/* trailer */
+	if (optVerbose)
+		printf("%016x %-40s %6u %8.4f\n",
+		       0, "total", total, total / (double)(fn_add - add0));
+	else
+		printf("%6u %-40s %8.4f\n",
+		       total, _("total"), total / (double)(fn_add - add0));
+
+	popenMap ? pclose(map) : fclose(map);
+	exit(EXIT_SUCCESS);
+}
diff --git a/sys-utils/renice.1 b/sys-utils/renice.1
new file mode 100644
index 0000000..6b735fa
--- /dev/null
+++ b/sys-utils/renice.1
@@ -0,0 +1,119 @@
+.\" Copyright (c) 1983, 1991, 1993
+.\"	The Regents of the University of California.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\"    must display the following acknowledgement:
+.\"	This product includes software developed by the University of
+.\"	California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"     @(#)renice.8	8.1 (Berkeley) 6/9/93
+.\"
+.TH RENICE "1" "July 2014" "util-linux" "User Commands"
+.SH NAME
+renice \- alter priority of running processes
+.SH SYNOPSIS
+.B renice
+.RB [ \-n ]
+.I priority
+.RB [ \-g | \-p | \-u ]
+.IR identifier ...
+.SH DESCRIPTION
+.B renice
+alters the scheduling priority of one or more running processes.  The
+first argument is the \fIpriority\fR value to be used.
+The other arguments are interpreted as process IDs (by default),
+process group IDs, user IDs, or user names.
+.BR renice 'ing
+a process group causes all processes in the process group to have their
+scheduling priority altered.
+.BR renice 'ing
+a user causes all processes owned by the user to have their scheduling
+priority altered.
+.PP
+.SH OPTIONS
+.TP
+.BR \-n , " \-\-priority " \fIpriority\fR
+Specify the scheduling
+.I priority
+to be used for the process, process group, or user.  Use of the option
+.BR \-n " or " \-\-priority
+is optional, but when used it must be the first argument.
+.TP
+.BR \-g , " \-\-pgrp"
+Interpret the succeeding arguments as process group IDs.
+.TP
+.BR \-p , " \-\-pid"
+Interpret the succeeding arguments as process IDs
+(the default).
+.TP
+.BR \-u , " \-\-user"
+Interpret the succeeding arguments as usernames or UIDs.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH EXAMPLES
+The following command would change the priority of the processes with
+PIDs 987 and 32, plus all processes owned by the users daemon and root:
+.TP
+.B "       renice" +1 987 \-u daemon root \-p 32
+.SH NOTES
+Users other than the superuser may only alter the priority of processes they
+own.  Furthermore, an unprivileged user can only
+.I increase
+the ``nice value'' (i.e., choose a lower priority)
+and such changes are irreversible unless (since Linux 2.6.12)
+the user has a suitable ``nice'' resource limit (see
+.BR ulimit (1)
+and
+.BR getrlimit (2)).
+
+The superuser may alter the priority of any process and set the priority to any
+value in the range \-20 to 19.
+Useful priorities are: 19 (the affected processes will run only when nothing
+else in the system wants to), 0 (the ``base'' scheduling priority), anything
+negative (to make things go very fast).
+.SH FILES
+.TP
+.I /etc/passwd
+to map user names to user IDs
+.SH SEE ALSO
+.BR nice (1),
+.BR getpriority (2),
+.BR setpriority (2),
+.BR credentials (7),
+.BR sched (7)
+.SH HISTORY
+The
+.B renice
+command appeared in 4.0BSD.
+.SH AVAILABILITY
+The renice command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/renice.c b/sys-utils/renice.c
new file mode 100644
index 0000000..3ae71f9
--- /dev/null
+++ b/sys-utils/renice.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 1983, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+ /* 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+  * - added Native Language Support
+  */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <stdio.h>
+#include <pwd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include "nls.h"
+#include "c.h"
+#include "closestream.h"
+
+/*
+ * Names of the ID kinds, indexed by the PRIO_* constant that is passed as
+ * the "which" argument to getpriority()/setpriority().  The entries are
+ * used as "%s" arguments of the messages printed by this program.
+ */
+static const char *idtype[] = {
+	[PRIO_PROCESS]	= N_("process ID"),
+	[PRIO_PGRP]	= N_("process group ID"),
+	[PRIO_USER]	= N_("user ID"),
+};
+
+/*
+ * Write the complete help text (synopsis, description, options) to stdout
+ * and terminate the program with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	/* synopsis: one line per kind of identifier */
+	fputs(USAGE_HEADER, stdout);
+	printf(_(" %1$s [-n] <priority> [-p|--pid] <pid>...\n"
+		 " %1$s [-n] <priority>  -g|--pgrp <pgid>...\n"
+		 " %1$s [-n] <priority>  -u|--user <user>...\n"),
+	       program_invocation_short_name);
+
+	/* one-line description */
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Alter the priority of running processes.\n"), stdout);
+
+	/* options */
+	fputs(USAGE_OPTIONS, stdout);
+	fputs(_(" -n, --priority <num>   specify the nice increment value\n"), stdout);
+	fputs(_(" -p, --pid <id>         interpret argument as process ID (default)\n"), stdout);
+	fputs(_(" -g, --pgrp <id>        interpret argument as process group ID\n"), stdout);
+	fputs(_(" -u, --user <name>|<id> interpret argument as username or user ID\n"), stdout);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	printf(USAGE_HELP_OPTIONS(24));
+	printf(USAGE_MAN_TAIL("renice(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Read the current priority of @who, interpreted according to @which (one
+ * of the PRIO_* constants), and store it in @prio.
+ *
+ * getpriority() may legitimately return -1, therefore errno is cleared
+ * before the call and a failure is recognized only when errno was set.
+ *
+ * Returns 0 on success.  On failure a warning is printed and a negative
+ * errno value is returned.
+ */
+static int getprio(const int which, const int who, int *prio)
+{
+	errno = 0;
+	*prio = getpriority(which, who);
+	if (*prio == -1 && errno) {
+		/* remember the error first, warn() may modify errno */
+		const int rc = -errno;
+
+		warn(_("failed to get priority for %d (%s)"), who, idtype[which]);
+		return rc;
+	}
+	return 0;
+}
+
+/*
+ * Set the priority of @who (an ID of kind @which) to @prio and print the
+ * priority as it was before and as it is after the change.
+ *
+ * Returns 0 on success and 1 when reading or setting the priority failed.
+ */
+static int donice(const int which, const int who, const int prio)
+{
+	int before, after;
+
+	if (getprio(which, who, &before))
+		return 1;
+
+	if (setpriority(which, who, prio) < 0) {
+		warn(_("failed to set priority for %d (%s)"), who, idtype[which]);
+		return 1;
+	}
+
+	/* read the value back, this is what gets reported as the new priority */
+	if (getprio(which, who, &after))
+		return 1;
+
+	printf(_("%d (%s) old priority %d, new priority %d\n"),
+	       who, idtype[which], before, after);
+	return 0;
+}
+
+/*
+ * Change the priority (the nice value) of processes
+ * or groups of processes which are already running.
+ *
+ * The command line is parsed by hand: an optional -n/--priority, the
+ * priority value, and then any mix of the selectors -p/--pid, -g/--pgrp,
+ * -u/--user and identifiers.  A selector changes how all the identifiers
+ * that follow it are interpreted; process IDs are the default.
+ *
+ * Returns EXIT_SUCCESS when every identifier was processed without error,
+ * EXIT_FAILURE otherwise.
+ */
+int main(int argc, char **argv)
+{
+	int which = PRIO_PROCESS;
+	int who = 0, prio, errs = 0;
+	char *endptr = NULL;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	/* skip the program name */
+	argc--;
+	argv++;
+
+	/* --help and --version are recognized only as the sole argument */
+	if (argc == 1) {
+		if (strcmp(*argv, "-h") == 0 ||
+		    strcmp(*argv, "--help") == 0)
+			usage();
+
+		if (strcmp(*argv, "-v") == 0 ||
+		    strcmp(*argv, "-V") == 0 ||
+		    strcmp(*argv, "--version") == 0) {
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		}
+	}
+
+	if (*argv && (strcmp(*argv, "-n") == 0 || strcmp(*argv, "--priority") == 0)) {
+		argc--;
+		argv++;
+	}
+
+	/* at least the priority and one identifier are required */
+	if (argc < 2) {
+		warnx(_("not enough arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	prio = strtol(*argv, &endptr, 10);
+	/* endptr == *argv means no digits were converted (e.g. "") */
+	if (endptr == *argv || *endptr) {
+		warnx(_("invalid priority '%s'"), *argv);
+		errtryhelp(EXIT_FAILURE);
+	}
+	argc--;
+	argv++;
+
+	for (; argc > 0; argc--, argv++) {
+		int bad;
+
+		if (strcmp(*argv, "-g") == 0 || strcmp(*argv, "--pgrp") == 0) {
+			which = PRIO_PGRP;
+			continue;
+		}
+		if (strcmp(*argv, "-u") == 0 || strcmp(*argv, "--user") == 0) {
+			which = PRIO_USER;
+			continue;
+		}
+		if (strcmp(*argv, "-p") == 0 || strcmp(*argv, "--pid") == 0) {
+			which = PRIO_PROCESS;
+			continue;
+		}
+		if (which == PRIO_USER) {
+			struct passwd *pwd = getpwnam(*argv);
+
+			if (pwd != NULL) {
+				/*
+				 * A known user name; endptr is not examined
+				 * here as it still refers to a previously
+				 * parsed argument.
+				 */
+				who = pwd->pw_uid;
+				bad = who < 0;
+			} else {
+				/* not a user name, try a numeric user ID */
+				who = strtol(*argv, &endptr, 10);
+				bad = who < 0 || endptr == *argv || *endptr;
+			}
+			if (bad) {
+				warnx(_("unknown user %s"), *argv);
+				errs = 1;
+				continue;
+			}
+		} else {
+			who = strtol(*argv, &endptr, 10);
+			bad = who < 0 || endptr == *argv || *endptr;
+			if (bad) {
+				/* TRANSLATORS: The first %s is one of the above
+				 * three ID names. Read: "bad value for %s: %s" */
+				warnx(_("bad %s value: %s"), idtype[which], *argv);
+				errs = 1;
+				continue;
+			}
+		}
+		errs |= donice(which, who, prio);
+	}
+	return errs != 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/sys-utils/rfkill.8 b/sys-utils/rfkill.8
new file mode 100644
index 0000000..9eff913
--- /dev/null
+++ b/sys-utils/rfkill.8
@@ -0,0 +1,120 @@
+.\" -*- nroff -*-
+.TH RFKILL "8" "2017-07-06" "util-linux" "System Administration"
+.SH NAME
+rfkill \- tool for enabling and disabling wireless devices
+.SH SYNOPSIS
+.B rfkill
+.RI [ options ]
+.RI [ command ]
+.RI [ id|type \ ...]
+
+.SH DESCRIPTION
+.B rfkill
+lists, enables and disables wireless devices.
+
+The command "list" output format is deprecated and maintained for backward
+compatibility only. The new output format is the default when no command is
+specified or when the option \fB\-\-output\fR is used.
+
+The default output is subject to change.  So whenever possible, you should
+avoid using default outputs in your scripts.  Always explicitly define expected
+columns by using the \fB\-\-output\fR option together with a columns list in
+environments where a stable output is required.
+
+
+.SH OPTIONS
+.TP
+\fB\-J\fR, \fB\-\-json\fR
+Use JSON output format.
+.TP
+\fB\-n\fR, \fB\-\-noheadings\fR
+Do not print a header line.
+.TP
+\fB\-o\fR, \fB\-\-output\fR \fIlist\fR
+Specify which output columns to print.  Use \-\-help to get a list of
+available columns.
+.TP
+.B \-\-output\-all
+Output all available columns.
+.TP
+\fB\-r\fR, \fB\-\-raw\fR
+Use the raw output format.
+.TP
+.B \-\-help
+Display help text and exit.
+.TP
+.B \-\-version
+Display version information and exit.
+.SH COMMANDS
+.TP
+.B help
+Display help text and exit.
+.TP
+.B event
+Listen for rfkill events and display them on stdout.
+.TP
+\fBlist \fR[\fIid\fR|\fItype\fR ...]
+List the current state of all available devices.  The command output format is deprecated, see the section DESCRIPTION.
+It is a good idea to check with the
+.B list
+command that the
+.IR id " or " type
+scope is appropriate before setting
+.BR block " or " unblock .
+Special
+.I all
+type string will match everything.  Use of multiple
+.IR id " or " type
+arguments is supported.
+.TP
+\fBblock \fBid\fR|\fBtype\fR [...]
+Disable the corresponding device.
+.TP
+\fBunblock \fBid\fR|\fBtype\fR [...]
+Enable the corresponding device.  If the device is hard\-blocked, for example
+via a hardware switch, it will remain unavailable though it is now
+soft\-unblocked.
+.SH EXAMPLES
+rfkill \-\-output ID,TYPE
+.br
+rfkill block all
+.br
+rfkill unblock wlan
+.br
+rfkill block bluetooth uwb wimax wwan gps fm nfc
+.SH AUTHORS
+.B rfkill
+was originally written by
+.MT johannes@\:sipsolutions.\:net
+Johannes Berg
+.ME
+and
+.MT marcel@\:holtmann.\:org
+Marcel Holtmann
+.ME .
+The code has been later modified by
+.MT kerolasa@\:iki.\:fi
+Sami Kerola
+.ME
+and
+.MT kzak@\:redhat.\:com
+Karel Zak
+.ME
+for the util\-linux project.
+.PP
+This manual page was written by
+.MT linux@\:youmustbejoking.\:demon.\:co.uk
+Darren Salt
+.ME ,
+for the Debian project (and may be used by others).
+.SH "SEE ALSO"
+.BR powertop (8),
+.BR systemd-rfkill (8),
+.UR https://\:git.\:kernel.\:org/\:pub/\:scm/\:linux/\:kernel/\:git/\:torvalds/\:linux.git/\:tree/\:Documentation/\:rfkill.txt
+Linux kernel documentation
+.UE
+.SH AVAILABILITY
+The rfkill command is part of the util\-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util\-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/rfkill.c b/sys-utils/rfkill.c
new file mode 100644
index 0000000..a93e8ba
--- /dev/null
+++ b/sys-utils/rfkill.c
@@ -0,0 +1,751 @@
+/*
+ * /dev/rfkill userspace tool
+ *
+ * Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2009 Marcel Holtmann <marcel@holtmann.org>
+ * Copyright 2009 Tim Gardner <tim.gardner@canonical.com>
+ * Copyright 2017 Sami Kerola <kerolasa@iki.fi>
+ * Copyright (C) 2017 Karel Zak <kzak@redhat.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <ctype.h>
+#include <getopt.h>
+#include <libsmartcols.h>
+#include <linux/rfkill.h>
+#include <poll.h>
+#include <sys/syslog.h>
+#include <sys/time.h>
+
+#include "c.h"
+#include "closestream.h"
+#include "nls.h"
+#include "optutils.h"
+#include "pathnames.h"
+#include "strutils.h"
+#include "timeutils.h"
+#include "widechar.h"
+#include "xalloc.h"
+
+
+/*
+ * NFC is supported by the kernel since v3.10 (year 2013); FM and the other
+ * types are from year 2009 (2.6.33) or older.
+ *
+ * The fallback definitions are parenthesized so that they keep their value
+ * when they are expanded inside a larger expression.
+ */
+#ifndef RFKILL_TYPE_NFC
+# ifndef RFKILL_TYPE_FM
+#  define RFKILL_TYPE_FM	(RFKILL_TYPE_GPS + 1)
+# endif
+# define RFKILL_TYPE_NFC	(RFKILL_TYPE_FM + 1)
+# undef NUM_RFKILL_TYPES
+# define NUM_RFKILL_TYPES	(RFKILL_TYPE_NFC + 1)
+#endif
+
+/*
+ * One entry of the type table: ties an rfkill type ID to the name that is
+ * compared against command line identifiers and to an optional description.
+ */
+struct rfkill_type_str {
+	enum rfkill_type type;	/* ID */
+	const char *name;	/* generic name */
+	const char *desc;	/* human readable name */
+};
+
+/*
+ * Table of the known types.  Entries marked as alias repeat the type ID of
+ * another entry under a second name and have no description.  The last
+ * entry (name == NULL) terminates the table.
+ */
+static const struct rfkill_type_str rfkill_type_strings[] = {
+	{ .type = RFKILL_TYPE_ALL,       .name = "all"           },
+	{ .type = RFKILL_TYPE_WLAN,      .name = "wlan",         .desc = "Wireless LAN" },
+	{ .type = RFKILL_TYPE_WLAN,      .name = "wifi"          },				/* alias */
+	{ .type = RFKILL_TYPE_BLUETOOTH, .name = "bluetooth",    .desc = "Bluetooth" },
+	{ .type = RFKILL_TYPE_UWB,       .name = "uwb",          .desc = "Ultra-Wideband" },
+	{ .type = RFKILL_TYPE_UWB,       .name = "ultrawideband" }, /* alias */
+	{ .type = RFKILL_TYPE_WIMAX,     .name = "wimax",        .desc = "WiMAX" },
+	{ .type = RFKILL_TYPE_WWAN,      .name = "wwan",         .desc = "Wireless WAN" },
+	{ .type = RFKILL_TYPE_GPS,       .name = "gps",          .desc = "GPS" },
+	{ .type = RFKILL_TYPE_FM,        .name = "fm",           .desc = "FM" },
+	{ .type = RFKILL_TYPE_NFC,       .name = "nfc",          .desc = "NFC" },
+	{ .type = NUM_RFKILL_TYPES,      .name = NULL            }
+};
+
+/*
+ * A parsed command line identifier, as produced by rfkill_id_to_type().
+ * The "result" member tells how the identifier was classified and thereby
+ * which member of the anonymous union is meaningful.
+ */
+struct rfkill_id {
+	union {
+		enum rfkill_type type;	/* used with RFKILL_IS_TYPE */
+		uint32_t index;		/* used with RFKILL_IS_INDEX */
+	};
+	enum {
+		RFKILL_IS_INVALID,	/* identifier not recognized */
+		RFKILL_IS_TYPE,		/* identifier is a type name */
+		RFKILL_IS_INDEX,	/* identifier is a device index */
+		RFKILL_IS_ALL		/* the "all" type name; also the default of the list functions */
+	} result;
+};
+
+/* supported actions */
+enum {
+	ACT_LIST,
+	ACT_HELP,
+	ACT_EVENT,
+	ACT_BLOCK,
+	ACT_UNBLOCK,
+
+	/* has no name in rfkill_actions[]; selected by main() only */
+	ACT_LIST_OLD
+};
+
+/* command names as given on the command line, indexed by the ACT_* value */
+static char *rfkill_actions[] = {
+	[ACT_LIST]	= "list",
+	[ACT_HELP]	= "help",
+	[ACT_EVENT]	= "event",
+	[ACT_BLOCK]	= "block",
+	[ACT_UNBLOCK]	= "unblock"
+};
+
+/* column IDs; they are also the indexes into infos[] */
+enum {
+	COL_DEVICE,
+	COL_ID,
+	COL_TYPE,
+	COL_DESC,
+	COL_SOFT,
+	COL_HARD
+};
+
+/* column names */
+struct colinfo {
+	const char *name;	/* header */
+	double whint;		/* width hint (N < 1 is in percent of termwidth) */
+	int flags;		/* SCOLS_FL_* */
+	const char *help;
+};
+
+/* columns descriptions */
+static const struct colinfo infos[] = {
+	[COL_DEVICE] = {"DEVICE", 0, 0, N_("kernel device name")},
+	[COL_ID]     = {"ID",	  2, SCOLS_FL_RIGHT, N_("device identifier value")},
+	[COL_TYPE]   = {"TYPE",	  0, 0, N_("device type name that can be used as identifier")},
+	[COL_DESC]   = {"TYPE-DESC",   0, 0, N_("device type description")},
+	[COL_SOFT]   = {"SOFT",	  0, SCOLS_FL_RIGHT, N_("status of software block")},
+	[COL_HARD]   = {"HARD",	  0, SCOLS_FL_RIGHT, N_("status of hardware block")}
+};
+
+/*
+ * The COL_* IDs of the columns selected for output, in output order;
+ * ncolumns is the number of entries of columns[] that are in use.
+ */
+static int columns[ARRAY_SIZE(infos) * 2];
+static size_t ncolumns;
+
+/* state of the table output; the flags are set from the -J, -n and -r options */
+struct control {
+	struct libscols_table *tb;	/* table created by rfkill_list_init() */
+	unsigned int
+		json:1,		/* JSON output requested */
+		no_headings:1,	/* do not print the header line */
+		raw:1;		/* raw output requested */
+};
+
+/*
+ * Translate the column name @name (@namesz characters long, compared
+ * without regard to case) to the index of the matching infos[] entry.
+ *
+ * Returns the index, or -1 after printing a warning when no column has
+ * that name.
+ */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	size_t idx = 0;
+
+	assert(name);
+
+	while (idx < ARRAY_SIZE(infos)) {
+		const char *colname = infos[idx].name;
+
+		/* the first namesz characters match and the column name ends there */
+		if (strncasecmp(name, colname, namesz) == 0
+		    && colname[namesz] == '\0')
+			return idx;
+		idx++;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/* Returns the COL_* ID stored at position @num of the selected columns. */
+static int get_column_id(size_t num)
+{
+	int id;
+
+	assert(num < ncolumns);
+	id = columns[num];
+	assert(id < (int)ARRAY_SIZE(infos));
+
+	return id;
+}
+
+/* Returns the infos[] entry of the column selected at position @num. */
+static const struct colinfo *get_column_info(int num)
+{
+	const int id = get_column_id(num);
+
+	return infos + id;
+}
+
+/*
+ * Look up the command name @str in rfkill_actions[].
+ *
+ * Returns the matching ACT_* value or -EINVAL for an unknown name.
+ */
+static int string_to_action(const char *str)
+{
+	size_t act = 0;
+
+	while (act < ARRAY_SIZE(rfkill_actions)) {
+		if (!strcmp(str, rfkill_actions[act]))
+			return act;
+		act++;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Open _PATH_DEV_RFKILL for reading; when @nonblock is non-zero the
+ * descriptor is switched to non-blocking mode.
+ *
+ * Returns the file descriptor on success.  On failure a warning is printed
+ * and a negative errno value is returned; errno itself is left set to the
+ * error that caused the failure.
+ */
+static int rfkill_ro_open(int nonblock)
+{
+	int fd, saved;
+
+	fd = open(_PATH_DEV_RFKILL, O_RDONLY);
+	if (fd < 0) {
+		/* keep the reason, warn() may modify errno */
+		saved = errno;
+		warn(_("cannot open %s"), _PATH_DEV_RFKILL);
+		errno = saved;
+		return -saved;
+	}
+
+	if (nonblock && fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		/* keep the reason, warn() and close() may modify errno */
+		saved = errno;
+		warn(_("cannot set non-blocking %s"), _PATH_DEV_RFKILL);
+		close(fd);
+		errno = saved;
+		return -saved;
+	}
+
+	return fd;
+}
+
+/*
+ * Read one event from @fd into @event.
+ *
+ * returns: 0 success, 1 read again, < 0 error
+ *
+ * "Read again" is returned when read() fails with EAGAIN and, after a
+ * warning, when fewer than RFKILL_EVENT_SIZE_V1 bytes were read.  Any other
+ * read() failure prints a warning and returns a negative errno value.
+ */
+static int rfkill_read_event(int fd, struct rfkill_event *event)
+{
+	ssize_t	len = read(fd, event, sizeof(*event));
+
+	if (len < 0) {
+		int rc;
+
+		if (errno == EAGAIN)
+			return 1;
+		rc = -errno;	/* warn() may modify errno */
+		warn(_("cannot read %s"), _PATH_DEV_RFKILL);
+		return rc;
+	}
+
+	/*
+	 * Both sides are converted to ssize_t so that the comparison and the
+	 * format stay correct whether the kernel header defines the size as
+	 * an integer constant or as a sizeof expression.
+	 */
+	if (len < (ssize_t) RFKILL_EVENT_SIZE_V1) {
+		warnx(_("wrong size of rfkill event: %zd < %zd"),
+		      len, (ssize_t) RFKILL_EVENT_SIZE_V1);
+		return 1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Implementation of the "event" command: wait for events on
+ * _PATH_DEV_RFKILL and print each of them, prefixed by a timestamp, to
+ * stdout.
+ *
+ * The loop has no regular exit.  The function returns a negative value
+ * when the device cannot be opened, or when polling or reading fails.
+ */
+static int rfkill_event(void)
+{
+	struct rfkill_event event;
+	struct timeval tv;
+	char date_buf[ISO_BUFSIZ];
+	struct pollfd p;
+	int fd, n;
+
+	fd = rfkill_ro_open(0);
+	if (fd < 0)
+		return fd;	/* already a negative error code */
+
+	memset(&p, 0, sizeof(p));
+	p.fd = fd;
+	p.events = POLLIN | POLLHUP;
+
+	/* interrupted by signal only */
+	while (1) {
+		int rc = 1;	/* recover-able error */
+
+		n = poll(&p, 1, -1);
+		if (n < 0) {
+			warn(_("failed to poll %s"), _PATH_DEV_RFKILL);
+			goto failed;
+		}
+
+		if (n)
+			rc = rfkill_read_event(fd, &event);
+		if (rc < 0)
+			goto failed;
+		if (rc)
+			continue;	/* nothing usable was read, wait again */
+
+		gettimeofday(&tv, NULL);
+		strtimeval_iso(&tv, ISO_TIMESTAMP_COMMA, date_buf,
+			       sizeof(date_buf));
+		printf("%s: idx %u type %u op %u soft %u hard %u\n",
+		       date_buf,
+		       event.idx, event.type, event.op, event.soft, event.hard);
+		/* make every event visible immediately, e.g. when piped */
+		fflush(stdout);
+	}
+
+failed:
+	close(fd);
+	return -1;
+}
+
+/*
+ * Read the first line of the file rfkill<idx>/<attr> below
+ * _PATH_SYS_RFKILL.
+ *
+ * Returns a pointer to a static buffer which is overwritten by the next
+ * call; the trailing newline is removed.  When the file cannot be opened
+ * or read, an empty string is returned.
+ */
+static const char *get_sys_attr(uint32_t idx, const char *attr)
+{
+	static char name[128];
+	char path[PATH_MAX];
+	FILE *f;
+	char *p;
+
+	/* never hand out the value left over from a previous call */
+	name[0] = '\0';
+
+	snprintf(path, sizeof(path), _PATH_SYS_RFKILL "/rfkill%u/%s", idx, attr);
+	f = fopen(path, "r");
+	if (!f)
+		return name;
+
+	if (fgets(name, sizeof(name), f)) {
+		p = strchr(name, '\n');
+		if (p)
+			*p = '\0';
+	} else
+		name[0] = '\0';	/* buffer content is unreliable after a failed read */
+
+	fclose(f);
+	return name;
+}
+
+/*
+ * Classify the command line identifier @s.
+ *
+ * An identifier that starts with a lower-case letter is looked up as a type
+ * name in rfkill_type_strings[]; "all" is reported as RFKILL_IS_ALL, any
+ * other known name as RFKILL_IS_TYPE.  An identifier that starts with a
+ * digit is taken as a device index and is reported as RFKILL_IS_INDEX only
+ * when the file rfkill<index>/name exists below _PATH_SYS_RFKILL.
+ * Everything else is reported as RFKILL_IS_INVALID.
+ */
+static struct rfkill_id rfkill_id_to_type(const char *s)
+{
+	const struct rfkill_type_str *p;
+	struct rfkill_id ret;
+	/* the <ctype.h> functions need an unsigned char value (or EOF) */
+	const unsigned char first = (unsigned char) *s;
+
+	if (islower(first)) {
+		for (p = rfkill_type_strings; p->name != NULL; p++) {
+			if (!strcmp(s, p->name)) {
+				ret.type = p->type;
+				if (!strcmp(s, "all"))
+					ret.result = RFKILL_IS_ALL;
+				else
+					ret.result = RFKILL_IS_TYPE;
+				return ret;
+			}
+		}
+	} else if (isdigit(first)) {
+		/* assume a numeric character implies an index. */
+		char filename[64];
+
+		ret.index = strtou32_or_err(s, _("invalid identifier"));
+		/* snprintf() always terminates within the given size */
+		snprintf(filename, sizeof(filename),
+			 _PATH_SYS_RFKILL "/rfkill%" PRIu32 "/name", ret.index);
+		if (access(filename, F_OK) == 0)
+			ret.result = RFKILL_IS_INDEX;
+		else
+			ret.result = RFKILL_IS_INVALID;
+		return ret;
+	}
+
+	ret.result = RFKILL_IS_INVALID;
+	return ret;
+}
+
+/*
+ * Returns the description of the first rfkill_type_strings[] entry whose
+ * type equals @type.  The result is NULL when that entry has no
+ * description or when no entry matches.
+ */
+static const char *rfkill_type_to_desc(enum rfkill_type type)
+{
+	const struct rfkill_type_str *p = rfkill_type_strings;
+	const struct rfkill_type_str *end = p + ARRAY_SIZE(rfkill_type_strings);
+
+	for (; p != end; p++)
+		if (p->type == type)
+			return p->desc;
+
+	return NULL;
+}
+
+
+/*
+ * Tells whether @event is to be listed for the identifier @id.
+ *
+ * Only RFKILL_OP_ADD events are considered.  Returns 1 when the event
+ * matches the type or the index requested by @id (or when @id stands for
+ * "all"), 0 otherwise.  An @id with any other result value aborts the
+ * program.
+ */
+static int event_match(struct rfkill_event *event, struct rfkill_id *id)
+{
+	if (event->op != RFKILL_OP_ADD)
+		return 0;
+
+	/* filter out unwanted results */
+	if (id->result == RFKILL_IS_ALL)
+		return 1;
+	if (id->result == RFKILL_IS_TYPE)
+		return event->type == id->type;
+	if (id->result == RFKILL_IS_INDEX)
+		return event->idx == id->index;
+
+	abort();
+}
+
+/*
+ * Append one line describing the device of @event to the table @tb; one
+ * cell is filled for every selected column.
+ *
+ * A cell is left empty when there is no text for it (a device type without
+ * a description).  The program is terminated when the line cannot be
+ * allocated or a cell cannot be set.
+ */
+static void fill_table_row(struct libscols_table *tb, struct rfkill_event *event)
+{
+	struct libscols_line *ln;
+	size_t i;
+
+	assert(tb);
+
+	ln = scols_table_new_line(tb, NULL);
+	if (!ln) {
+		/* report the failure as ENOMEM; err() prints the errno text */
+		errno = ENOMEM;
+		err(EXIT_FAILURE, _("failed to allocate output line"));
+	}
+
+	for (i = 0; i < (size_t)ncolumns; i++) {
+		char *str = NULL;
+		switch (get_column_id(i)) {
+		case COL_DEVICE:
+			str = xstrdup(get_sys_attr(event->idx, "name"));
+			break;
+		case COL_ID:
+			xasprintf(&str, "%" PRIu32, event->idx);
+			break;
+		case COL_TYPE:
+			str = xstrdup(get_sys_attr(event->idx, "type"));
+			break;
+		case COL_DESC:
+		{
+			const char *desc = rfkill_type_to_desc(event->type);
+
+			/* the lookup yields NULL for types without description */
+			if (desc)
+				str = xstrdup(desc);
+			break;
+		}
+		case COL_SOFT:
+			str = xstrdup(event->soft ? _("blocked") : _("unblocked"));
+			break;
+		case COL_HARD:
+			str = xstrdup(event->hard ? _("blocked") : _("unblocked"));
+			break;
+		default:
+			abort();
+		}
+		if (str && scols_line_refer_data(ln, i, str))
+			errx(EXIT_FAILURE, _("failed to add output data"));
+	}
+}
+
+/*
+ * Implementation of the deprecated "list" output format: print three lines
+ * (index, name and type; soft block; hard block) for every device that
+ * matches @param.  A NULL @param matches all devices.
+ *
+ * Returns 0 on success and a negative value when @param is not a valid
+ * identifier, when the device cannot be opened or when reading it fails.
+ */
+static int rfkill_list_old(const char *param)
+{
+	struct rfkill_id id = { .result = RFKILL_IS_ALL };
+	struct rfkill_event event;
+	int fd, rc = 0;
+
+	if (param) {
+		id = rfkill_id_to_type(param);
+		if (id.result == RFKILL_IS_INVALID) {
+			warnx(_("invalid identifier: %s"), param);
+			return -EINVAL;
+		}
+	}
+
+	fd = rfkill_ro_open(1);
+	if (fd < 0)
+		return fd;	/* the reason has been reported already */
+
+	while (1) {
+		rc = rfkill_read_event(fd, &event);
+		if (rc < 0)
+			break;
+		/* non-blocking descriptor: EAGAIN means no more events */
+		if (rc == 1 && errno == EAGAIN) {
+			rc = 0;		/* done */
+			break;
+		}
+		if (rc == 0 && event_match(&event, &id)) {
+			const char *desc = rfkill_type_to_desc(event.type);
+			char *name = xstrdup(get_sys_attr(event.idx, "name")),
+			     *type;
+
+			/* no description known, use the type name from sysfs */
+			if (desc)
+				type = xstrdup(desc);
+			else
+				type = xstrdup(get_sys_attr(event.idx, "type"));
+
+			printf("%u: %s: %s\n", event.idx, name, type);
+			printf("\tSoft blocked: %s\n", event.soft ? "yes" : "no");
+			printf("\tHard blocked: %s\n", event.hard ? "yes" : "no");
+
+			free(name);
+			free(type);
+		}
+	}
+	close(fd);
+	return rc;
+}
+
+/*
+ * Create the output table in @ctrl->tb, apply the JSON, no-headings and
+ * raw settings of @ctrl to it and add one table column for every selected
+ * column.  The program is terminated when an allocation fails.
+ */
+static void rfkill_list_init(struct control *ctrl)
+{
+	size_t n = 0;
+
+	scols_init_debug(0);
+
+	ctrl->tb = scols_new_table();
+	if (ctrl->tb == NULL)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+
+	scols_table_enable_json(ctrl->tb, ctrl->json);
+	scols_table_enable_noheadings(ctrl->tb, ctrl->no_headings);
+	scols_table_enable_raw(ctrl->tb, ctrl->raw);
+
+	while (n < (size_t) ncolumns) {
+		const int id = get_column_id(n);
+		const struct colinfo *ci = &infos[id];
+		struct libscols_column *cl;
+
+		cl = scols_table_new_column(ctrl->tb, ci->name, ci->whint, ci->flags);
+		if (cl == NULL)
+			err(EXIT_FAILURE, _("failed to allocate output column"));
+
+		/* in JSON output the ID column is marked as a number */
+		if (ctrl->json && id == COL_ID)
+			scols_column_set_json_type(cl, SCOLS_JSON_NUMBER);
+		n++;
+	}
+}
+
+/*
+ * Add a table line to @ctrl->tb for every device that matches @param.
+ * A NULL @param matches all devices.
+ *
+ * Returns 0 on success and a negative value when @param is not a valid
+ * identifier, when the device cannot be opened or when reading it fails.
+ */
+static int rfkill_list_fill(struct control const *ctrl, const char *param)
+{
+	struct rfkill_id id = { .result = RFKILL_IS_ALL };
+	struct rfkill_event event;
+	int fd, rc = 0;
+
+	if (param) {
+		id = rfkill_id_to_type(param);
+		if (id.result == RFKILL_IS_INVALID) {
+			warnx(_("invalid identifier: %s"), param);
+			return -EINVAL;
+		}
+	}
+
+	fd = rfkill_ro_open(1);
+	if (fd < 0)
+		return fd;	/* the reason has been reported already */
+
+	while (1) {
+		rc = rfkill_read_event(fd, &event);
+		if (rc < 0)
+			break;
+		/* non-blocking descriptor: EAGAIN means no more events */
+		if (rc == 1 && errno == EAGAIN) {
+			rc = 0;		/* done */
+			break;
+		}
+		if (rc == 0 && event_match(&event, &id))
+			fill_table_row(ctrl->tb, &event);
+	}
+	close(fd);
+	return rc;
+}
+
+/* Print the table of @ctrl and drop the reference held on it. */
+static void rfkill_list_output(struct control const *ctrl)
+{
+	struct libscols_table *table = ctrl->tb;
+
+	scols_print_table(table);
+	scols_unref_table(table);
+}
+
+/*
+ * Soft-block (@block != 0) or soft-unblock (@block == 0) the devices
+ * selected by the identifier @param by writing an event to
+ * _PATH_DEV_RFKILL.  A successful change is logged through syslog.
+ *
+ * Returns 0 on success and a negative value when @param is not a valid
+ * identifier or when opening, writing or closing the device fails.
+ */
+static int rfkill_block(uint8_t block, const char *param)
+{
+	struct rfkill_id id;
+	struct rfkill_event event = {
+		.op = RFKILL_OP_CHANGE_ALL,
+		.soft = block
+	};
+	int fd, rc = 0;
+	char *message = NULL;
+
+	id = rfkill_id_to_type(param);
+
+	switch (id.result) {
+	case RFKILL_IS_INVALID:
+		warnx(_("invalid identifier: %s"), param);
+		return -1;
+	case RFKILL_IS_TYPE:
+		event.type = id.type;
+		xasprintf(&message, "type %s", param);
+		break;
+	case RFKILL_IS_INDEX:
+		/* a single device: change only the one with this index */
+		event.op = RFKILL_OP_CHANGE;
+		event.idx = id.index;
+		xasprintf(&message, "id %" PRIu32, id.index);
+		break;
+	case RFKILL_IS_ALL:
+		message = xstrdup("all");
+		break;
+	default:
+		abort();
+	}
+
+	fd = open(_PATH_DEV_RFKILL, O_RDWR);
+	if (fd < 0) {
+		rc = -errno;	/* warn() and free() may modify errno */
+		warn(_("cannot open %s"), _PATH_DEV_RFKILL);
+		free(message);
+		return rc;
+	}
+
+	if (write(fd, &event, sizeof(event)) < 0) {
+		warn(_("write failed: %s"), _PATH_DEV_RFKILL);
+		rc = -1;	/* the request has not been delivered */
+	} else {
+		openlog("rfkill", 0, LOG_USER);
+		syslog(LOG_NOTICE, "%s set for %s", block ? "block" : "unblock", message);
+		closelog();
+	}
+	free(message);
+
+	if (close(fd) < 0)
+		rc = -1;
+	return rc;
+}
+
+/*
+ * Write the complete help text (synopsis, options, available columns and
+ * commands) to stdout and terminate the program with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	size_t col = 0;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options] command [identifier ...]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Tool for enabling and disabling wireless devices.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -J, --json             use JSON output format\n"), out);
+	fputs(_(" -n, --noheadings       don't print headings\n"), out);
+	fputs(_(" -o, --output <list>    define which output columns to use\n"), out);
+	fputs(_("     --output-all       output all columns\n"), out);
+	fputs(_(" -r, --raw              use the raw output format\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(24));
+
+	/* one line per known column: name and translated description */
+	fputs(USAGE_COLUMNS, out);
+	while (col < ARRAY_SIZE(infos)) {
+		fprintf(out, " %-10s  %s\n", infos[col].name, _(infos[col].help));
+		col++;
+	}
+
+	fputs(USAGE_COMMANDS, out);
+
+	/*
+	 * TRANSLATORS: command names should not be translated, explaining
+	 * them as additional field after identifier is fine, for example
+	 *
+	 * list   [identifier]   (lista [tarkenne])
+	 */
+	fputs(_(" help\n"), out);
+	fputs(_(" event\n"), out);
+	fputs(_(" list   [identifier]\n"), out);
+	fputs(_(" block   identifier\n"), out);
+	fputs(_(" unblock identifier\n"), out);
+
+	fprintf(out, USAGE_MAN_TAIL("rfkill(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Entry point: parse the options, then the optional command ("list" is
+ * the default) and run it once for every identifier that follows.
+ *
+ * The "block" and "unblock" commands require at least one identifier; an
+ * unknown command or a missing identifier is reported on stderr.
+ *
+ * Returns EXIT_SUCCESS when everything succeeded, EXIT_FAILURE otherwise.
+ */
+int main(int argc, char **argv)
+{
+	struct control ctrl = { 0 };
+	int c, act = ACT_LIST, list_all = 0;
+	char *outarg = NULL;
+	enum {
+		OPT_LIST_TYPES = CHAR_MAX + 1
+	};
+	static const struct option longopts[] = {
+		{ "json",	no_argument,	   NULL, 'J' },
+		{ "noheadings", no_argument,	   NULL, 'n' },
+		{ "output",	required_argument, NULL, 'o' },
+		{ "output-all",	no_argument,	   NULL, OPT_LIST_TYPES },
+		{ "raw",	no_argument,	   NULL, 'r' },
+		{ "version",	no_argument,	   NULL, 'V' },
+		{ "help",	no_argument,	   NULL, 'h' },
+		{ NULL, 0, NULL, 0 }
+	};
+	/* --json and --raw cannot be combined */
+	static const ul_excl_t excl[] = {
+		{'J', 'r'},
+		{0}
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+	int ret = 0;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv, "Jno:rVh", longopts, NULL)) != -1) {
+		err_exclusive_options(c, longopts, excl, excl_st);
+		switch (c) {
+		case 'J':
+			ctrl.json = 1;
+			break;
+		case 'n':
+			ctrl.no_headings = 1;
+			break;
+		case 'o':
+			outarg = optarg;
+			break;
+		case OPT_LIST_TYPES:
+			list_all = 1;
+			break;
+		case 'r':
+			ctrl.raw = 1;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc > 0) {
+		act = string_to_action(*argv);
+		if (act < 0) {
+			/* say what is wrong before pointing to --help */
+			warnx(_("invalid command: %s"), *argv);
+			errtryhelp(EXIT_FAILURE);
+		}
+		argv++;
+		argc--;
+
+		/*
+		 * For backward compatibility we use the old output format if
+		 * "list" is explicitly specified and --output is not defined.
+		 */
+		if (!outarg && act == ACT_LIST)
+			act = ACT_LIST_OLD;
+	}
+
+	switch (act) {
+	case ACT_LIST_OLD:
+		/* Deprecated in favour of ACT_LIST */
+		if (!argc)
+			ret |= rfkill_list_old(NULL);	/* ALL */
+		else while (argc) {
+			ret |= rfkill_list_old(*argv);
+			argc--;
+			argv++;
+		}
+		break;
+
+	case ACT_LIST:
+		/* the default columns */
+		columns[ncolumns++] = COL_ID;
+		columns[ncolumns++] = COL_TYPE;
+		columns[ncolumns++] = COL_DEVICE;
+		if (list_all)
+			columns[ncolumns++] = COL_DESC;
+		columns[ncolumns++] = COL_SOFT;
+		columns[ncolumns++] = COL_HARD;
+
+		if (outarg
+		    && string_add_to_idarray(outarg, columns,
+					     ARRAY_SIZE(columns), &ncolumns,
+					     column_name_to_id) < 0)
+			return EXIT_FAILURE;
+
+		rfkill_list_init(&ctrl);
+		if (!argc)
+			ret |= rfkill_list_fill(&ctrl, NULL);	/* ALL */
+		else while (argc) {
+			ret |= rfkill_list_fill(&ctrl, *argv);
+			argc--;
+			argv++;
+		}
+		rfkill_list_output(&ctrl);
+		break;
+
+	case ACT_EVENT:
+		ret = rfkill_event();
+		break;
+
+	case ACT_HELP:
+		usage();
+		break;
+
+	case ACT_BLOCK:
+	case ACT_UNBLOCK:
+		/* without an identifier there is nothing to change */
+		if (!argc) {
+			warnx(_("not enough arguments"));
+			errtryhelp(EXIT_FAILURE);
+		}
+		while (argc) {
+			ret |= rfkill_block(act == ACT_BLOCK, *argv);
+			argc--;
+			argv++;
+		}
+		break;
+	}
+
+	return ret ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/sys-utils/rtcwake.8 b/sys-utils/rtcwake.8
new file mode 100644
index 0000000..4a5f8d7
--- /dev/null
+++ b/sys-utils/rtcwake.8
@@ -0,0 +1,189 @@
+.\" Copyright (c) 2007, SUSE LINUX Products GmbH
+.\"                     Bernhard Walle <bwalle@suse.de>
+.\"
+.\" This program is free software; you can redistribute it and/or
+.\" modify it under the terms of the GNU General Public License
+.\" as published by the Free Software Foundation; either version 2
+.\" of the License, or (at your option) any later version.
+.\"
+.\" This program is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License
+.\" along with this program; if not, write to the Free Software
+.\" Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+.\" 02110-1301, USA.
+.\"
+.TH RTCWAKE 8 "June 2015" "util-linux" "System Administration"
+.SH NAME
+rtcwake \- enter a system sleep state until specified wakeup time
+.SH SYNOPSIS
+.B rtcwake
+[options]
+.RB [ \-d
+.IR device ]
+.RB [ \-m
+.IR standby_mode ]
+.RB { "\-s \fIseconds\fP" | "\-t \fItime_t\fP" }
+.SH DESCRIPTION
+This program is used to enter a system sleep state and to automatically
+wake from it at a specified time.
+.PP
+This uses cross-platform Linux interfaces to enter a system sleep state, and
+leave it no later than a specified time.  It uses any RTC framework driver that
+supports standard driver model wakeup flags.
+.PP
+This is normally used like the old \fBapmsleep\fP utility, to wake from a suspend
+state like ACPI S1 (standby) or S3 (suspend-to-RAM).  Most platforms can
+implement those without analogues of BIOS, APM, or ACPI.
+.PP
+On some systems, this can also be used like \fBnvram-wakeup\fP, waking from states
+like ACPI S4 (suspend to disk).  Not all systems have persistent media that are
+appropriate for such suspend modes.
+.PP
+Note that alarm functionality depends on hardware; not every RTC is able to set up
+an alarm up to 24 hours in the future.
+.PP
+The suspend setup may be interrupted by active hardware; for example wireless USB
+input devices that continue to send events for some fraction of a second after the
+return key is pressed.
+.B rtcwake
+tries to avoid this problem and waits for the terminal to settle down before
+entering a system sleep.
+
+.SH OPTIONS
+.TP
+.BR \-A , " \-\-adjfile " \fIfile
+Specify an alternative path to the adjust file.
+.TP
+.BR \-a , " \-\-auto"
+Read the clock mode (whether the hardware clock is set to UTC or local time)
+from the \fIadjtime\fP file, where
+.BR hwclock (8)
+stores that information.  This is the default.
+.TP
+.BR \-\-date " \fItimestamp"
+Set the wakeup time to the value of the timestamp.  Format of the
+timestamp can be any of the following:
+.TS
+tab(|);
+l2 l.
+YYYYMMDDhhmmss
+YYYY-MM-DD hh:mm:ss
+YYYY-MM-DD hh:mm|(seconds will be set to 00)
+YYYY-MM-DD|(time will be set to 00:00:00)
+hh:mm:ss|(date will be set to today)
+hh:mm|(date will be set to today, seconds to 00)
+tomorrow|(time is set to 00:00:00)
++5min
+.TE
+.TP
+.BR \-d , " \-\-device " \fIdevice
+Use the specified \fIdevice\fP instead of \fBrtc0\fP as realtime clock.
+This option is only relevant if your system has more than one RTC.
+You may specify \fBrtc1\fP, \fBrtc2\fP, ... here.
+.TP
+.BR \-l , " \-\-local"
+Assume that the hardware clock is set to local time, regardless of the
+contents of the \fIadjtime\fP file.
+.TP
+.B \-\-list\-modes
+List available \-\-mode option arguments.
+.TP
+.BR \-m , " \-\-mode " \fImode
+Go into the given standby state.  Valid values for \fImode\fP are:
+.RS
+.TP
+.B standby
+ACPI state S1.  This state offers minimal, though real, power savings, while
+providing a very low-latency transition back to a working system.  This is the
+default mode.
+.TP
+.B freeze
+The processes are frozen, all the devices are suspended and all the processors
+idled.  This state is a general state that does not need any platform-specific
+support, but it saves less power than Suspend-to-RAM, because the system is
+still in a running state.  (Available since Linux 3.9.)
+.TP
+.B mem
+ACPI state S3 (Suspend-to-RAM).  This state offers significant power savings as
+everything in the system is put into a low-power state, except for memory,
+which is placed in self-refresh mode to retain its contents.
+.TP
+.B disk
+ACPI state S4 (Suspend-to-disk).  This state offers the greatest power savings,
+and can be used even in the absence of low-level platform support for power
+management.  This state operates similarly to Suspend-to-RAM, but includes a
+final step of writing memory contents to disk.
+.TP
+.B off
+ACPI state S5 (Poweroff).  This is done by calling '/sbin/shutdown'.
+Not officially supported by ACPI, but it usually works.
+.TP
+.B no
+Don't suspend, only set the RTC wakeup time.
+.TP
+.B on
+Don't suspend, but read the RTC device until an alarm time appears.
+This mode is useful for debugging.
+.TP
+.B disable
+Disable a previously set alarm.
+.TP
+.B show
+Print alarm information in format: "alarm: off|on  <time>".
+The time is in ctime() output format, e.g. "alarm: on  Tue Nov 16 04:48:45 2010".
+.RE
+.TP
+.BR \-n , " \-\-dry-run"
+This option does everything apart from actually setting up the alarm,
+suspending the system, or waiting for the alarm.
+.TP
+.BR \-s , " \-\-seconds " \fIseconds
+Set the wakeup time to \fIseconds\fP in the future from now.
+.TP
+.BR \-t , " \-\-time " \fItime_t
+Set the wakeup time to the absolute time \fItime_t\fP.  \fItime_t\fP
+is the time in seconds since 1970-01-01, 00:00 UTC.  Use the
+.BR date (1)
+tool to convert between human-readable time and \fItime_t\fP.
+.TP
+.BR \-u , " \-\-utc"
+Assume that the hardware clock is set to UTC (Universal Time Coordinated),
+regardless of the contents of the \fIadjtime\fP file.
+.TP
+.BR \-v , " \-\-verbose"
+Be verbose.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH NOTES
+Some PC systems can't currently exit sleep states such as \fBmem\fP
+using only the kernel code accessed by this driver.
+They need help from userspace code to make the framebuffer work again.
+.SH FILES
+.I /etc/adjtime
+.SH HISTORY
+The program was posted several times on LKML and other lists
+before appearing in kernel commit message for Linux 2.6 in the GIT
+commit 87ac84f42a7a580d0dd72ae31d6a5eb4bfe04c6d.
+.SH AUTHORS
+The program was written by David Brownell <dbrownell@users.sourceforge.net>
+and improved by Bernhard Walle <bwalle@suse.de>.
+.SH COPYRIGHT
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+There is NO WARRANTY, to the extent permitted by law.
+.SH "SEE ALSO"
+.BR hwclock (8),
+.BR date (1)
+.SH AVAILABILITY
+The rtcwake command is part of the util-linux package and is available from the
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/rtcwake.8.in b/sys-utils/rtcwake.8.in
new file mode 100644
index 0000000..167f7f9
--- /dev/null
+++ b/sys-utils/rtcwake.8.in
@@ -0,0 +1,189 @@
+.\" Copyright (c) 2007, SUSE LINUX Products GmbH
+.\"                     Bernhard Walle <bwalle@suse.de>
+.\"
+.\" This program is free software; you can redistribute it and/or
+.\" modify it under the terms of the GNU General Public License
+.\" as published by the Free Software Foundation; either version 2
+.\" of the License, or (at your option) any later version.
+.\"
+.\" This program is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License
+.\" along with this program; if not, write to the Free Software
+.\" Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+.\" 02110-1301, USA.
+.\"
+.TH RTCWAKE 8 "June 2015" "util-linux" "System Administration"
+.SH NAME
+rtcwake \- enter a system sleep state until specified wakeup time
+.SH SYNOPSIS
+.B rtcwake
+[options]
+.RB [ \-d
+.IR device ]
+.RB [ \-m
+.IR standby_mode ]
+.RB { "\-s \fIseconds\fP" | "\-t \fItime_t\fP" }
+.SH DESCRIPTION
+This program is used to enter a system sleep state and to automatically
+wake from it at a specified time.
+.PP
+This uses cross-platform Linux interfaces to enter a system sleep state, and
+leave it no later than a specified time.  It uses any RTC framework driver that
+supports standard driver model wakeup flags.
+.PP
+This is normally used like the old \fBapmsleep\fP utility, to wake from a suspend
+state like ACPI S1 (standby) or S3 (suspend-to-RAM).  Most platforms can
+implement those without analogues of BIOS, APM, or ACPI.
+.PP
+On some systems, this can also be used like \fBnvram-wakeup\fP, waking from states
+like ACPI S4 (suspend to disk).  Not all systems have persistent media that are
+appropriate for such suspend modes.
+.PP
+Note that alarm functionality depends on hardware; not every RTC is able to set up
+an alarm up to 24 hours in the future.
+.PP
+The suspend setup may be interrupted by active hardware; for example wireless USB
+input devices that continue to send events for some fraction of a second after the
+return key is pressed.
+.B rtcwake
+tries to avoid this problem and waits for the terminal to settle down before
+entering a system sleep.
+
+.SH OPTIONS
+.TP
+.BR \-A , " \-\-adjfile " \fIfile
+Specify an alternative path to the adjust file.
+.TP
+.BR \-a , " \-\-auto"
+Read the clock mode (whether the hardware clock is set to UTC or local time)
+from the \fIadjtime\fP file, where
+.BR hwclock (8)
+stores that information.  This is the default.
+.TP
+.BR \-\-date " \fItimestamp"
+Set the wakeup time to the value of the timestamp.  Format of the
+timestamp can be any of the following:
+.TS
+tab(|);
+l2 l.
+YYYYMMDDhhmmss
+YYYY-MM-DD hh:mm:ss
+YYYY-MM-DD hh:mm|(seconds will be set to 00)
+YYYY-MM-DD|(time will be set to 00:00:00)
+hh:mm:ss|(date will be set to today)
+hh:mm|(date will be set to today, seconds to 00)
+tomorrow|(time is set to 00:00:00)
++5min
+.TE
+.TP
+.BR \-d , " \-\-device " \fIdevice
+Use the specified \fIdevice\fP instead of \fBrtc0\fP as realtime clock.
+This option is only relevant if your system has more than one RTC.
+You may specify \fBrtc1\fP, \fBrtc2\fP, ... here.
+.TP
+.BR \-l , " \-\-local"
+Assume that the hardware clock is set to local time, regardless of the
+contents of the \fIadjtime\fP file.
+.TP
+.B \-\-list\-modes
+List available \-\-mode option arguments.
+.TP
+.BR \-m , " \-\-mode " \fImode
+Go into the given standby state.  Valid values for \fImode\fP are:
+.RS
+.TP
+.B standby
+ACPI state S1.  This state offers minimal, though real, power savings, while
+providing a very low-latency transition back to a working system.  This is the
+default mode.
+.TP
+.B freeze
+The processes are frozen, all the devices are suspended and all the processors
+idled.  This state is a general state that does not need any platform-specific
+support, but it saves less power than Suspend-to-RAM, because the system is
+still in a running state.  (Available since Linux 3.9.)
+.TP
+.B mem
+ACPI state S3 (Suspend-to-RAM).  This state offers significant power savings as
+everything in the system is put into a low-power state, except for memory,
+which is placed in self-refresh mode to retain its contents.
+.TP
+.B disk
+ACPI state S4 (Suspend-to-disk).  This state offers the greatest power savings,
+and can be used even in the absence of low-level platform support for power
+management.  This state operates similarly to Suspend-to-RAM, but includes a
+final step of writing memory contents to disk.
+.TP
+.B off
+ACPI state S5 (Poweroff).  This is done by calling '/sbin/shutdown'.
+Not officially supported by ACPI, but it usually works.
+.TP
+.B no
+Don't suspend, only set the RTC wakeup time.
+.TP
+.B on
+Don't suspend, but read the RTC device until an alarm time appears.
+This mode is useful for debugging.
+.TP
+.B disable
+Disable a previously set alarm.
+.TP
+.B show
+Print alarm information in format: "alarm: off|on  <time>".
+The time is in ctime() output format, e.g. "alarm: on  Tue Nov 16 04:48:45 2010".
+.RE
+.TP
+.BR \-n , " \-\-dry-run"
+This option does everything apart from actually setting up the alarm,
+suspending the system, or waiting for the alarm.
+.TP
+.BR \-s , " \-\-seconds " \fIseconds
+Set the wakeup time to \fIseconds\fP in the future from now.
+.TP
+.BR \-t , " \-\-time " \fItime_t
+Set the wakeup time to the absolute time \fItime_t\fP.  \fItime_t\fP
+is the time in seconds since 1970-01-01, 00:00 UTC.  Use the
+.BR date (1)
+tool to convert between human-readable time and \fItime_t\fP.
+.TP
+.BR \-u , " \-\-utc"
+Assume that the hardware clock is set to UTC (Universal Time Coordinated),
+regardless of the contents of the \fIadjtime\fP file.
+.TP
+.BR \-v , " \-\-verbose"
+Be verbose.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH NOTES
+Some PC systems can't currently exit sleep states such as \fBmem\fP
+using only the kernel code accessed by this driver.
+They need help from userspace code to make the framebuffer work again.
+.SH FILES
+.I @ADJTIME_PATH@
+.SH HISTORY
+The program was posted several times on LKML and other lists
+before appearing in kernel commit message for Linux 2.6 in the GIT
+commit 87ac84f42a7a580d0dd72ae31d6a5eb4bfe04c6d.
+.SH AUTHORS
+The program was written by David Brownell <dbrownell@users.sourceforge.net>
+and improved by Bernhard Walle <bwalle@suse.de>.
+.SH COPYRIGHT
+This is free software.  You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+There is NO WARRANTY, to the extent permitted by law.
+.SH "SEE ALSO"
+.BR hwclock (8),
+.BR date (1)
+.SH AVAILABILITY
+The rtcwake command is part of the util-linux package and is available from the
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/rtcwake.c b/sys-utils/rtcwake.c
new file mode 100644
index 0000000..b63c646
--- /dev/null
+++ b/sys-utils/rtcwake.c
@@ -0,0 +1,655 @@
+/*
+ * rtcwake -- enter a system sleep state until specified wakeup time.
+ *
+ * This uses cross-platform Linux interfaces to enter a system sleep state,
+ * and leave it no later than a specified time.  It uses any RTC framework
+ * driver that supports standard driver model wakeup flags.
+ *
+ * This is normally used like the old "apmsleep" utility, to wake from a
+ * suspend state like ACPI S1 (standby) or S3 (suspend-to-RAM).  Most
+ * platforms can implement those without analogues of BIOS, APM, or ACPI.
+ *
+ * On some systems, this can also be used like "nvram-wakeup", waking
+ * from states like ACPI S4 (suspend to disk).  Not all systems have
+ * persistent media that are appropriate for such suspend modes.
+ *
+ * The best way to set the system's RTC is so that it holds the current
+ * time in UTC.  Use the "-l" flag to tell this program that the system
+ * RTC uses a local timezone instead (maybe you dual-boot MS-Windows).
+ * That flag should not be needed on systems with adjtime support.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/rtc.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <termios.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "closestream.h"
+#include "env.h"
+#include "nls.h"
+#include "optutils.h"
+#include "pathnames.h"
+#include "strutils.h"
+#include "strv.h"
+#include "timeutils.h"
+#include "xalloc.h"
+
+#ifndef RTC_AF
+# define RTC_AF		0x20	/* Alarm interrupt */
+#endif
+
+#define ADJTIME_ZONE_BUFSIZ		8
+#define SYS_WAKEUP_PATH_TEMPLATE	"/sys/class/rtc/%s/device/power/wakeup"
+#define SYS_POWER_STATE_PATH		"/sys/power/state"
+#define DEFAULT_RTC_DEVICE		"/dev/rtc0"
+
+/*
+ * Modes handled by rtcwake itself; the --mode section of the manual page
+ * explains each of them.  The values of the first five entries are also the
+ * indexes of their names in rtcwake_mode_string[].
+ */
+enum rtc_modes {
+	OFF_MODE     = 0,
+	NO_MODE      = 1,
+	ON_MODE      = 2,
+	DISABLE_MODE = 3,
+	SHOW_MODE    = 4,
+
+	/* a state name found in /sys/power/state; keep it last */
+	SYSFS_MODE   = 5
+};
+
+/* Command line names of the built-in modes, indexed by enum rtc_modes. */
+static const char *rtcwake_mode_string[] = {
+	[OFF_MODE]	= "off",
+	[NO_MODE]	= "no",
+	[ON_MODE]	= "on",
+	[DISABLE_MODE]	= "disable",
+	[SHOW_MODE]	= "show",
+};
+
+/* Timezone the hardware clock is assumed to be kept in. */
+enum clock_modes {
+	CM_AUTO  = 0,	/* look it up in the adjtime file */
+	CM_UTC   = 1,	/* hardware clock is set to UTC */
+	CM_LOCAL = 2	/* hardware clock is set to local time */
+};
+
+/* Run-time settings and state shared by the rtcwake helper functions. */
+struct rtcwake_control {
+	/* name of the requested mode */
+	char *mode_str;
+	/* modes listed in /sys/power/state (filled in on first use) */
+	char **possible_modes;
+	/* adjtime file path */
+	char *adjfile;
+	/* hwclock timezone */
+	enum clock_modes clock_mode;
+	/* system time */
+	time_t sys_time;
+	/* hardware time */
+	time_t rtc_time;
+	/* verbose messaging */
+	unsigned int verbose:1;
+	/* do not set alarm, suspend system, etc */
+	unsigned int dryrun:1;
+};
+
+/* Print the help text on stdout and exit successfully; never returns. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *fp = stdout;
+
+	/* synopsis */
+	fputs(USAGE_HEADER, fp);
+	fprintf(fp, _(" %s [options]\n"), program_invocation_short_name);
+
+	/* one-line description */
+	fputs(USAGE_SEPARATOR, fp);
+	fputs(_("Enter a system sleep state until a specified wakeup time.\n"), fp);
+
+	/* option list */
+	fputs(USAGE_OPTIONS, fp);
+	fputs(_(" -a, --auto               reads the clock mode from adjust file (default)\n"), fp);
+	fprintf(fp,
+		_(" -A, --adjfile <file>     specifies the path to the adjust file\n"
+		  "                            the default is %s\n"),
+		_PATH_ADJTIME);
+	fputs(_("     --date <timestamp>   date time of timestamp to wake\n"), fp);
+	fputs(_(" -d, --device <device>    select rtc device (rtc0|rtc1|...)\n"), fp);
+	fputs(_(" -n, --dry-run            does everything, but suspend\n"), fp);
+	fputs(_(" -l, --local              RTC uses local timezone\n"), fp);
+	fputs(_("     --list-modes         list available modes\n"), fp);
+	fputs(_(" -m, --mode <mode>        standby|mem|... sleep mode\n"), fp);
+	fputs(_(" -s, --seconds <seconds>  seconds to sleep\n"), fp);
+	fputs(_(" -t, --time <time_t>      time to wake\n"), fp);
+	fputs(_(" -u, --utc                RTC uses UTC\n"), fp);
+	fputs(_(" -v, --verbose            verbose messages\n"), fp);
+
+	/* common --help/--version lines and the pointer to the manual page */
+	fputs(USAGE_SEPARATOR, fp);
+	printf(USAGE_HELP_OPTIONS(26));
+	printf(USAGE_MAN_TAIL("rtcwake(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Check the sysfs "wakeup" attribute of the RTC device.
+ *
+ * @devname: device name with or without the leading "/dev/"
+ *
+ * Returns 1 when the attribute reads exactly "enabled", otherwise 0 (the
+ * attribute cannot be opened or read, or holds anything else).
+ */
+static int is_wakeup_enabled(const char *devname)
+{
+	char path[128], line[128];
+	const char *name = devname;
+	char *eol;
+	FILE *fp;
+
+	/* the sysfs path is built from the bare device name */
+	if (startswith(devname, "/dev/"))
+		name = devname + 5;
+	snprintf(path, sizeof(path), SYS_WAKEUP_PATH_TEMPLATE, name);
+
+	fp = fopen(path, "r");
+	if (fp == NULL) {
+		warn(_("cannot open %s"), path);
+		return 0;
+	}
+	eol = fgets(line, sizeof(line), fp);
+	fclose(fp);
+	if (eol == NULL)
+		return 0;
+
+	/* only a complete, newline terminated line is accepted */
+	eol = strchr(line, '\n');
+	if (eol == NULL)
+		return 0;
+	*eol = '\0';
+
+	/* wakeup events could be disabled or not supported */
+	return !strcmp(line, "enabled");
+}
+
+/*
+ * Read the RTC and the system clock and store both as time_t values in
+ * ctl->rtc_time and ctl->sys_time.
+ *
+ * @ctl: control structure; clock_mode is read, sys_time and rtc_time are set
+ * @fd:  open RTC device
+ *
+ * When clock_mode is CM_UTC the process timezone is switched to UTC first,
+ * so that mktime()/localtime() interpret RTC values as UTC.
+ *
+ * Returns 0 on success, -1 (after printing a warning) on failure.
+ */
+static int get_basetimes(struct rtcwake_control *ctl, int fd)
+{
+	struct tm tm = { 0 };
+	struct rtc_time	rtc;
+
+	/* This process works in RTC time, except when working
+	 * with the system clock (which always uses UTC).
+	 */
+	if (ctl->clock_mode == CM_UTC)
+		xsetenv("TZ", "UTC", 1);
+	tzset();
+	/* Read rtc and system clocks "at the same time", or as
+	 * precisely (+/- a second) as we can read them.
+	 */
+	if (ioctl(fd, RTC_RD_TIME, &rtc) < 0) {
+		warn(_("read rtc time failed"));
+		return -1;
+	}
+
+	ctl->sys_time = time(NULL);
+	if (ctl->sys_time == (time_t)-1) {
+		warn(_("read system time failed"));
+		return -1;
+	}
+	/* Convert rtc_time to normal arithmetic-friendly form,
+	 * updating tm.tm_wday as used by asctime().
+	 */
+	tm.tm_sec = rtc.tm_sec;
+	tm.tm_min = rtc.tm_min;
+	tm.tm_hour = rtc.tm_hour;
+	tm.tm_mday = rtc.tm_mday;
+	tm.tm_mon = rtc.tm_mon;
+	tm.tm_year = rtc.tm_year;
+	tm.tm_isdst = -1;  /* assume the system knows better than the RTC */
+
+	ctl->rtc_time = mktime(&tm);
+	if (ctl->rtc_time == (time_t)-1) {
+		warn(_("convert rtc time failed"));
+		return -1;
+	}
+
+	if (ctl->verbose) {
+		struct tm sys_tm = { 0 };
+
+		/* Unless the system uses UTC, either delta or tzone
+		 * reflects a seconds offset from UTC.  The value can
+		 * help sort out problems like bugs in your C library. */
+
+		/* time_t is not necessarily long; cast to match %ld */
+		printf("\tdelta   = %ld\n",
+				(long) (ctl->sys_time - ctl->rtc_time));
+		printf("\ttzone   = %ld\n", timezone);
+		/* "daylight" is only specified to be non-zero when DST
+		 * applies, so normalize it before indexing tzname[] */
+		printf("\ttzname  = %s\n", tzname[daylight ? 1 : 0]);
+
+		/* gmtime_r() into local buffers: no NULL pointer can reach
+		 * asctime() even if the conversion fails */
+		gmtime_r(&ctl->sys_time, &sys_tm);
+		gmtime_r(&ctl->rtc_time, &tm);
+		printf("\tsystime = %ld, (UTC) %s",
+				(long) ctl->sys_time, asctime(&sys_tm));
+		printf("\trtctime = %ld, (UTC) %s",
+				(long) ctl->rtc_time, asctime(&tm));
+	}
+	return 0;
+}
+
+/*
+ * Program the RTC wake alarm.
+ *
+ * @ctl:    control structure; only the dryrun flag is used here
+ * @fd:     open RTC device
+ * @wakeup: wakeup time, already expressed in the RTC's time base
+ *
+ * In dry-run mode the alarm is prepared but not written to the device.
+ *
+ * Returns 0 on success, -1 (after printing a warning) when the time cannot
+ * be converted or the RTC_WKALM_SET ioctl fails.
+ */
+static int setup_alarm(struct rtcwake_control *ctl, int fd, time_t *wakeup)
+{
+	struct tm		*tm;
+	struct rtc_wkalrm	wake = { 0 };
+
+	/* The wakeup time is in POSIX time (more or less UTC).  Ideally
+	 * RTCs use that same time; but PCs can't do that if they need to
+	 * boot MS-Windows.  Messy...
+	 *
+	 * When clock_mode == CM_UTC this process's timezone is UTC, so
+	 * we'll pass a UTC date to the RTC.
+	 *
+	 * Else clock_mode == CM_LOCAL so the time given to the RTC will
+	 * instead use the local time zone. */
+	tm = localtime(wakeup);
+	if (!tm) {
+		/* localtime() returns NULL when the value cannot be
+		 * represented; do not dereference it */
+		warn(_("convert time failed"));
+		return -1;
+	}
+	wake.time.tm_sec = tm->tm_sec;
+	wake.time.tm_min = tm->tm_min;
+	wake.time.tm_hour = tm->tm_hour;
+	wake.time.tm_mday = tm->tm_mday;
+	wake.time.tm_mon = tm->tm_mon;
+	wake.time.tm_year = tm->tm_year;
+	/* wday, yday, and isdst fields are unused */
+	wake.time.tm_wday = -1;
+	wake.time.tm_yday = -1;
+	wake.time.tm_isdst = -1;
+	wake.enabled = 1;
+
+	if (!ctl->dryrun && ioctl(fd, RTC_WKALM_SET, &wake) < 0) {
+		warn(_("set rtc wake alarm failed"));
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Return the list of sleep states offered in /sys/power/state.
+ *
+ * The file is read on the first call only; the result is kept in
+ * ctl->possible_modes and returned directly by later calls.
+ *
+ * Returns NULL when the file cannot be opened or read.
+ */
+static char **get_sys_power_states(struct rtcwake_control *ctl)
+{
+	char buf[256] = { 0 };
+	ssize_t len;
+	int fd;
+
+	/* already cached */
+	if (ctl->possible_modes)
+		return ctl->possible_modes;
+
+	fd = open(SYS_POWER_STATE_PATH, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	/* one byte is left untouched so buf stays NUL terminated */
+	len = read(fd, &buf, sizeof(buf) - 1);
+	if (len > 0)
+		ctl->possible_modes = strv_split(buf, " \n");
+	close(fd);
+
+	/* still NULL here if nothing could be read */
+	return ctl->possible_modes;
+}
+
+/*
+ * Discard pending terminal input before suspending.
+ *
+ * As long as stdin is reported readable, wait 250 ms and flush the input
+ * queue; give up after eight rounds.
+ */
+static void wait_stdin(struct rtcwake_control *ctl)
+{
+	struct pollfd pfd = { .fd = STDIN_FILENO, .events = POLLIN };
+	int round;
+
+	for (round = 0; round < 8; round++) {
+		/* nothing pending (or poll failed): the terminal is quiet */
+		if (poll(&pfd, 1, 10) != 1)
+			break;
+		if (ctl->verbose)
+			warnx(_("discarding stdin"));
+		xusleep(250000);
+		tcflush(STDIN_FILENO, TCIFLUSH);
+	}
+}
+
+/*
+ * Enter the sleep state named by ctl->mode_str by writing it to
+ * /sys/power/state.  In dry-run mode the file is opened and closed
+ * without anything being written.
+ */
+static void suspend_system(struct rtcwake_control *ctl)
+{
+	FILE *state;
+
+	state = fopen(SYS_POWER_STATE_PATH, "w");
+	if (state == NULL) {
+		warn(_("cannot open %s"), SYS_POWER_STATE_PATH);
+		return;
+	}
+
+	if (ctl->dryrun == 0) {
+		/* let an interactive terminal settle down first */
+		if (isatty(STDIN_FILENO))
+			wait_stdin(ctl);
+		fprintf(state, "%s\n", ctl->mode_str);
+		fflush(state);
+	}
+
+	/* this executes after wake from suspend */
+	if (close_stream(state) != 0)
+		errx(EXIT_FAILURE, _("write error"));
+}
+
+/*
+ * Set ctl->clock_mode from the third line of the adjtime file.
+ *
+ * A line starting with "UTC" selects CM_UTC and one starting with "LOCAL"
+ * selects CM_LOCAL; anything else leaves clock_mode untouched (with a
+ * warning in verbose mode).
+ *
+ * Returns 0 when a third line was read, -1 when the file cannot be opened
+ * or has fewer than three lines.
+ */
+static int read_clock_mode(struct rtcwake_control *ctl)
+{
+	char third[ADJTIME_ZONE_BUFSIZ];
+	int rc = -1;
+	FILE *adj;
+
+	adj = fopen(ctl->adjfile, "r");
+	if (adj == NULL)
+		return -1;
+
+	/* skip two lines, then read the third one */
+	if (skip_fline(adj) == 0 && skip_fline(adj) == 0
+	    && fgets(third, sizeof(third), adj) != NULL) {
+		if (!strncmp(third, "UTC", 3))
+			ctl->clock_mode = CM_UTC;
+		else if (!strncmp(third, "LOCAL", 5))
+			ctl->clock_mode = CM_LOCAL;
+		else if (ctl->verbose)
+			warnx(_("unexpected third line in: %s: %s"), ctl->adjfile, third);
+		rc = 0;
+	}
+
+	fclose(adj);
+	return rc;
+}
+
+/*
+ * Print the state of the RTC wake alarm on stdout: "alarm: off" or
+ * "alarm: on" followed by the alarm time in ctime() format.
+ *
+ * ctl->sys_time and ctl->rtc_time are used to shift the alarm from the
+ * RTC's time base to system time.
+ *
+ * Returns 0 on success, -1 (after printing a warning) on failure.
+ */
+static int print_alarm(struct rtcwake_control *ctl, int fd)
+{
+	struct rtc_wkalrm wake;
+	struct tm when = { 0 };
+	time_t at;
+
+	if (ioctl(fd, RTC_WKALM_RD, &wake) < 0) {
+		warn(_("read rtc alarm failed"));
+		return -1;
+	}
+
+	/* alarm disabled, or the device reports no year for it */
+	if (wake.enabled != 1 || wake.time.tm_year == -1) {
+		printf(_("alarm: off\n"));
+		return 0;
+	}
+
+	when.tm_year = wake.time.tm_year;
+	when.tm_mon = wake.time.tm_mon;
+	when.tm_mday = wake.time.tm_mday;
+	when.tm_hour = wake.time.tm_hour;
+	when.tm_min = wake.time.tm_min;
+	when.tm_sec = wake.time.tm_sec;
+	when.tm_isdst = -1;  /* assume the system knows better than the RTC */
+
+	at = mktime(&when);
+	if (at == (time_t)-1) {
+		warn(_("convert time failed"));
+		return -1;
+	}
+
+	/* 0 if both UTC, or expresses diff if RTC in local time */
+	at += ctl->sys_time - ctl->rtc_time;
+	printf(_("alarm: on  %s"), ctime(&at));
+
+	return 0;
+}
+
+/*
+ * Translate a --mode argument into a mode number.
+ *
+ * Names listed in /sys/power/state are checked first and yield SYSFS_MODE;
+ * otherwise the index of the name in rtcwake_mode_string[] is returned.
+ *
+ * Returns -EINVAL when the name is unknown.
+ */
+static int get_rtc_mode(struct rtcwake_control *ctl, const char *s)
+{
+	char **sysfs_modes = get_sys_power_states(ctl);
+	char **cur;
+	size_t idx = 0;
+
+	STRV_FOREACH(cur, sysfs_modes) {
+		if (!strcmp(s, *cur))
+			return SYSFS_MODE;
+	}
+
+	while (idx < ARRAY_SIZE(rtcwake_mode_string)) {
+		if (strcmp(s, rtcwake_mode_string[idx]) == 0)
+			return idx;
+		idx++;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Open the RTC device read-only (close-on-exec) and return the descriptor.
+ *
+ * @devname: used as given when it starts with "/dev", otherwise "/dev/" is
+ *           put in front of it
+ *
+ * Exits with EXIT_FAILURE when the device cannot be opened.
+ */
+static int open_dev_rtc(const char *devname)
+{
+	char *path = NULL;
+	int rtc_fd;
+
+	if (!startswith(devname, "/dev"))
+		xasprintf(&path, "/dev/%s", devname);
+	else
+		path = xstrdup(devname);
+
+	rtc_fd = open(path, O_RDONLY | O_CLOEXEC);
+	if (rtc_fd < 0)
+		err(EXIT_FAILURE, _("%s: unable to find device"), path);
+
+	free(path);
+	return rtc_fd;
+}
+
+/*
+ * Print every accepted --mode argument on one line: first the states from
+ * /sys/power/state, then the modes built into rtcwake.  Exits with
+ * EXIT_FAILURE when the sysfs states cannot be read.
+ */
+static void list_modes(struct rtcwake_control *ctl)
+{
+	char **sysfs_modes = get_sys_power_states(ctl);
+	char **cur;
+	size_t idx = 0;
+
+	if (sysfs_modes == NULL)
+		errx(EXIT_FAILURE, _("could not read: %s"), SYS_POWER_STATE_PATH);
+
+	STRV_FOREACH(cur, sysfs_modes) {
+		printf("%s ", *cur);
+	}
+
+	while (idx < ARRAY_SIZE(rtcwake_mode_string))
+		printf("%s ", rtcwake_mode_string[idx++]);
+	putchar('\n');
+}
+
+/*
+ * rtcwake entry point: parse the command line, work out the wakeup time,
+ * program the RTC alarm and then act according to the selected mode.
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+int main(int argc, char **argv)
+{
+	struct rtcwake_control ctl = {
+		/*
+		 * Default mode.  suspend_system() writes this string
+		 * verbatim to /sys/power/state, so it has to be a state
+		 * name the kernel accepts; "standby" is the default
+		 * documented in rtcwake(8).
+		 */
+		.mode_str = "standby",
+		.adjfile = _PATH_ADJTIME,
+		.clock_mode = CM_AUTO
+	};
+	char *devname = DEFAULT_RTC_DEVICE;
+	unsigned seconds = 0;		/* relative wakeup time (--seconds) */
+	int suspend = SYSFS_MODE;	/* mode number, see enum rtc_modes */
+	int rc = EXIT_SUCCESS;
+	int t;
+	int fd;
+	time_t alarm = 0;		/* absolute wakeup time (--time, --date) */
+	enum {
+		OPT_DATE = CHAR_MAX + 1,
+		OPT_LIST
+	};
+	static const struct option long_options[] = {
+		{ "adjfile",	required_argument,	NULL, 'A'      },
+		{ "auto",	no_argument,		NULL, 'a'      },
+		{ "dry-run",	no_argument,		NULL, 'n'      },
+		{ "local",	no_argument,		NULL, 'l'      },
+		{ "utc",	no_argument,		NULL, 'u'      },
+		{ "verbose",	no_argument,		NULL, 'v'      },
+		{ "version",	no_argument,		NULL, 'V'      },
+		{ "help",	no_argument,		NULL, 'h'      },
+		{ "mode",	required_argument,	NULL, 'm'      },
+		{ "device",	required_argument,	NULL, 'd'      },
+		{ "seconds",	required_argument,	NULL, 's'      },
+		{ "time",	required_argument,	NULL, 't'      },
+		{ "date",	required_argument,	NULL, OPT_DATE },
+		{ "list-modes",	no_argument,		NULL, OPT_LIST },
+		{ NULL, 0, NULL, 0 }
+	};
+	/* options within one row cannot be combined */
+	static const ul_excl_t excl[] = {
+		{ 'a', 'l', 'u' },
+		{ 's', 't', OPT_DATE },
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((t = getopt_long(argc, argv, "A:ahd:lm:ns:t:uVv",
+					long_options, NULL)) != EOF) {
+		err_exclusive_options(t, long_options, excl, excl_st);
+		switch (t) {
+		case 'A':
+			/* for better compatibility with hwclock */
+			ctl.adjfile = optarg;
+			break;
+		case 'a':
+			ctl.clock_mode = CM_AUTO;
+			break;
+		case 'd':
+			devname = optarg;
+			break;
+		case 'l':
+			ctl.clock_mode = CM_LOCAL;
+			break;
+
+		case OPT_LIST:
+			list_modes(&ctl);
+			return EXIT_SUCCESS;
+
+		case 'm':
+			if ((suspend = get_rtc_mode(&ctl, optarg)) < 0)
+				errx(EXIT_FAILURE, _("unrecognized suspend state '%s'"), optarg);
+			ctl.mode_str = optarg;
+			break;
+		case 'n':
+			ctl.dryrun = 1;
+			break;
+		case 's':
+			/* alarm time, seconds-to-sleep (relative) */
+			seconds = strtou32_or_err(optarg, _("invalid seconds argument"));
+			break;
+		case 't':
+			/* alarm time, time_t (absolute, seconds since epoch) */
+			alarm = strtou32_or_err(optarg, _("invalid time argument"));
+			break;
+		case OPT_DATE:
+		{	/* alarm time, see timestamp format from manual */
+			usec_t p;
+			if (parse_timestamp(optarg, &p) < 0)
+				errx(EXIT_FAILURE, _("invalid time value \"%s\""), optarg);
+			/* the parsed value is divided by 10^6 to get seconds */
+			alarm = (time_t) (p / 1000000);
+			break;
+		}
+		case 'u':
+			ctl.clock_mode = CM_UTC;
+			break;
+		case 'v':
+			ctl.verbose = 1;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			exit(EXIT_SUCCESS);
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* no usable adjtime file: fall back to UTC */
+	if (ctl.clock_mode == CM_AUTO && read_clock_mode(&ctl) < 0) {
+		printf(_("%s: assuming RTC uses UTC ...\n"),  program_invocation_short_name);
+		ctl.clock_mode = CM_UTC;
+	}
+
+	if (ctl.verbose)
+		printf("%s",  ctl.clock_mode == CM_UTC ? _("Using UTC time.\n") :
+				_("Using local time.\n"));
+
+	/* only the "disable" and "show" modes work without a wakeup time */
+	if (!alarm && !seconds && suspend != DISABLE_MODE && suspend != SHOW_MODE)
+		errx(EXIT_FAILURE, _("must provide wake time (see --seconds, --time and --date options)"));
+
+	/* device must exist and (if we'll sleep) be wakeup-enabled */
+	fd = open_dev_rtc(devname);
+
+	if (suspend != ON_MODE && suspend != NO_MODE && !is_wakeup_enabled(devname))
+		errx(EXIT_FAILURE, _("%s not enabled for wakeup events"), devname);
+
+	/* relative or absolute alarm time, normalized to time_t */
+	if (get_basetimes(&ctl, fd) < 0)
+		exit(EXIT_FAILURE);
+
+	if (ctl.verbose)
+		/* time_t is not necessarily long; cast to match %ld */
+		printf(_("alarm %ld, sys_time %ld, rtc_time %ld, seconds %u\n"),
+				(long) alarm, (long) ctl.sys_time,
+				(long) ctl.rtc_time, seconds);
+
+	if (suspend != DISABLE_MODE && suspend != SHOW_MODE) {
+		/* perform alarm setup when the show or disable modes are not set */
+		if (alarm) {
+			if (alarm < ctl.sys_time)
+				errx(EXIT_FAILURE, _("time doesn't go backward to %s"),
+						ctime(&alarm));
+			/* shift from system time to the RTC's time base */
+			alarm -= ctl.sys_time - ctl.rtc_time;
+		} else
+			alarm = ctl.rtc_time + seconds + 1;
+
+		if (setup_alarm(&ctl, fd, &alarm) < 0)
+			exit(EXIT_FAILURE);
+
+		if (suspend == NO_MODE || suspend == ON_MODE)
+			printf(_("%s: wakeup using %s at %s"),
+				program_invocation_short_name, devname,
+				ctime(&alarm));
+		else
+			printf(_("%s: wakeup from \"%s\" using %s at %s"),
+				program_invocation_short_name, ctl.mode_str, devname,
+				ctime(&alarm));
+		fflush(stdout);
+		xusleep(10 * 1000);
+	}
+
+	switch (suspend) {
+	case NO_MODE:
+		if (ctl.verbose)
+			printf(_("suspend mode: no; leaving\n"));
+		ctl.dryrun = 1;	/* to skip disabling alarm at the end */
+		break;
+	case OFF_MODE:
+	{
+		char *arg[5];
+		int i = 0;
+
+		if (ctl.verbose)
+			printf(_("suspend mode: off; executing %s\n"),
+						_PATH_SHUTDOWN);
+		arg[i++] = _PATH_SHUTDOWN;
+		arg[i++] = "-h";
+		arg[i++] = "-P";
+		arg[i++] = "now";
+		arg[i]   = NULL;
+		if (!ctl.dryrun) {
+			/* execv() only returns on failure */
+			execv(arg[0], arg);
+			warn(_("failed to execute %s"), _PATH_SHUTDOWN);
+			rc = EXIT_FAILURE;
+		}
+		break;
+	}
+	case ON_MODE:
+	{
+		/* initialized so that a short read never leaves it
+		 * indeterminate when it is tested below */
+		unsigned long data = 0;
+
+		if (ctl.verbose)
+			printf(_("suspend mode: on; reading rtc\n"));
+		if (!ctl.dryrun) {
+			/* keep reading until the alarm flag shows up */
+			do {
+				t = read(fd, &data, sizeof data);
+				if (t < 0) {
+					warn(_("rtc read failed"));
+					break;
+				}
+				if (ctl.verbose)
+					printf("... %s: %03lx\n", devname, data);
+			} while (!(data & RTC_AF));
+		}
+		break;
+	}
+	case DISABLE_MODE:
+		/* just break, alarm gets disabled in the end */
+		if (ctl.verbose)
+			printf(_("suspend mode: disable; disabling alarm\n"));
+		break;
+	case SHOW_MODE:
+		if (ctl.verbose)
+			printf(_("suspend mode: show; printing alarm info\n"));
+		if (print_alarm(&ctl, fd))
+			rc = EXIT_FAILURE;
+		ctl.dryrun = 1;	/* don't really disable alarm in the end, just show */
+		break;
+	default:
+		/* SYSFS_MODE: a state name taken from /sys/power/state */
+		if (ctl.verbose)
+			printf(_("suspend mode: %s; suspending system\n"), ctl.mode_str);
+		sync();
+		suspend_system(&ctl);
+	}
+
+	/* disable the alarm again: read it back and clear the enabled flag */
+	if (!ctl.dryrun) {
+		struct rtc_wkalrm wake;
+
+		if (ioctl(fd, RTC_WKALM_RD, &wake) < 0) {
+			warn(_("read rtc alarm failed"));
+			rc = EXIT_FAILURE;
+		} else {
+			wake.enabled = 0;
+			if (ioctl(fd, RTC_WKALM_SET, &wake) < 0) {
+				warn(_("disable rtc alarm interrupt failed"));
+				rc = EXIT_FAILURE;
+			}
+		}
+	}
+
+	close(fd);
+	return rc;
+}
diff --git a/sys-utils/setarch.8 b/sys-utils/setarch.8
new file mode 100644
index 0000000..efa3d50
--- /dev/null
+++ b/sys-utils/setarch.8
@@ -0,0 +1,143 @@
+.TH SETARCH 8 "December 2017" "util-linux" "System Administration"
+.SH NAME
+setarch \- change reported architecture in new program environment and/or set personality flags
+.SH SYNOPSIS
+.B setarch
+.RI [ arch ]
+[options]
+.RI [ program
+.RI [ argument ...]]
+.sp
+.B setarch
+.BR \-\-list | \-h | \-V
+.sp
+.B arch
+[options]
+.RI [ program
+.RI [ argument ...]]
+.SH DESCRIPTION
+.B setarch
+modifies execution domains and process personality flags.
+.PP
+The execution domain currently only affects the output of \fBuname \-m\fR.
+For example, on an AMD64 system, running \fBsetarch i386 \fIprogram\fR
+will cause \fIprogram\fR to see i686 instead of x86_64 as the machine type.
+It also allows setting various personality options.
+The default \fIprogram\fR is \fB/bin/sh\fR.
+.PP
+Since version 2.33 the
+.I arch
+command line argument is optional and
+.B setarch
+may be used to change personality flags (ADDR_LIMIT_*, SHORT_INODE, etc) without
+modification of the execution domain.
+.SH OPTIONS
+.TP
+.B \-\-list
+List the architectures that \fBsetarch\fR knows about.  Whether \fBsetarch\fR
+can actually set each of these architectures depends on the running kernel.
+.TP
+.B \-\-uname\-2.6
+Causes the \fIprogram\fR to see a kernel version number beginning with 2.6.
+Turns on UNAME26.
+.TP
+.BR \-v , " \-\-verbose"
+Be verbose.
+.TP
+\fB\-3\fR, \fB\-\-3gb\fR
+Specifies
+.I program
+should use a maximum of 3GB of address space.  Supported on x86.  Turns on
+ADDR_LIMIT_3GB.
+.TP
+\fB\-\-4gb\fR
+This option has no effect.  It is retained for backward compatibility only,
+and may be removed in future releases.
+.TP
+\fB\-B\fR, \fB\-\-32bit\fR
+Limit the address space to 32 bits to emulate hardware.  Supported on ARM
+and Alpha.  Turns on ADDR_LIMIT_32BIT.
+.TP
+\fB\-F\fR, \fB\-\-fdpic\-funcptrs\fR
+Treat user-space function pointers to signal handlers as pointers to address
+descriptors.  This option has no effect on architectures that do not support
+FDPIC ELF binaries.  In kernel v4.14 support is limited to ARM, Blackfin,
+Fujitsu FR-V, and SuperH CPU architectures.
+.TP
+\fB\-I\fR, \fB\-\-short\-inode\fR
+Obsolete bug emulation flag.  Turns on SHORT_INODE.
+.TP
+\fB\-L\fR, \fB\-\-addr\-compat\-layout\fR
+Provide legacy virtual address space layout.  Use when the
+.I program
+binary does not have PT_GNU_STACK ELF header.  Turns on
+ADDR_COMPAT_LAYOUT.
+.TP
+\fB\-R\fR, \fB\-\-addr\-no\-randomize\fR
+Disables randomization of the virtual address space.  Turns on
+ADDR_NO_RANDOMIZE.
+.TP
+\fB\-S\fR, \fB\-\-whole\-seconds\fR
+Obsolete bug emulation flag.  Turns on WHOLE_SECONDS.
+.TP
+\fB\-T\fR, \fB\-\-sticky\-timeouts\fR
+This makes
+.BR select (2),
+.BR pselect (2),
+and
+.BR ppoll (2)
+system calls preserve the timeout value instead of modifying it to reflect
+the amount of time not slept when interrupted by a signal handler.  Use when
+.I program
+depends on this behavior.  For more details see the timeout description in
+.BR select (2)
+manual page.  Turns on STICKY_TIMEOUTS.
+.TP
+\fB\-X\fR, \fB\-\-read\-implies\-exec\fR
+If this is set then
+.BR mmap (2)
+PROT_READ will also add the PROT_EXEC bit - as expected by legacy x86
+binaries.  Notice that the ELF loader will automatically set this bit when
+it encounters a legacy binary.  Turns on READ_IMPLIES_EXEC.
+.TP
+\fB\-Z\fR, \fB\-\-mmap\-page\-zero\fR
+SVr4 bug emulation that will set
+.BR mmap (2)
+page zero as read-only.  Use when
+.I program
+depends on this behavior, and the source code is not available to be fixed.
+Turns on MMAP_PAGE_ZERO.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH EXAMPLES
+setarch --addr-no-randomize mytestprog
+.br
+setarch ppc32 rpmbuild --target=ppc --rebuild foo.src.rpm
+.br
+setarch ppc32 -v -vL3 rpmbuild --target=ppc --rebuild bar.src.rpm
+.br
+setarch ppc32 --32bit rpmbuild --target=ppc --rebuild foo.src.rpm
+.SH AUTHOR
+.MT sopwith@redhat.com
+Elliot Lee
+.ME
+.br
+.MT jnovy@redhat.com
+Jindrich Novy
+.ME
+.br
+.MT kzak@redhat.com
+Karel Zak
+.ME
+.SH "SEE ALSO"
+.BR personality (2),
+.BR select (2)
+.SH AVAILABILITY
+The setarch command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/setarch.c b/sys-utils/setarch.c
new file mode 100644
index 0000000..7c0a63f
--- /dev/null
+++ b/sys-utils/setarch.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (C) 2003-2007 Red Hat, Inc.
+ *
+ * This file is part of util-linux.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ *
+ * Written by Elliot Lee <sopwith@redhat.com>
+ * New personality options & code added by Jindrich Novy <jnovy@redhat.com>
+ * ADDR_NO_RANDOMIZE flag added by Arjan van de Ven <arjanv@redhat.com>
+ * Help and MIPS support from Mike Frysinger (vapier@gentoo.org)
+ * Better error handling from Dmitry V. Levin (ldv@altlinux.org)
+ *
+ * based on ideas from the ppc32 util by Guy Streeter (2002-01), based on the
+ * sparc32 util by Jakub Jelinek (1998, 1999)
+ */
+
+#include <sys/personality.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <limits.h>
+#include <sys/utsname.h>
+#include "nls.h"
+#include "c.h"
+#include "closestream.h"
+
+#ifndef HAVE_PERSONALITY
+# include <syscall.h>
+# define personality(pers) ((long)syscall(SYS_personality, pers))
+#endif
+
+#define turn_on(_flag, _opts) \
+	do { \
+		(_opts) |= _flag;	/* OR the flag into the caller's lvalue */ \
+		if (verbose)		/* a variable named "verbose" must be in scope at the call site */ \
+			printf(_("Switching on %s.\n"), #_flag); \
+	} while(0)
+
+#ifndef UNAME26
+# define UNAME26                 0x0020000
+#endif
+#ifndef ADDR_NO_RANDOMIZE
+# define ADDR_NO_RANDOMIZE       0x0040000
+#endif
+#ifndef FDPIC_FUNCPTRS
+# define FDPIC_FUNCPTRS          0x0080000
+#endif
+#ifndef MMAP_PAGE_ZERO
+# define MMAP_PAGE_ZERO          0x0100000
+#endif
+#ifndef ADDR_COMPAT_LAYOUT
+# define ADDR_COMPAT_LAYOUT      0x0200000
+#endif
+#ifndef READ_IMPLIES_EXEC
+# define READ_IMPLIES_EXEC       0x0400000
+#endif
+#ifndef ADDR_LIMIT_32BIT
+# define ADDR_LIMIT_32BIT        0x0800000
+#endif
+#ifndef SHORT_INODE
+# define SHORT_INODE             0x1000000
+#endif
+#ifndef WHOLE_SECONDS
+# define WHOLE_SECONDS           0x2000000
+#endif
+#ifndef STICKY_TIMEOUTS
+# define STICKY_TIMEOUTS         0x4000000
+#endif
+#ifndef ADDR_LIMIT_3GB
+# define ADDR_LIMIT_3GB          0x8000000
+#endif
+
+
+struct arch_domain {			/* one entry of the execution domain table */
+	int		perval;		/* PER_* value; a negative value ends the table */
+	const char	*target_arch;	/* name compared with the requested architecture */
+	const char	*result_arch;	/* machine name verify_arch_domain() expects from uname(); NULL skips the check */
+};
+
+
+/* Print the help text to stdout and exit with EXIT_SUCCESS.  A non-zero
+ * archwrapper selects the synopsis without <arch> and hides --list. */
+static void __attribute__((__noreturn__)) usage(int archwrapper)
+{
+	fputs(USAGE_HEADER, stdout);
+	printf(archwrapper ? _(" %s [options] [<program> [<argument>...]]\n") :
+			     _(" %s [<arch>] [options] [<program> [<argument>...]]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Change the reported architecture and set personality flags.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	fputs(_(" -B, --32bit              turns on ADDR_LIMIT_32BIT\n"), stdout);
+	fputs(_(" -F, --fdpic-funcptrs     makes function pointers point to descriptors\n"), stdout);
+	fputs(_(" -I, --short-inode        turns on SHORT_INODE\n"), stdout);
+	fputs(_(" -L, --addr-compat-layout changes the way virtual memory is allocated\n"), stdout);
+	fputs(_(" -R, --addr-no-randomize  disables randomization of the virtual address space\n"), stdout);
+	fputs(_(" -S, --whole-seconds      turns on WHOLE_SECONDS\n"), stdout);
+	fputs(_(" -T, --sticky-timeouts    turns on STICKY_TIMEOUTS\n"), stdout);
+	fputs(_(" -X, --read-implies-exec  turns on READ_IMPLIES_EXEC\n"), stdout);
+	fputs(_(" -Z, --mmap-page-zero     turns on MMAP_PAGE_ZERO\n"), stdout);
+	fputs(_(" -3, --3gb                limits the used address space to a maximum of 3 GB\n"), stdout);
+	fputs(_("     --4gb                ignored (for backward compatibility only)\n"), stdout);
+	fputs(_("     --uname-2.6          turns on UNAME26\n"), stdout);
+	fputs(_(" -v, --verbose            say what options are being switched on\n"), stdout);
+
+	if (!archwrapper)
+		fputs(_("     --list               list settable architectures, and exit\n"), stdout);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	printf(USAGE_HELP_OPTIONS(26));
+	printf(USAGE_MAN_TAIL("setarch(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Returns the initialized list of execution domains, ended by perval < 0.
+ */
+static struct arch_domain *init_arch_domains(void)
+{
+	static struct utsname un;	/* static: table entries keep pointing at un.machine after return */
+	size_t i;
+
+	static struct arch_domain transitions[] =
+	{
+		{UNAME26,	"uname26",	NULL},
+		{PER_LINUX32,	"linux32",	NULL},
+		{PER_LINUX,	"linux64",	NULL},
+#if defined(__powerpc__) || defined(__powerpc64__)
+# ifdef __BIG_ENDIAN__
+		{PER_LINUX32,	"ppc32",	"ppc"},
+		{PER_LINUX32,	"ppc",		"ppc"},
+		{PER_LINUX,	"ppc64",	"ppc64"},
+		{PER_LINUX,	"ppc64pseries",	"ppc64"},
+		{PER_LINUX,	"ppc64iseries",	"ppc64"},
+# else
+		{PER_LINUX32,	"ppc32",	"ppcle"},
+		{PER_LINUX32,	"ppc",		"ppcle"},
+		{PER_LINUX32,	"ppc32le",	"ppcle"},
+		{PER_LINUX32,	"ppcle",	"ppcle"},
+		{PER_LINUX,	"ppc64le",	"ppc64le"},
+# endif
+#endif
+#if defined(__x86_64__) || defined(__i386__) || defined(__ia64__)
+		{PER_LINUX32,	"i386",		"i386"},
+		{PER_LINUX32,	"i486",		"i386"},
+		{PER_LINUX32,	"i586",		"i386"},
+		{PER_LINUX32,	"i686",		"i386"},
+		{PER_LINUX32,	"athlon",	"i386"},
+#endif
+#if defined(__x86_64__) || defined(__i386__)
+		{PER_LINUX,	"x86_64",	"x86_64"},
+#endif
+#if defined(__ia64__) || defined(__i386__)
+		{PER_LINUX,	"ia64",		"ia64"},
+#endif
+#if defined(__hppa__)
+		{PER_LINUX32,	"parisc32",	"parisc"},
+		{PER_LINUX32,	"parisc",	"parisc"},
+		{PER_LINUX,	"parisc64",	"parisc64"},
+#endif
+#if defined(__s390x__) || defined(__s390__)
+		{PER_LINUX32,	"s390",		"s390"},
+		{PER_LINUX,	"s390x",	"s390x"},
+#endif
+#if defined(__sparc64__) || defined(__sparc__)
+		{PER_LINUX32,	"sparc",	"sparc"},
+		{PER_LINUX32,	"sparc32bash",	"sparc"},
+		{PER_LINUX32,	"sparc32",	"sparc"},
+		{PER_LINUX,	"sparc64",	"sparc64"},
+#endif
+#if defined(__mips64__) || defined(__mips__)
+		{PER_LINUX32,	"mips32",	"mips"},
+		{PER_LINUX32,	"mips",		"mips"},
+		{PER_LINUX,	"mips64",	"mips64"},
+#endif
+#if defined(__alpha__)
+		{PER_LINUX,	"alpha",	"alpha"},
+		{PER_LINUX,	"alphaev5",	"alpha"},
+		{PER_LINUX,	"alphaev56",	"alpha"},
+		{PER_LINUX,	"alphaev6",	"alpha"},
+		{PER_LINUX,	"alphaev67",	"alpha"},
+#endif
+		/* place holder, will be filled up at runtime */
+		{-1,		NULL,		NULL},
+		{-1,		NULL,		NULL}
+	};
+
+	/* Add the trivial transition {PER_LINUX, machine, machine} if no
+	 * such target_arch is hardcoded yet.  */
+	uname(&un);
+	for (i = 0; transitions[i].perval >= 0; i++)
+		if (!strcmp(un.machine, transitions[i].target_arch))
+			break;
+	if (transitions[i].perval < 0) {
+		unsigned long wrdsz = CHAR_BIT * sizeof(void *);
+		if (wrdsz == 32 || wrdsz == 64) {
+			/* fill up the place holder */
+			transitions[i].perval = wrdsz == 32 ? PER_LINUX32 : PER_LINUX;
+			transitions[i].target_arch = un.machine;
+			transitions[i].result_arch = un.machine;
+		}
+	}
+
+	return transitions;
+}
+
+/*
+ * Print every target_arch name, one per line, up to the first NULL name.
+ */
+static void list_arch_domains(struct arch_domain *doms)
+{
+	size_t n;
+
+	for (n = 0; doms[n].target_arch; n++)
+		printf("%s\n", doms[n].target_arch);
+}
+
+/* Return the entry whose target_arch equals pers; NULL if doms or pers is NULL or nothing matches. */
+static struct arch_domain *get_arch_domain(struct arch_domain *doms, const char *pers)
+{
+	struct arch_domain *d;
+
+	/* d and pers are tested in the loop so a NULL argument is never dereferenced */
+	for (d = doms; d && pers && d->perval >= 0; d++)
+		if (!strcmp(pers, d->target_arch))
+			return d;
+	return NULL;
+}
+
+static void verify_arch_domain(struct arch_domain *dom, const char *wanted)
+{
+	struct utsname un;
+
+	if (!dom || !dom->result_arch)
+		return;
+
+	uname(&un);
+	if (strcmp(un.machine, dom->result_arch)) {
+		if (strcmp(dom->result_arch, "i386")
+		    || (strcmp(un.machine, "i486")
+			&& strcmp(un.machine, "i586")
+			&& strcmp(un.machine, "i686")
+			&& strcmp(un.machine, "athlon")))
+			errx(EXIT_FAILURE, _("Kernel cannot set architecture to %s"), wanted);
+	}
+}
+
+/* Parse the arch name and personality options, apply them with personality(), then exec the program or a shell. */
+int main(int argc, char *argv[])
+{
+	const char *arch = NULL;
+	unsigned long options = 0;
+	int verbose = 0;
+	int archwrapper, c;
+	struct arch_domain *doms, *target = NULL;
+	unsigned long pers_value = 0;
+	char *shell = NULL, *shell_arg = NULL;
+
+	/* Options without equivalent short options */
+	enum {
+		OPT_4GB = CHAR_MAX + 1,
+		OPT_UNAME26,
+		OPT_LIST
+	};
+
+	/* Options --3gb and --4gb are for compatibility with an old
+	 * Debian setarch implementation.  */
+	static const struct option longopts[] = {
+		{"help",		no_argument,	NULL,	'h'},
+		{"version",		no_argument,	NULL,	'V'},
+		{"verbose",		no_argument,	NULL,	'v'},
+		{"addr-no-randomize",	no_argument,	NULL,	'R'},
+		{"fdpic-funcptrs",	no_argument,	NULL,	'F'},
+		{"mmap-page-zero",	no_argument,	NULL,	'Z'},
+		{"addr-compat-layout",	no_argument,	NULL,	'L'},
+		{"read-implies-exec",	no_argument,	NULL,	'X'},
+		{"32bit",		no_argument,	NULL,	'B'},
+		{"short-inode",		no_argument,	NULL,	'I'},
+		{"whole-seconds",	no_argument,	NULL,	'S'},
+		{"sticky-timeouts",	no_argument,	NULL,	'T'},
+		{"3gb",			no_argument,	NULL,	'3'},
+		{"4gb",			no_argument,	NULL,	OPT_4GB},
+		{"uname-2.6",		no_argument,	NULL,	OPT_UNAME26},
+		{"list",		no_argument,	NULL,	OPT_LIST},
+		{NULL,			0,		NULL,	0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	if (argc < 1) {
+		warnx(_("Not enough arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+	archwrapper = strcmp(program_invocation_short_name, "setarch") != 0;
+	if (archwrapper) {
+		arch = program_invocation_short_name;	/* symlinks to setarch */
+
+		/* Don't use ifdef sparc here, we get "Unrecognized architecture"
+		 * error message later if necessary */
+		if (strcmp(arch, "sparc32bash") == 0) {
+			shell = "/bin/bash";
+			shell_arg = "";
+			goto set_arch;
+		}
+	} else {
+		if (1 < argc && *argv[1] != '-') {
+			arch = argv[1];
+			argv[1] = argv[0];	/* for getopt_long() to get the program name */
+			argv++;
+			argc--;
+		}
+	}
+
+	while ((c = getopt_long(argc, argv, "+hVv3BFILRSTXZ", longopts, NULL)) != -1) {
+		switch (c) {
+		case 'h':
+			usage(archwrapper);
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'v':
+			verbose = 1;
+			break;
+		case 'R':
+			turn_on(ADDR_NO_RANDOMIZE, options);
+			break;
+		case 'F':
+			turn_on(FDPIC_FUNCPTRS, options);
+			break;
+		case 'Z':
+			turn_on(MMAP_PAGE_ZERO, options);
+			break;
+		case 'L':
+			turn_on(ADDR_COMPAT_LAYOUT, options);
+			break;
+		case 'X':
+			turn_on(READ_IMPLIES_EXEC, options);
+			break;
+		case 'B':
+			turn_on(ADDR_LIMIT_32BIT, options);
+			break;
+		case 'I':
+			turn_on(SHORT_INODE, options);
+			break;
+		case 'S':
+			turn_on(WHOLE_SECONDS, options);
+			break;
+		case 'T':
+			turn_on(STICKY_TIMEOUTS, options);
+			break;
+		case '3':
+			turn_on(ADDR_LIMIT_3GB, options);
+			break;
+		case OPT_4GB:	/* just ignore this one */
+			break;
+		case OPT_UNAME26:
+			turn_on(UNAME26, options);
+			break;
+		case OPT_LIST:
+			if (!archwrapper) {
+				list_arch_domains(init_arch_domains());
+				return EXIT_SUCCESS;
+			} else
+				warnx(_("unrecognized option '--list'"));
+			/* fallthrough */
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (!arch && !options)
+		errx(EXIT_FAILURE, _("no architecture argument or personality flags specified"));
+
+	argc -= optind;
+	argv += optind;
+
+set_arch:
+	/* get execution domain (architecture) */
+	if (arch) {
+		doms = init_arch_domains();
+		target = get_arch_domain(doms, arch);
+
+		if (!target)
+			errx(EXIT_FAILURE, _("%s: Unrecognized architecture"), arch);
+		pers_value = target->perval;
+	}
+
+	/* add personality flags */
+	pers_value |= options;
+
+	/* call kernel */
+	if (personality(pers_value) < 0) {
+		/*
+		 * Depending on architecture and kernel version, personality
+		 * syscall is either capable or incapable of returning an error.
+		 * If the return value is not an error, then it's the previous
+		 * personality value, which can be an arbitrary value
+		 * indistinguishable from an error value; hence the second call.
+		 */
+		if (personality(pers_value) < 0)	/* arch is NULL when only flags were given */
+			err(EXIT_FAILURE, _("failed to set personality to %s"), arch ? arch : _("requested flags"));
+	}
+
+	/* make sure architecture is set as expected */
+	if (arch)
+		verify_arch_domain(target, arch);
+
+	if (!argc) {
+		shell = "/bin/sh";
+		shell_arg = "-sh";
+	}
+	if (verbose) {
+		printf(_("Execute command `%s'.\n"), shell ? shell : argv[0]);
+		/* flush all output streams before exec */
+		fflush(NULL);
+	}
+
+	/* Execute shell */
+	if (shell) {
+		execl(shell, shell_arg, (char *)NULL);	/* variadic list needs a typed null pointer */
+		errexec(shell);
+	}
+
+	/* Execute on command line specified command */
+	execvp(argv[0], argv);
+	errexec(argv[0]);
+}
diff --git a/sys-utils/setpriv.1 b/sys-utils/setpriv.1
new file mode 100644
index 0000000..9ff9058
--- /dev/null
+++ b/sys-utils/setpriv.1
@@ -0,0 +1,222 @@
+.TH SETPRIV 1 "July 2014" "util-linux" "User Commands"
+.SH NAME
+setpriv \- run a program with different Linux privilege settings
+.SH SYNOPSIS
+.B setpriv
+[options]
+.I program
+.RI [ arguments ]
+.SH DESCRIPTION
+Sets or queries various Linux privilege settings that are inherited across
+.BR execve (2).
+.PP
+In comparison to
+.BR su (1)
+and
+.BR runuser (1),
+.BR setpriv (1)
+neither uses PAM, nor does it prompt for a password.
+It is a simple, non-set-user-ID wrapper around
+.BR execve (2),
+and can be used to drop privileges in the same way as
+.BR setuidgid (8)
+from
+.BR daemontools ,
+.BR chpst (8)
+from
+.BR runit ,
+or similar tools shipped by other service managers.
+.SH OPTIONS
+.TP
+.B \-\-clear\-groups
+Clear supplementary groups.
+.TP
+.BR \-d , " \-\-dump"
+Dump current privilege state.  Can be specified more than once to show extra,
+mostly useless, information.  Incompatible with all other options.
+.TP
+.B \-\-groups \fIgroup\fR...
+Set supplementary groups.  The argument is a comma-separated list of GIDs or names.
+.TP
+.BR \-\-inh\-caps " (" + | \- ) \fIcap "...  or  " \-\-ambient-caps " (" + | \- ) \fIcap "...  or  " \-\-bounding\-set " (" + | \- ) \fIcap ...
+Set the inheritable capabilities, ambient capabilities or the capability bounding set.  See
+.BR capabilities (7).
+The argument is a comma-separated list of
+.BI + cap
+and
+.BI \- cap
+entries, which add or remove an entry respectively. \fIcap\fR can either be a
+human-readable name as seen in
+.BR capabilities (7)
+without the \fIcap_\fR prefix or of the format
+.BI cap_N ,
+where \fIN\fR is the internal capability index used by Linux.
+.B +all
+and
+.B \-all
+can be used to add or remove all caps.  The set of capabilities starts out as
+the current inheritable set for
+.BR \-\-inh\-caps ,
+the current ambient set for
+.B \-\-ambient\-caps
+and the current bounding set for
+.BR \-\-bounding\-set .
+If you drop something from the bounding set without also dropping it from the
+inheritable set, you are likely to become confused.  Do not do that.
+.TP
+.B \-\-keep\-groups
+Preserve supplementary groups.  Only useful in conjunction with
+.BR \-\-rgid ,
+.BR \-\-egid ", or"
+.BR \-\-regid .
+.TP
+.B \-\-init\-groups
+Initialize supplementary groups using
+.BR initgroups "(3)."
+Only useful in conjunction with
+.B \-\-ruid
+or
+.BR \-\-reuid .
+.TP
+.B \-\-list\-caps
+List all known capabilities.  This option must be specified alone.
+.TP
+.B \-\-no\-new\-privs
+Set the
+.I no_new_privs
+bit.  With this bit set,
+.BR execve (2)
+will not grant new privileges.
+For example, the set-user-ID and set-group-ID bits as well
+as file capabilities will be disabled.  (Executing binaries with these bits set
+will still work, but they will not gain privileges.  Certain LSMs, especially
+AppArmor, may result in failures to execute certain programs.)  This bit is
+inherited by child processes and cannot be unset.  See
+.BR prctl (2)
+and
+.I Documentation/\:prctl/\:no_\:new_\:privs.txt
+in the Linux kernel source.
+.sp
+The no_new_privs bit is supported since Linux 3.5.
+.TP
+.BI \-\-rgid " gid\fR, " \-\-egid " gid\fR, " \-\-regid " gid"
+Set the real, effective, or both GIDs.  The \fIgid\fR argument can be
+given as textual group name.
+.sp
+For safety, you must specify one of
+.BR \-\-clear\-groups ,
+.BR \-\-groups ,
+.BR \-\-keep\-groups ", or"
+.B \-\-init\-groups
+if you set any primary
+.IR gid .
+.TP
+.BI \-\-ruid " uid\fR, " \-\-euid " uid\fR, " \-\-reuid " uid"
+Set the real, effective, or both UIDs.  The \fIuid\fR argument can be
+given as textual login name.
+.sp
+Setting a
+.I uid
+or
+.I gid
+does not change capabilities, although the exec call at the end might change
+capabilities.  This means that, if you are root, you probably want to do
+something like:
+.sp
+.B "        setpriv \-\-reuid=1000 \-\-regid=1000 \-\-inh\-caps=\-all"
+.TP
+.BR \-\-securebits " (" + | \- ) \fIsecurebit ...
+Set or clear securebits.  The argument is a comma-separated list.
+The valid securebits are
+.IR noroot ,
+.IR noroot_locked ,
+.IR no_setuid_fixup ,
+.IR no_setuid_fixup_locked ,
+and
+.IR keep_caps_locked .
+.I keep_caps
+is cleared by
+.BR execve (2)
+and is therefore not allowed.
+.TP
+.BR "\-\-pdeathsig keep" | clear | <signal>
+Keep, clear or set the parent death signal.  Some LSMs, most notably SELinux and
+AppArmor, clear the signal when the process' credentials change.  Using
+\fB\-\-pdeathsig keep\fR will restore the parent death signal after changing
+credentials to remedy that situation.
+.TP
+.BI \-\-selinux\-label " label"
+Request a particular SELinux transition (using a transition on exec, not
+dyntrans).  This will fail and cause
+.BR setpriv (1)
+to abort if SELinux is not in use, and the transition may be ignored or cause
+.BR execve (2)
+to fail at SELinux's whim.  (In particular, this is unlikely to work in
+conjunction with
+.IR no_new_privs .)
+This is similar to
+.BR runcon (1).
+.TP
+.BI \-\-apparmor\-profile " profile"
+Request a particular AppArmor profile (using a transition on exec).  This will
+fail and cause
+.BR setpriv (1)
+to abort if AppArmor is not in use, and the transition may be ignored or cause
+.BR execve (2)
+to fail at AppArmor's whim.
+.TP
+.BI \-\-reset\-env
+Clears all the environment variables except TERM; initializes the environment variables HOME, SHELL, USER, LOGNAME
+according to the user's passwd entry; sets PATH to \fI/usr/local/bin:/bin:/usr/bin\fR for a regular user and to
+\fI/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin\fR for root.
+.sp
+The environment variable PATH may be different on systems where /bin and /sbin
+are merged into /usr.  The environment variable SHELL defaults to \fI/bin/sh\fR if none is given in the user's
+passwd entry.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH NOTES
+If applying any specified option fails,
+.I program
+will not be run and
+.B setpriv
+will return with exit code 127.
+.PP
+Be careful with this tool \-\- it may have unexpected security consequences.
+For example, setting no_new_privs and then execing a program that is
+SELinux\-confined (as this tool would do) may prevent the SELinux
+restrictions from taking effect.
+.SH EXAMPLE
+If you're looking for behaviour similar to
+.BR su (1)/ runuser "(1), or " sudo (8)
+(without the
+.B -g
+option), try something like:
+.sp
+.B "    setpriv \-\-reuid=1000 \-\-regid=1000 \-\-init\-groups"
+.PP
+If you want to mimic daemontools'
+.BR setuidgid (8),
+try:
+.sp
+.B "    setpriv \-\-reuid=1000 \-\-regid=1000 \-\-clear\-groups"
+.SH SEE ALSO
+.BR runuser (1),
+.BR su (1),
+.BR prctl (2),
+.BR capabilities (7)
+.SH AUTHOR
+.MT luto@amacapital.net
+Andy Lutomirski
+.ME
+.SH AVAILABILITY
+The
+.B setpriv
+command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/setpriv.c b/sys-utils/setpriv.c
new file mode 100644
index 0000000..828ddc1
--- /dev/null
+++ b/sys-utils/setpriv.c
@@ -0,0 +1,1096 @@
+/*
+ * setpriv(1) - set various kernel privilege bits and run something
+ *
+ * Copyright (C) 2012 Andy Lutomirski <luto@amacapital.net>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <cap-ng.h>
+#include <errno.h>
+#include <getopt.h>
+#include <grp.h>
+#include <linux/securebits.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "closestream.h"
+#include "nls.h"
+#include "optutils.h"
+#include "strutils.h"
+#include "xalloc.h"
+#include "pathnames.h"
+#include "signames.h"
+#include "env.h"
+
+#ifndef PR_SET_NO_NEW_PRIVS
+# define PR_SET_NO_NEW_PRIVS 38
+#endif
+#ifndef PR_GET_NO_NEW_PRIVS
+# define PR_GET_NO_NEW_PRIVS 39
+#endif
+
+#ifndef PR_CAP_AMBIENT
+# define PR_CAP_AMBIENT		47
+#  define PR_CAP_AMBIENT_IS_SET	1
+#  define PR_CAP_AMBIENT_RAISE	2
+#  define PR_CAP_AMBIENT_LOWER	3
+#endif
+
+#define SETPRIV_EXIT_PRIVERR 127	/* how we exit when we fail to set privs */
+
+/* The shell to set SHELL env.variable if none is given in the user's passwd entry.  */
+#define DEFAULT_SHELL "/bin/sh"
+
+static gid_t get_group(const char *s, const char *err);
+
+enum cap_type {				/* capability sets understood by has_cap() */
+	CAP_TYPE_EFFECTIVE   = CAPNG_EFFECTIVE,	/* the CAPNG_* valued members are cast to capng_type_t in has_cap() */
+	CAP_TYPE_PERMITTED   = CAPNG_PERMITTED,
+	CAP_TYPE_INHERITABLE = CAPNG_INHERITABLE,
+	CAP_TYPE_BOUNDING    = CAPNG_BOUNDING_SET,
+	CAP_TYPE_AMBIENT     = (1 << 4)	/* not a CAPNG_* value; has_cap() queries it via prctl(PR_CAP_AMBIENT) */
+};
+
+/*
+ * Note: We are subject to https://bugzilla.redhat.com/show_bug.cgi?id=895105
+ * and we will therefore have problems if new capabilities are added.  Once
+ * that bug is fixed, I'll (Andy Lutomirski) submit a corresponding fix to
+ * setpriv.  In the mean time, the code here tries to work reasonably well.
+ */
+
+struct privctx {	/* collected settings: flags, ids, groups, caps, securebits, pdeathsig, LSM labels */
+	unsigned int
+		nnp:1,			/* no_new_privs */
+		have_ruid:1,		/* real uid */
+		have_euid:1,		/* effective uid */
+		have_rgid:1,		/* real gid */
+		have_egid:1,		/* effective gid */
+		have_passwd:1,		/* passwd entry */
+		have_groups:1,		/* add groups */
+		keep_groups:1,		/* keep groups */
+		clear_groups:1,		/* remove groups */
+		init_groups:1,		/* initialize groups */
+		reset_env:1,		/* reset environment */
+		have_securebits:1;	/* securebits -- presumably marks "securebits" below as given; confirm at the use site */
+
+	/* uids and gids */
+	uid_t ruid, euid;
+	gid_t rgid, egid;
+
+	/* real user passwd entry */
+	struct passwd passwd;
+
+	/* supplementary groups */
+	size_t num_groups;
+	gid_t *groups;
+
+	/* caps */
+	const char *caps_to_inherit;
+	const char *ambient_caps;
+	const char *bounding_set;
+
+	/* securebits */
+	int securebits;
+	/* parent death signal (<0 clear, 0 nothing, >0 signal) */
+	int pdeathsig;
+
+	/* LSMs */
+	const char *selinux_label;
+	const char *apparmor_profile;
+};
+
+/* Print the help text to stdout and exit with EXIT_SUCCESS. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options] <program> [<argument>...]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Run a program with different privilege settings.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -d, --dump                  show current state (and do not exec)\n"), out);
+	fputs(_(" --nnp, --no-new-privs       disallow granting new privileges\n"), out);
+	fputs(_(" --ambient-caps <caps,...>   set ambient capabilities\n"), out);
+	fputs(_(" --inh-caps <caps,...>       set inheritable capabilities\n"), out);
+	fputs(_(" --bounding-set <caps>       set capability bounding set\n"), out);
+	fputs(_(" --ruid <uid|user>           set real uid\n"), out);
+	fputs(_(" --euid <uid|user>           set effective uid\n"), out);
+	fputs(_(" --rgid <gid|group>          set real gid\n"), out);
+	fputs(_(" --egid <gid|group>          set effective gid\n"), out);
+	fputs(_(" --reuid <uid|user>          set real and effective uid\n"), out);
+	fputs(_(" --regid <gid|group>         set real and effective gid\n"), out);
+	fputs(_(" --clear-groups              clear supplementary groups\n"), out);
+	fputs(_(" --keep-groups               keep supplementary groups\n"), out);
+	fputs(_(" --init-groups               initialize supplementary groups\n"), out);
+	fputs(_(" --groups <group,...>        set supplementary groups by GID or name\n"), out);
+	fputs(_(" --securebits <bits>         set securebits\n"), out);
+	fputs(_(" --pdeathsig keep|clear|<signame>\n"
+	        "                             set or clear parent death signal\n"), out);
+	fputs(_(" --selinux-label <label>     set SELinux label\n"), out);
+	fputs(_(" --apparmor-profile <pr>     set AppArmor profile\n"), out);
+	fputs(_(" --reset-env                 clear all environment and initialize\n"
+		"                               HOME, SHELL, USER, LOGNAME and PATH\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	fprintf(out, USAGE_HELP_OPTIONS(29));	/* same stream as the rest of the help */
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_(" This tool can be dangerous.  Read the manpage, and be careful.\n"), out);
+	fprintf(out, USAGE_MAN_TAIL("setpriv(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Highest capability number to probe.  Parsed once from
+ * _PATH_PROC_CAPLASTCAP and cached; CAP_LAST_CAP is the fallback guess.
+ */
+static int real_cap_last_cap(void)
+{
+	/* CAP_LAST_CAP is untrustworthy. */
+	static int cached = -1;
+	FILE *fp;
+
+	if (cached != -1)
+		return cached;
+
+	fp = fopen(_PATH_PROC_CAPLASTCAP, "r");
+	if (fp) {
+		int nread = fscanf(fp, "%d", &cached);
+
+		fclose(fp);
+		if (nread == 1)
+			return cached;
+	}
+	cached = CAP_LAST_CAP;	/* guess */
+	return cached;
+}
+
+/* Tell whether capability i is in set "which", as reported by cap-ng or
+ * prctl(); an unknown set yields a warning and -1. */
+static int has_cap(enum cap_type which, unsigned int i)
+{
+	if (which == CAP_TYPE_AMBIENT)
+		return prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET,
+				(unsigned long) i, 0UL, 0UL);
+
+	/* the other known types carry their capng_type_t value directly */
+	if (which == CAP_TYPE_EFFECTIVE || which == CAP_TYPE_PERMITTED ||
+	    which == CAP_TYPE_INHERITABLE || which == CAP_TYPE_BOUNDING)
+		return capng_have_capability((capng_type_t)which, i);
+
+	warnx(_("invalid capability type"));
+	return -1;
+}
+
+/* Writes the names of the capabilities in set which to f, comma separated; returns how many, or -1 if the first query fails. */
+static int print_caps(FILE *f, enum cap_type which)
+{
+	int i, n = 0, max = real_cap_last_cap();
+
+	for (i = 0; i <= max; i++) {
+		int ret = has_cap(which, i);
+
+		if (i == 0 && ret < 0)
+			return -1;
+
+		if (ret == 1) {
+			const char *name = capng_capability_to_name(i);
+			if (n)
+				fputc(',', f);
+			if (name)
+				fputs(name, f);
+			else
+				/* cap-ng may not know caps newer than its
+				 * CAP_LAST_CAP; fall back to a numeric name,
+				 * written to f like the rest of the list. */
+				fprintf(f, "cap_%d", i);
+			n++;
+		}
+	}
+
+	return n;
+}
+
+/* If bit is set in *bits, print name (after a comma unless *first) and clear both *first and the bit. */
+static void dump_one_secbit(int *first, int *bits, int bit, const char *name)
+{
+	if (!(*bits & bit))
+		return;
+	if (!*first)
+		printf(",");
+	*first = 0;
+	fputs(name, stdout);
+	*bits &= ~bit;
+}
+
+/* Print the "Securebits:" line: known bit names, leftover bits in hex, or "[none]". */
+static void dump_securebits(void)
+{
+	static const struct { int bit; const char *name; } known[] = {
+		{ SECBIT_NOROOT,			"noroot" },
+		{ SECBIT_NOROOT_LOCKED,			"noroot_locked" },
+		{ SECBIT_NO_SETUID_FIXUP,		"no_setuid_fixup" },
+		{ SECBIT_NO_SETUID_FIXUP_LOCKED,	"no_setuid_fixup_locked" },
+		{ SECBIT_KEEP_CAPS_LOCKED,		"keep_caps_locked" }
+	};
+	int first = 1;
+	int bits = prctl(PR_GET_SECUREBITS, 0, 0, 0, 0);
+	size_t k;
+
+	if (bits < 0) {
+		warnx(_("getting process secure bits failed"));
+		return;
+	}
+
+	printf(_("Securebits: "));
+	bits &= ~SECBIT_KEEP_CAPS;	/* keep_caps is never listed */
+	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++)
+		dump_one_secbit(&first, &bits, known[k].bit, known[k].name);
+
+	if (bits) {	/* whatever is left has no known name */
+		printf("%s0x%x", first ? "" : ",", (unsigned)bits);
+		first = 0;
+	}
+
+	if (first)
+		printf(_("[none]\n"));
+	else
+		printf("\n");
+}
+
+/*
+ * Read _PATH_PROC_ATTR_CURRENT and print its content as "<name>: <label>".
+ *
+ * One trailing newline is stripped.  Failures to open or read the file,
+ * and content that fills the buffer, are reported as warnings and nothing
+ * is printed.
+ */
+static void dump_label(const char *name)
+{
+	char label[4097];
+	ssize_t nread;
+	int fd, saved_errno;
+
+	fd = open(_PATH_PROC_ATTR_CURRENT, O_RDONLY);
+	if (fd == -1) {
+		warn(_("cannot open %s"), _PATH_PROC_ATTR_CURRENT);
+		return;
+	}
+
+	nread = read(fd, label, sizeof(label));
+	saved_errno = errno;	/* close() may overwrite errno */
+	close(fd);
+
+	if (nread < 0) {
+		errno = saved_errno;
+		warn(_("cannot read %s"), name);
+		return;
+	}
+	if ((size_t)nread >= sizeof(label) - 1) {
+		warnx(_("%s: too long"), name);
+		return;
+	}
+
+	label[nread] = 0;
+	if (nread > 0 && label[nread - 1] == '\n')
+		label[nread - 1] = 0;
+	printf("%s: %s\n", name, label);
+}
+
+/*
+ * Print the "Supplementary groups:" line: a comma-separated list of the
+ * numeric group IDs returned by getgroups(), or "[none]" for an empty list.
+ */
+static void dump_groups(void)
+{
+	gid_t *list;
+	int idx, count;
+
+	/* first call only asks how many entries there are */
+	count = getgroups(0, NULL);
+	if (count < 0) {
+		warn("getgroups failed");
+		return;
+	}
+
+	list = xmalloc(count * sizeof(gid_t));
+	count = getgroups(count, list);
+	if (count < 0) {
+		free(list);
+		warn("getgroups failed");
+		return;
+	}
+
+	printf(_("Supplementary groups: "));
+	if (count == 0)
+		printf(_("[none]"));
+	for (idx = 0; idx < count; idx++) {
+		if (idx > 0)
+			printf(",");
+		printf("%ld", (long)list[idx]);
+	}
+	printf("\n");
+
+	free(list);
+}
+
+/*
+ * Print the "Parent death signal:" line: the signal name when
+ * signum_to_signame() knows it, otherwise the number, or "[none]" when
+ * the value is zero.
+ */
+static void dump_pdeathsig(void)
+{
+	const char *signame = NULL;
+	int sig;
+
+	if (prctl(PR_GET_PDEATHSIG, &sig) != 0) {
+		warn(_("get pdeathsig failed"));
+		return;
+	}
+
+	printf("Parent death signal: ");
+
+	if (sig)
+		signame = signum_to_signame(sig);
+
+	if (signame != NULL)
+		printf("%s\n", signame);
+	else if (sig)
+		printf("%d\n", sig);
+	else
+		printf("[none]\n");
+}
+
+/*
+ * Print the privilege state of the current process to stdout.
+ *
+ * @dumplevel selects verbosity: the effective and permitted capability
+ * sets are shown from level 2, the saved uid/gid from level 3.  Failed
+ * queries are reported as warnings and the dump continues.
+ */
+static void dump(int dumplevel)
+{
+	int x;
+	uid_t ru, eu, su;
+	gid_t rg, eg, sg;
+
+	if (getresuid(&ru, &eu, &su) == 0) {
+		printf(_("uid: %u\n"), ru);
+		printf(_("euid: %u\n"), eu);
+		/* Saved and fs uids always equal euid. */
+		if (3 <= dumplevel)
+			printf(_("suid: %u\n"), su);
+	} else
+		warn(_("getresuid failed"));
+
+	if (getresgid(&rg, &eg, &sg) == 0) {
+		printf("gid: %ld\n", (long)rg);
+		printf("egid: %ld\n", (long)eg);
+		/* Saved and fs gids always equal egid. */
+		if (dumplevel >= 3)
+			printf("sgid: %ld\n", (long)sg);
+	} else
+		warn(_("getresgid failed"));
+
+	dump_groups();
+
+	/* this only queries the flag, so report a failed "get" */
+	x = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+	if (0 <= x)
+		printf("no_new_privs: %d\n", x);
+	else
+		warn("getting no_new_privs failed");
+
+	if (2 <= dumplevel) {
+		printf(_("Effective capabilities: "));
+		if (print_caps(stdout, CAP_TYPE_EFFECTIVE) == 0)
+			printf(_("[none]"));
+		printf("\n");
+
+		printf(_("Permitted capabilities: "));
+		if (print_caps(stdout, CAP_TYPE_PERMITTED) == 0)
+			printf(_("[none]"));
+		printf("\n");
+	}
+
+	printf(_("Inheritable capabilities: "));
+	if (print_caps(stdout, CAP_TYPE_INHERITABLE) == 0)
+		printf(_("[none]"));
+	printf("\n");
+
+	/* print_caps() returns -1 when the ambient query itself fails */
+	printf(_("Ambient capabilities: "));
+	x = print_caps(stdout, CAP_TYPE_AMBIENT);
+	if (x == 0)
+		printf(_("[none]"));
+	if (x < 0)
+		printf(_("[unsupported]"));
+	printf("\n");
+
+	printf(_("Capability bounding set: "));
+	if (print_caps(stdout, CAP_TYPE_BOUNDING) == 0)
+		printf(_("[none]"));
+	printf("\n");
+
+	dump_securebits();
+	dump_pdeathsig();
+
+	if (access(_PATH_SYS_SELINUX, F_OK) == 0)
+		dump_label(_("SELinux label"));
+
+	if (access(_PATH_SYS_APPARMOR, F_OK) == 0) {
+		dump_label(_("AppArmor profile"));
+	}
+}
+
+/*
+ * Print the name of every capability from 0 up to real_cap_last_cap(),
+ * one per line.  A capability libcap-ng cannot name yields a warning.
+ */
+static void list_known_caps(void)
+{
+	const int last = real_cap_last_cap();
+	int cap;
+
+	for (cap = 0; cap <= last; cap++) {
+		const char *name = capng_capability_to_name(cap);
+
+		if (!name) {
+			warnx(_("cap %d: libcap-ng is broken"), cap);
+			continue;
+		}
+		printf("%s\n", name);
+	}
+}
+
+/*
+ * Parse the comma-separated group list @str into opts->groups and
+ * opts->num_groups, and set opts->have_groups.
+ *
+ * Each item is resolved by get_group().  The string is tokenized twice:
+ * once to count the items and once to convert them.
+ */
+static void parse_groups(struct privctx *opts, const char *str)
+{
+	char *groups = xstrdup(str);
+	char *buf = groups;	/* We'll reuse it */
+	char *c;
+	size_t i = 0;
+
+	opts->have_groups = 1;
+	opts->num_groups = 0;
+	while ((c = strsep(&groups, ",")))
+		opts->num_groups++;
+
+	/* Start again */
+	strcpy(buf, str);	/* It's exactly the right length */
+	groups = buf;
+
+	opts->groups = xcalloc(opts->num_groups, sizeof(gid_t));
+	while ((c = strsep(&groups, ",")))
+		opts->groups[i++] = get_group(c, _("Invalid supplementary group id"));
+
+	/* strsep() has left "groups" NULL by now, so free the allocation
+	 * through the saved pointer */
+	free(buf);
+}
+
+/*
+ * Interpret the --pdeathsig argument @str and store it in opts->pdeathsig:
+ *   "keep"   - the current parent death signal, read via prctl()
+ *   "clear"  - -1
+ *   other    - the result of signame_to_signum(); a negative result is fatal
+ */
+static void parse_pdeathsig(struct privctx *opts, const char *str)
+{
+	if (strcmp(str, "keep") == 0) {
+		if (prctl(PR_GET_PDEATHSIG, &opts->pdeathsig) != 0)
+			errx(SETPRIV_EXIT_PRIVERR,
+				 _("failed to get parent death signal"));
+		return;
+	}
+
+	if (strcmp(str, "clear") == 0) {
+		opts->pdeathsig = -1;
+		return;
+	}
+
+	opts->pdeathsig = signame_to_signum(str);
+	if (opts->pdeathsig < 0)
+		errx(EXIT_FAILURE, _("unknown signal: %s"), str);
+}
+
+/*
+ * Apply the requested real/effective UIDs.  An ID that was not requested
+ * keeps its current value; the saved UID is always set to the new
+ * effective UID.  Any failure is fatal.
+ */
+static void do_setresuid(const struct privctx *opts)
+{
+	uid_t cur_r, cur_e, cur_s;
+	uid_t new_r, new_e;
+
+	if (getresuid(&cur_r, &cur_e, &cur_s) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("getresuid failed"));
+
+	new_r = opts->have_ruid ? opts->ruid : cur_r;
+	new_e = opts->have_euid ? opts->euid : cur_e;
+
+	/* Also copy effective to saved (for paranoia). */
+	if (setresuid(new_r, new_e, new_e) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("setresuid failed"));
+}
+
+/*
+ * Apply the requested real/effective GIDs.  An ID that was not requested
+ * keeps its current value; the saved GID is always set to the new
+ * effective GID.  Any failure is fatal.
+ */
+static void do_setresgid(const struct privctx *opts)
+{
+	gid_t cur_r, cur_e, cur_s;
+	gid_t new_r, new_e;
+
+	if (getresgid(&cur_r, &cur_e, &cur_s) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("getresgid failed"));
+
+	new_r = opts->have_rgid ? opts->rgid : cur_r;
+	new_e = opts->have_egid ? opts->egid : cur_e;
+
+	/* Also copy effective to saved (for paranoia). */
+	if (setresgid(new_r, new_e, new_e) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("setresgid failed"));
+}
+
+/*
+ * Add @cap to the effective set in libcap-ng's state, but only when it is
+ * already in the permitted set.
+ */
+static void bump_cap(unsigned int cap)
+{
+	if (!capng_have_capability(CAPNG_PERMITTED, cap))
+		return;
+
+	capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap);
+}
+
+/*
+ * Add or drop capability @cap in the set selected by @type.
+ *
+ * Ambient capabilities are raised (CAPNG_ADD) or lowered (any other
+ * action) directly through prctl(); the other sets go through
+ * capng_update().  Returns the result of the underlying call; an
+ * unsupported @type is fatal.
+ */
+static int cap_update(capng_act_t action,
+		enum cap_type type, unsigned int cap)
+{
+	if (type == CAP_TYPE_AMBIENT) {
+		const int op = (action == CAPNG_ADD) ? PR_CAP_AMBIENT_RAISE
+						     : PR_CAP_AMBIENT_LOWER;
+
+		return prctl(PR_CAP_AMBIENT, op,
+				(unsigned long) cap, 0UL, 0UL);
+	}
+
+	switch (type) {
+	case CAP_TYPE_EFFECTIVE:
+	case CAP_TYPE_BOUNDING:
+	case CAP_TYPE_INHERITABLE:
+	case CAP_TYPE_PERMITTED:
+		return capng_update(action, (capng_type_t) type, cap);
+	default:
+		break;
+	}
+
+	errx(EXIT_FAILURE, _("unsupported capability type"));
+	return -1;
+}
+
+/*
+ * Apply the comma-separated capability list @caps to the set @type.
+ *
+ * Every item must start with '+' (add) or '-' (drop), followed by "all",
+ * a name known to libcap-ng, or "cap_<number>" with a number no larger
+ * than real_cap_last_cap().  Malformed or unknown items are fatal.
+ */
+static void do_caps(enum cap_type type, const char *caps)
+{
+	char *my_caps = xstrdup(caps);
+	char *cursor = my_caps;	/* strsep() advances this, ends up NULL */
+	char *c;
+
+	while ((c = strsep(&cursor, ","))) {
+		capng_act_t action;
+		if (*c == '+')
+			action = CAPNG_ADD;
+		else if (*c == '-')
+			action = CAPNG_DROP;
+		else
+			errx(EXIT_FAILURE, _("bad capability string"));
+
+		if (!strcmp(c + 1, "all")) {
+			int i;
+			/* It would be really bad if -all didn't drop all
+			 * caps.  It's better to just fail. */
+			if (real_cap_last_cap() > CAP_LAST_CAP)
+				errx(SETPRIV_EXIT_PRIVERR,
+				     _("libcap-ng is too old for \"all\" caps"));
+			for (i = 0; i <= CAP_LAST_CAP; i++)
+				cap_update(action, type, i);
+		} else {
+			int cap = capng_name_to_capability(c + 1);
+			if (0 <= cap)
+				cap_update(action, type, cap);
+			else if (sscanf(c + 1, "cap_%d", &cap) == 1
+			    && 0 <= cap && cap <= real_cap_last_cap())
+				cap_update(action, type, cap);
+			else
+				errx(EXIT_FAILURE,
+				     _("unknown capability \"%s\""), c + 1);
+		}
+	}
+
+	/* free through the original pointer; the strsep() cursor is NULL */
+	free(my_caps);
+}
+
+/*
+ * Compute the securebits to apply from the comma-separated list @arg.
+ *
+ * Starts from the current securebits of the process (refusing to continue
+ * when an unrecognized bit is already set), then applies each "+name" or
+ * "-name" item.  "-all" clears everything; "+all" and "keep_caps" are
+ * rejected.  SECBIT_KEEP_CAPS is always set in the result.  The result is
+ * stored in opts->securebits and opts->have_securebits is set.
+ */
+static void parse_securebits(struct privctx *opts, const char *arg)
+{
+	char *buf = xstrdup(arg);
+	char *cursor = buf;	/* strsep() advances this, ends up NULL */
+	char *c;
+
+	opts->have_securebits = 1;
+	opts->securebits = prctl(PR_GET_SECUREBITS, 0, 0, 0, 0);
+	if (opts->securebits < 0)
+		err(SETPRIV_EXIT_PRIVERR, _("getting process secure bits failed"));
+
+	if (opts->securebits & ~(int)(SECBIT_NOROOT |
+				      SECBIT_NOROOT_LOCKED |
+				      SECBIT_NO_SETUID_FIXUP |
+				      SECBIT_NO_SETUID_FIXUP_LOCKED |
+				      SECBIT_KEEP_CAPS |
+				      SECBIT_KEEP_CAPS_LOCKED))
+		errx(SETPRIV_EXIT_PRIVERR,
+		     _("unrecognized securebit set -- refusing to adjust"));
+
+	while ((c = strsep(&cursor, ","))) {
+		if (*c != '+' && *c != '-')
+			errx(EXIT_FAILURE, _("bad securebits string"));
+
+		if (!strcmp(c + 1, "all")) {
+			if (*c == '-')
+				opts->securebits = 0;
+			else
+				errx(EXIT_FAILURE,
+				     _("+all securebits is not allowed"));
+		} else {
+			int bit;
+			if (!strcmp(c + 1, "noroot"))
+				bit = SECBIT_NOROOT;
+			else if (!strcmp(c + 1, "noroot_locked"))
+				bit = SECBIT_NOROOT_LOCKED;
+			else if (!strcmp(c + 1, "no_setuid_fixup"))
+				bit = SECBIT_NO_SETUID_FIXUP;
+			else if (!strcmp(c + 1, "no_setuid_fixup_locked"))
+				bit = SECBIT_NO_SETUID_FIXUP_LOCKED;
+			else if (!strcmp(c + 1, "keep_caps"))
+				errx(EXIT_FAILURE,
+				     _("adjusting keep_caps does not make sense"));
+			else if (!strcmp(c + 1, "keep_caps_locked"))
+				bit = SECBIT_KEEP_CAPS_LOCKED;	/* sigh */
+			else
+				errx(EXIT_FAILURE, _("unrecognized securebit"));
+
+			if (*c == '+')
+				opts->securebits |= bit;
+			else
+				opts->securebits &= ~bit;
+		}
+	}
+
+	opts->securebits |= SECBIT_KEEP_CAPS;	/* We need it, and it's reset on exec */
+
+	/* free through the original pointer; the strsep() cursor is NULL */
+	free(buf);
+}
+
+/*
+ * Write @label to _PATH_PROC_ATTR_EXEC.  It is fatal when
+ * _PATH_SYS_SELINUX is not accessible, or when the file cannot be opened,
+ * fully written or closed.
+ */
+static void do_selinux_label(const char *label)
+{
+	size_t nbytes;
+	int attr_fd;
+
+	if (access(_PATH_SYS_SELINUX, F_OK) != 0)
+		errx(SETPRIV_EXIT_PRIVERR, _("SELinux is not running"));
+
+	attr_fd = open(_PATH_PROC_ATTR_EXEC, O_RDWR);
+	if (attr_fd == -1)
+		err(SETPRIV_EXIT_PRIVERR,
+		    _("cannot open %s"), _PATH_PROC_ATTR_EXEC);
+
+	nbytes = strlen(label);
+
+	/* a short write does not set errno; start from a clean value */
+	errno = 0;
+	if (write(attr_fd, label, nbytes) != (ssize_t) nbytes)
+		err(SETPRIV_EXIT_PRIVERR,
+		    _("write failed: %s"), _PATH_PROC_ATTR_EXEC);
+
+	if (close(attr_fd) != 0)
+		err(SETPRIV_EXIT_PRIVERR,
+		    _("close failed: %s"), _PATH_PROC_ATTR_EXEC);
+}
+
+/*
+ * Write "exec <label>" to _PATH_PROC_ATTR_EXEC.  It is fatal when
+ * _PATH_SYS_APPARMOR is not accessible, or when the file cannot be opened
+ * or the buffered write fails on close.
+ */
+static void do_apparmor_profile(const char *label)
+{
+	FILE *attr;
+
+	if (access(_PATH_SYS_APPARMOR, F_OK) != 0)
+		errx(SETPRIV_EXIT_PRIVERR, _("AppArmor is not running"));
+
+	attr = fopen(_PATH_PROC_ATTR_EXEC, "r+");
+	if (attr == NULL)
+		err(SETPRIV_EXIT_PRIVERR,
+		    _("cannot open %s"), _PATH_PROC_ATTR_EXEC);
+
+	fprintf(attr, "exec %s", label);
+
+	/* write errors only show up once the stream is flushed and closed */
+	if (close_stream(attr) != 0)
+		err(SETPRIV_EXIT_PRIVERR,
+		    _("write failed: %s"), _PATH_PROC_ATTR_EXEC);
+}
+
+
+/*
+ * Replace the environment with a minimal one built from @pw.
+ *
+ * TERM is carried over when it was set.  SHELL comes from pw->pw_shell
+ * (DEFAULT_SHELL when that is empty), HOME from pw->pw_dir, USER and
+ * LOGNAME from pw->pw_name, and PATH is _PATH_DEFPATH_ROOT for uid 0 and
+ * _PATH_DEFPATH otherwise.
+ *
+ * @pw is dereferenced unconditionally and must not be NULL.
+ */
+static void do_reset_environ(struct passwd *pw)
+{
+	char *term = getenv("TERM");
+
+	/* the string returned by getenv() does not survive the reset
+	 * below, so keep a private copy */
+	if (term)
+		term = xstrdup(term);
+#ifdef HAVE_CLEARENV
+	clearenv();
+#else
+	environ = NULL;
+#endif
+	if (term) {
+		xsetenv("TERM", term, 1);
+		/* the copy is not needed once the variable has been set */
+		free(term);
+	}
+
+	if (pw->pw_shell && *pw->pw_shell)
+		xsetenv("SHELL", pw->pw_shell, 1);
+	else
+		xsetenv("SHELL", DEFAULT_SHELL, 1);
+
+	xsetenv("HOME", pw->pw_dir, 1);
+	xsetenv("USER", pw->pw_name, 1);
+	xsetenv("LOGNAME", pw->pw_name, 1);
+
+	if (pw->pw_uid)
+		xsetenv("PATH", _PATH_DEFPATH, 1);
+	else
+		xsetenv("PATH", _PATH_DEFPATH_ROOT, 1);
+}
+
+/*
+ * Resolve @s to a UID: first as a user name via getpwnam(), otherwise as
+ * a number via strtol_or_err(), which is given @err as its error message.
+ */
+static uid_t get_user(const char *s, const char *err)
+{
+	const struct passwd *entry = getpwnam(s);
+	long numeric;
+
+	if (entry != NULL)
+		return entry->pw_uid;
+
+	numeric = strtol_or_err(s, err);
+	return numeric;
+}
+
+/*
+ * Resolve @s to a GID: first as a group name via getgrnam(), otherwise as
+ * a number via strtol_or_err(), which is given @err as its error message.
+ */
+static gid_t get_group(const char *s, const char *err)
+{
+	const struct group *entry = getgrnam(s);
+	long numeric;
+
+	if (entry != NULL)
+		return entry->gr_gid;
+
+	numeric = strtol_or_err(s, err);
+	return numeric;
+}
+
+/*
+ * Resolve @s to a UID, stored in *@uid, and return the matching passwd
+ * entry.
+ *
+ * @s is tried as a user name first; otherwise it is parsed as a number by
+ * strtol_or_err() (with @err as error message) and looked up with
+ * getpwuid().  Returns whatever the lookup returned, which may be NULL
+ * for a numeric UID.
+ */
+static struct passwd *get_passwd(const char *s, uid_t *uid, const char *err)
+{
+	struct passwd *entry = getpwnam(s);
+	long numeric;
+
+	if (entry != NULL) {
+		*uid = entry->pw_uid;
+		return entry;
+	}
+
+	numeric = strtol_or_err(s, err);
+	*uid = numeric;
+	return getpwuid(*uid);
+}
+
+/*
+ * Copy @src into @dst, duplicating every string member with xstrdup() so
+ * that @dst does not point into @src's strings.  Returns @dst.
+ */
+static struct passwd *passwd_copy(struct passwd *dst, const struct passwd *src)
+{
+	/* start with a member-wise copy, then replace the string pointers */
+	*dst = *src;
+
+	dst->pw_name = xstrdup(src->pw_name);
+	dst->pw_passwd = xstrdup(src->pw_passwd);
+	dst->pw_gecos = xstrdup(src->pw_gecos);
+	dst->pw_dir = xstrdup(src->pw_dir);
+	dst->pw_shell = xstrdup(src->pw_shell);
+
+	return dst;
+}
+
+/*
+ * setpriv entry point.
+ *
+ * Parses the options into a struct privctx, handles the stand-alone
+ * --dump and --list-caps modes, and otherwise applies the requested
+ * changes (environment, no_new_privs, security labels, uids/gids, groups,
+ * securebits, capability sets, parent death signal) before executing the
+ * program named by the remaining arguments.
+ */
+int main(int argc, char **argv)
+{
+	enum {
+		NNP = CHAR_MAX + 1,
+		RUID,
+		EUID,
+		RGID,
+		EGID,
+		REUID,
+		REGID,
+		CLEAR_GROUPS,
+		KEEP_GROUPS,
+		INIT_GROUPS,
+		GROUPS,
+		INHCAPS,
+		AMBCAPS,
+		LISTCAPS,
+		CAPBSET,
+		SECUREBITS,
+		PDEATHSIG,
+		SELINUX_LABEL,
+		APPARMOR_PROFILE,
+		RESET_ENV
+	};
+
+	static const struct option longopts[] = {
+		{ "dump",             no_argument,       NULL, 'd'              },
+		{ "nnp",              no_argument,       NULL, NNP              },
+		{ "no-new-privs",     no_argument,       NULL, NNP              },
+		{ "inh-caps",         required_argument, NULL, INHCAPS          },
+		{ "ambient-caps",     required_argument, NULL, AMBCAPS          },
+		{ "list-caps",        no_argument,       NULL, LISTCAPS         },
+		{ "ruid",             required_argument, NULL, RUID             },
+		{ "euid",             required_argument, NULL, EUID             },
+		{ "rgid",             required_argument, NULL, RGID             },
+		{ "egid",             required_argument, NULL, EGID             },
+		{ "reuid",            required_argument, NULL, REUID            },
+		{ "regid",            required_argument, NULL, REGID            },
+		{ "clear-groups",     no_argument,       NULL, CLEAR_GROUPS     },
+		{ "keep-groups",      no_argument,       NULL, KEEP_GROUPS      },
+		{ "init-groups",      no_argument,       NULL, INIT_GROUPS      },
+		{ "groups",           required_argument, NULL, GROUPS           },
+		{ "bounding-set",     required_argument, NULL, CAPBSET          },
+		{ "securebits",       required_argument, NULL, SECUREBITS       },
+		{ "pdeathsig",        required_argument, NULL, PDEATHSIG,       },
+		{ "selinux-label",    required_argument, NULL, SELINUX_LABEL    },
+		{ "apparmor-profile", required_argument, NULL, APPARMOR_PROFILE },
+		{ "help",             no_argument,       NULL, 'h'              },
+		{ "reset-env",        no_argument,       NULL, RESET_ENV,       },
+		{ "version",          no_argument,       NULL, 'V'              },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {
+		/* keep in same order with enum definitions */
+		{CLEAR_GROUPS, KEEP_GROUPS, INIT_GROUPS, GROUPS},
+		{0}
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	int c;
+	struct privctx opts;
+	struct passwd *pw = NULL;
+	int dumplevel = 0;
+	int total_opts = 0;
+	int list_caps = 0;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	memset(&opts, 0, sizeof(opts));
+
+	while ((c = getopt_long(argc, argv, "+dhV", longopts, NULL)) != -1) {
+		err_exclusive_options(c, longopts, excl, excl_st);
+		total_opts++;
+		switch (c) {
+		case 'd':
+			dumplevel++;
+			break;
+		case NNP:
+			if (opts.nnp)
+				errx(EXIT_FAILURE,
+				     _("duplicate --no-new-privs option"));
+			opts.nnp = 1;
+			break;
+		case RUID:
+			if (opts.have_ruid)
+				errx(EXIT_FAILURE, _("duplicate ruid"));
+			opts.have_ruid = 1;
+			pw = get_passwd(optarg, &opts.ruid, _("failed to parse ruid"));
+			if (pw) {
+				passwd_copy(&opts.passwd, pw);
+				opts.have_passwd = 1;
+			}
+			break;
+		case EUID:
+			if (opts.have_euid)
+				errx(EXIT_FAILURE, _("duplicate euid"));
+			opts.have_euid = 1;
+			opts.euid = get_user(optarg, _("failed to parse euid"));
+			break;
+		case REUID:
+			if (opts.have_ruid || opts.have_euid)
+				errx(EXIT_FAILURE, _("duplicate ruid or euid"));
+			opts.have_ruid = opts.have_euid = 1;
+			pw = get_passwd(optarg, &opts.ruid, _("failed to parse reuid"));
+			opts.euid = opts.ruid;
+			if (pw) {
+				passwd_copy(&opts.passwd, pw);
+				opts.have_passwd = 1;
+			}
+			break;
+		case RGID:
+			if (opts.have_rgid)
+				errx(EXIT_FAILURE, _("duplicate rgid"));
+			opts.have_rgid = 1;
+			opts.rgid = get_group(optarg, _("failed to parse rgid"));
+			break;
+		case EGID:
+			if (opts.have_egid)
+				errx(EXIT_FAILURE, _("duplicate egid"));
+			opts.have_egid = 1;
+			opts.egid = get_group(optarg, _("failed to parse egid"));
+			break;
+		case REGID:
+			if (opts.have_rgid || opts.have_egid)
+				errx(EXIT_FAILURE, _("duplicate rgid or egid"));
+			opts.have_rgid = opts.have_egid = 1;
+			opts.rgid = opts.egid = get_group(optarg, _("failed to parse regid"));
+			break;
+		case CLEAR_GROUPS:
+			if (opts.clear_groups)
+				errx(EXIT_FAILURE,
+				     _("duplicate --clear-groups option"));
+			opts.clear_groups = 1;
+			break;
+		case KEEP_GROUPS:
+			if (opts.keep_groups)
+				errx(EXIT_FAILURE,
+				     _("duplicate --keep-groups option"));
+			opts.keep_groups = 1;
+			break;
+		case INIT_GROUPS:
+			if (opts.init_groups)
+				errx(EXIT_FAILURE,
+				     _("duplicate --init-groups option"));
+			opts.init_groups = 1;
+			break;
+		case GROUPS:
+			if (opts.have_groups)
+				errx(EXIT_FAILURE,
+				     _("duplicate --groups option"));
+			parse_groups(&opts, optarg);
+			break;
+		case PDEATHSIG:
+			/* the option is spelled --pdeathsig in longopts */
+			if (opts.pdeathsig)
+				errx(EXIT_FAILURE,
+				     _("duplicate --pdeathsig option"));
+			parse_pdeathsig(&opts, optarg);
+			break;
+		case LISTCAPS:
+			list_caps = 1;
+			break;
+		case INHCAPS:
+			if (opts.caps_to_inherit)
+				errx(EXIT_FAILURE,
+				     _("duplicate --inh-caps option"));
+			opts.caps_to_inherit = optarg;
+			break;
+		case AMBCAPS:
+			if (opts.ambient_caps)
+				errx(EXIT_FAILURE,
+				     _("duplicate --ambient-caps option"));
+			opts.ambient_caps = optarg;
+			break;
+		case CAPBSET:
+			if (opts.bounding_set)
+				errx(EXIT_FAILURE,
+				     _("duplicate --bounding-set option"));
+			opts.bounding_set = optarg;
+			break;
+		case SECUREBITS:
+			if (opts.have_securebits)
+				errx(EXIT_FAILURE,
+				     _("duplicate --securebits option"));
+			parse_securebits(&opts, optarg);
+			break;
+		case SELINUX_LABEL:
+			if (opts.selinux_label)
+				errx(EXIT_FAILURE,
+				     _("duplicate --selinux-label option"));
+			opts.selinux_label = optarg;
+			break;
+		case APPARMOR_PROFILE:
+			if (opts.apparmor_profile)
+				errx(EXIT_FAILURE,
+				     _("duplicate --apparmor-profile option"));
+			opts.apparmor_profile = optarg;
+			break;
+		case RESET_ENV:
+			opts.reset_env = 1;
+			break;
+		case 'h':
+			usage();
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* --dump may be repeated, but nothing else may accompany it */
+	if (dumplevel) {
+		if (total_opts != dumplevel || optind < argc)
+			errx(EXIT_FAILURE,
+			     _("--dump is incompatible with all other options"));
+		dump(dumplevel);
+		return EXIT_SUCCESS;
+	}
+
+	if (list_caps) {
+		if (total_opts != 1 || optind < argc)
+			errx(EXIT_FAILURE,
+			     _("--list-caps must be specified alone"));
+		list_known_caps();
+		return EXIT_SUCCESS;
+	}
+
+	if (argc <= optind)
+		errx(EXIT_FAILURE, _("No program specified"));
+
+	if ((opts.have_rgid || opts.have_egid)
+	    && !opts.keep_groups && !opts.clear_groups && !opts.init_groups
+	    && !opts.have_groups)
+		errx(EXIT_FAILURE,
+		     _("--[re]gid requires --keep-groups, --clear-groups, --init-groups, or --groups"));
+
+	if (opts.init_groups && !opts.have_ruid)
+		errx(EXIT_FAILURE,
+		     _("--init-groups requires --ruid or --reuid"));
+
+	if (opts.init_groups && !opts.have_passwd)
+		errx(EXIT_FAILURE,
+		     _("uid %ld not found, --init-groups requires an user that "
+		       "can be found on the system"),
+		     (long) opts.ruid);
+
+	if (opts.reset_env) {
+		if (opts.have_passwd) {
+			/* pwd according to --ruid or --reuid */
+			pw = &opts.passwd;
+		} else {
+			/* pwd for the current user */
+			pw = getpwuid(getuid());
+		}
+		/* do_reset_environ() dereferences pw unconditionally */
+		if (!pw)
+			errx(EXIT_FAILURE,
+			     _("cannot find passwd entry for uid %ld"),
+			     (long) getuid());
+		do_reset_environ(pw);
+	}
+
+	if (opts.nnp && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1)
+		err(EXIT_FAILURE, _("disallow granting new privileges failed"));
+
+	if (opts.selinux_label)
+		do_selinux_label(opts.selinux_label);
+	if (opts.apparmor_profile)
+		do_apparmor_profile(opts.apparmor_profile);
+
+	if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) == -1)
+		err(EXIT_FAILURE, _("keep process capabilities failed"));
+
+	/* We're going to want CAP_SETPCAP, CAP_SETUID, and CAP_SETGID if
+	 * possible.  */
+	bump_cap(CAP_SETPCAP);
+	bump_cap(CAP_SETUID);
+	bump_cap(CAP_SETGID);
+	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("activate capabilities"));
+
+	if (opts.have_ruid || opts.have_euid) {
+		do_setresuid(&opts);
+		/* KEEPCAPS doesn't work for the effective mask. */
+		if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+			err(SETPRIV_EXIT_PRIVERR, _("reactivate capabilities"));
+	}
+
+	if (opts.have_rgid || opts.have_egid)
+		do_setresgid(&opts);
+
+	if (opts.have_groups) {
+		if (setgroups(opts.num_groups, opts.groups) != 0)
+			err(SETPRIV_EXIT_PRIVERR, _("setgroups failed"));
+	} else if (opts.init_groups) {
+		if (initgroups(opts.passwd.pw_name, opts.passwd.pw_gid) != 0)
+			err(SETPRIV_EXIT_PRIVERR, _("initgroups failed"));
+	} else if (opts.clear_groups) {
+		gid_t x = 0;
+		if (setgroups(0, &x) != 0)
+			err(SETPRIV_EXIT_PRIVERR, _("setgroups failed"));
+	}
+
+	if (opts.have_securebits && prctl(PR_SET_SECUREBITS, opts.securebits, 0, 0, 0) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("set process securebits failed"));
+
+	if (opts.bounding_set) {
+		do_caps(CAP_TYPE_BOUNDING, opts.bounding_set);
+		errno = EPERM;	/* capng doesn't set errno if we're missing CAP_SETPCAP */
+		if (capng_apply(CAPNG_SELECT_BOUNDS) != 0)
+			err(SETPRIV_EXIT_PRIVERR, _("apply bounding set"));
+	}
+
+	if (opts.caps_to_inherit) {
+		do_caps(CAP_TYPE_INHERITABLE, opts.caps_to_inherit);
+		if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+			err(SETPRIV_EXIT_PRIVERR, _("apply capabilities"));
+	}
+
+	if (opts.ambient_caps) {
+		do_caps(CAP_TYPE_AMBIENT, opts.ambient_caps);
+	}
+
+	/* Clear or set parent death signal */
+	if (opts.pdeathsig && prctl(PR_SET_PDEATHSIG, opts.pdeathsig < 0 ? 0 : opts.pdeathsig) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("set parent death signal failed"));
+
+	execvp(argv[optind], argv + optind);
+	errexec(argv[optind]);
+}
diff --git a/sys-utils/setsid.1 b/sys-utils/setsid.1
new file mode 100644
index 0000000..64f0555
--- /dev/null
+++ b/sys-utils/setsid.1
@@ -0,0 +1,42 @@
+.\" Rick Sladkey <jrs@world.std.com>
+.\" In the public domain.
+.TH SETSID 1 "July 2014" "util-linux" "User Commands"
+.SH NAME
+setsid \- run a program in a new session
+.SH SYNOPSIS
+.B setsid
+[options]
+.I program
+.RI [ arguments ]
+.SH DESCRIPTION
+.B setsid
+runs a program in a new session. The command calls
+.BR fork (2)
+if already a process group leader.  Otherwise, it executes a program in the
+current process.  This default behavior can be overridden with
+the \fB\-\-fork\fR option.
+.SH OPTIONS
+.TP
+.BR \-c , " \-\-ctty"
+Set the controlling terminal to the current one.
+.TP
+.BR \-f , " \-\-fork"
+Always create a new process.
+.TP
+.BR \-w , " \-\-wait"
+Wait for the execution of the program to end, and return the exit value of
+this program as the return value of
+.BR setsid .
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH "SEE ALSO"
+.BR setsid (2)
+.SH AUTHOR
+Rick Sladkey <jrs@world.std.com>
+.SH AVAILABILITY
+The setsid command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/setsid.c b/sys-utils/setsid.c
new file mode 100644
index 0000000..8b4f83d
--- /dev/null
+++ b/sys-utils/setsid.c
@@ -0,0 +1,123 @@
+/*
+ * setsid.c -- execute a command in a new session
+ * Rick Sladkey <jrs@world.std.com>
+ * In the public domain.
+ *
+ * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ *
+ * 2001-01-18 John Fremlin <vii@penguinpowered.com>
+ * - fork in case we are process group leader
+ *
+ * 2008-08-20 Daniel Kahn Gillmor <dkg@fifthhorseman.net>
+ * - if forked, wait on child process and emit its return code.
+ */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "c.h"
+#include "nls.h"
+#include "closestream.h"
+
+/* Print the setsid help text to stdout and exit with EXIT_SUCCESS. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	fputs(USAGE_HEADER, stdout);
+	fprintf(stdout,
+		_(" %s [options] <program> [arguments ...]\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Run a program in a new session.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	fputs(_(" -c, --ctty     set the controlling terminal to the current one\n"), stdout);
+	fputs(_(" -f, --fork     always fork\n"), stdout);
+	fputs(_(" -w, --wait     wait program to exit, and use the same return\n"), stdout);
+
+	printf(USAGE_HELP_OPTIONS(16));
+	printf(USAGE_MAN_TAIL("setsid(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * setsid entry point: run a program in a new session.
+ *
+ * Forks first when --fork was given or when the process is already a
+ * process group leader.  With --wait the parent waits for the child and
+ * returns its exit status; without it the parent exits immediately.
+ */
+int main(int argc, char **argv)
+{
+	int ch, forcefork = 0;
+	int ctty = 0;
+	pid_t pid;
+	int status = 0;		/* set by -w; later reused as wait status */
+
+	static const struct option longopts[] = {
+		{"ctty", no_argument, NULL, 'c'},
+		{"fork", no_argument, NULL, 'f'},
+		{"wait", no_argument, NULL, 'w'},
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((ch = getopt_long(argc, argv, "+Vhcfw", longopts, NULL)) != -1)
+		switch (ch) {
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'c':
+			ctty=1;
+			break;
+		case 'f':
+			forcefork = 1;
+			break;
+		case 'w':
+			status = 1;
+			break;
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+
+	if (argc - optind < 1) {
+		warnx(_("no command specified"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	if (forcefork || getpgrp() == getpid()) {
+		pid = fork();
+		switch (pid) {
+		case -1:
+			err(EXIT_FAILURE, _("fork"));
+		case 0:
+			/* child */
+			break;
+		default:
+			/* parent */
+			if (!status)
+				return EXIT_SUCCESS;
+			if (wait(&status) != pid)
+				err(EXIT_FAILURE, "wait");
+			if (WIFEXITED(status))
+				return WEXITSTATUS(status);
+			/* wait() succeeded, so errno is not meaningful
+			 * here: use errx() rather than err() */
+			errx(status, _("child %d did not exit normally"), pid);
+		}
+	}
+	if (setsid() < 0)
+		/* cannot happen */
+		err(EXIT_FAILURE, _("setsid failed"));
+
+	if (ctty && ioctl(STDIN_FILENO, TIOCSCTTY, 1))
+		err(EXIT_FAILURE, _("failed to set the controlling terminal"));
+	execvp(argv[optind], argv + optind);
+	errexec(argv[optind]);
+}
diff --git a/sys-utils/swapoff.8 b/sys-utils/swapoff.8
new file mode 100644
index 0000000..1a06b7e
--- /dev/null
+++ b/sys-utils/swapoff.8
@@ -0,0 +1 @@
+.so man8/swapon.8
diff --git a/sys-utils/swapoff.c b/sys-utils/swapoff.c
new file mode 100644
index 0000000..0a3807f
--- /dev/null
+++ b/sys-utils/swapoff.c
@@ -0,0 +1,253 @@
+#include <stdio.h>
+#include <errno.h>
+#include <getopt.h>
+
+#ifdef HAVE_SYS_SWAP_H
+# include <sys/swap.h>
+#endif
+
+#include "nls.h"
+#include "c.h"
+#include "xalloc.h"
+#include "closestream.h"
+
+#include "swapprober.h"
+#include "swapon-common.h"
+
+#if !defined(HAVE_SWAPOFF) && defined(SYS_swapoff)
+# include <sys/syscall.h>
+# define swapoff(path) syscall(SYS_swapoff, path)
+#endif
+
+static int verbose;
+static int all;
+
+#define QUIET	1
+#define CANONIC	1
+
+/*
+ * Resolve a NAME=value tag (e.g. LABEL or UUID) to a path.
+ *
+ * Like mnt_resolve_tag(), but when that finds nothing the regular swap
+ * files listed in /proc/swaps are probed too.  Note that
+ * mnt_resolve_tag() and mnt_resolve_spec() work with system visible
+ * block devices only.
+ *
+ * Returns the path, or NULL when nothing matches.
+ */
+static char *swapoff_resolve_tag(const char *name, const char *value,
+				 struct libmnt_cache *cache)
+{
+	struct libmnt_table *swaps_tb;
+	struct libmnt_iter *iter;
+	struct libmnt_fs *fs;
+	char *found;
+
+	/* the usual case for block devices (really fast, as it uses the
+	 * udev /dev/disk/by-* symlinks by default) */
+	found = mnt_resolve_tag(name, value, cache);
+	if (found)
+		return found;
+
+	/* try regular files from /proc/swaps */
+	swaps_tb = get_swaps();
+	if (!swaps_tb)
+		return NULL;
+
+	iter = mnt_new_iter(MNT_ITER_BACKWARD);
+	if (!iter)
+		err(EXIT_FAILURE, _("failed to initialize libmount iterator"));
+
+	/* stop at the first swap file whose probed tag matches */
+	while (!found && mnt_table_next_fs(swaps_tb, iter, &fs) == 0) {
+		const char *src = mnt_fs_get_source(fs);
+		const char *type = mnt_fs_get_swaptype(fs);
+		const char *data = NULL;
+		blkid_probe pr;
+
+		if (!src || !type || strcmp(type, "file") != 0)
+			continue;
+
+		pr = get_swap_prober(src);
+		if (!pr)
+			continue;
+
+		blkid_probe_lookup_value(pr, name, &data, NULL);
+		if (data && strcmp(data, value) == 0)
+			found = xstrdup(src);
+		blkid_free_probe(pr);
+	}
+
+	mnt_free_iter(iter);
+	return found;
+}
+
+/*
+ * Disable swapping on @orig_special.
+ *
+ * Unless @canonic is set, the argument is first resolved with
+ * mnt_resolve_spec() and, failing that, as a tag via
+ * swapoff_resolve_tag().  With @quiet set, a failed swapoff() is only
+ * reported for ENOMEM.  EPERM terminates the program.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int do_swapoff(const char *orig_special, int quiet, int canonic)
+{
+	const char *target;
+
+	if (verbose)
+		printf(_("swapoff %s\n"), orig_special);
+
+	if (canonic) {
+		target = orig_special;
+	} else {
+		char *tag_name, *tag_value;
+
+		target = mnt_resolve_spec(orig_special, mntcache);
+		if (!target
+		    && blkid_parse_tag_string(orig_special, &tag_name, &tag_value) == 0) {
+			target = swapoff_resolve_tag(tag_name, tag_value, mntcache);
+			free(tag_name);
+			free(tag_value);
+		}
+		if (!target)
+			return cannot_find(orig_special);
+	}
+
+	if (swapoff(target) != 0) {
+		if (errno == EPERM)
+			errx(EXIT_FAILURE, _("Not superuser."));
+
+		if (errno == ENOMEM || !quiet)
+			warn(_("%s: swapoff failed"), orig_special);
+
+		return -1;
+	}
+
+	return 0;	/* success */
+}
+
+/*
+ * Disable the swap area identified by the tag @name=@value.  Returns the
+ * result of do_swapoff(), or of cannot_find() when the tag does not
+ * resolve to a path.
+ */
+static int swapoff_by(const char *name, const char *value, int quiet)
+{
+	const char *path = swapoff_resolve_tag(name, value, mntcache);
+
+	if (!path)
+		return cannot_find(value);
+
+	return do_swapoff(path, quiet, CANONIC);
+}
+
+/* Print the swapoff help text to stdout and exit with EXIT_SUCCESS. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	fputs(USAGE_HEADER, stdout);
+	fprintf(stdout, _(" %s [options] [<spec>]\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Disable devices and files for paging and swapping.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	fputs(_(" -a, --all              disable all swaps from /proc/swaps\n"
+		" -v, --verbose          verbose mode\n"), stdout);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	printf(USAGE_HELP_OPTIONS(24));
+
+	fputs(_("\nThe <spec> parameter:\n"
+		" -L <label>             LABEL of device to be used\n"
+		" -U <uuid>              UUID of device to be used\n"
+		" LABEL=<label>          LABEL of device to be used\n"
+		" UUID=<uuid>            UUID of device to be used\n"
+		" <device>               name of device to be used\n"
+		" <file>                 name of file to be used\n"), stdout);
+
+	printf(USAGE_MAN_TAIL("swapoff(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Implement "swapoff -a": disable everything listed in /proc/swaps, then
+ * every fstab swap entry that is not active.
+ *
+ * Only failures from the /proc/swaps pass are reflected in the return
+ * value.
+ */
+static int swapoff_all(void)
+{
+	struct libmnt_iter *iter;
+	struct libmnt_table *table;
+	struct libmnt_fs *fs;
+	int rc = 0;
+
+	iter = mnt_new_iter(MNT_ITER_BACKWARD);
+	if (!iter)
+		err(EXIT_FAILURE, _("failed to initialize libmount iterator"));
+
+	/*
+	 * In case /proc/swaps exists, unswap stuff listed there.  We are quiet
+	 * but report errors in status.  Errors might mean that /proc/swaps
+	 * exists as ordinary file, not in procfs.  do_swapoff() exits
+	 * immediately on EPERM.
+	 */
+	table = get_swaps();
+	if (table) {
+		while (mnt_table_find_next_fs(table, iter, match_swap, NULL, &fs) == 0)
+			rc |= do_swapoff(mnt_fs_get_source(fs), QUIET, CANONIC);
+	}
+
+	/*
+	 * Unswap stuff mentioned in /etc/fstab.  Probably it was unmounted
+	 * already, so errors are not bad.  Doing swapoff -a twice should not
+	 * give error messages.
+	 */
+	table = get_fstab();
+	mnt_reset_iter(iter, MNT_ITER_FORWARD);
+
+	if (table) {
+		while (mnt_table_find_next_fs(table, iter, match_swap, NULL, &fs) == 0) {
+			if (is_active_swap(mnt_fs_get_source(fs)))
+				continue;
+			do_swapoff(mnt_fs_get_source(fs), QUIET, !CANONIC);
+		}
+	}
+
+	mnt_free_iter(iter);
+	return rc;
+}
+
+/*
+ * swapoff entry point.
+ *
+ * Collects -L/-U tags and positional specs, then disables them in that
+ * order, followed by everything when -a was given.  The return value is
+ * the OR of the individual results.
+ */
+int main(int argc, char *argv[])
+{
+	static const struct option long_opts[] = {
+		{ "all",     no_argument, NULL, 'a' },
+		{ "help",    no_argument, NULL, 'h' },
+		{ "verbose", no_argument, NULL, 'v' },
+		{ "version", no_argument, NULL, 'V' },
+		{ NULL, 0, NULL, 0 }
+	};
+	int opt, rc = 0;
+	size_t idx;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	for (;;) {
+		opt = getopt_long(argc, argv, "ahvVL:U:", long_opts, NULL);
+		if (opt == -1)
+			break;
+
+		switch (opt) {
+		case 'a':		/* all */
+			all++;
+			break;
+		case 'h':		/* help */
+			usage();
+			break;
+		case 'v':		/* be chatty */
+			verbose++;
+			break;
+		case 'V':		/* version */
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'L':
+			add_label(optarg);
+			break;
+		case 'U':
+			add_uuid(optarg);
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+	argv += optind;
+
+	/* there must be something to act on */
+	if (!(all || numof_labels() || numof_uuids() || *argv != NULL)) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	mnt_init_debug(0);
+	mntcache = mnt_new_cache();
+
+	for (idx = 0; idx < numof_labels(); idx++)
+		rc |= swapoff_by("LABEL", get_label(idx), !QUIET);
+
+	for (idx = 0; idx < numof_uuids(); idx++)
+		rc |= swapoff_by("UUID", get_uuid(idx), !QUIET);
+
+	for (; *argv != NULL; argv++)
+		rc |= do_swapoff(*argv, !QUIET, !CANONIC);
+
+	if (all)
+		rc |= swapoff_all();
+
+	free_tables();
+	mnt_unref_cache(mntcache);
+
+	return rc;
+}
diff --git a/sys-utils/swapon-common.c b/sys-utils/swapon-common.c
new file mode 100644
index 0000000..dd1593d
--- /dev/null
+++ b/sys-utils/swapon-common.c
@@ -0,0 +1,117 @@
+
+#include "c.h"
+#include "nls.h"
+#include "xalloc.h"
+
+#include "swapon-common.h"
+
+/*
+ * content of /proc/swaps and /etc/fstab
+ */
+static struct libmnt_table *swaps, *fstab;
+
+struct libmnt_cache *mntcache;
+
+/*
+ * Parser error callback installed on the fstab and swaps tables.
+ *
+ * Reports the offending file and line (when a file name is known) and
+ * always returns 1.  NOTE(review): 1 presumably tells libmount to skip
+ * the broken line and continue -- confirm against the libmount docs.
+ */
+static int table_parser_errcb(struct libmnt_table *tb __attribute__((__unused__)),
+			const char *filename, int line)
+{
+	if (!filename)
+		return 1;
+
+	warnx(_("%s: parse error at line %d -- ignored"), filename, line);
+	return 1;
+}
+
+/*
+ * Returns the parsed fstab table, parsing it on first use.
+ *
+ * The table is cached in the file-level 'fstab' pointer only after a
+ * successful parse.  On allocation or parse failure NULL is returned and
+ * nothing is cached, so a failure is reported consistently on every call
+ * instead of handing out a half-initialized table the second time.
+ *
+ * The filename passed to the parser is NULL (presumably selecting the
+ * libmount default fstab path -- confirm against the libmount docs).
+ */
+struct libmnt_table *get_fstab(void)
+{
+	struct libmnt_table *tb;
+
+	if (fstab)
+		return fstab;
+
+	tb = mnt_new_table();
+	if (!tb)
+		return NULL;
+
+	mnt_table_set_parser_errcb(tb, table_parser_errcb);
+	mnt_table_set_cache(tb, mntcache);
+
+	if (mnt_table_parse_fstab(tb, NULL) != 0) {
+		/* do not keep a table that failed to parse */
+		mnt_unref_table(tb);
+		return NULL;
+	}
+
+	fstab = tb;
+	return fstab;
+}
+
+/*
+ * Returns the parsed table of active swap areas, parsing it on first use.
+ *
+ * The table is cached in the file-level 'swaps' pointer only after a
+ * successful parse.  On allocation or parse failure NULL is returned and
+ * nothing is cached, so a failure is reported consistently on every call
+ * instead of handing out a half-initialized table the second time.
+ *
+ * The filename passed to the parser is NULL (presumably selecting the
+ * libmount default, /proc/swaps -- confirm against the libmount docs).
+ */
+struct libmnt_table *get_swaps(void)
+{
+	struct libmnt_table *tb;
+
+	if (swaps)
+		return swaps;
+
+	tb = mnt_new_table();
+	if (!tb)
+		return NULL;
+
+	mnt_table_set_cache(tb, mntcache);
+	mnt_table_set_parser_errcb(tb, table_parser_errcb);
+
+	if (mnt_table_parse_swaps(tb, NULL) != 0) {
+		/* do not keep a table that failed to parse */
+		mnt_unref_table(tb);
+		return NULL;
+	}
+
+	swaps = tb;
+	return swaps;
+}
+
+/*
+ * Drops the references to the cached swaps and fstab tables.
+ *
+ * The cached pointers are reset to NULL afterwards so that a later
+ * get_swaps()/get_fstab() call re-creates the table instead of returning
+ * a pointer to released memory.
+ */
+void free_tables(void)
+{
+	mnt_unref_table(swaps);
+	swaps = NULL;
+
+	mnt_unref_table(fstab);
+	fstab = NULL;
+}
+
+/*
+ * Match callback for mnt_table_find_next_fs(): returns 1 when @fs is a
+ * swap area entry, 0 otherwise (including when @fs is NULL).  The @data
+ * argument is unused.
+ */
+int match_swap(struct libmnt_fs *fs, void *data __attribute__((unused)))
+{
+	if (!fs)
+		return 0;
+
+	return mnt_fs_is_swaparea(fs) ? 1 : 0;
+}
+
+/*
+ * Returns 1 when @filename is found as a source in the table of active
+ * swap areas, 0 when it is not or when the table is unavailable.
+ */
+int is_active_swap(const char *filename)
+{
+	struct libmnt_table *active = get_swaps();
+
+	if (!active)
+		return 0;
+
+	return mnt_table_find_source(active, filename, MNT_ITER_BACKWARD) != NULL;
+}
+
+
+/*
+ * Warns that no device could be found for @special (a device spec such as
+ * a label, UUID or path given by the caller) and returns -1, so callers
+ * can use it directly as their error return value.
+ */
+int cannot_find(const char *special)
+{
+	warnx(_("cannot find the device for %s"), special);
+	return -1;
+}
+
+/*
+ * Lists with -L and -U option
+ */
+static const char **llist;
+static size_t llct;
+static const char **ulist;
+static size_t ulct;
+
+
+/*
+ * Appends @label to the list of labels collected from -L options.
+ * Only the pointer is stored; the string itself is not copied.
+ */
+void add_label(const char *label)
+{
+	const size_t slot = llct;
+
+	llct = slot + 1;
+	llist = xrealloc(llist, llct * sizeof(char *));
+	llist[slot] = label;
+}
+
+/*
+ * Returns the @i-th label added by add_label(), or NULL when @i is out
+ * of range.
+ */
+const char *get_label(size_t i)
+{
+	if (i >= llct)
+		return NULL;
+
+	return llist[i];
+}
+
+/* Returns the number of labels added by add_label() so far. */
+size_t numof_labels(void)
+{
+	return llct;
+}
+
+/*
+ * Appends @uuid to the list of UUIDs collected from -U options.
+ * Only the pointer is stored; the string itself is not copied.
+ */
+void add_uuid(const char *uuid)
+{
+	const size_t slot = ulct;
+
+	ulct = slot + 1;
+	ulist = xrealloc(ulist, ulct * sizeof(char *));
+	ulist[slot] = uuid;
+}
+
+/*
+ * Returns the @i-th UUID added by add_uuid(), or NULL when @i is out of
+ * range.
+ */
+const char *get_uuid(size_t i)
+{
+	if (i >= ulct)
+		return NULL;
+
+	return ulist[i];
+}
+
+/* Returns the number of UUIDs added by add_uuid() so far. */
+size_t numof_uuids(void)
+{
+	return ulct;
+}
+
diff --git a/sys-utils/swapon-common.h b/sys-utils/swapon-common.h
new file mode 100644
index 0000000..d1b679f
--- /dev/null
+++ b/sys-utils/swapon-common.h
@@ -0,0 +1,25 @@
+#ifndef UTIL_LINUX_SWAPON_COMMON_H
+#define UTIL_LINUX_SWAPON_COMMON_H
+
+#include <libmount.h>
+
+/* libmount cache shared by swapon and swapoff; attached to both tables */
+extern struct libmnt_cache *mntcache;
+
+/* tables parsed on first use (fstab and active swaps); NULL on failure */
+extern struct libmnt_table *get_fstab(void);
+extern struct libmnt_table *get_swaps(void);
+/* releases the references held on both tables */
+extern void free_tables(void);
+
+/* libmount match callback: non-zero when @fs is a swap area */
+extern int match_swap(struct libmnt_fs *fs, void *data);
+/* non-zero when @filename is listed in the table of active swaps */
+extern int is_active_swap(const char *filename);
+
+/* prints a "cannot find the device" warning and returns -1 */
+extern int cannot_find(const char *special);
+
+/* list of labels given by -L (pointers are stored, strings not copied) */
+extern void add_label(const char *label);
+extern const char *get_label(size_t i);
+extern size_t numof_labels(void);
+
+/* list of UUIDs given by -U (pointers are stored, strings not copied) */
+extern void add_uuid(const char *uuid);
+extern const char *get_uuid(size_t i);
+extern size_t numof_uuids(void);
+
+#endif /* UTIL_LINUX_SWAPON_COMMON_H */
diff --git a/sys-utils/swapon.8 b/sys-utils/swapon.8
new file mode 100644
index 0000000..510a15f
--- /dev/null
+++ b/sys-utils/swapon.8
@@ -0,0 +1,256 @@
+.\" Copyright (c) 1980, 1991 Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\"    must display the following acknowledgement:
+.\"	This product includes software developed by the University of
+.\"	California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\"    may be used to endorse or promote products derived from this software
+.\"    without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\"     @(#)swapon.8	6.3 (Berkeley) 3/16/91
+.\"
+.TH SWAPON 8 "October 2014" "util-linux" "System Administration"
+.SH NAME
+swapon, swapoff \- enable/disable devices and files for paging and swapping
+.SH SYNOPSIS
+.B swapon
+[options]
+.RI [ specialfile ...]
+.br
+.B swapoff
+.RB [ \-va ]
+.RI [ specialfile ...]
+.SH DESCRIPTION
+.B swapon
+is used to specify devices on which paging and swapping are to take place.
+
+The device or file used is given by the
+.I specialfile
+parameter.  It may be of the form
+.BI \-L " label"
+or
+.BI \-U " uuid"
+to indicate a device by label or uuid.
+
+Calls to
+.B swapon
+normally occur in the system boot scripts making all swap devices available, so
+that the paging and swapping activity is interleaved across several devices and
+files.
+
+.B swapoff
+disables swapping on the specified devices and files.
+When the
+.B \-a
+flag is given, swapping is disabled on all known swap devices and files
+(as found in
+.I /proc/swaps
+or
+.IR /etc/fstab ).
+
+.SH OPTIONS
+.TP
+.BR \-a , " \-\-all"
+All devices marked as ``swap'' in
+.I /etc/fstab
+are made available, except for those with the ``noauto'' option.
+Devices that are already being used as swap are silently skipped.
+.TP
+.BR \-d , " \-\-discard" [ =\fIpolicy\fR]
+Enable swap discards, if the swap backing device supports the discard or
+trim operation.  This may improve performance on some Solid State Devices,
+but often it does not.  The option allows one to select between two
+available swap discard policies:
+.B \-\-discard=once
+to perform a single-time discard operation for the whole swap area at swapon;
+or
+.B \-\-discard=pages
+to asynchronously discard freed swap pages before they are available for reuse.
+If no policy is selected, the default behavior is to enable both discard types.
+The
+.I /etc/fstab
+mount options
+.BR discard ,
+.BR discard=once ,
+or
+.B discard=pages
+may also be used to enable discard flags.
+.TP
+.BR \-e , " \-\-ifexists"
+Silently skip devices that do not exist.
+The
+.I /etc/fstab
+mount option
+.B nofail
+may also be used to skip a nonexistent device.
+
+.TP
+.BR \-f , " \-\-fixpgsz"
+Reinitialize (exec mkswap) the swap space if its page size does not
+match that of the current running kernel.
+.BR mkswap (8)
+initializes the whole device and does not check for bad blocks.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.TP
+.BI \-L " label"
+Use the partition that has the specified
+.IR label .
+(For this, access to
+.I /proc/partitions
+is needed.)
+.TP
+.BR \-o , " \-\-options " \fIopts\fP
+Specify swap options by an fstab-compatible comma-separated string.
+For example:
+.RS
+.RS
+.sp
+.B "swapon \-o pri=1,discard=pages,nofail /dev/sda2"
+.sp
+.RE
+The \fIopts\fP string is evaluated last and overrides all other
+command line options.
+.RE
+.TP
+.BR \-p , " \-\-priority " \fIpriority\fP
+Specify the priority of the swap device.
+.I priority
+is a value between \-1 and 32767.  Higher numbers indicate
+higher priority.  See
+.BR swapon (2)
+for a full description of swap priorities.  Add
+.BI pri= value
+to the option field of
+.I /etc/fstab
+for use with
+.BR "swapon \-a" .
+When no priority is defined, it defaults to \-1.
+.TP
+.BR \-s , " \-\-summary"
+Display swap usage summary by device.  Equivalent to "cat /proc/swaps".
+This output format is DEPRECATED in favour
+of \fB\-\-show\fR, which provides better control over the output data.
+.TP
+.BR \-\-show [ =\fIcolumn\fR ...]
+Display a definable table of swap areas.  See the
+.B \-\-help
+output for a list of available columns.
+.TP
+.B \-\-output\-all
+Output all available columns.
+.TP
+.B \-\-noheadings
+Do not print headings when displaying
+.B \-\-show
+output.
+.TP
+.B \-\-raw
+Display
+.B \-\-show
+output without aligning table columns.
+.TP
+.B \-\-bytes
+Display swap size in bytes in
+.B \-\-show
+output instead of in user-friendly units.
+.TP
+.BI \-U  " uuid"
+Use the partition that has the specified
+.IR uuid .
+.TP
+.BR \-v , " \-\-verbose"
+Be verbose.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.SH NOTES
+You should not use \fBswapon\fR on a file with holes.
+An attempt to do so fails and can be seen in the system log as
+.RS
+.sp
+.B "swapon: swapfile has holes."
+.sp
+.RE
+The swap file implementation in the kernel expects to be able to write to the
+file directly, without the assistance of the filesystem.  This is a problem on
+preallocated files (e.g.
+.BR fallocate (1))
+on filesystems like \fBXFS\fR or \fBext4\fR, and on copy-on-write
+filesystems like \fBbtrfs\fR.
+.PP
+It is recommended to use
+.BR dd (1)
+and
+.I /dev/zero
+to avoid holes on XFS and ext4.
+.PP
+.B swapon
+may not work correctly when using a swap file with some versions of
+\fBbtrfs\fR.  This is due to btrfs being a copy-on-write filesystem: the
+file location may not be static and corruption can result.  Btrfs actively
+disallows the use of swap files on its filesystems by refusing to map the file.
+.PP
+One possible workaround is to map the swap
+file to a loopback device.  This will allow the filesystem to determine the
+mapping properly but may come with a performance impact.
+.PP
+Swap over \fBNFS\fR may not work.
+.PP
+.B swapon
+automatically detects and rewrites a swap space signature with old software
+suspend data (e.g. S1SUSPEND, S2SUSPEND, ...). The problem is that if we don't
+do it, then we get data corruption the next time an attempt at unsuspending is
+made.
+
+.SH ENVIRONMENT
+.IP LIBMOUNT_DEBUG=all
+enables libmount debug output.
+.IP LIBBLKID_DEBUG=all
+enables libblkid debug output.
+
+.SH SEE ALSO
+.BR swapoff (2),
+.BR swapon (2),
+.BR fstab (5),
+.BR init (8),
+.BR mkswap (8),
+.BR mount (8),
+.BR rc (8)
+.SH FILES
+.br
+.I /dev/sd??
+standard paging devices
+.br
+.I /etc/fstab
+ascii filesystem description table
+.SH HISTORY
+The
+.B swapon
+command appeared in 4.0BSD.
+.SH AVAILABILITY
+The swapon command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/swapon.c b/sys-utils/swapon.c
new file mode 100644
index 0000000..357dcb3
--- /dev/null
+++ b/sys-utils/swapon.c
@@ -0,0 +1,1017 @@
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <ctype.h>
+
+#include <libsmartcols.h>
+
+#include "c.h"
+#include "nls.h"
+#include "bitops.h"
+#include "blkdev.h"
+#include "pathnames.h"
+#include "xalloc.h"
+#include "strutils.h"
+#include "optutils.h"
+#include "closestream.h"
+
+#include "swapheader.h"
+#include "swapprober.h"
+#include "swapon-common.h"
+
+#ifdef HAVE_SYS_SWAP_H
+# include <sys/swap.h>
+#endif
+
+#ifndef SWAP_FLAG_DISCARD
+# define SWAP_FLAG_DISCARD	0x10000 /* enable discard for swap */
+#endif
+
+#ifndef SWAP_FLAG_DISCARD_ONCE
+# define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */
+#endif
+
+#ifndef SWAP_FLAG_DISCARD_PAGES
+# define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */
+#endif
+
+#define SWAP_FLAGS_DISCARD_VALID (SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
+				  SWAP_FLAG_DISCARD_PAGES)
+
+#ifndef SWAP_FLAG_PREFER
+# define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
+#endif
+
+#ifndef SWAP_FLAG_PRIO_MASK
+# define SWAP_FLAG_PRIO_MASK	0x7fff
+#endif
+
+#ifndef SWAP_FLAG_PRIO_SHIFT
+# define SWAP_FLAG_PRIO_SHIFT	0
+#endif
+
+/*
+ * Fallback for C libraries without a swapon() wrapper: call the syscall
+ * directly.
+ * NOTE(review): SYS_swapon is normally defined by <sys/syscall.h>, which
+ * is only included inside this conditional -- the fallback is taken only
+ * if some earlier header already pulled the definition in; confirm.
+ */
+#if !defined(HAVE_SWAPON) && defined(SYS_swapon)
+# include <sys/syscall.h>
+# define swapon(path, flags) syscall(SYS_swapon, path, flags)
+#endif
+
+#define MAX_PAGESIZE	(64 * 1024)
+
+#ifndef UUID_STR_LEN
+# define UUID_STR_LEN	37
+#endif
+
+/*
+ * Kind of signature found at the end of the first page of a swap area;
+ * 0 (none of these values) means that no signature has been detected.
+ */
+enum {
+	SIG_SWAPSPACE = 1,	/* regular swap signature (SWAP_SIGNATURE) */
+	SIG_SWSUSPEND		/* one of the software suspend signatures */
+};
+
+/* column names */
+/* static description of one --show output column */
+struct colinfo {
+        const char *name; /* header */
+        double     whint; /* width hint (N < 1 is in percent of termwidth) */
+	int        flags; /* SCOLS_FL_* */
+        const char *help; /* description printed by usage(), translatable */
+};
+
+/* column identifiers; used as indexes into infos[] */
+enum {
+	COL_PATH,
+	COL_TYPE,
+	COL_SIZE,
+	COL_USED,
+	COL_PRIO,
+	COL_UUID,
+	COL_LABEL
+};
+/*
+ * All columns known to --show, indexed by COL_*.  The help strings are
+ * only marked for translation here (N_) and translated when printed.
+ */
+static struct colinfo infos[] = {
+	[COL_PATH]     = { "NAME",	0.20, 0, N_("device file or partition path") },
+	[COL_TYPE]     = { "TYPE",	0.20, SCOLS_FL_TRUNC, N_("type of the device")},
+	[COL_SIZE]     = { "SIZE",	0.20, SCOLS_FL_RIGHT, N_("size of the swap area")},
+	[COL_USED]     = { "USED",	0.20, SCOLS_FL_RIGHT, N_("bytes in use")},
+	[COL_PRIO]     = { "PRIO",	0.20, SCOLS_FL_RIGHT, N_("swap priority")},
+	[COL_UUID]     = { "UUID",	0.20, 0, N_("swap uuid")},
+	[COL_LABEL]    = { "LABEL",	0.20, 0, N_("swap label")},
+};
+
+
+/*
+ * swap area properties
+ *
+ * Filled from the command line and/or from an fstab-style option string
+ * (see parse_options()) and consumed by do_swapon().
+ */
+struct swap_prop {
+	int discard;			/* discard policy */
+	int priority;			/* non-prioritized swap by default */
+	int no_fail;			/* skip device if not exist */
+};
+
+/*
+ * device description
+ *
+ * label and uuid are filled from the on-disk swap header by
+ * swap_get_info(); pagesize is set by swap_get_header().
+ */
+struct swap_device {
+	const char *path;		/* device or file to be turned on */
+	const char *label;		/* swap label */
+	const char *uuid;		/* unique identifier */
+	unsigned int pagesize;		/* page size detected from the signature offset */
+};
+
+/* control struct: command line settings shared by the whole program */
+struct swapon_ctl {
+	int columns[ARRAY_SIZE(infos) * 2];	/* --show columns */
+	int ncolumns;				/* number of columns */
+
+	struct swap_prop props;		/* global settings for all devices */
+
+	unsigned int
+		all:1,			/* turn on all swap devices */
+		bytes:1,		/* display --show in bytes */
+		fix_page_size:1,	/* reinitialize page size */
+		no_heading:1,		/* toggle --show headers */
+		raw:1,			/* toggle --show alignment */
+		show:1,			/* display --show information */
+		verbose:1;		/* be chatty */
+};
+
+/*
+ * Translates a column name (the first @namesz bytes of @name, compared
+ * case-insensitively) to its COL_* identifier.
+ *
+ * Returns the identifier, or -1 after printing a warning when no column
+ * of that name exists.
+ */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	size_t id = 0;
+
+	assert(name);
+
+	while (id < ARRAY_SIZE(infos)) {
+		const char *known = infos[id].name;
+
+		/* the prefix has to match and the known name must end there */
+		if (strncasecmp(name, known, namesz) == 0 && known[namesz] == '\0')
+			return id;
+		id++;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/*
+ * Returns the COL_* identifier of the @num-th column selected for --show.
+ * Both the position and the stored identifier are checked by assert().
+ */
+static inline int get_column_id(const struct swapon_ctl *ctl, int num)
+{
+	int id;
+
+	assert(num < ctl->ncolumns);
+
+	id = ctl->columns[num];
+	assert(id < (int) ARRAY_SIZE(infos));
+
+	return id;
+}
+
+/* Returns the static description of the @num-th column selected for --show. */
+static inline struct colinfo *get_column_info(const struct swapon_ctl *ctl, unsigned num)
+{
+	const int id = get_column_id(ctl, num);
+
+	return infos + id;
+}
+
+/*
+ * Appends one output line describing the swap area @fs to @table.
+ *
+ * Only the columns selected in @ctl are filled, in the selected order.
+ * Sizes are taken from @fs and multiplied by 1024 to get bytes; UUID and
+ * LABEL come from a probe of the device and stay empty when the device
+ * is not readable or the value is not available.
+ *
+ * Exits the program when the line or a cell cannot be allocated.
+ */
+static void add_scols_line(const struct swapon_ctl *ctl, struct libscols_table *table, struct libmnt_fs *fs)
+{
+	int i;
+	struct libscols_line *line;
+	blkid_probe pr = NULL;
+	const char *data;
+
+	assert(table);
+	assert(fs);
+
+	line = scols_table_new_line(table, NULL);
+	if (!line)
+		err(EXIT_FAILURE, _("failed to allocate output line"));
+
+	/* the prober is needed for UUID and LABEL only; it is optional */
+	data = mnt_fs_get_source(fs);
+	if (access(data, R_OK) == 0)
+		pr = get_swap_prober(data);
+	for (i = 0; i < ctl->ncolumns; i++) {
+		char *str = NULL;
+		off_t size;
+
+		switch (get_column_id(ctl, i)) {
+		case COL_PATH:
+			xasprintf(&str, "%s", mnt_fs_get_source(fs));
+			break;
+		case COL_TYPE:
+			xasprintf(&str, "%s", mnt_fs_get_swaptype(fs));
+			break;
+		case COL_SIZE:
+			size = mnt_fs_get_size(fs);
+			size *= 1024;	/* convert to bytes */
+			if (ctl->bytes)
+				/* %jd expects intmax_t, off_t may be a different type */
+				xasprintf(&str, "%jd", (intmax_t) size);
+			else
+				str = size_to_human_string(SIZE_SUFFIX_1LETTER, size);
+			break;
+		case COL_USED:
+			size = mnt_fs_get_usedsize(fs);
+			size *= 1024;	/* convert to bytes */
+			if (ctl->bytes)
+				/* %jd expects intmax_t, off_t may be a different type */
+				xasprintf(&str, "%jd", (intmax_t) size);
+			else
+				str = size_to_human_string(SIZE_SUFFIX_1LETTER, size);
+			break;
+		case COL_PRIO:
+			xasprintf(&str, "%d", mnt_fs_get_priority(fs));
+			break;
+		case COL_UUID:
+			if (pr && !blkid_probe_lookup_value(pr, "UUID", &data, NULL))
+				xasprintf(&str, "%s", data);
+			break;
+		case COL_LABEL:
+			if (pr && !blkid_probe_lookup_value(pr, "LABEL", &data, NULL))
+				xasprintf(&str, "%s", data);
+			break;
+		default:
+			break;
+		}
+
+		/* on success the line refers to str, so it is not freed here */
+		if (str && scols_line_refer_data(line, i, str))
+			err(EXIT_FAILURE, _("failed to add output data"));
+	}
+	if (pr)
+		blkid_free_probe(pr);
+	return;
+}
+
+/*
+ * Prints the deprecated -s/--summary listing of the active swap areas.
+ *
+ * Returns 0 on success (nothing is printed when no swap is active) and
+ * -1 when the table of active swaps is not available.
+ */
+static int display_summary(void)
+{
+	struct libmnt_table *st = get_swaps();
+	struct libmnt_iter *itr;
+	struct libmnt_fs *fs;
+
+	if (!st)
+		return -1;
+
+	if (mnt_table_is_empty(st))
+		return 0;
+
+	itr = mnt_new_iter(MNT_ITER_FORWARD);
+	if (!itr)
+		err(EXIT_FAILURE, _("failed to initialize libmount iterator"));
+
+	printf(_("%s\t\t\t\tType\t\tSize\tUsed\tPriority\n"), _("Filename"));
+
+	while (mnt_table_next_fs(st, itr, &fs) == 0) {
+		/*
+		 * %jd expects intmax_t; the size getters are stored in
+		 * off_t elsewhere in this file, so cast explicitly.
+		 */
+		printf("%-39s\t%-8s\t%jd\t%jd\t%d\n",
+			mnt_fs_get_source(fs),
+			mnt_fs_get_swaptype(fs),
+			(intmax_t) mnt_fs_get_size(fs),
+			(intmax_t) mnt_fs_get_usedsize(fs),
+			mnt_fs_get_priority(fs));
+	}
+
+	mnt_free_iter(itr);
+	return 0;
+}
+
+/*
+ * Prints the --show table: one column per entry selected in @ctl and one
+ * line per active swap area.
+ *
+ * Returns 0 on success and -1 when the table of active swaps is not
+ * available; allocation failures terminate the program.
+ */
+static int show_table(struct swapon_ctl *ctl)
+{
+	struct libmnt_table *active = get_swaps();
+	struct libmnt_iter *it = NULL;
+	struct libscols_table *out = NULL;
+	struct libmnt_fs *entry;
+	int n = 0;
+
+	if (!active)
+		return -1;
+
+	it = mnt_new_iter(MNT_ITER_FORWARD);
+	if (!it)
+		err(EXIT_FAILURE, _("failed to initialize libmount iterator"));
+
+	scols_init_debug(0);
+
+	out = scols_new_table();
+	if (!out)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+
+	scols_table_enable_raw(out, ctl->raw);
+	scols_table_enable_noheadings(out, ctl->no_heading);
+
+	/* table header */
+	while (n < ctl->ncolumns) {
+		struct colinfo *ci = get_column_info(ctl, n);
+
+		if (!scols_table_new_column(out, ci->name, ci->whint, ci->flags))
+			err(EXIT_FAILURE, _("failed to allocate output column"));
+		n++;
+	}
+
+	/* table body */
+	while (mnt_table_next_fs(active, it, &entry) == 0)
+		add_scols_line(ctl, out, entry);
+
+	scols_print_table(out);
+
+	scols_unref_table(out);
+	mnt_free_iter(it);
+	return 0;
+}
+
+/*
+ * Re-creates the swap area on dev->path by running the external "mkswap"
+ * program (looked up by execvp()), passing the label and UUID stored in
+ * @dev when they are set.
+ *
+ * Returns 0 when mkswap exited with status 0, otherwise -1 (fork or
+ * waitpid failure, non-zero exit status, or abnormal termination).
+ */
+static int swap_reinitialize(struct swap_device *dev)
+{
+	pid_t pid;
+	int status, ret;
+	char const *cmd[7];	/* mkswap -L <label> -U <uuid> <path> NULL */
+	int idx=0;
+
+	assert(dev);
+	assert(dev->path);
+
+	warnx(_("%s: reinitializing the swap."), dev->path);
+
+	switch ((pid=fork())) {
+	case -1: /* fork error */
+		warn(_("fork failed"));
+		return -1;
+
+	case 0:	/* child */
+		if (geteuid() != getuid()) {
+			/* in case someone uses swapon as setuid binary */
+			/* the group is dropped first, then the user */
+			if (setgid(getgid()) < 0)
+				exit(EXIT_FAILURE);
+			if (setuid(getuid()) < 0)
+				exit(EXIT_FAILURE);
+		}
+
+		cmd[idx++] = "mkswap";
+		if (dev->label) {
+			cmd[idx++] = "-L";
+			cmd[idx++] = dev->label;
+		}
+		if (dev->uuid) {
+			cmd[idx++] = "-U";
+			cmd[idx++] = dev->uuid;
+		}
+		cmd[idx++] = dev->path;
+		cmd[idx++] = NULL;
+		execvp(cmd[0], (char * const *) cmd);
+		/* reached only when execvp() failed; errexec() is presumably
+		 * noreturn, otherwise the child would fall into the parent
+		 * branch below -- TODO confirm */
+		errexec(cmd[0]);
+
+	default: /* parent */
+		/* wait for the child, restarting when interrupted by a signal */
+		do {
+			ret = waitpid(pid, &status, 0);
+		} while (ret == -1 && errno == EINTR);
+
+		if (ret < 0) {
+			warn(_("waitpid failed"));
+			return -1;
+		}
+
+		/* mkswap returns: 0=success, >0 error */
+		if (WIFEXITED(status) && WEXITSTATUS(status)==0)
+			return 0; /* ok */
+		break;
+	}
+	return -1; /* error */
+}
+
+/*
+ * Overwrites the signature at the end of the first page of dev->path
+ * with the regular swap signature (used to get rid of a software suspend
+ * signature).
+ *
+ * Returns 0 on success and -1 when the device cannot be opened,
+ * positioned, written or closed; a warning is printed for each failure.
+ */
+static int swap_rewrite_signature(const struct swap_device *dev)
+{
+	int result = -1;
+	int fd;
+
+	assert(dev);
+	assert(dev->path);
+	assert(dev->pagesize);
+
+	fd = open(dev->path, O_WRONLY);
+	if (fd == -1) {
+		warn(_("cannot open %s"), dev->path);
+		return -1;
+	}
+
+	/* the signature occupies the last bytes of the first page */
+	if (lseek(fd, dev->pagesize - SWAP_SIGNATURE_SZ, SEEK_SET) < 0)
+		warn(_("%s: lseek failed"), dev->path);
+	else if (write(fd, (void *) SWAP_SIGNATURE,
+			SWAP_SIGNATURE_SZ) != SWAP_SIGNATURE_SZ)
+		warn(_("%s: write signature failed"), dev->path);
+	else
+		result = 0;
+
+	/* a failed close means the data may not have reached the device */
+	if (close_fd(fd) != 0) {
+		warn(_("write failed: %s"), dev->path);
+		result = -1;
+	}
+	return result;
+}
+
+/*
+ * Classifies the signature bytes at @buf.
+ *
+ * Returns 1 and stores SIG_SWAPSPACE or SIG_SWSUSPEND in @sig when a
+ * known signature is recognized; returns 0 and leaves @sig untouched
+ * otherwise.
+ */
+static int swap_detect_signature(const char *buf, int *sig)
+{
+	assert(buf);
+	assert(sig);
+
+	if (memcmp(buf, SWAP_SIGNATURE, SWAP_SIGNATURE_SZ) == 0) {
+		*sig = SIG_SWAPSPACE;
+		return 1;
+	}
+
+	/* the known suspend image magic strings */
+	if (memcmp(buf, "S1SUSPEND", 9) == 0 ||
+	    memcmp(buf, "S2SUSPEND", 9) == 0 ||
+	    memcmp(buf, "ULSUSPEND", 9) == 0 ||
+	    memcmp(buf, "\xed\xc3\x02\xe9\x98\x56\xe5\x0c", 8) == 0 ||
+	    memcmp(buf, "LINHIB0001", 10) == 0) {
+		*sig = SIG_SWSUSPEND;
+		return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Reads the beginning of the swap area from @fd and looks for a known
+ * signature at the end of the first page, trying the page sizes from 4k
+ * up to MAX_PAGESIZE.
+ *
+ * On success returns a malloc'ed buffer with the data read (to be freed
+ * by the caller) and sets @sig to SIG_* and @pagesize to the page size
+ * the signature was found for.  Returns NULL, with @sig and @pagesize
+ * set to 0, when reading fails or no signature is found.
+ */
+static char *swap_get_header(int fd, int *sig, unsigned int *pagesize)
+{
+	char *buf;
+	ssize_t datasz;
+	unsigned int page;
+
+	assert(sig);
+	assert(pagesize);
+
+	*pagesize = 0;
+	*sig = 0;
+
+	buf = xmalloc(MAX_PAGESIZE);
+
+	datasz = read(fd, buf, MAX_PAGESIZE);
+	if (datasz < 0)
+		goto err;
+
+	for (page = 0x1000; page <= MAX_PAGESIZE; page <<= 1) {
+		/* skip 32k pagesize since this does not seem to
+		 * be supported */
+		if (page == 0x8000)
+			continue;
+		/* the smallest swap area is PAGE_SIZE*10, it means
+		 * 40k, that's less than MAX_PAGESIZE */
+		/*
+		 * The signature occupies the last SWAP_SIGNATURE_SZ bytes
+		 * of the page, so the whole page has to be read before the
+		 * comparison; otherwise uninitialized bytes of the buffer
+		 * would be compared.
+		 */
+		if ((size_t) datasz < page)
+			break;
+		if (swap_detect_signature(buf + page - SWAP_SIGNATURE_SZ, sig)) {
+			*pagesize = page;
+			break;
+		}
+	}
+
+	if (*pagesize)
+		return buf;
+err:
+	free(buf);
+	return NULL;
+}
+
+/*
+ * Returns the size of the swap space in bytes as described by the header
+ * @hdr: (last_page + 1) * dev->pagesize.
+ *
+ * The header is accepted in native or byte-swapped order (recognized by
+ * the version field); for an unknown version last_page is taken as 0.
+ */
+static unsigned long long swap_get_size(const struct swap_device *dev,
+					const char *hdr)
+{
+	const struct swap_header_v1_2 *sh;
+	const unsigned int wanted = SWAP_VERSION;
+	unsigned int lastpg = 0;
+	unsigned long long npages;
+
+	assert(dev);
+	assert(dev->pagesize > 0);
+
+	sh = (const struct swap_header_v1_2 *) hdr;
+
+	if (sh->version == wanted)
+		lastpg = sh->last_page;			/* native byte order */
+	else if (swab32(sh->version) == wanted)
+		lastpg = swab32(sh->last_page);		/* swapped byte order */
+
+	npages = (unsigned long long) lastpg + 1;
+	return npages * dev->pagesize;
+}
+
+/*
+ * Copies the label and UUID from the swap header @hdr to @dev.
+ *
+ * The label is duplicated when its first byte is non-zero; the UUID is
+ * formatted as a hexadecimal string and duplicated when its first byte
+ * is non-zero.  Nothing is done when @hdr is NULL.
+ */
+static void swap_get_info(struct swap_device *dev, const char *hdr)
+{
+	const struct swap_header_v1_2 *sh = (const struct swap_header_v1_2 *) hdr;
+
+	assert(dev);
+
+	if (!sh)
+		return;
+
+	if (*sh->volume_name)
+		dev->label = xstrdup(sh->volume_name);
+
+	if (*sh->uuid) {
+		char text[UUID_STR_LEN];
+		const unsigned char *b = sh->uuid;
+
+		snprintf(text, sizeof(text),
+			"%02x%02x%02x%02x-"
+			"%02x%02x-%02x%02x-"
+			"%02x%02x-%02x%02x%02x%02x%02x%02x",
+			b[0], b[1], b[2], b[3],
+			b[4], b[5], b[6], b[7],
+			b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);
+		dev->uuid = xstrdup(text);
+	}
+}
+
+/*
+ * Sanity checks done on dev->path before swapon(2) is called.
+ *
+ * Warns about insecure permissions and ownership, refuses regular files
+ * with holes, reads the swap header (which also sets dev->pagesize) and,
+ * depending on the signature found:
+ *   - swap signature: compares the header page size with the system page
+ *     size and re-runs mkswap when ctl->fix_page_size is set,
+ *   - suspend signature: rewrites it with the regular swap signature.
+ *
+ * Returns 0 when the device may be used and -1 on error.
+ */
+static int swapon_checks(const struct swapon_ctl *ctl, struct swap_device *dev)
+{
+	struct stat st;
+	int fd, sig;
+	char *hdr = NULL;
+	unsigned long long devsize = 0;
+	int permMask;
+
+	assert(ctl);
+	assert(dev);
+	assert(dev->path);
+
+	fd = open(dev->path, O_RDONLY);
+	if (fd == -1) {
+		warn(_("cannot open %s"), dev->path);
+		goto err;
+	}
+
+	if (fstat(fd, &st) < 0) {
+		warn(_("stat of %s failed"), dev->path);
+		goto err;
+	}
+
+	/* bits that should not be set: special bits and any access for
+	 * others; for non-block devices also any access for the group */
+	permMask = S_ISBLK(st.st_mode) ? 07007 : 07077;
+	if ((st.st_mode & permMask) != 0)
+		warnx(_("%s: insecure permissions %04o, %04o suggested."),
+				dev->path, st.st_mode & 07777,
+				~permMask & 0666);
+
+	if (S_ISREG(st.st_mode) && st.st_uid != 0)
+		warnx(_("%s: insecure file owner %d, 0 (root) suggested."),
+				dev->path, st.st_uid);
+
+	/* test for holes by LBT */
+	if (S_ISREG(st.st_mode)) {
+		/* fewer allocated 512-byte blocks than the file size needs */
+		if (st.st_blocks * 512 < st.st_size) {
+			warnx(_("%s: skipping - it appears to have holes."),
+				dev->path);
+			goto err;
+		}
+		devsize = st.st_size;
+	}
+
+	if (S_ISBLK(st.st_mode) && blkdev_get_size(fd, &devsize)) {
+		warnx(_("%s: get size failed"), dev->path);
+		goto err;
+	}
+
+	/* NULL also when no known signature has been found */
+	hdr = swap_get_header(fd, &sig, &dev->pagesize);
+	if (!hdr) {
+		warnx(_("%s: read swap header failed"), dev->path);
+		goto err;
+	}
+
+	if (ctl->verbose)
+		warnx(_("%s: found signature [pagesize=%d, signature=%s]"),
+			dev->path,
+			dev->pagesize,
+			sig == SIG_SWAPSPACE ? "swap" :
+			sig == SIG_SWSUSPEND ? "suspend" : "unknown");
+
+	if (sig == SIG_SWAPSPACE && dev->pagesize) {
+		unsigned long long swapsize = swap_get_size(dev, hdr);
+		int syspg = getpagesize();
+
+		if (ctl->verbose)
+			warnx(_("%s: pagesize=%d, swapsize=%llu, devsize=%llu"),
+				dev->path, dev->pagesize, swapsize, devsize);
+
+		/* an oversized header is only reported (in verbose mode),
+		 * it is not treated as an error here */
+		if (swapsize > devsize) {
+			if (ctl->verbose)
+				warnx(_("%s: last_page 0x%08llx is larger"
+					" than actual size of swapspace"),
+					dev->path, swapsize);
+
+		} else if (syspg < 0 || (unsigned int) syspg != dev->pagesize) {
+			if (ctl->fix_page_size) {
+				int rc;
+
+				/* keep label and UUID for the new swap area */
+				swap_get_info(dev, hdr);
+
+				warnx(_("%s: swap format pagesize does not match."),
+					dev->path);
+				rc = swap_reinitialize(dev);
+				if (rc < 0)
+					goto err;
+			} else
+				warnx(_("%s: swap format pagesize does not match. "
+					"(Use --fixpgsz to reinitialize it.)"),
+					dev->path);
+		}
+	} else if (sig == SIG_SWSUSPEND) {
+		/* We have to reinitialize swap with old (=useless) software suspend
+		 * data. The problem is that if we don't do it, then we get data
+		 * corruption the next time an attempt at unsuspending is made.
+		 */
+		warnx(_("%s: software suspend data detected. "
+				"Rewriting the swap signature."),
+			dev->path);
+		if (swap_rewrite_signature(dev) < 0)
+			goto err;
+	}
+
+	free(hdr);
+	close(fd);
+	return 0;
+err:
+	/* fd is -1 when open() failed; hdr may still be NULL here */
+	if (fd != -1)
+		close(fd);
+	free(hdr);
+	return -1;
+}
+
+/*
+ * Enables swapping on one device or file.
+ *
+ * @spec is used as the path directly when @canonic is set, otherwise it
+ * is resolved by libmount first.  The device is verified by
+ * swapon_checks(), the priority and discard settings from @prop are
+ * converted to swapon(2) flags and the syscall is made.
+ *
+ * Returns the result of swapon(), or -1 when the device cannot be
+ * resolved or does not pass the checks.
+ */
+static int do_swapon(const struct swapon_ctl *ctl,
+		     const struct swap_prop *prop,
+		     const char *spec,
+		     int canonic)
+{
+	struct swap_device dev = { .path = NULL };
+	int swflags = 0;
+	int prio;
+	int rc;
+
+	assert(ctl);
+	assert(prop);
+
+	if (canonic)
+		dev.path = spec;
+	else {
+		dev.path = mnt_resolve_spec(spec, mntcache);
+		if (!dev.path)
+			return cannot_find(spec);
+	}
+
+	prio = prop->priority;
+
+	if (swapon_checks(ctl, &dev))
+		return -1;
+
+#ifdef SWAP_FLAG_PREFER
+	/* a negative priority means "not specified" */
+	if (prio >= 0) {
+		if (prio > SWAP_FLAG_PRIO_MASK)
+			prio = SWAP_FLAG_PRIO_MASK;
+
+		flags_set:
+		swflags = SWAP_FLAG_PREFER
+			| ((prio & SWAP_FLAG_PRIO_MASK)
+			   << SWAP_FLAG_PRIO_SHIFT);
+	}
+#endif
+	/*
+	 * Discard flags are passed to the kernel only when nothing but
+	 * the known discard bits is set.
+	 */
+	if (prop->discard && !(prop->discard & ~SWAP_FLAGS_DISCARD_VALID)) {
+		const int both = SWAP_FLAG_DISCARD_ONCE | SWAP_FLAG_DISCARD_PAGES;
+
+		/*
+		 * With both policies requested it is enough to ask the
+		 * kernel for discards in general.
+		 */
+		if ((prop->discard & both) == both)
+			swflags |= SWAP_FLAG_DISCARD;
+		else
+			swflags |= prop->discard;
+	}
+
+	if (ctl->verbose)
+		printf(_("swapon %s\n"), dev.path);
+
+	rc = swapon(dev.path, swflags);
+	if (rc < 0)
+		warn(_("%s: swapon failed"), dev.path);
+
+	return rc;
+}
+
+/*
+ * Enables swapping on the device with the given @label using the global
+ * settings from @ctl.  Returns the result of do_swapon(), or -1 (after a
+ * warning) when no such device is found.
+ */
+static int swapon_by_label(struct swapon_ctl *ctl, const char *label)
+{
+	char *path = mnt_resolve_tag("LABEL", label, mntcache);
+
+	if (!path)
+		return cannot_find(label);
+
+	return do_swapon(ctl, &ctl->props, path, TRUE);
+}
+
+/*
+ * Enables swapping on the device with the given @uuid using the global
+ * settings from @ctl.  Returns the result of do_swapon(), or -1 (after a
+ * warning) when no such device is found.
+ */
+static int swapon_by_uuid(struct swapon_ctl *ctl, const char *uuid)
+{
+	char *path = mnt_resolve_tag("UUID", uuid, mntcache);
+
+	if (!path)
+		return cannot_find(uuid);
+
+	return do_swapon(ctl, &ctl->props, path, TRUE);
+}
+
+/*
+ * Applies an fstab-style option string (-o <options> or the fstab option
+ * field) to @props.
+ *
+ * Recognized options:
+ *   nofail          sets props->no_fail
+ *   discard[=once|pages]
+ *                   enables discards; the policy has to match "once" or
+ *                   "pages" exactly, any other value selects no specific
+ *                   policy
+ *   pri=<number>    sets props->priority; the whole value has to be a
+ *                   decimal number that fits into int, otherwise the
+ *                   option is ignored and the priority is left unchanged
+ *
+ * Always returns 0.
+ */
+static int parse_options(struct swap_prop *props, const char *options)
+{
+	char *arg = NULL;
+	size_t argsz = 0;
+
+	assert(props);
+	assert(options);
+
+	if (mnt_optstr_get_option(options, "nofail", NULL, NULL) == 0)
+		props->no_fail = 1;
+
+	if (mnt_optstr_get_option(options, "discard", &arg, &argsz) == 0) {
+		props->discard |= SWAP_FLAG_DISCARD;
+
+		/*
+		 * The value is not NUL-terminated (it points into @options),
+		 * so compare the length as well -- strncmp() alone would
+		 * accept any prefix, including an empty value.
+		 */
+		if (arg) {
+			/* only single-time discards are wanted */
+			if (argsz == sizeof("once") - 1 &&
+			    strncmp(arg, "once", argsz) == 0)
+				props->discard |= SWAP_FLAG_DISCARD_ONCE;
+
+			/* do discard for every released swap page */
+			if (argsz == sizeof("pages") - 1 &&
+			    strncmp(arg, "pages", argsz) == 0)
+				props->discard |= SWAP_FLAG_DISCARD_PAGES;
+		}
+	}
+
+	arg = NULL;
+	argsz = 0;
+	if (mnt_optstr_get_option(options, "pri", &arg, &argsz) == 0 && arg) {
+		char *end = NULL;
+		long num;
+
+		errno = 0;
+		num = strtol(arg, &end, 10);
+
+		/* accept the number only when it covers the whole value */
+		if (errno == 0 && end != arg && end == arg + argsz &&
+		    num >= INT_MIN && num <= INT_MAX)
+			props->priority = (int) num;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Enables all swap areas listed in fstab (swapon -a).
+ *
+ * Entries with the "noauto" option and areas that are already active are
+ * skipped.  The global settings from @ctl are used as defaults and are
+ * overridden per entry by the options from fstab.  With "nofail",
+ * unresolvable or inaccessible devices are skipped without an error.
+ *
+ * Returns the OR-ed results of the individual operations; exits when
+ * fstab cannot be parsed.
+ */
+static int swapon_all(struct swapon_ctl *ctl)
+{
+	struct libmnt_table *tab = get_fstab();
+	struct libmnt_iter *it;
+	struct libmnt_fs *ent;
+	int rc = 0;
+
+	if (!tab)
+		err(EXIT_FAILURE, _("failed to parse %s"), mnt_get_fstab_path());
+
+	it = mnt_new_iter(MNT_ITER_FORWARD);
+	if (!it)
+		err(EXIT_FAILURE, _("failed to initialize libmount iterator"));
+
+	while (mnt_table_find_next_fs(tab, it, match_swap, NULL, &ent) == 0) {
+		struct swap_prop prop;		/* per device setting */
+		const char *fsopts;
+		const char *path;
+
+		if (mnt_fs_get_option(ent, "noauto", NULL, NULL) == 0) {
+			if (ctl->verbose)
+				warnx(_("%s: noauto option -- ignored"), mnt_fs_get_source(ent));
+			continue;
+		}
+
+		/* start with the global defaults, then apply fstab options */
+		prop = ctl->props;
+
+		fsopts = mnt_fs_get_options(ent);
+		if (fsopts)
+			parse_options(&prop, fsopts);
+
+		/* LABEL=, UUID= etc. from fstab have to be translated to a path */
+		path = mnt_resolve_spec(mnt_fs_get_source(ent), mntcache);
+		if (!path) {
+			if (!prop.no_fail)
+				rc |= cannot_find(mnt_fs_get_source(ent));
+			continue;
+		}
+
+		if (is_active_swap(path)) {
+			if (ctl->verbose)
+				warnx(_("%s: already active -- ignored"), path);
+			continue;
+		}
+
+		if (prop.no_fail && access(path, R_OK) != 0) {
+			if (ctl->verbose)
+				warnx(_("%s: inaccessible -- ignored"), path);
+			continue;
+		}
+
+		rc |= do_swapon(ctl, &prop, path, TRUE);
+	}
+
+	mnt_free_iter(it);
+	return rc;
+}
+
+
+/*
+ * Prints the help text to stdout and exits with EXIT_SUCCESS.
+ *
+ * The list of options includes --output-all, which is accepted on the
+ * command line (see long_opts in main()) and described in swapon(8).
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	size_t i;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options] [<spec>]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Enable devices and files for paging and swapping.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -a, --all                enable all swaps from /etc/fstab\n"), out);
+	fputs(_(" -d, --discard[=<policy>] enable swap discards, if supported by device\n"), out);
+	fputs(_(" -e, --ifexists           silently skip devices that do not exist\n"), out);
+	fputs(_(" -f, --fixpgsz            reinitialize the swap space if necessary\n"), out);
+	fputs(_(" -o, --options <list>     comma-separated list of swap options\n"), out);
+	fputs(_(" -p, --priority <prio>    specify the priority of the swap device\n"), out);
+	fputs(_(" -s, --summary            display summary about used swap devices (DEPRECATED)\n"), out);
+	fputs(_("     --show[=<columns>]   display summary in definable table\n"), out);
+	fputs(_("     --output-all         output all available columns\n"), out);
+	fputs(_("     --noheadings         don't print table heading (with --show)\n"), out);
+	fputs(_("     --raw                use the raw output format (with --show)\n"), out);
+	fputs(_("     --bytes              display swap size in bytes in --show output\n"), out);
+	fputs(_(" -v, --verbose            verbose mode\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(26));
+
+	fputs(_("\nThe <spec> parameter:\n" \
+		" -L <label>             synonym for LABEL=<label>\n"
+		" -U <uuid>              synonym for UUID=<uuid>\n"
+		" LABEL=<label>          specifies device by swap area label\n"
+		" UUID=<uuid>            specifies device by swap area UUID\n"
+		" PARTLABEL=<label>      specifies device by partition label\n"
+		" PARTUUID=<uuid>        specifies device by partition UUID\n"
+		" <device>               name of device to be used\n"
+		" <file>                 name of file to be used\n"), out);
+
+	fputs(_("\nAvailable discard policy types (for --discard):\n"
+		" once    : only single-time area discards are issued\n"
+		" pages   : freed pages are discarded before they are reused\n"
+		"If no policy is selected, both discard types are enabled (default).\n"), out);
+
+	/* the help strings in infos[] are translated at print time */
+	fputs(USAGE_COLUMNS, out);
+	for (i = 0; i < ARRAY_SIZE(infos); i++)
+		fprintf(out, " %-5s  %s\n", infos[i].name, _(infos[i].help));
+
+	printf(USAGE_MAN_TAIL("swapon(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char *argv[])
+{
+	/* Entry point: parse the options, then either list the swap areas or enable the requested ones. */
+	int status = 0, c;
+	size_t i;
+	char *options = NULL;
+	enum {
+		BYTES_OPTION = CHAR_MAX + 1,	/* long-only options get codes no short option can have */
+		NOHEADINGS_OPTION,
+		RAW_OPTION,
+		SHOW_OPTION,
+		OPT_LIST_TYPES
+	};
+
+	static const struct option long_opts[] = {
+		{ "priority",   required_argument, NULL, 'p'               },
+		{ "discard",    optional_argument, NULL, 'd'               },
+		{ "ifexists",   no_argument,       NULL, 'e'               },
+		{ "options",    required_argument, NULL, 'o'               },	/* mandatory, as for -o */
+		{ "summary",    no_argument,       NULL, 's'               },
+		{ "fixpgsz",    no_argument,       NULL, 'f'               },
+		{ "all",        no_argument,       NULL, 'a'               },
+		{ "help",       no_argument,       NULL, 'h'               },
+		{ "verbose",    no_argument,       NULL, 'v'               },
+		{ "version",    no_argument,       NULL, 'V'               },
+		{ "show",       optional_argument, NULL, SHOW_OPTION       },
+		{ "output-all", no_argument,       NULL, OPT_LIST_TYPES    },
+		{ "noheadings", no_argument,       NULL, NOHEADINGS_OPTION },
+		{ "raw",        no_argument,       NULL, RAW_OPTION        },
+		{ "bytes",      no_argument,       NULL, BYTES_OPTION      },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {       /* rows and cols in ASCII order */
+		{ 'a','o','s', SHOW_OPTION },
+		{ 'a','o', BYTES_OPTION },
+		{ 'a','o', NOHEADINGS_OPTION },
+		{ 'a','o', RAW_OPTION },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	struct swapon_ctl ctl;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	memset(&ctl, 0, sizeof(struct swapon_ctl));
+	ctl.props.priority = -1;
+
+	mnt_init_debug(0);
+	mntcache = mnt_new_cache();
+
+	while ((c = getopt_long(argc, argv, "ahd::efo:p:svVL:U:",
+				long_opts, NULL)) != -1) {
+
+		err_exclusive_options(c, long_opts, excl, excl_st);
+
+		switch (c) {
+		case 'a':		/* all */
+			ctl.all = 1;
+			break;
+		case 'h':		/* help */
+			usage();
+			break;
+		case 'o':		/* only remembered here; parsed after the option loop */
+			options = optarg;
+			break;
+		case 'p':		/* priority */
+			ctl.props.priority = strtos16_or_err(optarg,
+					   _("failed to parse priority"));
+			break;
+		case 'L':
+			add_label(optarg);
+			break;
+		case 'U':
+			add_uuid(optarg);
+			break;
+		case 'd':
+			ctl.props.discard |= SWAP_FLAG_DISCARD;
+			if (optarg) {
+				if (*optarg == '=')	/* accept "-d=once" as well as "-donce" */
+					optarg++;
+
+				if (strcmp(optarg, "once") == 0)
+					ctl.props.discard |= SWAP_FLAG_DISCARD_ONCE;
+				else if (strcmp(optarg, "pages") == 0)
+					ctl.props.discard |= SWAP_FLAG_DISCARD_PAGES;
+				else
+					errx(EXIT_FAILURE, _("unsupported discard policy: %s"), optarg);
+			}
+			break;
+		case 'e':               /* ifexists */
+			ctl.props.no_fail = 1;
+			break;
+		case 'f':
+			ctl.fix_page_size = 1;
+			break;
+		case 's':		/* status report */
+			status = display_summary();
+			return status;
+		case 'v':		/* be chatty */
+			ctl.verbose = 1;
+			break;
+		case SHOW_OPTION:
+			if (optarg) {
+				ctl.ncolumns = string_to_idarray(optarg,
+							     ctl.columns,
+							     ARRAY_SIZE(ctl.columns),
+							     column_name_to_id);
+				if (ctl.ncolumns < 0)
+					return EXIT_FAILURE;
+			}
+			ctl.show = 1;
+			break;
+		case OPT_LIST_TYPES:	/* --output-all: select every entry of infos[] */
+			for (ctl.ncolumns = 0; (size_t)ctl.ncolumns < ARRAY_SIZE(infos); ctl.ncolumns++)
+				ctl.columns[ctl.ncolumns] = ctl.ncolumns;
+			break;
+		case NOHEADINGS_OPTION:
+			ctl.no_heading = 1;
+			break;
+		case RAW_OPTION:
+			ctl.raw = 1;
+			break;
+		case BYTES_OPTION:
+			ctl.bytes = 1;
+			break;
+		case 'V':		/* version */
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 0:
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+	argv += optind;		/* argv now points at the remaining <spec> arguments */
+
+	if (ctl.show || (!ctl.all && !numof_labels() && !numof_uuids() && *argv == NULL)) {
+		if (!ctl.ncolumns) {
+			/* default columns */
+			ctl.columns[ctl.ncolumns++] = COL_PATH;
+			ctl.columns[ctl.ncolumns++] = COL_TYPE;
+			ctl.columns[ctl.ncolumns++] = COL_SIZE;
+			ctl.columns[ctl.ncolumns++] = COL_USED;
+			ctl.columns[ctl.ncolumns++] = COL_PRIO;
+		}
+		status = show_table(&ctl);
+		return status;
+	}
+
+	if (ctl.props.no_fail && !ctl.all) {	/* --ifexists is accepted together with --all only */
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	if (ctl.all)
+		status |= swapon_all(&ctl);
+
+	if (options)
+		parse_options(&ctl.props, options);
+
+	for (i = 0; i < numof_labels(); i++)
+		status |= swapon_by_label(&ctl, get_label(i));
+
+	for (i = 0; i < numof_uuids(); i++)
+		status |= swapon_by_uuid(&ctl, get_uuid(i));
+
+	while (*argv != NULL)
+		status |= do_swapon(&ctl, &ctl.props, *argv++, FALSE);
+
+	free_tables();
+	mnt_unref_cache(mntcache);
+
+	return status;
+}
diff --git a/sys-utils/switch_root.8 b/sys-utils/switch_root.8
new file mode 100644
index 0000000..4e162b3
--- /dev/null
+++ b/sys-utils/switch_root.8
@@ -0,0 +1,61 @@
+.\" Karel Zak <kzak@redhat.com>
+.TH SWITCH_ROOT 8 "June 2009" "util-linux" "System Administration"
+.SH NAME
+switch_root \- switch to another filesystem as the root of the mount tree
+.SH SYNOPSIS
+.B switch_root
+.RB [ \-hV ]
+.LP
+.B switch_root
+.I newroot
+.I init
+.RI [ arg ...]
+.SH DESCRIPTION
+.B switch_root
+moves already mounted /proc, /dev, /sys and /run to
+.I newroot
+and makes
+.I newroot
+the new root filesystem and starts
+.I init
+process.
+
+.B WARNING: switch_root recursively removes all files and directories on the current root filesystem.
+
+.SH OPTIONS
+.IP "\fB\-h, \-\-help\fP"
+Display help text and exit.
+.IP "\fB\-V, \-\-version\fP"
+Display version information and exit.
+
+.SH RETURN VALUE
+.B switch_root
+returns 0 on success and 1 on failure.
+
+.SH NOTES
+switch_root will fail to function if
+.I newroot
+is not the root of a mount. If you want to switch root into a directory that
+does not meet this requirement then you can first use a bind-mounting trick to
+turn any directory into a mount point:
+.sp
+.nf
+.RS
+mount \-\-bind $DIR $DIR
+.RE
+.fi
+
+.SH "SEE ALSO"
+.BR chroot (2),
+.BR init (8),
+.BR mkinitrd (8),
+.BR mount (8)
+.SH AUTHORS
+.nf
+Peter Jones <pjones@redhat.com>
+Jeremy Katz <katzj@redhat.com>
+Karel Zak <kzak@redhat.com>
+.fi
+.SH AVAILABILITY
+The switch_root command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/switch_root.c b/sys-utils/switch_root.c
new file mode 100644
index 0000000..a85ce24
--- /dev/null
+++ b/sys-utils/switch_root.c
@@ -0,0 +1,263 @@
+/*
+ * switch_root.c - switch to new root directory and start init.
+ *
+ * Copyright 2002-2009 Red Hat, Inc.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authors:
+ *	Peter Jones <pjones@redhat.com>
+ *	Jeremy Katz <katzj@redhat.com>
+ */
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/param.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <getopt.h>
+
+#include "c.h"
+#include "nls.h"
+#include "closestream.h"
+#include "statfs_magic.h"
+
+#ifndef MS_MOVE
+#define MS_MOVE 8192
+#endif
+
+#ifndef MNT_DETACH
+#define MNT_DETACH       0x00000002	/* Just detach from the tree */
+#endif
+
+/* Remove everything below the directory open on fd, never crossing mount points; fd is always closed. Returns 0, or -1 if the directory cannot be opened, stat'ed or read. */
+static int recursiveRemove(int fd)
+{
+	struct stat rb;
+	DIR *dir;
+	int rc = -1;
+	int dfd;
+
+	if (!(dir = fdopendir(fd))) {
+		warn(_("failed to open directory"));
+		goto done;
+	}
+
+	/* fdopendir() precludes us from continuing to use the input fd */
+	dfd = dirfd(dir);
+
+	if (fstat(dfd, &rb)) {
+		warn(_("stat failed"));
+		goto done;
+	}
+
+	while(1) {
+		struct dirent *d;
+		int isdir = 0;
+
+		errno = 0;	/* tells a readdir() error apart from the end of the directory */
+		if (!(d = readdir(dir))) {
+			if (errno) {
+				warn(_("failed to read directory"));
+				goto done;
+			}
+			break;	/* end of directory */
+		}
+
+		if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+			continue;
+#ifdef _DIRENT_HAVE_D_TYPE
+		if (d->d_type == DT_DIR || d->d_type == DT_UNKNOWN)	/* other types are unlinked without a stat */
+#endif
+		{
+			struct stat sb;
+
+			if (fstatat(dfd, d->d_name, &sb, AT_SYMLINK_NOFOLLOW)) {
+				warn(_("stat of %s failed"), d->d_name);
+				continue;
+			}
+
+			/* skip if device is not the same */
+			if (sb.st_dev != rb.st_dev)
+				continue;
+
+			/* remove subdirectories */
+			if (S_ISDIR(sb.st_mode)) {
+				int cfd;
+
+				cfd = openat(dfd, d->d_name, O_RDONLY);
+				if (cfd >= 0)
+					recursiveRemove(cfd);	/* it closes cfd too */
+				isdir = 1;
+			}
+		}
+
+		if (unlinkat(dfd, d->d_name, isdir ? AT_REMOVEDIR : 0))
+			warn(_("failed to unlink %s"), d->d_name);
+	}
+
+	rc = 0;	/* success */
+
+done:
+	if (dir)
+		closedir(dir);	/* also closes the descriptor handed to fdopendir() */
+	else
+		close(fd);	/* fdopendir() failed, so fd is still ours to release */
+	return rc;
+}
+
+static int switchroot(const char *newroot)
+{
+	/*  Don't try to unmount the old "/", there's no way to do it. */
+	const char *umounts[] = { "/dev", "/proc", "/sys", "/run", NULL };
+	int i;
+	int cfd;
+	pid_t pid;
+	struct stat newroot_stat, sb;
+
+	if (stat(newroot, &newroot_stat) != 0) {
+		warn(_("stat of %s failed"), newroot);
+		return -1;
+	}
+
+	for (i = 0; umounts[i] != NULL; i++) {
+		char newmount[PATH_MAX];
+
+		snprintf(newmount, sizeof(newmount), "%s%s", newroot, umounts[i]);
+
+		if ((stat(newmount, &sb) != 0) || (sb.st_dev != newroot_stat.st_dev)) {
+			/* mount point seems to be mounted already or stat failed */
+			umount2(umounts[i], MNT_DETACH);
+			continue;
+		}
+
+		if (mount(umounts[i], newmount, NULL, MS_MOVE, NULL) < 0) {
+			warn(_("failed to mount moving %s to %s"),
+				umounts[i], newmount);
+			warnx(_("forcing unmount of %s"), umounts[i]);
+			umount2(umounts[i], MNT_FORCE);
+		}
+	}
+
+	if (chdir(newroot)) {
+		warn(_("failed to change directory to %s"), newroot);
+		return -1;
+	}
+
+	cfd = open("/", O_RDONLY);
+	if (cfd < 0) {
+		warn(_("cannot open %s"), "/");
+		return -1;
+	}
+
+	if (mount(newroot, "/", NULL, MS_MOVE, NULL) < 0) {
+		close(cfd);
+		warn(_("failed to mount moving %s to /"), newroot);
+		return -1;
+	}
+
+	if (chroot(".")) {
+		close(cfd);
+		warn(_("failed to change root"));
+		return -1;
+	}
+
+	pid = fork();
+	if (pid <= 0) {
+		struct statfs stfs;
+
+		if (fstatfs(cfd, &stfs) == 0 &&
+		    (F_TYPE_EQUAL(stfs.f_type, STATFS_RAMFS_MAGIC) ||
+		     F_TYPE_EQUAL(stfs.f_type, STATFS_TMPFS_MAGIC)))
+			recursiveRemove(cfd);
+		else
+			warn(_("old root filesystem is not an initramfs"));
+		if (pid == 0)
+			exit(EXIT_SUCCESS);
+	}
+
+	close(cfd);
+	return 0;
+}
+
+/* Print the help text on stdout and exit successfully. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	fputs(USAGE_HEADER, stdout);
+	fprintf(stdout, _(" %s [options] <newrootdir> <init> <args to init>\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Switch to another filesystem as the root of the mount tree.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	printf(USAGE_HELP_OPTIONS(16));
+	printf(USAGE_MAN_TAIL("switch_root(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/* Entry point: handle -h/-V, then switch to <newroot> and exec <init> with its remaining arguments. */
+int main(int argc, char *argv[])
+{
+	char *newroot, *init, **initargs;
+	int c;
+	static const struct option longopts[] = {
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	atexit(close_stdout);
+	/* the leading '+' stops option parsing at <newroot>, so arguments meant for init stay untouched */
+	while ((c = getopt_long(argc, argv, "+Vh", longopts, NULL)) != -1)
+		switch (c) {
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	if (argc - optind < 2) {	/* count from optind so a leading "--" is not taken for <newroot> */
+		warnx(_("not enough arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	newroot = argv[optind];
+	init = argv[optind + 1];
+	initargs = &argv[optind + 1];	/* init receives its own path as argv[0] */
+
+	if (!*newroot || !*init) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	if (switchroot(newroot))
+		errx(EXIT_FAILURE, _("failed. Sorry."));
+
+	if (access(init, X_OK))
+		warn(_("cannot access %s"), init);
+	execv(init, initargs);	/* tried even after the warning above; returns only on failure */
+	errexec(init);
+}
+
diff --git a/sys-utils/tunelp.8 b/sys-utils/tunelp.8
new file mode 100644
index 0000000..90db834
--- /dev/null
+++ b/sys-utils/tunelp.8
@@ -0,0 +1,122 @@
+.\" Copyright (C) 1992-1997 Michael K. Johnson <johnsonm@redhat.com>
+.\" Copyright (C) 1998      Andrea Arcangeli   <andrea@e-mind.com>
+.\" It may be distributed under the terms of the GNU General Public License,
+.\" version 2, or any higher version.  See section COPYING of the GNU General
+.\" Public license for conditions under which this file may be redistributed.
+.\"
+.TH TUNELP 8 "October 2011" "util-linux" "System Administration"
+.SH NAME
+tunelp \- set various parameters for the lp device
+.SH SYNOPSIS
+.B tunelp
+[options]
+.I device
+.SH DESCRIPTION
+\fBtunelp\fP sets several parameters for the /dev/lp\fI?\fP devices, for
+better performance (or for any performance at all, if your printer won't work
+without it...) Without parameters, it tells whether the device is using
+interrupts, and if so, which one.  With parameters, it sets the device
+characteristics accordingly.
+.SH OPTIONS
+.TP
+\fB\-i\fR, \fB\-\-irq\fR \fIargument\fR
+specifies the IRQ to use for the parallel port in question.  If this is set
+to something non-zero, \-t and \-c have no effect.  If your port does not use
+interrupts, this option will make printing stop.  The command
+.B tunelp \-i 0
+restores non-interrupt driven (polling) action, and your printer should work
+again.  If your parallel port does support interrupts, interrupt-driven
+printing should be somewhat faster and more efficient, and will probably be
+desirable.
+.IP
+NOTE: This option will have no effect with kernel 2.1.131 or later since the
+irq is handled by the parport driver.  You can change the parport irq for
+example via
+.IR /proc/parport/*/irq .
+Read
+.I /usr/src/linux/Documentation/parport.txt
+for more details on parport.
+.TP
+\fB\-t\fR, \fB\-\-time\fR \fImilliseconds\fR
+is the amount of time in jiffies that the driver waits if the printer doesn't
+take a character for the number of tries dictated by the \-c parameter.  10
+is the default value.  If you want fastest possible printing, and don't care
+about system load, you may set this to 0.  If you don't care how fast your
+printer goes, or are printing text on a slow printer with a buffer, then 500
+(5 seconds) should be fine, and will give you very low system load.  This
+value generally should be lower for printing graphics than text, by a factor
+of approximately 10, for best performance.
+.TP
+\fB\-c\fR, \fB\-\-chars\fR \fIcharacters\fR
+is the number of times to try to output a character to the printer before
+sleeping for \-t \fITIME\fP.  It is the number of times around a loop that
+tries to send a character to the printer.  120 appears to be a good value for
+most printers in polling mode.  1000 is the default, because there are some
+printers that become jerky otherwise, but you \fImust\fP set this to `1' to
+achieve maximal CPU efficiency if you are using interrupts.  If you have a
+very fast printer, a value of 10 might make more sense even if in polling
+mode.  If you have a \fIreally\fP old printer, you can increase this further.
+.IP
+Setting \-t \fITIME\fP to 0 is equivalent to setting \-c \fICHARS\fP to
+infinity.
+.TP
+\fB\-w\fR, \fB\-\-wait\fR \fImicroseconds\fR
+is the number of usec we wait while playing with the strobe signal.  While
+most printers appear to be able to deal with an extremely short strobe, some
+printers demand a longer one.  Increasing this from the default 1 may make it
+possible to print with those printers.  This may also make it possible to use
+longer cables.  It's also possible to decrease this value to 0 if your
+printer is fast enough or your machine is slow enough.
+.TP
+\fB\-a\fR, \fB\-\-abort\fR \fI<on|off>\fR
+This is whether to abort on printer error - the default is not to.  If you
+are sitting at your computer, you probably want to be able to see an error
+and fix it, and have the printer go on printing.  On the other hand, if you
+aren't, you might rather that your printer spooler find out that the printer
+isn't ready, quit trying, and send you mail about it.  The choice is yours.
+.TP
+\fB\-o\fR, \fB\-\-check\-status\fR \fI<on|off>\fR
+This option is much like \-a.  It makes any
+.BR open (2)
+of this device check to see that the device is on-line and not reporting any
+out of paper or other errors.  This is the correct setting for most versions
+of lpd.
+.TP
+\fB\-C\fR, \fB\-\-careful\fR \fI<on|off>\fR
+This option adds extra ("careful") error checking.  When this option is on,
+the printer driver will ensure that the printer is on-line and not reporting
+any out of paper or other errors before sending data.  This is particularly
+useful for printers that normally appear to accept data when turned off.
+.IP
+NOTE: This option is obsolete because it's the default in 2.1.131 kernel or
+later.
+.TP
+\fB\-s\fR, \fB\-\-status\fR
+This option returns the current printer status, both as a decimal number from
+0..255, and as a list of active flags.  When this option is specified, \-q
+off, turning off the display of the current IRQ, is implied.
+.TP
+\fB\-r\fR, \fB\-\-reset\fR
+This option resets the port.  It requires a Linux kernel version of 1.1.80 or
+later.
+.TP
+\fB\-q\fR, \fB\-\-print\-irq\fR \fI<on|off>\fR
+This option sets whether the current IRQ setting is displayed.
+.SH NOTES
+.BR \-o ,
+.BR \-C ,
+and
+.B \-s
+all require a Linux kernel version of 1.1.76 or later.
+.PP
+.B \-C
+requires a Linux version prior to 2.1.131.
+.SH FILES
+.I /dev/lp?
+.br
+.I /proc/parport/*/*
+.SH AVAILABILITY
+The tunelp command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/tunelp.c b/sys-utils/tunelp.c
new file mode 100644
index 0000000..fe261f3
--- /dev/null
+++ b/sys-utils/tunelp.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 1992-1997 Michael K. Johnson, johnsonm@redhat.com
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License, version 2, or any later version.  See file COPYING for
+ * information on distribution conditions.
+ */
+
+/*
+ * This command is deprecated.  The utility is in maintenance mode,
+ * meaning we keep it in the source tree for backward compatibility
+ * only.  Do not waste time making this command better, unless the
+ * fix is about security or other very critical issue.
+ *
+ * See Documentation/deprecated.txt for more information.
+ */
+
+/*
+ * $Log: tunelp.c,v $
+ * Revision 1.9  1998/06/08 19:37:11  janl
+ * Thus compiles tunelp with 2.1.103 kernels
+ *
+ * Revision 1.8  1997/07/06 00:14:06  aebr
+ * Fixes to silence -Wall.
+ *
+ * Revision 1.7  1997/06/20 16:10:38  janl
+ * tunelp refreshed from authors archive.
+ *
+ * Revision 1.9  1997/06/20 12:56:43  johnsonm
+ * Finished fixing license terms.
+ *
+ * Revision 1.8  1997/06/20 12:34:59  johnsonm
+ * Fixed copyright and license.
+ *
+ * Revision 1.7  1995/03/29 11:16:23  johnsonm
+ * TYPO fixed...
+ *
+ * Revision 1.6  1995/03/29  11:12:15  johnsonm
+ * Added third argument to ioctl needed with new kernels
+ *
+ * Revision 1.5  1995/01/13  10:33:43  johnsonm
+ * Chris's changes for new ioctl numbers and backwards compatibility
+ * and the reset ioctl.
+ *
+ * Revision 1.4  1995/01/03  17:42:14  johnsonm
+ * -s isn't supposed to take an argument; removed : after s in getopt...
+ *
+ * Revision 1.3  1995/01/03  07:36:49  johnsonm
+ * Fixed typo
+ *
+ * Revision 1.2  1995/01/03  07:33:44  johnsonm
+ * revisions for lp driver updates in Linux 1.1.76
+ *
+ * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ *
+ * 1999-05-07 Merged LPTRUSTIRQ patch by Andrea Arcangeli (1998/11/29), aeb
+ *
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <linux/lp.h>
+
+#include "nls.h"
+#include "closestream.h"
+#include "strutils.h"
+
+#define EXIT_LP_MALLOC		2
+#define EXIT_LP_BADVAL		3
+#define EXIT_LP_IO_ERR		4
+
+#define XALLOC_EXIT_CODE EXIT_LP_MALLOC
+#include "xalloc.h"
+
+struct command {
+	long op;		/* ioctl request to issue on the opened device */
+	long val;		/* value passed as the argument of that ioctl */
+	struct command *next;	/* next queued command; the list ends with an unused node whose next is NULL */
+};
+
+/* Print the help text on stdout and exit successfully. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	fputs(USAGE_HEADER, stdout);
+	fprintf(stdout, _(" %s [options] <device>\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Set various parameters for the line printer.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	fputs(_(" -i, --irq <num>              specify parallel port irq\n"), stdout);
+	fputs(_(" -t, --time <ms>              driver wait time in milliseconds\n"), stdout);
+	fputs(_(" -c, --chars <num>            number of output characters before sleep\n"), stdout);
+	fputs(_(" -w, --wait <us>              strobe wait in micro seconds\n"), stdout);
+	/* TRANSLATORS: do not translate <on|off> arguments. The
+	   argument reader does not recognize locale, unless `on' is
+	   exactly that very same string. */
+	fputs(_(" -a, --abort <on|off>         abort on error\n"), stdout);
+	fputs(_(" -o, --check-status <on|off>  check printer status before printing\n"), stdout);
+	fputs(_(" -C, --careful <on|off>       extra checking to status check\n"), stdout);
+	fputs(_(" -s, --status                 query printer status\n"), stdout);
+	fputs(_(" -r, --reset                  reset the port\n"), stdout);
+	fputs(_(" -q, --print-irq <on|off>     display current irq setting\n"), stdout);
+	fputs(USAGE_SEPARATOR, stdout);
+	printf(USAGE_HELP_OPTIONS(30));
+	printf(USAGE_MAN_TAIL("tunelp(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char **argv)	/* queue one ioctl per option, then run them in order on <device> */
+{
+	int c, fd, irq, status, show_irq, offset = 0, retval;
+	char *filename;
+	struct stat statbuf;
+	struct command *cmds, *cmdst;	/* cmds: current tail of the queue, cmdst: its head */
+	static const struct option longopts[] = {
+		{"irq", required_argument, NULL, 'i'},
+		{"time", required_argument, NULL, 't'},
+		{"chars", required_argument, NULL, 'c'},
+		{"wait", required_argument, NULL, 'w'},
+		{"abort", required_argument, NULL, 'a'},
+		{"check-status", required_argument, NULL, 'o'},
+		{"careful", required_argument, NULL, 'C'},
+		{"status", no_argument, NULL, 's'},
+		{"trust-irq", required_argument, NULL, 'T'},
+		{"reset", no_argument, NULL, 'r'},
+		{"print-irq", required_argument, NULL, 'q'},
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	strutils_set_exitcode(EXIT_LP_BADVAL);
+
+	if (argc < 2) {
+		warnx(_("not enough arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	cmdst = cmds = xmalloc(sizeof(struct command));	/* the queue always ends with one empty node */
+	cmds->next = NULL;
+
+	show_irq = 1;	/* the IRQ report stays on unless -q or -s turns it off */
+	while ((c = getopt_long(argc, argv, "t:c:w:a:i:ho:C:sq:rT:vV", longopts, NULL)) != -1) {
+		switch (c) {
+		case 'h':
+			usage();
+			break;
+		case 'i':
+			cmds->op = LPSETIRQ;	/* fill the empty tail node, then append a new empty one */
+			cmds->val = strtol_or_err(optarg, _("argument error"));
+			cmds->next = xmalloc(sizeof(struct command));
+			cmds = cmds->next;
+			cmds->next = NULL;
+			break;
+		case 't':
+			cmds->op = LPTIME;
+			cmds->val = strtol_or_err(optarg, _("argument error"));
+			cmds->next = xmalloc(sizeof(struct command));
+			cmds = cmds->next;
+			cmds->next = NULL;
+			break;
+		case 'c':
+			cmds->op = LPCHAR;
+			cmds->val = strtol_or_err(optarg, _("argument error"));
+			cmds->next = xmalloc(sizeof(struct command));
+			cmds = cmds->next;
+			cmds->next = NULL;
+			break;
+		case 'w':
+			cmds->op = LPWAIT;
+			cmds->val = strtol_or_err(optarg, _("argument error"));
+			cmds->next = xmalloc(sizeof(struct command));
+			cmds = cmds->next;
+			cmds->next = NULL;
+			break;
+		case 'a':
+			cmds->op = LPABORT;
+			cmds->val = parse_switch(optarg, _("argument error"), "on", "off", NULL);
+			cmds->next = xmalloc(sizeof(struct command));
+			cmds = cmds->next;
+			cmds->next = NULL;
+			break;
+		case 'q':	/* not queued: only controls the IRQ report printed at the end */
+			show_irq = parse_switch(optarg, _("argument error"), "on", "off", NULL);
+			break;
+		case 'o':
+			cmds->op = LPABORTOPEN;
+			cmds->val = parse_switch(optarg, _("argument error"), "on", "off", NULL);
+			cmds->next = xmalloc(sizeof(struct command));
+			cmds = cmds->next;
+			cmds->next = NULL;
+			break;
+		case 'C':
+			cmds->op = LPCAREFUL;
+			cmds->val = parse_switch(optarg, _("argument error"), "on", "off", NULL);
+			cmds->next = xmalloc(sizeof(struct command));
+			cmds = cmds->next;
+			cmds->next = NULL;
+			break;
+		case 's':
+			show_irq = 0;	/* asking for the status suppresses the IRQ report */
+			cmds->op = LPGETSTATUS;
+			cmds->val = 0;
+			cmds->next = xmalloc(sizeof(struct command));
+			cmds = cmds->next;
+			cmds->next = NULL;
+			break;
+		case 'r':
+			cmds->op = LPRESET;
+			cmds->val = 0;
+			cmds->next = xmalloc(sizeof(struct command));
+			cmds = cmds->next;
+			cmds->next = NULL;
+			break;
+		case 'v':
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (optind != argc - 1) {	/* exactly one non-option argument, the device, must remain */
+		warnx(_("no device specified"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	filename = xstrdup(argv[optind]);
+	fd = open(filename, O_WRONLY | O_NONBLOCK, 0);
+	/* Need to open O_NONBLOCK in case ABORTOPEN is already set
+	 * and printer is off or off-line or in an error condition.
+	 * Otherwise we would abort...
+	 */
+	if (fd < 0)
+		err(EXIT_FAILURE, "%s", filename);
+
+	if (fstat(fd, &statbuf))
+		err(EXIT_FAILURE, "%s: stat() failed", filename);
+
+	if (!S_ISCHR(statbuf.st_mode)) {
+		warnx(_("%s not an lp device"), filename);
+		errtryhelp(EXIT_FAILURE);
+	}
+	/* Allow for binaries compiled under a new kernel to work on
+	 * the old ones.  The irq argument to ioctl isn't touched by
+	 * the old kernels, but we don't want to cause the kernel to
+	 * complain if we are using a new kernel
+	 */
+	if (LPGETIRQ >= 0x0600 && ioctl(fd, LPGETIRQ, &irq) < 0
+	    && errno == EINVAL)
+	        /* We don't understand the new ioctls */
+		offset = 0x0600;
+
+	cmds = cmdst;
+	while (cmds->next) {	/* stops at the trailing empty node, which is neither run nor freed */
+		if (cmds->op == LPGETSTATUS) {
+			status = 0xdeadbeef;	/* sentinel: shows whether the ioctl stored a value */
+			retval = ioctl(fd, LPGETSTATUS - offset, &status);
+			if (retval < 0)
+				warnx(_("LPGETSTATUS error"));
+			else {
+				if (status == (int)0xdeadbeef)
+					/* a few 1.1.7x kernels will do this */
+					status = retval;
+				printf(_("%s status is %d"), filename, status);
+				if (!(status & LP_PBUSY))	/* reported as busy when the bit is clear */
+					printf(_(", busy"));
+				if (!(status & LP_PACK))	/* reported as ready when the bit is clear */
+					printf(_(", ready"));
+				if ((status & LP_POUTPA))
+					printf(_(", out of paper"));
+				if ((status & LP_PSELECD))
+					printf(_(", on-line"));
+				if (!(status & LP_PERRORP))	/* reported as error when the bit is clear */
+					printf(_(", error"));
+				printf("\n");
+			}
+		} else
+		if (ioctl(fd, cmds->op - offset, cmds->val) < 0)
+			warn(_("ioctl failed"));
+		cmdst = cmds;
+		cmds = cmds->next;
+		free(cmdst);	/* release the node that was just executed */
+	}
+
+	if (show_irq) {
+		irq = 0xdeadbeef;	/* same sentinel trick as for the status above */
+		retval = ioctl(fd, LPGETIRQ - offset, &irq);
+		if (retval == -1)
+			err(EXIT_LP_IO_ERR, _("LPGETIRQ error"));
+		if (irq == (int)0xdeadbeef)
+		        /* up to 1.1.77 will do this */
+			irq = retval;
+		if (irq)
+			printf(_("%s using IRQ %d\n"), filename, irq);
+		else
+			printf(_("%s using polling\n"), filename);
+	}
+	free(filename);
+	close(fd);
+
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/umount.8 b/sys-utils/umount.8
new file mode 100644
index 0000000..f94d2f4
--- /dev/null
+++ b/sys-utils/umount.8
@@ -0,0 +1,267 @@
+.\" Copyright (c) 1996 Andries Brouwer
+.\" This page is somewhat derived from a page that was
+.\" (c) 1980, 1989, 1991 The Regents of the University of California
+.\" and had been heavily modified by Rik Faith and myself.
+.\"
+.\" This is free documentation; you can redistribute it and/or
+.\" modify it under the terms of the GNU General Public License as
+.\" published by the Free Software Foundation; either version 2 of
+.\" the License, or (at your option) any later version.
+.\"
+.\" The GNU General Public License's references to "object code"
+.\" and "executables" are to be interpreted as the output of any
+.\" document formatting or typesetting system, including
+.\" intermediate and printed output.
+.\"
+.\" This manual is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License along
+.\" with this program; if not, write to the Free Software Foundation, Inc.,
+.\" 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+.\"
+.TH UMOUNT 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+umount \- unmount file systems
+.SH SYNOPSIS
+.B umount \-a
+.RB [ \-dflnrv ]
+.RB [ \-t
+.IR fstype ]
+.RB [ \-O
+.IR option ...]
+.sp
+.B umount
+.RB [ \-dflnrv ]
+.RI { directory | device }...
+.sp
+.B umount
+.BR \-h | \-V
+
+.SH DESCRIPTION
+The
+.B umount
+command detaches the mentioned file system(s) from the file hierarchy.  A
+file system is specified by giving the directory where it has been
+mounted.  Giving the special device on which the file system lives may
+also work, but is obsolete, mainly because it will fail in case this
+device was mounted on more than one directory.
+.PP
+Note that a file system cannot be unmounted when it is 'busy' - for
+example, when there are open files on it, or when some process has its
+working directory there, or when a swap file on it is in use.  The
+offending process could even be
+.B umount
+itself - it opens libc, and libc in its turn may open for example locale
+files.  A lazy unmount avoids this problem, but it may introduce other
+issues.  See the \fB\-\-lazy\fR description below.
+.SH OPTIONS
+.TP
+.BR \-a , " \-\-all"
+All of the filesystems described in
+.I /proc/self/mountinfo
+(or in deprecated /etc/mtab)
+are unmounted, except the proc, devfs, devpts, sysfs, rpc_pipefs, nfsd and
+selinuxfs filesystems.  This list of filesystems may be replaced by the
+\fB\-\-types\fR umount option.
+.TP
+.BR \-A , " \-\-all\-targets"
+Unmount all mountpoints in the current namespace for the specified filesystem.
+The filesystem can be specified by one of the mountpoints or the device name (or
+UUID, etc.).  When this option is used together with \fB\-\-recursive\fR, then
+all nested mounts within the filesystem are recursively unmounted.
+This option is only supported on systems where /etc/mtab is a symlink
+to /proc/mounts.
+.TP
+.BR \-c , " \-\-no\-canonicalize"
+Do not canonicalize paths.  The path canonicalization is based on the
+.BR stat (2)
+and
+.BR readlink (2)
+system calls. These system calls may hang in some cases (for example on NFS if
+server is not available). The option has to be used with canonical path to the
+mount point.
+
+For more details about this option see the
+.BR mount (8)
+man page. Note that \fBumount\fR does not pass this option to the
+.BI /sbin/umount. type
+helpers.
+.TP
+.BR \-d , " \-\-detach\-loop"
+When the unmounted device was a loop device, also free this loop
+device. This option is unnecessary for devices initialized by
+.BR mount (8),
+in this case "autoclear" functionality is enabled by default.
+.TP
+.B \-\-fake
+Causes everything to be done except for the actual system call or umount helper
+execution; this 'fakes' unmounting the filesystem.  It can be used to remove
+entries from the deprecated
+.I /etc/mtab
+that were unmounted earlier with the
+.B \-n
+option.
+.TP
+.BR \-f , " \-\-force"
+Force an unmount (in case of an unreachable NFS system).
+
+Note that this option does not guarantee that umount command does not hang.
+It's strongly recommended to use absolute paths without symlinks to avoid
+unwanted readlink and stat system calls on unreachable NFS in umount.
+.TP
+.BR \-i , " \-\-internal\-only"
+Do not call the \fB/sbin/umount.\fIfilesystem\fR helper even if it exists.
+By default such a helper program is called if it exists.
+.TP
+.BR \-l , " \-\-lazy"
+Lazy unmount.  Detach the filesystem from the file hierarchy now,
+and clean up all references to this filesystem as soon as it is not busy
+anymore.
+
+A system reboot would be expected in near future if you're going to use this
+option for network filesystem or local filesystem with submounts.  The
+recommended use-case for \fBumount -l\fR is to prevent hangs on shutdown due to
+an unreachable network share where a normal umount will hang due to a downed
+server or a network partition. Remounts of the share will not be possible.
+
+.TP
+.BR \-N , " \-\-namespace " \fIns
+Perform umount in namespace specified by \fIns\fR.
+\fIns\fR is either PID of process running in that namespace
+or special file representing that namespace.
+.sp
+.BR umount (8)
+switches to the namespace when it reads /etc/fstab, writes /etc/mtab (or writes to /run/mount) and calls
+.BR umount (2)
+system call, otherwise it runs in the original namespace. It means that the target namespace does not have
+to contain any libraries or other requirements necessary to execute the
+.BR umount (2)
+command.
+.sp
+See \fBnamespaces\fR(7) for more information.
+.TP
+.BR \-n , " \-\-no\-mtab"
+Unmount without writing in
+.IR /etc/mtab .
+.TP
+.BR \-O , " \-\-test\-opts " \fIoption\fR...
+Unmount only the filesystems that have the specified option set in
+.IR /etc/fstab .
+More than one option may be specified in a comma-separated list.
+Each option can be prefixed with
+.B no
+to indicate that no action should be taken for this option.
+.TP
+.BR \-q , " \-\-quiet"
+Suppress "not mounted" error messages.
+.TP
+.BR \-R , " \-\-recursive"
+Recursively unmount each specified directory.  Recursion for each directory will
+stop if any unmount operation in the chain fails for any reason.  The relationship
+between mountpoints is determined by /proc/self/mountinfo entries.  The filesystem
+must be specified by mountpoint path; a recursive unmount by device name (or UUID)
+is unsupported.
+.TP
+.BR \-r , " \-\-read\-only"
+When an unmount fails, try to remount the filesystem read-only.
+.TP
+.BR \-t , " \-\-types " \fItype\fR...
+Indicate that the actions should only be taken on filesystems of the
+specified
+.IR type .
+More than one type may be specified in a comma-separated list.  The list
+of filesystem types can be prefixed with
+.B no
+to indicate that no action should be taken for all of the mentioned types.
+Note that
+.B umount
+reads information about mounted filesystems from kernel (/proc/mounts) and
+filesystem names may be different than filesystem names used in the /etc/fstab
+(e.g. "nfs4" vs. "nfs").
+.TP
+.BR \-v , " \-\-verbose"
+Verbose mode.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH "LOOP DEVICE"
+The
+.B umount
+command will automatically detach loop device previously initialized by
+.BR mount (8)
+command independently of /etc/mtab.
+
+In this case the device is initialized with "autoclear" flag (see
+.BR losetup (8)
+output for more details), otherwise it's necessary to use the option \fB\-\-detach\-loop\fR
+or call \fBlosetup -d <device>\fR. The autoclear feature is supported since Linux 2.6.25.
+.SH EXTERNAL HELPERS
+The syntax of external unmount helpers is:
+.PP
+.RS
+.BI umount. suffix
+.RI { directory | device }
+.RB [ \-flnrv ]
+.RB [ \-N
+.IR namespace ]
+.RB [ \-t
+.IR type . subtype ]
+.RE
+.PP
+where \fIsuffix\fR is the filesystem type (or the value from a
+\fBuhelper=\fR or \fBhelper=\fR marker in the mtab file).
+The \fB\-t\fR option can be used for filesystems that
+have subtype support.  For example:
+.PP
+.RS
+.B umount.fuse \-t fuse.sshfs
+.RE
+.PP
+A \fBuhelper=\fIsomething\fR marker (unprivileged helper) can appear in
+the \fI/etc/mtab\fR file when ordinary users need to be able to unmount
+a mountpoint that is not defined in \fI/etc/fstab\fR
+(for example for a device that was mounted by \fBudisks\fR(1)).
+.PP
+A \fBhelper=\fItype\fR marker in the mtab file will redirect
+all unmount requests
+to the \fB/sbin/umount.\fItype\fR helper independently of UID.
+.PP
+Note that \fI/etc/mtab\fR is currently deprecated and helper= and other
+userspace mount options are maintained by libmount.
+.SH FILES
+.TP
+.I /etc/mtab
+table of mounted filesystems (deprecated and usually replaced by
+symlink to /proc/mounts)
+.TP
+.I /etc/fstab
+table of known filesystems
+.TP
+.I /proc/self/mountinfo
+table of mounted filesystems generated by kernel.
+.SH ENVIRONMENT
+.IP LIBMOUNT_FSTAB=<path>
+overrides the default location of the fstab file (ignored for suid)
+.IP LIBMOUNT_MTAB=<path>
+overrides the default location of the mtab file (ignored for suid)
+.IP LIBMOUNT_DEBUG=all
+enables libmount debug output
+.SH "SEE ALSO"
+.BR umount (2),
+.BR losetup (8),
+.BR mount (8)
+.SH HISTORY
+A
+.B umount
+command appeared in Version 6 AT&T UNIX.
+.SH AVAILABILITY
+The umount command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/umount.c b/sys-utils/umount.c
new file mode 100644
index 0000000..b021088
--- /dev/null
+++ b/sys-utils/umount.c
@@ -0,0 +1,610 @@
+/*
+ * umount(8) -- unmount a filesystem
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Written by Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#include <libmount.h>
+
+#include "nls.h"
+#include "c.h"
+#include "env.h"
+#include "closestream.h"
+#include "pathnames.h"
+#include "canonicalize.h"
+
+#define XALLOC_EXIT_CODE MNT_EX_SYSERR
+#include "xalloc.h"
+
+#define OPTUTILS_EXIT_CODE MNT_EX_USAGE
+#include "optutils.h"
+
+static int quiet;
+
+/*
+ * Error callback installed on libmount table parsers.
+ *
+ * Reports the offending @line of @filename (when a file name is available)
+ * and always returns 1.
+ */
+static int table_parser_errcb(struct libmnt_table *tb __attribute__((__unused__)),
+			const char *filename, int line)
+{
+	if (!filename)
+		return 1;
+
+	warnx(_("%s: parse error at line %d -- ignored"), filename, line);
+	return 1;
+}
+
+
+/*
+ * Print the program name, package string, libmount version and the list of
+ * libmount features to stdout, then exit with MNT_EX_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) print_version(void)
+{
+	const char *libver = NULL;
+	const char **features = NULL;
+	const char **feat;
+	const char *sep = ": ";		/* the first feature follows a colon */
+
+	mnt_get_library_version(&libver);
+	mnt_get_library_features(&features);
+
+	printf(_("%s from %s (libmount %s"),
+			program_invocation_short_name,
+			PACKAGE_STRING,
+			libver);
+
+	for (feat = features; feat && *feat; feat++) {
+		fputs(sep, stdout);
+		fputs(*feat, stdout);
+		sep = ", ";		/* all later features are comma separated */
+	}
+
+	fputs(")\n", stdout);
+	exit(MNT_EX_SUCCESS);
+}
+/*
+ * Print the help text to stdout and exit with MNT_EX_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	fputs(USAGE_HEADER, stdout);
+	printf(_(
+		" %1$s [-hV]\n"
+		" %1$s -a [options]\n"
+		" %1$s [options] <source> | <directory>\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Unmount filesystems.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	fputs(_(" -a, --all               unmount all filesystems\n"), stdout);
+	fputs(_(" -A, --all-targets       unmount all mountpoints for the given device in the\n"
+		"                           current namespace\n"), stdout);
+	fputs(_(" -c, --no-canonicalize   don't canonicalize paths\n"), stdout);
+	fputs(_(" -d, --detach-loop       if mounted loop device, also free this loop device\n"), stdout);
+	fputs(_("     --fake              dry run; skip the umount(2) syscall\n"), stdout);
+	fputs(_(" -f, --force             force unmount (in case of an unreachable NFS system)\n"), stdout);
+	fputs(_(" -i, --internal-only     don't call the umount.<type> helpers\n"), stdout);
+	fputs(_(" -n, --no-mtab           don't write to /etc/mtab\n"), stdout);
+	fputs(_(" -l, --lazy              detach the filesystem now, clean up things later\n"), stdout);
+	fputs(_(" -O, --test-opts <list>  limit the set of filesystems (use with -a)\n"), stdout);
+	fputs(_(" -R, --recursive         recursively unmount a target with all its children\n"), stdout);
+	fputs(_(" -r, --read-only         in case unmounting fails, try to remount read-only\n"), stdout);
+	fputs(_(" -t, --types <list>      limit the set of filesystem types\n"), stdout);
+	fputs(_(" -v, --verbose           say what is being done\n"), stdout);
+	fputs(_(" -q, --quiet             suppress 'not mounted' error messages\n"), stdout);
+	fputs(_(" -N, --namespace <ns>    perform umount in another namespace\n"), stdout);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	printf(USAGE_HELP_OPTIONS(25));
+	printf(USAGE_MAN_TAIL("umount(8)"));
+
+	exit(MNT_EX_SUCCESS);
+}
+
+/*
+ * Terminate with MNT_EX_USAGE because the caller is not allowed to perform
+ * the requested operation.
+ *
+ * @option: long option name to mention in the message, or NULL for a
+ *          generic message.
+ *
+ * When the real UID is 0 but the effective UID is not, the effective UID is
+ * included in the message.
+ */
+static void __attribute__((__noreturn__)) exit_non_root(const char *option)
+{
+	const uid_t real_uid = getuid();
+	const uid_t effective_uid = geteuid();
+	/* user is root, but setuid to non-root */
+	const int root_dropped = (real_uid == 0 && effective_uid != 0);
+
+	if (root_dropped && option)
+		errx(MNT_EX_USAGE,
+			_("only root can use \"--%s\" option "
+			 "(effective UID is %u)"),
+				option, effective_uid);
+	else if (root_dropped)
+		errx(MNT_EX_USAGE, _("only root can do that "
+				 "(effective UID is %u)"), effective_uid);
+	else if (option)
+		errx(MNT_EX_USAGE, _("only root can use \"--%s\" option"), option);
+
+	errx(MNT_EX_USAGE, _("only root can do that"));
+}
+
+/*
+ * Report a successful unmount of the target held in @cxt.
+ *
+ * Nothing is printed when a helper was executed, when the context status is
+ * not 1, or when the context has no target.
+ */
+static void success_message(struct libmnt_context *cxt)
+{
+	const char *target, *source;
+
+	if (mnt_context_helper_executed(cxt))
+		return;
+	if (mnt_context_get_status(cxt) != 1)
+		return;
+
+	target = mnt_context_get_target(cxt);
+	if (target == NULL)
+		return;
+
+	source = mnt_context_get_source(cxt);
+	if (source == NULL) {
+		warnx(_("%s unmounted"), target);
+		return;
+	}
+	warnx(_("%s (%s) unmounted"), target, source);
+}
+
+/*
+ * Translate the libmount return code @rc into an umount exit code and print
+ * the message libmount generated for it, if any.
+ *
+ * With --quiet, nothing is printed for an MNT_EX_FAIL result whose syscall
+ * failed with EINVAL (the "not mounted" case).
+ *
+ * Returns the exit code computed by mnt_context_get_excode().
+ */
+static int mk_exit_code(struct libmnt_context *cxt, int rc)
+{
+	char msg[BUFSIZ] = { 0 };
+	const char *spec;
+
+	rc = mnt_context_get_excode(cxt, rc, msg, sizeof(msg));
+
+	/* suppress "not mounted" error message */
+	if (quiet
+	    && rc == MNT_EX_FAIL
+	    && mnt_context_syscall_called(cxt)
+	    && mnt_context_get_syscall_errno(cxt) == EINVAL)
+		return rc;
+
+	/* nothing to report */
+	if (msg[0] == '\0')
+		return rc;
+
+	/* prefix the message with the target, the source, or a placeholder */
+	spec = mnt_context_get_target(cxt);
+	if (spec == NULL)
+		spec = mnt_context_get_source(cxt);
+	if (spec == NULL)
+		spec = "???";
+
+	warnx("%s: %s.", spec, msg);
+	return rc;
+}
+
+/*
+ * Unmount every filesystem that mnt_context_next_umount() yields for @cxt,
+ * using a MNT_ITER_BACKWARD iterator.
+ *
+ * In verbose mode one line is printed per filesystem: "ignored" for entries
+ * reported as ignored by libmount, and "successfully unmounted" for entries
+ * whose exit code is MNT_EX_SUCCESS.  Both messages are translatable.
+ *
+ * Returns the bitwise OR of the per-filesystem exit codes, or MNT_EX_SYSERR
+ * when the iterator cannot be allocated.
+ */
+static int umount_all(struct libmnt_context *cxt)
+{
+	struct libmnt_iter *itr;
+	struct libmnt_fs *fs;
+	int mntrc, ignored, rc = 0;
+
+	itr = mnt_new_iter(MNT_ITER_BACKWARD);
+	if (!itr) {
+		warn(_("failed to initialize libmount iterator"));
+		return MNT_EX_SYSERR;
+	}
+
+	while (mnt_context_next_umount(cxt, itr, &fs, &mntrc, &ignored) == 0) {
+
+		const char *tgt = mnt_fs_get_target(fs);
+
+		if (ignored) {
+			if (mnt_context_is_verbose(cxt))
+				printf(_("%-25s: ignored\n"), tgt);
+		} else {
+			/* prints the libmount error message, if any */
+			int xrc = mk_exit_code(cxt, mntrc);
+
+			if (xrc == MNT_EX_SUCCESS
+			    && mnt_context_is_verbose(cxt))
+				printf(_("%-25s: successfully unmounted\n"), tgt);
+			rc |= xrc;
+		}
+	}
+
+	mnt_free_iter(itr);
+	return rc;
+}
+
+/*
+ * Unmount the single target @spec using @cxt.
+ *
+ * Prints the libmount error message on failure and, in verbose mode, a
+ * success message.  The context is reset before returning.
+ *
+ * Returns an MNT_EX_* exit code; MNT_EX_SOFTWARE when @spec is NULL.
+ * Exits with MNT_EX_SYSERR when the target cannot be stored in the context.
+ */
+static int umount_one(struct libmnt_context *cxt, const char *spec)
+{
+	int excode;
+
+	if (spec == NULL)
+		return MNT_EX_SOFTWARE;
+
+	if (mnt_context_set_target(cxt, spec) != 0)
+		err(MNT_EX_SYSERR, _("failed to set umount target"));
+
+	excode = mk_exit_code(cxt, mnt_context_umount(cxt));
+
+	if (excode == MNT_EX_SUCCESS && mnt_context_is_verbose(cxt))
+		success_message(cxt);
+
+	mnt_reset_context(cxt);
+	return excode;
+}
+
+/*
+ * Build a fresh mount table from _PATH_PROC_MOUNTINFO.
+ *
+ * The file is parsed after switching to the target namespace of @cxt; the
+ * previous namespace is restored before returning.  The new table shares
+ * the cache of @cxt.
+ *
+ * Returns the table, or NULL (after a warning) when parsing fails.  Exits
+ * with MNT_EX_SYSERR when a namespace switch or the table allocation fails.
+ */
+static struct libmnt_table *new_mountinfo(struct libmnt_context *cxt)
+{
+	struct libmnt_table *table;
+	struct libmnt_ns *orig_ns;
+
+	orig_ns = mnt_context_switch_target_ns(cxt);
+	if (orig_ns == NULL)
+		err(MNT_EX_SYSERR, _("failed to switch namespace"));
+
+	table = mnt_new_table();
+	if (table == NULL)
+		err(MNT_EX_SYSERR, _("libmount table allocation failed"));
+
+	mnt_table_set_parser_errcb(table, table_parser_errcb);
+	mnt_table_set_cache(table, mnt_context_get_cache(cxt));
+
+	if (mnt_table_parse_file(table, _PATH_PROC_MOUNTINFO) != 0) {
+		warn(_("failed to parse %s"), _PATH_PROC_MOUNTINFO);
+		mnt_unref_table(table);
+		table = NULL;
+	}
+
+	/* always go back to the namespace we started in */
+	if (mnt_context_switch_ns(cxt, orig_ns) == NULL)
+		err(MNT_EX_SYSERR, _("failed to switch namespace"));
+
+	return table;
+}
+
+/*
+ * Like umount_one(), but does not return an error if @spec is not mounted.
+ *
+ * Returns MNT_EX_SUCCESS when mnt_context_find_umount_fs() reports 1 (nothing
+ * to unmount), the exit code from mk_exit_code() when it reports a negative
+ * value, and the result of umount_one() otherwise.  The context is reset on
+ * every path.
+ */
+static int umount_one_if_mounted(struct libmnt_context *cxt, const char *spec)
+{
+	struct libmnt_fs *fs;
+	int rc = mnt_context_find_umount_fs(cxt, spec, &fs);
+
+	/* found: umount_one() resets the context itself */
+	if (rc != 1 && rc >= 0)
+		return umount_one(cxt, mnt_fs_get_target(fs));
+
+	if (rc == 1)
+		rc = MNT_EX_SUCCESS;		/* already unmounted */
+	else
+		rc = mk_exit_code(cxt, rc);	/* error */
+
+	mnt_reset_context(cxt);
+	return rc;
+}
+
+/*
+ * Unmount @fs and everything mounted below it.
+ *
+ * The children of @fs are looked up in @tb and unmounted first (each one
+ * recursively), then @fs itself is unmounted if it is still mounted.  The
+ * walk stops at the first failure.
+ *
+ * Returns an MNT_EX_* exit code.  Exits with MNT_EX_SYSERR when the iterator
+ * cannot be allocated.
+ */
+static int umount_do_recurse(struct libmnt_context *cxt,
+		struct libmnt_table *tb, struct libmnt_fs *fs)
+{
+	struct libmnt_iter *children = mnt_new_iter(MNT_ITER_BACKWARD);
+	struct libmnt_fs *child;
+	int rc;
+
+	if (children == NULL)
+		err(MNT_EX_SYSERR, _("libmount iterator allocation failed"));
+
+	/* umount all children; 1 means there are no more of them */
+	while ((rc = mnt_table_next_child_fs(tb, children, fs, &child)) != 1) {
+		if (rc < 0) {
+			warnx(_("failed to get child fs of %s"),
+					mnt_fs_get_target(fs));
+			rc = MNT_EX_SOFTWARE;
+			goto out;
+		}
+
+		rc = umount_do_recurse(cxt, tb, child);
+		if (rc != MNT_EX_SUCCESS)
+			goto out;
+	}
+
+	/* all children are gone, now the filesystem itself */
+	rc = umount_one_if_mounted(cxt, mnt_fs_get_target(fs));
+out:
+	mnt_free_iter(children);
+	return rc;
+}
+
+/*
+ * Recursively unmount the mountpoint @spec together with all its children.
+ *
+ * @spec is looked up as a target in a freshly parsed mountinfo table.
+ *
+ * Returns an MNT_EX_* exit code: MNT_EX_SOFTWARE when the table cannot be
+ * created, MNT_EX_USAGE when @spec is not a mountpoint in the table (a
+ * warning is printed unless --quiet is in effect).
+ */
+static int umount_recursive(struct libmnt_context *cxt, const char *spec)
+{
+	struct libmnt_table *mountinfo = new_mountinfo(cxt);
+	struct libmnt_fs *root;
+	int rc;
+
+	if (mountinfo == NULL)
+		return MNT_EX_SOFTWARE;
+
+	/* it's always real mountpoint, don't assume that the target maybe a device */
+	mnt_context_disable_swapmatch(cxt, 1);
+
+	root = mnt_table_find_target(mountinfo, spec, MNT_ITER_BACKWARD);
+	if (root == NULL) {
+		rc = MNT_EX_USAGE;
+		if (!quiet) {
+			/* tell a missing path apart from one that is not mounted */
+			if (access(spec, F_OK) == 0)
+				warnx(_("%s: not mounted"), spec);
+			else
+				warnx(_("%s: not found"), spec);
+		}
+	} else
+		rc = umount_do_recurse(cxt, mountinfo, root);
+
+	mnt_unref_table(mountinfo);
+	return rc;
+}
+
+/*
+ * Unmount every mountpoint of the filesystem identified by @spec
+ * (the --all-targets operation).
+ *
+ * @spec is resolved through the mount context to obtain the device number;
+ * then every entry of a freshly parsed mountinfo table with the same device
+ * number is unmounted, recursively when @rec is non-zero.  The walk stops at
+ * the first failure.
+ *
+ * Returns an MNT_EX_* exit code: MNT_EX_USAGE when @spec is not mounted or
+ * not found, MNT_EX_SOFTWARE when the mountinfo table cannot be created.
+ * Exits when the source or device number cannot be determined or when the
+ * iterator cannot be allocated.
+ */
+static int umount_alltargets(struct libmnt_context *cxt, const char *spec, int rec)
+{
+	struct libmnt_fs *fs;
+	struct libmnt_table *tb;
+	struct libmnt_iter *itr = NULL;
+	dev_t devno = 0;
+	int rc;
+
+	/* Convert @spec to device name, Use the same logic like regular
+	 * "umount <spec>".
+	 */
+	rc = mnt_context_find_umount_fs(cxt, spec, &fs);
+	if (rc == 1) {
+		/* nothing found for @spec */
+		rc = MNT_EX_USAGE;
+		if (!quiet)
+			warnx(access(spec, F_OK) == 0 ?
+				_("%s: not mounted") :
+				_("%s: not found"), spec);
+		return rc;
+	}
+	if (rc < 0)
+		return mk_exit_code(cxt, rc);		/* error */
+
+	if (!mnt_fs_get_srcpath(fs) || !mnt_fs_get_devno(fs))
+		errx(MNT_EX_USAGE, _("%s: failed to determine source "
+				"(--all-targets is unsupported on systems with "
+				"regular mtab file)."), spec);
+
+	itr = mnt_new_iter(MNT_ITER_BACKWARD);
+	if (!itr)
+		err(MNT_EX_SYSERR, _("libmount iterator allocation failed"));
+
+	/* get on @cxt independent mountinfo */
+	tb = new_mountinfo(cxt);
+	if (!tb) {
+		rc = MNT_EX_SOFTWARE;
+		goto done;
+	}
+
+	/* Note that @fs is from mount context and the context will be reset
+	 * after each umount() call */
+	devno = mnt_fs_get_devno(fs);
+	fs = NULL;
+
+	mnt_reset_context(cxt);
+
+	/* unmount every table entry that lives on the same device */
+	while (mnt_table_next_fs(tb, itr, &fs) == 0) {
+		if (mnt_fs_get_devno(fs) != devno)
+			continue;
+		/* the targets come from mountinfo, so they are mountpoints */
+		mnt_context_disable_swapmatch(cxt, 1);
+		if (rec)
+			rc = umount_do_recurse(cxt, tb, fs);
+		else
+			rc = umount_one_if_mounted(cxt, mnt_fs_get_target(fs));
+
+		if (rc != MNT_EX_SUCCESS)
+			break;
+	}
+
+done:
+	/* tb is NULL here when new_mountinfo() failed */
+	mnt_free_iter(itr);
+	mnt_unref_table(tb);
+
+	return rc;
+}
+
+/*
+ * Canonicalize @path on behalf of a non-root user, who should not be able
+ * to resolve a path that is unreadable for him.
+ *
+ * Returns the string produced by canonicalize_path_restricted(), or NULL
+ * when @path is NULL.  Exits with MNT_EX_USAGE when the path cannot be
+ * canonicalized.
+ */
+static char *sanitize_path(const char *path)
+{
+	char *resolved = NULL;
+
+	if (path) {
+		resolved = canonicalize_path_restricted(path);
+		if (resolved == NULL)
+			err(MNT_EX_USAGE, "%s", path);
+	}
+
+	return resolved;
+}
+
+/*
+ * Parse @str as a decimal process ID.
+ *
+ * Returns the PID, or 0 when @str is not a complete decimal number or the
+ * value is not a positive number that fits into an int.  The caller treats
+ * 0 as "not a PID" and uses the string as a path instead.
+ *
+ * The number is parsed into a long and range-checked before conversion, so
+ * an oversized value can no longer be silently truncated into some other,
+ * valid-looking PID.
+ */
+static pid_t parse_pid(const char *str)
+{
+	char *end = NULL;
+	long num;
+
+	errno = 0;
+	num = strtol(str, &end, 10);
+
+	if (errno || end == str || (end && *end))
+		return 0;
+	if (num <= 0 || num > INT_MAX)
+		return 0;
+
+	return (pid_t) num;
+}
+
+/*
+ * umount(8) entry point.
+ *
+ * Parses the command line into a libmount context and then runs exactly one
+ * of the operations: unmount all (-a), unmount all targets (-A, optionally
+ * with -R), recursive unmount (-R), or a plain unmount of every argument.
+ *
+ * The exit status is the accumulated MNT_EX_* codes of the individual
+ * operations, capped at 255.
+ */
+int main(int argc, char **argv)
+{
+	int c, rc = 0, all = 0, recursive = 0, alltargets = 0;
+	struct libmnt_context *cxt;
+	char *types = NULL;
+
+	/* identifiers for long options that have no short equivalent */
+	enum {
+		UMOUNT_OPT_FAKE = CHAR_MAX + 1,
+	};
+
+	static const struct option longopts[] = {
+		{ "all",             no_argument,       NULL, 'a'             },
+		{ "all-targets",     no_argument,       NULL, 'A'             },
+		{ "detach-loop",     no_argument,       NULL, 'd'             },
+		{ "fake",            no_argument,       NULL, UMOUNT_OPT_FAKE },
+		{ "force",           no_argument,       NULL, 'f'             },
+		{ "help",            no_argument,       NULL, 'h'             },
+		{ "internal-only",   no_argument,       NULL, 'i'             },
+		{ "lazy",            no_argument,       NULL, 'l'             },
+		{ "no-canonicalize", no_argument,       NULL, 'c'             },
+		{ "no-mtab",         no_argument,       NULL, 'n'             },
+		{ "quiet",           no_argument,       NULL, 'q'             },
+		{ "read-only",       no_argument,       NULL, 'r'             },
+		{ "recursive",       no_argument,       NULL, 'R'             },
+		{ "test-opts",       required_argument, NULL, 'O'             },
+		{ "types",           required_argument, NULL, 't'             },
+		{ "verbose",         no_argument,       NULL, 'v'             },
+		{ "version",         no_argument,       NULL, 'V'             },
+		{ "namespace",       required_argument, NULL, 'N'             },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	/* each row lists options that must not be combined with each other */
+	static const ul_excl_t excl[] = {       /* rows and cols in ASCII order */
+		{ 'A','a' },			/* all-targets,all */
+		{ 'R','a' },			/* recursive,all */
+		{ 'O','R','t'},			/* options,recursive,types */
+		{ 'R','r' },			/* recursive,read-only */
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	sanitize_env();
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	mnt_init_debug(0);
+	cxt = mnt_new_context();
+	if (!cxt)
+		err(MNT_EX_SYSERR, _("libmount context allocation failed"));
+
+	mnt_context_set_tables_errcb(cxt, table_parser_errcb);
+
+	while ((c = getopt_long(argc, argv, "aAcdfhilnqRrO:t:vVN:",
+					longopts, NULL)) != -1) {
+
+
+		/* only few options are allowed for non-root users */
+		if (mnt_context_is_restricted(cxt) && !strchr("hdilqVv", c))
+			exit_non_root(option_to_longopt(c, longopts));
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch(c) {
+		case 'a':
+			all = 1;
+			break;
+		case 'A':
+			alltargets = 1;
+			break;
+		case 'c':
+			mnt_context_disable_canonicalize(cxt, TRUE);
+			break;
+		case 'd':
+			mnt_context_enable_loopdel(cxt, TRUE);
+			break;
+		case UMOUNT_OPT_FAKE:
+			mnt_context_enable_fake(cxt, TRUE);
+			break;
+		case 'f':
+			mnt_context_enable_force(cxt, TRUE);
+			break;
+		case 'h':
+			usage();
+			break;
+		case 'i':
+			mnt_context_disable_helpers(cxt, TRUE);
+			break;
+		case 'l':
+			mnt_context_enable_lazy(cxt, TRUE);
+			break;
+		case 'n':
+			mnt_context_disable_mtab(cxt, TRUE);
+			break;
+		case 'q':
+			quiet = 1;
+			break;
+		case 'r':
+			mnt_context_enable_rdonly_umount(cxt, TRUE);
+			break;
+		case 'R':
+			recursive = TRUE;
+			break;
+		case 'O':
+			if (mnt_context_set_options_pattern(cxt, optarg))
+				err(MNT_EX_SYSERR, _("failed to set options pattern"));
+			break;
+		case 't':
+			/* only remembered here; applied below for --all */
+			types = optarg;
+			break;
+		case 'v':
+			mnt_context_enable_verbose(cxt, TRUE);
+			break;
+		case 'V':
+			print_version();
+			break;
+		case 'N':
+		{
+			/* the argument is either a PID or a path to a namespace file */
+			char path[PATH_MAX];
+			pid_t pid = parse_pid(optarg);
+
+			if (pid)
+				snprintf(path, sizeof(path), "/proc/%i/ns/mnt", pid);
+
+			if (mnt_context_set_target_ns(cxt, pid ? path : optarg))
+				err(MNT_EX_SYSERR, _("failed to set target namespace to %s"), pid ? path : optarg);
+			break;
+		}
+		default:
+			errtryhelp(MNT_EX_USAGE);
+		}
+	}
+
+	/* skip the options, the rest are the filesystems to unmount */
+	argc -= optind;
+	argv += optind;
+
+	if (all) {
+		/* default list of filesystem types that --all leaves alone */
+		if (!types)
+			types = "noproc,nodevfs,nodevpts,nosysfs,norpc_pipefs,nonfsd,noselinuxfs";
+
+		mnt_context_set_fstype_pattern(cxt, types);
+		rc = umount_all(cxt);
+
+	} else if (argc < 1) {
+		warnx(_("bad usage"));
+		errtryhelp(MNT_EX_USAGE);
+
+	} else if (alltargets) {
+		while (argc--)
+			rc += umount_alltargets(cxt, *argv++, recursive);
+	} else if (recursive) {
+		while (argc--)
+			rc += umount_recursive(cxt, *argv++);
+	} else {
+		while (argc--) {
+			char *path = *argv;
+
+			/* restricted users get the path canonicalized, unless it is a tag */
+			if (mnt_context_is_restricted(cxt)
+			    && !mnt_tag_is_valid(path))
+				path = sanitize_path(path);
+
+			rc += umount_one(cxt, path);
+
+			/* free only the copy allocated by sanitize_path() */
+			if (path != *argv)
+				free(path);
+			argv++;
+		}
+	}
+
+	mnt_free_context(cxt);
+	/* the per-target codes were summed up; keep the result at most 255 */
+	return (rc < 256) ? rc : 255;
+}
+
diff --git a/sys-utils/unshare.1 b/sys-utils/unshare.1
new file mode 100644
index 0000000..746c411
--- /dev/null
+++ b/sys-utils/unshare.1
@@ -0,0 +1,266 @@
+.TH UNSHARE 1 "February 2016" "util-linux" "User Commands"
+.SH NAME
+unshare \- run program with some namespaces unshared from parent
+.SH SYNOPSIS
+.B unshare
+[options]
+.RI [ program
+.RI [ arguments ]]
+.SH DESCRIPTION
+Unshares the indicated namespaces from the parent process and then executes
+the specified \fIprogram\fR. If \fIprogram\fR is not given, then ``${SHELL}'' is
+run (default: /bin/sh).
+.PP
+The namespaces can optionally be made persistent by bind mounting
+/proc/\fIpid\fR/ns/\fItype\fR files to a filesystem path and entered with
+.BR \%nsenter (1)
+even after the \fIprogram\fR terminates (except PID namespaces where
+permanently running init process is required).
+Once a persistent \%namespace is no longer needed, it can be unpersisted with
+.BR umount (8).
+See the \fBEXAMPLES\fR section for more details.
+.PP
+The namespaces to be unshared are indicated via options.  Unshareable namespaces are:
+.TP
+.B mount namespace
+Mounting and unmounting filesystems will not affect the rest of the system,
+except for filesystems which are explicitly marked as
+shared (with \fBmount --make-shared\fP; see \fI/proc/self/mountinfo\fP or
+\fBfindmnt -o+PROPAGATION\fP for the \fBshared\fP flags).
+For further details, see
+.BR mount_namespaces (7)
+and the discussion of the
+.B CLONE_NEWNS
+flag in
+.BR clone (2).
+.sp
+.B unshare
+since util-linux version 2.27 automatically sets propagation to \fBprivate\fP
+in a new mount namespace to make sure that the new namespace is really
+unshared.  It's possible to disable this feature with option
+\fB\-\-propagation unchanged\fP.
+Note that \fBprivate\fP is the kernel default.
+.TP
+.B UTS namespace
+Setting hostname or domainname will not affect the rest of the system.
+For further details, see
+.BR namespaces (7)
+and the discussion of the
+.B CLONE_NEWUTS
+flag in
+.BR clone (2).
+.TP
+.B IPC namespace
+The process will have an independent namespace for POSIX message queues
+as well as System V \%message queues,
+semaphore sets and shared memory segments.
+For further details, see
+.BR namespaces (7)
+and the discussion of the
+.B CLONE_NEWIPC
+flag in
+.BR clone (2).
+.TP
+.B network namespace
+The process will have independent IPv4 and IPv6 stacks, IP routing tables,
+firewall rules, the \fI/proc/net\fP and \fI/sys/class/net\fP directory trees,
+sockets, etc.
+For further details, see
+.BR namespaces (7)
+and the discussion of the
+.B CLONE_NEWNET
+flag in
+.BR clone (2).
+.TP
+.B PID namespace
+Children will have a distinct set of PID-to-process mappings from their parent.
+For further details, see
+.BR pid_namespaces (7)
+and
+the discussion of the
+.B CLONE_NEWPID
+flag in
+.BR clone (2).
+.TP
+.B cgroup namespace
+The process will have a virtualized view of \fI/proc\:/self\:/cgroup\fP, and new
+cgroup mounts will be rooted at the namespace cgroup root.
+For further details, see
+.BR cgroup_namespaces (7)
+and the discussion of the
+.B CLONE_NEWCGROUP
+flag in
+.BR clone (2).
+.TP
+.B user namespace
+The process will have a distinct set of UIDs, GIDs and capabilities.
+For further details, see
+.BR user_namespaces (7)
+and the discussion of the
+.B CLONE_NEWUSER
+flag in
+.BR clone (2).
+.SH OPTIONS
+.TP
+.BR \-i , " \-\-ipc" [ =\fIfile ]
+Unshare the IPC namespace.  If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.
+.TP
+.BR \-m , " \-\-mount" [ =\fIfile ]
+Unshare the mount namespace.  If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.
+Note that \fIfile\fP has to be located on a filesystem with the propagation
+flag set to \fBprivate\fP.  Use the command \fBfindmnt -o+PROPAGATION\fP
+when not sure about the current setting.  See also the examples below.
+.TP
+.BR \-n , " \-\-net" [ =\fIfile ]
+Unshare the network namespace.  If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.
+.TP
+.BR \-p , " \-\-pid" [ =\fIfile ]
+Unshare the PID namespace.  If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.  See also the \fB--fork\fP and
+\fB--mount-proc\fP options.
+.TP
+.BR \-u , " \-\-uts" [ =\fIfile ]
+Unshare the UTS namespace.  If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.
+.TP
+.BR \-U , " \-\-user" [ =\fIfile ]
+Unshare the user namespace.  If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.
+.TP
+.BR \-C , " \-\-cgroup" [ =\fIfile ]
+Unshare the cgroup namespace.  If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.
+.TP
+.BR \-f , " \-\-fork"
+Fork the specified \fIprogram\fR as a child process of \fBunshare\fR rather than
+running it directly.  This is useful when creating a new PID namespace.
+.TP
+.BR \-\-kill\-child [ =\fIsigname ]
+When \fBunshare\fR terminates, have \fIsigname\fP be sent to the forked child process.
+Combined with \fB--pid\fR this allows for an easy and reliable killing of the entire
+process tree below \fBunshare\fR.
+If not given, \fIsigname\fP defaults to \fBSIGKILL\fR.
+This option implies \fB--fork\fR.
+.TP
+.BR \-\-mount\-proc [ =\fImountpoint ]
+Just before running the program, mount the proc filesystem at \fImountpoint\fP
+(default is /proc).  This is useful when creating a new PID namespace.  It also
+implies creating a new mount namespace since the /proc mount would otherwise
+mess up existing programs on the system.  The new proc filesystem is explicitly
+mounted as private (with MS_PRIVATE|MS_REC).
+.TP
+.BR \-r , " \-\-map\-root\-user"
+Run the program only after the current effective user and group IDs have been mapped to
+the superuser UID and GID in the newly created user namespace.  This makes it possible to
+conveniently gain capabilities needed to manage various aspects of the newly created
+namespaces (such as configuring interfaces in the network namespace or mounting filesystems in
+the mount namespace) even when run unprivileged.  As a mere convenience feature, it does not support
+more sophisticated use cases, such as mapping multiple ranges of UIDs and GIDs.
+This option implies \fB--setgroups=deny\fR.
+.TP
+.BR "\-\-propagation private" | shared | slave | unchanged
+Recursively set the mount propagation flag in the new mount namespace.  The default
+is to set the propagation to \fIprivate\fP.  It is possible to disable this feature
+with the argument \fBunchanged\fR.  The option is silently ignored when the mount
+namespace (\fB\-\-mount\fP) is not requested.
+.TP
+.BR "\-\-setgroups allow" | deny
+Allow or deny the
+.BR setgroups (2)
+system call in a user namespace.
+.sp
+To be able to call
+.BR setgroups (2),
+the calling process must at least have CAP_SETGID.
+But since Linux 3.19 a further restriction applies:
+the kernel gives permission to call
+.BR \%setgroups (2)
+only after the GID map (\fB/proc/\fIpid\fB/gid_map\fR) has been set.
+The GID map is writable by root when
+.BR \%setgroups (2)
+is enabled (i.e. \fBallow\fR, the default), and
+the GID map becomes writable by unprivileged processes when
+.BR \%setgroups (2)
+is permanently disabled (with \fBdeny\fR).
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH NOTES
+The proc and sysfs filesystems mounting as root in a user namespace have to be
+restricted so that a less privileged user can not get more access to sensitive
+files that a more privileged user made unavailable. In short the rule for proc
+and sysfs is as close to a bind mount as possible.
+.SH EXAMPLES
+.TP
+.B # unshare --fork --pid --mount-proc readlink /proc/self
+.TQ
+1
+.br
+Establish a PID namespace, ensure we're PID 1 in it against a newly mounted
+procfs instance.
+.TP
+.B $ unshare --map-root-user --user sh -c whoami
+.TQ
+root
+.br
+Establish a user namespace as an unprivileged user with a root user within it.
+.TP
+.B # touch /root/uts-ns
+.TQ
+.B # unshare --uts=/root/uts-ns hostname FOO
+.TQ
+.B # nsenter --uts=/root/uts-ns hostname
+.TQ
+FOO
+.TQ
+.B # umount /root/uts-ns
+.br
+Establish a persistent UTS namespace, and modify the hostname.  The namespace
+is then entered with \fBnsenter\fR.  The namespace is destroyed by unmounting
+the bind reference.
+.TP
+.B # mount --bind /root/namespaces /root/namespaces
+.TQ
+.B # mount --make-private /root/namespaces
+.TQ
+.B # touch /root/namespaces/mnt
+.TQ
+.B # unshare --mount=/root/namespaces/mnt
+.br
+Establish a persistent mount namespace referenced by the bind mount
+/root/namespaces/mnt.  This example shows a portable solution, because it
+makes sure that the bind mount is created on a filesystem with private propagation.
+.TP
+.B # unshare -pf --kill-child -- bash -c "(sleep 999 &) && sleep 1000" &
+.TQ
+.B # pid=$!
+.TQ
+.B # kill $pid
+.br
+Reliable killing of subprocesses of the \fIprogram\fR.
+When \fBunshare\fR gets killed, everything below it gets killed as well.
+Without it, the children of \fIprogram\fR would have been orphaned and
+re-parented to PID 1.
+
+.SH SEE ALSO
+.BR clone (2),
+.BR unshare (2),
+.BR namespaces (7),
+.BR mount (8)
+.SH AUTHORS
+.UR dottedmag@dottedmag.net
+Mikhail Gusarov
+.UE
+.br
+.UR kzak@redhat.com
+Karel Zak
+.UE
+.SH AVAILABILITY
+The unshare command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/unshare.c b/sys-utils/unshare.c
new file mode 100644
index 0000000..661665a
--- /dev/null
+++ b/sys-utils/unshare.c
@@ -0,0 +1,484 @@
+/*
+ * unshare(1) - command-line interface for unshare(2)
+ *
+ * Copyright (C) 2009 Mikhail Gusarov <dottedmag@dottedmag.net>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/prctl.h>
+
+/* we only need some defines missing in sys/mount.h, no libmount linkage */
+#include <libmount.h>
+
+#include "nls.h"
+#include "c.h"
+#include "closestream.h"
+#include "namespace.h"
+#include "exec_shell.h"
+#include "xalloc.h"
+#include "pathnames.h"
+#include "all-io.h"
+#include "signames.h"
+
+/* synchronize parent and child by pipe */
+#define PIPE_SYNC_BYTE	0x06
+
+/* 'private' is kernel default */
+#define UNSHARE_PROPAGATION_DEFAULT	(MS_REC | MS_PRIVATE)
+
+/*
+ * /proc namespace files and mountpoints for binds
+ *
+ * One entry per namespace type handled by this tool; the table is terminated
+ * by an entry whose name is NULL.  The target member starts out NULL and is
+ * filled in by set_ns_target() when a bind-mount target is requested.
+ */
+static struct namespace_file {
+	int		type;		/* CLONE_NEW* */
+	const char	*name;		/* ns/<type> */
+	const char	*target;	/* user specified target for bind mount */
+} namespace_files[] = {
+	{ .type = CLONE_NEWUSER,  .name = "ns/user" },
+	{ .type = CLONE_NEWCGROUP,.name = "ns/cgroup" },
+	{ .type = CLONE_NEWIPC,   .name = "ns/ipc"  },
+	{ .type = CLONE_NEWUTS,   .name = "ns/uts"  },
+	{ .type = CLONE_NEWNET,   .name = "ns/net"  },
+	{ .type = CLONE_NEWPID,   .name = "ns/pid"  },
+	{ .type = CLONE_NEWNS,    .name = "ns/mnt"  },
+	{ .name = NULL }
+};
+
+static int npersists;	/* number of persistent namespaces */
+
+
+/*
+ * Values of the --setgroups option.  The non-negative ids double as indexes
+ * into setgroups_strings[]; SETGROUPS_NONE is the "option not given" state.
+ */
+enum {
+	SETGROUPS_NONE = -1,
+	SETGROUPS_DENY = 0,
+	SETGROUPS_ALLOW = 1,
+};
+
+/* keywords accepted by --setgroups; also what gets written to the proc file */
+static const char *setgroups_strings[] =
+{
+	[SETGROUPS_DENY] = "deny",
+	[SETGROUPS_ALLOW] = "allow"
+};
+
+/*
+ * Translate a --setgroups argument into its SETGROUPS_* id, i.e. the index
+ * of the matching keyword in setgroups_strings[].  An unknown keyword
+ * terminates the program with an error message.
+ */
+static int setgroups_str2id(const char *str)
+{
+	size_t id = 0;
+
+	while (id < ARRAY_SIZE(setgroups_strings)) {
+		if (!strcmp(str, setgroups_strings[id]))
+			return id;
+		id++;
+	}
+
+	errx(EXIT_FAILURE, _("unsupported --setgroups argument '%s'"), str);
+}
+
+/*
+ * Write the keyword that belongs to @action ("deny" or "allow") to
+ * _PATH_PROC_SETGROUPS.
+ *
+ * An @action outside of the setgroups_strings[] table (e.g. SETGROUPS_NONE)
+ * is ignored, and so is a missing proc file (ENOENT).  Any other open or
+ * write failure terminates the program.
+ */
+static void setgroups_control(int action)
+{
+	const char *path = _PATH_PROC_SETGROUPS;
+	const char *keyword;
+	int fd;
+
+	if (!(action >= 0 && (size_t) action < ARRAY_SIZE(setgroups_strings)))
+		return;
+	keyword = setgroups_strings[action];
+
+	fd = open(path, O_WRONLY);
+	if (fd < 0 && errno == ENOENT)
+		return;		/* nothing to control */
+	if (fd < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), path);
+
+	if (write_all(fd, keyword, strlen(keyword)) != 0)
+		err(EXIT_FAILURE, _("write failed %s"), path);
+	close(fd);
+}
+
+/*
+ * Write a single-entry id mapping "<from> <to> 1" to @file (main() passes
+ * the uid_map and gid_map proc files).  Open and write failures are fatal.
+ */
+static void map_id(const char *file, uint32_t from, uint32_t to)
+{
+	char *line = NULL;
+	int fd = open(file, O_WRONLY);
+
+	if (fd < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), file);
+
+	xasprintf(&line, "%u %u 1", from, to);
+	if (write_all(fd, line, strlen(line)) != 0)
+		err(EXIT_FAILURE, _("write failed %s"), file);
+
+	close(fd);
+	free(line);
+}
+
+/*
+ * Convert a --propagation keyword into mount(2) flags.  "unchanged" maps
+ * to 0, the other keywords to MS_REC combined with the matching propagation
+ * flag.  An unknown keyword terminates the program.
+ */
+static unsigned long parse_propagation(const char *str)
+{
+	static const struct prop_opts {
+		const char *name;
+		unsigned long flag;
+	} opts[] = {
+		{ "slave",	MS_REC | MS_SLAVE },
+		{ "private",	MS_REC | MS_PRIVATE },
+		{ "shared",     MS_REC | MS_SHARED },
+		{ "unchanged",        0 }
+	};
+	const struct prop_opts *opt;
+
+	for (opt = opts; opt < opts + ARRAY_SIZE(opts); opt++)
+		if (!strcmp(opt->name, str))
+			return opt->flag;
+
+	errx(EXIT_FAILURE, _("unsupported propagation mode: %s"), str);
+}
+
+/*
+ * Apply the propagation @flags to the root filesystem ("/").  A zero
+ * @flags value means "leave it alone"; a failing mount(2) is fatal.
+ */
+static void set_propagation(unsigned long flags)
+{
+	if (flags != 0 && mount("none", "/", NULL, flags, NULL) != 0)
+		err(EXIT_FAILURE, _("cannot change root filesystem propagation"));
+}
+
+
+/*
+ * Remember @path as the bind-mount target of the namespace @type
+ * (CLONE_NEW*) and count it in npersists.
+ *
+ * Returns 0 on success or -EINVAL when @type is not in namespace_files[].
+ */
+static int set_ns_target(int type, const char *path)
+{
+	struct namespace_file *entry = namespace_files;
+
+	/* stop at the first entry of the wanted type or at the terminator */
+	while (entry->name && entry->type != type)
+		entry++;
+
+	if (!entry->name)
+		return -EINVAL;
+
+	entry->target = path;
+	npersists++;
+	return 0;
+}
+
+/*
+ * Bind mount /proc/<pid>/ns/<type> onto every namespace_files[] entry that
+ * has a target set.  A failing mount(2) is fatal; otherwise returns 0.
+ */
+static int bind_ns_files(pid_t pid)
+{
+	struct namespace_file *entry;
+	char procpath[PATH_MAX];
+
+	for (entry = namespace_files; entry->name; entry++) {
+		if (entry->target) {
+			snprintf(procpath, sizeof(procpath), "/proc/%u/%s",
+				 (unsigned) pid, entry->name);
+
+			if (mount(procpath, entry->target, NULL, MS_BIND, NULL) != 0)
+				err(EXIT_FAILURE, _("mount %s on %s failed"),
+				    procpath, entry->target);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Return the inode number of /proc/<pid>/ns/mnt.  A failing stat(2) is
+ * fatal.
+ */
+static ino_t get_mnt_ino(pid_t pid)
+{
+	char nsfile[PATH_MAX];
+	struct stat sb;
+
+	snprintf(nsfile, sizeof(nsfile), "/proc/%u/ns/mnt", (unsigned) pid);
+	if (stat(nsfile, &sb))
+		err(EXIT_FAILURE, _("cannot stat %s"), nsfile);
+
+	return sb.st_ino;
+}
+
+/*
+ * Fork a helper process that creates the namespace bind mounts on behalf
+ * of the caller.
+ *
+ * The helper blocks on the pipe until the caller sends PIPE_SYNC_BYTE, then
+ * verifies that the caller's mount namespace inode differs from the one
+ * recorded before the fork, and finally bind mounts the caller's
+ * /proc/<pid>/ns/ files.  The helper exits with EXIT_FAILURE if the inode
+ * is unchanged or if the sync byte cannot be read.
+ *
+ * @child: returns the helper's PID to the caller
+ * @fds:   returns the pipe; in the caller fds[0] is closed and set to -1,
+ *         fds[1] is the write end used to wake up the helper
+ */
+static void bind_ns_files_from_child(pid_t *child, int fds[2])
+{
+	char ch = 0;
+	pid_t ppid = getpid();
+	ino_t ino = get_mnt_ino(ppid);
+
+	if (pipe(fds) < 0)
+		err(EXIT_FAILURE, _("pipe failed"));
+
+	*child = fork();
+
+	switch (*child) {
+	case -1:
+		err(EXIT_FAILURE, _("fork failed"));
+
+	case 0:	/* child */
+		close(fds[1]);
+		fds[1] = -1;
+
+		/* wait for parent; a short read (e.g. EOF because the parent
+		 * died) and an unexpected byte are both errors */
+		if (read_all(fds[0], &ch, 1) != 1 || ch != PIPE_SYNC_BYTE)
+			err(EXIT_FAILURE, _("failed to read pipe"));
+		/* same inode: the parent has not changed its mount namespace */
+		if (get_mnt_ino(ppid) == ino)
+			exit(EXIT_FAILURE);
+		bind_ns_files(ppid);
+		exit(EXIT_SUCCESS);
+		break;
+
+	default: /* parent */
+		close(fds[0]);
+		fds[0] = -1;
+		break;
+	}
+}
+
+/* Print the help text to stdout and exit successfully. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *fp = stdout;
+
+	/* synopsis */
+	fputs(USAGE_HEADER, fp);
+	fprintf(fp, _(" %s [options] [<program> [<argument>...]]\n"),
+		program_invocation_short_name);
+
+	/* one-line description */
+	fputs(USAGE_SEPARATOR, fp);
+	fputs(_("Run a program with some namespaces unshared from the parent.\n"), fp);
+
+	/* namespace selection */
+	fputs(USAGE_OPTIONS, fp);
+	fputs(_(" -m, --mount[=<file>]      unshare mounts namespace\n"), fp);
+	fputs(_(" -u, --uts[=<file>]        unshare UTS namespace (hostname etc)\n"), fp);
+	fputs(_(" -i, --ipc[=<file>]        unshare System V IPC namespace\n"), fp);
+	fputs(_(" -n, --net[=<file>]        unshare network namespace\n"), fp);
+	fputs(_(" -p, --pid[=<file>]        unshare pid namespace\n"), fp);
+	fputs(_(" -U, --user[=<file>]       unshare user namespace\n"), fp);
+	fputs(_(" -C, --cgroup[=<file>]     unshare cgroup namespace\n"), fp);
+
+	/* process handling */
+	fputs(USAGE_SEPARATOR, fp);
+	fputs(_(" -f, --fork                fork before launching <program>\n"), fp);
+	fputs(_(" -r, --map-root-user       map current user to root (implies --user)\n"), fp);
+
+	/* long-only options */
+	fputs(USAGE_SEPARATOR, fp);
+	fputs(_(" --kill-child[=<signame>]  when dying, kill the forked child (implies --fork)\n"
+		"                             defaults to SIGKILL\n"), fp);
+	fputs(_(" --mount-proc[=<dir>]      mount proc filesystem first (implies --mount)\n"), fp);
+	fputs(_(" --propagation slave|shared|private|unchanged\n"
+	        "                           modify mount propagation in mount namespace\n"), fp);
+	fputs(_(" --setgroups allow|deny    control the setgroups syscall in user namespaces\n"), fp);
+
+	/* generic tail */
+	fputs(USAGE_SEPARATOR, fp);
+	printf(USAGE_HELP_OPTIONS(27));
+	printf(USAGE_MAN_TAIL("unshare(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Entry point: parse the options, unshare the requested namespaces,
+ * optionally make them persistent by bind mounts, optionally fork, set up
+ * the uid/gid maps, mount propagation and /proc, and finally execute the
+ * given program (or a shell when no program is given).
+ */
+int main(int argc, char *argv[])
+{
+	/* identifiers of the long-only options */
+	enum {
+		OPT_MOUNTPROC = CHAR_MAX + 1,
+		OPT_PROPAGATION,
+		OPT_SETGROUPS,
+		OPT_KILLCHILD
+	};
+	static const struct option longopts[] = {
+		{ "help",          no_argument,       NULL, 'h'             },
+		{ "version",       no_argument,       NULL, 'V'             },
+
+		{ "mount",         optional_argument, NULL, 'm'             },
+		{ "uts",           optional_argument, NULL, 'u'             },
+		{ "ipc",           optional_argument, NULL, 'i'             },
+		{ "net",           optional_argument, NULL, 'n'             },
+		{ "pid",           optional_argument, NULL, 'p'             },
+		{ "user",          optional_argument, NULL, 'U'             },
+		{ "cgroup",        optional_argument, NULL, 'C'             },
+
+		{ "fork",          no_argument,       NULL, 'f'             },
+		{ "kill-child",    optional_argument, NULL, OPT_KILLCHILD   },
+		{ "mount-proc",    optional_argument, NULL, OPT_MOUNTPROC   },
+		{ "map-root-user", no_argument,       NULL, 'r'             },
+		{ "propagation",   required_argument, NULL, OPT_PROPAGATION },
+		{ "setgroups",     required_argument, NULL, OPT_SETGROUPS   },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	int setgrpcmd = SETGROUPS_NONE;
+	int unshare_flags = 0;
+	int c, forkit = 0, maproot = 0;
+	int kill_child_signo = 0; /* 0 means --kill-child was not used */
+	const char *procmnt = NULL;
+	pid_t pid = 0;
+	int fds[2];
+	int status;
+	unsigned long propagation = UNSHARE_PROPAGATION_DEFAULT;
+	/* sampled before unshare(); used later as the ids mapped to 0 */
+	uid_t real_euid = geteuid();
+	gid_t real_egid = getegid();
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv, "+fhVmuinpCUr", longopts, NULL)) != -1) {
+		switch (c) {
+		case 'f':
+			forkit = 1;
+			break;
+		case 'h':
+			usage();
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		/* namespace options: the optional argument is a bind-mount
+		 * target that makes the namespace persistent */
+		case 'm':
+			unshare_flags |= CLONE_NEWNS;
+			if (optarg)
+				set_ns_target(CLONE_NEWNS, optarg);
+			break;
+		case 'u':
+			unshare_flags |= CLONE_NEWUTS;
+			if (optarg)
+				set_ns_target(CLONE_NEWUTS, optarg);
+			break;
+		case 'i':
+			unshare_flags |= CLONE_NEWIPC;
+			if (optarg)
+				set_ns_target(CLONE_NEWIPC, optarg);
+			break;
+		case 'n':
+			unshare_flags |= CLONE_NEWNET;
+			if (optarg)
+				set_ns_target(CLONE_NEWNET, optarg);
+			break;
+		case 'p':
+			unshare_flags |= CLONE_NEWPID;
+			if (optarg)
+				set_ns_target(CLONE_NEWPID, optarg);
+			break;
+		case 'U':
+			unshare_flags |= CLONE_NEWUSER;
+			if (optarg)
+				set_ns_target(CLONE_NEWUSER, optarg);
+			break;
+		case 'C':
+			unshare_flags |= CLONE_NEWCGROUP;
+			if (optarg)
+				set_ns_target(CLONE_NEWCGROUP, optarg);
+			break;
+		case OPT_MOUNTPROC:
+			/* --mount-proc implies --mount */
+			unshare_flags |= CLONE_NEWNS;
+			procmnt = optarg ? optarg : "/proc";
+			break;
+		case 'r':
+			/* --map-root-user implies --user */
+			unshare_flags |= CLONE_NEWUSER;
+			maproot = 1;
+			break;
+		case OPT_SETGROUPS:
+			setgrpcmd = setgroups_str2id(optarg);
+			break;
+		case OPT_PROPAGATION:
+			propagation = parse_propagation(optarg);
+			break;
+		case OPT_KILLCHILD:
+			/* --kill-child implies --fork */
+			forkit = 1;
+			if (optarg) {
+				if ((kill_child_signo = signame_to_signum(optarg)) < 0)
+					errx(EXIT_FAILURE, _("unknown signal: %s"),
+					     optarg);
+			} else {
+				kill_child_signo = SIGKILL;
+			}
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* persistent namespaces together with a new mount namespace: the
+	 * bind mounts are done by a helper forked before unshare() */
+	if (npersists && (unshare_flags & CLONE_NEWNS))
+		bind_ns_files_from_child(&pid, fds);
+
+	if (-1 == unshare(unshare_flags))
+		err(EXIT_FAILURE, _("unshare failed"));
+
+	if (npersists) {
+		if (pid && (unshare_flags & CLONE_NEWNS)) {
+			int rc;
+			char ch = PIPE_SYNC_BYTE;
+
+			/* signal child we are ready */
+			write_all(fds[1], &ch, 1);
+			close(fds[1]);
+			fds[1] = -1;
+
+			/* wait for bind_ns_files_from_child() */
+			do {
+				rc = waitpid(pid, &status, 0);
+				if (rc < 0) {
+					/* 'continue' re-evaluates rc < 0, so
+					 * an interrupted wait is retried */
+					if (errno == EINTR)
+						continue;
+					err(EXIT_FAILURE, _("waitpid failed"));
+				}
+				/* propagate the helper's failure */
+				if (WIFEXITED(status) &&
+				    WEXITSTATUS(status) != EXIT_SUCCESS)
+					return WEXITSTATUS(status);
+			} while (rc < 0);
+		} else
+			/* simple way, just bind */
+			bind_ns_files(getpid());
+	}
+
+	if (forkit) {
+		pid = fork();
+
+		switch(pid) {
+		case -1:
+			err(EXIT_FAILURE, _("fork failed"));
+		case 0:	/* child */
+			break;
+		default: /* parent */
+			/* the parent only waits and mirrors the child's fate:
+			 * same exit code, or the same signal sent to itself */
+			if (waitpid(pid, &status, 0) == -1)
+				err(EXIT_FAILURE, _("waitpid failed"));
+			if (WIFEXITED(status))
+				return WEXITSTATUS(status);
+			else if (WIFSIGNALED(status))
+				kill(getpid(), WTERMSIG(status));
+			err(EXIT_FAILURE, _("child exit failed"));
+		}
+	}
+
+	/* --kill-child: ask for kill_child_signo when the parent dies */
+	if (kill_child_signo != 0 && prctl(PR_SET_PDEATHSIG, kill_child_signo) < 0)
+		err(EXIT_FAILURE, "prctl failed");
+
+	if (maproot) {
+		if (setgrpcmd == SETGROUPS_ALLOW)
+			errx(EXIT_FAILURE, _("options --setgroups=allow and "
+					"--map-root-user are mutually exclusive"));
+
+		/* since Linux 3.19 unprivileged writing of /proc/self/gid_map
+		 * has been disabled unless /proc/self/setgroups is written
+		 * first to permanently disable the ability to call setgroups
+		 * in that user namespace. */
+		setgroups_control(SETGROUPS_DENY);
+		map_id(_PATH_PROC_UIDMAP, 0, real_euid);
+		map_id(_PATH_PROC_GIDMAP, 0, real_egid);
+
+	} else if (setgrpcmd != SETGROUPS_NONE)
+		setgroups_control(setgrpcmd);
+
+	if ((unshare_flags & CLONE_NEWNS) && propagation)
+		set_propagation(propagation);
+
+	/* --mount-proc: make the mountpoint private first, then mount a
+	 * new proc instance on it */
+	if (procmnt &&
+	    (mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL) != 0 ||
+	     mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0))
+			err(EXIT_FAILURE, _("mount %s failed"), procmnt);
+
+	/* run the requested program, or a shell when none was given */
+	if (optind < argc) {
+		execvp(argv[optind], argv + optind);
+		errexec(argv[optind]);
+	}
+	exec_shell();
+}
diff --git a/sys-utils/wdctl.8 b/sys-utils/wdctl.8
new file mode 100644
index 0000000..7edf808
--- /dev/null
+++ b/sys-utils/wdctl.8
@@ -0,0 +1,70 @@
+.\" wdctl.8 --
+.\" Copyright (C) 2012 Karel Zak <kzak@redhat.com>
+.\" May be distributed under the GNU General Public License
+.TH WDCTL "8" "July 2014" "util-linux" "System Administration"
+.SH NAME
+wdctl \- show hardware watchdog status
+.SH SYNOPSIS
+.B wdctl
+[options]
+.RI [ device ...]
+.SH DESCRIPTION
+Show hardware watchdog status.  The default device is
+.IR /dev/watchdog .
+If more than one device is specified then the output is separated by
+one blank line.
+.PP
+Note that the number of supported watchdog features is hardware specific.
+.SH OPTIONS
+.TP
+.BR \-f , " \-\-flags " \fIlist
+Print only the specified flags.
+.TP
+.BR \-F , " \-\-noflags"
+Do not print information about flags.
+.TP
+.BR \-I , " \-\-noident"
+Do not print watchdog identity information.
+.TP
+.BR \-n , " \-\-noheadings"
+Do not print a header line for flags table.
+.IP "\fB\-o\fR, \fB\-\-output \fIlist\fP"
+Define the output columns to use in table of watchdog flags.  If no
+output arrangement is specified, then a default set is used.  Use
+.B \-\-help
+to get list of all supported columns.
+.TP
+.BR \-O , " \-\-oneline"
+Print all wanted information on one line in key="value" output format.
+.TP
+.BR \-r , " \-\-raw"
+Use the raw output format.
+.TP
+.BR \-s , " \-\-settimeout " \fIseconds
+Set the watchdog timeout in seconds.
+.TP
+.BR \-T , " \-\-notimeouts"
+Do not print watchdog timeouts.
+.IP "\fB\-x\fR, \fB\-\-flags\-only\fP"
+Same as \fB\-I \-T\fP.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH AUTHORS
+.MT kzak@\:redhat\:.com
+Karel Zak
+.ME
+.br
+.MT lennart@\:poettering\:.net
+Lennart Poettering
+.ME
+.SH AVAILABILITY
+The
+.B wdctl
+command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/wdctl.c b/sys-utils/wdctl.c
new file mode 100644
index 0000000..642db85
--- /dev/null
+++ b/sys-utils/wdctl.c
@@ -0,0 +1,618 @@
+/*
+ * wdctl(8) - show hardware watchdog status
+ *
+ * Copyright (C) 2012 Lennart Poettering
+ * Copyright (C) 2012 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <sys/ioctl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+#include <assert.h>
+#include <linux/watchdog.h>
+
+#include <libsmartcols.h>
+
+#include "nls.h"
+#include "c.h"
+#include "xalloc.h"
+#include "closestream.h"
+#include "optutils.h"
+#include "pathnames.h"
+#include "strutils.h"
+#include "carefulputc.h"
+
+/*
+ * since 2.6.18
+ */
+#ifndef WDIOC_SETPRETIMEOUT
+# define WDIOC_SETPRETIMEOUT    _IOWR(WATCHDOG_IOCTL_BASE, 8, int)
+# define WDIOC_GETPRETIMEOUT	_IOR(WATCHDOG_IOCTL_BASE, 9, int)
+# define WDIOC_GETTIMELEFT	_IOR(WATCHDOG_IOCTL_BASE, 10, int)
+# define WDIOF_POWEROVER	0x0040	/* Power over voltage */
+# define WDIOF_SETTIMEOUT	0x0080  /* Set timeout (in seconds) */
+# define WDIOF_MAGICCLOSE	0x0100	/* Supports magic close char */
+# define WDIOF_PRETIMEOUT	0x0200  /* Pretimeout (in seconds), get/set */
+# define WDIOF_KEEPALIVEPING	0x8000	/* Keep alive ping reply */
+#endif
+
+/*
+ * since 3.5
+ */
+#ifndef WDIOF_ALARMONLY
+# define WDIOF_ALARMONLY	0x0400	/* Watchdog triggers a management or
+					   other external alarm not a reboot */
+#endif
+
+/* basic output flags */
+static int no_headings;
+static int raw;
+
+/* one watchdog flag: the WDIOF_* bit, its name and a description */
+struct wdflag {
+	uint32_t	flag;
+	const char	*name;
+	const char	*description;
+};
+
+/*
+ * All flags known to wdctl.  The names are what name2bit() matches against
+ * (case-insensitively); the descriptions are only marked for translation
+ * here by N_().
+ */
+static const struct wdflag wdflags[] = {
+	{ WDIOF_CARDRESET,     "CARDRESET",  N_("Card previously reset the CPU") },
+	{ WDIOF_EXTERN1,       "EXTERN1",    N_("External relay 1") },
+	{ WDIOF_EXTERN2,       "EXTERN2",    N_("External relay 2") },
+	{ WDIOF_FANFAULT,      "FANFAULT",   N_("Fan failed") },
+	{ WDIOF_KEEPALIVEPING, "KEEPALIVEPING", N_("Keep alive ping reply") },
+	{ WDIOF_MAGICCLOSE,    "MAGICCLOSE", N_("Supports magic close char") },
+	{ WDIOF_OVERHEAT,      "OVERHEAT",   N_("Reset due to CPU overheat") },
+	{ WDIOF_POWEROVER,     "POWEROVER",  N_("Power over voltage") },
+	{ WDIOF_POWERUNDER,    "POWERUNDER", N_("Power bad/power fault") },
+	{ WDIOF_PRETIMEOUT,    "PRETIMEOUT", N_("Pretimeout (in seconds)") },
+	{ WDIOF_SETTIMEOUT,    "SETTIMEOUT", N_("Set timeout (in seconds)") },
+	{ WDIOF_ALARMONLY,     "ALARMONLY",  N_("Not trigger reboot") }
+};
+
+
+/* column names */
+struct colinfo {
+	const char *name; /* header */
+	double	   whint; /* width hint (N < 1 is in percent of termwidth) */
+	int	   flags; /* SCOLS_FL_* */
+	const char *help;
+};
+
+/* column ids; used as indexes into infos[] */
+enum { COL_FLAG, COL_DESC, COL_STATUS, COL_BSTATUS, COL_DEVICE };
+
+/* columns descriptions */
+static struct colinfo infos[] = {
+	[COL_FLAG]    = { "FLAG",        14,  0, N_("flag name") },
+	[COL_DESC]    = { "DESCRIPTION", 0.1, SCOLS_FL_TRUNC, N_("flag description") },
+	[COL_STATUS]  = { "STATUS",      1,   SCOLS_FL_RIGHT, N_("flag status") },
+	[COL_BSTATUS] = { "BOOT-STATUS", 1,   SCOLS_FL_RIGHT, N_("flag boot status") },
+	[COL_DEVICE]  = { "DEVICE",      0.1, 0, N_("watchdog device name") }
+
+};
+
+/* ids (COL_*) of the columns selected for output, in output order, and
+ * the number of used items in columns[] */
+static int columns[ARRAY_SIZE(infos) * 2];
+static int ncolumns;
+
+/* everything read_watchdog() collects about one watchdog device */
+struct wdinfo {
+	char		*device;	/* device path */
+
+	/* results of WDIOC_GETTIMEOUT, WDIOC_GETTIMELEFT and
+	 * WDIOC_GETPRETIMEOUT; valid only if the has_* bit below is set */
+	int		timeout;
+	int		timeleft;
+	int		pretimeout;
+
+	uint32_t	status;		/* WDIOC_GETSTATUS result */
+	uint32_t	bstatus;	/* WDIOC_GETBOOTSTATUS result */
+
+	struct watchdog_info ident;	/* WDIOC_GETSUPPORT result */
+
+	/* set when the corresponding ioctl succeeded */
+	unsigned int	has_timeout : 1,
+			has_timeleft : 1,
+			has_pretimeout : 1;
+};
+
+/*
+ * Converts a flag name to its flag bit.
+ *
+ * The first @namesz bytes of @name are compared case-insensitively with
+ * the names in wdflags[]; the table name has to end right after @namesz
+ * characters.  Returns the flag, or -1 (after a warning) if nothing
+ * matches.
+ */
+static long name2bit(const char *name, size_t namesz)
+{
+	const struct wdflag *fl;
+
+	for (fl = wdflags; fl < wdflags + ARRAY_SIZE(wdflags); fl++) {
+		if (strncasecmp(name, fl->name, namesz) != 0)
+			continue;
+		if (fl->name[namesz] == '\0')
+			return fl->flag;
+	}
+	warnx(_("unknown flag: %s"), name);
+	return -1;
+}
+
+/*
+ * Converts a column name to its COL_* id.
+ *
+ * The first @namesz bytes of @name are compared case-insensitively with
+ * the headers in infos[]; the header has to end right after @namesz
+ * characters.  Returns the id, or -1 (after a warning) if nothing matches.
+ */
+static int column2id(const char *name, size_t namesz)
+{
+	size_t id = 0;
+
+	while (id < ARRAY_SIZE(infos)) {
+		const char *header = infos[id].name;
+
+		if (strncasecmp(name, header, namesz) == 0 &&
+		    header[namesz] == '\0')
+			return id;
+		id++;
+	}
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/* Returns the COL_* id stored at position @num of the columns[] array. */
+static int get_column_id(int num)
+{
+	int id;
+
+	assert(num < ncolumns);
+	id = columns[num];
+	assert(id < (int) ARRAY_SIZE(infos));
+
+	return id;
+}
+
+/* Returns the infos[] entry of the column at position @num of columns[]. */
+static struct colinfo *get_column_info(unsigned num)
+{
+	int id = get_column_id(num);
+
+	return infos + id;
+}
+
+/* Print the help text, including the list of columns, and exit successfully. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *fp = stdout;
+	size_t col;
+
+	/* synopsis */
+	fputs(USAGE_HEADER, fp);
+	fprintf(fp,
+	      _(" %s [options] [<device> ...]\n"), program_invocation_short_name);
+
+	/* one-line description */
+	fputs(USAGE_SEPARATOR, fp);
+	fputs(_("Show the status of the hardware watchdog.\n"), fp);
+
+	/* options */
+	fputs(USAGE_OPTIONS, fp);
+	fputs(_(" -f, --flags <list>     print selected flags only\n"
+		" -F, --noflags          don't print information about flags\n"
+		" -I, --noident          don't print watchdog identity information\n"
+		" -n, --noheadings       don't print headings for flags table\n"
+		" -O, --oneline          print all information on one line\n"
+		" -o, --output <list>    output columns of the flags\n"
+		" -r, --raw              use raw output format for flags table\n"
+		" -T, --notimeouts       don't print watchdog timeouts\n"
+		" -s, --settimeout <sec> set watchdog timeout\n"
+		" -x, --flags-only       print only flags table (same as -I -T)\n"), fp);
+
+	fputs(USAGE_SEPARATOR, fp);
+	printf(USAGE_HELP_OPTIONS(24));
+	fputs(USAGE_SEPARATOR, fp);
+
+	fprintf(fp, _("The default device is %s.\n"), _PATH_WATCHDOG_DEV);
+
+	/* one line per available output column */
+	fputs(USAGE_COLUMNS, fp);
+	col = 0;
+	while (col < ARRAY_SIZE(infos)) {
+		fprintf(fp, " %13s  %s\n", infos[col].name, _(infos[col].help));
+		col++;
+	}
+
+	printf(USAGE_MAN_TAIL("wdctl(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Append one line describing the flag @fl of the watchdog @wd to @table.
+ * Cells are filled in according to the currently selected output columns.
+ * Failures are reported by warn() only; on a failed cell the rest of the
+ * line is left unset.
+ */
+static void add_flag_line(struct libscols_table *table, struct wdinfo *wd, const struct wdflag *fl)
+{
+	struct libscols_line *ln = scols_table_new_line(table, NULL);
+	int col;
+
+	if (!ln) {
+		warn(_("failed to allocate output line"));
+		return;
+	}
+
+	for (col = 0; col < ncolumns; col++) {
+		const char *data;
+
+		switch (get_column_id(col)) {
+		case COL_FLAG:
+			data = fl->name;
+			break;
+		case COL_DESC:
+			data = fl->description;
+			break;
+		case COL_STATUS:
+			data = wd->status & fl->flag ? "1" : "0";
+			break;
+		case COL_BSTATUS:
+			data = wd->bstatus & fl->flag ? "1" : "0";
+			break;
+		case COL_DEVICE:
+			data = wd->device;
+			break;
+		default:
+			data = NULL;
+			break;
+		}
+
+		/* nothing to store for this cell */
+		if (!data)
+			continue;
+
+		if (scols_line_set_data(ln, col, data) != 0) {
+			warn(_("failed to add output data"));
+			return;
+		}
+	}
+}
+
+/*
+ * Print the table of flags supported by the watchdog @wd.
+ *
+ * @wanted is a mask of flags to print; zero means all supported flags.
+ * Flag bits reported by the device which are not in wdflags[] are reported
+ * by a warning.
+ *
+ * Returns 0 on success, -1 if the table or a column cannot be allocated.
+ */
+static int show_flags(struct wdinfo *wd, uint32_t wanted)
+{
+	size_t i;
+	int rc = -1;
+	struct libscols_table *table;
+	uint32_t flags;
+
+	scols_init_debug(0);
+
+	/* create output table */
+	table = scols_new_table();
+	if (!table) {
+		warn(_("failed to allocate output table"));
+		return -1;
+	}
+	scols_table_enable_raw(table, raw);
+	scols_table_enable_noheadings(table, no_headings);
+
+	/* define columns */
+	for (i = 0; i < (size_t) ncolumns; i++) {
+		struct colinfo *col = get_column_info(i);
+
+		if (!scols_table_new_column(table, col->name, col->whint, col->flags)) {
+			warnx(_("failed to allocate output column"));
+			goto done;
+		}
+	}
+
+	/* fill-in table with data
+	 * -- one line for each supported flag (option)	 */
+	flags = wd->ident.options;
+
+	for (i = 0; i < ARRAY_SIZE(wdflags); i++) {
+		if (wanted && !(wanted & wdflags[i].flag))
+			; /* ignore */
+		else if (flags & wdflags[i].flag)
+			add_flag_line(table, wd, &wdflags[i]);
+
+		/* known flag, drop it from the set of unexplained bits */
+		flags &= ~wdflags[i].flag;
+	}
+
+	/* warnx() terminates the message with a newline on its own */
+	if (flags)
+		warnx(_("%s: unknown flags 0x%x"), wd->device, flags);
+
+	scols_print_table(table);
+	rc = 0;
+done:
+	scols_unref_table(table);
+	return rc;
+}
+/*
+ * Set the timeout of the watchdog @wd->device to @timeout seconds.
+ *
+ * Warning: successfully opened watchdog has to be properly closed with magic
+ * close character otherwise the machine will be rebooted!
+ *
+ * Don't use err() or exit() here!
+ *
+ * All signals are blocked while the device is open; the original signal
+ * mask is restored on every return path.
+ *
+ * Returns 0 on success, -1 if the device cannot be opened, or the errno of
+ * the failed WDIOC_SETTIMEOUT ioctl.
+ */
+static int set_watchdog(struct wdinfo *wd, int timeout)
+{
+	int fd;
+	sigset_t sigs, oldsigs;
+	int rc = 0;
+
+	assert(wd->device);
+
+	sigemptyset(&oldsigs);
+	sigfillset(&sigs);
+	sigprocmask(SIG_BLOCK, &sigs, &oldsigs);
+
+	fd = open(wd->device, O_WRONLY|O_CLOEXEC);
+
+	if (fd < 0) {
+		int errsv = errno;	/* warnx() may modify errno */
+
+		if (errsv == EBUSY)
+			warnx(_("%s: watchdog already in use, terminating."),
+					wd->device);
+		errno = errsv;
+		warn(_("cannot open %s"), wd->device);
+		/* don't leave all signals blocked for the caller */
+		sigprocmask(SIG_SETMASK, &oldsigs, NULL);
+		return -1;
+	}
+
+	for (;;) {
+		/* We opened this only to change the timeout, not to arm
+		 * it hence use the magic close character */
+		static const char v = 'V';
+
+		if (write(fd, &v, 1) >= 0)
+			break;
+		if (errno != EINTR) {
+			warn(_("%s: failed to disarm watchdog"), wd->device);
+			break;
+		}
+		/* Let's try hard, since if we don't get this right
+		 * the machine might end up rebooting. */
+	}
+
+	if (ioctl(fd, WDIOC_SETTIMEOUT, &timeout) != 0) {
+		rc = errno;
+		warn(_("cannot set timeout for %s"), wd->device);
+	}
+
+	if (close(fd))
+		warn(_("write failed"));
+	sigprocmask(SIG_SETMASK, &oldsigs, NULL);
+
+	/* report success only if the timeout has really been set */
+	if (rc == 0)
+		printf(P_("Timeout has been set to %d second.\n",
+			  "Timeout has been set to %d seconds.\n", timeout), timeout);
+
+	return rc;
+}
+
+/*
+ * Read identity, status, boot status and timeouts of the watchdog
+ * @wd->device into @wd.
+ *
+ * Warning: successfully opened watchdog has to be properly closed with magic
+ * close character otherwise the machine will be rebooted!
+ *
+ * Don't use err() or exit() here!
+ *
+ * All signals are blocked while the device is open; the original signal
+ * mask is restored on every return path.
+ *
+ * Returns 0 if the device has been opened (a failed WDIOC_GETSUPPORT is
+ * reported by a warning only), or -1 if the device cannot be opened.
+ */
+static int read_watchdog(struct wdinfo *wd)
+{
+	int fd;
+	sigset_t sigs, oldsigs;
+
+	assert(wd->device);
+
+	sigemptyset(&oldsigs);
+	sigfillset(&sigs);
+	sigprocmask(SIG_BLOCK, &sigs, &oldsigs);
+
+	fd = open(wd->device, O_WRONLY|O_CLOEXEC);
+
+	if (fd < 0) {
+		int errsv = errno;	/* warnx() may modify errno */
+
+		if (errsv == EBUSY)
+			warnx(_("%s: watchdog already in use, terminating."),
+					wd->device);
+		errno = errsv;
+		warn(_("cannot open %s"), wd->device);
+		/* don't leave all signals blocked for the caller */
+		sigprocmask(SIG_SETMASK, &oldsigs, NULL);
+		return -1;
+	}
+
+	if (ioctl(fd, WDIOC_GETSUPPORT, &wd->ident) < 0)
+		warn(_("%s: failed to get information about watchdog"), wd->device);
+	else {
+		ioctl(fd, WDIOC_GETSTATUS, &wd->status);
+		ioctl(fd, WDIOC_GETBOOTSTATUS, &wd->bstatus);
+
+		/* the timeouts are optional, remember which ones we have */
+		if (ioctl(fd, WDIOC_GETTIMEOUT, &wd->timeout) >= 0)
+			wd->has_timeout = 1;
+		if (ioctl(fd, WDIOC_GETPRETIMEOUT, &wd->pretimeout) >= 0)
+			wd->has_pretimeout = 1;
+		if (ioctl(fd, WDIOC_GETTIMELEFT, &wd->timeleft) >= 0)
+			wd->has_timeleft = 1;
+	}
+
+	for (;;) {
+		/* We just opened this to query the state, not to arm
+		 * it hence use the magic close character */
+		static const char v = 'V';
+
+		if (write(fd, &v, 1) >= 0)
+			break;
+		if (errno != EINTR) {
+			warn(_("%s: failed to disarm watchdog"), wd->device);
+			break;
+		}
+		/* Let's try hard, since if we don't get this right
+		 * the machine might end up rebooting. */
+	}
+
+	if (close(fd))
+		warn(_("write failed"));
+	sigprocmask(SIG_SETMASK, &oldsigs, NULL);
+
+	return 0;
+}
+
+/*
+ * Print everything known about @wd on a single line of KEY="value" pairs
+ * prefixed by the device name.
+ *
+ * @wanted restricts the printed flags (zero means all supported ones);
+ * @noident, @notimeouts and @noflags disable the respective groups.
+ */
+static void print_oneline(struct wdinfo *wd, uint32_t wanted,
+		int noident, int notimeouts, int noflags)
+{
+	printf("%s:", wd->device);
+
+	if (!noident) {
+		printf(" VERSION=\"%x\"", wd->ident.firmware_version);
+
+		printf(" IDENTITY=");
+		fputs_quoted((char *) wd->ident.identity, stdout);
+	}
+
+	if (!notimeouts && wd->has_timeout)
+		printf(" TIMEOUT=\"%i\"", wd->timeout);
+	if (!notimeouts && wd->has_pretimeout)
+		printf(" PRETIMEOUT=\"%i\"", wd->pretimeout);
+	if (!notimeouts && wd->has_timeleft)
+		printf(" TIMELEFT=\"%i\"", wd->timeleft);
+
+	if (!noflags) {
+		const uint32_t supported = wd->ident.options;
+		const struct wdflag *fl;
+
+		for (fl = wdflags; fl < wdflags + ARRAY_SIZE(wdflags); fl++) {
+			/* filtered out by the user */
+			if (wanted && !(wanted & fl->flag))
+				continue;
+			/* not supported by the device */
+			if (!(supported & fl->flag))
+				continue;
+
+			printf(" %s=\"%s\"", fl->name,
+					     wd->status & fl->flag ? "1" : "0");
+			printf(" %s_BOOT=\"%s\"", fl->name,
+					     wd->bstatus & fl->flag ? "1" : "0");
+		}
+	}
+
+	fputc('\n', stdout);
+}
+
+/* Print one "<label> <N> second(s)" line; @label is already translated. */
+static void show_one_timeout(const char *label, int seconds)
+{
+	printf(P_("%-14s %2i second\n", "%-14s %2i seconds\n", seconds),
+		  label, seconds);
+}
+
+/* Print those timeouts of @wd that the device was able to report. */
+static void show_timeouts(struct wdinfo *wd)
+{
+	if (wd->has_timeout)
+		show_one_timeout(_("Timeout:"), wd->timeout);
+	if (wd->has_pretimeout)
+		show_one_timeout(_("Pre-timeout:"), wd->pretimeout);
+	if (wd->has_timeleft)
+		show_one_timeout(_("Timeleft:"), wd->timeleft);
+}
+
+/*
+ * Entry point: parse the options and then, for every device given on the
+ * command line (or for the default device if none is given), optionally
+ * set the timeout, read the watchdog state and print it in the requested
+ * format.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if setting or reading of any
+ * device failed or an option argument is invalid.
+ */
+int main(int argc, char *argv[])
+{
+	struct wdinfo wd;
+	int c, res = EXIT_SUCCESS, count = 0;
+	char noflags = 0, noident = 0, notimeouts = 0, oneline = 0;
+	uint32_t wanted = 0;
+	int timeout = 0;
+
+	static const struct option long_opts[] = {
+		{ "flags",      required_argument, NULL, 'f' },
+		{ "flags-only", no_argument,       NULL, 'x' },
+		{ "help",	no_argument,       NULL, 'h' },
+		{ "noflags",    no_argument,       NULL, 'F' },
+		{ "noheadings", no_argument,       NULL, 'n' },
+		{ "noident",	no_argument,       NULL, 'I' },
+		{ "notimeouts", no_argument,       NULL, 'T' },
+		{ "settimeout", required_argument, NULL, 's' },
+		{ "output",     required_argument, NULL, 'o' },
+		{ "oneline",    no_argument,       NULL, 'O' },
+		{ "raw",        no_argument,       NULL, 'r' },
+		{ "version",    no_argument,       NULL, 'V' },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {       /* rows and cols in ASCII order */
+		{ 'F','f' },			/* noflags,flags*/
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	/* NOTE(review): "d:" is accepted by the option string but has no
+	 * case below, so -d ends up in the default branch */
+	while ((c = getopt_long(argc, argv,
+				"d:f:hFnITo:s:OrVx", long_opts, NULL)) != -1) {
+
+		err_exclusive_options(c, long_opts, excl, excl_st);
+
+		switch(c) {
+		case 'o':
+			ncolumns = string_to_idarray(optarg,
+						     columns, ARRAY_SIZE(columns),
+						     column2id);
+			if (ncolumns < 0)
+				return EXIT_FAILURE;
+			break;
+		case 's':
+			timeout = strtos32_or_err(optarg, _("invalid timeout argument"));
+			break;
+		case 'f':
+		{
+			/* string_to_bitmask() works with unsigned long, which
+			 * may be wider than uint32_t; go through a properly
+			 * typed temporary rather than casting the pointer */
+			unsigned long mask = wanted;
+
+			if (string_to_bitmask(optarg, &mask, name2bit) != 0)
+				return EXIT_FAILURE;
+			wanted = (uint32_t) mask;
+			break;
+		}
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();
+		case 'F':
+			noflags = 1;
+			break;
+		case 'I':
+			noident = 1;
+			break;
+		case 'T':
+			notimeouts = 1;
+			break;
+		case 'n':
+			no_headings = 1;
+			break;
+		case 'r':
+			raw = 1;
+			break;
+		case 'O':
+			oneline = 1;
+			break;
+		case 'x':
+			/* same as -I -T */
+			noident = 1;
+			notimeouts = 1;
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (!ncolumns) {
+		/* default columns */
+		columns[ncolumns++] = COL_FLAG;
+		columns[ncolumns++] = COL_DESC;
+		columns[ncolumns++] = COL_STATUS;
+		columns[ncolumns++] = COL_BSTATUS;
+	}
+
+	/* do-while: the body runs once even without a device argument, in
+	 * which case the default device is used */
+	do {
+		int rc;
+
+		memset(&wd, 0, sizeof(wd));
+
+		if (optind == argc)
+			wd.device = _PATH_WATCHDOG_DEV;
+		else
+			wd.device = argv[optind++];
+
+		/* a blank line separates the output of the devices */
+		if (count)
+			fputc('\n', stdout);
+		count++;
+
+		/* a failed --settimeout is remembered for the exit code, the
+		 * device state is printed anyway */
+		if (timeout) {
+			rc = set_watchdog(&wd, timeout);
+			if (rc) {
+				res = EXIT_FAILURE;
+			}
+		}
+
+		rc = read_watchdog(&wd);
+		if (rc) {
+			res = EXIT_FAILURE;
+			continue;
+		}
+
+		if (oneline) {
+			print_oneline(&wd, wanted, noident, notimeouts, noflags);
+			continue;
+		}
+
+		/* pretty output */
+		if (!noident) {
+			printf("%-15s%s\n", _("Device:"), wd.device);
+			printf("%-15s%s [%s %x]\n",
+					_("Identity:"),
+					wd.ident.identity,
+					_("version"),
+					wd.ident.firmware_version);
+		}
+		if (!notimeouts)
+			show_timeouts(&wd);
+		if (!noflags)
+			show_flags(&wd, wanted);
+	} while (optind < argc);
+
+	return res;
+}
diff --git a/sys-utils/zramctl.8 b/sys-utils/zramctl.8
new file mode 100644
index 0000000..c6ecdc3
--- /dev/null
+++ b/sys-utils/zramctl.8
@@ -0,0 +1,131 @@
+.TH ZRAMCTL 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+zramctl \- set up and control zram devices
+.SH SYNOPSIS
+.ad l
+Get info:
+.sp
+.in +5
+.BR zramctl " [options]"
+.sp
+.in -5
+Reset zram:
+.sp
+.in +5
+.B "zramctl \-r"
+.IR zramdev ...
+.sp
+.in -5
+Print name of first unused zram device:
+.sp
+.in +5
+.B "zramctl \-f"
+.sp
+.in -5
+Set up a zram device:
+.sp
+.in +5
+.B zramctl
+.RB [ \-f " | "\fIzramdev\fP ]
+.RB [ \-s
+.IR size ]
+.RB [ \-t
+.IR number ]
+.RB [ \-a
+.IR algorithm ]
+.sp
+.in -5
+.ad b
+.SH DESCRIPTION
+.B zramctl
+is used to quickly set up zram device parameters, to reset zram devices, and to
+query the status of used zram devices.
+.PP
+If no option is given, all non-zero size zram devices are shown.
+.PP
+Note that the \fIzramdev\fP node specified on the command line has to already exist. The command
+.B zramctl
+creates new /dev/zram<N> nodes only when the \fB\-\-find\fR option is specified. It's possible
+(and common) that after system boot /dev/zram<N> nodes are not created yet.
+.SH OPTIONS
+.TP
+.BR \-a , " \-\-algorithm lzo" | lz4 | lz4hc | deflate | 842
+Set the compression algorithm to be used for compressing data in the zram device.
+.TP
+.BR \-b , " \-\-bytes"
+Print sizes in bytes rather than in human-readable format.
+.TP
+.BR \-f , " \-\-find"
+Find the first unused zram device.  If a \fB\-\-size\fR argument is present, then
+initialize the device.
+.TP
+.BR \-n , " \-\-noheadings"
+Do not print a header line in status output.
+.TP
+.BR \-o , " \-\-output " \fIlist
+Define the status output columns to be used.  If no output arrangement is
+specified, then a default set is used.
+Use \fB\-\-help\fP to get a list of all supported columns.
+.TP
+.B \-\-output\-all
+Output all available columns.
+.TP
+.B \-\-raw
+Use the raw format for status output.
+.TP
+.BR \-r , " \-\-reset"
+Reset the options of the specified zram device(s).  Zram device settings
+can be changed only after a reset.
+.TP
+.BR \-s , " \-\-size " \fIsize
+Create a zram device of the specified \fIsize\fR.
+Zram devices are aligned to memory pages; when the requested \fIsize\fR is
+not a multiple of the page size, it will be rounded up to the next multiple.
+When not otherwise specified, the unit of the \fIsize\fR parameter is bytes.
+.IP
+The \fIsize\fR argument may be followed by the multiplicative suffixes KiB (=1024),
+MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB"
+is optional, e.g., "K" has the same meaning as "KiB") or the suffixes
+KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB.
+.TP
+.BR \-t , " \-\-streams " \fInumber
+Set the maximum number of compression streams that can be used for the device.
+The default is one stream.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+
+.SH RETURN VALUE
+.B zramctl
+returns 0 on success, nonzero on failure.
+
+.SH FILES
+.TP
+.I /dev/zram[0..N]
+zram block devices
+
+.SH EXAMPLE
+The following commands set up a zram device with a size of one gigabyte
+and use it as swap device.
+.nf
+.IP
+# zramctl \-\-find \-\-size 1024M
+/dev/zram0
+# mkswap /dev/zram0
+# swapon /dev/zram0
+ ...
+# swapoff /dev/zram0
+# zramctl \-\-reset /dev/zram0
+.fi
+.SH SEE ALSO
+.UR http://git.\:kernel.\:org\:/cgit\:/linux\:/kernel\:/git\:/torvalds\:/linux.git\:/tree\:/Documentation\:/blockdev\:/zram.txt
+Linux kernel documentation
+.UE .
+.SH AUTHORS
+.nf
+Timofey Titovets <nefelim4ag@gmail.com>
+Karel Zak <kzak@redhat.com>
+.fi
+.SH AVAILABILITY
+The zramctl command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/zramctl.c b/sys-utils/zramctl.c
new file mode 100644
index 0000000..69267c8
--- /dev/null
+++ b/sys-utils/zramctl.c
@@ -0,0 +1,765 @@
+/*
+ * zramctl - control compressed block devices in RAM
+ *
+ * Copyright (c) 2014 Timofey Titovets <Nefelim4ag@gmail.com>
+ * Copyright (C) 2014 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include <libsmartcols.h>
+
+#include "c.h"
+#include "nls.h"
+#include "closestream.h"
+#include "strutils.h"
+#include "xalloc.h"
+#include "sysfs.h"
+#include "optutils.h"
+#include "ismounted.h"
+#include "strv.h"
+#include "path.h"
+#include "pathnames.h"
+
+/*#define CONFIG_ZRAM_DEBUG*/
+
+/*
+ * DBG(x): with CONFIG_ZRAM_DEBUG defined, write "zram: " to stderr, execute
+ * the statement x and finish the line; without it, expand to nothing.
+ */
+#ifndef CONFIG_ZRAM_DEBUG
+# define DBG(x)
+#else
+# define DBG(x) \
+	do { \
+		fputs("zram: ", stderr); \
+		x; \
+		fputc('\n', stderr); \
+	} while (0)
+#endif
+
+/* static description of one status output column */
+struct colinfo {
+	const char *name;	/* column header; also the name column_name_to_id() matches */
+	double whint;		/* width hint handed to scols_table_new_column() */
+	int flags;		/* flags handed to scols_table_new_column() (0 or SCOLS_FL_*) */
+	const char *help;	/* N_()-marked description printed by usage() */
+};
+
+enum {	/* column IDs; each one is an index into infos[] */
+	COL_NAME = 0,
+	COL_DISKSIZE,
+	COL_ORIG_SIZE,
+	COL_COMP_SIZE,
+	COL_ALGORITHM,
+	COL_STREAMS,
+	COL_ZEROPAGES,
+	COL_MEMTOTAL,
+	COL_MEMLIMIT,
+	COL_MEMUSED,
+	COL_MIGRATED,
+	COL_MOUNTPOINT
+};
+
+static const struct colinfo infos[] = {	/* { name, whint, flags, help }, indexed by COL_* */
+	[COL_NAME]      = { "NAME",      0.25, 0, N_("zram device name") },
+	[COL_DISKSIZE]  = { "DISKSIZE",     5, SCOLS_FL_RIGHT, N_("limit on the uncompressed amount of data") },
+	[COL_ORIG_SIZE] = { "DATA",         5, SCOLS_FL_RIGHT, N_("uncompressed size of stored data") },
+	[COL_COMP_SIZE] = { "COMPR",        5, SCOLS_FL_RIGHT, N_("compressed size of stored data") },
+	[COL_ALGORITHM] = { "ALGORITHM",    3, 0, N_("the selected compression algorithm") },
+	[COL_STREAMS]   = { "STREAMS",      3, SCOLS_FL_RIGHT, N_("number of concurrent compress operations") },
+	[COL_ZEROPAGES] = { "ZERO-PAGES",   3, SCOLS_FL_RIGHT, N_("empty pages with no allocated memory") },
+	[COL_MEMTOTAL]  = { "TOTAL",        5, SCOLS_FL_RIGHT, N_("all memory including allocator fragmentation and metadata overhead") },
+	[COL_MEMLIMIT]  = { "MEM-LIMIT",    5, SCOLS_FL_RIGHT, N_("memory limit used to store compressed data") },
+	[COL_MEMUSED]   = { "MEM-USED",     5, SCOLS_FL_RIGHT, N_("memory zram have been consumed to store compressed data") },
+	[COL_MIGRATED]  = { "MIGRATED",     5, SCOLS_FL_RIGHT, N_("number of objects migrated by compaction") },
+	[COL_MOUNTPOINT]= { "MOUNTPOINT",0.10, SCOLS_FL_TRUNC, N_("where the device is mounted") },
+};
+
+static int columns[ARRAY_SIZE(infos) * 2] = {-1};	/* selected COL_* IDs, in output order */
+static int ncolumns;					/* number of entries used in columns[] */
+
+enum {	/* indexes into mm_stat_names[] and into the fields split from the mm_stat file */
+	MM_ORIG_DATA_SIZE = 0,
+	MM_COMPR_DATA_SIZE,
+	MM_MEM_USED_TOTAL,
+	MM_MEM_LIMIT,
+	MM_MEM_USED_MAX,
+	MM_ZERO_PAGES,
+	MM_NUM_MIGRATED
+};
+
+static const char *mm_stat_names[] = {	/* per-value sysfs attribute names, read by get_mm_stat() when mm_stat is unusable */
+	[MM_ORIG_DATA_SIZE]  = "orig_data_size",
+	[MM_COMPR_DATA_SIZE] = "compr_data_size",
+	[MM_MEM_USED_TOTAL]  = "mem_used_total",
+	[MM_MEM_LIMIT]       = "mem_limit",
+	[MM_MEM_USED_MAX]    = "mem_used_max",
+	[MM_ZERO_PAGES]      = "zero_pages",
+	[MM_NUM_MIGRATED]    = "num_migrated"
+};
+
+struct zram {
+	char	devname[32];		/* device name, e.g. /dev/zram<N> */
+	struct	path_cxt *sysfs;	/* device specific sysfs directory */
+	char	**mm_stat;		/* cached fields split from the mm_stat file, or NULL */
+
+	unsigned int mm_stat_probed : 1,	/* reading mm_stat has already been attempted */
+		     control_probed : 1,	/* has_control below is valid */
+		     has_control : 1;	/* has /sys/class/zram-control/ */
+};
+
+static unsigned int raw, no_headings, inbytes;	/* output switches set in main() by --raw, -n and -b */
+static struct path_cxt *__control;		/* created on first use by zram_get_control() */
+
+/*
+ * Translate the position of an output column into its COL_* identifier.
+ * The position and the stored identifier are both checked with assert().
+ */
+static int get_column_id(int num)
+{
+	int id;
+
+	assert(num < ncolumns);
+	id = columns[num];
+	assert(id < (int) ARRAY_SIZE(infos));
+
+	return id;
+}
+
+/* Return the infos[] entry that describes the output column at position num. */
+static const struct colinfo *get_column_info(int num)
+{
+	const int id = get_column_id(num);
+
+	return infos + id;
+}
+
+/*
+ * Look up a column by name, ignoring case.  Only the first namesz bytes of
+ * name are compared.  Returns the index into infos[], or -1 after printing
+ * a warning when no column matches.
+ */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(infos)) {
+		const char *candidate = infos[idx].name;
+
+		/* the prefix must match and the column name must end right after it */
+		if (strncasecmp(name, candidate, namesz) == 0
+		    && candidate[namesz] == '\0')
+			return idx;
+		idx++;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/*
+ * Drop the cached mm_stat fields and clear the "probed" flag so that the
+ * next get_mm_stat() call reads sysfs again.  A NULL z is accepted.
+ */
+static void zram_reset_stat(struct zram *z)
+{
+	if (!z)
+		return;
+
+	strv_free(z->mm_stat);
+	z->mm_stat = NULL;
+	z->mm_stat_probed = 0;
+}
+
+/*
+ * Set the device name of z.  When devname is NULL the name is generated as
+ * /dev/zram<n>; otherwise devname is copied (truncated to fit) and n is
+ * ignored.  The sysfs context and cached statistics are dropped.
+ */
+static void zram_set_devname(struct zram *z, const char *devname, size_t n)
+{
+	const size_t bufsz = sizeof(z->devname);
+
+	assert(z);
+
+	if (devname) {
+		strncpy(z->devname, devname, bufsz);
+		z->devname[bufsz - 1] = '\0';
+	} else
+		snprintf(z->devname, bufsz, "/dev/zram%zu", n);
+
+	DBG(fprintf(stderr, "set devname: %s", z->devname));
+
+	/* whatever was cached belongs to the previous name */
+	ul_unref_path(z->sysfs);
+	z->sysfs = NULL;
+	zram_reset_stat(z);
+}
+
+/*
+ * Parse the number out of a /dev/zram<N> device name.
+ * Returns the parsed number, or -EINVAL when the name has another form.
+ */
+static int zram_get_devnum(struct zram *z)
+{
+	int num;
+
+	assert(z);
+
+	if (sscanf(z->devname, "/dev/zram%d", &num) != 1)
+		return -EINVAL;
+
+	return num;
+}
+
+/*
+ * Allocate a zeroed zram handle; when devname is not NULL it becomes the
+ * handle's device name.
+ */
+static struct zram *new_zram(const char *devname)
+{
+	struct zram *dev = xcalloc(1, sizeof(*dev));
+
+	DBG(fprintf(stderr, "new: %p", dev));
+
+	if (!devname)
+		return dev;
+
+	zram_set_devname(dev, devname, 0);
+	return dev;
+}
+
+/* Release the sysfs context, the cached statistics and z itself; NULL is a no-op. */
+static void free_zram(struct zram *z)
+{
+	if (z) {
+		DBG(fprintf(stderr, "free: %p", z));
+		ul_unref_path(z->sysfs);
+		zram_reset_stat(z);
+		free(z);
+	}
+}
+
+/*
+ * Return the sysfs context of z, creating and caching it on first use.
+ * Returns NULL when the device name does not resolve to a device number or
+ * the context cannot be created.
+ */
+static struct path_cxt *zram_get_sysfs(struct zram *z)
+{
+	dev_t devno;
+
+	assert(z);
+
+	if (z->sysfs)
+		return z->sysfs;
+
+	devno = sysfs_devname_to_devno(z->devname);
+	if (!devno)
+		return NULL;
+
+	z->sysfs = ul_new_sysfs_path(devno, NULL, NULL);
+	if (z->sysfs && z->devname[0] != '/')
+		/* canonicalize the device name according to /sys */
+		sysfs_blkdev_get_path(z->sysfs, z->devname, sizeof(z->devname));
+
+	return z->sysfs;
+}
+
+/*
+ * Return 1 when a sysfs context can be obtained for z.  Otherwise return 0
+ * with errno set to ENODEV.  errno is reset to 0 before the lookup.
+ */
+static inline int zram_exist(struct zram *z)
+{
+	assert(z);
+
+	errno = 0;
+	if (zram_get_sysfs(z) != NULL) {
+		DBG(fprintf(stderr, "%s exists", z->devname));
+		return 1;
+	}
+
+	errno = ENODEV;
+	return 0;
+}
+
+/*
+ * Write num to the sysfs attribute attr of z.  Returns -EINVAL when no
+ * sysfs context is available, otherwise the result of ul_path_write_u64().
+ */
+static int zram_set_u64parm(struct zram *z, const char *attr, uint64_t num)
+{
+	struct path_cxt *pc = zram_get_sysfs(z);
+
+	if (pc) {
+		DBG(fprintf(stderr, "%s writing %ju to %s", z->devname, num, attr));
+		return ul_path_write_u64(pc, num, attr);
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Write str to the sysfs attribute attr of z.  Returns -EINVAL when no
+ * sysfs context is available, otherwise the result of ul_path_write_string().
+ */
+static int zram_set_strparm(struct zram *z, const char *attr, const char *str)
+{
+	struct path_cxt *pc = zram_get_sysfs(z);
+
+	if (pc) {
+		DBG(fprintf(stderr, "%s writing %s to %s", z->devname, str, attr));
+		return ul_path_write_string(pc, str, attr);
+	}
+
+	return -EINVAL;
+}
+
+
+/*
+ * Return 1 when the "disksize" attribute of z can be read and is greater
+ * than zero, 0 in every other case.
+ */
+static int zram_used(struct zram *z)
+{
+	struct path_cxt *pc = zram_get_sysfs(z);
+	uint64_t disksize;
+
+	if (!pc
+	    || ul_path_read_u64(pc, &disksize, "disksize") != 0
+	    || disksize == 0) {
+		DBG(fprintf(stderr, "%s unused", z->devname));
+		return 0;
+	}
+
+	DBG(fprintf(stderr, "%s used", z->devname));
+	return 1;
+}
+
+/*
+ * Report whether the zram-control directory exists.  The access() check runs
+ * once per handle and its result is cached in z.
+ */
+static int zram_has_control(struct zram *z)
+{
+	if (z->control_probed)
+		return z->has_control;
+
+	if (access(_PATH_SYS_CLASS "/zram-control/", F_OK) == 0)
+		z->has_control = 1;
+	else
+		z->has_control = 0;
+	z->control_probed = 1;
+
+	DBG(fprintf(stderr, "zram-control: %s", z->has_control ? "yes" : "no"));
+	return z->has_control;
+}
+
+/*
+ * Return the shared path context for the zram-control directory, creating
+ * it on the first call.
+ */
+static struct path_cxt *zram_get_control(void)
+{
+	if (__control)
+		return __control;
+
+	__control = ul_new_path(_PATH_SYS_CLASS "/zram-control");
+	return __control;
+}
+
+/*
+ * Read the "hot_add" attribute of zram-control and point z at the
+ * /dev/zram<N> device named by the number that was read.
+ *
+ * Returns 0 on success, -ENOSYS when zram-control is not available, and a
+ * non-zero value when "hot_add" cannot be read or yields a negative number.
+ */
+static int zram_control_add(struct zram *z)
+{
+	int n = 0, rc;
+	struct path_cxt *ctl;
+
+	if (!zram_has_control(z) || !(ctl = zram_get_control()))
+		return -ENOSYS;
+
+	/*
+	 * Report a failed read through its own return code; n holds no
+	 * meaningful value then and must not be handed back to the caller.
+	 */
+	rc = ul_path_read_s32(ctl, &n, "hot_add");
+	if (rc != 0)
+		return rc;
+	if (n < 0)
+		return n;
+
+	DBG(fprintf(stderr, "hot-add: %d", n));
+	zram_set_devname(z, NULL, n);
+	return 0;
+}
+
+/*
+ * Write the device number of z to the "hot_remove" attribute of
+ * zram-control.  Returns -ENOSYS when zram-control is not available, a
+ * negative value when the device name carries no number, otherwise the
+ * result of the write.
+ */
+static int zram_control_remove(struct zram *z)
+{
+	struct path_cxt *ctl;
+	int devnum;
+
+	if (!zram_has_control(z))
+		return -ENOSYS;
+
+	ctl = zram_get_control();
+	if (!ctl)
+		return -ENOSYS;
+
+	devnum = zram_get_devnum(z);
+	if (devnum >= 0) {
+		DBG(fprintf(stderr, "hot-remove: %d", devnum));
+		return ul_path_write_u64(ctl, devnum, "hot_remove");
+	}
+
+	return devnum;
+}
+
+/*
+ * Walk /dev/zram0, /dev/zram1, ... and return a handle for the first device
+ * that is not in use.  When a device does not exist, zram_control_add() is
+ * asked for one; if that fails the search ends and NULL is returned.
+ */
+static struct zram *find_free_zram(void)
+{
+	struct zram *dev = new_zram(NULL);
+	size_t num = 0;
+
+	for (;;) {
+		DBG(fprintf(stderr, "find free: checking zram%zu", num));
+		zram_set_devname(dev, NULL, num);
+
+		/* a missing device ends the scan unless one can be hot-added */
+		if (!zram_exist(dev) && zram_control_add(dev) != 0)
+			break;
+		if (!zram_used(dev))
+			return dev;
+		num++;
+	}
+
+	free_zram(dev);
+	return NULL;
+}
+
+/*
+ * Return the statistic selected by idx (an MM_* value) for device z as a
+ * string: the raw text when bytes is non-zero, otherwise the number run
+ * through size_to_human_string().  Returns NULL when the device has no
+ * sysfs context or the value cannot be read.
+ *
+ * The mm_stat file is read at most once per device name; its fields stay
+ * cached in z->mm_stat until zram_reset_stat() is called.
+ */
+static char *get_mm_stat(struct zram *z, size_t idx, int bytes)
+{
+	struct path_cxt *pc;
+	char *buf = NULL;
+	uint64_t val;
+
+	assert(idx < ARRAY_SIZE(mm_stat_names));
+	assert(z);
+
+	pc = zram_get_sysfs(z);
+	if (pc == NULL)
+		return NULL;
+
+	/* Linux >= 4.1 uses /sys/block/zram<id>/mm_stat */
+	if (z->mm_stat == NULL && !z->mm_stat_probed) {
+		if (ul_path_read_string(pc, &buf, "mm_stat") > 0 && buf) {
+			char **fields = strv_split(buf, " ");
+
+			/* make sure kernel provides mm_stat as expected */
+			if (strv_length(fields) >= ARRAY_SIZE(mm_stat_names))
+				z->mm_stat = fields;
+			else
+				strv_free(fields);
+		}
+		free(buf);
+		buf = NULL;
+		z->mm_stat_probed = 1;
+	}
+
+	if (z->mm_stat != NULL) {
+		if (!bytes) {
+			val = strtou64_or_err(z->mm_stat[idx], _("Failed to parse mm_stat"));
+			return size_to_human_string(SIZE_SUFFIX_1LETTER, val);
+		}
+		return xstrdup(z->mm_stat[idx]);
+	}
+
+	/* Linux < 4.1 uses /sys/block/zram<id>/<attrname> */
+	if (bytes) {
+		ul_path_read_string(pc, &buf, mm_stat_names[idx]);
+		return buf;
+	}
+	if (ul_path_read_u64(pc, &val, mm_stat_names[idx]) != 0)
+		return NULL;
+
+	return size_to_human_string(SIZE_SUFFIX_1LETTER, val);
+}
+
+/*
+ * Append one line describing device z to table tb and fill a cell for every
+ * selected output column.  A device without a sysfs context is skipped
+ * without adding a line; a cell whose value cannot be obtained is left unset.
+ * Allocation failures reported by libsmartcols terminate the program.
+ */
+static void fill_table_row(struct libscols_table *tb, struct zram *z)
+{
+	/* automatic, not static: the line is assigned anew on every call */
+	struct libscols_line *ln;
+	struct path_cxt *sysfs;
+	size_t i;
+	uint64_t num;
+
+	assert(tb);
+	assert(z);
+
+	DBG(fprintf(stderr, "%s: filling status table", z->devname));
+
+	sysfs = zram_get_sysfs(z);
+	if (!sysfs)
+		return;
+
+	ln = scols_table_new_line(tb, NULL);
+	if (!ln)
+		err(EXIT_FAILURE, _("failed to allocate output line"));
+
+	for (i = 0; i < (size_t) ncolumns; i++) {
+		char *str = NULL;
+
+		switch (get_column_id(i)) {
+		case COL_NAME:
+			str = xstrdup(z->devname);
+			break;
+		case COL_DISKSIZE:
+			if (inbytes)
+				ul_path_read_string(sysfs, &str, "disksize");
+
+			else if (ul_path_read_u64(sysfs, &num, "disksize") == 0)
+				str = size_to_human_string(SIZE_SUFFIX_1LETTER, num);
+			break;
+		case COL_ALGORITHM:
+		{
+			char *alg = NULL;
+
+			ul_path_read_string(sysfs, &alg, "comp_algorithm");
+			if (alg) {
+				/* keep only the text between the last '[' and the last ']' */
+				char* lbr = strrchr(alg, '[');
+				char* rbr = strrchr(alg, ']');
+
+				if (lbr != NULL && rbr != NULL && rbr - lbr > 1)
+					str = xstrndup(lbr + 1, rbr - lbr - 1);
+				free(alg);
+			}
+			break;
+		}
+		case COL_MOUNTPOINT:
+		{
+			/* path stays empty unless check_mount_point() fills it in */
+			char path[PATH_MAX] = { '\0' };
+			int fl;
+
+			check_mount_point(z->devname, &fl, path, sizeof(path));
+			if (*path)
+				str = xstrdup(path);
+			break;
+		}
+		case COL_STREAMS:
+			ul_path_read_string(sysfs, &str, "max_comp_streams");
+			break;
+		case COL_ZEROPAGES:
+			/* always shown as the raw number, independent of --bytes */
+			str = get_mm_stat(z, MM_ZERO_PAGES, 1);
+			break;
+		case COL_ORIG_SIZE:
+			str = get_mm_stat(z, MM_ORIG_DATA_SIZE, inbytes);
+			break;
+		case COL_COMP_SIZE:
+			str = get_mm_stat(z, MM_COMPR_DATA_SIZE, inbytes);
+			break;
+		case COL_MEMTOTAL:
+			str = get_mm_stat(z, MM_MEM_USED_TOTAL, inbytes);
+			break;
+		case COL_MEMLIMIT:
+			str = get_mm_stat(z, MM_MEM_LIMIT, inbytes);
+			break;
+		case COL_MEMUSED:
+			str = get_mm_stat(z, MM_MEM_USED_MAX, inbytes);
+			break;
+		case COL_MIGRATED:
+			str = get_mm_stat(z, MM_NUM_MIGRATED, inbytes);
+			break;
+		}
+		if (str && scols_line_refer_data(ln, i, str))
+			err(EXIT_FAILURE, _("failed to add output data"));
+	}
+}
+
+/*
+ * Print the status table.  With a non-NULL z only that device is shown;
+ * otherwise every zram<N> entry found in _PATH_DEV that exists and is in
+ * use gets a line.
+ */
+static void status(struct zram *z)
+{
+	struct libscols_table *table;
+	size_t col;
+
+	scols_init_debug(0);
+
+	table = scols_new_table();
+	if (table == NULL)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+
+	scols_table_enable_raw(table, raw);
+	scols_table_enable_noheadings(table, no_headings);
+
+	for (col = 0; col < (size_t) ncolumns; col++) {
+		const struct colinfo *ci = get_column_info(col);
+
+		if (!scols_table_new_column(table, ci->name, ci->whint, ci->flags))
+			err(EXIT_FAILURE, _("failed to initialize output column"));
+	}
+
+	if (z) {
+		/* just one device specified */
+		fill_table_row(table, z);
+	} else {
+		/* list all used devices */
+		DIR *devdir;
+		struct dirent *ent;
+
+		z = new_zram(NULL);
+		devdir = opendir(_PATH_DEV);
+		if (devdir == NULL)
+			err(EXIT_FAILURE, _("cannot open %s"), _PATH_DEV);
+
+		while ((ent = readdir(devdir)) != NULL) {
+			int devnum;
+
+			if (sscanf(ent->d_name, "zram%d", &devnum) != 1)
+				continue;
+			zram_set_devname(z, NULL, devnum);
+			if (zram_exist(z) && zram_used(z))
+				fill_table_row(table, z);
+		}
+		closedir(devdir);
+		free_zram(z);
+	}
+
+	scols_print_table(table);
+	scols_unref_table(table);
+}
+
+/* Print the help text, including the list of output columns, to stdout and exit successfully. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	/*
+	 * Option descriptions in output order; marked with N_() here and
+	 * passed through _() when printed, like infos[].help.
+	 */
+	static const char *const optlines[] = {
+		N_(" -a, --algorithm lzo|lz4|lz4hc|deflate|842   compression algorithm to use\n"),
+		N_(" -b, --bytes               print sizes in bytes rather than in human readable format\n"),
+		N_(" -f, --find                find a free device\n"),
+		N_(" -n, --noheadings          don't print headings\n"),
+		N_(" -o, --output <list>       columns to use for status output\n"),
+		N_("     --output-all          output all columns\n"),
+		N_("     --raw                 use raw status output format\n"),
+		N_(" -r, --reset               reset all specified devices\n"),
+		N_(" -s, --size <size>         device size\n"),
+		N_(" -t, --streams <number>    number of compression streams\n"),
+	};
+	FILE *out = stdout;
+	const struct colinfo *ci;
+	size_t k;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(	" %1$s [options] <device>\n"
+			" %1$s -r <device> [...]\n"
+			" %1$s [options] -f | <device> -s <size>\n"),
+			program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Set up and control zram devices.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	for (k = 0; k < ARRAY_SIZE(optlines); k++)
+		fputs(_(optlines[k]), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(27));
+
+	fputs(USAGE_COLUMNS, out);
+	for (ci = infos; ci < infos + ARRAY_SIZE(infos); ci++)
+		fprintf(out, " %11s  %s\n", ci->name, _(ci->help));
+
+	printf(USAGE_MAN_TAIL("zramctl(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+/* actions; main() performs exactly one of them */
+enum {
+	A_NONE = 0,	/* nothing selected by an option; main() turns it into A_FINDONLY or A_STATUS */
+	A_STATUS,	/* print the status table */
+	A_CREATE,	/* selected by --size: reset the device, then apply the settings */
+	A_FINDONLY,	/* --find alone: print the name of a free device */
+	A_RESET		/* selected by --reset */
+};
+
+int main(int argc, char **argv)	/* parse the options, then run the one selected A_* action */
+{
+	uintmax_t size = 0, nstreams = 0;
+	char *algorithm = NULL;
+	int rc = 0, c, find = 0, act = A_NONE;
+	struct zram *zram = NULL;
+
+	enum {
+		OPT_RAW = CHAR_MAX + 1,	/* long-only options get values above any char */
+		OPT_LIST_TYPES		/* code of --output-all */
+	};
+
+	static const struct option longopts[] = {
+		{ "algorithm", required_argument, NULL, 'a' },
+		{ "bytes",     no_argument, NULL, 'b' },
+		{ "find",      no_argument, NULL, 'f' },
+		{ "help",      no_argument, NULL, 'h' },
+		{ "output",    required_argument, NULL, 'o' },
+		{ "output-all",no_argument, NULL, OPT_LIST_TYPES },
+		{ "noheadings",no_argument, NULL, 'n' },
+		{ "reset",     no_argument, NULL, 'r' },
+		{ "raw",       no_argument, NULL, OPT_RAW },
+		{ "size",      required_argument, NULL, 's' },
+		{ "streams",   required_argument, NULL, 't' },
+		{ "version",   no_argument, NULL, 'V' },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {	/* option groups handed to err_exclusive_options() */
+		{ 'f', 'o', 'r' },
+		{ 'o', 'r', 's' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv, "a:bfho:nrs:t:V", longopts, NULL)) != -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch (c) {
+		case 'a':
+			algorithm = optarg;
+			break;
+		case 'b':
+			inbytes = 1;
+			break;
+		case 'f':
+			find = 1;
+			break;
+		case 'o':
+			ncolumns = string_to_idarray(optarg,
+						     columns, ARRAY_SIZE(columns),
+						     column_name_to_id);
+			if (ncolumns < 0)
+				return EXIT_FAILURE;
+			break;
+		case OPT_LIST_TYPES:
+			for (ncolumns = 0; (size_t)ncolumns < ARRAY_SIZE(infos); ncolumns++)
+				columns[ncolumns] = ncolumns;	/* every column, in infos[] order */
+			break;
+		case 's':
+			size = strtosize_or_err(optarg, _("failed to parse size"));
+			act = A_CREATE;	/* giving a size selects device setup */
+			break;
+		case 't':
+			nstreams = strtou64_or_err(optarg, _("failed to parse streams"));
+			break;
+		case 'r':
+			act = A_RESET;
+			break;
+		case OPT_RAW:
+			raw = 1;
+			break;
+		case 'n':
+			no_headings = 1;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();	/* declared noreturn, so no break is needed */
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (find && optind < argc)
+		errx(EXIT_FAILURE, _("option --find is mutually exclusive "
+				     "with <device>"));
+	if (act == A_NONE)	/* neither --size nor --reset was given */
+		act = find ? A_FINDONLY : A_STATUS;
+
+	if (act != A_RESET && optind + 1 < argc)	/* only --reset accepts several devices */
+		errx(EXIT_FAILURE, _("only one <device> at a time is allowed"));
+
+	if ((act == A_STATUS || act == A_FINDONLY) && (algorithm || nstreams))
+		errx(EXIT_FAILURE, _("options --algorithm and --streams "
+				     "must be combined with --size"));
+
+	ul_path_init_debug();
+	ul_sysfs_init_debug();
+
+	switch (act) {
+	case A_STATUS:
+		if (!ncolumns) {		/* default columns */
+			columns[ncolumns++] = COL_NAME;
+			columns[ncolumns++] = COL_ALGORITHM;
+			columns[ncolumns++] = COL_DISKSIZE;
+			columns[ncolumns++] = COL_ORIG_SIZE;
+			columns[ncolumns++] = COL_COMP_SIZE;
+			columns[ncolumns++] = COL_MEMTOTAL;
+			columns[ncolumns++] = COL_STREAMS;
+			columns[ncolumns++] = COL_MOUNTPOINT;
+		}
+		if (optind < argc) {
+			zram = new_zram(argv[optind++]);
+			if (!zram_exist(zram))
+				err(EXIT_FAILURE, "%s", zram->devname);
+		}
+		status(zram);	/* zram is still NULL here when no device was named */
+		free_zram(zram);
+		break;
+	case A_RESET:
+		if (optind == argc)
+			errx(EXIT_FAILURE, _("no device specified"));
+		while (optind < argc) {
+			zram = new_zram(argv[optind]);
+			if (!zram_exist(zram)
+			    || zram_set_u64parm(zram, "reset", 1)) {
+				warn(_("%s: failed to reset"), zram->devname);
+				rc = 1;	/* remember the failure, keep going with the next device */
+			}
+			zram_control_remove(zram);	/* return value is ignored */
+			free_zram(zram);
+			optind++;
+		}
+		break;
+	case A_FINDONLY:
+		zram = find_free_zram();
+		if (!zram)
+			errx(EXIT_FAILURE, _("no free zram device found"));
+		printf("%s\n", zram->devname);
+		free_zram(zram);
+		break;
+	case A_CREATE:
+		if (find) {
+			zram = find_free_zram();
+			if (!zram)
+				errx(EXIT_FAILURE, _("no free zram device found"));
+		} else if (optind == argc)
+			errx(EXIT_FAILURE, _("no device specified"));
+		else {
+			zram = new_zram(argv[optind]);
+			if (!zram_exist(zram))
+				err(EXIT_FAILURE, "%s", zram->devname);
+		}
+
+		if (zram_set_u64parm(zram, "reset", 1))	/* reset comes first, before any setting is written */
+			err(EXIT_FAILURE, _("%s: failed to reset"), zram->devname);
+
+		if (nstreams &&
+		    zram_set_u64parm(zram, "max_comp_streams", nstreams))
+			err(EXIT_FAILURE, _("%s: failed to set number of streams"), zram->devname);
+
+		if (algorithm &&
+		    zram_set_strparm(zram, "comp_algorithm", algorithm))
+			err(EXIT_FAILURE, _("%s: failed to set algorithm"), zram->devname);
+
+		if (zram_set_u64parm(zram, "disksize", size))	/* disksize is written last */
+			err(EXIT_FAILURE, _("%s: failed to set disksize (%ju bytes)"),
+				zram->devname, size);
+		if (find)	/* the device was picked by the search, so print its name */
+			printf("%s\n", zram->devname);
+		free_zram(zram);
+		break;
+	}
+
+	ul_unref_path(__control);
+	return rc ? EXIT_FAILURE : EXIT_SUCCESS;
+}