summaryrefslogtreecommitdiffstats
path: root/sys-utils
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 02:42:50 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-06 02:42:50 +0000
commit8cb83eee5a58b1fad74c34094ce3afb9e430b5a4 (patch)
treea9b2e7baeca1be40eb734371e3c8b11b02294497 /sys-utils
parentInitial commit. (diff)
downloadutil-linux-8cb83eee5a58b1fad74c34094ce3afb9e430b5a4.tar.xz
util-linux-8cb83eee5a58b1fad74c34094ce3afb9e430b5a4.zip
Adding upstream version 2.33.1.upstream/2.33.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--sys-utils/Makemodule.am470
-rw-r--r--sys-utils/adjtime_config.564
-rw-r--r--sys-utils/blkdiscard.885
-rw-r--r--sys-utils/blkdiscard.c254
-rw-r--r--sys-utils/blkzone.8109
-rw-r--r--sys-utils/blkzone.c416
-rw-r--r--sys-utils/chcpu.8106
-rw-r--r--sys-utils/chcpu.c389
-rw-r--r--sys-utils/chmem.8114
-rw-r--r--sys-utils/chmem.c453
-rw-r--r--sys-utils/choom.182
-rw-r--r--sys-utils/choom.c159
-rw-r--r--sys-utils/ctrlaltdel.858
-rw-r--r--sys-utils/ctrlaltdel.c114
-rw-r--r--sys-utils/dmesg.1256
-rw-r--r--sys-utils/dmesg.c1547
-rw-r--r--sys-utils/eject.1187
-rw-r--r--sys-utils/eject.c1044
-rw-r--r--sys-utils/fallocate.1191
-rw-r--r--sys-utils/fallocate.c412
-rw-r--r--sys-utils/flock.1197
-rw-r--r--sys-utils/flock.c380
-rw-r--r--sys-utils/fsfreeze.889
-rw-r--r--sys-utils/fsfreeze.c152
-rw-r--r--sys-utils/fstab.5248
-rw-r--r--sys-utils/fstrim.8131
-rw-r--r--sys-utils/fstrim.c417
-rw-r--r--sys-utils/fstrim.service.in7
-rw-r--r--sys-utils/fstrim.timer11
-rw-r--r--sys-utils/hwclock-cmos.c420
-rw-r--r--sys-utils/hwclock-rtc.c448
-rw-r--r--sys-utils/hwclock.8998
-rw-r--r--sys-utils/hwclock.8.in998
-rw-r--r--sys-utils/hwclock.c1551
-rw-r--r--sys-utils/hwclock.h80
-rw-r--r--sys-utils/ipcmk.154
-rw-r--r--sys-utils/ipcmk.c163
-rw-r--r--sys-utils/ipcrm.1117
-rw-r--r--sys-utils/ipcrm.c423
-rw-r--r--sys-utils/ipcs.1116
-rw-r--r--sys-utils/ipcs.c668
-rw-r--r--sys-utils/ipcutils.c533
-rw-r--r--sys-utils/ipcutils.h187
-rw-r--r--sys-utils/ldattach.8155
-rw-r--r--sys-utils/ldattach.c489
-rw-r--r--sys-utils/losetup.8208
-rw-r--r--sys-utils/losetup.c917
-rw-r--r--sys-utils/lscpu-arm.c252
-rw-r--r--sys-utils/lscpu-dmi.c305
-rw-r--r--sys-utils/lscpu.1184
-rw-r--r--sys-utils/lscpu.c2134
-rw-r--r--sys-utils/lscpu.h194
-rw-r--r--sys-utils/lsipc.1139
-rw-r--r--sys-utils/lsipc.c1338
-rw-r--r--sys-utils/lsmem.199
-rw-r--r--sys-utils/lsmem.c747
-rw-r--r--sys-utils/lsns.893
-rw-r--r--sys-utils/lsns.c1100
-rw-r--r--sys-utils/mount.82589
-rw-r--r--sys-utils/mount.c918
-rw-r--r--sys-utils/mountpoint.158
-rw-r--r--sys-utils/mountpoint.c203
-rw-r--r--sys-utils/nsenter.1269
-rw-r--r--sys-utils/nsenter.c484
-rw-r--r--sys-utils/pivot_root.875
-rw-r--r--sys-utils/pivot_root.c80
-rw-r--r--sys-utils/prlimit.1120
-rw-r--r--sys-utils/prlimit.c646
-rw-r--r--sys-utils/readprofile.8153
-rw-r--r--sys-utils/readprofile.c407
-rw-r--r--sys-utils/renice.1119
-rw-r--r--sys-utils/renice.c196
-rw-r--r--sys-utils/rfkill.8120
-rw-r--r--sys-utils/rfkill.c751
-rw-r--r--sys-utils/rtcwake.8189
-rw-r--r--sys-utils/rtcwake.8.in189
-rw-r--r--sys-utils/rtcwake.c655
-rw-r--r--sys-utils/setarch.8143
-rw-r--r--sys-utils/setarch.c446
-rw-r--r--sys-utils/setpriv.1222
-rw-r--r--sys-utils/setpriv.c1096
-rw-r--r--sys-utils/setsid.142
-rw-r--r--sys-utils/setsid.c123
-rw-r--r--sys-utils/swapoff.81
-rw-r--r--sys-utils/swapoff.c253
-rw-r--r--sys-utils/swapon-common.c117
-rw-r--r--sys-utils/swapon-common.h25
-rw-r--r--sys-utils/swapon.8256
-rw-r--r--sys-utils/swapon.c1017
-rw-r--r--sys-utils/switch_root.861
-rw-r--r--sys-utils/switch_root.c263
-rw-r--r--sys-utils/tunelp.8122
-rw-r--r--sys-utils/tunelp.c321
-rw-r--r--sys-utils/umount.8267
-rw-r--r--sys-utils/umount.c610
-rw-r--r--sys-utils/unshare.1266
-rw-r--r--sys-utils/unshare.c484
-rw-r--r--sys-utils/wdctl.870
-rw-r--r--sys-utils/wdctl.c618
-rw-r--r--sys-utils/zramctl.8131
-rw-r--r--sys-utils/zramctl.c765
101 files changed, 39192 insertions, 0 deletions
diff --git a/sys-utils/Makemodule.am b/sys-utils/Makemodule.am
new file mode 100644
index 0000000..825a733
--- /dev/null
+++ b/sys-utils/Makemodule.am
@@ -0,0 +1,470 @@
+if BUILD_LSMEM
+usrbin_exec_PROGRAMS += lsmem
+dist_man_MANS += sys-utils/lsmem.1
+lsmem_SOURCES = sys-utils/lsmem.c
+lsmem_LDADD = $(LDADD) libcommon.la libsmartcols.la
+lsmem_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+
+if BUILD_CHMEM
+usrbin_exec_PROGRAMS += chmem
+dist_man_MANS += sys-utils/chmem.8
+chmem_SOURCES = sys-utils/chmem.c
+chmem_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_FLOCK
+usrbin_exec_PROGRAMS += flock
+dist_man_MANS += sys-utils/flock.1
+flock_SOURCES = sys-utils/flock.c lib/monotonic.c lib/timer.c
+flock_LDADD = $(LDADD) libcommon.la $(REALTIME_LIBS)
+endif
+
+if BUILD_CHOOM
+usrbin_exec_PROGRAMS += choom
+dist_man_MANS += sys-utils/choom.1
+choom_SOURCES = sys-utils/choom.c
+choom_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_IPCMK
+usrbin_exec_PROGRAMS += ipcmk
+dist_man_MANS += sys-utils/ipcmk.1
+ipcmk_SOURCES = sys-utils/ipcmk.c
+ipcmk_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_IPCRM
+usrbin_exec_PROGRAMS += ipcrm
+dist_man_MANS += sys-utils/ipcrm.1
+ipcrm_SOURCES = sys-utils/ipcrm.c
+ipcrm_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_IPCS
+usrbin_exec_PROGRAMS += ipcs
+dist_man_MANS += sys-utils/ipcs.1
+ipcs_SOURCES = sys-utils/ipcs.c \
+ sys-utils/ipcutils.c \
+ sys-utils/ipcutils.h
+ipcs_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_LSIPC
+usrbin_exec_PROGRAMS += lsipc
+dist_man_MANS += sys-utils/lsipc.1
+lsipc_SOURCES = sys-utils/lsipc.c \
+ sys-utils/ipcutils.c \
+ sys-utils/ipcutils.h
+lsipc_LDADD = $(LDADD) libcommon.la libsmartcols.la
+lsipc_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+
+if BUILD_RENICE
+usrbin_exec_PROGRAMS += renice
+dist_man_MANS += sys-utils/renice.1
+renice_SOURCES = sys-utils/renice.c
+endif
+
+if BUILD_RFKILL
+usrsbin_exec_PROGRAMS += rfkill
+dist_man_MANS += sys-utils/rfkill.8
+rfkill_SOURCES = sys-utils/rfkill.c
+rfkill_LDADD = $(LDADD) libcommon.la libsmartcols.la
+rfkill_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+
+if BUILD_SETSID
+usrbin_exec_PROGRAMS += setsid
+dist_man_MANS += sys-utils/setsid.1
+setsid_SOURCES = sys-utils/setsid.c
+endif
+
+if BUILD_READPROFILE
+usrsbin_exec_PROGRAMS += readprofile
+dist_man_MANS += sys-utils/readprofile.8
+readprofile_SOURCES = sys-utils/readprofile.c
+endif
+
+if BUILD_TUNELP
+usrsbin_exec_PROGRAMS += tunelp
+dist_man_MANS += sys-utils/tunelp.8
+tunelp_SOURCES = sys-utils/tunelp.c
+tunelp_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_FSTRIM
+sbin_PROGRAMS += fstrim
+dist_man_MANS += sys-utils/fstrim.8
+fstrim_SOURCES = sys-utils/fstrim.c
+fstrim_LDADD = $(LDADD) libcommon.la libmount.la
+fstrim_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir)
+if HAVE_SYSTEMD
+systemdsystemunit_DATA += \
+ sys-utils/fstrim.service \
+ sys-utils/fstrim.timer
+endif # HAVE_SYSTEMD
+endif # BUILD_FSTRIM
+# Listed outside the conditionals above, so both apply even when fstrim is not built.
+PATHFILES += sys-utils/fstrim.service
+EXTRA_DIST += sys-utils/fstrim.timer
+
+if BUILD_DMESG
+bin_PROGRAMS += dmesg
+dist_man_MANS += sys-utils/dmesg.1
+dmesg_SOURCES = sys-utils/dmesg.c lib/monotonic.c
+dmesg_LDADD = $(LDADD) libcommon.la libtcolors.la $(REALTIME_LIBS)
+dmesg_CFLAGS = $(AM_CFLAGS)
+check_PROGRAMS += test_dmesg
+test_dmesg_SOURCES = $(dmesg_SOURCES)
+test_dmesg_LDADD = $(dmesg_LDADD)
+test_dmesg_CFLAGS = -DTEST_DMESG $(dmesg_CFLAGS)
+endif
+
+if BUILD_CTRLALTDEL
+sbin_PROGRAMS += ctrlaltdel
+dist_man_MANS += sys-utils/ctrlaltdel.8
+ctrlaltdel_SOURCES = sys-utils/ctrlaltdel.c
+ctrlaltdel_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_FSFREEZE
+sbin_PROGRAMS += fsfreeze
+dist_man_MANS += sys-utils/fsfreeze.8
+fsfreeze_SOURCES = sys-utils/fsfreeze.c
+endif
+
+if BUILD_BLKDISCARD
+sbin_PROGRAMS += blkdiscard
+dist_man_MANS += sys-utils/blkdiscard.8
+blkdiscard_SOURCES = sys-utils/blkdiscard.c lib/monotonic.c
+blkdiscard_LDADD = $(LDADD) libcommon.la $(REALTIME_LIBS)
+endif
+
+if BUILD_BLKZONE
+sbin_PROGRAMS += blkzone
+dist_man_MANS += sys-utils/blkzone.8
+blkzone_SOURCES = sys-utils/blkzone.c
+blkzone_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_LDATTACH
+usrsbin_exec_PROGRAMS += ldattach
+dist_man_MANS += sys-utils/ldattach.8
+ldattach_SOURCES = sys-utils/ldattach.c
+ldattach_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_RTCWAKE
+usrsbin_exec_PROGRAMS += rtcwake
+dist_man_MANS += sys-utils/rtcwake.8
+PATHFILES += sys-utils/rtcwake.8
+rtcwake_SOURCES = sys-utils/rtcwake.c
+rtcwake_LDADD = $(LDADD) libcommon.la
+endif
+
+if BUILD_SETARCH
+usrbin_exec_PROGRAMS += setarch
+dist_man_MANS += sys-utils/setarch.8
+setarch_SOURCES = sys-utils/setarch.c
+# Alias names installed as symlinks to setarch; extended per target architecture below.
+SETARCH_LINKS = uname26 linux32 linux64
+
+if ARCH_S390
+SETARCH_LINKS += s390 s390x
+endif
+if ARCH_I86
+SETARCH_LINKS += i386
+endif
+if ARCH_86_64
+SETARCH_LINKS += i386 x86_64
+endif
+if ARCH_PPC
+SETARCH_LINKS += ppc ppc64 ppc32
+endif
+if ARCH_SPARC
+SETARCH_LINKS += sparc sparc64 sparc32 sparc32bash
+endif
+if ARCH_MIPS
+SETARCH_LINKS += mips mips64 mips32
+endif
+if ARCH_IA64
+SETARCH_LINKS += i386 ia64
+endif
+if ARCH_HPPA
+SETARCH_LINKS += parisc parisc64 parisc32
+endif
+# One generated section-8 man page per alias, also listed in CLEANFILES.
+SETARCH_MAN_LINKS = $(addprefix sys-utils/,$(SETARCH_LINKS:=.8))
+man_MANS += $(SETARCH_MAN_LINKS)
+CLEANFILES += $(SETARCH_MAN_LINKS)
+# Each alias page is a one-line roff stub that sources setarch.8.
+$(SETARCH_MAN_LINKS):
+ $(AM_V_at) $(MKDIR_P) sys-utils
+ $(AM_V_GEN)echo ".so man8/setarch.8" > $@
+# Create the alias symlinks beside the installed setarch binary.
+install-exec-hook-setarch:
+ for I in $(SETARCH_LINKS); do \
+ cd $(DESTDIR)$(usrbin_execdir) && ln -sf setarch $$I ; \
+ done
+# Remove the alias symlinks again on uninstall.
+uninstall-hook-setarch:
+ for I in $(SETARCH_LINKS); do \
+ rm -f $(DESTDIR)$(usrbin_execdir)/$$I ; \
+ done
+
+INSTALL_EXEC_HOOKS += install-exec-hook-setarch
+UNINSTALL_HOOKS += uninstall-hook-setarch
+
+endif # BUILD_SETARCH
+
+
+if BUILD_EJECT
+usrbin_exec_PROGRAMS += eject
+eject_SOURCES = sys-utils/eject.c lib/monotonic.c
+eject_LDADD = $(LDADD) libmount.la libcommon.la $(REALTIME_LIBS)
+eject_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir)
+dist_man_MANS += sys-utils/eject.1
+endif
+
+
+if BUILD_LOSETUP
+sbin_PROGRAMS += losetup
+dist_man_MANS += sys-utils/losetup.8
+losetup_SOURCES = sys-utils/losetup.c
+losetup_LDADD = $(LDADD) libcommon.la libsmartcols.la
+losetup_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+
+if HAVE_STATIC_LOSETUP
+bin_PROGRAMS += losetup.static
+losetup_static_SOURCES = $(losetup_SOURCES)
+losetup_static_LDFLAGS = -all-static
+losetup_static_LDADD = $(losetup_LDADD)
+losetup_static_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+endif # BUILD_LOSETUP
+
+
+if BUILD_ZRAMCTL
+sbin_PROGRAMS += zramctl
+dist_man_MANS += sys-utils/zramctl.8
+zramctl_SOURCES = sys-utils/zramctl.c
+zramctl_LDADD = $(LDADD) libcommon.la libsmartcols.la
+zramctl_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+
+
+if BUILD_PRLIMIT
+usrbin_exec_PROGRAMS += prlimit
+dist_man_MANS += sys-utils/prlimit.1
+prlimit_SOURCES = sys-utils/prlimit.c
+prlimit_LDADD = $(LDADD) libcommon.la libsmartcols.la
+prlimit_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+
+
+if BUILD_LSNS
+usrbin_exec_PROGRAMS += lsns
+dist_man_MANS += sys-utils/lsns.8
+lsns_SOURCES = sys-utils/lsns.c
+lsns_LDADD = $(LDADD) libcommon.la libsmartcols.la libmount.la
+lsns_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) -I$(ul_libmount_incdir)
+endif
+
+
+if BUILD_MOUNT
+bin_PROGRAMS += mount umount
+dist_man_MANS += \
+ sys-utils/mount.8 \
+ sys-utils/fstab.5 \
+ sys-utils/umount.8
+mount_SOURCES = sys-utils/mount.c
+mount_LDADD = $(LDADD) libcommon.la libmount.la $(SELINUX_LIBS)
+mount_CFLAGS = $(SUID_CFLAGS) $(AM_CFLAGS) -I$(ul_libmount_incdir)
+mount_LDFLAGS = $(SUID_LDFLAGS) $(AM_LDFLAGS)
+
+umount_SOURCES = sys-utils/umount.c
+umount_LDADD = $(LDADD) libcommon.la libmount.la
+umount_CFLAGS = $(AM_CFLAGS) $(SUID_CFLAGS) -I$(ul_libmount_incdir)
+umount_LDFLAGS = $(SUID_LDFLAGS) $(AM_LDFLAGS)
+# Optional static variants reuse the sources and flags of the dynamic programs and add -all-static.
+if HAVE_STATIC_MOUNT
+bin_PROGRAMS += mount.static
+mount_static_SOURCES = $(mount_SOURCES)
+mount_static_CFLAGS = $(mount_CFLAGS)
+mount_static_LDFLAGS = $(mount_LDFLAGS) -all-static
+mount_static_LDADD = $(mount_LDADD) $(SELINUX_LIBS_STATIC)
+endif # HAVE_STATIC_MOUNT
+
+if HAVE_STATIC_UMOUNT
+bin_PROGRAMS += umount.static
+umount_static_SOURCES = $(umount_SOURCES)
+umount_static_CFLAGS = $(umount_CFLAGS)
+umount_static_LDFLAGS = $(umount_LDFLAGS) -all-static
+umount_static_LDADD = $(umount_LDADD)
+endif # HAVE_STATIC_UMOUNT
+# Post-install fixups: root ownership and mode 4755 are each applied only when the matching conditional is set.
+install-exec-hook-mount:
+if MAKEINSTALL_DO_CHOWN
+ chown root:root $(DESTDIR)$(bindir)/mount
+endif
+if MAKEINSTALL_DO_SETUID
+ chmod 4755 $(DESTDIR)$(bindir)/mount
+endif
+if MAKEINSTALL_DO_CHOWN
+ chown root:root $(DESTDIR)$(bindir)/umount
+endif
+if MAKEINSTALL_DO_SETUID
+ chmod 4755 $(DESTDIR)$(bindir)/umount
+endif
+
+INSTALL_EXEC_HOOKS += install-exec-hook-mount
+endif # BUILD_MOUNT
+
+
+if BUILD_SWAPON
+sbin_PROGRAMS += swapon swapoff
+dist_man_MANS += \
+ sys-utils/swapoff.8 \
+ sys-utils/swapon.8
+
+swapon_SOURCES = \
+ sys-utils/swapon.c \
+ sys-utils/swapon-common.c \
+ sys-utils/swapon-common.h \
+ lib/swapprober.c \
+ include/swapprober.h
+swapon_CFLAGS = $(AM_CFLAGS) \
+ -I$(ul_libblkid_incdir) \
+ -I$(ul_libmount_incdir) \
+ -I$(ul_libsmartcols_incdir)
+swapon_LDADD = $(LDADD) \
+ libblkid.la \
+ libcommon.la \
+ libmount.la \
+ libsmartcols.la
+
+swapoff_SOURCES = \
+ sys-utils/swapoff.c \
+ sys-utils/swapon-common.c \
+ sys-utils/swapon-common.h \
+ lib/swapprober.c \
+ include/swapprober.h
+swapoff_CFLAGS = $(AM_CFLAGS) \
+ -I$(ul_libblkid_incdir) \
+ -I$(ul_libmount_incdir)
+swapoff_LDADD = $(LDADD) \
+ libmount.la \
+ libblkid.la \
+ libcommon.la
+endif
+
+if BUILD_LSCPU
+usrbin_exec_PROGRAMS += lscpu
+lscpu_SOURCES = \
+ sys-utils/lscpu.c \
+ sys-utils/lscpu.h \
+ sys-utils/lscpu-arm.c \
+ sys-utils/lscpu-dmi.c
+lscpu_LDADD = $(LDADD) libcommon.la libsmartcols.la $(RTAS_LIBS)
+lscpu_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+dist_man_MANS += sys-utils/lscpu.1
+endif
+
+if BUILD_CHCPU
+sbin_PROGRAMS += chcpu
+chcpu_SOURCES = sys-utils/chcpu.c
+chcpu_LDADD = $(LDADD) libcommon.la
+dist_man_MANS += sys-utils/chcpu.8
+endif
+
+if BUILD_WDCTL
+bin_PROGRAMS += wdctl
+dist_man_MANS += sys-utils/wdctl.8
+wdctl_SOURCES = sys-utils/wdctl.c
+wdctl_LDADD = $(LDADD) libcommon.la libsmartcols.la
+wdctl_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir)
+endif
+
+if BUILD_MOUNTPOINT
+bin_PROGRAMS += mountpoint
+mountpoint_LDADD = $(LDADD) libmount.la
+mountpoint_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir)
+dist_man_MANS += sys-utils/mountpoint.1
+mountpoint_SOURCES = sys-utils/mountpoint.c
+endif
+
+if BUILD_FALLOCATE
+usrbin_exec_PROGRAMS += fallocate
+fallocate_SOURCES = sys-utils/fallocate.c
+fallocate_LDADD = $(LDADD) libcommon.la
+dist_man_MANS += sys-utils/fallocate.1
+endif
+
+if BUILD_PIVOT_ROOT
+sbin_PROGRAMS += pivot_root
+dist_man_MANS += sys-utils/pivot_root.8
+pivot_root_SOURCES = sys-utils/pivot_root.c
+endif
+
+if BUILD_SWITCH_ROOT
+sbin_PROGRAMS += switch_root
+dist_man_MANS += sys-utils/switch_root.8
+switch_root_SOURCES = sys-utils/switch_root.c
+endif
+
+if BUILD_UNSHARE
+usrbin_exec_PROGRAMS += unshare
+dist_man_MANS += sys-utils/unshare.1
+unshare_SOURCES = sys-utils/unshare.c
+unshare_LDADD = $(LDADD) libcommon.la
+unshare_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir)
+
+if HAVE_STATIC_UNSHARE
+usrbin_exec_PROGRAMS += unshare.static
+unshare_static_SOURCES = $(unshare_SOURCES)
+unshare_static_LDFLAGS = -all-static
+unshare_static_LDADD = $(unshare_LDADD)
+unshare_static_CFLAGS = $(unshare_CFLAGS)
+endif
+endif
+
+if BUILD_NSENTER
+usrbin_exec_PROGRAMS += nsenter
+dist_man_MANS += sys-utils/nsenter.1
+nsenter_SOURCES = sys-utils/nsenter.c
+nsenter_LDADD = $(LDADD) libcommon.la $(SELINUX_LIBS)
+
+if HAVE_STATIC_NSENTER
+usrbin_exec_PROGRAMS += nsenter.static
+nsenter_static_SOURCES = $(nsenter_SOURCES)
+nsenter_static_LDFLAGS = -all-static
+nsenter_static_LDADD = $(nsenter_LDADD)
+endif
+endif
+
+if BUILD_HWCLOCK
+sbin_PROGRAMS += hwclock
+dist_man_MANS += \
+ sys-utils/hwclock.8 \
+ sys-utils/adjtime_config.5
+PATHFILES += sys-utils/hwclock.8
+hwclock_SOURCES = \
+ sys-utils/hwclock.c \
+ sys-utils/hwclock.h \
+ sys-utils/hwclock-cmos.c
+if LINUX
+hwclock_SOURCES += sys-utils/hwclock-rtc.c
+endif # LINUX
+hwclock_LDADD = $(LDADD) libcommon.la -lm
+if HAVE_AUDIT
+hwclock_LDADD += -laudit
+endif # HAVE_AUDIT
+endif # BUILD_HWCLOCK
+
+if BUILD_SETPRIV
+usrbin_exec_PROGRAMS += setpriv
+dist_man_MANS += sys-utils/setpriv.1
+setpriv_SOURCES = sys-utils/setpriv.c
+setpriv_LDADD = $(LDADD) -lcap-ng libcommon.la
+endif
diff --git a/sys-utils/adjtime_config.5 b/sys-utils/adjtime_config.5
new file mode 100644
index 0000000..6f03ca7
--- /dev/null
+++ b/sys-utils/adjtime_config.5
@@ -0,0 +1,64 @@
+.TH ADJTIME_CONFIG 5 "August 2018" "util-linux" "File Formats"
+.SH NAME
+adjtime \- information about hardware clock setting and drift factor
+.SH SYNOPSIS
+.I /etc/adjtime
+.SH DESCRIPTION
+The file
+.B /etc/adjtime
+contains descriptive information about the hardware mode clock setting and clock drift factor.
+The file is read and written by hwclock, and read by programs like rtcwake to get the RTC time mode.
+.PP
+The file is usually located in /etc, but tools like
+.BR hwclock (8)
+or
+.BR rtcwake (8)
+allow the use of an alternative location via command-line options if write access to
+/etc is unwanted. The default clock mode is "UTC" if the file is missing.
+.PP
+The Hardware Clock is usually not very accurate. However, much of its inaccuracy is completely predictable - it gains
+or loses the same amount of time every day. This is called systematic drift. The hwclock utility maintains the file /etc/adjtime,
+which keeps some historical information.
+For more details see "\fBThe Adjust Function\fR" and "\fBThe Adjtime File\fR" sections from
+.BR hwclock (8)
+man page.
+.PP
+
+The adjtime file is plain ASCII and has the following format.
+.sp
+.SS First line
+Three numbers, separated by blanks:
+.TP
+.B "drift factor"
+the systematic drift rate in seconds per day (floating point decimal)
+.TP
+.B last adjust time
+the resulting number of seconds since 1969 UTC of most recent adjustment or calibration (decimal integer)
+.TP
+.B "adjustment status"
+zero (for compatibility with clock(8)) as a decimal integer.
+
+.SS Second line
+.TP
+.B "last calibration time"
+The resulting number of seconds since 1969 UTC of most recent calibration.
+Zero if there has been no calibration yet or it is known that any previous
+calibration is moot (for example, because the Hardware Clock has been found,
+since that calibration, not to contain a valid time). This is a decimal
+integer.
+
+.SS Third line
+.TP
+.B "clock mode"
+Supported values are "UTC" or "LOCAL". Tells whether the Hardware Clock is set
+to Coordinated Universal Time or local time. You can always override this
+value with options on the hwclock command line.
+
+.SH FILES
+.IR /etc/adjtime
+.SH "SEE ALSO"
+.BR hwclock (8),
+.BR rtcwake (8)
+.SH AVAILABILITY
+This man page is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/blkdiscard.8 b/sys-utils/blkdiscard.8
new file mode 100644
index 0000000..1f3a32b
--- /dev/null
+++ b/sys-utils/blkdiscard.8
@@ -0,0 +1,85 @@
+.TH BLKDISCARD 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+blkdiscard \- discard sectors on a device
+.SH SYNOPSIS
+.B blkdiscard
+[options]
+.RB [ \-o
+.IR offset ]
+.RB [ \-l
+.IR length ]
+.I device
+.SH DESCRIPTION
+.B blkdiscard
+is used to discard device sectors. This is useful for solid-state
+drives (SSDs) and thinly-provisioned storage. Unlike
+.BR fstrim (8),
+this command is used directly on the block device.
+.PP
+By default,
+.B blkdiscard
+will discard all blocks on the device. Options may be used to modify
+this behavior based on range or size, as explained below.
+.PP
+The
+.I device
+argument is the pathname of the block device.
+.PP
+.B WARNING: All data in the discarded region on the device will be lost!
+.SH OPTIONS
+The
+.I offset
+and
+.I length
+arguments may be followed by the multiplicative suffixes KiB (=1024),
+MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is
+optional, e.g., "K" has the same meaning as "KiB") or the suffixes
+KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB.
+.TP
+.BR \-o , " \-\-offset \fIoffset"
+Byte offset into the device from which to start discarding. The provided value
+will be aligned to the device sector size. The default value is zero.
+.TP
+.BR \-l , " \-\-length \fIlength"
+The number of bytes to discard (counting from the starting point). The provided value
+will be aligned to the device sector size. If the specified value extends past
+the end of the device,
+.B blkdiscard
+will stop at the device size boundary. The default value extends to the end
+of the device.
+.TP
+.BR \-p , " \-\-step \fIlength"
+The number of bytes to discard within one iteration. The default is to discard
+everything with a single ioctl call.
+.TP
+.BR \-s , " \-\-secure"
+Perform a secure discard. A secure discard is the same as a regular discard
+except that all copies of the discarded blocks that were possibly created by
+garbage collection must also be erased. This requires support from the device.
+.TP
+.BR \-z , " \-\-zeroout"
+Zero-fill rather than discard.
+.TP
+.BR \-v , " \-\-verbose"
+Display the aligned values of
+.I offset
+and
+.IR length .
+If the \fB\-\-step\fR option is specified, it prints the discard progress every second.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH AUTHOR
+.MT lczerner@redhat.com
+Lukas Czerner
+.ME
+.SH SEE ALSO
+.BR fstrim (8)
+.SH AVAILABILITY
+The blkdiscard command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/blkdiscard.c b/sys-utils/blkdiscard.c
new file mode 100644
index 0000000..c19b67b
--- /dev/null
+++ b/sys-utils/blkdiscard.c
@@ -0,0 +1,254 @@
+/*
+ * blkdiscard.c -- discard the part (or whole) of the block device.
+ *
+ * Copyright (C) 2012 Red Hat, Inc. All rights reserved.
+ * Written by Lukas Czerner <lczerner@redhat.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This program uses BLKDISCARD ioctl to discard part or the whole block
+ * device if the device supports it. You can specify range (start and
+ * length) to be discarded, or simply discard the whole device.
+ */
+
+
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <getopt.h>
+#include <time.h>
+
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <linux/fs.h>
+
+#include "nls.h"
+#include "strutils.h"
+#include "c.h"
+#include "closestream.h"
+#include "monotonic.h"
+
+#ifndef BLKDISCARD /* fallback ioctl numbers, used only when the included headers do not define them */
+# define BLKDISCARD _IO(0x12,119)
+#endif
+
+#ifndef BLKSECDISCARD
+# define BLKSECDISCARD _IO(0x12,125)
+#endif
+
+#ifndef BLKZEROOUT
+# define BLKZEROOUT _IO(0x12,127)
+#endif
+
+enum { /* operation applied to the selected range */
+ ACT_DISCARD = 0, /* default */
+ ACT_ZEROOUT, /* selected by -z / --zeroout */
+ ACT_SECURE /* selected by -s / --secure */
+};
+
+static void print_stats(int act, char *path, uint64_t stats[])
+{ /* Report on stdout that stats[1] bytes were processed starting at offset stats[0] of path. */
+ switch (act) {
+ case ACT_ZEROOUT:
+ printf(_("%s: Zero-filled %" PRIu64 " bytes from the offset %" PRIu64"\n"), \
+ path, stats[1], stats[0]);
+ break;
+ case ACT_SECURE: /* secure discard shares the plain-discard message */
+ case ACT_DISCARD:
+ printf(_("%s: Discarded %" PRIu64 " bytes from the offset %" PRIu64"\n"), \
+ path, stats[1], stats[0]);
+ break;
+ }
+}
+
+static void __attribute__((__noreturn__)) usage(void)
+{ /* Print the help text to stdout and exit with EXIT_SUCCESS; never returns. */
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out,
+ _(" %s [options] <device>\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Discard the content of sectors on a device.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -o, --offset <num> offset in bytes to discard from\n"), out);
+ fputs(_(" -l, --length <num> length of bytes to discard from the offset\n"), out);
+ fputs(_(" -p, --step <num> size of the discard iterations within the offset\n"), out);
+ fputs(_(" -s, --secure perform secure discard\n"), out);
+ fputs(_(" -z, --zeroout zero-fill rather than discard\n"), out);
+ fputs(_(" -v, --verbose print aligned length and offset\n"), out);
+
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(21));
+
+ printf(USAGE_MAN_TAIL("blkdiscard(8)"));
+ exit(EXIT_SUCCESS);
+}
+
+
+int main(int argc, char **argv)
+{ /* Parse options, validate the range against the device, then discard/zero it in chunks via ioctl. */
+ char *path;
+ int c, fd, verbose = 0, secsize;
+ uint64_t end, blksize, step, range[2], stats[2]; /* range[] = { offset, length } handed to the ioctls */
+ struct stat sb;
+ struct timeval now, last;
+ int act = ACT_DISCARD;
+
+ static const struct option longopts[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+ { "offset", required_argument, NULL, 'o' },
+ { "length", required_argument, NULL, 'l' },
+ { "step", required_argument, NULL, 'p' },
+ { "secure", no_argument, NULL, 's' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "zeroout", no_argument, NULL, 'z' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ range[0] = 0;
+ range[1] = ULLONG_MAX; /* default length; clamped to the device size further down */
+ step = 0; /* 0 means one ioctl covers the whole range */
+
+ while ((c = getopt_long(argc, argv, "hVsvo:l:p:z", longopts, NULL)) != -1) {
+ switch(c) {
+ case 'h':
+ usage();
+ break;
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+ case 'l':
+ range[1] = strtosize_or_err(optarg,
+ _("failed to parse length"));
+ break;
+ case 'o':
+ range[0] = strtosize_or_err(optarg,
+ _("failed to parse offset"));
+ break;
+ case 'p':
+ step = strtosize_or_err(optarg,
+ _("failed to parse step"));
+ break;
+ case 's':
+ act = ACT_SECURE;
+ break;
+ case 'v':
+ verbose = 1;
+ break;
+ case 'z':
+ act = ACT_ZEROOUT;
+ break;
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+ if (optind == argc)
+ errx(EXIT_FAILURE, _("no device specified"));
+
+ path = argv[optind++];
+
+ if (optind != argc) { /* exactly one device argument is accepted */
+ warnx(_("unexpected number of arguments"));
+ errtryhelp(EXIT_FAILURE);
+ }
+
+ fd = open(path, O_WRONLY);
+ if (fd < 0)
+ err(EXIT_FAILURE, _("cannot open %s"), path);
+
+ if (fstat(fd, &sb) == -1)
+ err(EXIT_FAILURE, _("stat of %s failed"), path);
+ if (!S_ISBLK(sb.st_mode))
+ errx(EXIT_FAILURE, _("%s: not a block device"), path);
+
+ if (ioctl(fd, BLKGETSIZE64, &blksize))
+ err(EXIT_FAILURE, _("%s: BLKGETSIZE64 ioctl failed"), path);
+ if (ioctl(fd, BLKSSZGET, &secsize))
+ err(EXIT_FAILURE, _("%s: BLKSSZGET ioctl failed"), path);
+
+ /* check offset alignment to the sector size */
+ if (range[0] % secsize)
+ errx(EXIT_FAILURE, _("%s: offset %" PRIu64 " is not aligned "
+ "to sector size %i"), path, range[0], secsize);
+
+ /* is the range end behind the end of the device ?*/
+ if (range[0] > blksize)
+ errx(EXIT_FAILURE, _("%s: offset is greater than device size"), path);
+ end = range[0] + range[1];
+ if (end < range[0] || end > blksize) /* sum wrapped around, or lies past the device end */
+ end = blksize;
+
+ range[1] = (step > 0) ? step : end - range[0]; /* from here on range[1] is the per-ioctl chunk size */
+
+ /* check length alignment to the sector size */
+ if (range[1] % secsize)
+ errx(EXIT_FAILURE, _("%s: length %" PRIu64 " is not aligned "
+ "to sector size %i"), path, range[1], secsize);
+
+ stats[0] = range[0], stats[1] = 0; /* stats[] = { start of the unreported span, bytes done since the last report } */
+ gettime_monotonic(&last);
+
+ for (/* nothing */; range[0] < end; range[0] += range[1]) {
+ if (range[0] + range[1] > end) /* shorten the final chunk so it stops at end */
+ range[1] = end - range[0];
+
+ switch (act) {
+ case ACT_ZEROOUT:
+ if (ioctl(fd, BLKZEROOUT, &range))
+ err(EXIT_FAILURE, _("%s: BLKZEROOUT ioctl failed"), path);
+ break;
+ case ACT_SECURE:
+ if (ioctl(fd, BLKSECDISCARD, &range))
+ err(EXIT_FAILURE, _("%s: BLKSECDISCARD ioctl failed"), path);
+ break;
+ case ACT_DISCARD:
+ if (ioctl(fd, BLKDISCARD, &range))
+ err(EXIT_FAILURE, _("%s: BLKDISCARD ioctl failed"), path);
+ break;
+ }
+
+ stats[1] += range[1];
+
+ /* reporting progress at most once per second */
+ if (verbose && step) {
+ gettime_monotonic(&now);
+ if (now.tv_sec > last.tv_sec &&
+ (now.tv_usec >= last.tv_usec || now.tv_sec > last.tv_sec + 1)) {
+ print_stats(act, path, stats);
+ stats[0] += stats[1], stats[1] = 0; /* begin a new reporting window after the printed span */
+ last = now;
+ }
+ }
+ }
+
+ if (verbose && stats[1]) /* print whatever has not been reported yet */
+ print_stats(act, path, stats);
+
+ close(fd);
+ return EXIT_SUCCESS;
+}
diff --git a/sys-utils/blkzone.8 b/sys-utils/blkzone.8
new file mode 100644
index 0000000..bf7f15f
--- /dev/null
+++ b/sys-utils/blkzone.8
@@ -0,0 +1,109 @@
+.TH BLKZONE 8 "February 2017" "util-linux" "System Administration"
+.SH NAME
+blkzone \- run zone command on a device
+.SH SYNOPSIS
+.B blkzone
+.I command
+[options]
+.I device
+.SH DESCRIPTION
+.B blkzone
+is used to run zone commands on devices that support the Zoned Block Commands
+(ZBC) or Zoned-device ATA Commands (ZAC). The zones to operate on can be
+specified using the offset, count and length options.
+.PP
+The
+.I device
+argument is the pathname of the block device.
+.SH COMMANDS
+.SS report
+The command \fBblkzone report\fP is used to report device zone information.
+.PP
+By default, the command will report all zones from the start of the
+block device. Options may be used to modify this behavior, changing the
+starting zone or the size of the report, as explained below.
+
+.B Report output
+.TS
+tab(:);
+l l.
+start:Zone start sector
+len:Zone length in number of sectors
+wptr:Zone write pointer position
+reset:Reset write pointer recommended
+non-seq:Non-sequential write resources active
+cond:Zone condition
+type:Zone type
+.TE
+
+.B Zone conditions
+.TS
+tab(:);
+l l.
+cl:Closed
+nw:Not write pointer
+em:Empty
+fu:Full
+oe:Explicitly opened
+oi:Implicitly opened
+ol:Offline
+ro:Read only
+x?:Reserved conditions (should not be reported)
+.TE
+
+.SS reset
+The command \fBblkzone reset\fP is used to reset one or more zones. Unlike
+.BR sg_reset_wp (8),
+this command operates from the block layer and can reset a range of zones.
+.PP
+By default, the command will operate from the zone at device
+sector 0 and reset all zones. Options may be used to modify this behavior
+as well as specify the operation to be performed on the zone, as explained below.
+
+.SH OPTIONS
+The
+.I offset
+and
+.I length
+option arguments may be followed by the multiplicative suffixes KiB (=1024),
+MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is
+optional, e.g., "K" has the same meaning as "KiB") or the suffixes
+KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB.
+Additionally, the 0x prefix can be used to specify \fIoffset\fR and
+\fIlength\fR in hex.
+.TP
+.BR \-o , " \-\-offset "\fIsector\fP
+The starting zone specified as a sector offset. The provided offset in sector
+units (512 bytes) should match the start of a zone. The default value is zero.
+.TP
+.BR \-l , " \-\-length "\fIsectors\fP
+The maximum number of sectors the command should operate on. The default value
+is the number of sectors remaining after \fIoffset\fR. This option cannot be
+used together with the option \fB\-\-count\fP.
+.TP
+.BR \-c , " \-\-count "\fIcount\fP
+The maximum number of zones the command should operate on. The default value
+is the number of zones starting from \fIoffset\fR. This option cannot be
+used together with the option \fB\-\-length\fP.
+.TP
+.BR \-v , " \-\-verbose"
+Display the number of zones returned in the report or the range of sectors
+reset.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH AUTHORS
+.nf
+Shaun Tancheff <shaun@tancheff.com>
+Karel Zak <kzak@redhat.com>
+.fi
+.SH SEE ALSO
+.BR sg_rep_zones (8)
+.SH AVAILABILITY
+The blkzone command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/blkzone.c b/sys-utils/blkzone.c
new file mode 100644
index 0000000..1dcbdf5
--- /dev/null
+++ b/sys-utils/blkzone.c
@@ -0,0 +1,416 @@
+/*
+ * blkzone.c -- the block device zone commands
+ *
+ * Copyright (C) 2015,2016 Seagate Technology PLC
+ * Written by Shaun Tancheff <shaun.tancheff@seagate.com>
+ *
+ * Copyright (C) 2017 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <getopt.h>
+#include <time.h>
+
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <linux/fs.h>
+#include <linux/blkzoned.h>
+
+#include "nls.h"
+#include "strutils.h"
+#include "xalloc.h"
+#include "c.h"
+#include "closestream.h"
+#include "blkdev.h"
+#include "sysfs.h"
+#include "optutils.h"
+
+struct blkzone_control;
+
+static int blkzone_report(struct blkzone_control *ctl);
+static int blkzone_reset(struct blkzone_control *ctl);
+
+/*
+ * Description of one blkzone sub-command: the name typed on the command
+ * line, the function implementing it and the text printed by usage().
+ */
+struct blkzone_command {
+ const char *name; /* sub-command name, compared by name_to_command() */
+ int (*handler)(struct blkzone_control *); /* implementation; main() treats a negative result as failure */
+ const char *help; /* one-line description, translated with _() when printed */
+};
+
+/*
+ * Run-time state shared by main() and the command handlers.
+ */
+struct blkzone_control {
+ const char *devname; /* device path taken from the command line */
+ const struct blkzone_command *command; /* selected sub-command */
+
+ uint64_t total_sectors; /* device size, filled in by init_device() */
+ int secsize; /* sector size, filled in by init_device() */
+
+ uint64_t offset; /* --offset: first sector to operate on */
+ uint64_t length; /* --length: maximum number of sectors, 0 = not given */
+ uint32_t count; /* --count: maximum number of zones, 0 = not given */
+
+ unsigned int verbose : 1; /* --verbose */
+};
+
+/*
+ * Table of supported sub-commands; searched by name_to_command() and
+ * listed by usage(). The help strings are only marked for translation
+ * here (N_()), the lookup happens when they are printed.
+ */
+static const struct blkzone_command commands[] = {
+ { "report", blkzone_report, N_("Report zone information about the given device") },
+ { "reset", blkzone_reset, N_("Reset a range of zones.") }
+};
+
+/*
+ * Find the sub-command whose name is exactly @name.
+ *
+ * Returns a pointer to the matching entry of commands[], or NULL when no
+ * entry carries that name.
+ */
+static const struct blkzone_command *name_to_command(const char *name)
+{
+	const struct blkzone_command *cmd = commands;
+	const struct blkzone_command *const end = commands + ARRAY_SIZE(commands);
+
+	while (cmd < end) {
+		if (!strcmp(cmd->name, name))
+			return cmd;
+		cmd++;
+	}
+
+	return NULL;
+}
+
+/*
+ * Open ctl->devname with the open(2) flags in @mode, verify that it is a
+ * block device and store its size in sectors and its sector size in @ctl.
+ *
+ * Returns the open file descriptor. Every failure terminates the program
+ * through err()/errx(), so the function never returns an error.
+ */
+static int init_device(struct blkzone_control *ctl, int mode)
+{
+	struct stat sb;
+	unsigned long long sectors = 0;
+	int fd;
+
+	fd = open(ctl->devname, mode);
+	if (fd < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), ctl->devname);
+
+	if (fstat(fd, &sb) == -1)
+		err(EXIT_FAILURE, _("stat of %s failed"), ctl->devname);
+	if (!S_ISBLK(sb.st_mode))
+		errx(EXIT_FAILURE, _("%s: not a block device"), ctl->devname);
+
+	/*
+	 * blkdev_get_sectors() stores through an "unsigned long long *".
+	 * Use a local of exactly that type instead of casting the address of
+	 * the uint64_t member, which need not be the same type.
+	 */
+	if (blkdev_get_sectors(fd, &sectors))
+		err(EXIT_FAILURE, _("%s: blkdev_get_sectors ioctl failed"), ctl->devname);
+	ctl->total_sectors = sectors;
+
+	if (blkdev_get_sector_size(fd, &ctl->secsize))
+		err(EXIT_FAILURE, _("%s: BLKSSZGET ioctl failed"), ctl->devname);
+
+	return fd;
+}
+
+/*
+ * Get the device zone size (as indicated by the queue/chunk_sectors
+ * sysfs attribute).
+ *
+ * @dname may name a partition; the attribute is then read from the
+ * whole-disk device the partition belongs to.
+ *
+ * Returns the value read, or 0 when it cannot be determined.
+ */
+static unsigned long blkdev_chunk_sectors(const char *dname)
+{
+	const dev_t devno = sysfs_devname_to_devno(dname);
+	struct path_cxt *pc = ul_new_sysfs_path(devno, NULL, NULL);
+	dev_t disk;
+	uint64_t sz = 0;
+	int rc;
+
+	if (!pc)
+		return 0;
+
+	/*
+	 * Mapping /dev/sdXn -> /sys/block/sdX to read the chunk_sectors
+	 * entry: if @pc is not the whole-disk device, switch to the disk.
+	 */
+	rc = sysfs_blkdev_get_wholedisk(pc, NULL, 0, &disk);
+	if (rc == 0 && devno != disk)
+		rc = sysfs_blkdev_init_path(pc, disk, NULL);
+
+	if (rc == 0)
+		rc = ul_path_read_u64(pc, &sz, "queue/chunk_sectors");
+
+	ul_unref_path(pc);
+	return rc == 0 ? sz : 0;
+}
+
+/*
+ * blkzone report
+ */
+#define DEF_REPORT_LEN (1U << 12) /* 4k zones per report (256k kzalloc) */
+
+/*
+ * Zone type names printed by blkzone_report(); the array is indexed by
+ * the "type" field of a reported zone.
+ */
+static const char *type_text[] = {
+ "RESERVED",
+ "CONVENTIONAL",
+ "SEQ_WRITE_REQUIRED",
+ "SEQ_WRITE_PREFERRED",
+};
+
+/*
+ * Two-letter zone condition codes printed by blkzone_report(). The table
+ * has 16 entries, so the reporting code can mask the condition value with
+ * ARRAY_SIZE(condition_str) - 1 to obtain a valid index.
+ */
+static const char *condition_str[] = {
+ "nw", /* Not write pointer */
+ "em", /* Empty */
+ "oi", /* Implicitly opened */
+ "oe", /* Explicitly opened */
+ "cl", /* Closed */
+ "x5", "x6", "x7", "x8", "x9", "xA", "xB", "xC", /* xN: reserved */
+ "ro", /* Read only */
+ "fu", /* Full */
+ "of" /* Offline */
+};
+
+/*
+ * Handler of the "report" command.
+ *
+ * Opens the device read-only and prints one line for every zone, starting
+ * at ctl->offset. The number of zones is ctl->count if given, otherwise
+ * it is derived from ctl->length, otherwise it covers the rest of the
+ * device. Zones are fetched with the BLKREPORTZONE ioctl in batches of at
+ * most DEF_REPORT_LEN entries, and ctl->offset is advanced past every
+ * printed zone.
+ *
+ * Errors terminate the program; on success 0 is returned.
+ */
+static int blkzone_report(struct blkzone_control *ctl)
+{
+	struct blk_zone_report *zi;
+	unsigned long zonesize;
+	uint32_t i, nr_zones;
+	int fd;
+
+	fd = init_device(ctl, O_RDONLY);
+
+	if (ctl->offset >= ctl->total_sectors)
+		errx(EXIT_FAILURE,
+		     _("%s: offset is greater than or equal to device size"), ctl->devname);
+
+	zonesize = blkdev_chunk_sectors(ctl->devname);
+	if (!zonesize)
+		errx(EXIT_FAILURE, _("%s: unable to determine zone size"), ctl->devname);
+
+	if (ctl->count)
+		nr_zones = ctl->count;
+	else if (ctl->length)
+		nr_zones = (ctl->length + zonesize - 1) / zonesize;
+	else
+		nr_zones = 1 + (ctl->total_sectors - ctl->offset) / zonesize;
+
+	zi = xmalloc(sizeof(struct blk_zone_report) +
+		     (DEF_REPORT_LEN * sizeof(struct blk_zone)));
+
+	while (nr_zones && ctl->offset < ctl->total_sectors) {
+
+		/* in: capacity of the buffer and start sector; out: zones found */
+		zi->nr_zones = min(nr_zones, DEF_REPORT_LEN);
+		zi->sector = ctl->offset;
+
+		if (ioctl(fd, BLKREPORTZONE, zi) == -1)
+			err(EXIT_FAILURE, _("%s: BLKREPORTZONE ioctl failed"), ctl->devname);
+
+		if (ctl->verbose)
+			printf(_("Found %d zones from 0x%"PRIx64"\n"),
+				zi->nr_zones, ctl->offset);
+
+		if (!zi->nr_zones) {
+			nr_zones = 0;
+			break;
+		}
+
+		for (i = 0; i < zi->nr_zones; i++) {
+			const struct blk_zone *entry = &zi->zones[i];
+			unsigned int type = entry->type;
+			uint64_t start = entry->start;
+			uint64_t wp = entry->wp;
+			uint8_t cond = entry->cond;
+			uint64_t len = entry->len;
+			/*
+			 * The type comes straight from the kernel; never use
+			 * it as an index without checking it against the
+			 * table size.
+			 */
+			const char *type_name = type < ARRAY_SIZE(type_text) ?
+						type_text[type] : "UNKNOWN";
+
+			/* a zero-length zone ends the report */
+			if (!len) {
+				nr_zones = 0;
+				break;
+			}
+
+			printf(_(" start: 0x%09"PRIx64", len 0x%06"PRIx64", wptr 0x%06"PRIx64
+				" reset:%u non-seq:%u, zcond:%2u(%s) [type: %u(%s)]\n"),
+				start, len, (type == 0x1) ? 0 : wp - start,
+				entry->reset, entry->non_seq,
+				cond, condition_str[cond & (ARRAY_SIZE(condition_str) - 1)],
+				type, type_name);
+
+			nr_zones--;
+			ctl->offset = start + len;
+
+		}
+
+	}
+
+	free(zi);
+	close(fd);
+
+	return 0;
+}
+
+/*
+ * blkzone reset
+ *
+ * Handler of the "reset" command: issues one BLKRESETZONE ioctl for the
+ * sector range that starts at ctl->offset. The range length is
+ * ctl->count zones if given, otherwise ctl->length sectors, otherwise the
+ * whole device; it is always clamped to the end of the device.
+ *
+ * The offset must be zone aligned. An explicitly given length must be
+ * zone aligned as well, unless the range ends exactly at the end of the
+ * device.
+ *
+ * Errors terminate the program; on success 0 is returned.
+ */
+static int blkzone_reset(struct blkzone_control *ctl)
+{
+	struct blk_zone_range za = { .sector = 0 };
+	unsigned long zonesize;
+	uint64_t zlen;
+	int fd;
+
+	zonesize = blkdev_chunk_sectors(ctl->devname);
+	if (!zonesize)
+		errx(EXIT_FAILURE, _("%s: unable to determine zone size"), ctl->devname);
+
+	fd = init_device(ctl, O_WRONLY);
+
+	if (ctl->offset & (zonesize - 1))
+		errx(EXIT_FAILURE, _("%s: offset %" PRIu64 " is not aligned "
+			"to zone size %lu"),
+			ctl->devname, ctl->offset, zonesize);
+
+	if (ctl->offset > ctl->total_sectors)
+		errx(EXIT_FAILURE, _("%s: offset is greater than device size"), ctl->devname);
+
+	if (ctl->count)
+		/* widen first: unsigned long may be only 32 bits wide */
+		zlen = (uint64_t) ctl->count * zonesize;
+	else if (ctl->length)
+		zlen = ctl->length;
+	else
+		zlen = ctl->total_sectors;
+
+	/*
+	 * Clamp to the end of the device. offset <= total_sectors was
+	 * verified above, so the subtraction cannot wrap, whereas
+	 * "offset + zlen" could for a huge user-supplied length.
+	 */
+	if (zlen > ctl->total_sectors - ctl->offset)
+		zlen = ctl->total_sectors - ctl->offset;
+
+	if (ctl->length &&
+	    (zlen & (zonesize - 1)) &&
+	    ctl->offset + zlen != ctl->total_sectors)
+		errx(EXIT_FAILURE, _("%s: number of sectors %" PRIu64 " is not aligned "
+			"to zone size %lu"),
+			ctl->devname, ctl->length, zonesize);
+
+	za.sector = ctl->offset;
+	za.nr_sectors = zlen;
+
+	if (ioctl(fd, BLKRESETZONE, &za) == -1)
+		err(EXIT_FAILURE, _("%s: BLKRESETZONE ioctl failed"), ctl->devname);
+	else if (ctl->verbose)
+		/* terminate the line; the message used to lack the newline */
+		printf(_("%s: successfully reset in range from %" PRIu64 ", to %" PRIu64 "\n"),
+			ctl->devname,
+			ctl->offset,
+			ctl->offset + zlen);
+	close(fd);
+	return 0;
+}
+
+/*
+ * Print the help text (synopsis, list of sub-commands, options) to stdout
+ * and exit successfully.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	/* option descriptions; translated when printed */
+	static const char *const option_help[] = {
+		N_(" -o, --offset <sector> start sector of zone to act (in 512-byte sectors)\n"),
+		N_(" -l, --length <sectors> maximum sectors to act (in 512-byte sectors)\n"),
+		N_(" -c, --count <number> maximum number of zones\n"),
+		N_(" -v, --verbose display more details\n"),
+	};
+	const struct blkzone_command *cmd;
+	FILE *out = stdout;
+	size_t n;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s <command> [options] <device>\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Run zone command on the given block device.\n"), out);
+
+	fputs(USAGE_COMMANDS, out);
+	for (cmd = commands; cmd < commands + ARRAY_SIZE(commands); cmd++)
+		fprintf(out, " %-11s %s\n", cmd->name, _(cmd->help));
+
+	fputs(USAGE_OPTIONS, out);
+	for (n = 0; n < ARRAY_SIZE(option_help); n++)
+		fputs(_(option_help[n]), out);
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(24));
+
+	printf(USAGE_MAN_TAIL("blkzone(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Entry point: blkzone <command> [options] <device>
+ *
+ * The sub-command, if present, must be the first argument; it is removed
+ * from argv before the options are parsed. Exactly one device argument
+ * must follow the options. The selected handler is then run; a negative
+ * handler result turns into EXIT_FAILURE.
+ */
+int main(int argc, char **argv)
+{
+ int c;
+ struct blkzone_control ctl = {
+ .devname = NULL,
+ .offset = 0,
+ .count = 0,
+ .length = 0
+ };
+
+ static const struct option longopts[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "count", required_argument, NULL, 'c' }, /* max #of zones to operate on */
+ { "length", required_argument, NULL, 'l' }, /* max of sectors to operate on */
+ { "offset", required_argument, NULL, 'o' }, /* starting LBA */
+ { "verbose", no_argument, NULL, 'v' },
+ { "version", no_argument, NULL, 'V' },
+ { NULL, 0, NULL, 0 }
+ };
+ /* --count and --length cannot be combined */
+ static const ul_excl_t excl[] = { /* rows and cols in ASCII order */
+ { 'c', 'l' },
+ { 0 }
+ };
+ int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ /* a first argument that is not an option has to be the sub-command */
+ if (argc >= 2 && *argv[1] != '-') {
+ ctl.command = name_to_command(argv[1]);
+ if (!ctl.command)
+ errx(EXIT_FAILURE, _("%s is not valid command name"), argv[1]);
+ /* skip the sub-command so getopt_long() sees only the options */
+ argv++;
+ argc--;
+ }
+
+ while ((c = getopt_long(argc, argv, "hc:l:o:vV", longopts, NULL)) != -1) {
+
+ err_exclusive_options(c, longopts, excl, excl_st);
+
+ switch (c) {
+ case 'h':
+ usage();
+ break;
+ case 'c':
+ ctl.count = strtou32_or_err(optarg,
+ _("failed to parse number of zones"));
+ break;
+ case 'l':
+ ctl.length = strtosize_or_err(optarg,
+ _("failed to parse number of sectors"));
+ break;
+ case 'o':
+ ctl.offset = strtosize_or_err(optarg,
+ _("failed to parse zone offset"));
+ break;
+ case 'v':
+ ctl.verbose = 1;
+ break;
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+ /* ctl.command is still NULL when no sub-command was given */
+ if (!ctl.command)
+ errx(EXIT_FAILURE, _("no command specified"));
+
+ if (optind == argc)
+ errx(EXIT_FAILURE, _("no device specified"));
+ ctl.devname = argv[optind++];
+
+ /* anything after the device name is an error */
+ if (optind != argc)
+ errx(EXIT_FAILURE,_("unexpected number of arguments"));
+
+ if (ctl.command->handler(&ctl) < 0)
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+
+}
diff --git a/sys-utils/chcpu.8 b/sys-utils/chcpu.8
new file mode 100644
index 0000000..2fb7111
--- /dev/null
+++ b/sys-utils/chcpu.8
@@ -0,0 +1,106 @@
+.TH CHCPU 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+chcpu \- configure CPUs
+.SH SYNOPSIS
+.B chcpu
+.BR \-c | \-d | \-e | \-g
+.I cpu-list
+.br
+.B chcpu \-p
+.I mode
+.br
+.B chcpu
+.BR \-r | \-h | \-V
+.SH DESCRIPTION
+.B chcpu
+can modify the state of CPUs. It can enable or disable CPUs, scan for new
+CPUs, change the CPU dispatching
+.I mode
+of the underlying hypervisor, and request CPUs from the hypervisor
+(configure) or return CPUs to the hypervisor (deconfigure).
+.PP
+Some options have a
+.I cpu-list
+argument. Use this argument to specify a comma-separated list of CPUs. The
+list can contain individual CPU addresses or ranges of addresses. For
+example,
+.B 0,5,7,9-11
+makes the command applicable to the CPUs with the addresses 0, 5, 7, 9, 10,
+and 11.
+.SH OPTIONS
+.TP
+.BR \-c , " \-\-configure " \fIcpu-list\fP
+Configure the specified CPUs. Configuring a CPU means that the hypervisor
+takes a CPU from the CPU pool and assigns it to the virtual hardware on which
+your kernel runs.
+.TP
+.BR \-d , " \-\-disable " \fIcpu-list\fP
+Disable the specified CPUs. Disabling a CPU means that the kernel sets it
+offline.
+.TP
+.BR \-e , " \-\-enable " \fIcpu-list\fP
+Enable the specified CPUs. Enabling a CPU means that the kernel sets it
+online. A CPU must be configured, see \fB\-c\fR, before it can be enabled.
+.TP
+.BR \-g , " \-\-deconfigure " \fIcpu-list\fP
+Deconfigure the specified CPUs. Deconfiguring a CPU means that the
+hypervisor removes the CPU from the virtual hardware on which the Linux
+instance runs and returns it to the CPU pool. A CPU must be offline, see
+\fB\-d\fR, before it can be deconfigured.
+.TP
+.BR \-p , " \-\-dispatch " \fImode\fP
+Set the CPU dispatching
+.I mode
+(polarization). This option has an effect only if your hardware architecture
+and hypervisor support CPU polarization. Available
+.I modes
+are:
+.RS 14
+.TP 12
+.PD 0
+.B horizontal
+The workload is spread across all available CPUs.
+.TP 12
+.B vertical
+The workload is concentrated on few CPUs.
+.RE
+.PD 1
+.TP
+.BR \-r , " \-\-rescan"
+Trigger a rescan of CPUs. After a rescan, the Linux kernel recognizes
+the new CPUs. Use this option on systems that do not
+automatically detect newly attached CPUs.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+
+.SH RETURN CODES
+.B chcpu
+has the following return codes:
+.TP
+.B 0
+success
+.TP
+.B 1
+failure
+.TP
+.B 64
+partial success
+.SH AUTHOR
+.MT heiko.carstens@de.ibm.com
+Heiko Carstens
+.ME
+.SH COPYRIGHT
+Copyright IBM Corp. 2011
+.br
+.SH "SEE ALSO"
+.BR lscpu (1)
+.SH AVAILABILITY
+The chcpu command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/chcpu.c b/sys-utils/chcpu.c
new file mode 100644
index 0000000..36c47af
--- /dev/null
+++ b/sys-utils/chcpu.c
@@ -0,0 +1,389 @@
+/*
+ * chcpu - CPU configuration tool
+ *
+ * Copyright IBM Corp. 2011
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "cpuset.h"
+#include "nls.h"
+#include "xalloc.h"
+#include "c.h"
+#include "strutils.h"
+#include "bitops.h"
+#include "path.h"
+#include "closestream.h"
+#include "optutils.h"
+
+#define EXCL_ERROR "--{configure,deconfigure,disable,dispatch,enable}"
+
+/* partial success, otherwise we return regular EXIT_{SUCCESS,FAILURE} */
+#define CHCPU_EXIT_SOMEOK 64
+
+/* sysfs directory all CPU attributes are accessed through */
+#define _PATH_SYS_CPU "/sys/devices/system/cpu"
+
+/* mask of online CPUs; users check it for NULL before consulting it */
+static cpu_set_t *onlinecpus;
+/* number of CPUs the masks are sized for; set by main() */
+static int maxcpus;
+
+/* both helpers read onlinecpus, callers must make sure it is not NULL */
+#define is_cpu_online(cpu) (CPU_ISSET_S((cpu), CPU_ALLOC_SIZE(maxcpus), onlinecpus))
+#define num_online_cpus() (CPU_COUNT_S(CPU_ALLOC_SIZE(maxcpus), onlinecpus))
+
+/* operation selected on the command line, dispatched at the end of main() */
+enum {
+ CMD_CPU_ENABLE = 0,
+ CMD_CPU_DISABLE,
+ CMD_CPU_CONFIGURE,
+ CMD_CPU_DECONFIGURE,
+ CMD_CPU_RESCAN,
+ CMD_CPU_DISPATCH_HORIZONTAL,
+ CMD_CPU_DISPATCH_VERTICAL,
+};
+
+/*
+ * Set every CPU selected in @cpu_set online (@enable == 1) or offline
+ * (@enable == 0) by writing to the cpu<N>/online attribute below @sys.
+ * @setsize is the size of @cpu_set as expected by the CPU_*_S() macros.
+ *
+ * CPUs that are already in the requested state are reported and skipped.
+ * The last online CPU is never disabled (only checked when the mask of
+ * online CPUs is available).
+ *
+ * returns: 0 = success
+ * < 0 = failure (none of the requested CPUs could be processed)
+ * > 0 = partial success
+ */
+static int cpu_enable(struct path_cxt *sys, cpu_set_t *cpu_set, size_t setsize, int enable)
+{
+	int cpu;
+	int requested = 0;	/* number of CPUs selected in @cpu_set */
+	int fails = 0;
+
+	for (cpu = 0; cpu < maxcpus; cpu++) {
+		/* per-CPU state; -1 means "could not be determined" */
+		int online = -1;
+		int configured = -1;
+		int rc;
+
+		if (!CPU_ISSET_S(cpu, setsize, cpu_set))
+			continue;
+		requested++;
+
+		if (ul_path_accessf(sys, F_OK, "cpu%d", cpu) != 0) {
+			warnx(_("CPU %u does not exist"), cpu);
+			fails++;
+			continue;
+		}
+		if (ul_path_accessf(sys, F_OK, "cpu%d/online", cpu) != 0) {
+			warnx(_("CPU %u is not hot pluggable"), cpu);
+			fails++;
+			continue;
+		}
+		/* on a failed read fall through and let the write decide */
+		if (ul_path_readf_s32(sys, &online, "cpu%d/online", cpu) != 0)
+			online = -1;
+		if (online == 1 && enable == 1) {
+			printf(_("CPU %u is already enabled\n"), cpu);
+			continue;
+		}
+		if (online == 0 && enable == 0) {
+			printf(_("CPU %u is already disabled\n"), cpu);
+			continue;
+		}
+		if (ul_path_accessf(sys, F_OK, "cpu%d/configure", cpu) == 0)
+			ul_path_readf_s32(sys, &configured, "cpu%d/configure", cpu);
+		if (enable) {
+			rc = ul_path_writef_string(sys, "1", "cpu%d/online", cpu);
+			if (rc != 0 && configured == 0) {
+				warn(_("CPU %u enable failed (CPU is deconfigured)"), cpu);
+				fails++;
+			} else if (rc != 0) {
+				warn(_("CPU %u enable failed"), cpu);
+				fails++;
+			} else
+				printf(_("CPU %u enabled\n"), cpu);
+		} else {
+			if (onlinecpus && num_online_cpus() == 1) {
+				warnx(_("CPU %u disable failed (last enabled CPU)"), cpu);
+				fails++;
+				continue;
+			}
+			rc = ul_path_writef_string(sys, "0", "cpu%d/online", cpu);
+			if (rc != 0) {
+				warn(_("CPU %u disable failed"), cpu);
+				fails++;
+			} else {
+				printf(_("CPU %u disabled\n"), cpu);
+				/* keep the mask current for the last-CPU check */
+				if (onlinecpus)
+					CPU_CLR_S(cpu, setsize, onlinecpus);
+			}
+		}
+	}
+
+	/*
+	 * Compare against the number of requested CPUs, not against maxcpus:
+	 * otherwise a request where every CPU failed would be reported as a
+	 * partial success unless all possible CPUs had been requested.
+	 */
+	return fails == 0 ? 0 : fails == requested ? -1 : 1;
+}
+
+/*
+ * Ask the kernel to rescan for CPUs by writing "1" to the "rescan"
+ * attribute below @sys.
+ *
+ * Exits with an error when the attribute does not exist or cannot be
+ * written; otherwise reports the trigger and returns 0.
+ */
+static int cpu_rescan(struct path_cxt *sys)
+{
+	const int supported = (ul_path_access(sys, F_OK, "rescan") == 0);
+
+	if (!supported)
+		errx(EXIT_FAILURE, _("This system does not support rescanning of CPUs"));
+
+	if (ul_path_write_string(sys, "1", "rescan"))
+		err(EXIT_FAILURE, _("Failed to trigger rescan of CPUs"));
+
+	printf(_("Triggered rescan of CPUs\n"));
+
+	return 0;
+}
+
+/*
+ * Select the CPU dispatching mode through the "dispatching" attribute
+ * below @sys: @mode 0 writes "0" (horizontal), any other value writes
+ * "1" (vertical).
+ *
+ * Exits with an error when the attribute is missing or the write fails;
+ * otherwise prints a confirmation and returns 0.
+ */
+static int cpu_set_dispatch(struct path_cxt *sys, int mode)
+{
+	if (ul_path_access(sys, F_OK, "dispatching") != 0)
+		errx(EXIT_FAILURE, _("This system does not support setting "
+				     "the dispatching mode of CPUs"));
+
+	if (mode != 0) {
+		if (ul_path_write_string(sys, "1", "dispatching") != 0)
+			err(EXIT_FAILURE, _("Failed to set vertical dispatch mode"));
+
+		printf(_("Successfully set vertical dispatching mode\n"));
+		return 0;
+	}
+
+	if (ul_path_write_string(sys, "0", "dispatching") != 0)
+		err(EXIT_FAILURE, _("Failed to set horizontal dispatch mode"));
+
+	printf(_("Successfully set horizontal dispatching mode\n"));
+	return 0;
+}
+
+/*
+ * Configure (@configure == 1) or deconfigure (@configure == 0) every CPU
+ * selected in @cpu_set by writing to the cpu<N>/configure attribute below
+ * @sys. @setsize is the size of @cpu_set as expected by the CPU_*_S()
+ * macros.
+ *
+ * CPUs already in the requested state are reported and skipped; a CPU
+ * that is known to be online is never deconfigured.
+ *
+ * returns: 0 = success
+ * < 0 = failure (none of the requested CPUs could be processed)
+ * > 0 = partial success
+ */
+static int cpu_configure(struct path_cxt *sys, cpu_set_t *cpu_set, size_t setsize, int configure)
+{
+	int cpu;
+	int requested = 0;	/* number of CPUs selected in @cpu_set */
+	int fails = 0;
+
+	for (cpu = 0; cpu < maxcpus; cpu++) {
+		int current = -1;	/* -1: state could not be read */
+		int rc;
+
+		if (!CPU_ISSET_S(cpu, setsize, cpu_set))
+			continue;
+		requested++;
+
+		if (ul_path_accessf(sys, F_OK, "cpu%d", cpu) != 0) {
+			warnx(_("CPU %u does not exist"), cpu);
+			fails++;
+			continue;
+		}
+		if (ul_path_accessf(sys, F_OK, "cpu%d/configure", cpu) != 0) {
+			warnx(_("CPU %u is not configurable"), cpu);
+			fails++;
+			continue;
+		}
+		/* on a failed read fall through and let the write decide */
+		if (ul_path_readf_s32(sys, &current, "cpu%d/configure", cpu) != 0)
+			current = -1;
+		if (current == 1 && configure == 1) {
+			printf(_("CPU %u is already configured\n"), cpu);
+			continue;
+		}
+		if (current == 0 && configure == 0) {
+			printf(_("CPU %u is already deconfigured\n"), cpu);
+			continue;
+		}
+		if (current == 1 && configure == 0 && onlinecpus &&
+		    is_cpu_online(cpu)) {
+			warnx(_("CPU %u deconfigure failed (CPU is enabled)"), cpu);
+			fails++;
+			continue;
+		}
+		if (configure) {
+			rc = ul_path_writef_string(sys, "1", "cpu%d/configure", cpu);
+			if (rc != 0) {
+				warn(_("CPU %u configure failed"), cpu);
+				fails++;
+			} else
+				printf(_("CPU %u configured\n"), cpu);
+		} else {
+			rc = ul_path_writef_string(sys, "0", "cpu%d/configure", cpu);
+			if (rc != 0) {
+				warn(_("CPU %u deconfigure failed"), cpu);
+				fails++;
+			} else
+				printf(_("CPU %u deconfigured\n"), cpu);
+		}
+	}
+
+	/*
+	 * Compare against the number of requested CPUs, not against maxcpus:
+	 * otherwise a request where every CPU failed would be reported as a
+	 * partial success unless all possible CPUs had been requested.
+	 */
+	return fails == 0 ? 0 : fails == requested ? -1 : 1;
+}
+
+/*
+ * Parse the CPU list @cpu_string into @cpu_set (of size @setsize).
+ *
+ * Returns only on success. A parser result of 2 is reported as an
+ * invalid CPU number, any other non-zero result as a parse failure; both
+ * terminate the program.
+ */
+static void cpu_parse(char *cpu_string, cpu_set_t *cpu_set, size_t setsize)
+{
+	const int rc = cpulist_parse(cpu_string, cpu_set, setsize, 1);
+
+	if (rc != 0)
+		errx(EXIT_FAILURE,
+		     rc == 2 ? _("invalid CPU number in CPU list: %s")
+			     : _("failed to parse CPU list: %s"),
+		     cpu_string);
+}
+
+/*
+ * Print the help text to stdout and exit successfully.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+
+	fprintf(out,
+		_("\nUsage:\n"
+		  " %s [options]\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Configure CPUs in a multi-processor system.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -e, --enable <cpu-list> enable cpus\n"
+		" -d, --disable <cpu-list> disable cpus\n"
+		" -c, --configure <cpu-list> configure cpus\n"
+		" -g, --deconfigure <cpu-list> deconfigure cpus\n"
+		" -p, --dispatch <mode> set dispatching mode\n"
+		" -r, --rescan trigger rescan of cpus\n"),
+	      out);
+	printf(USAGE_HELP_OPTIONS(31));
+
+	printf(USAGE_MAN_TAIL("chcpu(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Entry point of chcpu.
+ *
+ * Parses exactly one operation from the command line (enable, disable,
+ * configure, deconfigure, dispatch mode or rescan) and executes it through
+ * the sysfs handler for _PATH_SYS_CPU.
+ *
+ * Exit status: EXIT_SUCCESS, EXIT_FAILURE, or CHCPU_EXIT_SOMEOK when only
+ * some of the requested CPUs could be processed.
+ */
+int main(int argc, char *argv[])
+{
+	struct path_cxt *sys = NULL;	/* _PATH_SYS_CPU handler */
+	cpu_set_t *cpu_set;		/* CPUs given on the command line */
+	size_t setsize;			/* size of cpu_set for the CPU_*_S() macros */
+	int cmd = -1;
+	int c, rc;
+
+	static const struct option longopts[] = {
+		{ "configure",	required_argument, NULL, 'c' },
+		{ "deconfigure",required_argument, NULL, 'g' },
+		{ "disable",	required_argument, NULL, 'd' },
+		{ "dispatch",	required_argument, NULL, 'p' },
+		{ "enable",	required_argument, NULL, 'e' },
+		{ "help",	no_argument,       NULL, 'h' },
+		{ "rescan",	no_argument,       NULL, 'r' },
+		{ "version",	no_argument,       NULL, 'V' },
+		{ NULL,		0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {       /* rows and cols in ASCII order */
+		{ 'c','d','e','g','p' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	ul_path_init_debug();
+	sys = ul_new_path(_PATH_SYS_CPU);
+	if (!sys)
+		err(EXIT_FAILURE, _("failed to initialize sysfs handler"));
+
+	maxcpus = get_max_number_of_cpus();
+	if (maxcpus < 1)
+		errx(EXIT_FAILURE, _("cannot determine NR_CPUS; aborting"));
+
+	/*
+	 * Remember which CPUs are online. The mask has to go to onlinecpus
+	 * (not to cpu_set, which is allocated below): it is what the
+	 * "last enabled CPU" and "CPU is enabled" checks consult.
+	 * NOTE(review): the state of the output pointer after a failed read
+	 * is not visible here, so it is reset to NULL to disable the checks.
+	 */
+	if (ul_path_access(sys, F_OK, "online") != 0 ||
+	    ul_path_readf_cpulist(sys, &onlinecpus, maxcpus, "online") != 0)
+		onlinecpus = NULL;
+
+	setsize = CPU_ALLOC_SIZE(maxcpus);
+	cpu_set = CPU_ALLOC(maxcpus);
+	if (!cpu_set)
+		err(EXIT_FAILURE, _("cpuset_alloc failed"));
+
+	while ((c = getopt_long(argc, argv, "c:d:e:g:hp:rV", longopts, NULL)) != -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		/*
+		 * Use optarg for option arguments: argv[optind - 1] is the
+		 * whole word for the "--option=value" and "-ovalue" forms.
+		 */
+		switch (c) {
+		case 'c':
+			cmd = CMD_CPU_CONFIGURE;
+			cpu_parse(optarg, cpu_set, setsize);
+			break;
+		case 'd':
+			cmd = CMD_CPU_DISABLE;
+			cpu_parse(optarg, cpu_set, setsize);
+			break;
+		case 'e':
+			cmd = CMD_CPU_ENABLE;
+			cpu_parse(optarg, cpu_set, setsize);
+			break;
+		case 'g':
+			cmd = CMD_CPU_DECONFIGURE;
+			cpu_parse(optarg, cpu_set, setsize);
+			break;
+		case 'h':
+			usage();	/* does not return */
+		case 'p':
+			if (strcmp("horizontal", optarg) == 0)
+				cmd = CMD_CPU_DISPATCH_HORIZONTAL;
+			else if (strcmp("vertical", optarg) == 0)
+				cmd = CMD_CPU_DISPATCH_VERTICAL;
+			else
+				errx(EXIT_FAILURE, _("unsupported argument: %s"),
+				     optarg);
+			break;
+		case 'r':
+			cmd = CMD_CPU_RESCAN;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* no option at all, or stray non-option arguments */
+	if ((argc == 1) || (argc != optind)) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	/* the helpers expect the size of the set, not the number of CPUs */
+	switch (cmd) {
+	case CMD_CPU_ENABLE:
+		rc = cpu_enable(sys, cpu_set, setsize, 1);
+		break;
+	case CMD_CPU_DISABLE:
+		rc = cpu_enable(sys, cpu_set, setsize, 0);
+		break;
+	case CMD_CPU_CONFIGURE:
+		rc = cpu_configure(sys, cpu_set, setsize, 1);
+		break;
+	case CMD_CPU_DECONFIGURE:
+		rc = cpu_configure(sys, cpu_set, setsize, 0);
+		break;
+	case CMD_CPU_RESCAN:
+		rc = cpu_rescan(sys);
+		break;
+	case CMD_CPU_DISPATCH_HORIZONTAL:
+		rc = cpu_set_dispatch(sys, 0);
+		break;
+	case CMD_CPU_DISPATCH_VERTICAL:
+		rc = cpu_set_dispatch(sys, 1);
+		break;
+	default:
+		rc = -EINVAL;
+		break;
+	}
+
+	ul_unref_path(sys);
+
+	return rc == 0 ? EXIT_SUCCESS :
+		rc < 0 ? EXIT_FAILURE : CHCPU_EXIT_SOMEOK;
+}
diff --git a/sys-utils/chmem.8 b/sys-utils/chmem.8
new file mode 100644
index 0000000..8a3b34d
--- /dev/null
+++ b/sys-utils/chmem.8
@@ -0,0 +1,114 @@
+.TH CHMEM 8 "October 2016" "util-linux" "System Administration"
+.SH NAME
+chmem \- configure memory
+.SH SYNOPSIS
+.B chmem
+.RB [ \-h "] [" \-V "] [" \-v "] [" \-e | \-d "]"
+[\fISIZE\fP|\fIRANGE\fP|\fB\-b\fP \fIBLOCKRANGE\fP]
+[\fB\-z\fP \fIZONE\fP]
+.SH DESCRIPTION
+The chmem command sets a particular size or range of memory online or offline.
+.
+.IP "\(hy" 2
+Specify \fISIZE\fP as <size>[m|M|g|G]. With m or M, <size> specifies the memory
+size in MiB (1024 x 1024 bytes). With g or G, <size> specifies the memory size
+in GiB (1024 x 1024 x 1024 bytes). The default unit is MiB.
+.
+.IP "\(hy" 2
+Specify \fIRANGE\fP in the form 0x<start>-0x<end> as shown in the output of the
+\fBlsmem\fP command. <start> is the hexadecimal address of the first byte and <end>
+is the hexadecimal address of the last byte in the memory range.
+.
+.IP "\(hy" 2
+Specify \fIBLOCKRANGE\fP in the form <first>-<last> or <block> as shown in the
+output of the \fBlsmem\fP command. <first> is the number of the first memory block
+and <last> is the number of the last memory block in the memory
+range. Alternatively a single block can be specified. \fIBLOCKRANGE\fP requires
+the \fB--blocks\fP option.
+.
+.IP "\(hy" 2
+Specify \fIZONE\fP as the name of a memory zone, as shown in the output of the
+\fBlsmem -o +ZONES\fP command. The output shows one or more valid memory zones
+for each memory range. If multiple zones are shown, then the memory range
+currently belongs to the first zone. By default, chmem will set memory online
+to the zone Movable, if this is among the valid zones. This default can be
+changed by specifying the \fB--zone\fP option with another valid zone.
+For memory ballooning, it is recommended to select the zone Movable for memory
+online and offline, if possible. Memory in this zone is much more likely to be
+able to be offlined again, but it cannot be used for arbitrary kernel
+allocations, only for migratable pages (e.g. anonymous and page cache pages).
+Use the \fB\-\-help\fR option to see all available zones.
+.
+.PP
+\fISIZE\fP and \fIRANGE\fP must be aligned to the Linux memory block size, as
+shown in the output of the \fBlsmem\fP command.
+
+Setting memory online can fail for various reasons. On virtualized systems it
+can fail if the hypervisor does not have enough memory left, for example
+because memory was overcommitted. Setting memory offline can fail if Linux
+cannot free the memory. If only part of the requested memory can be set online
+or offline, a message tells you how much memory was set online or offline
+instead of the requested amount.
+
+When setting memory online \fBchmem\fP starts with the lowest memory block
+numbers. When setting memory offline \fBchmem\fP starts with the highest memory
+block numbers.
+.SH OPTIONS
+.TP
+.BR \-b ", " \-\-blocks
+Use a \fIBLOCKRANGE\fP parameter instead of \fIRANGE\fP or \fISIZE\fP for the
+\fB--enable\fP and \fB--disable\fP options.
+.TP
+.BR \-d ", " \-\-disable
+Set the specified \fIRANGE\fP, \fISIZE\fP, or \fIBLOCKRANGE\fP of memory offline.
+.TP
+.BR \-e ", " \-\-enable
+Set the specified \fIRANGE\fP, \fISIZE\fP, or \fIBLOCKRANGE\fP of memory online.
+.TP
+.BR \-z ", " \-\-zone " \fIZONE\fP"
+Select the memory \fIZONE\fP where to set the specified \fIRANGE\fP, \fISIZE\fP,
+or \fIBLOCKRANGE\fP of memory online or offline. By default, memory will be set
+online to the zone Movable, if possible.
+.TP
+.BR \-h ", " \-\-help
+Print a short help text, then exit.
+.TP
+.BR \-v ", " \-\-verbose
+Verbose mode. Causes \fBchmem\fP to print debugging messages about its
+progress.
+.TP
+.BR \-V ", " \-\-version
+Print the version number, then exit.
+.SH RETURN CODES
+.B chmem
+has the following return codes:
+.TP
+.B 0
+success
+.TP
+.B 1
+failure
+.TP
+.B 64
+partial success
+.SH EXAMPLES
+.TP
+.B chmem --enable 1024
+This command requests 1024 MiB of memory to be set online.
+.TP
+.B chmem -e 2g
+This command requests 2 GiB of memory to be set online.
+.TP
+.B chmem --disable 0x00000000e4000000-0x00000000f3ffffff
+This command requests the memory range starting with 0x00000000e4000000
+and ending with 0x00000000f3ffffff to be set offline.
+.TP
+.B chmem -b -d 10
+This command requests the memory block number 10 to be set offline.
+.SH SEE ALSO
+.BR lsmem (1)
+.SH AVAILABILITY
+The \fBchmem\fP command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/chmem.c b/sys-utils/chmem.c
new file mode 100644
index 0000000..861f6cf
--- /dev/null
+++ b/sys-utils/chmem.c
@@ -0,0 +1,453 @@
+/*
+ * chmem - Memory configuration tool
+ *
+ * Copyright IBM Corp. 2016
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <assert.h>
+#include <dirent.h>
+
+#include "c.h"
+#include "nls.h"
+#include "path.h"
+#include "strutils.h"
+#include "strv.h"
+#include "optutils.h"
+#include "closestream.h"
+#include "xalloc.h"
+
+/* partial success, otherwise we return regular EXIT_{SUCCESS,FAILURE} */
+#define CHMEM_EXIT_SOMEOK 64
+
+#define _PATH_SYS_MEMORY "/sys/devices/system/memory"
+
+/* Run-time state of one chmem invocation. */
+struct chmem_desc {
+ struct path_cxt *sysmem; /* _PATH_SYS_MEMORY handler */
+ /* memoryN directory entries and their count, as returned by scandir() */
+ struct dirent **dirs;
+ int ndirs;
+ /* memory block size, parsed as hexadecimal from block_size_bytes */
+ uint64_t block_size;
+ /* first and last block number of the requested range (inclusive) */
+ uint64_t start;
+ uint64_t end;
+ /* requested size; converted to a number of blocks while parsing */
+ uint64_t size;
+ /* -b: the parameter is given in block numbers */
+ unsigned int use_blocks : 1;
+ /* the parameter is a SIZE (chmem_size() is used), not a range */
+ unsigned int is_size : 1;
+ /* -v: report per-block progress */
+ unsigned int verbose : 1;
+ /* the valid_zones sysfs attribute is available */
+ unsigned int have_zones : 1;
+};
+
+/* Action selected by -e / -d; CMD_NONE as long as neither was given. */
+enum {
+ CMD_MEMORY_ENABLE = 0,
+ CMD_MEMORY_DISABLE,
+ CMD_NONE
+};
+
+/* Memory zone identifiers; each value is an index into zone_names[]. */
+enum zone_id {
+ ZONE_DMA = 0,
+ ZONE_DMA32,
+ ZONE_NORMAL,
+ ZONE_HIGHMEM,
+ ZONE_MOVABLE,
+ ZONE_DEVICE,
+};
+
+/*
+ * Zone names indexed by enum zone_id. They are compared case-insensitively
+ * with the --zone argument and with the content of the valid_zones sysfs
+ * attribute, and are listed by usage().
+ */
+static char *zone_names[] = {
+ [ZONE_DMA] = "DMA",
+ [ZONE_DMA32] = "DMA32",
+ [ZONE_NORMAL] = "Normal",
+ [ZONE_HIGHMEM] = "Highmem",
+ [ZONE_MOVABLE] = "Movable",
+ [ZONE_DEVICE] = "Device",
+};
+
+/*
+ * Translate a zone name into its index in zone_names[]. The name must be
+ * null-terminated and is compared case-insensitively. Returns -1 if the
+ * name matches no known zone.
+ */
+static int zone_name_to_id(const char *name)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(zone_names)) {
+		if (strcasecmp(name, zone_names[idx]) == 0)
+			return idx;
+		idx++;
+	}
+	return -1;
+}
+
+/*
+ * Write a human-readable description of memory block idx into buf: the
+ * block number followed by the first and last address it covers, both
+ * derived from desc->block_size.
+ */
+static void idxtostr(struct chmem_desc *desc, uint64_t idx, char *buf, size_t bufsz)
+{
+	const uint64_t first = idx * desc->block_size;
+	const uint64_t last = first + desc->block_size - 1;
+
+	snprintf(buf, bufsz,
+		 _("Memory Block %"PRIu64" (0x%016"PRIx64"-0x%016"PRIx64")"),
+		 idx, first, last);
+}
+
+/*
+ * Set desc->size memory blocks online (enable != 0) or offline (enable == 0).
+ *
+ * The memoryN directories are walked from the first entry upwards when
+ * enabling and from the last entry downwards when disabling, until the
+ * requested number of blocks has been switched or no directory is left.
+ * Blocks that already are in the requested state are skipped and do not
+ * count. A zone_id >= 0 restricts the operation to blocks of that zone.
+ *
+ * Per-block results are printed to stdout in verbose mode only.
+ *
+ * Returns 0 if all requested blocks were switched, -1 if none was, and 1 if
+ * only a part of the request could be satisfied.
+ */
+static int chmem_size(struct chmem_desc *desc, int enable, int zone_id)
+{
+ char *name, *onoff, line[BUFSIZ], str[BUFSIZ];
+ uint64_t size, index;
+ const char *zn;
+ int i, rc;
+
+ size = desc->size;
+ onoff = enable ? "online" : "offline";
+ i = enable ? 0 : desc->ndirs - 1;
+
+ /* an explicitly requested zone selects the state keyword to write */
+ if (enable && zone_id >= 0) {
+ if (zone_id == ZONE_MOVABLE)
+ onoff = "online_movable";
+ else
+ onoff = "online_kernel";
+ }
+
+ for (; i >= 0 && i < desc->ndirs && size; i += enable ? 1 : -1) {
+ name = desc->dirs[i]->d_name;
+ /* the block number follows the 6 characters of "memory" */
+ index = strtou64_or_err(name + 6, _("Failed to parse index"));
+
+ /*
+ * Only the first 6 characters are compared, so every "online*"
+ * keyword matches a state that starts with "online".
+ */
+ if (ul_path_readf_buffer(desc->sysmem, line, sizeof(line), "%s/state", name) > 0
+ && strncmp(onoff, line, 6) == 0)
+ continue;
+
+ if (desc->have_zones) {
+ ul_path_readf_buffer(desc->sysmem, line, sizeof(line), "%s/valid_zones", name);
+ if (zone_id >= 0) {
+ zn = zone_names[zone_id];
+ /* enable: the zone may appear anywhere in valid_zones */
+ if (enable && !strcasestr(line, zn))
+ continue;
+ /* disable: valid_zones has to start with the zone */
+ if (!enable && strncasecmp(line, zn, strlen(zn)))
+ continue;
+ } else if (enable) {
+ /* By default, use zone Movable for online, if valid */
+ if (strcasestr(line, zone_names[ZONE_MOVABLE]))
+ onoff = "online_movable";
+ else
+ onoff = "online";
+ }
+ }
+
+ idxtostr(desc, index, str, sizeof(str));
+ rc = ul_path_writef_string(desc->sysmem, onoff, "%s/state", name);
+ if (rc != 0 && desc->verbose) {
+ if (enable)
+ fprintf(stdout, _("%s enable failed\n"), str);
+ else
+ fprintf(stdout, _("%s disable failed\n"), str);
+ } else if (rc == 0 && desc->verbose) {
+ if (enable)
+ fprintf(stdout, _("%s enabled\n"), str);
+ else
+ fprintf(stdout, _("%s disabled\n"), str);
+ }
+ /* only successfully switched blocks reduce the remaining count */
+ if (rc == 0)
+ size--;
+ }
+ if (size) {
+ uint64_t bytes;
+ char *sizestr;
+
+ /* report how much was actually switched */
+ bytes = (desc->size - size) * desc->block_size;
+ sizestr = size_to_human_string(SIZE_SUFFIX_1LETTER, bytes);
+ if (enable)
+ warnx(_("Could only enable %s of memory"), sizestr);
+ else
+ warnx(_("Could only disable %s of memory"), sizestr);
+ free(sizestr);
+ }
+ return size == 0 ? 0 : size == desc->size ? -1 : 1;
+}
+
+/*
+ * Set the memory blocks desc->start .. desc->end (block numbers, inclusive)
+ * online (enable != 0) or offline (enable == 0).
+ *
+ * Blocks that already are in the requested state count as done. A
+ * zone_id >= 0 restricts the operation to blocks of that zone; blocks of
+ * another zone are reported as a zone mismatch and left untouched.
+ *
+ * Returns 0 if every block of the range ended up in the requested state,
+ * -1 if none did, and 1 on partial success.
+ */
+static int chmem_range(struct chmem_desc *desc, int enable, int zone_id)
+{
+ char *name, *onoff, line[BUFSIZ], str[BUFSIZ];
+ uint64_t index, todo;
+ const char *zn;
+ int i, rc;
+
+ /* number of blocks in the range that still have to be handled */
+ todo = desc->end - desc->start + 1;
+ onoff = enable ? "online" : "offline";
+
+ /* an explicitly requested zone selects the state keyword to write */
+ if (enable && zone_id >= 0) {
+ if (zone_id == ZONE_MOVABLE)
+ onoff = "online_movable";
+ else
+ onoff = "online_kernel";
+ }
+
+ for (i = 0; i < desc->ndirs; i++) {
+ name = desc->dirs[i]->d_name;
+ /* the block number follows the 6 characters of "memory" */
+ index = strtou64_or_err(name + 6, _("Failed to parse index"));
+ if (index < desc->start)
+ continue;
+ /* stops at the first block beyond the range */
+ if (index > desc->end)
+ break;
+ idxtostr(desc, index, str, sizeof(str));
+ /*
+ * Only the first 6 characters are compared, so every "online*"
+ * keyword matches a state that starts with "online".
+ */
+ if (ul_path_readf_buffer(desc->sysmem, line, sizeof(line), "%s/state", name) > 0
+ && strncmp(onoff, line, 6) == 0) {
+ if (desc->verbose && enable)
+ fprintf(stdout, _("%s already enabled\n"), str);
+ else if (desc->verbose && !enable)
+ fprintf(stdout, _("%s already disabled\n"), str);
+ todo--;
+ continue;
+ }
+
+ if (desc->have_zones) {
+ ul_path_readf_buffer(desc->sysmem, line, sizeof(line), "%s/valid_zones", name);
+ if (zone_id >= 0) {
+ zn = zone_names[zone_id];
+ /* enable: the zone may appear anywhere in valid_zones */
+ if (enable && !strcasestr(line, zn)) {
+ warnx(_("%s enable failed: Zone mismatch"), str);
+ continue;
+ }
+ /* disable: valid_zones has to start with the zone */
+ if (!enable && strncasecmp(line, zn, strlen(zn))) {
+ warnx(_("%s disable failed: Zone mismatch"), str);
+ continue;
+ }
+ } else if (enable) {
+ /* By default, use zone Movable for online, if valid */
+ if (strcasestr(line, zone_names[ZONE_MOVABLE]))
+ onoff = "online_movable";
+ else
+ onoff = "online";
+ }
+ }
+
+ rc = ul_path_writef_string(desc->sysmem, onoff, "%s/state", name);
+ if (rc != 0) {
+ if (enable)
+ warn(_("%s enable failed"), str);
+ else
+ warn(_("%s disable failed"), str);
+ } else if (desc->verbose) {
+ if (enable)
+ fprintf(stdout, _("%s enabled\n"), str);
+ else
+ fprintf(stdout, _("%s disabled\n"), str);
+ }
+ if (rc == 0)
+ todo--;
+ }
+ return todo == 0 ? 0 : todo == desc->end - desc->start + 1 ? -1 : 1;
+}
+
+/*
+ * scandir() filter: accept only directory entries whose name starts with
+ * "memory" and whose remainder passes isdigit_string().
+ */
+static int filter(const struct dirent *de)
+{
+	const char *name = de->d_name;
+
+	if (strncmp("memory", name, 6) != 0)
+		return 0;
+	return isdigit_string(name + 6);
+}
+
+/*
+ * Collect the memoryN directories below _PATH_SYS_MEMORY (filtered by
+ * filter(), sorted by versionsort) and read the memory block size, which
+ * is parsed as a hexadecimal number from the block_size_bytes attribute.
+ *
+ * Exits with an error if no memory directory is found or if no usable
+ * block size can be read. A block size of zero is refused because the
+ * value is used as a divisor when sizes and addresses are converted to
+ * block numbers.
+ */
+static void read_info(struct chmem_desc *desc)
+{
+	char line[128];
+
+	desc->ndirs = scandir(_PATH_SYS_MEMORY, &desc->dirs, filter, versionsort);
+	if (desc->ndirs <= 0)
+		err(EXIT_FAILURE, _("Failed to read %s"), _PATH_SYS_MEMORY);
+
+	/* never parse the buffer when nothing was read into it */
+	if (ul_path_read_buffer(desc->sysmem, line, sizeof(line), "block_size_bytes") <= 0)
+		err(EXIT_FAILURE, _("Failed to read %s"),
+		    _PATH_SYS_MEMORY "/block_size_bytes");
+
+	desc->block_size = strtoumax(line, NULL, 16);
+	if (desc->block_size == 0)
+		errx(EXIT_FAILURE, _("Failed to read %s"),
+		     _PATH_SYS_MEMORY "/block_size_bytes");
+}
+
+/*
+ * Parse a parameter that consists of a single token.
+ *
+ * With use_blocks set, the token is one block number and becomes both start
+ * and end of the range. Otherwise it is a size: a value that ends in a
+ * digit (no unit suffix) is multiplied by 1024*1024, the result has to be a
+ * multiple of the block size, and desc->size is set to the number of
+ * blocks. Exits with an error message on invalid input.
+ */
+static void parse_single_param(struct chmem_desc *desc, char *str)
+{
+	uint64_t bytes;
+
+	if (desc->use_blocks) {
+		desc->start = strtou64_or_err(str, _("Failed to parse block number"));
+		desc->end = desc->start;
+		return;
+	}
+
+	desc->is_size = 1;
+	bytes = strtosize_or_err(str, _("Failed to parse size"));
+
+	/* no unit suffix given */
+	if (isdigit(str[strlen(str) - 1]))
+		bytes *= 1024*1024;
+
+	if (bytes % desc->block_size != 0) {
+		errx(EXIT_FAILURE, _("Size must be aligned to memory block size (%s)"),
+		     size_to_human_string(SIZE_SUFFIX_1LETTER, desc->block_size));
+	}
+	desc->size = bytes / desc->block_size;
+}
+
+/*
+ * Parse a parameter that consists of a start and an end token.
+ *
+ * With use_blocks set, both tokens are block numbers. Otherwise both have
+ * to be addresses with an 'x' as second character; start and (end + 1)
+ * must be multiples of the block size, and both values are converted to
+ * block numbers. Exits with an error message on invalid input.
+ */
+static void parse_range_param(struct chmem_desc *desc, char *start, char *end)
+{
+	const uint64_t bsz = desc->block_size;
+
+	if (!desc->use_blocks) {
+		if (strlen(start) < 2 || start[1] != 'x')
+			errx(EXIT_FAILURE, _("Invalid start address format: %s"), start);
+		if (strlen(end) < 2 || end[1] != 'x')
+			errx(EXIT_FAILURE, _("Invalid end address format: %s"), end);
+
+		desc->start = strtox64_or_err(start, _("Failed to parse start address"));
+		desc->end = strtox64_or_err(end, _("Failed to parse end address"));
+
+		if (desc->start % bsz || (desc->end + 1) % bsz) {
+			errx(EXIT_FAILURE,
+			     _("Start address and (end address + 1) must be aligned to "
+			       "memory block size (%s)"),
+			     size_to_human_string(SIZE_SUFFIX_1LETTER, desc->block_size));
+		}
+		desc->start /= bsz;
+		desc->end /= bsz;
+		return;
+	}
+
+	desc->start = strtou64_or_err(start, _("Failed to parse start"));
+	desc->end = strtou64_or_err(end, _("Failed to parse end"));
+}
+
+/*
+ * Parse the SIZE, RANGE or BLOCKRANGE command line parameter into desc.
+ *
+ * The parameter is split at '-': a single token is handled by
+ * parse_single_param(), two tokens are the start and end of a range and
+ * are handled by parse_range_param(). Any other token count, including a
+ * parameter that yields no token at all, is rejected as invalid, as is a
+ * range whose start lies behind its end.
+ */
+static void parse_parameter(struct chmem_desc *desc, char *param)
+{
+	char **split;
+	unsigned int ntokens;
+
+	split = strv_split(param, "-");
+	ntokens = strv_length(split);
+
+	/* with zero tokens neither split[0] nor split[1] may be used */
+	if (ntokens < 1 || ntokens > 2)
+		errx(EXIT_FAILURE, _("Invalid parameter: %s"), param);
+	if (ntokens == 1)
+		parse_single_param(desc, split[0]);
+	else
+		parse_range_param(desc, split[0], split[1]);
+	strv_free(split);
+	if (desc->start > desc->end)
+		errx(EXIT_FAILURE, _("Invalid range: %s"), param);
+}
+
+/*
+ * Print the help text, including the list of supported zone names, to
+ * stdout and exit with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	size_t z = 0;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options] [SIZE|RANGE|BLOCKRANGE]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Set a particular size or range of memory online or offline.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -e, --enable enable memory\n"), out);
+	fputs(_(" -d, --disable disable memory\n"), out);
+	fputs(_(" -b, --blocks use memory blocks\n"), out);
+	fputs(_(" -z, --zone <name> select memory zone (see below)\n"), out);
+	fputs(_(" -v, --verbose verbose output\n"), out);
+	printf(USAGE_HELP_OPTIONS(20));
+
+	fputs(_("\nSupported zones:\n"), out);
+	while (z < ARRAY_SIZE(zone_names)) {
+		fprintf(out, " %s\n", zone_names[z]);
+		z++;
+	}
+
+	printf(USAGE_MAN_TAIL("chmem(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * chmem entry point: read the memory block layout from sysfs, parse the
+ * options and the SIZE|RANGE|BLOCKRANGE parameter, then set the selected
+ * memory online or offline.
+ *
+ * Returns EXIT_SUCCESS if the whole request was satisfied, EXIT_FAILURE if
+ * nothing of it was, and CHMEM_EXIT_SOMEOK on partial success.
+ */
+int main(int argc, char **argv)
+{
+	struct chmem_desc _desc = { 0 }, *desc = &_desc;
+	int cmd = CMD_NONE, zone_id = -1;
+	char *zone = NULL;
+	int c, rc;
+
+	static const struct option longopts[] = {
+		/*
+		 * Spelled "blocks" as in usage() and the man page; the
+		 * shorter --block is still accepted by getopt_long() as an
+		 * unambiguous abbreviation.
+		 */
+		{"blocks", no_argument, NULL, 'b'},
+		{"disable", no_argument, NULL, 'd'},
+		{"enable", no_argument, NULL, 'e'},
+		{"help", no_argument, NULL, 'h'},
+		{"verbose", no_argument, NULL, 'v'},
+		{"version", no_argument, NULL, 'V'},
+		{"zone", required_argument, NULL, 'z'},
+		{NULL, 0, NULL, 0}
+	};
+
+	static const ul_excl_t excl[] = { /* rows and cols in ASCII order */
+		{ 'd','e' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	ul_path_init_debug();
+	desc->sysmem = ul_new_path(_PATH_SYS_MEMORY);
+	if (!desc->sysmem)
+		err(EXIT_FAILURE, _("failed to initialize %s handler"), _PATH_SYS_MEMORY);
+
+	read_info(desc);
+
+	while ((c = getopt_long(argc, argv, "bdehvVz:", longopts, NULL)) != -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch (c) {
+		case 'd':
+			cmd = CMD_MEMORY_DISABLE;
+			break;
+		case 'e':
+			cmd = CMD_MEMORY_ENABLE;
+			break;
+		case 'b':
+			desc->use_blocks = 1;
+			break;
+		case 'h':
+			usage();
+			break;
+		case 'v':
+			desc->verbose = 1;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'z':
+			zone = xstrdup(optarg);
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* exactly one parameter and one of -e / -d are required */
+	if ((argc == 1) || (argc != optind + 1) || (cmd == CMD_NONE)) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	parse_parameter(desc, argv[optind]);
+
+
+	/* The valid_zones sysfs attribute was introduced with kernel 3.18 */
+	if (ul_path_access(desc->sysmem, F_OK, "memory0/valid_zones") == 0)
+		desc->have_zones = 1;
+	else if (zone)
+		warnx(_("zone ignored, no valid_zones sysfs attribute present"));
+
+	if (zone && desc->have_zones) {
+		zone_id = zone_name_to_id(zone);
+		if (zone_id == -1) {
+			warnx(_("unknown memory zone: %s"), zone);
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (desc->is_size)
+		rc = chmem_size(desc, cmd == CMD_MEMORY_ENABLE ? 1 : 0, zone_id);
+	else
+		rc = chmem_range(desc, cmd == CMD_MEMORY_ENABLE ? 1 : 0, zone_id);
+
+	ul_unref_path(desc->sysmem);
+
+	return rc == 0 ? EXIT_SUCCESS :
+		rc < 0 ? EXIT_FAILURE : CHMEM_EXIT_SOMEOK;
+}
diff --git a/sys-utils/choom.1 b/sys-utils/choom.1
new file mode 100644
index 0000000..2b844cb
--- /dev/null
+++ b/sys-utils/choom.1
@@ -0,0 +1,82 @@
+.TH CHOOM 1 "April 2018" "util-linux" "User Commands"
+.SH NAME
+choom \- display and adjust OOM-killer score
+.SH SYNOPSIS
+.B choom
+.B \-p
+.I pid
+.sp
+.B choom
+.B \-p
+.I pid
+.B \-n
+.I number
+.sp
+.B choom
+.B \-n
+.I number
+.IR command\ [ argument ...]
+
+.SH DESCRIPTION
+The \fBchoom\fP command displays and adjusts the Out-Of-Memory killer score setting.
+
+.SH OPTIONS
+.TP
+.BR \-p ", " \-\-pid " \fIpid\fP
+Specifies process ID.
+.TP
+.BR \-n , " \-\-adjust " \fIvalue\fP
+Specify the adjust score value.
+.TP
+.BR \-h ", " \-\-help
+Display help text and exit.
+.TP
+.BR \-V ", " \-\-version
+Display version information and exit.
+.SH NOTES
+The Linux kernel uses the badness heuristic to select which process gets killed
+in out-of-memory conditions.
+
+The badness heuristic assigns a value to each candidate task ranging from 0
+(never kill) to 1000 (always kill) to determine which process is targeted. The
+units are roughly a proportion along that range of allowed memory the process
+may allocate from based on an estimation of its current memory and swap use.
+For example, if a task is using all allowed memory, its badness score will be
+1000. If it is using half of its allowed memory, its score will be 500.
+
+There is an additional factor included in the badness score: the current memory
+and swap usage is discounted by 3% for root processes.
+
+The amount of "allowed" memory depends on the context in which the oom killer
+was called. If it is due to the memory assigned to the allocating task's cpuset
+being exhausted, the allowed memory represents the set of mems assigned to that
+cpuset. If it is due to a mempolicy's node(s) being exhausted, the allowed
+memory represents the set of mempolicy nodes. If it is due to a memory
+limit (or swap limit) being reached, the allowed memory is that configured
+limit. Finally, if it is due to the entire system being out of memory, the
+allowed memory represents all allocatable resources.
+
+The adjust score value is added to the badness score before it is used to
+determine which task to kill. Acceptable values range from -1000 to +1000.
+This allows userspace to polarize the preference for oom killing either by
+always preferring a certain task or completely disabling it. The lowest
+possible value, -1000, is equivalent to disabling oom killing entirely for that
+task since it will always report a badness score of 0.
+
+Setting an adjust score value of +500, for example, is roughly equivalent to
+allowing the remainder of tasks sharing the same system, cpuset, mempolicy, or
+memory controller resources to use at least 50% more memory. A value of -500,
+on the other hand, would be roughly equivalent to discounting 50% of the task's
+allowed memory from being considered as scoring against the task.
+
+.SH AUTHORS
+.nf
+Karel Zak <kzak@redhat.com>
+.fi
+.SH SEE ALSO
+.BR proc (5)
+.SH AVAILABILITY
+The \fBchoom\fP command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/choom.c b/sys-utils/choom.c
new file mode 100644
index 0000000..eff95b6
--- /dev/null
+++ b/sys-utils/choom.c
@@ -0,0 +1,159 @@
+/*
+ * choom - Change OOM score setting
+ *
+ * Copyright (C) 2018 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <errno.h>
+
+#include "nls.h"
+#include "c.h"
+#include "path.h"
+#include "strutils.h"
+#include "closestream.h"
+
+/* Print the help text to stdout and exit with EXIT_SUCCESS. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	fputs(USAGE_HEADER, out);
+	/* the third synopsis line used to end in an unbalanced ']' */
+	fprintf(out,
+		_(" %1$s [options] -p pid\n"
+		  " %1$s [options] -n number -p pid\n"
+		  " %1$s [options] -n number command [args...]\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Display and adjust OOM-killer score.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -n, --adjust <num> specify the adjust score value\n"), out);
+	fputs(_(" -p, --pid <num> process ID\n"), out);
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(24));
+	printf(USAGE_MAN_TAIL("choom(1)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Read the value of "oom_score" through the path context pc and return it.
+ * Exits with an error message if the value cannot be read.
+ */
+static int get_score(struct path_cxt *pc)
+{
+	int score;
+
+	if (ul_path_read_s32(pc, &score, "oom_score"))
+		err(EXIT_FAILURE, _("failed to read OOM score value"));
+	return score;
+}
+
+/*
+ * Read the value of "oom_score_adj" through the path context pc and return
+ * it. Exits with an error message if the value cannot be read.
+ */
+static int get_score_adj(struct path_cxt *pc)
+{
+	int adjust;
+
+	if (ul_path_read_s32(pc, &adjust, "oom_score_adj"))
+		err(EXIT_FAILURE, _("failed to read OOM score adjust value"));
+	return adjust;
+}
+
+/*
+ * Write adj to "oom_score_adj" through the path context pc and hand back
+ * the result of ul_path_write_s64(); the callers treat non-zero as failure.
+ */
+static int set_score_adj(struct path_cxt *pc, int adj)
+{
+	int rc;
+
+	rc = ul_path_write_s64(pc, adj, "oom_score_adj");
+	return rc;
+}
+
+/*
+ * choom entry point. Depending on the options it either
+ *  - shows the OOM score and the adjust value of the process given by -p,
+ *  - changes the adjust value of that process to the value given by -n, or
+ *  - sets the adjust value for itself and then executes the given command.
+ *
+ * Returns EXIT_SUCCESS on success; all failures exit through err() or
+ * errtryhelp().
+ */
+int main(int argc, char **argv)
+{
+	pid_t pid = 0;
+	int c, adj = 0, has_adj = 0;
+	struct path_cxt *pc = NULL;
+
+	static const struct option longopts[] = {
+		{ "adjust", required_argument, NULL, 'n' },
+		{ "pid", required_argument, NULL, 'p' },
+		{ "help", no_argument, NULL, 'h' },
+		{ "version", no_argument, NULL, 'V' },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv, "hn:p:V", longopts, NULL)) != -1) {
+		switch (c) {
+		case 'p':
+			pid = strtos32_or_err(optarg, _("invalid PID argument"));
+			break;
+		case 'n':
+			adj = strtos32_or_err(optarg, _("invalid adjust argument"));
+			has_adj = 1;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* -p and a command are mutually exclusive */
+	if (optind < argc && pid) {
+		warnx(_("invalid argument: %s"), argv[optind]);
+		errtryhelp(EXIT_FAILURE);
+	}
+	if (!pid && argc - optind < 1) {
+		warnx(_("no PID or COMMAND specified"));
+		errtryhelp(EXIT_FAILURE);
+	}
+	/* starting a command only makes sense with a value to set */
+	if (optind < argc && !has_adj) {
+		warnx(_("no OOM score adjust value specified"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	pc = ul_new_path("/proc/%d", (int) (pid ? pid : getpid()));
+	/* all reads and writes below go through this context */
+	if (!pc)
+		err(EXIT_FAILURE, _("failed to initialize %s handler"), "/proc");
+
+	/* Show */
+	if (!has_adj) {
+		printf(_("pid %d's current OOM score: %d\n"), pid, get_score(pc));
+		printf(_("pid %d's current OOM score adjust value: %d\n"), pid, get_score_adj(pc));
+
+	/* Change */
+	} else if (pid) {
+		int old = get_score_adj(pc);
+
+		if (set_score_adj(pc, adj))
+			err(EXIT_FAILURE, _("failed to set score adjust value"));
+
+		printf(_("pid %d's OOM score adjust value changed from %d to %d\n"), pid, old, adj);
+
+	/* Start new process */
+	} else {
+		if (set_score_adj(pc, adj))
+			err(EXIT_FAILURE, _("failed to set score adjust value"));
+		ul_unref_path(pc);
+		argv += optind;
+		execvp(argv[0], argv);
+		errexec(argv[0]);
+	}
+
+	ul_unref_path(pc);
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/ctrlaltdel.8 b/sys-utils/ctrlaltdel.8
new file mode 100644
index 0000000..a44ad19
--- /dev/null
+++ b/sys-utils/ctrlaltdel.8
@@ -0,0 +1,58 @@
+.\" Copyright 1992, 1993 Rickard E. Faith (faith@cs.unc.edu)
+.\" May be distributed under the GNU General Public License
+.TH CTRLALTDEL 8 "October 2015" "util-linux" "System Administration"
+.SH NAME
+ctrlaltdel \- set the function of the Ctrl-Alt-Del combination
+.SH SYNOPSIS
+.BR "ctrlaltdel hard" | soft
+.SH DESCRIPTION
+Based on examination of the
+.I linux/kernel/reboot.c
+code, it is clear that there are two supported functions that the
+Ctrl-Alt-Del sequence can perform.
+.TP
+.B hard
+Immediately reboot the computer without calling
+.BR sync (2)
+and without any other preparation. This is the default.
+.TP
+.B soft
+Make the kernel send the SIGINT (interrupt) signal to the
+.B init
+process (this is always the process with PID 1). If this option is used,
+the
+.BR init (8)
+program must support this feature. Since there are now several
+.BR init (8)
+programs in the Linux community, please consult the documentation for the
+version that you are currently using.
+.PP
+When the command is run without any argument, it will display the current
+setting.
+.PP
+The function of
+.B ctrlaltdel
+is usually set in the
+.I /etc/rc.local
+file.
+.SH OPTIONS
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Display version information and exit.
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Display help text and exit.
+.SH FILES
+.I /etc/rc.local
+.SH "SEE ALSO"
+.BR init (8),
+.BR systemd (1)
+.SH AUTHOR
+.UR poe@daimi.aau.dk
+Peter Orbaek
+.UE
+.SH AVAILABILITY
+The ctrlaltdel command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/ctrlaltdel.c b/sys-utils/ctrlaltdel.c
new file mode 100644
index 0000000..ea662c4
--- /dev/null
+++ b/sys-utils/ctrlaltdel.c
@@ -0,0 +1,114 @@
+/*
+ * ctrlaltdel.c - Set the function of the Ctrl-Alt-Del combination
+ * Created 4-Jul-92 by Peter Orbaek <poe@daimi.aau.dk>
+ * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/reboot.h>
+#include "nls.h"
+#include "c.h"
+#include "closestream.h"
+#include "pathnames.h"
+#include "path.h"
+
+#define LINUX_REBOOT_CMD_CAD_ON 0x89ABCDEF
+#define LINUX_REBOOT_CMD_CAD_OFF 0x00000000
+
+/* Print the help text to stdout and exit with EXIT_SUCCESS. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s hard|soft\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Set the function of the Ctrl-Alt-Del combination.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	printf(USAGE_HELP_OPTIONS(16));
+	printf(USAGE_MAN_TAIL("ctrlaltdel(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Print the current Ctrl-Alt-Del setting, read from _PATH_PROC_CTRL_ALT_DEL:
+ * 0 is reported as "soft", 1 as "hard". Any other value is reported as an
+ * implicit "hard" together with a warning.
+ *
+ * Returns EXIT_SUCCESS for the two known values and EXIT_FAILURE for an
+ * unexpected one; exits with an error if the file cannot be read.
+ */
+static int get_cad(void)
+{
+	uint64_t val;
+
+	if (ul_path_read_u64(NULL, &val, _PATH_PROC_CTRL_ALT_DEL) != 0)
+		err(EXIT_FAILURE, _("cannot read %s"), _PATH_PROC_CTRL_ALT_DEL);
+
+	switch (val) {
+	case 0:
+		fputs("soft\n", stdout);
+		break;
+	case 1:
+		fputs("hard\n", stdout);
+		break;
+	default:
+		printf("%s hard\n", _("implicit"));
+		/* %ju expects uintmax_t, so convert explicitly */
+		warnx(_("unexpected value in %s: %ju"), _PATH_PROC_CTRL_ALT_DEL,
+		      (uintmax_t) val);
+		return EXIT_FAILURE;
+	}
+	return EXIT_SUCCESS;
+}
+
+/*
+ * Set the Ctrl-Alt-Del behavior to "hard" or "soft" by calling reboot()
+ * with LINUX_REBOOT_CMD_CAD_ON or LINUX_REBOOT_CMD_CAD_OFF respectively.
+ *
+ * Returns EXIT_SUCCESS on success. Returns EXIT_FAILURE, after printing a
+ * warning, if the effective user is not root, if arg is neither "hard" nor
+ * "soft", or if reboot() fails.
+ */
+static int set_cad(const char *arg)
+{
+	unsigned int cmd;
+
+	if (geteuid()) {
+		warnx(_("You must be root to set the Ctrl-Alt-Del behavior"));
+		return EXIT_FAILURE;
+	}
+	if (!strcmp("hard", arg))
+		cmd = LINUX_REBOOT_CMD_CAD_ON;
+	else if (!strcmp("soft", arg))
+		cmd = LINUX_REBOOT_CMD_CAD_OFF;
+	else {
+		warnx(_("unknown argument: %s"), arg);
+		return EXIT_FAILURE;
+	}
+	if (reboot(cmd) < 0) {
+		/* warn(), not warnx(): include the reason given by errno */
+		warn("reboot");
+		return EXIT_FAILURE;
+	}
+	return EXIT_SUCCESS;
+}
+
+/*
+ * ctrlaltdel entry point: without a non-option argument the current setting
+ * is displayed, otherwise the first non-option argument ("hard" or "soft")
+ * selects the new setting.
+ *
+ * Returns the exit status of get_cad() or set_cad().
+ */
+int main(int argc, char **argv)
+{
+	int ch, ret;
+	static const struct option longopts[] = {
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((ch = getopt_long(argc, argv, "Vh", longopts, NULL)) != -1)
+		switch (ch) {
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+
+	/*
+	 * Use optind rather than argv[1], so that an option terminator
+	 * ("ctrlaltdel -- soft") is not mistaken for the mode argument.
+	 */
+	if (optind >= argc)
+		ret = get_cad();
+	else
+		ret = set_cad(argv[optind]);
+	return ret;
+}
diff --git a/sys-utils/dmesg.1 b/sys-utils/dmesg.1
new file mode 100644
index 0000000..a93821a
--- /dev/null
+++ b/sys-utils/dmesg.1
@@ -0,0 +1,256 @@
+.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
+.\" May be distributed under the GNU General Public License
+.TH DMESG "1" "July 2012" "util-linux" "User Commands"
+.SH NAME
+dmesg \- print or control the kernel ring buffer
+.SH SYNOPSIS
+.B dmesg
+[options]
+.sp
+.B dmesg \-\-clear
+.br
+.BR "dmesg \-\-read\-clear " [options]
+.br
+.BI "dmesg \-\-console\-level " level
+.br
+.B dmesg \-\-console\-on
+.br
+.B dmesg \-\-console\-off
+.SH DESCRIPTION
+.B dmesg
+is used to examine or control the kernel ring buffer.
+.PP
+The default action is to display all messages from the kernel ring buffer.
+.SH OPTIONS
+The
+.BR \-\-clear ,
+.BR \-\-read\-clear ,
+.BR \-\-console\-on ,
+.BR \-\-console\-off ,
+and
+.B \-\-console\-level
+options are mutually exclusive.
+.PP
+.IP "\fB\-C\fR, \fB\-\-clear\fR"
+Clear the ring buffer.
+.IP "\fB\-c\fR, \fB\-\-read\-clear\fR"
+Clear the ring buffer after first printing its contents.
+.IP "\fB\-D\fR, \fB\-\-console\-off\fR"
+Disable the printing of messages to the console.
+.IP "\fB\-d\fR, \fB\-\-show\-delta\fR"
+Display the timestamp and the time delta spent between messages. If used
+together with
+.B \-\-notime
+then only the time delta without the timestamp is printed.
+.IP "\fB\-E\fR, \fB\-\-console\-on\fR"
+Enable printing messages to the console.
+.IP "\fB\-e\fR, \fB\-\-reltime\fR"
+Display the local time and the delta in human-readable format. Be aware that
+conversion to the local time could be inaccurate (see \fB\-T\fR for more
+details).
+.IP "\fB\-F\fR, \fB\-\-file \fIfile\fR"
+Read the syslog messages from the given
+.IR file .
+Note that \fB\-F\fR does not support messages in kmsg format. Only the old syslog format is supported.
+.IP "\fB\-f\fR, \fB\-\-facility \fIlist\fR"
+Restrict output to the given (comma-separated)
+.I list
+of facilities. For example:
+.PP
+.RS 14
+.B dmesg \-\-facility=daemon
+.RE
+.IP
+will print messages from system daemons only. For all supported facilities
+see the
+.B \-\-help
+output.
+.IP "\fB\-H\fR, \fB\-\-human\fR"
+Enable human-readable output. See also \fB\-\-color\fR, \fB\-\-reltime\fR
+and \fB\-\-nopager\fR.
+.IP "\fB\-k\fR, \fB\-\-kernel\fR"
+Print kernel messages.
+.IP "\fB\-L\fR, \fB\-\-color\fR[=\fIwhen\fR]"
+Colorize the output. The optional argument \fIwhen\fP
+can be \fBauto\fR, \fBnever\fR or \fBalways\fR. If the \fIwhen\fR argument is omitted,
+it defaults to \fBauto\fR. The colors can be disabled; for the current built-in default
+see the \fB\-\-help\fR output. See also the \fBCOLORS\fR section below.
+.IP "\fB\-l\fR, \fB\-\-level \fIlist\fR"
+Restrict output to the given (comma-separated)
+.I list
+of levels. For example:
+.PP
+.RS 14
+.B dmesg \-\-level=err,warn
+.RE
+.IP
+will print error and warning messages only. For all supported levels see the
+.B \-\-help
+output.
+.IP "\fB\-n\fR, \fB\-\-console\-level \fIlevel\fR"
+Set the
+.I level
+at which printing of messages is done to the console. The
+.I level
+is a level number or abbreviation of the level name. For all supported
+levels see the
+.B \-\-help
+output.
+.sp
+For example,
+.B \-n 1
+or
+.B \-n emerg
+prevents all messages, except emergency (panic) messages, from appearing on
+the console. All levels of messages are still written to
+.IR /proc/kmsg ,
+so
+.BR syslogd (8)
+can still be used to control exactly where kernel messages appear. When the
+.B \-n
+option is used,
+.B dmesg
+will
+.I not
+print or clear the kernel ring buffer.
+.IP "\fB\-P\fR, \fB\-\-nopager\fR"
+Do not pipe output into a pager. A pager is enabled by default for \fB\-\-human\fR output.
+.IP "\fB\-p\fR, \fB\-\-force\-prefix\fR"
+Add facility, level or timestamp information to each line of a multi-line message.
+.IP "\fB\-r\fR, \fB\-\-raw\fR"
+Print the raw message buffer, i.e. do not strip the log-level prefixes.
+
+Note that the real raw format depends on the method by which
+.BR dmesg (1)
+reads kernel messages. The /dev/kmsg device uses a different format than
+.BR syslog (2).
+For backward compatibility,
+.BR dmesg (1)
+returns data always in the
+.BR syslog (2)
+format. It is possible to read the real raw data from /dev/kmsg by, for example,
+the command 'dd if=/dev/kmsg iflag=nonblock'.
+.IP "\fB\-S\fR, \fB\-\-syslog\fR"
+Force \fBdmesg\fR to use the
+.BR syslog (2)
+kernel interface to read kernel messages. The default is to use /dev/kmsg rather
+than
+.BR syslog (2)
+since kernel 3.5.0.
+.IP "\fB\-s\fR, \fB\-\-buffer\-size \fIsize\fR"
+Use a buffer of
+.I size
+to query the kernel ring buffer. This is 16392 by default. (The default
+kernel syslog buffer size was 4096 at first, 8192 since 1.3.54, 16384 since
+2.1.113.) If you have set the kernel buffer to be larger than the default,
+then this option can be used to view the entire buffer.
+.IP "\fB\-T\fR, \fB\-\-ctime\fR"
+Print human-readable timestamps.
+.IP
+.B Be aware that the timestamp could be inaccurate!
+The
+.B time
+source used for the logs is
+.B not updated after
+system
+.BR SUSPEND / RESUME .
+.IP "\fB\-t\fR, \fB\-\-notime\fR"
+Do not print kernel's timestamps.
+.IP "\fB\-\-time\-format\fR \fIformat\fR"
+Print timestamps using the given \fIformat\fR, which can be
+.BR ctime ,
+.BR reltime ,
+.B delta
+or
+.BR iso .
+The first three formats are aliases of the time-format-specific options.
+The
+.B iso
+format is a
+.B dmesg
+implementation of the ISO-8601 timestamp format. The purpose of this format is
+to make the comparing of timestamps between two systems, and any other parsing,
+easy. The definition of the \fBiso\fR timestamp is:
+YYYY-MM-DD<T>HH:MM:SS,<microseconds><-+><timezone offset from UTC>.
+.IP
+The
+.B iso
+format has the same issue as
+.BR ctime :
+the time may be inaccurate when a system is suspended and resumed.
+.TP
+.BR \-u , " \-\-userspace"
+Print userspace messages.
+.TP
+.BR \-w , " \-\-follow"
+Wait for new messages. This feature is supported only on systems with
+a readable /dev/kmsg (since kernel 3.5.0).
+.TP
+.BR \-x , " \-\-decode"
+Decode facility and level (priority) numbers to human-readable prefixes.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH COLORS
+Implicit coloring can be disabled by an empty file \fI/etc/terminal-colors.d/dmesg.disable\fR.
+See
+.BR terminal-colors.d (5)
+for more details about colorization configuration.
+.PP
+The logical color names supported by
+.B dmesg
+are:
+.TP
+.B subsys
+The message sub-system prefix (e.g. "ACPI:").
+.TP
+.B time
+The message timestamp.
+.TP
+.B timebreak
+The message timestamp in short ctime format in \fB\-\-reltime\fR
+or \fB\-\-human\fR output.
+.TP
+.B alert
+The text of the message with the alert log priority.
+.TP
+.B crit
+The text of the message with the critical log priority.
+.TP
+.B err
+The text of the message with the error log priority.
+.TP
+.B warn
+The text of the message with the warning log priority.
+.TP
+.B segfault
+The text of a message that informs about a segmentation fault.
+.SH EXIT STATUS
+.B dmesg
+can fail, reporting a permission denied error. This is usually caused by the
+.B dmesg_restrict
+kernel setting; please see
+.BR syslog (2)
+for more details.
+.SH SEE ALSO
+.BR terminal-colors.d (5),
+.BR syslogd (8)
+.SH AUTHORS
+.MT kzak@redhat.com
+Karel Zak
+.ME
+
+.br
+.B dmesg
+was originally written by
+.MT tytso@athena.mit.edu
+Theodore Ts'o
+.ME
+.SH AVAILABILITY
+The dmesg command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/dmesg.c b/sys-utils/dmesg.c
new file mode 100644
index 0000000..ba4e225
--- /dev/null
+++ b/sys-utils/dmesg.c
@@ -0,0 +1,1547 @@
+/*
+ * dmesg.c -- Print out the contents of the kernel ring buffer
+ *
+ * Copyright (C) 1993 Theodore Ts'o <tytso@athena.mit.edu>
+ * Copyright (C) 2011 Karel Zak <kzak@redhat.com>
+ *
+ * This program comes with ABSOLUTELY NO WARRANTY.
+ */
+#include <stdio.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <sys/klog.h>
+#include <sys/syslog.h>
+#include <sys/time.h>
+#include <sys/sysinfo.h>
+#include <ctype.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "c.h"
+#include "colors.h"
+#include "nls.h"
+#include "strutils.h"
+#include "xalloc.h"
+#include "widechar.h"
+#include "all-io.h"
+#include "bitops.h"
+#include "closestream.h"
+#include "optutils.h"
+#include "timeutils.h"
+#include "monotonic.h"
+#include "mangle.h"
+#include "pager.h"
+
+/*
+ * Action codes passed as the first argument to klogctl() in this file.
+ */
+/* Close the log. Currently a NOP. */
+#define SYSLOG_ACTION_CLOSE 0
+/* Open the log. Currently a NOP. */
+#define SYSLOG_ACTION_OPEN 1
+/* Read from the log. */
+#define SYSLOG_ACTION_READ 2
+/* Read all messages remaining in the ring buffer. (allowed for non-root) */
+#define SYSLOG_ACTION_READ_ALL 3
+/* Read and clear all messages remaining in the ring buffer */
+#define SYSLOG_ACTION_READ_CLEAR 4
+/* Clear ring buffer. */
+#define SYSLOG_ACTION_CLEAR 5
+/* Disable printk's to console */
+#define SYSLOG_ACTION_CONSOLE_OFF 6
+/* Enable printk's to console */
+#define SYSLOG_ACTION_CONSOLE_ON 7
+/* Set level of messages printed to console */
+#define SYSLOG_ACTION_CONSOLE_LEVEL 8
+/* Return number of unread characters in the log buffer */
+#define SYSLOG_ACTION_SIZE_UNREAD 9
+/* Return size of the log buffer */
+#define SYSLOG_ACTION_SIZE_BUFFER 10
+
+/*
+ * Color scheme: one entry per logical color name supported by dmesg.
+ */
+struct dmesg_color {
+ const char *scheme; /* name used in terminal-colors.d/dmesg.scheme */
+ const char *dflt; /* default color ESC sequence */
+};
+
+/* Logical color identifiers; each value is used as an index into colors[] */
+enum {
+ DMESG_COLOR_SUBSYS,
+ DMESG_COLOR_TIME,
+ DMESG_COLOR_TIMEBREAK,
+ DMESG_COLOR_ALERT,
+ DMESG_COLOR_CRIT,
+ DMESG_COLOR_ERR,
+ DMESG_COLOR_WARN,
+ DMESG_COLOR_SEGFAULT
+};
+
+/*
+ * Maps every DMESG_COLOR_* identifier to its scheme name and to the color
+ * sequence used by default.
+ */
+static const struct dmesg_color colors[] =
+{
+ [DMESG_COLOR_SUBSYS] = { "subsys", UL_COLOR_BROWN },
+ [DMESG_COLOR_TIME] = { "time", UL_COLOR_GREEN },
+ [DMESG_COLOR_TIMEBREAK] = { "timebreak",UL_COLOR_GREEN UL_COLOR_BOLD },
+ [DMESG_COLOR_ALERT] = { "alert", UL_COLOR_REVERSE UL_COLOR_RED },
+ [DMESG_COLOR_CRIT] = { "crit", UL_COLOR_BOLD UL_COLOR_RED },
+ [DMESG_COLOR_ERR] = { "err", UL_COLOR_RED },
+ [DMESG_COLOR_WARN] = { "warn", UL_COLOR_BOLD },
+ [DMESG_COLOR_SEGFAULT] = { "segfault", UL_COLOR_HALFBRIGHT UL_COLOR_RED }
+};
+
+/*
+ * Enables the color registered for @_id (a DMESG_COLOR_* value) in colors[].
+ *
+ * The macro expands to a single expression without a trailing semicolon, so
+ * the caller's own ';' terminates the statement and the macro can be used
+ * safely as the body of an if/else.
+ */
+#define dmesg_enable_color(_id) \
+	color_scheme_enable(colors[(_id)].scheme, colors[(_id)].dflt)
+
+/*
+ * Priority and facility names
+ */
+struct dmesg_name {
+ const char *name; /* short name matched by the parsers and printed */
+ const char *help; /* description printed by usage(); marked by N_() */
+};
+
+/*
+ * Priority names -- based on sys/syslog.h
+ *
+ * The array is indexed by the LOG_* priority value.
+ */
+static const struct dmesg_name level_names[] =
+{
+ [LOG_EMERG] = { "emerg", N_("system is unusable") },
+ [LOG_ALERT] = { "alert", N_("action must be taken immediately") },
+ [LOG_CRIT] = { "crit", N_("critical conditions") },
+ [LOG_ERR] = { "err", N_("error conditions") },
+ [LOG_WARNING] = { "warn", N_("warning conditions") },
+ [LOG_NOTICE] = { "notice",N_("normal but significant condition") },
+ [LOG_INFO] = { "info", N_("informational") },
+ [LOG_DEBUG] = { "debug", N_("debug-level messages") }
+};
+
+/*
+ * sys/syslog.h uses (f << 3) for all facility codes.
+ * We want to use the codes as array indexes, so shift back...
+ *
+ * Note that the libc LOG_FAC() macro returns the base (unshifted) codes,
+ * not the shifted ones.
+ */
+#define FAC_BASE(f) ((f) >> 3)
+
+/*
+ * Facility names; the array is indexed by the base (unshifted) facility code,
+ * see FAC_BASE().
+ */
+static const struct dmesg_name facility_names[] =
+{
+ [FAC_BASE(LOG_KERN)] = { "kern", N_("kernel messages") },
+ [FAC_BASE(LOG_USER)] = { "user", N_("random user-level messages") },
+ [FAC_BASE(LOG_MAIL)] = { "mail", N_("mail system") },
+ [FAC_BASE(LOG_DAEMON)] = { "daemon", N_("system daemons") },
+ [FAC_BASE(LOG_AUTH)] = { "auth", N_("security/authorization messages") },
+ [FAC_BASE(LOG_SYSLOG)] = { "syslog", N_("messages generated internally by syslogd") },
+ [FAC_BASE(LOG_LPR)] = { "lpr", N_("line printer subsystem") },
+ [FAC_BASE(LOG_NEWS)] = { "news", N_("network news subsystem") },
+ [FAC_BASE(LOG_UUCP)] = { "uucp", N_("UUCP subsystem") },
+ [FAC_BASE(LOG_CRON)] = { "cron", N_("clock daemon") },
+ [FAC_BASE(LOG_AUTHPRIV)] = { "authpriv", N_("security/authorization messages (private)") },
+ [FAC_BASE(LOG_FTP)] = { "ftp", N_("FTP daemon") },
+};
+
+/*
+ * Supported methods to read the message buffer; stored in
+ * dmesg_control.method and dispatched by read_buffer().
+ */
+enum {
+ DMESG_METHOD_KMSG, /* read messages from /dev/kmsg (default) */
+ DMESG_METHOD_SYSLOG, /* klogctl() buffer */
+ DMESG_METHOD_MMAP /* mmap file with records (see --file) */
+};
+
+/*
+ * Timestamp output formats; stored in dmesg_control.time_fmt. The comment
+ * next to each value sketches the printed prefix.
+ */
+enum {
+ DMESG_TIMEFTM_NONE = 0,
+ DMESG_TIMEFTM_CTIME, /* [ctime] */
+ DMESG_TIMEFTM_CTIME_DELTA, /* [ctime <delta>] */
+ DMESG_TIMEFTM_DELTA, /* [<delta>] */
+ DMESG_TIMEFTM_RELTIME, /* [relative] */
+ DMESG_TIMEFTM_TIME, /* [time] */
+ DMESG_TIMEFTM_TIME_DELTA, /* [time <delta>] */
+ DMESG_TIMEFTM_ISO8601 /* 2013-06-13T22:11:00,123456+0100 */
+};
+/*
+ * True if control struct @c uses time format @f; @f is the enum name without
+ * the DMESG_TIMEFTM_ prefix, e.g. is_timefmt(ctl, NONE).
+ */
+#define is_timefmt(c, f) ((c)->time_fmt == (DMESG_TIMEFTM_ ##f))
+
+/*
+ * Run-time state and settings shared by the reading, parsing and printing
+ * functions.
+ */
+struct dmesg_control {
+ /* bit arrays -- see include/bitops.h */
+ char levels[ARRAY_SIZE(level_names) / NBBY + 1];
+ char facilities[ARRAY_SIZE(facility_names) / NBBY + 1];
+
+ struct timeval lasttime; /* last printed timestamp */
+ struct tm lasttm; /* last localtime */
+ struct timeval boot_time; /* system boot time */
+
+ int action; /* SYSLOG_ACTION_* */
+ int method; /* DMESG_METHOD_* */
+
+ size_t bufsize; /* size of syslog buffer */
+
+ int kmsg; /* /dev/kmsg file descriptor */
+ ssize_t kmsg_first_read;/* initial read() return code */
+ char kmsg_buf[BUFSIZ];/* buffer to read kmsg data */
+
+ /*
+ * For the --file option we mmap whole file. The unnecessary (already
+ * printed) pages are always unmapped. The result is that we have in
+ * memory only the currently used page(s).
+ */
+ char *filename; /* file opened by mmap_file_buffer() */
+ char *mmap_buff; /* start of the still mapped file data */
+ size_t pagesize; /* getpagesize() result, set when the file is mapped */
+ unsigned int time_fmt; /* time format */
+
+ unsigned int follow:1, /* wait for new messages */
+ raw:1, /* raw mode */
+ fltr_lev:1, /* filter out by levels[] */
+ fltr_fac:1, /* filter out by facilities[] */
+ decode:1, /* use "facility: level: " prefix */
+ pager:1, /* pipe output into a pager */
+ color:1, /* colorize messages */
+ force_prefix:1; /* force timestamp and decode prefix
+ on each line */
+ int indent; /* due to timestamps if newline */
+};
+
+/*
+ * One parsed message. The mesg pointer refers into the buffer the record was
+ * parsed from; nothing is copied.
+ */
+struct dmesg_record {
+ const char *mesg; /* begin of the message text */
+ size_t mesg_size; /* size of the message text in bytes */
+
+ int level; /* priority, -1 if not parsed or out of range */
+ int facility; /* facility, -1 if not parsed or out of range */
+ struct timeval tv; /* timestamp parsed from the record prefix */
+
+ const char *next; /* buffer with next unparsed record */
+ size_t next_size; /* size of the next buffer */
+};
+
+/*
+ * Resets the per-message fields of record @_r (message, facility, level and
+ * timestamp). The next/next_size members are intentionally left untouched.
+ */
+#define INIT_DMESG_RECORD(_r) do { \
+ (_r)->mesg = NULL; \
+ (_r)->mesg_size = 0; \
+ (_r)->facility = -1; \
+ (_r)->level = -1; \
+ (_r)->tv.tv_sec = 0; \
+ (_r)->tv.tv_usec = 0; \
+ } while (0)
+
+/* Forward declaration; read_buffer() calls it before the definition. */
+static int read_kmsg(struct dmesg_control *ctl);
+
+/*
+ * Enables the color that belongs to @log_level (alert, crit, err, warn). For
+ * all other levels the "segfault" color is enabled when the message text
+ * (@mesg, @mesgsz bytes) contains "segfault at".
+ *
+ * Returns 0 if a color has been enabled, otherwise -1.
+ */
+static int set_level_color(int log_level, const char *mesg, size_t mesgsz)
+{
+	int color_id;
+
+	if (log_level == LOG_ALERT)
+		color_id = DMESG_COLOR_ALERT;
+	else if (log_level == LOG_CRIT)
+		color_id = DMESG_COLOR_CRIT;
+	else if (log_level == LOG_ERR)
+		color_id = DMESG_COLOR_ERR;
+	else if (log_level == LOG_WARNING)
+		color_id = DMESG_COLOR_WARN;
+	else if (memmem(mesg, mesgsz, "segfault at", 11))
+		/* important keyword in a non-warning/error message */
+		color_id = DMESG_COLOR_SEGFAULT;
+	else
+		return -1;
+
+	dmesg_enable_color(color_id);
+	return 0;
+}
+
+/*
+ * Prints the help text, including the lists of all supported facilities and
+ * levels, to stdout and exits with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	size_t i;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Display or control the kernel ring buffer.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -C, --clear clear the kernel ring buffer\n"), out);
+	fputs(_(" -c, --read-clear read and clear all messages\n"), out);
+	fputs(_(" -D, --console-off disable printing messages to console\n"), out);
+	fputs(_(" -E, --console-on enable printing messages to console\n"), out);
+	fputs(_(" -F, --file <file> use the file instead of the kernel log buffer\n"), out);
+	fputs(_(" -f, --facility <list> restrict output to defined facilities\n"), out);
+	fputs(_(" -H, --human human readable output\n"), out);
+	fputs(_(" -k, --kernel display kernel messages\n"), out);
+	fputs(_(" -L, --color[=<when>] colorize messages (auto, always or never)\n"), out);
+	fprintf(out,
+		" %s\n", USAGE_COLORS_DEFAULT);
+	fputs(_(" -l, --level <list> restrict output to defined levels\n"), out);
+	fputs(_(" -n, --console-level <level> set level of messages printed to console\n"), out);
+	fputs(_(" -P, --nopager do not pipe output into a pager\n"), out);
+	fputs(_(" -p, --force-prefix force timestamp output on each line of multi-line messages\n"), out);
+	fputs(_(" -r, --raw print the raw message buffer\n"), out);
+	fputs(_(" -S, --syslog force to use syslog(2) rather than /dev/kmsg\n"), out);
+	fputs(_(" -s, --buffer-size <size> buffer size to query the kernel ring buffer\n"), out);
+	fputs(_(" -u, --userspace display userspace messages\n"), out);
+	fputs(_(" -w, --follow wait for new messages\n"), out);
+	fputs(_(" -x, --decode decode facility and level to readable string\n"), out);
+	fputs(_(" -d, --show-delta show time delta between printed messages\n"), out);
+	fputs(_(" -e, --reltime show local time and time delta in readable format\n"), out);
+	fputs(_(" -T, --ctime show human-readable timestamp (may be inaccurate!)\n"), out);
+	fputs(_(" -t, --notime don't show any timestamp with messages\n"), out);
+	fputs(_(" --time-format <format> show timestamp using the given format:\n"
+		" [delta|reltime|ctime|notime|iso]\n"
+		"Suspending/resume will make ctime and iso timestamps inaccurate.\n"), out);
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(29));
+
+	/*
+	 * The facility list has to be bounded by its own array size; using the
+	 * size of level_names[] would silently drop the trailing facilities.
+	 */
+	fputs(_("\nSupported log facilities:\n"), out);
+	for (i = 0; i < ARRAY_SIZE(facility_names); i++)
+		fprintf(out, " %7s - %s\n",
+			facility_names[i].name,
+			_(facility_names[i].help));
+
+	fputs(_("\nSupported log levels (priorities):\n"), out);
+	for (i = 0; i < ARRAY_SIZE(level_names); i++)
+		fprintf(out, " %7s - %s\n",
+			level_names[i].name,
+			_(level_names[i].help));
+
+	printf(USAGE_MAN_TAIL("dmesg(1)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * LEVEL ::= <number> | <name>
+ * <number> ::= @len is set: number in range <0..N>, where N < ARRAY_SIZE(level_names)
+ * ::= @len not set: number in range <1..N>, where N <= ARRAY_SIZE(level_names)
+ * <name> ::= case-insensitive text
+ *
+ * Note that @len argument is not set when parsing "-n <level>" command line
+ * option. The console_level is interpreted as "log level less than the value".
+ *
+ * For example "dmesg -n 8" or "dmesg -n debug" enables debug console log
+ * level by klogctl(SYSLOG_ACTION_CONSOLE_LEVEL, NULL, 8). The @str argument
+ * has to be parsed to number in range <1..8>.
+ *
+ * Returns the parsed level, or -1 if @str is NULL. An unknown or unparsable
+ * level terminates the program by err() or errx().
+ */
+static int parse_level(const char *str, size_t len)
+{
+	int offset = 0;
+
+	if (!str)
+		return -1;
+	if (!len) {
+		len = strlen(str);
+		offset = 1;
+	}
+	errno = 0;
+
+	/* <ctype.h> functions require a value representable as unsigned char */
+	if (isdigit((unsigned char) *str)) {
+		char *end = NULL;
+		long x = strtol(str, &end, 10) - offset;
+
+		/* the whole @len bytes have to be consumed by the number */
+		if (!errno && end && end > str && (size_t) (end - str) == len &&
+		    x >= 0 && (size_t) x < ARRAY_SIZE(level_names))
+			return x + offset;
+	} else {
+		size_t i;
+
+		for (i = 0; i < ARRAY_SIZE(level_names); i++) {
+			const char *n = level_names[i].name;
+
+			/* match the first @len bytes and require the name to end there */
+			if (strncasecmp(str, n, len) == 0 && *(n + len) == '\0')
+				return i + offset;
+		}
+	}
+
+	if (errno)
+		err(EXIT_FAILURE, _("failed to parse level '%s'"), str);
+
+	errx(EXIT_FAILURE, _("unknown level '%s'"), str);
+	return -1;
+}
+
+/*
+ * FACILITY ::= <number> | <name>
+ * <number> ::= number in range <0..N>, where N < ARRAY_SIZE(facility_names)
+ * <name> ::= case-insensitive text
+ *
+ * If @len is zero then the whole string @str is parsed.
+ *
+ * Returns the facility as an index into facility_names[], or -1 if @str is
+ * NULL. An unknown or unparsable facility terminates the program by err() or
+ * errx().
+ */
+static int parse_facility(const char *str, size_t len)
+{
+	if (!str)
+		return -1;
+	if (!len)
+		len = strlen(str);
+	errno = 0;
+
+	/* <ctype.h> functions require a value representable as unsigned char */
+	if (isdigit((unsigned char) *str)) {
+		char *end = NULL;
+		long x = strtol(str, &end, 10);
+
+		/* the whole @len bytes have to be consumed by the number */
+		if (!errno && end && end > str && (size_t) (end - str) == len &&
+		    x >= 0 && (size_t) x < ARRAY_SIZE(facility_names))
+			return x;
+	} else {
+		size_t i;
+
+		for (i = 0; i < ARRAY_SIZE(facility_names); i++) {
+			const char *n = facility_names[i].name;
+
+			/* match the first @len bytes and require the name to end there */
+			if (strncasecmp(str, n, len) == 0 && *(n + len) == '\0')
+				return i;
+		}
+	}
+
+	if (errno)
+		err(EXIT_FAILURE, _("failed to parse facility '%s'"), str);
+
+	errx(EXIT_FAILURE, _("unknown facility '%s'"), str);
+	return -1;
+}
+
+/*
+ * Parses numerical prefix used for all messages in kernel ring buffer.
+ *
+ * Priorities/facilities are encoded into a single 32-bit quantity, where the
+ * bottom 3 bits are the priority (0-7) and the top 28 bits are the facility
+ * (0-big number).
+ *
+ * Note that the number has to end with '>' or ',' char.
+ *
+ * On success @fac and @lev are set; a value that is not a valid index into
+ * facility_names[] or level_names[] is replaced by -1. Returns a pointer
+ * behind the terminating char, or @str if no number has been found.
+ */
+static const char *parse_faclev(const char *str, int *fac, int *lev)
+{
+	long num;
+	char *end = NULL;
+
+	if (!str)
+		return str;
+
+	errno = 0;
+	num = strtol(str, &end, 10);
+
+	if (!errno && end && end > str) {
+		*fac = LOG_FAC(num);
+		*lev = LOG_PRI(num);
+
+		/* valid indexes are 0..ARRAY_SIZE-1, so ARRAY_SIZE itself is invalid */
+		if (*lev < 0 || (size_t) *lev >= ARRAY_SIZE(level_names))
+			*lev = -1;
+		if (*fac < 0 || (size_t) *fac >= ARRAY_SIZE(facility_names))
+			*fac = -1;
+		return end + 1;		/* skip '>' or ',' */
+	}
+
+	return str;
+}
+
+/*
+ * Reads the timestamp of a syslog message prefix; the expected input is
+ *
+ *	seconds.microseconds]
+ *
+ * where ']' terminates the timestamp field. The parsed numbers are stored to
+ * @tv. Returns a pointer behind the ']' on success, otherwise @str0.
+ */
+static const char *parse_syslog_timestamp(const char *str0, struct timeval *tv)
+{
+	const char *field = str0;	/* begin of the last parsed number */
+	char *stop = NULL;		/* first char behind the number */
+
+	if (!str0)
+		return str0;
+
+	errno = 0;
+	tv->tv_sec = strtol(field, &stop, 10);
+
+	/* optional fraction part */
+	if (errno == 0 && stop != NULL && stop[0] == '.' && stop[1] != '\0') {
+		field = stop + 1;
+		stop = NULL;
+		tv->tv_usec = strtol(field, &stop, 10);
+	}
+
+	if (errno == 0 && stop != NULL && stop != field && *stop == ']')
+		return stop + 1;	/* skip ']' */
+
+	return str0;
+}
+
+/*
+ * Reads the timestamp field of a /dev/kmsg record; the expected input is
+ *
+ *	microseconds,
+ *	microseconds;
+ *
+ * where ',' separates fields and ';' terminates the last one. The value is
+ * split to seconds and microseconds and stored to @tv. Returns a pointer
+ * behind the separator on success, otherwise @str0.
+ */
+static const char *parse_kmsg_timestamp(const char *str0, struct timeval *tv)
+{
+	char *stop = NULL;
+	uint64_t total_usec;
+
+	if (!str0)
+		return str0;
+
+	errno = 0;
+	total_usec = strtoumax(str0, &stop, 10);
+
+	if (errno || !stop || (*stop != ';' && *stop != ','))
+		return str0;
+
+	tv->tv_usec = total_usec % 1000000;
+	tv->tv_sec = total_usec / 1000000;
+
+	return stop + 1;	/* skip separator */
+}
+
+
+/* Returns the difference @a - @b in seconds (with fraction). */
+static double time_diff(struct timeval *a, struct timeval *b)
+{
+	double secs = a->tv_sec - b->tv_sec;
+	double usecs = a->tv_usec - b->tv_usec;
+
+	return secs + usecs / 1E6;
+}
+
+/*
+ * Asks klogctl() for the size of the log buffer. Returns the size, or 0 if
+ * the call fails or reports a non-positive number.
+ */
+static int get_syslog_buffer_size(void)
+{
+	int size = klogctl(SYSLOG_ACTION_SIZE_BUFFER, NULL, 0);
+
+	if (size <= 0)
+		return 0;
+	return size;
+}
+
+/*
+ * Maps the file ctl->filename read-only to memory. The address is stored to
+ * @buf as well as to ctl->mmap_buff and ctl->pagesize is initialized.
+ *
+ * Returns the size of the file, or -1 if no filename is set. Failures of
+ * open(), fstat() or mmap() terminate the program.
+ */
+static ssize_t mmap_file_buffer(struct dmesg_control *ctl, char **buf)
+{
+	struct stat sb;
+	void *map;
+	int fd;
+
+	if (ctl->filename == NULL)
+		return -1;
+
+	fd = open(ctl->filename, O_RDONLY);
+	if (fd < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), ctl->filename);
+	if (fstat(fd, &sb) != 0)
+		err(EXIT_FAILURE, _("stat of %s failed"), ctl->filename);
+
+	map = mmap(NULL, sb.st_size, PROT_READ, MAP_SHARED, fd, 0);
+	if (map == MAP_FAILED)
+		err(EXIT_FAILURE, _("cannot mmap: %s"), ctl->filename);
+
+	*buf = map;
+	ctl->mmap_buff = map;
+	ctl->pagesize = getpagesize();
+	close(fd);		/* the descriptor is not needed once the file is mapped */
+
+	return sb.st_size;
+}
+
+/*
+ * Reads messages from kernel ring buffer by klogctl()
+ *
+ * The buffer is allocated by xmalloc() and its address is stored to @buf.
+ * Returns the return code of the last klogctl() call.
+ */
+static ssize_t read_syslog_buffer(struct dmesg_control *ctl, char **buf)
+{
+ size_t sz;
+ int rc = -1;
+
+ if (ctl->bufsize) {
+ /* the buffer size is known: one call with the requested action */
+ sz = ctl->bufsize + 8;
+ *buf = xmalloc(sz * sizeof(char));
+ rc = klogctl(ctl->action, *buf, sz);
+ } else {
+ /*
+ * Unknown buffer size: retry with a four times larger buffer as
+ * long as klogctl() fills the buffer completely; give up growing
+ * once the size exceeds 1 << 28 bytes.
+ */
+ sz = 16392;
+ while (1) {
+ *buf = xmalloc(sz * sizeof(char));
+ rc = klogctl(SYSLOG_ACTION_READ_ALL, *buf, sz);
+ if (rc < 0)
+ break;
+ if ((size_t) rc != sz || sz > (1 << 28))
+ break;
+ free(*buf);
+ *buf = NULL;
+ sz *= 4;
+ }
+
+ /* the loop only reads; do the requested read-and-clear now */
+ if (rc > 0 && ctl->action == SYSLOG_ACTION_READ_CLEAR)
+ rc = klogctl(SYSLOG_ACTION_READ_CLEAR, *buf, sz);
+ }
+
+ return rc;
+}
+
+/*
+ * Top level function to read messages; dispatches according to ctl->method.
+ *
+ * For the file and syslog methods the data is returned by @buf and the return
+ * value is the return code of the particular reader. The /dev/kmsg method
+ * (since kernel 3.5.0) prints the records directly in read_kmsg(); the ring
+ * buffer is cleared afterwards if SYSLOG_ACTION_READ_CLEAR is requested.
+ */
+static ssize_t read_buffer(struct dmesg_control *ctl, char **buf)
+{
+	ssize_t rc;
+
+	if (ctl->method == DMESG_METHOD_MMAP)
+		return mmap_file_buffer(ctl, buf);
+
+	if (ctl->method == DMESG_METHOD_SYSLOG) {
+		if (ctl->bufsize == 0)
+			ctl->bufsize = get_syslog_buffer_size();
+		return read_syslog_buffer(ctl, buf);
+	}
+
+	if (ctl->method != DMESG_METHOD_KMSG)
+		abort();	/* impossible method -> drop core */
+
+	rc = read_kmsg(ctl);
+	if (rc == 0 && ctl->action == SYSLOG_ACTION_READ_CLEAR)
+		rc = klogctl(SYSLOG_ACTION_CLEAR, NULL, 0);
+
+	return rc;
+}
+
+/*
+ * Writes @size bytes of @buf to @out, every byte as a \x<hex> sequence.
+ * Returns 0 on success, or the negative fprintf() return code on error.
+ */
+static int fwrite_hex(const char *buf, size_t size, FILE *out)
+{
+	const char *p;
+	const char *stop = buf + size;
+
+	for (p = buf; p < stop; p++) {
+		int rc = fprintf(out, "\\x%02hhx", *p);
+
+		if (rc < 0)
+			return rc;
+	}
+	return 0;
+}
+
+/*
+ * Prints to 'out' and non-printable chars are replaced with \x<hex> sequences.
+ *
+ * If @indent is non-zero then every '\n' that is followed by more data is
+ * followed by @indent spaces in the output. The function stops at the first
+ * NUL char (wide-char build only).
+ *
+ * A write error terminates the program; EPIPE is handled silently by
+ * exit(EXIT_SUCCESS).
+ */
+static void safe_fwrite(const char *buf, size_t size, int indent, FILE *out)
+{
+	size_t i;
+#ifdef HAVE_WIDECHAR
+	mbstate_t s;
+	memset(&s, 0, sizeof (s));
+#endif
+	for (i = 0; i < size; i++) {
+		const char *p = buf + i;
+		int rc, hex = 0;
+		size_t len;
+
+#ifdef HAVE_WIDECHAR
+		wchar_t wc;
+		len = mbrtowc(&wc, p, size - i, &s);
+
+		if (len == 0)				/* L'\0' */
+			return;
+
+		if (len == (size_t)-1 || len == (size_t)-2) {	/* invalid sequence */
+			memset(&s, 0, sizeof (s));
+			len = hex = 1;
+		} else if (len > 1 && !iswprint(wc)) {	/* non-printable multibyte */
+			hex = 1;
+		}
+		i += len - 1;
+#else
+		len = 1;
+		if (!isprint((unsigned char) *p) &&
+			!isspace((unsigned char) *p))	/* non-printable */
+			hex = 1;
+#endif
+		if (hex)
+			rc = fwrite_hex(p, len, out);
+		/*
+		 * Look at the following byte only if it is still inside the
+		 * buffer; buf[size] does not have to be readable.
+		 */
+		else if (*p == '\n' && indent && i + 1 < size && *(p + 1)) {
+			rc = fwrite(p, 1, len, out) != len;
+			if (fprintf(out, "%*s", indent, "") != indent)
+				rc |= 1;
+		}
+		else
+			rc = fwrite(p, 1, len, out) != len;
+		if (rc != 0) {
+			if (errno != EPIPE)
+				err(EXIT_FAILURE, _("write failed"));
+			exit(EXIT_SUCCESS);
+		}
+	}
+}
+
+/*
+ * Skips one item in <@begin, @end). Returns a pointer behind the first NUL or
+ * the first char listed in @sep, or @end if no such char has been found.
+ */
+static const char *skip_item(const char *begin, const char *end, const char *sep)
+{
+	const char *p = begin;
+
+	while (p < end) {
+		const char ch = *p;
+
+		p++;
+		if (ch == '\0' || strchr(sep, ch) != NULL)
+			return p;
+	}
+
+	return p;
+}
+
+/*
+ * Parses one record from syslog(2) buffer
+ *
+ * The unparsed data is described by rec->next and rec->next_size; both are
+ * updated to point behind the returned record. The record text is not copied,
+ * rec->mesg points into the buffer.
+ *
+ * Returns 0 if a record has been parsed, 1 if there is no more data and -1 if
+ * ctl->method is neither DMESG_METHOD_MMAP nor DMESG_METHOD_SYSLOG.
+ */
+static int get_next_syslog_record(struct dmesg_control *ctl,
+ struct dmesg_record *rec)
+{
+ size_t i;
+ const char *begin = NULL;
+
+ if (ctl->method != DMESG_METHOD_MMAP &&
+ ctl->method != DMESG_METHOD_SYSLOG)
+ return -1;
+
+ if (!rec->next || !rec->next_size)
+ return 1;
+
+ INIT_DMESG_RECORD(rec);
+
+ /*
+ * Unmap already printed file data from memory
+ */
+ if (ctl->mmap_buff && (size_t) (rec->next - ctl->mmap_buff) > ctl->pagesize) {
+ void *x = ctl->mmap_buff;
+
+ ctl->mmap_buff += ctl->pagesize;
+ munmap(x, ctl->pagesize);
+ }
+
+ for (i = 0; i < rec->next_size; i++) {
+ const char *p = rec->next + i;
+ const char *end = NULL;
+
+ if (!begin)
+ begin = p;
+ /*
+ * The record ends at the end of the buffer, or at a newline that
+ * is followed by '<' (begin of the next record prefix).
+ */
+ if (i + 1 == rec->next_size) {
+ end = p + 1;
+ i++;
+ } else if (*p == '\n' && *(p + 1) == '<')
+ end = p;
+
+ if (begin && !*begin)
+ begin = NULL; /* zero(s) at the end of the buffer? */
+ if (!begin || !end)
+ continue;
+ if (end <= begin)
+ continue; /* error or empty line? */
+
+ /* "<faclev>" prefix: parse it only if the result is needed */
+ if (*begin == '<') {
+ if (ctl->fltr_lev || ctl->fltr_fac || ctl->decode || ctl->color)
+ begin = parse_faclev(begin + 1, &rec->facility,
+ &rec->level);
+ else
+ begin = skip_item(begin, end, ">");
+ }
+
+ /* "[timestamp]" prefix: parse it only if timestamps are printed */
+ if (*begin == '[' && (*(begin + 1) == ' ' ||
+ isdigit(*(begin + 1)))) {
+
+ if (!is_timefmt(ctl, NONE))
+ begin = parse_syslog_timestamp(begin + 1, &rec->tv);
+ else
+ begin = skip_item(begin, end, "]");
+
+ if (begin < end && *begin == ' ')
+ begin++;
+ }
+
+ rec->mesg = begin;
+ rec->mesg_size = end - begin;
+
+ /* Don't count \n from the last message to the message size */
+ if (*end != '\n' && *(end - 1) == '\n')
+ rec->mesg_size--;
+
+ /* move behind the record and its terminating newline */
+ rec->next_size -= end - rec->next;
+ rec->next = rec->next_size > 0 ? end + 1 : NULL;
+ if (rec->next_size > 0)
+ rec->next_size--;
+
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * Applies the level and facility filters to the record.
+ *
+ * Each filter checks its own field: a record with unknown (negative) level is
+ * rejected by the level filter, a record with unknown facility is rejected by
+ * the facility filter. The negative value is never used as a bit index.
+ *
+ * Returns 1 if the record should be printed, otherwise 0.
+ */
+static int accept_record(struct dmesg_control *ctl, struct dmesg_record *rec)
+{
+	if (ctl->fltr_lev && (rec->level < 0 ||
+			      !isset(ctl->levels, rec->level)))
+		return 0;
+
+	if (ctl->fltr_fac && (rec->facility < 0 ||
+			      !isset(ctl->facilities, rec->facility)))
+		return 0;
+
+	return 1;
+}
+
+/*
+ * Prints @size bytes of the buffer without any parsing. If the data has been
+ * mapped from a file (ctl->mmap_buff is set) then it is printed and unmapped
+ * in chunks of at most one page. A final newline is added if the data does not
+ * end with one.
+ */
+static void raw_print(struct dmesg_control *ctl, const char *buf, size_t size)
+{
+	int lastc = '\n';
+
+	if (!ctl->mmap_buff) {
+		/*
+		 * Print whole ring buffer; an empty buffer has no last
+		 * char to look at.
+		 */
+		if (size) {
+			safe_fwrite(buf, size, 0, stdout);
+			lastc = buf[size - 1];
+		}
+	} else {
+		/*
+		 * Print file in small chunks to save memory
+		 */
+		while (size) {
+			size_t sz = size > ctl->pagesize ? ctl->pagesize : size;
+			char *x = ctl->mmap_buff;
+
+			safe_fwrite(x, sz, 0, stdout);
+			lastc = x[sz - 1];	/* has to be read before munmap() */
+			size -= sz;
+			ctl->mmap_buff += sz;
+			munmap(x, sz);
+		}
+	}
+
+	if (lastc != '\n')
+		putchar('\n');
+}
+
+/*
+ * Converts the record timestamp, taken as an offset from ctl->boot_time, to
+ * broken-down local time stored in @tm. Only whole seconds are used. Returns
+ * the localtime_r() result.
+ */
+static struct tm *record_localtime(struct dmesg_control *ctl,
+				   struct dmesg_record *rec,
+				   struct tm *tm)
+{
+	const time_t when = ctl->boot_time.tv_sec + rec->tv.tv_sec;
+
+	return localtime_r(&when, tm);
+}
+
+/*
+ * Formats the local time of the record to @buf by the strftime() format
+ * "%a %b %e %H:%M:%S %Y". @buf is set to an empty string if strftime()
+ * returns 0. Returns @buf.
+ */
+static char *record_ctime(struct dmesg_control *ctl,
+			  struct dmesg_record *rec,
+			  char *buf, size_t bufsiz)
+{
+	struct tm tm;
+	size_t n;
+
+	record_localtime(ctl, rec, &tm);
+
+	n = strftime(buf, bufsiz, "%a %b %e %H:%M:%S %Y", &tm);
+	if (!n)
+		buf[0] = '\0';
+	return buf;
+}
+
+/*
+ * Formats @tm to @buf by the strftime() format "%b%e %H:%M". @buf is set to
+ * an empty string if strftime() returns 0. Returns @buf.
+ */
+static char *short_ctime(struct tm *tm, char *buf, size_t bufsiz)
+{
+	size_t n = strftime(buf, bufsiz, "%b%e %H:%M", tm);
+
+	if (!n)
+		buf[0] = '\0';
+	return buf;
+}
+
+/*
+ * Formats the record time (boot time seconds plus the record timestamp) to
+ * @buf by strtimeval_iso() with ISO_TIMESTAMP_COMMA_T. Returns @buf, or NULL
+ * if the conversion fails.
+ */
+static char *iso_8601_time(struct dmesg_control *ctl, struct dmesg_record *rec,
+			   char *buf, size_t bufsz)
+{
+	struct timeval when = {
+		.tv_sec = ctl->boot_time.tv_sec + rec->tv.tv_sec,
+		.tv_usec = rec->tv.tv_usec
+	};
+	int rc = strtimeval_iso(&when, ISO_TIMESTAMP_COMMA_T, buf, bufsz);
+
+	return rc == 0 ? buf : NULL;
+}
+
+/*
+ * Returns the time in seconds between the record and the previously
+ * remembered timestamp (0 if nothing is remembered yet) and remembers the
+ * record timestamp in ctl->lasttime for the next call.
+ */
+static double record_count_delta(struct dmesg_control *ctl,
+				 struct dmesg_record *rec)
+{
+	double delta;
+
+	if (timerisset(&ctl->lasttime))
+		delta = time_diff(&rec->tv, &ctl->lasttime);
+	else
+		delta = 0;
+
+	ctl->lasttime = rec->tv;
+	return delta;
+}
+
+/*
+ * Searches the first @mesg_size bytes of @mesg for the delimiter of the
+ * sub-system prefix, i.e. the first ':' that is followed by a blank char.
+ *
+ * Returns a pointer to the ':' or NULL if there is no such delimiter.
+ */
+static const char *get_subsys_delimiter(const char *mesg, size_t mesg_size)
+{
+ const char *p = mesg;
+ size_t sz = mesg_size;
+
+ while (sz > 0) {
+ const char *d = strnchr(p, sz, ':');
+ if (!d)
+ return NULL;
+ /* sz is now the number of bytes that remain behind the ':' */
+ sz -= d - p + 1;
+ if (sz) {
+ if (isblank(*(d + 1)))
+ return d;
+ /* ':' without a following blank; continue behind it */
+ p = d + 1;
+ }
+ }
+ return NULL;
+}
+
+/*
+ * Prints one record to stdout.
+ *
+ * Records rejected by accept_record() are skipped and an empty record is
+ * printed as an empty line. Otherwise the output is composed from the
+ * optional "facility:level:" prefix (--decode), the timestamp in the format
+ * selected by ctl->time_fmt and the message text, colorized if ctl->color is
+ * set. With ctl->force_prefix the message is split at '\n' and the prefix is
+ * repeated for every line.
+ *
+ * The width of the prefix is stored to ctl->indent, it is used to indent
+ * continuation lines of multi-line messages.
+ */
+static void print_record(struct dmesg_control *ctl,
+			 struct dmesg_record *rec)
+{
+	char buf[128];
+	char fpbuf[32] = "\0";
+	char tsbuf[64] = "\0";
+	size_t mesg_size = rec->mesg_size;
+	int timebreak = 0;
+	char *mesg_copy = NULL;
+	const char *line = NULL;
+
+	if (!accept_record(ctl, rec))
+		return;
+
+	if (!rec->mesg_size) {
+		putchar('\n');
+		return;
+	}
+
+	/*
+	 * Compose syslog(2) compatible raw output -- used for /dev/kmsg for
+	 * backward compatibility with syslog(2) buffers only
+	 */
+	if (ctl->raw) {
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf),
+				       "<%d>[%5ld.%06ld] ",
+				       LOG_MAKEPRI(rec->facility, rec->level),
+				       (long) rec->tv.tv_sec,
+				       (long) rec->tv.tv_usec);
+		goto full_output;
+	}
+
+	/* Store decode information (facility & priority level) in a buffer */
+	if (ctl->decode &&
+	    (rec->level > -1) && (rec->level < (int) ARRAY_SIZE(level_names)) &&
+	    (rec->facility > -1) &&
+	    (rec->facility < (int) ARRAY_SIZE(facility_names)))
+		snprintf(fpbuf, sizeof(fpbuf), "%-6s:%-6s: ",
+			 facility_names[rec->facility].name,
+			 level_names[rec->level].name);
+
+	/* Store the timestamp in a buffer */
+	switch (ctl->time_fmt) {
+		double delta;
+		struct tm cur;
+	case DMESG_TIMEFTM_NONE:
+		ctl->indent = 0;
+		break;
+	case DMESG_TIMEFTM_CTIME:
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%s] ",
+				       record_ctime(ctl, rec, buf, sizeof(buf)));
+		break;
+	case DMESG_TIMEFTM_CTIME_DELTA:
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%s <%12.06f>] ",
+				       record_ctime(ctl, rec, buf, sizeof(buf)),
+				       record_count_delta(ctl, rec));
+		break;
+	case DMESG_TIMEFTM_DELTA:
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[<%12.06f>] ",
+				       record_count_delta(ctl, rec));
+		break;
+	case DMESG_TIMEFTM_RELTIME:
+		record_localtime(ctl, rec, &cur);
+		delta = record_count_delta(ctl, rec);
+		/* a new minute, hour or day: print the short ctime instead of delta */
+		if (cur.tm_min != ctl->lasttm.tm_min ||
+		    cur.tm_hour != ctl->lasttm.tm_hour ||
+		    cur.tm_yday != ctl->lasttm.tm_yday) {
+			timebreak = 1;
+			ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%s] ",
+					       short_ctime(&cur, buf,
+							   sizeof(buf)));
+		} else {
+			if (delta < 10)
+				ctl->indent = snprintf(tsbuf, sizeof(tsbuf),
+						       "[ %+8.06f] ", delta);
+			else
+				ctl->indent = snprintf(tsbuf, sizeof(tsbuf),
+						       "[ %+9.06f] ", delta);
+		}
+		ctl->lasttm = cur;
+		break;
+	case DMESG_TIMEFTM_TIME:
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%5ld.%06ld] ",
+				       (long)rec->tv.tv_sec,
+				       (long)rec->tv.tv_usec);
+		break;
+	case DMESG_TIMEFTM_TIME_DELTA:
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%5ld.%06ld <%12.06f>] ",
+				       (long)rec->tv.tv_sec,
+				       (long)rec->tv.tv_usec,
+				       record_count_delta(ctl, rec));
+		break;
+	case DMESG_TIMEFTM_ISO8601:
+		/*
+		 * iso_8601_time() returns NULL on failure and NULL must not be
+		 * passed to "%s"; print an empty timestamp in this case.
+		 */
+		if (!iso_8601_time(ctl, rec, buf, sizeof(buf)))
+			*buf = '\0';
+		ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "%s ", buf);
+		break;
+	default:
+		abort();
+	}
+
+	ctl->indent += strlen(fpbuf);
+
+full_output:
+	/* Output the decode information */
+	if (*fpbuf)
+		fputs(fpbuf, stdout);
+
+	/* Output the timestamp buffer */
+	if (*tsbuf) {
+		/* Colorize the timestamp */
+		if (ctl->color)
+			dmesg_enable_color(timebreak ? DMESG_COLOR_TIMEBREAK :
+						       DMESG_COLOR_TIME);
+		if (ctl->time_fmt != DMESG_TIMEFTM_RELTIME) {
+			fputs(tsbuf, stdout);
+		} else {
+			/*
+			 * For relative timestamping, the first line's
+			 * timestamp is the offset and all other lines will
+			 * report an offset of 0.000000.
+			 */
+			if (!line)
+				fputs(tsbuf, stdout);
+			else
+				printf("[ +0.000000] ");
+		}
+		if (ctl->color)
+			color_disable();
+	}
+
+	/*
+	 * A kernel message may contain several lines of output, separated
+	 * by '\n'. If the timestamp and decode outputs are forced then each
+	 * line of the message must be displayed with that information.
+	 */
+	if (ctl->force_prefix) {
+		if (!line) {
+			/*
+			 * NOTE(review): xstrdup() copies up to the first NUL,
+			 * which is not necessarily at rec->mesg_size for records
+			 * that point into a syslog(2) or file buffer -- confirm.
+			 */
+			mesg_copy = xstrdup(rec->mesg);
+			line = strtok(mesg_copy, "\n");
+			/* strtok() returns NULL if the text contains newlines only */
+			if (!line)
+				goto done;
+			mesg_size = strlen(line);
+		}
+	} else {
+		line = rec->mesg;
+		mesg_size = rec->mesg_size;
+	}
+
+	/* Colorize kernel message output */
+	if (ctl->color) {
+		/* Subsystem prefix */
+		const char *subsys = get_subsys_delimiter(line, mesg_size);
+		int has_color = 0;
+
+		if (subsys) {
+			dmesg_enable_color(DMESG_COLOR_SUBSYS);
+			safe_fwrite(line, subsys - line, ctl->indent, stdout);
+			color_disable();
+
+			mesg_size -= subsys - line;
+			line = subsys;
+		}
+		/* Error, alert .. etc. colors */
+		has_color = set_level_color(rec->level, line, mesg_size) == 0;
+		safe_fwrite(line, mesg_size, ctl->indent, stdout);
+		if (has_color)
+			color_disable();
+	} else
+		safe_fwrite(line, mesg_size, ctl->indent, stdout);
+
+	/* Get the next line */
+	if (ctl->force_prefix) {
+		line = strtok(NULL, "\n");
+		if (line && *line) {
+			putchar('\n');
+			mesg_size = strlen(line);
+			goto full_output;
+		}
+	}
+
+done:
+	free(mesg_copy);	/* NULL unless --force-prefix is used */
+	putchar('\n');
+}
+
+/*
+ * Prints the kernel ring buffer 'buf' of 'size' bytes. In raw mode the buffer
+ * is printed as it is; otherwise it is split to records and every record is
+ * passed to print_record(), which filters the messages out according to the
+ * 'levels' and 'facilities' bitarrays.
+ */
+static void print_buffer(struct dmesg_control *ctl,
+			 const char *buf, size_t size)
+{
+	struct dmesg_record rec = { .next = buf, .next_size = size };
+
+	if (ctl->raw) {
+		raw_print(ctl, buf, size);
+		return;
+	}
+
+	for (;;) {
+		if (get_next_syslog_record(ctl, &rec) != 0)
+			break;
+		print_record(ctl, &rec);
+	}
+}
+
+/*
+ * Reads one record from ctl->kmsg to ctl->kmsg_buf; the last byte of the
+ * buffer is never used by read(). Returns the read() return code.
+ */
+static ssize_t read_kmsg_one(struct dmesg_control *ctl)
+{
+	for (;;) {
+		ssize_t n = read(ctl->kmsg, ctl->kmsg_buf,
+				 sizeof(ctl->kmsg_buf) - 1);
+
+		/* kmsg returns EPIPE if record was modified while reading */
+		if (n >= 0 || errno != EPIPE)
+			return n;
+	}
+}
+
+/*
+ * Opens /dev/kmsg and reads the first record to verify that the device is
+ * usable. The descriptor is stored to ctl->kmsg and the result of the first
+ * read to ctl->kmsg_first_read.
+ *
+ * Returns 0 on success, -1 if the device cannot be opened or read (ctl->kmsg
+ * is negative in this case).
+ */
+static int init_kmsg(struct dmesg_control *ctl)
+{
+ int mode = O_RDONLY;
+
+ /* without --follow read() must not wait for new messages */
+ if (!ctl->follow)
+ mode |= O_NONBLOCK;
+ else
+ setlinebuf(stdout);
+
+ ctl->kmsg = open("/dev/kmsg", mode);
+ if (ctl->kmsg < 0)
+ return -1;
+
+ /*
+ * Seek after the last record available at the time
+ * the last SYSLOG_ACTION_CLEAR was issued.
+ *
+ * ... otherwise SYSLOG_ACTION_CLEAR will have no effect for kmsg.
+ */
+ lseek(ctl->kmsg, 0, SEEK_DATA);
+
+ /*
+ * Old kernels (<3.5) allow to successfully open /dev/kmsg for
+ * read-only, but read() returns -EINVAL :-(((
+ *
+ * Let's try to read the first record. The record is later processed in
+ * read_kmsg().
+ */
+ ctl->kmsg_first_read = read_kmsg_one(ctl);
+ if (ctl->kmsg_first_read < 0) {
+ close(ctl->kmsg);
+ ctl->kmsg = -1;
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * /dev/kmsg record format:
+ *
+ * faclev,seqnum,timestamp[optional, ...];message\n
+ * TAGNAME=value
+ * ...
+ *
+ * - fields are separated by ','
+ * - last field is terminated by ';'
+ *
+ * LAST_KMSG_FIELD() is true if the pointer @s, as returned by a field parser,
+ * is NULL, points to the end of the string, or follows the ';' terminator.
+ * The argument is parenthesized to make the macro safe for any pointer
+ * expression.
+ */
+#define LAST_KMSG_FIELD(s)	(!(s) || !*(s) || *((s) - 1) == ';')
+
+/*
+ * Parses one /dev/kmsg record stored in @buf (@sz bytes) and fills in @rec.
+ *
+ * The message text is decoded in place, so @buf is modified and rec->mesg
+ * points into it.
+ *
+ * Returns 0 on success, -1 if the buffer is empty.
+ */
+static int parse_kmsg_record(struct dmesg_control *ctl,
+ struct dmesg_record *rec,
+ char *buf,
+ size_t sz)
+{
+ const char *p = buf, *end;
+
+ if (sz == 0 || !buf || !*buf)
+ return -1;
+
+ end = buf + (sz - 1);
+ INIT_DMESG_RECORD(rec);
+
+ while (p < end && isspace(*p))
+ p++;
+
+ /* A) priority and facility */
+ if (ctl->fltr_lev || ctl->fltr_fac || ctl->decode ||
+ ctl->raw || ctl->color)
+ p = parse_faclev(p, &rec->facility, &rec->level);
+ else
+ p = skip_item(p, end, ",");
+ if (LAST_KMSG_FIELD(p))
+ goto mesg;
+
+ /* B) sequence number */
+ p = skip_item(p, end, ",;");
+ if (LAST_KMSG_FIELD(p))
+ goto mesg;
+
+ /* C) timestamp */
+ if (is_timefmt(ctl, NONE))
+ p = skip_item(p, end, ",;");
+ else
+ p = parse_kmsg_timestamp(p, &rec->tv);
+ if (LAST_KMSG_FIELD(p))
+ goto mesg;
+
+ /* D) optional fields (ignore) */
+ p = skip_item(p, end, ";");
+
+mesg:
+ /* E) message text */
+ rec->mesg = p;
+ p = skip_item(p, end, "\n");
+ if (!p)
+ return -1;
+
+ /* The message text is terminated by \n, but it's possible that the
+ * message contains another stuff behind this linebreak; in this case
+ * the previous skip_item() returns pointer to the stuff behind \n.
+ * Let's normalize all these situations and make sure we always point to
+ * the \n.
+ *
+ * Note that the next unhexmangle_to_buffer() will replace \n by \0.
+ */
+ if (*p && *p != '\n')
+ p--;
+
+ /*
+ * Kernel escapes non-printable characters, unfortunately kernel
+ * definition of "non-printable" is too strict. On UTF8 console we can
+ * print many chars, so let's decode from kernel.
+ *
+ * Source and destination are the same buffer, that's why the const
+ * qualifier is cast away here.
+ */
+ rec->mesg_size = unhexmangle_to_buffer(rec->mesg,
+ (char *) rec->mesg, p - rec->mesg + 1);
+
+ rec->mesg_size--; /* don't count \0 */
+
+ /* F) message tags (ignore) */
+
+ return 0;
+}
+
+/*
+ * Note that each read() call for /dev/kmsg returns always one record. It means
+ * that we don't have to read whole message buffer before the records parsing.
+ *
+ * So this function does not compose one huge buffer (like read_syslog_buffer())
+ * and print_buffer() is unnecessary. All is done in this function.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int read_kmsg(struct dmesg_control *ctl)
+{
+ struct dmesg_record rec;
+ ssize_t sz;
+
+ if (ctl->method != DMESG_METHOD_KMSG || ctl->kmsg < 0)
+ return -1;
+
+ /*
+ * The very first read() call is done in init_kmsg() where we test
+ * /dev/kmsg usability. The return code from the initial read() is
+ * stored in ctl->kmsg_first_read;
+ */
+ sz = ctl->kmsg_first_read;
+
+ /* the loop ends when read_kmsg_one() returns zero or an error */
+ while (sz > 0) {
+ *(ctl->kmsg_buf + sz) = '\0'; /* for debug messages */
+
+ if (parse_kmsg_record(ctl, &rec,
+ ctl->kmsg_buf, (size_t) sz) == 0)
+ print_record(ctl, &rec);
+
+ sz = read_kmsg_one(ctl);
+ }
+
+ return 0;
+}
+
+/*
+ * Translates the name of a time format (see --time-format) to the
+ * DMESG_TIMEFTM_* value. An unknown name terminates the program by errx().
+ */
+static int which_time_format(const char *s)
+{
+	static const struct {
+		const char *name;
+		int fmt;
+	} formats[] = {
+		{ "notime",	DMESG_TIMEFTM_NONE },
+		{ "ctime",	DMESG_TIMEFTM_CTIME },
+		{ "delta",	DMESG_TIMEFTM_DELTA },
+		{ "reltime",	DMESG_TIMEFTM_RELTIME },
+		{ "iso",	DMESG_TIMEFTM_ISO8601 }
+	};
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(formats); i++) {
+		if (strcmp(s, formats[i].name) == 0)
+			return formats[i].fmt;
+	}
+
+	errx(EXIT_FAILURE, _("unknown time format: %s"), s);
+}
+
+#ifdef TEST_DMESG
+/*
+ * Test-build replacement for get_boot_time(): when $DMESG_TEST_BOOTIME holds
+ * "<sec>.<usec>" that value is used as the boot time, otherwise the call is
+ * forwarded to get_boot_time().
+ *
+ * Returns 0 on success, -EINVAL if a stored field ends up negative, or
+ * whatever get_boot_time() returns.
+ */
+static inline int dmesg_get_boot_time(struct timeval *tv)
+{
+	const char *env = getenv("DMESG_TEST_BOOTIME");
+	uintmax_t sec, usec;
+
+	if (!env || sscanf(env, "%ju.%ju", &sec, &usec) != 2)
+		return get_boot_time(tv);
+
+	tv->tv_sec = sec;
+	tv->tv_usec = usec;
+
+	/* the unsigned input may not fit the signed timeval fields */
+	if (tv->tv_sec < 0 || tv->tv_usec < 0)
+		return -EINVAL;
+	return 0;
+}
+#else
+# define dmesg_get_boot_time	get_boot_time
+#endif
+
+/*
+ * dmesg entry point.
+ *
+ * Parses the command line into the static control struct and then either
+ * reads and prints the kernel log (SYSLOG_ACTION_READ_ALL / READ_CLEAR) or
+ * issues one klogctl() control action (clear, console on/off, console level).
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE when a -f/-l list cannot be parsed;
+ * all other failures terminate through err()/errx()/errtryhelp().
+ */
+int main(int argc, char *argv[])
+{
+ char *buf = NULL;
+ int c, nopager = 0;
+ int console_level = 0;
+ int klog_rc = 0;
+ int delta = 0;
+ ssize_t n;
+ static struct dmesg_control ctl = {
+ .filename = NULL,
+ .action = SYSLOG_ACTION_READ_ALL,
+ .method = DMESG_METHOD_KMSG,
+ .kmsg = -1,
+ .time_fmt = DMESG_TIMEFTM_TIME,
+ .indent = 0,
+ };
+ int colormode = UL_COLORMODE_UNDEF;
+ enum {
+ OPT_TIME_FORMAT = CHAR_MAX + 1,
+ };
+
+ static const struct option longopts[] = {
+ { "buffer-size", required_argument, NULL, 's' },
+ { "clear", no_argument, NULL, 'C' },
+ { "color", optional_argument, NULL, 'L' },
+ { "console-level", required_argument, NULL, 'n' },
+ { "console-off", no_argument, NULL, 'D' },
+ { "console-on", no_argument, NULL, 'E' },
+ { "decode", no_argument, NULL, 'x' },
+ { "file", required_argument, NULL, 'F' },
+ { "facility", required_argument, NULL, 'f' },
+ { "follow", no_argument, NULL, 'w' },
+ { "human", no_argument, NULL, 'H' },
+ { "help", no_argument, NULL, 'h' },
+ { "kernel", no_argument, NULL, 'k' },
+ { "level", required_argument, NULL, 'l' },
+ { "syslog", no_argument, NULL, 'S' },
+ { "raw", no_argument, NULL, 'r' },
+ { "read-clear", no_argument, NULL, 'c' },
+ { "reltime", no_argument, NULL, 'e' },
+ { "show-delta", no_argument, NULL, 'd' },
+ { "ctime", no_argument, NULL, 'T' },
+ { "notime", no_argument, NULL, 't' },
+ { "nopager", no_argument, NULL, 'P' },
+ { "userspace", no_argument, NULL, 'u' },
+ { "version", no_argument, NULL, 'V' },
+ { "time-format", required_argument, NULL, OPT_TIME_FORMAT },
+ { "force-prefix", no_argument, NULL, 'p' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ static const ul_excl_t excl[] = { /* rows and cols in ASCII order */
+ { 'C','D','E','c','n','r' }, /* clear,off,on,read-clear,level,raw*/
+ { 'H','r' }, /* human, raw */
+ { 'L','r' }, /* color, raw */
+ { 'S','w' }, /* syslog,follow */
+ { 'T','r' }, /* ctime, raw */
+ { 'd','r' }, /* delta, raw */
+ { 'e','r' }, /* reltime, raw */
+ { 'r','x' }, /* raw, decode */
+ { 'r','t' }, /* notime, raw */
+ { 0 }
+ };
+ int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ /*
+ * NOTE(review): the optstring below accepts 'i', but no case handles
+ * it, so -i ends up in the default branch (errtryhelp) -- confirm
+ * whether that is intended.
+ */
+ while ((c = getopt_long(argc, argv, "CcDdEeF:f:HhkL::l:n:iPprSs:TtuVwx",
+ longopts, NULL)) != -1) {
+
+ err_exclusive_options(c, longopts, excl, excl_st);
+
+ switch (c) {
+ case 'C':
+ ctl.action = SYSLOG_ACTION_CLEAR;
+ break;
+ case 'c':
+ ctl.action = SYSLOG_ACTION_READ_CLEAR;
+ break;
+ case 'D':
+ ctl.action = SYSLOG_ACTION_CONSOLE_OFF;
+ break;
+ case 'd':
+ delta = 1;
+ break;
+ case 'E':
+ ctl.action = SYSLOG_ACTION_CONSOLE_ON;
+ break;
+ case 'e':
+ ctl.time_fmt = DMESG_TIMEFTM_RELTIME;
+ break;
+ case 'F':
+ ctl.filename = optarg;
+ ctl.method = DMESG_METHOD_MMAP;
+ break;
+ case 'f':
+ ctl.fltr_fac = 1;
+ if (string_to_bitarray(optarg,
+ ctl.facilities, parse_facility) < 0)
+ return EXIT_FAILURE;
+ break;
+ case 'H':
+ /* --human: relative timestamps, automatic colors and a pager */
+ ctl.time_fmt = DMESG_TIMEFTM_RELTIME;
+ colormode = UL_COLORMODE_AUTO;
+ ctl.pager = 1;
+ break;
+ case 'h':
+ usage();
+ break;
+ case 'k':
+ ctl.fltr_fac = 1;
+ setbit(ctl.facilities, FAC_BASE(LOG_KERN));
+ break;
+ case 'L':
+ /* the argument is optional; without it the mode is "auto" */
+ colormode = UL_COLORMODE_AUTO;
+ if (optarg)
+ colormode = colormode_or_err(optarg,
+ _("unsupported color mode"));
+ break;
+ case 'l':
+ ctl.fltr_lev= 1;
+ if (string_to_bitarray(optarg,
+ ctl.levels, parse_level) < 0)
+ return EXIT_FAILURE;
+ break;
+ case 'n':
+ ctl.action = SYSLOG_ACTION_CONSOLE_LEVEL;
+ console_level = parse_level(optarg, 0);
+ break;
+ case 'P':
+ nopager = 1;
+ break;
+ case 'p':
+ ctl.force_prefix = 1;
+ break;
+ case 'r':
+ ctl.raw = 1;
+ break;
+ case 'S':
+ ctl.method = DMESG_METHOD_SYSLOG;
+ break;
+ case 's':
+ ctl.bufsize = strtou32_or_err(optarg,
+ _("invalid buffer size argument"));
+ /* sizes below 4096 are raised to 4096 */
+ if (ctl.bufsize < 4096)
+ ctl.bufsize = 4096;
+ break;
+ case 'T':
+ ctl.time_fmt = DMESG_TIMEFTM_CTIME;
+ break;
+ case 't':
+ ctl.time_fmt = DMESG_TIMEFTM_NONE;
+ break;
+ case 'u':
+ /* select every facility slot from index 1 up; index 0 is left unset */
+ ctl.fltr_fac = 1;
+ for (n = 1; (size_t) n < ARRAY_SIZE(facility_names); n++)
+ setbit(ctl.facilities, n);
+ break;
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+ case 'w':
+ ctl.follow = 1;
+ break;
+ case 'x':
+ ctl.decode = 1;
+ break;
+ case OPT_TIME_FORMAT:
+ ctl.time_fmt = which_time_format(optarg);
+ break;
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+ /* no positional arguments are accepted */
+ if (argc != optind) {
+ warnx(_("bad usage"));
+ errtryhelp(EXIT_FAILURE);
+ }
+
+ /*
+ * The formats tested here need the boot time; when it cannot be
+ * obtained, fall back to printing no timestamps at all.
+ */
+ if ((is_timefmt(&ctl, RELTIME) ||
+ is_timefmt(&ctl, CTIME) ||
+ is_timefmt(&ctl, ISO8601))
+ && dmesg_get_boot_time(&ctl.boot_time) != 0)
+ ctl.time_fmt = DMESG_TIMEFTM_NONE;
+
+ /*
+ * -d is merged into the selected format where a *_DELTA variant
+ * exists, is ignored (with a warning) for ISO8601, and replaces any
+ * other format with plain delta.
+ */
+ if (delta)
+ switch (ctl.time_fmt) {
+ case DMESG_TIMEFTM_CTIME:
+ ctl.time_fmt = DMESG_TIMEFTM_CTIME_DELTA;
+ break;
+ case DMESG_TIMEFTM_TIME:
+ ctl.time_fmt = DMESG_TIMEFTM_TIME_DELTA;
+ break;
+ case DMESG_TIMEFTM_ISO8601:
+ warnx(_("--show-delta is ignored when used together with iso8601 time format"));
+ break;
+ default:
+ ctl.time_fmt = DMESG_TIMEFTM_DELTA;
+ }
+
+
+ ctl.color = colors_init(colormode, "dmesg") ? 1 : 0;
+ /* --follow disables the pager, and -P overrides the pager enabled by -H */
+ if (ctl.follow)
+ nopager = 1;
+ ctl.pager = nopager ? 0 : ctl.pager;
+ if (ctl.pager)
+ pager_redirect();
+
+ switch (ctl.action) {
+ case SYSLOG_ACTION_READ_ALL:
+ case SYSLOG_ACTION_READ_CLEAR:
+ /* fall back to the syslog method when /dev/kmsg cannot be initialized */
+ if (ctl.method == DMESG_METHOD_KMSG && init_kmsg(&ctl) != 0)
+ ctl.method = DMESG_METHOD_SYSLOG;
+
+ if (ctl.raw
+ && ctl.method != DMESG_METHOD_KMSG
+ && (ctl.fltr_lev || ctl.fltr_fac))
+ errx(EXIT_FAILURE, _("--raw can be used together with --level or "
+ "--facility only when reading messages from /dev/kmsg"));
+
+ /* only kmsg supports multi-line messages */
+ if (ctl.force_prefix && ctl.method != DMESG_METHOD_KMSG)
+ ctl.force_prefix = 0;
+
+ /*
+ * NOTE(review): pager_redirect() was already called above when
+ * ctl.pager is set; this second call looks redundant -- confirm.
+ */
+ if (ctl.pager)
+ pager_redirect();
+ n = read_buffer(&ctl, &buf);
+ if (n > 0)
+ print_buffer(&ctl, buf, n);
+ /* buf is only freed here when ctl.mmap_buff is not set */
+ if (!ctl.mmap_buff)
+ free(buf);
+ if (n < 0)
+ err(EXIT_FAILURE, _("read kernel buffer failed"));
+ if (ctl.kmsg >= 0)
+ close(ctl.kmsg);
+ break;
+ case SYSLOG_ACTION_CLEAR:
+ case SYSLOG_ACTION_CONSOLE_OFF:
+ case SYSLOG_ACTION_CONSOLE_ON:
+ klog_rc = klogctl(ctl.action, NULL, 0);
+ break;
+ case SYSLOG_ACTION_CONSOLE_LEVEL:
+ klog_rc = klogctl(ctl.action, NULL, console_level);
+ break;
+ default:
+ errx(EXIT_FAILURE, _("unsupported command"));
+ break;
+ }
+
+
+ /* klog_rc stays 0 on the read path; non-zero means klogctl() failed */
+ if (klog_rc)
+ err(EXIT_FAILURE, _("klogctl failed"));
+
+ return EXIT_SUCCESS;
+}
diff --git a/sys-utils/eject.1 b/sys-utils/eject.1
new file mode 100644
index 0000000..f901b23
--- /dev/null
+++ b/sys-utils/eject.1
@@ -0,0 +1,187 @@
+.\" Copyright (C) 1994-2005 Jeff Tranter (tranter@pobox.com)
+.\" Copyright (C) 2012 Karel Zak <kzak@redhat.com>
+.\"
+.\" It may be distributed under the GNU Public License, version 2, or
+.\" any higher version. See section COPYING of the GNU Public license
+.\" for conditions under which this file may be redistributed.
+.TH EJECT 1 "April 2012" "Linux" "User Commands"
+.SH NAME
+eject \- eject removable media
+.SH SYNOPSIS
+.B eject
+[options]
+.IR device | mountpoint
+.SH DESCRIPTION
+.B eject
+allows removable media (typically a CD-ROM, floppy disk, tape, JAZ, ZIP or USB
+disk) to be ejected under software control. The command can also control some
+multi-disc CD-ROM changers, the auto-eject feature supported by some devices,
+and close the disc tray of some CD-ROM drives.
+.PP
+The device corresponding to \fIdevice\fP or \fImountpoint\fP is ejected. If no
+name is specified, the default name \fB/dev/cdrom\fR is used. The device may be
+addressed by device name (e.g. 'sda'), device path (e.g. '/dev/sda'),
+UUID=\fIuuid\fR or LABEL=\fIlabel\fR tags.
+.PP
+There are four different methods of ejecting, depending on whether the device
+is a CD-ROM, SCSI device, removable floppy, or tape. By default \fBeject\fR tries
+all four methods in order until it succeeds.
+.PP
+If a device partition is specified, the whole-disk device is used. If the device
+or a device partition is currently mounted, it is unmounted before ejecting.
+.SH OPTIONS
+.TP
+.BR \-a , " \-\-auto on" | off
+This option controls the auto-eject mode, supported by some devices. When
+enabled, the drive automatically ejects when the device is closed.
+.TP
+.BR \-c , " \-\-changerslot " \fIslot
+With this option a CD slot can be selected from an ATAPI/IDE CD-ROM changer.
+The CD-ROM drive cannot be in use (mounted data CD or playing a music CD) for
+a change request to work. Please also note that the first slot of the changer
+is referred to as 0, not 1.
+.TP
+.BR \-d , " \-\-default"
+List the default device name.
+.TP
+.BR \-F , " \-\-force"
+Force eject, don't check device type.
+.TP
+.BR \-f , " \-\-floppy"
+This option specifies that the drive should be ejected using a removable floppy
+disk eject command.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.TP
+.BR \-i , " \-\-manualeject on" | off
+This option controls locking of the hardware eject button. When enabled, the
+drive will not be ejected when the button is pressed. This is useful when you
+are carrying a laptop in a bag or case and don't want it to eject if the button
+is inadvertently pressed.
+.TP
+.BR \-M , " \-\-no\-partitions\-unmount"
+The option tells eject to not try to unmount other partitions on partitioned
+devices. If another partition is still mounted, the program will not attempt
+to eject the media. It will attempt to unmount only the device or mountpoint
+given on the command line.
+.TP
+.BR \-m , " \-\-no\-unmount"
+The option tells eject to not try to unmount at all.
+.TP
+.BR \-n , " \-\-noop"
+With this option the selected device is displayed but no action is performed.
+.TP
+.BR \-p , " \-\-proc"
+This option allows you to use /proc/mounts instead of /etc/mtab. It also passes the
+\fB\-n\fR option to \fBumount\fR(8).
+.TP
+.BR \-q , " \-\-tape"
+This option specifies that the drive should be ejected using a tape drive
+offline command.
+.TP
+.BR \-r , " \-\-cdrom"
+This option specifies that the drive should be ejected using a CDROM eject
+command.
+.TP
+.BR \-s , " \-\-scsi"
+This option specifies that the drive should be ejected using SCSI commands.
+.TP
+.BR \-T , " \-\-traytoggle"
+With this option the drive is given a CD-ROM tray close command if the tray is
+open, and a CD-ROM tray eject command if it is closed. Not all devices support
+this command, because it relies on the CD-ROM tray close command (see \fB\-t\fR).
+.TP
+.BR \-t , " \-\-trayclose"
+With this option the drive is given a CD-ROM tray close command. Not all
+devices support this command.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-v , " \-\-verbose"
+Run in verbose mode; more information is displayed about what the command is
+doing.
+.TP
+.BR \-X , " \-\-listspeed"
+With this option the CD-ROM drive will be probed to detect the available
+speeds. The output is a list of speeds which can be used as an argument of the
+\fB\-x\fR option. This only works with Linux 2.6.13 or higher; on earlier versions
+only the maximum speed is reported. Also note that some drives may not
+correctly report the speed and therefore this option does not work with them.
+.TP
+.BR \-x , " \-\-cdspeed " \fIspeed
+With this option the drive is given a CD-ROM select speed command. The
+.I speed
+argument is a number indicating the desired speed (e.g. 8 for 8X speed), or 0
+for maximum data rate. Not all devices support this command and you can only
+specify speeds that the drive is capable of. Every time the media is changed
+this option is cleared. This option can be used alone, or with the
+\fB\-t\fR and \fB\-c\fR options.
+.SH EXIT STATUS
+Returns 0 if operation was successful, 1 if operation failed or command syntax
+was not valid.
+.SH NOTES
+.B eject
+only works with devices that support one or more of the four methods of
+ejecting. This includes most CD-ROM drives (IDE, SCSI, and proprietary), some
+SCSI tape drives, JAZ drives, ZIP drives (parallel port, SCSI, and IDE
+versions), and LS120 removable floppies. Users have also reported success with
+floppy drives on Sun SPARC and Apple Macintosh systems. If
+.B eject
+does not work, it is most likely a limitation of the kernel driver for the
+device and not the
+.B eject
+program itself.
+.PP
+The \fB\-r\fR, \fB\-s\fR, \fB\-f\fR, and \fB\-q\fR options allow controlling
+which methods are used to
+eject. More than one method can be specified. If none of these options are
+specified, it tries all four (this works fine in most cases).
+.PP
+.B eject
+may not always be able to determine if the device is mounted (e.g. if it has
+several names). If the device name is a symbolic link,
+.B eject
+will follow the link and use the device that it points to.
+.PP
+If
+.B eject
+determines that the device can have multiple partitions, it will attempt to
+unmount all mounted partitions of the device before ejecting (see also
+\fB\-\-no\-partitions\-unmount\fR). If an unmount fails, the program will not
+attempt to eject the media.
+.PP
+You can eject an audio CD. Some CD-ROM drives will refuse to open the tray if
+the drive is empty. Some devices do not support the tray close command.
+.PP
+If the auto-eject feature is enabled, then the drive will always be ejected
+after running this command. Not all Linux kernel CD-ROM drivers support the
+auto-eject mode. There is no way to find out the state of the auto-eject mode.
+.PP
+You need appropriate privileges to access the device files. Running as root is
+required to eject some devices (e.g. SCSI devices).
+.SH AUTHORS
+.MT tranter@\:pobox.com
+Jeff Tranter
+.ME
+- original author.
+.br
+.MT kzak@\:redhat.com
+Karel Zak
+.ME
+and
+.MT mluscon@\:redhat.com
+Michal Luscon
+.ME
+- util-linux version.
+.SH SEE ALSO
+.BR findmnt (8),
+.BR lsblk (8),
+.BR mount (8),
+.BR umount (8)
+.SH AVAILABILITY
+The eject command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/eject.c b/sys-utils/eject.c
new file mode 100644
index 0000000..8196b60
--- /dev/null
+++ b/sys-utils/eject.c
@@ -0,0 +1,1044 @@
+/*
+ * Copyright (C) 1994-2005 Jeff Tranter (tranter@pobox.com)
+ * Copyright (C) 2012 Karel Zak <kzak@redhat.com>
+ * Copyright (C) Michal Luscon <mluscon@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <err.h>
+#include <stdarg.h>
+
+#include <getopt.h>
+#include <errno.h>
+#include <regex.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/mtio.h>
+#include <linux/cdrom.h>
+#include <linux/fd.h>
+#include <sys/mount.h>
+#include <scsi/scsi.h>
+#include <scsi/sg.h>
+#include <scsi/scsi_ioctl.h>
+#include <sys/time.h>
+
+#include <libmount.h>
+
+#include "c.h"
+#include "closestream.h"
+#include "nls.h"
+#include "strutils.h"
+#include "xalloc.h"
+#include "pathnames.h"
+#include "sysfs.h"
+#include "monotonic.h"
+
+/*
+ * sg_io_hdr_t driver_status -- see kernel include/scsi/scsi.h
+ */
+#ifndef DRIVER_SENSE
+# define DRIVER_SENSE 0x08
+#endif
+
+
+#define EJECT_DEFAULT_DEVICE "/dev/cdrom"
+
+
+/* Used by the toggle_tray() function. If ejecting the tray takes this
+ * time or less, the tray was probably already ejected, so we close it
+ * again.
+ */
+#define TRAY_WAS_ALREADY_OPEN_USECS 200000 /* about 0.2 seconds */
+
+/*
+ * Run-time state of one eject invocation: the mount table, the selected
+ * device and its file descriptor, one "seen" flag per command line option and
+ * the parsed option arguments.
+ */
+struct eject_control {
+ struct libmnt_table *mtab; /* mount table, allocated on first use */
+ char *device; /* device or mount point to be ejected */
+ int fd; /* file descriptor for device */
+ unsigned int /* command flags and arguments */
+ a_option:1,
+ c_option:1,
+ d_option:1,
+ F_option:1,
+ f_option:1,
+ i_option:1,
+ M_option:1,
+ m_option:1,
+ n_option:1,
+ p_option:1,
+ q_option:1,
+ r_option:1,
+ s_option:1,
+ T_option:1,
+ t_option:1,
+ v_option:1,
+ X_option:1,
+ x_option:1,
+ a_arg:1, /* on/off argument of -a */
+ i_arg:1; /* on/off argument of -i */
+ long int c_arg; /* changer slot number */
+ long int x_arg; /* cd speed */
+};
+
+/* Print "<program name>: <formatted message>" plus a newline on stdout. */
+static void vinfo(const char *fmt, va_list va)
+{
+	printf("%s: ", program_invocation_short_name);
+	vfprintf(stdout, fmt, va);
+	putchar('\n');
+}
+
+/* Like info(), but prints only when the -v option was given. */
+static inline void verbose(const struct eject_control *ctl, const char *fmt, ...)
+{
+	if (ctl->v_option) {
+		va_list args;
+
+		va_start(args, fmt);
+		vinfo(fmt, args);
+		va_end(args);
+	}
+}
+
+/* Unconditionally print a printf-style message through vinfo(). */
+static inline void info(const char *fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	vinfo(fmt, args);
+	va_end(args);
+}
+
+/*
+ * Print the help text on stdout and exit with EXIT_SUCCESS; never returns.
+ * The option list is one translatable string, so it must stay in one piece.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out,
+ _(" %s [options] [<device>|<mountpoint>]\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Eject removable media.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -a, --auto <on|off> turn auto-eject feature on or off\n"
+ " -c, --changerslot <slot> switch discs on a CD-ROM changer\n"
+ " -d, --default display default device\n"
+ " -f, --floppy eject floppy\n"
+ " -F, --force don't care about device type\n"
+ " -i, --manualeject <on|off> toggle manual eject protection on/off\n"
+ " -m, --no-unmount do not unmount device even if it is mounted\n"
+ " -M, --no-partitions-unmount do not unmount another partitions\n"
+ " -n, --noop don't eject, just show device found\n"
+ " -p, --proc use /proc/mounts instead of /etc/mtab\n"
+ " -q, --tape eject tape\n"
+ " -r, --cdrom eject CD-ROM\n"
+ " -s, --scsi eject SCSI device\n"
+ " -t, --trayclose close tray\n"
+ " -T, --traytoggle toggle tray\n"
+ " -v, --verbose enable verbose output\n"
+ " -x, --cdspeed <speed> set CD-ROM max speed\n"
+ " -X, --listspeed list CD-ROM available speeds\n"),
+ out);
+
+ fputs(USAGE_SEPARATOR, out);
+ /* printf() writes to stdout as well, i.e. the same stream as 'out' */
+ printf(USAGE_HELP_OPTIONS(29));
+
+ fputs(_("\nBy default tries -r, -s, -f, and -q in order until success.\n"), out);
+ printf(USAGE_MAN_TAIL("eject(1)"));
+
+ exit(EXIT_SUCCESS);
+}
+
+
+/*
+ * Translate the command line into @ctl.
+ *
+ * Every recognized option sets its <letter>_option flag; -a/-i additionally
+ * store the parsed on/off switch and -c/-x the numeric argument. At most one
+ * positional argument (device or mountpoint) is accepted and copied to
+ * ctl->device. Invalid input terminates the program.
+ */
+static void parse_args(struct eject_control *ctl, int argc, char **argv)
+{
+	static const struct option long_opts[] = {
+		{ "auto",                  required_argument, NULL, 'a' },
+		{ "cdrom",                 no_argument,       NULL, 'r' },
+		{ "cdspeed",               required_argument, NULL, 'x' },
+		{ "changerslot",           required_argument, NULL, 'c' },
+		{ "default",               no_argument,       NULL, 'd' },
+		{ "floppy",                no_argument,       NULL, 'f' },
+		{ "force",                 no_argument,       NULL, 'F' },
+		{ "help",                  no_argument,       NULL, 'h' },
+		{ "listspeed",             no_argument,       NULL, 'X' },
+		{ "manualeject",           required_argument, NULL, 'i' },
+		{ "noop",                  no_argument,       NULL, 'n' },
+		{ "no-unmount",            no_argument,       NULL, 'm' },
+		{ "no-partitions-unmount", no_argument,       NULL, 'M' },
+		{ "proc",                  no_argument,       NULL, 'p' },
+		{ "scsi",                  no_argument,       NULL, 's' },
+		{ "tape",                  no_argument,       NULL, 'q' },
+		{ "trayclose",             no_argument,       NULL, 't' },
+		{ "traytoggle",            no_argument,       NULL, 'T' },
+		{ "verbose",               no_argument,       NULL, 'v' },
+		{ "version",               no_argument,       NULL, 'V' },
+		{ NULL, 0, NULL, 0 }
+	};
+	int opt, nargs;
+
+	for (;;) {
+		opt = getopt_long(argc, argv,
+				  "a:c:i:x:dfFhnqrstTXvVpmM", long_opts, NULL);
+		if (opt == -1)
+			break;
+
+		switch (opt) {
+		/* options that take an argument */
+		case 'a':
+			ctl->a_option = 1;
+			ctl->a_arg = parse_switch(optarg, _("argument error"),
+						"on", "off",  "1", "0",  NULL);
+			break;
+		case 'i':
+			ctl->i_option = 1;
+			ctl->i_arg = parse_switch(optarg, _("argument error"),
+						"on", "off",  "1", "0",  NULL);
+			break;
+		case 'c':
+			ctl->c_option = 1;
+			ctl->c_arg = strtoul_or_err(optarg, _("invalid argument to --changerslot/-c option"));
+			break;
+		case 'x':
+			ctl->x_option = 1;
+			ctl->x_arg = strtoul_or_err(optarg, _("invalid argument to --cdspeed/-x option"));
+			break;
+
+		/* plain flags */
+		case 'd':
+			ctl->d_option = 1;
+			break;
+		case 'F':
+			ctl->F_option = 1;
+			break;
+		case 'f':
+			ctl->f_option = 1;
+			break;
+		case 'M':
+			ctl->M_option = 1;
+			break;
+		case 'm':
+			ctl->m_option = 1;
+			break;
+		case 'n':
+			ctl->n_option = 1;
+			break;
+		case 'p':
+			ctl->p_option = 1;
+			break;
+		case 'q':
+			ctl->q_option = 1;
+			break;
+		case 'r':
+			ctl->r_option = 1;
+			break;
+		case 's':
+			ctl->s_option = 1;
+			break;
+		case 'T':
+			ctl->T_option = 1;
+			break;
+		case 't':
+			ctl->t_option = 1;
+			break;
+		case 'v':
+			ctl->v_option = 1;
+			break;
+		case 'X':
+			ctl->X_option = 1;
+			break;
+
+		/* options that end the program */
+		case 'h':
+			usage();
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			exit(EXIT_SUCCESS);
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+			break;
+		}
+	}
+
+	/* at most one device/mountpoint may follow the options */
+	nargs = argc - optind;
+	if (nargs > 1)
+		errx(EXIT_FAILURE, _("too many arguments"));
+	if (nargs == 1)
+		ctl->device = xstrdup(argv[optind]);
+}
+
+/*
+ * Given name, such as foo, see if any of the following exist:
+ *
+ * foo (if foo starts with '.' or '/')
+ * /dev/foo
+ *
+ * Returns the matching path as a newly allocated string (the caller frees
+ * it), or NULL if @name is NULL, nothing matches, or "/dev/<name>" does not
+ * fit into PATH_MAX.
+ */
+static char *find_device(const char *name)
+{
+	char buf[PATH_MAX];
+	int len;
+
+	if (!name)
+		return NULL;
+
+	if ((*name == '.' || *name == '/') && access(name, F_OK) == 0)
+		return xstrdup(name);
+
+	/*
+	 * A truncated path could accidentally name some other existing file,
+	 * so treat an over-long name as "not found" instead of probing it.
+	 */
+	len = snprintf(buf, sizeof(buf), "/dev/%s", name);
+	if (len < 0 || (size_t) len >= sizeof(buf))
+		return NULL;
+
+	if (access(buf, F_OK) == 0)
+		return xstrdup(buf);
+
+	return NULL;
+}
+
+/*
+ * Switch the drive's auto-eject mode on or off according to ctl->a_arg.
+ * Exits with an error if the ioctl fails or is not available at build time.
+ */
+static void auto_eject(const struct eject_control *ctl)
+{
+#if defined(CDROM_SET_OPTIONS) && defined(CDROM_CLEAR_OPTIONS)
+	int rc;
+
+	if (!ctl->a_arg)
+		rc = ioctl(ctl->fd, CDROM_CLEAR_OPTIONS, CDO_AUTO_EJECT);
+	else
+		rc = ioctl(ctl->fd, CDROM_SET_OPTIONS, CDO_AUTO_EJECT);
+#else
+	int rc = -1;
+
+	errno = ENOSYS;
+#endif
+	if (rc < 0)
+		err(EXIT_FAILURE,_("CD-ROM auto-eject command failed"));
+}
+
+/*
+ * Lock or unlock the hardware eject button (CDROM_LOCKDOOR) according to
+ * ctl->i_arg. Locking is useful when a running laptop is carried in a bag
+ * with no CD in its drive.
+ * Implemented as found in Documentation/ioctl/cdrom.txt
+ *
+ * Exits on ioctl failure; on success reports the resulting button state.
+ */
+static void manual_eject(const struct eject_control *ctl)
+{
+	if (ioctl(ctl->fd, CDROM_LOCKDOOR, ctl->i_arg) < 0) {
+		if (errno == EDRIVE_CANT_DO_THIS)
+			errx(EXIT_FAILURE, _("CD-ROM door lock is not supported"));
+		if (errno == EBUSY)
+			errx(EXIT_FAILURE, _("other users have the drive open and not CAP_SYS_ADMIN"));
+		err(EXIT_FAILURE, _("CD-ROM lock door command failed"));
+	}
+
+	if (!ctl->i_arg)
+		info(_("CD-Drive may be ejected with device button"));
+	else
+		info(_("CD-Drive may NOT be ejected with device button"));
+}
+
+/*
+ * Select slot ctl->c_arg on a CD-ROM changer. CDROM_SELECT_DISC is used when
+ * the headers provide it, otherwise the older CDROMLOADFROMSLOT; without
+ * either only a warning is printed.
+ */
+static void changer_select(const struct eject_control *ctl)
+{
+#ifdef CDROM_SELECT_DISC
+	int rc = ioctl(ctl->fd, CDROM_SELECT_DISC, ctl->c_arg);
+
+	if (rc < 0)
+		err(EXIT_FAILURE, _("CD-ROM select disc command failed"));
+
+#elif defined CDROMLOADFROMSLOT
+	int rc = ioctl(ctl->fd, CDROMLOADFROMSLOT, ctl->c_arg);
+
+	if (rc != 0)
+		err(EXIT_FAILURE, _("CD-ROM load from slot command failed"));
+#else
+	warnx(_("IDE/ATAPI CD-ROM changer not supported by this kernel\n") );
+#endif
+}
+
+/*
+ * Close the drive tray. Exits if the ioctl reports an error; only warns when
+ * no tray-close ioctl is available at build time.
+ */
+static void close_tray(int fd)
+{
+#if defined(CDROMCLOSETRAY)
+	if (ioctl(fd, CDROMCLOSETRAY) != 0)
+		err(EXIT_FAILURE, _("CD-ROM tray close command failed"));
+#elif defined(CDIOCCLOSE)
+	if (ioctl(fd, CDIOCCLOSE) != 0)
+		err(EXIT_FAILURE, _("CD-ROM tray close command failed"));
+#else
+	warnx(_("CD-ROM tray close command not supported by this kernel\n"));
+#endif
+}
+
+/*
+ * Eject using the CDROMEJECT ioctl (or CDIOCEJECT where that is what the
+ * headers offer). Returns 1 if successful, 0 otherwise.
+ */
+static int eject_cdrom(int fd)
+{
+#if defined(CDROMEJECT)
+	/* unlock the door first; give up if even that fails */
+	if (ioctl(fd, CDROM_LOCKDOOR, 0) < 0)
+		return 0;
+	if (ioctl(fd, CDROMEJECT) < 0)
+		return 0;
+	return 1;
+#elif defined(CDIOCEJECT)
+	if (ioctl(fd, CDIOCEJECT) < 0)
+		return 0;
+	return 1;
+#else
+	warnx(_("CD-ROM eject unsupported"));
+	errno = ENOSYS;
+	return 0;
+#endif
+}
+
+/*
+ * Toggle tray: close it when it is open, eject when it is closed.
+ *
+ * Written by Benjamin Schwenk <benjaminschwenk@yahoo.de> and
+ * Sybren Stuvel <sybren@thirdtower.com>
+ *
+ * Not supported by older kernels because it might use close_tray().
+ *
+ * With CDROM_DRIVE_STATUS the drive is asked for its state. Without it the
+ * tray is ejected and the elapsed time decides whether it was already open.
+ * Failing ioctls terminate the program.
+ */
+static void toggle_tray(int fd)
+{
+#ifdef CDROM_DRIVE_STATUS
+	/*
+	 * Ask the drive for its state. CDROM_DRIVE_STATUS takes a slot
+	 * argument; pass CDSL_CURRENT explicitly rather than leaving the
+	 * variadic argument unspecified.
+	 */
+	switch (ioctl(fd, CDROM_DRIVE_STATUS, CDSL_CURRENT)) {
+	case CDS_TRAY_OPEN:
+		close_tray(fd);
+		return;
+
+	case CDS_NO_DISC:
+	case CDS_DISC_OK:
+		if (!eject_cdrom(fd))
+			err(EXIT_FAILURE, _("CD-ROM eject command failed"));
+		return;
+	case CDS_NO_INFO:
+		warnx(_("no CD-ROM information available"));
+		return;
+	case CDS_DRIVE_NOT_READY:
+		warnx(_("CD-ROM drive is not ready"));
+		return;
+	default:
+		err(EXIT_FAILURE, _("CD-ROM status command failed"));
+	}
+#else
+	struct timeval time_start, time_stop;
+	long long time_elapsed;
+
+	/* Try to open the CDROM tray and measure the time needed for it.
+	 * In the original authors' experience the call needs less than 0.05
+	 * seconds if the tray was already open, and at least 1.5 seconds
+	 * if it was closed. */
+	gettime_monotonic(&time_start);
+
+	/* Send the CDROMEJECT command to the device. */
+	if (!eject_cdrom(fd))
+		err(EXIT_FAILURE, _("CD-ROM eject command failed"));
+
+	/* Get the second timestamp, to measure the time needed to open
+	 * the tray. */
+	gettime_monotonic(&time_stop);
+
+	/* Subtract the seconds before scaling and use a wide type, so that
+	 * neither a large absolute timestamp nor a very slow eject can
+	 * overflow the microsecond arithmetic. */
+	time_elapsed = (long long) (time_stop.tv_sec - time_start.tv_sec) * 1000000
+		     + (time_stop.tv_usec - time_start.tv_usec);
+
+	/* If the tray "opened" too fast, we can be nearly sure, that it
+	 * was already open. In this case, close it now. Else the tray was
+	 * closed before. This would mean that we are done. */
+	if (time_elapsed < TRAY_WAS_ALREADY_OPEN_USECS)
+		close_tray(fd);
+#endif
+}
+
+/*
+ * Ask the drive to use speed ctl->x_arg (CDROM_SELECT_SPEED). Exits when the
+ * ioctl reports an error; only warns when the ioctl is unavailable at build
+ * time.
+ * Thanks to Roland Krivanek (krivanek@fmph.uniba.sk)
+ * http://dmpc.dbp.fmph.uniba.sk/~krivanek/cdrom_speed/
+ */
+static void select_speed(const struct eject_control *ctl)
+{
+#ifdef CDROM_SELECT_SPEED
+	int rc = ioctl(ctl->fd, CDROM_SELECT_SPEED, ctl->x_arg);
+
+	if (rc != 0)
+		err(EXIT_FAILURE, _("CD-ROM select speed command failed"));
+#else
+	warnx(_("CD-ROM select speed command not supported by this kernel"));
+#endif
+}
+
+/*
+ * Read Speed of CD-ROM drive. From Linux 2.6.13, the current speed
+ * is correctly reported
+ *
+ * The basename of @devname is looked up in the "drive name:" line of
+ * _PATH_PROC_CDROMINFO to find the drive's column; the value in the same
+ * column of the following "drive speed:" line is returned.
+ *
+ * Never returns on failure: an unreadable file, an unknown drive name or a
+ * missing speed entry terminates the program.
+ */
+static int read_speed(const char *devname)
+{
+	static const char delim[] = "\t \n";
+	int drive_number = -1;		/* 0-based column, -1 = not found yet */
+	const char *name;
+	char line[512];
+	FILE *f;
+
+	f = fopen(_PATH_PROC_CDROMINFO, "r");
+	if (!f)
+		err(EXIT_FAILURE, _("cannot open %s"), _PATH_PROC_CDROMINFO);
+
+	/* basename of the device; the whole name if it has no directory part */
+	name = strrchr(devname, '/');
+	name = name ? name + 1 : devname;
+
+	while (fgets(line, sizeof(line), f)) {
+		char *str;
+
+		if (drive_number == -1) {
+			/* find drive number in line "drive name" */
+			if (strncmp(line, "drive name:", 11) != 0)
+				continue;
+
+			/*
+			 * The newline is a delimiter too, so names compare
+			 * exactly and "sr1" can no longer match "sr10".
+			 */
+			drive_number = 0;
+			for (str = strtok(&line[11], delim);
+			     str && strcmp(name, str) != 0;
+			     str = strtok(NULL, delim))
+				drive_number++;
+
+			if (!str)
+				errx(EXIT_FAILURE,
+				     _("%s: failed to finding CD-ROM name"),
+				     _PATH_PROC_CDROMINFO);
+
+		} else if (strncmp(line, "drive speed:", 12) == 0) {
+			/* find line "drive speed" and read the correct speed */
+			int i;
+
+			/*
+			 * drive_number is 0-based, so skip exactly that many
+			 * columns after the first one.
+			 */
+			str = strtok(&line[12], delim);
+			for (i = 0; str && i < drive_number; i++)
+				str = strtok(NULL, delim);
+
+			if (!str)
+				errx(EXIT_FAILURE,
+				     _("%s: failed to read speed"),
+				     _PATH_PROC_CDROMINFO);
+			fclose(f);
+			return atoi(str);
+		}
+	}
+
+	fclose(f);
+	errx(EXIT_FAILURE, _("failed to read speed"));
+}
+
+/*
+ * List Speed of CD-ROM drive.
+ *
+ * Selects the speed currently stored in ctl->x_arg, reads the drive speed
+ * back as the upper bound, then steps ctl->x_arg upwards and prints every
+ * read-back speed that is greater than the one requested. ctl->x_arg is
+ * modified along the way.
+ */
+static void list_speeds(struct eject_control *ctl)
+{
+	int top, cur = 0;
+
+	select_speed(ctl);
+	top = read_speed(ctl->device);
+
+	while (cur < top) {
+		ctl->x_arg = cur + 1;
+		select_speed(ctl);
+		cur = read_speed(ctl->device);
+
+		if (ctl->x_arg >= cur) {
+			/* nothing above the request was reported; step on */
+			cur = ctl->x_arg + 1;
+			continue;
+		}
+		printf("%d ", cur);
+	}
+
+	printf("\n");
+}
+
+/*
+ * Eject using SCSI SG_IO commands. Return 1 if successful, 0 otherwise.
+ *
+ * Three 6-byte commands are sent one after another through the same
+ * sg_io_hdr_t: ALLOW_MEDIUM_REMOVAL, then START_STOP with byte 4 set to 1,
+ * then START_STOP with byte 4 set to 2. Any failure ends the sequence with 0.
+ */
+static int eject_scsi(const struct eject_control *ctl)
+{
+ int status, k;
+ sg_io_hdr_t io_hdr;
+ unsigned char allowRmBlk[6] = {ALLOW_MEDIUM_REMOVAL, 0, 0, 0, 0, 0};
+ /* NOTE(review): byte 4 presumably encodes start (1) and eject (2) -- confirm against the SCSI spec */
+ unsigned char startStop1Blk[6] = {START_STOP, 0, 0, 0, 1, 0};
+ unsigned char startStop2Blk[6] = {START_STOP, 0, 0, 0, 2, 0};
+ unsigned char inqBuff[2];
+ unsigned char sense_buffer[32];
+
+ /* the sg interface must be present and report version >= 30000 */
+ if ((ioctl(ctl->fd, SG_GET_VERSION_NUM, &k) < 0) || (k < 30000)) {
+ verbose(ctl, _("not an sg device, or old sg driver"));
+ return 0;
+ }
+
+ /* one header is reused for all commands; only cmdp changes below */
+ memset(&io_hdr, 0, sizeof(sg_io_hdr_t));
+ io_hdr.interface_id = 'S';
+ io_hdr.cmd_len = 6;
+ io_hdr.mx_sb_len = sizeof(sense_buffer);
+ io_hdr.dxfer_direction = SG_DXFER_NONE;
+ io_hdr.dxfer_len = 0;
+ io_hdr.dxferp = inqBuff;
+ io_hdr.sbp = sense_buffer;
+ io_hdr.timeout = 10000;
+
+ io_hdr.cmdp = allowRmBlk;
+ status = ioctl(ctl->fd, SG_IO, (void *)&io_hdr);
+ if (status < 0 || io_hdr.host_status || io_hdr.driver_status)
+ return 0;
+
+ /* driver_status of this command is checked separately below */
+ io_hdr.cmdp = startStop1Blk;
+ status = ioctl(ctl->fd, SG_IO, (void *)&io_hdr);
+ if (status < 0 || io_hdr.host_status)
+ return 0;
+
+ /* Ignore errors when there is not medium -- in this case driver sense
+ * buffer sets MEDIUM NOT PRESENT (3a) bit. For more details see:
+ * http://www.tldp.org/HOWTO/archived/SCSI-Programming-HOWTO/SCSI-Programming-HOWTO-22.html#sec-sensecodes
+ * -- kzak Jun 2013
+ */
+ if (io_hdr.driver_status != 0 &&
+ !(io_hdr.driver_status == DRIVER_SENSE && io_hdr.sbp &&
+ io_hdr.sbp[12] == 0x3a))
+ return 0;
+
+ io_hdr.cmdp = startStop2Blk;
+ status = ioctl(ctl->fd, SG_IO, (void *)&io_hdr);
+ if (status < 0 || io_hdr.host_status || io_hdr.driver_status)
+ return 0;
+
+ /* force kernel to reread partition table when new disc inserted */
+ /* the result of this ioctl is deliberately not checked */
+ ioctl(ctl->fd, BLKRRPART);
+ return 1;
+}
+
+/*
+ * Eject using the FDEJECT ioctl. Returns 1 if the ioctl succeeded,
+ * 0 otherwise.
+ */
+static int eject_floppy(int fd)
+{
+	if (ioctl(fd, FDEJECT) < 0)
+		return 0;
+	return 1;
+}
+
+
+/*
+ * Eject a tape by issuing the MTOFFL operation through MTIOCTOP.
+ * Returns 1 if the ioctl succeeded, 0 otherwise.
+ */
+static int eject_tape(int fd)
+{
+	struct mtop offline = { .mt_count = 0, .mt_op = MTOFFL };
+
+	if (ioctl(fd, MTIOCTOP, &offline) < 0)
+		return 0;
+	return 1;
+}
+
+
+/*
+ * Unmount @name by running /bin/umount in a child process.
+ *
+ * The child sets its group and user id to the real ids before exec; with -p
+ * it also passes "-n" to umount. The parent waits for exactly that child and
+ * terminates the program if umount did not exit normally or exited non-zero.
+ * A failed fork() only produces a warning. Nothing is done for a NULL @name.
+ */
+static void umount_one(const struct eject_control *ctl, const char *name)
+{
+	int status = 0;
+	pid_t pid, rc;
+
+	if (!name)
+		return;
+
+	verbose(ctl, _("%s: unmounting"), name);
+
+	pid = fork();
+	switch (pid) {
+	case 0: /* child */
+		if (setgid(getgid()) < 0)
+			err(EXIT_FAILURE, _("cannot set group id"));
+
+		if (setuid(getuid()) < 0)
+			err(EXIT_FAILURE, _("cannot set user id"));
+
+		if (ctl->p_option)
+			execl("/bin/umount", "/bin/umount", name, "-n", NULL);
+		else
+			execl("/bin/umount", "/bin/umount", name, NULL);
+
+		/* NOTE(review): relies on errexec() not returning, otherwise
+		 * control would fall into the next case -- confirm */
+		errexec("/bin/umount");
+
+	case -1:
+		warn( _("unable to fork"));
+		break;
+
+	default: /* parent */
+		/*
+		 * Wait for our own child only, retry when interrupted by a
+		 * signal, and never look at 'status' unless it was filled in.
+		 */
+		do {
+			rc = waitpid(pid, &status, 0);
+		} while (rc < 0 && errno == EINTR);
+
+		if (rc < 0)
+			err(EXIT_FAILURE, _("waitpid failed"));
+
+		if (WIFEXITED(status) == 0)
+			errx(EXIT_FAILURE,
+			     _("unmount of `%s' did not exit normally"), name);
+
+		if (WEXITSTATUS(status) != 0)
+			errx(EXIT_FAILURE, _("unmount of `%s' failed\n"), name);
+		break;
+	}
+}
+
+/*
+ * Open ctl->device in non-blocking mode and store the descriptor in
+ * ctl->fd. Read-write access is tried first and read-only access second;
+ * the program exits with an error message when both attempts fail.
+ */
+static void open_device(struct eject_control *ctl)
+{
+	int fd = open(ctl->device, O_RDWR | O_NONBLOCK);
+
+	if (fd < 0)
+		fd = open(ctl->device, O_RDONLY | O_NONBLOCK);
+
+	ctl->fd = fd;
+	if (fd == -1)
+		err(EXIT_FAILURE, _("cannot open %s"), ctl->device);
+}
+
+/*
+ * See if device has been mounted by looking in mount table. If so, set
+ * device name and mount point name, and return 0, otherwise return -1.
+ */
+static int device_get_mountpoint(struct eject_control *ctl, char **devname, char **mnt)
+{
+	struct libmnt_fs *entry;
+
+	*mnt = NULL;
+
+	/* the mount table is parsed on first use and then kept in ctl->mtab */
+	if (ctl->mtab == NULL) {
+		struct libmnt_cache *paths;
+		int failed;
+
+		ctl->mtab = mnt_new_table();
+		if (ctl->mtab == NULL)
+			err(EXIT_FAILURE, _("failed to initialize libmount table"));
+
+		paths = mnt_new_cache();
+		mnt_table_set_cache(ctl->mtab, paths);
+		mnt_unref_cache(paths);
+
+		failed = ctl->p_option
+			? mnt_table_parse_file(ctl->mtab, _PATH_PROC_MOUNTINFO)
+			: mnt_table_parse_mtab(ctl->mtab, NULL);
+		if (failed)
+			err(EXIT_FAILURE, _("failed to parse mount table"));
+	}
+
+	entry = mnt_table_find_source(ctl->mtab, *devname, MNT_ITER_BACKWARD);
+	if (entry == NULL) {
+		/* maybe 'devname' is a mountpoint rather than a real device */
+		entry = mnt_table_find_target(ctl->mtab, *devname, MNT_ITER_BACKWARD);
+		if (entry == NULL)
+			return -1;
+
+		/* replace the caller's string by the source of that mountpoint */
+		free(*devname);
+		*devname = xstrdup(mnt_fs_get_source(entry));
+	}
+
+	*mnt = xstrdup(mnt_fs_get_target(entry));
+	return *mnt ? 0 : -1;
+}
+
+/*
+ * Look up the whole-disk device that @device belongs to.
+ *
+ * Returns the result of find_device() for the whole-disk name when the
+ * whole-disk device number differs from the one of @device. Returns NULL
+ * when @device already is the whole disk, when stat() fails, or when the
+ * sysfs lookup fails.
+ */
+static char *get_disk_devname(const char *device)
+{
+	struct stat sb;
+	dev_t whole = 0;
+	char name[128];
+
+	if (stat(device, &sb) != 0)
+		return NULL;
+
+	/* get whole-disk devno */
+	if (sysfs_devno_to_wholedisk(sb.st_rdev, name, sizeof(name), &whole) != 0)
+		return NULL;
+
+	if (sb.st_rdev == whole)
+		return NULL;	/* nothing to translate */
+
+	return find_device(name);
+}
+
+/*
+ * Scan the sysfs directory of ctl->device for partitions and look each
+ * one up in the mount table. Every mounted partition is reported in
+ * verbose mode and, unless M_option is set, unmounted with umount_one().
+ *
+ * Returns the number of mounted partitions found; 0 is also returned
+ * when the device has no usable sysfs entry.
+ */
+static int umount_partitions(struct eject_control *ctl)
+{
+	struct path_cxt *pc;
+	struct dirent *ent;
+	DIR *dir;
+	int mounted = 0;
+	dev_t devno = sysfs_devname_to_devno(ctl->device);
+
+	if (!devno)
+		return 0;
+
+	pc = ul_new_sysfs_path(devno, NULL, NULL);
+	if (!pc)
+		return 0;
+
+	/* open /sys/block/<wholedisk> */
+	dir = ul_path_opendir(pc, NULL);
+	if (dir) {
+		/* scan for partition subdirs */
+		while ((ent = readdir(dir)) != NULL) {
+			char *mnt;
+			char *dev;
+
+			if (strcmp(ent->d_name, ".") == 0 ||
+			    strcmp(ent->d_name, "..") == 0)
+				continue;
+
+			if (!sysfs_blkdev_is_partition_dirent(dir, ent, ctl->device))
+				continue;
+
+			mnt = NULL;
+			dev = find_device(ent->d_name);
+
+			if (dev && device_get_mountpoint(ctl, &dev, &mnt) == 0) {
+				verbose(ctl, _("%s: mounted on %s"), dev, mnt);
+				if (!ctl->M_option)
+					umount_one(ctl, mnt);
+				mounted++;
+			}
+			free(dev);
+			free(mnt);
+		}
+		closedir(dir);
+	}
+
+	ul_unref_path(pc);
+	return mounted;
+}
+
+/*
+ * Return what sysfs_blkdev_is_hotpluggable() reports for ctl->device,
+ * or 0 when no sysfs path can be created for the device.
+ */
+static int is_hotpluggable(const struct eject_control *ctl)
+{
+	dev_t devno = sysfs_devname_to_devno(ctl->device);
+	struct path_cxt *pc = devno ? ul_new_sysfs_path(devno, NULL, NULL) : NULL;
+	int hotplug;
+
+	if (pc == NULL)
+		return 0;
+
+	hotplug = sysfs_blkdev_is_hotpluggable(pc);
+	ul_unref_path(pc);
+	return hotplug;
+}
+
+
+/*
+ * handle -x option
+ *
+ * Does nothing unless x_option is set. Otherwise the device is opened,
+ * the speed is selected and the program exits successfully, so the call
+ * does not return in that case.
+ */
+static void set_device_speed(struct eject_control *ctl)
+{
+	if (ctl->x_option) {
+		if (ctl->x_arg != 0)
+			verbose(ctl, _("setting CD-ROM speed to %ldX"), ctl->x_arg);
+		else
+			verbose(ctl, _("setting CD-ROM speed to auto"));
+
+		open_device(ctl);
+		select_speed(ctl);
+		exit(EXIT_SUCCESS);
+	}
+}
+
+
+/*
+ * main program
+ *
+ * Resolves the device (default device, device name or mountpoint) and
+ * switches to the whole-disk device when a partition was given. It then
+ * runs the single action selected on the command line (-n, -i, -a, -t,
+ * -T, -X, -x, -c). With no such action it unmounts the device as
+ * requested and tries the enabled eject methods one after another until
+ * one of them works.
+ *
+ * Returns EXIT_SUCCESS; every failure terminates through err()/errx().
+ */
+int main(int argc, char **argv)
+{
+	char *disk = NULL;
+	char *mountpoint = NULL;
+	int worked = 0;		/* set to 1 when successfully ejected */
+	struct eject_control ctl = { NULL };
+
+	setlocale(LC_ALL,"");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	/* parse the command line arguments */
+	parse_args(&ctl, argc, argv);
+
+	/* handle -d option */
+	if (ctl.d_option) {
+		info(_("default device: `%s'"), EJECT_DEFAULT_DEVICE);
+		return EXIT_SUCCESS;
+	}
+
+	if (!ctl.device) {
+		ctl.device = mnt_resolve_path(EJECT_DEFAULT_DEVICE, NULL);
+		/* report the name we tried; ctl.device itself is NULL here */
+		if (!ctl.device)
+			errx(EXIT_FAILURE, _("%s: unable to find device"),
+			     EJECT_DEFAULT_DEVICE);
+		verbose(&ctl, _("using default device `%s'"), ctl.device);
+	} else {
+		char *p;
+		size_t len = strlen(ctl.device);
+
+		/*
+		 * Strip one trailing slash. The length check keeps "/"
+		 * intact and avoids indexing before an empty string.
+		 */
+		if (len > 1 && ctl.device[len - 1] == '/')
+			ctl.device[len - 1] = '\0';
+
+		/* figure out full device or mount point name */
+		p = find_device(ctl.device);
+		if (p)
+			free(ctl.device);
+		else
+			p = ctl.device;
+
+		ctl.device = mnt_resolve_spec(p, NULL);
+		/* 'p' still holds the name that could not be resolved */
+		if (!ctl.device)
+			errx(EXIT_FAILURE, _("%s: unable to find device"), p);
+		free(p);
+	}
+
+	verbose(&ctl, _("device name is `%s'"), ctl.device);
+
+	device_get_mountpoint(&ctl, &ctl.device, &mountpoint);
+	if (mountpoint)
+		verbose(&ctl, _("%s: mounted on %s"), ctl.device, mountpoint);
+	else
+		verbose(&ctl, _("%s: not mounted"), ctl.device);
+
+	disk = get_disk_devname(ctl.device);
+	if (disk) {
+		verbose(&ctl, _("%s: disc device: %s (disk device will be used for eject)"), ctl.device, disk);
+		free(ctl.device);
+		ctl.device = disk;
+		disk = NULL;
+	} else {
+		struct stat st;
+
+		if (stat(ctl.device, &st) != 0 || !S_ISBLK(st.st_mode))
+			errx(EXIT_FAILURE, _("%s: not found mountpoint or device "
+					"with the given name"), ctl.device);
+
+		verbose(&ctl, _("%s: is whole-disk device"), ctl.device);
+	}
+
+	if (ctl.F_option == 0 && is_hotpluggable(&ctl) == 0)
+		errx(EXIT_FAILURE, _("%s: is not hot-pluggable device"), ctl.device);
+
+	/* handle -n option */
+	if (ctl.n_option) {
+		info(_("device is `%s'"), ctl.device);
+		verbose(&ctl, _("exiting due to -n/--noop option"));
+		return EXIT_SUCCESS;
+	}
+
+	/* handle -i option */
+	if (ctl.i_option) {
+		open_device(&ctl);
+		manual_eject(&ctl);
+		return EXIT_SUCCESS;
+	}
+
+	/* handle -a option */
+	if (ctl.a_option) {
+		if (ctl.a_arg)
+			verbose(&ctl, _("%s: enabling auto-eject mode"), ctl.device);
+		else
+			verbose(&ctl, _("%s: disabling auto-eject mode"), ctl.device);
+		open_device(&ctl);
+		auto_eject(&ctl);
+		return EXIT_SUCCESS;
+	}
+
+	/* handle -t option */
+	if (ctl.t_option) {
+		verbose(&ctl, _("%s: closing tray"), ctl.device);
+		open_device(&ctl);
+		close_tray(ctl.fd);
+		set_device_speed(&ctl);
+		return EXIT_SUCCESS;
+	}
+
+	/* handle -T option */
+	if (ctl.T_option) {
+		verbose(&ctl, _("%s: toggling tray"), ctl.device);
+		open_device(&ctl);
+		toggle_tray(ctl.fd);
+		set_device_speed(&ctl);
+		return EXIT_SUCCESS;
+	}
+
+	/* handle -X option */
+	if (ctl.X_option) {
+		verbose(&ctl, _("%s: listing CD-ROM speed"), ctl.device);
+		open_device(&ctl);
+		list_speeds(&ctl);
+		return EXIT_SUCCESS;
+	}
+
+	/* handle -x option only */
+	if (!ctl.c_option)
+		set_device_speed(&ctl);
+
+
+	/*
+	 * Unmount all partitions if -m is not specified; or umount given
+	 * mountpoint if -M is specified, otherwise print error if another
+	 * partition is mounted.
+	 */
+	if (!ctl.m_option) {
+		int ct = umount_partitions(&ctl);
+
+		if (ct == 0 && mountpoint)
+			umount_one(&ctl, mountpoint); /* probably whole-device */
+
+		if (ctl.M_option) {
+			if (ct == 1 && mountpoint)
+				umount_one(&ctl, mountpoint);
+			else if (ct)
+				errx(EXIT_FAILURE, _("error: %s: device in use"), ctl.device);
+		}
+	}
+
+	/* handle -c option */
+	if (ctl.c_option) {
+		verbose(&ctl, _("%s: selecting CD-ROM disc #%ld"), ctl.device, ctl.c_arg);
+		open_device(&ctl);
+		changer_select(&ctl);
+		set_device_speed(&ctl);
+		return EXIT_SUCCESS;
+	}
+
+	/* if user did not specify type of eject, try all four methods */
+	if (ctl.r_option + ctl.s_option + ctl.f_option + ctl.q_option == 0)
+		ctl.r_option = ctl.s_option = ctl.f_option = ctl.q_option = 1;
+
+	/* open device */
+	open_device(&ctl);
+
+	/* try various methods of ejecting until it works */
+	if (ctl.r_option) {
+		verbose(&ctl, _("%s: trying to eject using CD-ROM eject command"), ctl.device);
+		worked = eject_cdrom(ctl.fd);
+		verbose(&ctl, worked ? _("CD-ROM eject command succeeded") :
+				_("CD-ROM eject command failed"));
+	}
+
+	if (ctl.s_option && !worked) {
+		verbose(&ctl, _("%s: trying to eject using SCSI commands"), ctl.device);
+		worked = eject_scsi(&ctl);
+		verbose(&ctl, worked ? _("SCSI eject succeeded") :
+				_("SCSI eject failed"));
+	}
+
+	if (ctl.f_option && !worked) {
+		verbose(&ctl, _("%s: trying to eject using floppy eject command"), ctl.device);
+		worked = eject_floppy(ctl.fd);
+		verbose(&ctl, worked ? _("floppy eject command succeeded") :
+				_("floppy eject command failed"));
+	}
+
+	if (ctl.q_option && !worked) {
+		verbose(&ctl, _("%s: trying to eject using tape offline command"), ctl.device);
+		worked = eject_tape(ctl.fd);
+		verbose(&ctl, worked ? _("tape offline command succeeded") :
+				_("tape offline command failed"));
+	}
+
+	if (!worked)
+		errx(EXIT_FAILURE, _("unable to eject"));
+
+	/* cleanup */
+	close(ctl.fd);
+	free(ctl.device);
+	free(mountpoint);
+
+	mnt_unref_table(ctl.mtab);
+
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/fallocate.1 b/sys-utils/fallocate.1
new file mode 100644
index 0000000..fe5072a
--- /dev/null
+++ b/sys-utils/fallocate.1
@@ -0,0 +1,191 @@
+.TH FALLOCATE 1 "April 2014" "util-linux" "User Commands"
+.SH NAME
+fallocate \- preallocate or deallocate space to a file
+.SH SYNOPSIS
+.B fallocate
+.RB [ \-c | \-p | \-z ]
+.RB [ \-o
+.IR offset ]
+.B \-l
+.I length
+.RB [ \-n ]
+.I filename
+.PP
+.B fallocate \-d
+.RB [ \-o
+.IR offset ]
+.RB [ \-l
+.IR length ]
+.I filename
+.PP
+.B fallocate \-x
+.RB [ \-o
+.IR offset ]
+.B \-l
+.I length
+.I filename
+.SH DESCRIPTION
+.B fallocate
+is used to manipulate the allocated disk space for a file,
+either to deallocate or preallocate it.
+For filesystems which support the fallocate system call,
+preallocation is done quickly by allocating blocks and marking them as
+uninitialized, requiring no IO to the data blocks.
+This is much faster than creating a file by filling it with zeroes.
+.PP
+The exit code returned by
+.B fallocate
+is 0 on success and 1 on failure.
+.SH OPTIONS
+The
+.I length
+and
+.I offset
+arguments may be followed by the multiplicative suffixes KiB (=1024),
+MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB, and YiB (the "iB" is
+optional, e.g., "K" has the same meaning as "KiB") or the suffixes
+KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB, and YB.
+.PP
+The options
+.BR \-\-collapse\-range ", " \-\-dig\-holes ", " \-\-punch\-hole ,
+and
+.B \-\-zero\-range
+are mutually exclusive.
+.TP
+.BR \-c ", " \-\-collapse\-range
+Removes a byte range from a file, without leaving a hole.
+The byte range to be collapsed starts at
+.I offset
+and continues for
+.I length
+bytes.
+At the completion of the operation,
+the contents of the file starting at the location
+.IR offset + length
+will be appended at the location
+.IR offset ,
+and the file will be
+.I length
+bytes smaller.
+The option
+.B \-\-keep\-size
+may not be specified for the collapse-range operation.
+.sp
+Available since Linux 3.15 for ext4 (only for extent-based files) and XFS.
+.sp
+A filesystem may place limitations on the granularity of the operation, in
+order to ensure efficient implementation. Typically, offset and length must be
+multiples of the filesystem logical block size, which varies according to the
+filesystem type and configuration. If a filesystem has such a requirement,
+the operation will fail with the error EINVAL if this requirement is violated.
+.TP
+.BR \-d ", " \-\-dig\-holes
+Detect and dig holes.
+This makes the file sparse in-place, without using extra disk space.
+The minimum size of the hole depends on filesystem I/O block size
+(usually 4096 bytes).
+Also, when using this option,
+.B \-\-keep\-size
+is implied. If no range is specified by
+.B \-\-offset
+and
+.BR \-\-length ,
+then the entire file is analyzed for holes.
+.sp
+You can think of this option as doing a
+.RB """" "cp \-\-sparse" """"
+and then renaming the destination file to the original,
+without the need for extra disk space.
+.sp
+See \fB\-\-punch\-hole\fP for a list of supported filesystems.
+.TP
+.BR \-i ", " \-\-insert\-range
+Insert a hole of
+.I length
+bytes from
+.IR offset ,
+shifting existing data.
+.TP
+.BR \-l ", " "\-\-length " \fIlength
+Specifies the length of the range, in bytes.
+.TP
+.BR \-n ", " \-\-keep\-size
+Do not modify the apparent length of the file. This may effectively allocate
+blocks past EOF, which can be removed with a truncate.
+.TP
+.BR \-o ", " "\-\-offset " \fIoffset
+Specifies the beginning offset of the range, in bytes.
+.TP
+.BR \-p ", " \-\-punch\-hole
+Deallocates space (i.e., creates a hole) in the byte range starting at
+.I offset
+and continuing for
+.I length
+bytes.
+Within the specified range, partial filesystem blocks are zeroed,
+and whole filesystem blocks are removed from the file.
+After a successful call,
+subsequent reads from this range will return zeroes.
+This option may not be specified at the same time as the
+.B \-\-zero\-range
+option.
+Also, when using this option,
+.B \-\-keep\-size
+is implied.
+.sp
+Supported for XFS (since Linux 2.6.38), ext4 (since Linux 3.0),
+Btrfs (since Linux 3.7) and tmpfs (since Linux 3.5).
+.TP
+.BR \-v ", " \-\-verbose
+Enable verbose mode.
+.TP
+.BR \-x ", " \-\-posix
+Enable POSIX operation mode.
+In that mode the allocation operation always completes,
+but it may take longer when fast allocation is not supported by
+the underlying filesystem.
+.TP
+.BR \-z ", " \-\-zero\-range
+Zeroes space in the byte range starting at
+.I offset
+and continuing for
+.I length
+bytes.
+Within the specified range, blocks are preallocated for the regions
+that span the holes in the file.
+After a successful call,
+subsequent reads from this range will return zeroes.
+.sp
+Zeroing is done within the filesystem preferably by converting the
+range into unwritten extents. This approach means that the specified
+range will not be physically zeroed out on the device (except for
+partial blocks at either end of the range), and I/O is
+(otherwise) required only to update metadata.
+.sp
+Option \fB\-\-keep\-size\fP can be specified to prevent file length
+modification.
+.sp
+Available since Linux 3.14 for ext4 (only for extent-based files) and XFS.
+.TP
+.BR \-V ", " \-\-version
+Display version information and exit.
+.TP
+.BR \-h ", " \-\-help
+Display help text and exit.
+.SH AUTHORS
+.MT sandeen@redhat.com
+Eric Sandeen
+.ME
+.br
+.MT kzak@redhat.com
+Karel Zak
+.ME
+.SH SEE ALSO
+.BR truncate (1),
+.BR fallocate (2),
+.BR posix_fallocate (3)
+.SH AVAILABILITY
+The fallocate command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/fallocate.c b/sys-utils/fallocate.c
new file mode 100644
index 0000000..ba3867c
--- /dev/null
+++ b/sys-utils/fallocate.c
@@ -0,0 +1,412 @@
+/*
+ * fallocate - utility to use the fallocate system call
+ *
+ * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved.
+ * Written by Eric Sandeen <sandeen@redhat.com>
+ * Karel Zak <kzak@redhat.com>
+ *
+ * cvtnum routine taken from xfsprogs,
+ * Copyright (c) 2003-2005 Silicon Graphics, Inc.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <limits.h>
+#include <string.h>
+
+#ifndef HAVE_FALLOCATE
+# include <sys/syscall.h>
+#endif
+
+#if defined(HAVE_LINUX_FALLOC_H) && \
+ (!defined(FALLOC_FL_KEEP_SIZE) || !defined(FALLOC_FL_PUNCH_HOLE) || \
+ !defined(FALLOC_FL_COLLAPSE_RANGE) || !defined(FALLOC_FL_ZERO_RANGE) || \
+ !defined(FALLOC_FL_INSERT_RANGE))
+# include <linux/falloc.h> /* non-libc fallback for FALLOC_FL_* flags */
+#endif
+
+
+#ifndef FALLOC_FL_KEEP_SIZE
+# define FALLOC_FL_KEEP_SIZE 0x1
+#endif
+
+#ifndef FALLOC_FL_PUNCH_HOLE
+# define FALLOC_FL_PUNCH_HOLE 0x2
+#endif
+
+#ifndef FALLOC_FL_COLLAPSE_RANGE
+# define FALLOC_FL_COLLAPSE_RANGE 0x8
+#endif
+
+#ifndef FALLOC_FL_ZERO_RANGE
+# define FALLOC_FL_ZERO_RANGE 0x10
+#endif
+
+#ifndef FALLOC_FL_INSERT_RANGE
+# define FALLOC_FL_INSERT_RANGE 0x20
+#endif
+
+#include "nls.h"
+#include "strutils.h"
+#include "c.h"
+#include "closestream.h"
+#include "xalloc.h"
+#include "optutils.h"
+
+/* verbosity level: incremented for every -v, checked before dig_holes() prints its summary */
+static int verbose;
+/* name of the file being processed: set from the command line in main(), used in messages */
+static char *filename;
+
+/*
+ * Print the help text to standard output and exit with EXIT_SUCCESS.
+ * The -x line is shown only when built with HAVE_POSIX_FALLOCATE.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *fp = stdout;
+
+	/* synopsis */
+	fputs(USAGE_HEADER, fp);
+	fprintf(fp,
+	      _(" %s [options] <filename>\n"), program_invocation_short_name);
+
+	/* one-line description */
+	fputs(USAGE_SEPARATOR, fp);
+	fputs(_("Preallocate space to, or deallocate space from a file.\n"), fp);
+
+	/* option list */
+	fputs(USAGE_OPTIONS, fp);
+	fputs(_(" -c, --collapse-range remove a range from the file\n"), fp);
+	fputs(_(" -d, --dig-holes detect zeroes and replace with holes\n"), fp);
+	fputs(_(" -i, --insert-range insert a hole at range, shifting existing data\n"), fp);
+	fputs(_(" -l, --length <num> length for range operations, in bytes\n"), fp);
+	fputs(_(" -n, --keep-size maintain the apparent size of the file\n"), fp);
+	fputs(_(" -o, --offset <num> offset for range operations, in bytes\n"), fp);
+	fputs(_(" -p, --punch-hole replace a range with a hole (implies -n)\n"), fp);
+	fputs(_(" -z, --zero-range zero and ensure allocation of a range\n"), fp);
+#ifdef HAVE_POSIX_FALLOCATE
+	fputs(_(" -x, --posix use posix_fallocate(3) instead of fallocate(2)\n"), fp);
+#endif
+	fputs(_(" -v, --verbose verbose mode\n"), fp);
+
+	/* common trailer: help/version options and man page pointer */
+	fputs(USAGE_SEPARATOR, fp);
+	printf(USAGE_HELP_OPTIONS(22));
+
+	printf(USAGE_MAN_TAIL("fallocate(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Convert the size string @s to loff_t using strtosize().
+ *
+ * Returns the parsed value, or -1 when strtosize() rejects the string
+ * or the value is too large for a signed 64-bit offset.
+ */
+static loff_t cvtnum(char *s)
+{
+	uintmax_t x;
+
+	if (strtosize(s, &x))
+		return -1LL;
+
+	/*
+	 * A plain conversion would wrap huge values to negative numbers;
+	 * one of them is -2, which main() uses as "length not specified".
+	 */
+	if (x > (uintmax_t) LLONG_MAX)
+		return -1LL;
+
+	return (loff_t) x;
+}
+
+/*
+ * Run fallocate(2) on @fd with the given @mode, @offset and @length,
+ * going through syscall() when the libc wrapper is not available.
+ * Any failure terminates the program with an error message.
+ */
+static void xfallocate(int fd, int mode, off_t offset, off_t length)
+{
+#ifdef HAVE_FALLOCATE
+	int rc = fallocate(fd, mode, offset, length);
+#else
+	int rc = syscall(SYS_fallocate, fd, mode, offset, length);
+#endif
+
+	if (rc >= 0)
+		return;
+
+	/*
+	 * EOPNOTSUPP: The FALLOC_FL_KEEP_SIZE is unsupported
+	 * ENOSYS: The filesystem does not support sys_fallocate
+	 *
+	 * Only the first case gets a dedicated message.
+	 */
+	if (errno == EOPNOTSUPP && (mode & FALLOC_FL_KEEP_SIZE))
+		errx(EXIT_FAILURE, _("fallocate failed: keep size mode is unsupported"));
+
+	err(EXIT_FAILURE, _("fallocate failed"));
+}
+
+#ifdef HAVE_POSIX_FALLOCATE
+/*
+ * Allocate the range [@offset, @offset + @length) of @fd with
+ * posix_fallocate(3) and terminate the program on failure.
+ *
+ * posix_fallocate() reports failure by returning the error number
+ * and does not set errno, so the result is copied to errno to let err()
+ * print the matching message.
+ */
+static void xposix_fallocate(int fd, off_t offset, off_t length)
+{
+	int error = posix_fallocate(fd, offset, length);
+
+	if (error != 0) {
+		errno = error;
+		err(EXIT_FAILURE, _("fallocate failed"));
+	}
+}
+#endif
+
+/*
+ * Return non-zero when the first @bufsize bytes of @buf are all zero,
+ * and zero otherwise.
+ *
+ * The scan loops have no length check; they stop at the first non-zero
+ * byte. To guarantee termination a non-zero sentinel is written directly
+ * behind the data, therefore:
+ *
+ * The real buffer size has to be bufsize + sizeof(uintptr_t)
+ *
+ * NOTE(review): @buf is read through uintptr_t pointers, so it presumably
+ * has to be suitably aligned for that type -- confirm for new callers.
+ */
+static int is_nul(void *buf, size_t bufsize)
+{
+ typedef uintptr_t word;
+ void const *vp;
+ char const *cbuf = buf, *cp;
+ word const *wp = buf;
+
+ /* set sentinel */
+ memset((char *) buf + bufsize, '\1', sizeof(word));
+
+ /* Find first nonzero *word*, or the word with the sentinel. */
+ while (*wp++ == 0)
+ continue;
+
+ /* Find the first nonzero *byte*, or the sentinel. */
+ /* wp was post-incremented past the matching word, so step back by one */
+ vp = wp - 1;
+ cp = vp;
+
+ while (*cp++ == 0)
+ continue;
+
+ /*
+  * cp now points one byte past the first non-zero byte; it lies beyond
+  * the data area only when that byte was the sentinel.
+  */
+ return cbuf + bufsize < cp;
+}
+
+/*
+ * Replace runs of zero bytes in a file by holes.
+ *
+ * @fd:       descriptor of the file
+ * @file_off: offset at which the scan starts
+ * @len:      number of bytes to scan; 0 means up to the end of the file
+ *
+ * Data areas are located with SEEK_DATA/SEEK_HOLE and read in chunks of
+ * st_blksize bytes. Consecutive all-zero chunks are collected and then
+ * released with FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE. In verbose
+ * mode the number of converted bytes is printed at the end. Any stat,
+ * seek, read or fallocate failure terminates the program.
+ */
+static void dig_holes(int fd, off_t file_off, off_t len)
+{
+	off_t file_end = len ? file_off + len : 0;
+	off_t hole_start = 0, hole_sz = 0;
+	uintmax_t ct = 0;
+	size_t bufsz;
+	char *buf;
+	struct stat st;
+#if defined(POSIX_FADV_DONTNEED) && defined(HAVE_POSIX_FADVISE)
+	/* guarded like the only code that uses these two variables */
+	off_t cache_start = file_off;
+	/*
+	 * We don't want to call POSIX_FADV_DONTNEED to discard cached
+	 * data in PAGE_SIZE steps. IMHO it's overkill (too many syscalls).
+	 *
+	 * Let's assume that 1MiB (on system with 4K page size) is just
+	 * a good compromise.
+	 *					-- kzak Feb-2014
+	 */
+	const size_t cachesz = getpagesize() * 256;
+#endif
+
+	if (fstat(fd, &st) != 0)
+		err(EXIT_FAILURE, _("stat of %s failed"), filename);
+
+	bufsz = st.st_blksize;
+
+	if (lseek(fd, file_off, SEEK_SET) < 0)
+		err(EXIT_FAILURE, _("seek on %s failed"), filename);
+
+	/* buffer + extra space for is_nul() sentinel */
+	buf = xmalloc(bufsz + sizeof(uintptr_t));
+	while (file_end == 0 || file_off < file_end) {
+		/*
+		 * Detect data area (skip holes)
+		 */
+		off_t end, off;
+
+		off = lseek(fd, file_off, SEEK_DATA);
+		if (off < 0) {
+			if (errno == ENXIO)
+				break;		/* no data behind file_off */
+			/* any other error would otherwise make us loop forever */
+			err(EXIT_FAILURE, _("seek on %s failed"), filename);
+		}
+		if (file_end && off >= file_end)
+			break;
+
+		end = lseek(fd, off, SEEK_HOLE);
+		if (end < 0)
+			err(EXIT_FAILURE, _("seek on %s failed"), filename);
+		if (file_end && end > file_end)
+			end = file_end;
+
+#if defined(POSIX_FADV_SEQUENTIAL) && defined(HAVE_POSIX_FADVISE)
+		/* the third argument is a length, not an end offset */
+		posix_fadvise(fd, off, end - off, POSIX_FADV_SEQUENTIAL);
+#endif
+		/*
+		 * Dig holes in the area
+		 */
+		while (off < end) {
+			ssize_t rsz = pread(fd, buf, bufsz, off);
+			if (rsz < 0 && errno)
+				err(EXIT_FAILURE, _("%s: read failed"), filename);
+			/* do not look at bytes behind the end of the area */
+			if (end && rsz > 0 && off > end - rsz)
+				rsz = end - off;
+			if (rsz <= 0)
+				break;
+
+			if (is_nul(buf, rsz)) {
+				if (!hole_sz)	/* new hole detected */
+					hole_start = off;
+				hole_sz += rsz;
+			} else if (hole_sz) {
+				xfallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE,
+					   hole_start, hole_sz);
+				ct += hole_sz;
+				hole_sz = hole_start = 0;
+			}
+
+#if defined(POSIX_FADV_DONTNEED) && defined(HAVE_POSIX_FADVISE)
+			/* discard cached data */
+			if (off - cache_start > (off_t) cachesz) {
+				size_t clen = off - cache_start;
+
+				clen = (clen / cachesz) * cachesz;
+				posix_fadvise(fd, cache_start, clen, POSIX_FADV_DONTNEED);
+				cache_start = cache_start + clen;
+			}
+#endif
+			off += rsz;
+		}
+		if (hole_sz) {
+			xfallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE,
+				   hole_start, hole_sz);
+			ct += hole_sz;
+			/*
+			 * Forget the finished hole; otherwise zeroes at the
+			 * start of the next data area would be added to it
+			 * and punched at the wrong offset.
+			 */
+			hole_sz = hole_start = 0;
+		}
+		file_off = off;
+	}
+
+	free(buf);
+
+	if (verbose) {
+		char *str = size_to_human_string(SIZE_SUFFIX_3LETTER | SIZE_SUFFIX_SPACE, ct);
+		fprintf(stdout, _("%s: %s (%ju bytes) converted to sparse holes.\n"),
+			filename, str, ct);
+		free(str);
+	}
+}
+
+/*
+ * fallocate entry point.
+ *
+ * Parses the options, validates --length and --offset, opens the file
+ * (creating it only for a plain allocation without any mode flag) and
+ * then runs exactly one of dig_holes(), xposix_fallocate() or
+ * xfallocate(). Returns EXIT_SUCCESS; failures terminate through
+ * err()/errx().
+ */
+int main(int argc, char **argv)
+{
+	int c;
+	int fd;
+	int mode = 0;
+	int dig = 0;
+	int posix = 0;
+	loff_t length = -2LL;	/* -2 marks "no --length given" */
+	loff_t offset = 0;
+
+	static const struct option longopts[] = {
+	    { "help",           no_argument,       NULL, 'h' },
+	    { "version",        no_argument,       NULL, 'V' },
+	    { "keep-size",      no_argument,       NULL, 'n' },
+	    { "punch-hole",     no_argument,       NULL, 'p' },
+	    { "collapse-range", no_argument,       NULL, 'c' },
+	    { "dig-holes",      no_argument,       NULL, 'd' },
+	    { "insert-range",   no_argument,       NULL, 'i' },
+	    { "zero-range",     no_argument,       NULL, 'z' },
+	    { "offset",         required_argument, NULL, 'o' },
+	    { "length",         required_argument, NULL, 'l' },
+	    { "posix",          no_argument,       NULL, 'x' },
+	    { "verbose",        no_argument,       NULL, 'v' },
+	    { NULL, 0, NULL, 0 }
+	};
+
+	/*
+	 * The former single row for -x started with 'x' and so broke the
+	 * ordering rule below. It is split into sorted pairs, so that -x
+	 * conflicts with each listed option while options such as -n and
+	 * -z remain combinable with each other.
+	 * NOTE(review): assumes err_exclusive_options() treats all members
+	 * of one row as mutually exclusive -- confirm against optutils.h.
+	 */
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'c', 'd', 'p', 'z' },
+		{ 'c', 'n' },
+		{ 'c', 'x' },
+		{ 'd', 'x' },
+		{ 'i', 'x' },
+		{ 'n', 'x' },
+		{ 'p', 'x' },
+		{ 'x', 'z' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv, "hvVncpdizxl:o:", longopts, NULL))
+			!= -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch(c) {
+		case 'h':
+			usage();
+			break;
+		case 'c':
+			mode |= FALLOC_FL_COLLAPSE_RANGE;
+			break;
+		case 'd':
+			dig = 1;
+			break;
+		case 'i':
+			mode |= FALLOC_FL_INSERT_RANGE;
+			break;
+		case 'l':
+			length = cvtnum(optarg);
+			break;
+		case 'n':
+			mode |= FALLOC_FL_KEEP_SIZE;
+			break;
+		case 'o':
+			offset = cvtnum(optarg);
+			break;
+		case 'p':
+			mode |= FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE;
+			break;
+		case 'z':
+			mode |= FALLOC_FL_ZERO_RANGE;
+			break;
+		case 'x':
+#ifdef HAVE_POSIX_FALLOCATE
+			posix = 1;
+			break;
+#else
+			errx(EXIT_FAILURE, _("posix_fallocate support is not compiled"));
+#endif
+		case 'v':
+			verbose++;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (optind == argc)
+		errx(EXIT_FAILURE, _("no filename specified"));
+
+	filename = argv[optind++];
+
+	if (optind != argc)
+		errx(EXIT_FAILURE, _("unexpected number of arguments"));
+
+	if (dig) {
+		/* for --dig-holes the default is to analyze the whole file */
+		if (length == -2LL)
+			length = 0;
+		if (length < 0)
+			errx(EXIT_FAILURE, _("invalid length value specified"));
+	} else {
+		/* it's safer to require the range specification (--length --offset) */
+		if (length == -2LL)
+			errx(EXIT_FAILURE, _("no length argument specified"));
+		if (length <= 0)
+			errx(EXIT_FAILURE, _("invalid length value specified"));
+	}
+	if (offset < 0)
+		errx(EXIT_FAILURE, _("invalid offset value specified"));
+
+	/* O_CREAT makes sense only for the default fallocate(2) behavior
+	 * when mode is not specified and new space is allocated */
+	fd = open(filename, O_RDWR | (!dig && !mode ? O_CREAT : 0),
+		  S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH);
+	if (fd < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), filename);
+
+	if (dig)
+		dig_holes(fd, offset, length);
+#ifdef HAVE_POSIX_FALLOCATE
+	else if (posix)
+		xposix_fallocate(fd, offset, length);
+#endif
+	else
+		xfallocate(fd, mode, offset, length);
+
+	if (close_fd(fd) != 0)
+		err(EXIT_FAILURE, _("write failed: %s"), filename);
+
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/flock.1 b/sys-utils/flock.1
new file mode 100644
index 0000000..5b1d635
--- /dev/null
+++ b/sys-utils/flock.1
@@ -0,0 +1,197 @@
+.\" -----------------------------------------------------------------------
+.\"
+.\" Copyright 2003-2006 H. Peter Anvin - All Rights Reserved
+.\"
+.\" Permission is hereby granted, free of charge, to any person
+.\" obtaining a copy of this software and associated documentation
+.\" files (the "Software"), to deal in the Software without
+.\" restriction, including without limitation the rights to use,
+.\" copy, modify, merge, publish, distribute, sublicense, and/or
+.\" sell copies of the Software, and to permit persons to whom
+.\" the Software is furnished to do so, subject to the following
+.\" conditions:
+.\"
+.\" The above copyright notice and this permission notice shall
+.\" be included in all copies or substantial portions of the Software.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+.\" EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+.\" OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+.\" NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+.\" HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+.\" WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+.\" FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+.\" OTHER DEALINGS IN THE SOFTWARE.
+.\"
+.\" -----------------------------------------------------------------------
+.TH FLOCK 1 "July 2014" "util-linux" "User Commands"
+.SH NAME
+flock \- manage locks from shell scripts
+.SH SYNOPSIS
+.B flock
+[options]
+.IR file | "directory command " [ arguments ]
+.br
+.B flock
+[options]
+.IR file | directory
+.BI \-c " command"
+.br
+.B flock
+.RI [options] " number"
+.SH DESCRIPTION
+.PP
+This utility manages
+.BR flock (2)
+locks from within shell scripts or from the command line.
+.PP
+The first and second of the above forms wrap the lock around the execution of a
+.IR command ,
+in a manner similar to
+.BR su (1)
+or
+.BR newgrp (1).
+They lock a specified \fIfile\fR or \fIdirectory\fR, which is created (assuming
+appropriate permissions) if it does not already exist. By default, if the
+lock cannot be immediately acquired,
+.B flock
+waits until the lock is available.
+.PP
+The third form uses an open file by its file descriptor \fInumber\fR.
+See the examples below for how that can be used.
+.SH OPTIONS
+.TP
+.BR \-c , " \-\-command " \fIcommand
+Pass a single \fIcommand\fR, without arguments, to the shell with
+.BR \-c .
+.TP
+.BR \-E , " \-\-conflict\-exit\-code " \fInumber
+The exit code used when the \fB\-n\fP option is in use, and the
+conflicting lock exists, or the \fB\-w\fP option is in use,
+and the timeout is reached. The default value is \fB1\fR.
+.TP
+.BR \-F , " \-\-no\-fork"
+Do not fork before executing
+.IR command .
+Upon execution the flock process is replaced by
+.I command
+which continues to hold the lock. This option is incompatible with
+\fB\-\-close\fR as there would otherwise be nothing left to hold the lock.
+.TP
+.BR \-e , " \-x" , " \-\-exclusive"
+Obtain an exclusive lock, sometimes called a write lock. This is the
+default.
+.TP
+.BR \-n , " \-\-nb" , " \-\-nonblock"
+Fail rather than wait if the lock cannot be
+immediately acquired.
+See the
+.B \-E
+option for the exit code used.
+.TP
+.BR \-o , " \-\-close"
+Close the file descriptor on which the lock is held before executing
+.IR command .
+This is useful if
+.I command
+spawns a child process which should not be holding the lock.
+.TP
+.BR \-s , " \-\-shared"
+Obtain a shared lock, sometimes called a read lock.
+.TP
+.BR \-u , " \-\-unlock"
+Drop a lock. This is usually not required, since a lock is automatically
+dropped when the file is closed. However, it may be required in special
+cases, for example if the enclosed command group may have forked a background
+process which should not be holding the lock.
+.TP
+.BR \-w , " \-\-wait" , " \-\-timeout " \fIseconds
+Fail if the lock cannot be acquired within
+.IR seconds .
+Decimal fractional values are allowed.
+See the
+.B \-E
+option for the exit code used. A value of zero
+.I seconds
+is interpreted as \fB\-\-nonblock\fR.
+.TP
+.B \-\-verbose
+Report how long it took to acquire the lock, or why the lock could not be
+obtained.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH EXAMPLES
+.TP
+shell1> flock /tmp -c cat
+.TQ
+shell2> flock -w .007 /tmp -c echo; /bin/echo $?
+Set an exclusive lock on directory /tmp; the second command will then fail.
+.TP
+shell1> flock -s /tmp -c cat
+.TQ
+shell2> flock -s -w .007 /tmp -c echo; /bin/echo $?
+Set a shared lock on directory /tmp; the second command will not fail.
+Note that attempting to get an exclusive lock with the second command would fail.
+.TP
+shell> flock -x local-lock-file echo 'a b c'
+Grab the exclusive lock "local-lock-file" before running echo with 'a b c'.
+.TP
+(
+.TQ
+ flock -n 9 || exit 1
+.TQ
+ # ... commands executed under lock ...
+.TQ
+) 9>/var/lock/mylockfile
+This form is convenient inside shell scripts. The mode used to open the file
+doesn't matter to
+.BR flock ;
+using
+.I >
+or
+.I >>
+allows the lockfile to be created if it does not already exist, however,
+write permission is required. Using
+.I <
+requires that the file already exists but only read permission is required.
+.TP
+[ "${FLOCKER}" != "$0" ] && exec env FLOCKER="$0" flock -en "$0" "$0" "$@" || :
+This is useful boilerplate code for shell scripts. Put it at the top of the
+shell script you want to lock and it'll automatically lock itself on the first
+run. If the env var $FLOCKER is not set to the shell script that is being run,
+then execute flock and grab an exclusive non-blocking lock (using the script
+itself as the lock file) before re-execing itself with the right arguments. It
+also sets the FLOCKER env var to the right value so it doesn't run again.
+.SH "EXIT STATUS"
+The command uses
+.B sysexits.h
+return values for everything, except when using either of the options
+.B \-n
+or
+.B \-w
+which report a failure to acquire the lock with a return value given by the
+.B \-E
+option, or 1 by default.
+.PP
+When using the \fIcommand\fR variant, and executing the child worked, then
+the exit status is that of the child command.
+.SH AUTHOR
+.MT hpa@zytor.com
+H. Peter Anvin
+.ME
+.SH COPYRIGHT
+Copyright \(co 2003\-2006 H. Peter Anvin.
+.br
+This is free software; see the source for copying conditions. There is NO
+warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+.SH "SEE ALSO"
+.BR flock (2)
+.SH AVAILABILITY
+The flock command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/flock.c b/sys-utils/flock.c
new file mode 100644
index 0000000..ed25230
--- /dev/null
+++ b/sys-utils/flock.c
@@ -0,0 +1,380 @@
+/* Copyright 2003-2005 H. Peter Anvin - All Rights Reserved
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall
+ * be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <paths.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sysexits.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "nls.h"
+#include "strutils.h"
+#include "closestream.h"
+#include "monotonic.h"
+#include "timer.h"
+
+static void __attribute__((__noreturn__)) usage(void)
+{
+ fputs(USAGE_HEADER, stdout); /* help text goes to stdout; this function exits instead of returning */
+ printf(
+ _(" %1$s [options] <file>|<directory> <command> [<argument>...]\n"
+ " %1$s [options] <file>|<directory> -c <command>\n"
+ " %1$s [options] <file descriptor number>\n"),
+ program_invocation_short_name); /* %1$s reuses this single argument on all three synopsis lines */
+
+ fputs(USAGE_SEPARATOR, stdout);
+ fputs(_("Manage file locks from shell scripts.\n"), stdout);
+
+ fputs(USAGE_OPTIONS, stdout);
+ fputs(_( " -s, --shared get a shared lock\n"), stdout);
+ fputs(_( " -x, --exclusive get an exclusive lock (default)\n"), stdout);
+ fputs(_( " -u, --unlock remove a lock\n"), stdout);
+ fputs(_( " -n, --nonblock fail rather than wait\n"), stdout);
+ fputs(_( " -w, --timeout <secs> wait for a limited amount of time\n"), stdout);
+ fputs(_( " -E, --conflict-exit-code <number> exit code after conflict or timeout\n"), stdout);
+ fputs(_( " -o, --close close file descriptor before running command\n"), stdout);
+ fputs(_( " -c, --command <command> run a single command string through the shell\n"), stdout);
+ fputs(_( " -F, --no-fork execute command without forking\n"), stdout);
+ fputs(_( " --verbose increase verbosity\n"), stdout);
+ fputs(USAGE_SEPARATOR, stdout);
+ printf(USAGE_HELP_OPTIONS(26));
+ printf(USAGE_MAN_TAIL("flock(1)"));
+ exit(EXIT_SUCCESS); /* explicitly requested help is a successful outcome */
+}
+
+static volatile sig_atomic_t timeout_expired = 0; /* written by the handler, polled in main() */
+/* Handler passed to setup_timer(): flag expiry only for timer-generated signals. */
+static void timeout_handler(int sig __attribute__((__unused__)),
+ siginfo_t *info,
+ void *context __attribute__((__unused__)))
+{
+ if (info->si_code == SI_TIMER)
+ timeout_expired = 1;
+}
+
+static int open_file(const char *filename, int *flags)
+{
+ /* Open (creating if needed) the lock file; exits on failure, stores the flags used in *flags. */
+ int fd;
+ int fl = *flags == 0 ? O_RDONLY : *flags;
+
+ errno = 0;
+ fl |= O_NOCTTY | O_CREAT;
+ fd = open(filename, fl, 0666);
+
+ /* Linux doesn't like O_CREAT on a directory, even though it
+ * should be a no-op; POSIX doesn't allow O_RDWR or O_WRONLY
+ */
+ if (fd < 0 && errno == EISDIR) {
+ fl = O_RDONLY | O_NOCTTY;
+ fd = open(filename, fl);
+ }
+ if (fd < 0) {
+ /* Pick the exit code before warn(): its stdio calls may clobber errno. */
+ int rc = (errno == ENOMEM || errno == EMFILE || errno == ENFILE) ? EX_OSERR :
+ (errno == EROFS || errno == ENOSPC) ? EX_CANTCREAT : EX_NOINPUT;
+
+ warn(_("cannot open lock file %s"), filename);
+ exit(rc);
+ }
+ *flags = fl;
+ return fd;
+}
+
+static void __attribute__((__noreturn__)) run_program(char **cmd_argv)
+{
+ execvp(cmd_argv[0], cmd_argv); /* replaces the process image; returns only on failure */
+ const int rc = (errno == ENOMEM) ? EX_OSERR : EX_UNAVAILABLE; /* decided before warn() can clobber errno */
+ warn(_("failed to execute %s"), cmd_argv[0]);
+ _exit(rc);
+}
+
+int main(int argc, char *argv[])
+{
+ /* Parse options, open or adopt the lock fd, flock() it, then optionally run a command. */
+ static timer_t t_id;
+ struct itimerval timeout;
+ int have_timeout = 0;
+ int type = LOCK_EX;
+ int block = 0;
+ int open_flags = 0;
+ int fd = -1;
+ int opt, ix;
+ int do_close = 0, no_fork = 0;
+ int status;
+ int verbose = 0;
+ struct timeval time_start, time_done;
+ /*
+ * The default exit code for lock conflict or timeout
+ * is specified in man flock.1
+ */
+ int conflict_exit_code = 1;
+ char **cmd_argv = NULL, *sh_c_argv[4];
+ const char *filename = NULL;
+ enum {
+ OPT_VERBOSE = CHAR_MAX + 1
+ };
+ static const struct option long_options[] = {
+ {"shared", no_argument, NULL, 's'},
+ {"exclusive", no_argument, NULL, 'x'},
+ {"unlock", no_argument, NULL, 'u'},
+ {"nonblocking", no_argument, NULL, 'n'},
+ {"nb", no_argument, NULL, 'n'},
+ {"timeout", required_argument, NULL, 'w'},
+ {"wait", required_argument, NULL, 'w'},
+ {"conflict-exit-code", required_argument, NULL, 'E'},
+ {"close", no_argument, NULL, 'o'},
+ {"no-fork", no_argument, NULL, 'F'},
+ {"verbose", no_argument, NULL, OPT_VERBOSE},
+ {"help", no_argument, NULL, 'h'},
+ {"version", no_argument, NULL, 'V'},
+ {NULL, 0, NULL, 0}
+ };
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ strutils_set_exitcode(EX_USAGE);
+
+ if (argc < 2) {
+ warnx(_("not enough arguments"));
+ errtryhelp(EX_USAGE);
+ }
+
+ memset(&timeout, 0, sizeof timeout);
+
+ optopt = 0;
+ while ((opt =
+ getopt_long(argc, argv, "+sexnoFuw:E:hV?", long_options,
+ &ix)) != EOF) {
+ switch (opt) {
+ case 's':
+ type = LOCK_SH;
+ break;
+ case 'e':
+ case 'x':
+ type = LOCK_EX;
+ break;
+ case 'u':
+ type = LOCK_UN;
+ break;
+ case 'o':
+ do_close = 1;
+ break;
+ case 'F':
+ no_fork = 1;
+ break;
+ case 'n':
+ block = LOCK_NB;
+ break;
+ case 'w':
+ have_timeout = 1;
+ strtotimeval_or_err(optarg, &timeout.it_value,
+ _("invalid timeout value"));
+ break;
+ case 'E':
+ conflict_exit_code = strtos32_or_err(optarg,
+ _("invalid exit code"));
+ break;
+ case OPT_VERBOSE:
+ verbose = 1;
+ break;
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ exit(EX_OK);
+ case 'h':
+ usage();
+ default:
+ errtryhelp(EX_USAGE);
+ }
+ }
+
+ if (no_fork && do_close)
+ errx(EX_USAGE,
+ _("the --no-fork and --close options are incompatible"));
+
+ if (argc > optind + 1) {
+ /* Run command */
+ if (!strcmp(argv[optind + 1], "-c") ||
+ !strcmp(argv[optind + 1], "--command")) {
+ if (argc != optind + 3)
+ errx(EX_USAGE,
+ _("%s requires exactly one command argument"),
+ argv[optind + 1]);
+ cmd_argv = sh_c_argv;
+ cmd_argv[0] = getenv("SHELL");
+ if (!cmd_argv[0] || !*cmd_argv[0])
+ cmd_argv[0] = _PATH_BSHELL;
+ cmd_argv[1] = "-c";
+ cmd_argv[2] = argv[optind + 2];
+ cmd_argv[3] = NULL;
+ } else {
+ cmd_argv = &argv[optind + 1];
+ }
+
+ filename = argv[optind];
+ fd = open_file(filename, &open_flags);
+
+ } else if (optind < argc) {
+ /* Use provided file descriptor */
+ fd = strtos32_or_err(argv[optind], _("bad file descriptor"));
+ } else {
+ /* Bad options */
+ errx(EX_USAGE, _("requires file descriptor, file or directory"));
+ }
+
+ if (have_timeout) {
+ if (timeout.it_value.tv_sec == 0 &&
+ timeout.it_value.tv_usec == 0) {
+ /* -w 0 is equivalent to -n; this has to be
+ * special-cased because setting an itimer to zero
+ * means disabled!
+ */
+ have_timeout = 0;
+ block = LOCK_NB;
+ } else
+ if (setup_timer(&t_id, &timeout, &timeout_handler))
+ err(EX_OSERR, _("cannot set up timer"));
+ }
+
+ if (verbose)
+ gettime_monotonic(&time_start);
+ while (flock(fd, type | block)) {
+ switch (errno) {
+ case EWOULDBLOCK:
+ /* -n option set and failed to lock. */
+ if (verbose)
+ warnx(_("failed to get lock"));
+ exit(conflict_exit_code);
+ case EINTR:
+ /* Signal received */
+ if (timeout_expired) {
+ /* -w option set and failed to lock. */
+ if (verbose)
+ warnx(_("timeout while waiting to get lock"));
+ exit(conflict_exit_code);
+ }
+ /* otherwise try again */
+ continue;
+ case EIO:
+ case EBADF: /* since Linux 3.4 (commit 55725513) */
+ /* Probably NFSv4 where flock() is emulated by fcntl().
+ * Let's try to reopen in read-write mode.
+ */
+ if (!(open_flags & O_RDWR) &&
+ type != LOCK_SH &&
+ filename &&
+ access(filename, R_OK | W_OK) == 0) {
+
+ close(fd);
+ open_flags = O_RDWR;
+ fd = open_file(filename, &open_flags);
+
+ if (open_flags & O_RDWR)
+ break;
+ }
+ /* fallthrough */
+ default:
+ /* Other errors; choose the exit code before warn() can clobber errno */
+ status = (errno == ENOLCK || errno == ENOMEM) ? EX_OSERR : EX_DATAERR;
+ if (filename)
+ warn("%s", filename);
+ else
+ warn("%d", fd);
+ exit(status);
+ }
+ }
+
+ if (have_timeout)
+ cancel_timer(&t_id);
+ if (verbose) {
+ struct timeval delta;
+
+ gettime_monotonic(&time_done);
+ timersub(&time_done, &time_start, &delta);
+ printf(_("%s: getting lock took %ld.%06ld seconds\n"),
+ program_invocation_short_name, (long) delta.tv_sec,
+ (long) delta.tv_usec); /* casts: time_t/suseconds_t need not be long */
+ }
+ status = EX_OK;
+
+ if (cmd_argv) {
+ pid_t w, f;
+ /* Clear any inherited settings */
+ signal(SIGCHLD, SIG_DFL);
+ if (verbose)
+ printf(_("%s: executing %s\n"), program_invocation_short_name, cmd_argv[0]);
+
+ if (!no_fork) {
+ f = fork();
+ if (f < 0)
+ err(EX_OSERR, _("fork failed"));
+
+ /* child */
+ else if (f == 0) {
+ if (do_close)
+ close(fd);
+ run_program(cmd_argv);
+
+ /* parent */
+ } else {
+ do {
+ w = waitpid(f, &status, 0);
+ if (w == -1 && errno != EINTR)
+ break;
+ } while (w != f);
+
+ if (w == -1) {
+ status = EXIT_FAILURE;
+ warn(_("waitpid failed"));
+ } else if (WIFEXITED(status))
+ status = WEXITSTATUS(status);
+ else if (WIFSIGNALED(status))
+ status = WTERMSIG(status) + 128;
+ else
+ /* WTF? */
+ status = EX_OSERR;
+ }
+
+ } else
+ /* no-fork execution */
+ run_program(cmd_argv);
+ }
+
+ return status;
+}
diff --git a/sys-utils/fsfreeze.8 b/sys-utils/fsfreeze.8
new file mode 100644
index 0000000..3cd6738
--- /dev/null
+++ b/sys-utils/fsfreeze.8
@@ -0,0 +1,89 @@
+.TH FSFREEZE 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+fsfreeze \- suspend access to a filesystem (Ext3/4, ReiserFS, JFS, XFS)
+.SH SYNOPSIS
+.B fsfreeze
+.BR \-\-freeze | \-\-unfreeze
+.I mountpoint
+
+.SH DESCRIPTION
+.B fsfreeze
+suspends or resumes access to a filesystem.
+.PP
+.B fsfreeze
+halts any new access to the filesystem and creates a stable image on disk.
+.B fsfreeze
+is intended to be used with hardware RAID devices that support the creation
+of snapshots.
+.PP
+.B fsfreeze
+is unnecessary for
+.B device-mapper
+devices. The device-mapper (and LVM) automatically freezes a filesystem
+on the device when a snapshot creation is requested.
+For more details see the
+.BR dmsetup (8)
+man page.
+.PP
+The
+.I mountpoint
+argument is the pathname of the directory where the filesystem
+is mounted.
+The filesystem must be mounted to be frozen (see
+.BR mount (8)).
+.PP
+Note that access-time updates are also suspended if the filesystem is mounted with
+the traditional atime behavior (mount option \fBstrictatime\fR, for more details see
+.BR mount (8)).
+
+.SH OPTIONS
+.TP
+.BR \-f , " \-\-freeze"
+This option requests the specified filesystem to be frozen from new
+modifications. When this is selected, all ongoing transactions in the
+filesystem are allowed to complete, new write system calls are halted, other
+calls which modify the filesystem are halted, and all dirty data, metadata, and
+log information are written to disk. Any process attempting to write to the
+frozen filesystem will block waiting for the filesystem to be unfrozen.
+.sp
+Note that even after freezing, the on-disk filesystem can contain
+information on files that are still in the process of unlinking.
+These files will not be unlinked until the filesystem is unfrozen
+or a clean mount of the snapshot is complete.
+.TP
+.BR \-u , " \-\-unfreeze"
+This option is used to un-freeze the filesystem and allow operations to
+continue. Any filesystem modifications that were blocked by the freeze are
+unblocked and allowed to complete.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH FILESYSTEM SUPPORT
+This command will work only if the filesystem has support for freezing.
+The list of such filesystems includes (as of 2016-12-18)
+.BR btrfs ,
+.BR ext2/3/4 ,
+.BR f2fs ,
+.BR jfs ,
+.BR nilfs2 ,
+.BR reiserfs ,
+and
+.BR xfs .
+The list above may be incomplete, as more filesystems gain support. If in
+doubt, the easiest way to find out whether a filesystem has support is to create a small
+loopback mount and test freezing it.
+.SH AUTHOR
+.PP
+Written by Hajime Taira.
+.SH NOTES
+.PP
+This man page is based on
+.BR xfs_freeze (8).
+.SH SEE ALSO
+.BR mount (8)
+.SH AVAILABILITY
+The fsfreeze command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/fsfreeze.c b/sys-utils/fsfreeze.c
new file mode 100644
index 0000000..401ab5c
--- /dev/null
+++ b/sys-utils/fsfreeze.c
@@ -0,0 +1,152 @@
+/*
+ * fsfreeze.c -- Filesystem freeze/unfreeze IO for Linux
+ *
+ * Copyright (C) 2010 Hajime Taira <htaira@redhat.com>
+ * Masatake Yamato <yamato@redhat.com>
+ *
+ * This program is free software. You can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation: either version 1 or
+ * (at your option) any later version.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <getopt.h>
+
+#include "c.h"
+#include "blkdev.h"
+#include "nls.h"
+#include "closestream.h"
+#include "optutils.h"
+
+enum fs_operation {
+ NOOP, /* no action chosen; main() rejects this after option parsing */
+ FREEZE, /* selected by -f/--freeze; main() issues the FIFREEZE ioctl */
+ UNFREEZE /* selected by -u/--unfreeze; main() issues the FITHAW ioctl */
+};
+
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout; /* help always goes to stdout; the function exits instead of returning */
+ fputs(USAGE_HEADER, out);
+ fprintf(out,
+ _(" %s [options] <mountpoint>\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Suspend access to a filesystem.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -f, --freeze freeze the filesystem\n"), out);
+ fputs(_(" -u, --unfreeze unfreeze the filesystem\n"), out);
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(19)); /* printf() writes to stdout, i.e. the same stream as out */
+ printf(USAGE_MAN_TAIL("fsfreeze(8)"));
+
+ exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char **argv) /* issue FIFREEZE or FITHAW on the directory named on the command line */
+{
+ int fd = -1, c;
+ int action = NOOP, rc = EXIT_FAILURE; /* rc stays EXIT_FAILURE unless the ioctl succeeds */
+ char *path;
+ struct stat sb;
+
+ static const struct option longopts[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "freeze", no_argument, NULL, 'f' },
+ { "unfreeze", no_argument, NULL, 'u' },
+ { "version", no_argument, NULL, 'V' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ static const ul_excl_t excl[] = { /* rows and cols in ASCII order */
+ { 'f','u' }, /* freeze, unfreeze */
+ { 0 }
+ };
+ int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ while ((c = getopt_long(argc, argv, "hfuV", longopts, NULL)) != -1) {
+
+ err_exclusive_options(c, longopts, excl, excl_st); /* NOTE(review): presumably rejects -f combined with -u per excl[]; helper is defined elsewhere */
+
+ switch(c) {
+ case 'h':
+ usage();
+ break;
+ case 'f':
+ action = FREEZE;
+ break;
+ case 'u':
+ action = UNFREEZE;
+ break;
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ exit(EXIT_SUCCESS);
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+ if (action == NOOP)
+ errx(EXIT_FAILURE, _("neither --freeze or --unfreeze specified"));
+ if (optind == argc)
+ errx(EXIT_FAILURE, _("no filename specified"));
+ path = argv[optind++];
+
+ if (optind != argc) { /* exactly one positional argument is accepted */
+ warnx(_("unexpected number of arguments"));
+ errtryhelp(EXIT_FAILURE);
+ }
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ err(EXIT_FAILURE, _("cannot open %s"), path);
+
+ if (fstat(fd, &sb) == -1) {
+ warn(_("stat of %s failed"), path);
+ goto done;
+ }
+
+ if (!S_ISDIR(sb.st_mode)) { /* only a directory is accepted as the target */
+ warnx(_("%s: is not a directory"), path);
+ goto done;
+ }
+
+ switch (action) {
+ case FREEZE:
+ if (ioctl(fd, FIFREEZE, 0)) {
+ warn(_("%s: freeze failed"), path);
+ goto done;
+ }
+ break;
+ case UNFREEZE:
+ if (ioctl(fd, FITHAW, 0)) {
+ warn(_("%s: unfreeze failed"), path);
+ goto done;
+ }
+ break;
+ default:
+ abort(); /* NOOP was rejected above, so any other value is a programming error */
+ }
+
+ rc = EXIT_SUCCESS;
+done:
+ if (fd >= 0)
+ close(fd);
+ return rc;
+}
+
diff --git a/sys-utils/fstab.5 b/sys-utils/fstab.5
new file mode 100644
index 0000000..a9e9f8c
--- /dev/null
+++ b/sys-utils/fstab.5
@@ -0,0 +1,248 @@
+.\" Copyright (c) 1980, 1989, 1991 The Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)fstab.5 6.5 (Berkeley) 5/10/91
+.\"
+.TH FSTAB 5 "February 2015" "util-linux" "File Formats"
+.SH NAME
+fstab \- static information about the filesystems
+.SH SYNOPSIS
+.I /etc/fstab
+.SH DESCRIPTION
+The file
+.B fstab
+contains descriptive information about the filesystems the system can mount.
+.B fstab
+is only read by programs, and not written; it is the duty of the system
+administrator to properly create and maintain this file. The order of records in
+.B fstab
+is important because
+.BR fsck (8),
+.BR mount (8),
+and
+.BR umount (8)
+sequentially iterate through
+.B fstab
+doing their thing.
+
+Each filesystem is described on a separate line.
+Fields on each line are separated by tabs or spaces.
+Lines starting with '#' are comments. Blank lines are ignored.
+.PP
+The following is a typical example of an
+.B fstab
+entry:
+.sp
+.RS 7
+LABEL=t-home2 /home ext4 defaults,auto_da_alloc 0 2
+.RE
+
+.B The first field
+.RI ( fs_spec ).
+.RS
+This field describes the block special device or
+remote filesystem to be mounted.
+.LP
+For ordinary mounts, it will hold (a link to) a block special
+device node (as created by
+.BR mknod (8))
+for the device to be mounted, like `/dev/cdrom' or `/dev/sdb7'.
+For NFS mounts, this field is <host>:<dir>, e.g., `knuth.aeb.nl:/'.
+For filesystems with no storage, any string can be used, and will show up in
+.BR df (1)
+output, for example. Typical usage is `proc' for procfs; `mem', `none',
+or `tmpfs' for tmpfs. Other special filesystems, like udev and sysfs,
+are typically not listed in
+.BR fstab .
+.LP
+LABEL=<label> or UUID=<uuid> may be given instead of a device name.
+This is the recommended method, as device names are often a coincidence
+of hardware detection order, and can change when other disks are added or removed.
+For example, `LABEL=Boot' or `UUID=3e6be9de\%-8139\%-11d1\%-9106\%-a43f08d823a6'.
+(Use a filesystem-specific tool like
+.BR e2label (8),
+.BR xfs_admin (8),
+or
+.BR fatlabel (8)
+to set LABELs on filesystems).
+
+It's also possible to use PARTUUID= and PARTLABEL=. These partition identifiers
+are supported, for example, for the GUID Partition Table (GPT).
+
+See
+.BR mount (8),
+.BR blkid (8)
+or
+.BR lsblk (8)
+for more details about device identifiers.
+
+.LP
+Note that
+.BR mount (8)
+uses UUIDs as strings. The string representation of the UUID should be based on
+lower case characters.
+.RE
+
+.B The second field
+.RI ( fs_file ).
+.RS
+This field describes the mount point (target) for the filesystem. For swap partitions, this
+field should be specified as `none'. If the name of the mount point
+contains spaces or tabs these can be escaped as `\\040' and `\\011'
+respectively.
+.RE
+
+.B The third field
+.RI ( fs_vfstype ).
+.RS
+This field describes the type of the filesystem. Linux supports many
+filesystem types: ext4, xfs, btrfs, f2fs, vfat, ntfs, hfsplus,
+tmpfs, sysfs, proc, iso9660, udf, squashfs, nfs, cifs, and many more.
+For more details, see
+.BR mount (8).
+
+An entry
+.I swap
+denotes a file or partition to be used
+for swapping, cf.\&
+.BR swapon (8).
+An entry
+.I none
+is useful for bind or move mounts.
+
+More than one type may be specified in a comma-separated list.
+
+.BR mount (8)
+and
+.BR umount (8)
+support filesystem
+.IR subtypes .
+The subtype is defined by '.subtype' suffix. For
+example 'fuse.sshfs'. It's recommended to use subtype notation rather than add
+any prefix to the first fstab field (for example 'sshfs#example.com' is
+deprecated).
+.RE
+
+.B The fourth field
+.RI ( fs_mntops ).
+.RS
+This field describes the mount options associated with the filesystem.
+
+It is formatted as a comma-separated list of options.
+It contains at least the type of mount
+.RB ( ro
+or
+.BR rw ),
+plus any additional options appropriate to the filesystem
+type (including performance-tuning options).
+For details, see
+.BR mount (8)
+or
+.BR swapon (8).
+
+Basic filesystem-independent options are:
+.TP
+.B defaults
+use default options: rw, suid, dev, exec, auto, nouser, and async.
+.TP
+.B noauto
+do not mount when "mount -a" is given (e.g., at boot time)
+.TP
+.B user
+allow a user to mount
+.TP
+.B owner
+allow device owner to mount
+.TP
+.B comment
+or
+.B x-<name>
+for use by fstab-maintaining programs
+.TP
+.B nofail
+do not report errors for this device if it does not exist.
+.RE
+
+.B The fifth field
+.RI ( fs_freq ).
+.RS
+This field is used by
+.BR dump (8)
+to determine which filesystems need to be dumped.
+Defaults to zero (don't dump) if not present.
+.RE
+
+.B The sixth field
+.RI ( fs_passno ).
+.RS
+This field is used by
+.BR fsck (8)
+to determine the order in which filesystem checks are done at
+boot time. The root filesystem should be specified with a
+.I fs_passno
+of 1. Other filesystems should have a
+.I fs_passno
+of 2. Filesystems within a drive will be checked sequentially, but
+filesystems on different drives will be checked at the same time to utilize
+parallelism available in the hardware.
+Defaults to zero (don't fsck) if not present.
+
+.SH NOTES
+The proper way to read records from
+.B fstab
+is to use the routines
+.BR getmntent (3)
+or
+.BR libmount .
+
+The keyword
+.B ignore
+as a filesystem type (3rd field) is no longer supported by the pure
+libmount based mount utility (since util-linux v2.22).
+
+.SH FILES
+.IR /etc/fstab ,
+.I <fstab.h>
+.SH "SEE ALSO"
+.BR getmntent (3),
+.BR fs (5),
+.BR findmnt (8),
+.BR mount (8),
+.BR swapon (8)
+.SH HISTORY
+The ancestor of this
+.B fstab
+file format appeared in 4.0BSD.
+.\" But without comment convention, and options and vfs_type.
+.\" Instead there was a type rw/ro/rq/sw/xx, where xx is the present 'ignore'.
+.SH AVAILABILITY
+This man page is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/fstrim.8 b/sys-utils/fstrim.8
new file mode 100644
index 0000000..ff572a4
--- /dev/null
+++ b/sys-utils/fstrim.8
@@ -0,0 +1,131 @@
+.TH FSTRIM 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+fstrim \- discard unused blocks on a mounted filesystem
+.SH SYNOPSIS
+.B fstrim
+.RB [ \-Aa ]
+.RB [ \-o
+.IR offset ]
+.RB [ \-l
+.IR length ]
+.RB [ \-m
+.IR minimum-size ]
+.RB [ \-v ]
+.I mountpoint
+
+.SH DESCRIPTION
+.B fstrim
+is used on a mounted filesystem to discard (or "trim") blocks which are not in
+use by the filesystem. This is useful for solid-state drives (SSDs) and
+thinly-provisioned storage.
+.PP
+By default,
+.B fstrim
+will discard all unused blocks in the filesystem. Options may be used to
+modify this behavior based on range or size, as explained below.
+.PP
+The
+.I mountpoint
+argument is the pathname of the directory where the filesystem
+is mounted.
+.PP
+Running
+.B fstrim
+frequently, or even using
+.BR "mount -o discard" ,
+might negatively affect the lifetime of poor-quality SSD devices. For most
+desktop and server systems a sufficient trimming frequency is once a week.
+Note that not all
+devices support a queued trim, so each trim command incurs a performance penalty
+on whatever else might be trying to use the disk at the time.
+
+.SH OPTIONS
+The \fIoffset\fR, \fIlength\fR, and \fIminimum-size\fR arguments may be
+followed by the multiplicative suffixes KiB (=1024),
+MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB"
+is optional, e.g., "K" has the same meaning as "KiB") or the suffixes
+KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB.
+
+.IP "\fB\-A, \-\-fstab\fP"
+Trim all mounted filesystems mentioned in \fI/etc/fstab\fR on devices that support the
+discard operation.
+The other supplied options, like \fB\-\-offset\fR, \fB\-\-length\fR and
+\fB\-\-minimum\fR, are applied to all these devices.
+Errors from filesystems that do not support the discard operation are silently
+ignored.
+
+.IP "\fB\-a, \-\-all\fP"
+Trim all mounted filesystems on devices that support the discard operation.
+The other supplied options, like \fB\-\-offset\fR, \fB\-\-length\fR and
+\fB\-\-minimum\fR, are applied to all these devices.
+Errors from filesystems that do not support the discard operation are silently
+ignored.
+.IP "\fB\-n, \-\-dry\-run\fP"
+This option does everything apart from actually calling the FITRIM ioctl.
+.IP "\fB\-o, \-\-offset\fP \fIoffset\fP"
+Byte offset in the filesystem from which to begin searching for free blocks
+to discard. The default value is zero, starting at the beginning of the
+filesystem.
+.IP "\fB\-l, \-\-length\fP \fIlength\fP"
+The number of bytes (after the starting point) to search for free blocks
+to discard. If the specified value extends past the end of the filesystem,
+.B fstrim
+will stop at the filesystem size boundary. The default value extends to
+the end of the filesystem.
+.IP "\fB\-m, \-\-minimum\fP \fIminimum-size\fP"
+Minimum contiguous free range to discard, in bytes. (This value is internally
+rounded up to a multiple of the filesystem block size.) Free ranges smaller
+than this will be ignored. By increasing this value, the fstrim operation
+will complete more quickly for filesystems with badly fragmented freespace,
+although not all blocks will be discarded. The default value is zero,
+discarding every free block.
+.IP "\fB\-v, \-\-verbose\fP"
+Verbose execution. With this option
+.B fstrim
+will output the number of bytes passed from the filesystem
+down the block stack to the device for potential discard. This number is a
+maximum discard amount from the storage device's perspective, because
+.I FITRIM
+ioctl called repeatedly will keep sending the same sectors for discard.
+.sp
+.B fstrim
+will report the same potential discard bytes each time, but only sectors which
+had been written to between the discards would actually be discarded by the
+storage device. Further, the kernel block layer reserves the right to adjust
+the discard ranges to fit raid stripe geometry, non-trim capable devices in a
+LVM setup, etc. These reductions would not be reflected in fstrim_range.len
+(the
+.B \-\-length
+option).
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+
+.SH RETURN CODES
+.IP 0
+success
+.IP 1
+failure
+.IP 32
+all failed
+.IP 64
+some filesystem discards have succeeded, some failed
+.PP
+The command
+.B fstrim --all
+returns 0 (all succeeded), 32 (all failed) or 64 (some failed, some succeeded).
+
+.SH AUTHOR
+.nf
+Lukas Czerner <lczerner@redhat.com>
+Karel Zak <kzak@redhat.com>
+.fi
+.SH SEE ALSO
+.BR blkdiscard (8),
+.BR mount (8)
+.SH AVAILABILITY
+The fstrim command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/fstrim.c b/sys-utils/fstrim.c
new file mode 100644
index 0000000..2a67892
--- /dev/null
+++ b/sys-utils/fstrim.c
@@ -0,0 +1,417 @@
+/*
+ * fstrim.c -- discard the part (or whole) of mounted filesystem.
+ *
+ * Copyright (C) 2010 Red Hat, Inc. All rights reserved.
+ * Written by Lukas Czerner <lczerner@redhat.com>
+ * Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This program uses FITRIM ioctl to discard parts or the whole filesystem
+ * online (mounted). You can specify range (start and length) to be
+ * discarded, or simply discard whole filesystem.
+ */
+
+#include <string.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <getopt.h>
+
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <linux/fs.h>
+
+#include "nls.h"
+#include "strutils.h"
+#include "c.h"
+#include "closestream.h"
+#include "pathnames.h"
+#include "sysfs.h"
+
+#include <libmount.h>
+
+
+#ifndef FITRIM /* fallback definitions for headers that do not provide FITRIM */
+struct fstrim_range {
+ uint64_t start; /* filled from --offset in main() */
+ uint64_t len; /* filled from --length; main() defaults it to ULLONG_MAX */
+ uint64_t minlen; /* filled from --minimum */
+};
+#define FITRIM _IOWR('X', 121, struct fstrim_range)
+#endif
+
+struct fstrim_control {
+ struct fstrim_range range; /* template range; fstrim_filesystem() copies it before each ioctl */
+
+ unsigned int verbose : 1, /* -v: print the trimmed byte count */
+ fstab : 1, /* -A: take the filesystem list from fstab */
+ dryrun : 1; /* -n: skip the FITRIM ioctl */
+};
+
+/* returns: 0 = success, 1 = unsupported (FITRIM failed with EOPNOTSUPP or ENOTTY), < 0 = negative errno */
+static int fstrim_filesystem(struct fstrim_control *ctl, const char *path, const char *devname)
+{
+ int fd, rc;
+ struct stat sb;
+ struct fstrim_range range;
+
+ /* kernel modifies the range */
+ memcpy(&range, &ctl->range, sizeof(range));
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ rc = -errno; /* capture errno first: warn() does I/O and may overwrite it */
+ warn(_("cannot open %s"), path);
+ goto done;
+ }
+ if (fstat(fd, &sb) == -1) {
+ rc = -errno; /* capture errno first: warn() does I/O and may overwrite it */
+ warn(_("stat of %s failed"), path);
+ goto done;
+ }
+ if (!S_ISDIR(sb.st_mode)) {
+ warnx(_("%s: not a directory"), path);
+ rc = -EINVAL;
+ goto done;
+ }
+
+ if (ctl->dryrun) { /* report and leave before the ioctl is issued */
+ if (devname)
+ printf(_("%s: 0 B (dry run) trimmed on %s\n"), path, devname);
+ else
+ printf(_("%s: 0 B (dry run) trimmed\n"), path);
+ rc = 0;
+ goto done;
+ }
+
+ errno = 0;
+ if (ioctl(fd, FITRIM, &range)) {
+ rc = errno == EOPNOTSUPP || errno == ENOTTY ? 1 : -errno;
+
+ if (rc != 1) /* "unsupported" is reported by the caller, not here */
+ warn(_("%s: FITRIM ioctl failed"), path);
+ goto done;
+ }
+
+ if (ctl->verbose) {
+ char *str = size_to_human_string(
+ SIZE_SUFFIX_3LETTER | SIZE_SUFFIX_SPACE,
+ (uint64_t) range.len);
+ if (devname)
+ /* TRANSLATORS: The standard value here is a very large number. */
+ printf(_("%s: %s (%" PRIu64 " bytes) trimmed on %s\n"),
+ path, str, (uint64_t) range.len, devname);
+ else
+ /* TRANSLATORS: The standard value here is a very large number. */
+ printf(_("%s: %s (%" PRIu64 " bytes) trimmed\n"),
+ path, str, (uint64_t) range.len);
+
+ free(str);
+ }
+
+ rc = 0;
+done:
+ if (fd >= 0) /* fd is negative only when open() itself failed */
+ close(fd);
+ return rc;
+}
+
+static int has_discard(const char *devname, struct path_cxt **wholedisk) /* non-zero: discard_granularity > 0, or the sysfs lookup failed */
+{
+ struct path_cxt *pc = NULL;
+ uint64_t dg = 0;
+ dev_t disk = 0, dev;
+ int rc = -1;
+
+ dev = sysfs_devname_to_devno(devname);
+ if (!dev)
+ goto fail;
+
+ pc = ul_new_sysfs_path(dev, NULL, NULL);
+ if (!pc)
+ goto fail;
+
+ /*
+ * This is tricky to read the info from sys/, because the queue
+ * attributes are provided for whole devices (disk) only. We're trying
+ * to reuse the whole-disk sysfs context to optimize this stuff (as
+ * systems usually have just one disk).
+ */
+ rc = sysfs_blkdev_get_wholedisk(pc, NULL, 0, &disk);
+ if (rc != 0 || !disk)
+ goto fail;
+
+ if (dev != disk) {
+ /* Partition, try reuse whole-disk context if valid for the
+ * current device, otherwise create new context for the
+ * whole-disk.
+ */
+ if (*wholedisk && sysfs_blkdev_get_devno(*wholedisk) != disk) {
+ ul_unref_path(*wholedisk);
+ *wholedisk = NULL;
+ }
+ if (!*wholedisk) {
+ *wholedisk = ul_new_sysfs_path(disk, NULL, NULL);
+ if (!*wholedisk)
+ goto fail;
+ }
+ sysfs_blkdev_set_parent(pc, *wholedisk);
+ }
+
+ rc = ul_path_read_u64(pc, &dg, "queue/discard_granularity");
+
+ ul_unref_path(pc);
+ return rc == 0 && dg > 0;
+fail:
+ ul_unref_path(pc);
+ return 1; /* lookup failed: answer 1 so fstrim_all() does not skip the device */
+}
+
+
+static int uniq_fs_target_cmp( /* comparator handed to mnt_table_uniq_fs() in fstrim_all() */
+ struct libmnt_table *tb __attribute__((__unused__)),
+ struct libmnt_fs *a,
+ struct libmnt_fs *b)
+{
+ return !mnt_fs_streq_target(a, mnt_fs_get_target(b)); /* negated match result; presumably 0 means "same target" - confirm against libmount */
+}
+
+static int uniq_fs_source_cmp( /* comparator handed to mnt_table_uniq_fs() in fstrim_all() */
+ struct libmnt_table *tb __attribute__((__unused__)),
+ struct libmnt_fs *a,
+ struct libmnt_fs *b)
+{
+ if (mnt_fs_is_pseudofs(a) || mnt_fs_is_netfs(a) ||
+ mnt_fs_is_pseudofs(b) || mnt_fs_is_netfs(b))
+ return 1; /* pseudo and network filesystems are never reported as matching */
+
+ return !mnt_fs_streq_srcpath(a, mnt_fs_get_srcpath(b)); /* negated match result; presumably 0 means "same source path" - confirm against libmount */
+}
+
+/*
+ * fstrim --all follows "mount -a" return codes:
+ *
+ * 0 = all success
+ * 32 = all failed
+ * 64 = some failed, some success
+ */
+static int fstrim_all(struct fstrim_control *ctl)
+{
+ struct libmnt_fs *fs;
+ struct libmnt_iter *itr;
+ struct libmnt_table *tab;
+ struct libmnt_cache *cache = NULL;
+ struct path_cxt *wholedisk = NULL; /* whole-disk sysfs context reused across has_discard() calls */
+ int cnt = 0, cnt_err = 0; /* filesystems attempted / attempts that returned < 0 */
+ const char *filename = _PATH_PROC_MOUNTINFO;
+
+ mnt_init_debug(0);
+ ul_path_init_debug();
+
+ itr = mnt_new_iter(MNT_ITER_BACKWARD);
+ if (!itr)
+ err(MNT_EX_FAIL, _("failed to initialize libmount iterator"));
+
+ if (ctl->fstab) /* -A: read fstab instead of mountinfo */
+ filename = mnt_get_fstab_path();
+
+ tab = mnt_new_table_from_file(filename);
+ if (!tab)
+ err(MNT_EX_FAIL, _("failed to parse %s"), filename);
+
+ /* de-duplicate by mountpoints */
+ mnt_table_uniq_fs(tab, 0, uniq_fs_target_cmp);
+
+ /* de-duplicate by source */
+ mnt_table_uniq_fs(tab, MNT_UNIQ_FORWARD, uniq_fs_source_cmp);
+
+ if (ctl->fstab) { /* the cache is only used to resolve fstab specs below */
+ cache = mnt_new_cache();
+ if (!cache)
+ err(MNT_EX_FAIL, _("failed to initialize libmount cache"));
+ }
+
+ while (mnt_table_next_fs(tab, itr, &fs) == 0) {
+ const char *src = mnt_fs_get_srcpath(fs),
+ *tgt = mnt_fs_get_target(fs);
+ char *path;
+ int rc = 1; /* stays 1 until tgt is confirmed as its own mountpoint */
+
+ if (!tgt || mnt_fs_is_pseudofs(fs) || mnt_fs_is_netfs(fs))
+ continue;
+
+ if (!src && cache) {
+ /* convert LABEL= (etc.) from fstab to paths */
+ const char *spec = mnt_fs_get_source(fs);
+
+ if (!spec)
+ continue;
+ src = mnt_resolve_spec(spec, cache);
+ }
+
+ if (!src || *src != '/') /* only sources given as absolute paths are trimmed */
+ continue;
+
+ /* Is it really accessible mountpoint? Not all mountpoints are
+ * accessible (maybe over mounted by another filesystem) */
+ path = mnt_get_mountpoint(tgt);
+ if (path && strcmp(path, tgt) == 0)
+ rc = 0;
+ free(path);
+ if (rc)
+ continue; /* overlaying mount */
+
+ if (!has_discard(src, &wholedisk))
+ continue;
+ cnt++;
+
+ /*
+ * We're able to detect that the device supports discard, but
+ * things also depend on filesystem or device mapping, for
+ * example vfat or LUKS (by default) does not support FSTRIM.
+ *
+ * This is the reason why we ignore EOPNOTSUPP and ENOTTY errors
+ * from discard ioctl.
+ */
+ if (fstrim_filesystem(ctl, tgt, src) < 0) /* a return of 1 (unsupported) is not counted as an error */
+ cnt_err++;
+ }
+
+ ul_unref_path(wholedisk);
+ mnt_unref_table(tab);
+ mnt_free_iter(itr);
+ mnt_unref_cache(cache);
+
+ if (cnt && cnt == cnt_err)
+ return MNT_EX_FAIL; /* all failed */
+ if (cnt && cnt_err)
+ return MNT_EX_SOMEOK; /* some ok */
+
+ return MNT_EX_SUCCESS; /* also reached when nothing was attempted (cnt == 0) */
+}
+
+static void __attribute__((__noreturn__)) usage(void) /* prints the help text to stdout and exits with EXIT_SUCCESS */
+{
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out,
+ _(" %s [options] <mount point>\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Discard unused blocks on a mounted filesystem.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -a, --all trim all supported mounted filesystems\n"), out);
+ fputs(_(" -A, --fstab trim all supported mounted filesystems from /etc/fstab\n"), out);
+ fputs(_(" -o, --offset <num> the offset in bytes to start discarding from\n"), out);
+ fputs(_(" -l, --length <num> the number of bytes to discard\n"), out);
+ fputs(_(" -m, --minimum <num> the minimum extent length to discard\n"), out);
+ fputs(_(" -v, --verbose print number of discarded bytes\n"), out);
+ fputs(_(" -n, --dry-run does everything, but trim\n"), out);
+
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(21));
+ printf(USAGE_MAN_TAIL("fstrim(8)"));
+ exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char **argv) /* parses options, then trims one mountpoint or (with -a/-A) all of them */
+{
+ char *path = NULL;
+ int c, rc, all = 0;
+ struct fstrim_control ctl = {
+ .range = { .len = ULLONG_MAX } /* default length; start and minlen stay zero */
+ };
+
+ static const struct option longopts[] = {
+ { "all", no_argument, NULL, 'a' },
+ { "fstab", no_argument, NULL, 'A' },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+ { "offset", required_argument, NULL, 'o' },
+ { "length", required_argument, NULL, 'l' },
+ { "minimum", required_argument, NULL, 'm' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "dry-run", no_argument, NULL, 'n' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ while ((c = getopt_long(argc, argv, "Aahl:m:no:Vv", longopts, NULL)) != -1) {
+ switch(c) {
+ case 'A': /* -A also sets all, via the fallthrough below */
+ ctl.fstab = 1;
+ /* fallthrough */
+ case 'a':
+ all = 1;
+ break;
+ case 'n':
+ ctl.dryrun = 1;
+ break;
+ case 'h':
+ usage();
+ break;
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+ case 'l':
+ ctl.range.len = strtosize_or_err(optarg,
+ _("failed to parse length"));
+ break;
+ case 'o':
+ ctl.range.start = strtosize_or_err(optarg,
+ _("failed to parse offset"));
+ break;
+ case 'm':
+ ctl.range.minlen = strtosize_or_err(optarg,
+ _("failed to parse minimum extent length"));
+ break;
+ case 'v':
+ ctl.verbose = 1;
+ break;
+ default:
+ errtryhelp(EXIT_FAILURE);
+ break;
+ }
+ }
+
+ if (!all) { /* single-filesystem mode needs exactly one mountpoint argument */
+ if (optind == argc)
+ errx(EXIT_FAILURE, _("no mountpoint specified"));
+ path = argv[optind++];
+ }
+
+ if (optind != argc) { /* any leftover argument is an error in both modes */
+ warnx(_("unexpected number of arguments"));
+ errtryhelp(EXIT_FAILURE);
+ }
+
+ if (all)
+ return fstrim_all(&ctl); /* MNT_EX_* codes */
+
+ rc = fstrim_filesystem(&ctl, path, NULL);
+ if (rc == 1) /* the "unsupported" case is reported here rather than in fstrim_filesystem() */
+ warnx(_("%s: the discard operation is not supported"), path);
+
+ return rc == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/sys-utils/fstrim.service.in b/sys-utils/fstrim.service.in
new file mode 100644
index 0000000..2d5daf9
--- /dev/null
+++ b/sys-utils/fstrim.service.in
@@ -0,0 +1,7 @@
+[Unit]
+Description=Discard unused blocks on filesystems from /etc/fstab
+Documentation=man:fstrim(8)
+
+[Service]
+Type=oneshot
+ExecStart=@sbindir@/fstrim -Av
diff --git a/sys-utils/fstrim.timer b/sys-utils/fstrim.timer
new file mode 100644
index 0000000..3a3762d
--- /dev/null
+++ b/sys-utils/fstrim.timer
@@ -0,0 +1,11 @@
+[Unit]
+Description=Discard unused blocks once a week
+Documentation=man:fstrim
+
+[Timer]
+OnCalendar=weekly
+AccuracySec=1h
+Persistent=true
+
+[Install]
+WantedBy=timers.target
diff --git a/sys-utils/hwclock-cmos.c b/sys-utils/hwclock-cmos.c
new file mode 100644
index 0000000..a11f676
--- /dev/null
+++ b/sys-utils/hwclock-cmos.c
@@ -0,0 +1,420 @@
+/*
+ * i386 CMOS starts out with 14 bytes of clock data; Alpha has something
+ * similar, but with details depending on the machine type.
+ *
+ * byte 0: seconds 0-59
+ * byte 2: minutes 0-59
+ * byte 4: hours 0-23 in 24hr mode,
+ * 1-12 in 12hr mode, with high bit unset/set
+ * if am/pm.
+ * byte 6: weekday 1-7, Sunday=1
+ * byte 7: day of the month 1-31
+ * byte 8: month 1-12
+ * byte 9: year 0-99
+ *
+ * Numbers are stored in BCD/binary if bit 2 of byte 11 is unset/set. The
+ * clock is in 12hr/24hr mode if bit 1 of byte 11 is unset/set. The clock is
+ * undefined (being updated) if bit 7 of byte 10 is set. The clock is frozen
+ * (to be updated) by setting bit 7 of byte 11. Bit 7 of byte 14 indicates
+ * whether the CMOS clock is reliable: it is 1 if RTC power has been good
+ * since this bit was last read; it is 0 when the battery is dead and system
+ * power has been off.
+ *
+ * Avoid setting the RTC clock within 2 seconds of the day rollover that
+ * starts a new month or enters daylight saving time.
+ *
+ * The century situation is messy:
+ *
+ * Usually byte 50 (0x32) gives the century (in BCD, so 19 or 20 hex), but
+ * IBM PS/2 has (part of) a checksum there and uses byte 55 (0x37).
+ * Sometimes byte 127 (0x7f) or Bank 1, byte 0x48 gives the century. The
+ * original RTC will not access any century byte; some modern versions will.
+ * If a modern RTC or BIOS increments the century byte it may go from 0x19
+ * to 0x20, but in some buggy cases 0x1a is produced.
+ */
+/*
+ * A struct tm has int fields
+ * tm_sec 0-59, 60 or 61 only for leap seconds
+ * tm_min 0-59
+ * tm_hour 0-23
+ * tm_mday 1-31
+ * tm_mon 0-11
+ * tm_year number of years since 1900
+ * tm_wday 0-6, 0=Sunday
+ * tm_yday 0-365
+ * tm_isdst >0: yes, 0: no, <0: unknown
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "nls.h"
+#include "pathnames.h"
+
+/* for inb, outb: real port I/O on x86 when a header provides it, no-op stubs otherwise */
+#if defined(__i386__) || defined(__x86_64__)
+# ifdef HAVE_SYS_IO_H
+# include <sys/io.h>
+# elif defined(HAVE_ASM_IO_H)
+# include <asm/io.h>
+# else
+# undef __i386__ /* makes the later #if blocks in this file take their non-x86 branch */
+# undef __x86_64__
+# warning "disable cmos access - no sys/io.h or asm/io.h"
+static void outb(int a __attribute__((__unused__)),
+ int b __attribute__((__unused__)))
+{
+}
+
+static int inb(int c __attribute__((__unused__)))
+{
+ return 0;
+}
+# endif /* HAVE_SYS_IO_H, HAVE_ASM_IO_H */
+#else
+# warning "disable cmos access - not i386 or x86_64"
+static void outb(int a __attribute__((__unused__)),
+ int b __attribute__((__unused__)))
+{
+}
+
+static int inb(int c __attribute__((__unused__)))
+{
+ return 0;
+}
+#endif /* for inb, outb */
+
+#include "hwclock.h"
+
+#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10) /* converts in place: val is assigned to, so it must be an lvalue */
+#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10) /* converts in place: val is assigned to, so it must be an lvalue */
+
+#define IOPL_NOT_IMPLEMENTED -2 /* returned by i386_iopl() on non-x86 builds */
+
+/*
+ * POSIX uses 1900 as epoch for a struct tm, and 1970 for a time_t.
+ */
+#define TM_EPOCH 1900
+
+static unsigned short clock_ctl_addr = 0x70; /* port that receives the CMOS register number */
+static unsigned short clock_data_addr = 0x71; /* port through which the selected register is read or written */
+
+/*
+ * Hmmh, this isn't very atomic. Maybe we should force an error instead?
+ *
+ * TODO: optimize the access to CMOS by mlockall(MCL_CURRENT) and SCHED_FIFO
+ */
+static unsigned long atomic(unsigned long (*op) (unsigned long),
+ unsigned long arg)
+{
+ return (*op) (arg); /* plain call; nothing here prevents interruption */
+}
+
+/*
+ * We only want to read CMOS data, but unfortunately writing to bit 7
+ * disables (1) or enables (0) NMI; since this bit is read-only we have
+ * to guess the old status. Various docs suggest that one should disable
+ * NMI while reading/writing CMOS data, and enable it again afterwards.
+ * This would yield the sequence
+ *
+ * outb (reg | 0x80, 0x70);
+ * val = inb(0x71);
+ * outb (0x0d, 0x70); // 0x0d: random read-only location
+ *
+ * Other docs state that "any write to 0x70 should be followed by an
+ * action to 0x71 or the RTC will be left in an unknown state". Most
+ * docs say that it doesn't matter at all what one does.
+ *
+ * bit 0x80: disable NMI while reading - should we? Let us follow the
+ * kernel and not disable. Called only with 0 <= reg < 128
+ */
+
+static inline unsigned long cmos_read(unsigned long reg)
+{
+ outb(reg, clock_ctl_addr); /* select the register */
+ return inb(clock_data_addr); /* then fetch its value from the data port */
+}
+
+static inline unsigned long cmos_write(unsigned long reg, unsigned long val)
+{
+ outb(reg, clock_ctl_addr); /* select the register */
+ outb(val, clock_data_addr); /* then store val through the data port */
+ return 0; /* always 0; no failure is detected here */
+}
+
+static unsigned long cmos_set_time(unsigned long arg) /* arg carries a struct tm pointer cast to unsigned long; always returns 0 */
+{
+ unsigned char save_control, save_freq_select, pmbit = 0;
+ struct tm tm = *(struct tm *)arg; /* local copy, so the caller's struct is left untouched */
+
+/*
+ * CMOS byte 10 (clock status register A) has 3 bitfields:
+ * bit 7: 1 if data invalid, update in progress (read-only bit)
+ * (this is raised 244 us before the actual update starts)
+ * 6-4 select base frequency
+ * 010: 32768 Hz time base (default)
+ * 111: reset
+ * all other combinations are manufacturer-dependent
+ * (e.g.: DS1287: 010 = start oscillator, anything else = stop)
+ * 3-0 rate selection bits for interrupt
+ * 0000 none (may stop RTC)
+ * 0001, 0010 give same frequency as 1000, 1001
+ * 0011 122 microseconds (minimum, 8192 Hz)
+ * .... each increase by 1 halves the frequency, doubles the period
+ * 1111 500 milliseconds (maximum, 2 Hz)
+ * 0110 976.562 microseconds (default 1024 Hz)
+ */
+ save_control = cmos_read(11); /* tell the clock it's being set */
+ cmos_write(11, (save_control | 0x80));
+ save_freq_select = cmos_read(10); /* stop and reset prescaler */
+ cmos_write(10, (save_freq_select | 0x70));
+
+ tm.tm_year %= 100; /* only the two-digit year is written; no century byte is touched */
+ tm.tm_mon += 1; /* struct tm months are 0-11, the chip uses 1-12 */
+ tm.tm_wday += 1; /* struct tm weekdays are 0-6, the chip uses 1-7 */
+
+ if (!(save_control & 0x02)) { /* 12hr mode; the default is 24hr mode */
+ if (tm.tm_hour == 0)
+ tm.tm_hour = 24; /* falls into the branch below: midnight is written as 12 with the PM bit. NOTE(review): confirm against the chip's 12 AM/PM convention */
+ if (tm.tm_hour > 12) {
+ tm.tm_hour -= 12;
+ pmbit = 0x80;
+ }
+ }
+
+ if (!(save_control & 0x04)) { /* BCD mode - the default */
+ BIN_TO_BCD(tm.tm_sec);
+ BIN_TO_BCD(tm.tm_min);
+ BIN_TO_BCD(tm.tm_hour);
+ BIN_TO_BCD(tm.tm_wday);
+ BIN_TO_BCD(tm.tm_mday);
+ BIN_TO_BCD(tm.tm_mon);
+ BIN_TO_BCD(tm.tm_year);
+ }
+
+ cmos_write(0, tm.tm_sec);
+ cmos_write(2, tm.tm_min);
+ cmos_write(4, tm.tm_hour | pmbit);
+ cmos_write(6, tm.tm_wday);
+ cmos_write(7, tm.tm_mday);
+ cmos_write(8, tm.tm_mon);
+ cmos_write(9, tm.tm_year);
+
+ /*
+ * The kernel sources, linux/arch/i386/kernel/time.c, have the
+ * following comment:
+ *
+ * The following flags have to be released exactly in this order,
+ * otherwise the DS12887 (popular MC146818A clone with integrated
+ * battery and quartz) will not reset the oscillator and will not
+ * update precisely 500 ms later. You won't find this mentioned in
+ * the Dallas Semiconductor data sheets, but who believes data
+ * sheets anyway ... -- Markus Kuhn
+ */
+ cmos_write(11, save_control);
+ cmos_write(10, save_freq_select);
+ return 0;
+}
+
+static int hclock_read(unsigned long reg)
+{
+ return atomic(cmos_read, reg); /* value of CMOS register reg, narrowed from unsigned long to int */
+}
+
+static void hclock_set_time(const struct tm *tm)
+{
+ atomic(cmos_set_time, (unsigned long)(tm)); /* pointer travels as unsigned long; cmos_set_time() casts it back */
+}
+
+static inline int cmos_clock_busy(void) /* non-zero (0x80) while the update-in-progress bit is set */
+{
+ return
+ /* poll bit 7 (UIP) of Control Register A */
+ (hclock_read(10) & 0x80);
+}
+
+static int synchronize_to_clock_tick_cmos(const struct hwclock_control *ctl
+ __attribute__((__unused__)))
+{
+ int i;
+
+ /*
+ * Wait for rise. Should be within a second, but in case something
+ * weird happens, we have a limit on this loop to reduce the impact
+ * of this failure.
+ */
+ for (i = 0; !cmos_clock_busy(); i++)
+ if (i >= 10000000)
+ return 1; /* gave up: the busy bit never rose within the iteration limit */
+
+ /* Wait for fall. Should be within 2.228 ms. */
+ for (i = 0; cmos_clock_busy(); i++)
+ if (i >= 1000000)
+ return 1; /* gave up: the busy bit never fell within the iteration limit */
+ return 0; /* busy bit rose and fell: we are right after an update */
+}
+
+/*
+ * Read the hardware clock and return the current time via <tm> argument.
+ * Assume we have an ISA machine and read the clock directly with CPU I/O
+ * instructions.
+ *
+ * This function is not totally reliable. It takes a finite and
+ * unpredictable amount of time to execute the code below. During that time,
+ * the clock may change and we may even read an invalid value in the middle
+ * of an update. We do a few checks to minimize this possibility, but only
+ * the kernel can actually read the clock properly, since it can execute
+ * code in a short and predictable amount of time (by turning off
+ * interrupts).
+ *
+ * In practice, the chance of this function returning the wrong time is
+ * extremely remote.
+ */
+static int read_hardware_clock_cmos(const struct hwclock_control *ctl
+ __attribute__((__unused__)), struct tm *tm)
+{
+ unsigned char status = 0, pmbit = 0;
+
+ while (1) {
+ /*
+ * Bit 7 of Byte 10 of the Hardware Clock value is the
+ * Update In Progress (UIP) bit, which is on while and 244
+ * uS before the Hardware Clock updates itself. It updates
+ * the counters individually, so reading them during an
+ * update would produce garbage. The update takes 2mS, so we
+ * could be spinning here that long waiting for this bit to
+ * turn off.
+ *
+ * Furthermore, it is pathologically possible for us to be
+ * in this code so long that even if the UIP bit is not on
+ * at first, the clock has changed while we were running. We
+ * check for that too, and if it happens, we start over.
+ */
+ if (!cmos_clock_busy()) {
+ /* No clock update in progress, go ahead and read */
+ tm->tm_sec = hclock_read(0);
+ tm->tm_min = hclock_read(2);
+ tm->tm_hour = hclock_read(4);
+ tm->tm_wday = hclock_read(6);
+ tm->tm_mday = hclock_read(7);
+ tm->tm_mon = hclock_read(8);
+ tm->tm_year = hclock_read(9);
+ status = hclock_read(11);
+ /*
+ * Unless the clock changed while we were reading,
+ * consider this a good clock read.
+ */
+ if (tm->tm_sec == hclock_read(0))
+ break;
+ }
+ /*
+ * Yes, in theory we could have been running for 60 seconds
+ * and the above test wouldn't work!
+ */
+ }
+
+ if (!(status & 0x04)) { /* BCD mode - the default */
+ BCD_TO_BIN(tm->tm_sec);
+ BCD_TO_BIN(tm->tm_min);
+ pmbit = (tm->tm_hour & 0x80); /* NOTE(review): the PM bit is only extracted in this BCD branch; in binary mode it stays in tm_hour - confirm intent */
+ tm->tm_hour &= 0x7f;
+ BCD_TO_BIN(tm->tm_hour);
+ BCD_TO_BIN(tm->tm_wday);
+ BCD_TO_BIN(tm->tm_mday);
+ BCD_TO_BIN(tm->tm_mon);
+ BCD_TO_BIN(tm->tm_year);
+ }
+
+ /*
+ * We don't use the century byte of the Hardware Clock since we
+ * don't know its address (usually 50 or 55). Here, we follow the
+ * advice of the X/Open Base Working Group: "if century is not
+ * specified, then values in the range [69-99] refer to years in the
+ * twentieth century (1969 to 1999 inclusive), and values in the
+ * range [00-68] refer to years in the twenty-first century (2000 to
+ * 2068 inclusive)."
+ */
+ tm->tm_wday -= 1; /* chip weekday 1-7 to struct tm 0-6 */
+ tm->tm_mon -= 1; /* chip month 1-12 to struct tm 0-11 */
+ if (tm->tm_year < 69)
+ tm->tm_year += 100;
+ if (pmbit) {
+ tm->tm_hour += 12;
+ if (tm->tm_hour == 24) /* 12 with the PM bit is read back as hour 0, mirroring cmos_set_time() */
+ tm->tm_hour = 0;
+ }
+
+ tm->tm_isdst = -1; /* don't know whether it's daylight */
+ return 0;
+}
+
+static int set_hardware_clock_cmos(const struct hwclock_control *ctl
+ __attribute__((__unused__)),
+ const struct tm *new_broken_time)
+{
+ hclock_set_time(new_broken_time);
+ return 0; /* always 0: the write path reports no errors */
+}
+
+#if defined(__i386__) || defined(__x86_64__)
+# if defined(HAVE_IOPL)
+static int i386_iopl(const int level)
+{
+ return iopl(level);
+}
+# else
+static int i386_iopl(const int level __attribute__ ((__unused__)))
+{
+ extern int ioperm(unsigned long from, unsigned long num, int turn_on);
+ return ioperm(clock_ctl_addr, 2, 1); /* without iopl(): request just the two ports starting at clock_ctl_addr */
+}
+# endif
+#else
+static int i386_iopl(const int level __attribute__ ((__unused__)))
+{
+ return IOPL_NOT_IMPLEMENTED; /* non-x86 build: no port access available */
+}
+#endif
+
+static int get_permissions_cmos(void) /* returns the i386_iopl(3) result: 0 on success, non-zero after a warning */
+{
+ int rc;
+
+ rc = i386_iopl(3);
+ if (rc == IOPL_NOT_IMPLEMENTED) {
+ warnx(_("ISA port access is not implemented"));
+ } else if (rc != 0) {
+ warn(_("iopl() port access failed"));
+ }
+ return rc;
+}
+
+static const char *get_device_path(void)
+{
+ return NULL; /* this backend has no device path to report */
+}
+
+static struct clock_ops cmos_interface = { /* positional initializer; field order comes from struct clock_ops in hwclock.h */
+ N_("Using direct ISA access to the clock"),
+ get_permissions_cmos,
+ read_hardware_clock_cmos,
+ set_hardware_clock_cmos,
+ synchronize_to_clock_tick_cmos,
+ get_device_path,
+};
+
+/*
+ * return &cmos_interface on x86 builds that still have port I/O available,
+ * NULL otherwise. This is a compile-time decision; no hardware is probed.
+ */
+struct clock_ops *probe_for_cmos_clock(void)
+{
+#if defined(__i386__) || defined(__x86_64__)
+ return &cmos_interface;
+#else
+ return NULL;
+#endif
+}
diff --git a/sys-utils/hwclock-rtc.c b/sys-utils/hwclock-rtc.c
new file mode 100644
index 0000000..32feb35
--- /dev/null
+++ b/sys-utils/hwclock-rtc.c
@@ -0,0 +1,448 @@
+/*
+ * rtc.c - Use /dev/rtc for clock access
+ */
+#include <asm/ioctl.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "nls.h"
+
+#include "hwclock.h"
+
+/*
+ * Get defines for rtc stuff.
+ *
+ * Getting the rtc defines is nontrivial. The obvious way is by including
+ * <linux/mc146818rtc.h> but that again includes <asm/io.h> which again
+ * includes ... and on sparc and alpha this gives compilation errors for
+ * many kernel versions. So, we give the defines ourselves here. Moreover,
+ * some Sparc person decided to be incompatible, and used a struct rtc_time
+ * different from that used in mc146818rtc.h.
+ */
+
+/*
+ * On Sparcs, there is a <asm/rtc.h> that defines different ioctls (that are
+ * required on my machine). However, this include file does not exist on
+ * other architectures.
+ */
+/* One might do:
+#ifdef __sparc__
+# include <asm/rtc.h>
+#endif
+ */
+#ifdef __sparc__
+/* The following is roughly equivalent to what <asm/rtc.h> would provide */
+struct sparc_rtc_time
+{
+ int sec; /* Seconds 0-59 */
+ int min; /* Minutes 0-59 */
+ int hour; /* Hour 0-23 */
+ int dow; /* Day of the week 1-7 */
+ int dom; /* Day of the month 1-31 */
+ int month; /* Month of year 1-12 */
+ int year; /* Year 0-99 */
+};
+#define RTCGET _IOR('p', 20, struct sparc_rtc_time)
+#define RTCSET _IOW('p', 21, struct sparc_rtc_time)
+#endif
+
+/*
+ * struct rtc_time is present since 1.3.99.
+ * Earlier (since 1.3.89), a struct tm was used.
+ * This local copy is only used to size the fallback ioctl numbers below.
+ */
+struct linux_rtc_time {
+ int tm_sec;
+ int tm_min;
+ int tm_hour;
+ int tm_mday;
+ int tm_mon;
+ int tm_year;
+ int tm_wday;
+ int tm_yday;
+ int tm_isdst;
+};
+
+/* RTC_RD_TIME etc have this definition since 1.99.9 (pre2.0-9); defined here only when no header supplied them */
+#ifndef RTC_RD_TIME
+# define RTC_RD_TIME _IOR('p', 0x09, struct linux_rtc_time)
+# define RTC_SET_TIME _IOW('p', 0x0a, struct linux_rtc_time)
+# define RTC_UIE_ON _IO('p', 0x03) /* Update int. enable on */
+# define RTC_UIE_OFF _IO('p', 0x04) /* Update int. enable off */
+#endif
+
+/* RTC_EPOCH_READ and RTC_EPOCH_SET are present since 2.0.34 and 2.1.89; defined here only when no header supplied them */
+#ifndef RTC_EPOCH_READ
+# define RTC_EPOCH_READ _IOR('p', 0x0d, unsigned long) /* Read epoch */
+# define RTC_EPOCH_SET _IOW('p', 0x0e, unsigned long) /* Set epoch */
+#endif
+
+/*
+ * /dev/rtc is conventionally chardev 10/135
+ * ia64 uses /dev/efirtc, chardev 10/136
+ * devfs (obsolete) used /dev/misc/... for miscdev
+ * new RTC framework + udev uses dynamic major and /dev/rtc0.../dev/rtcN
+ * ... so we need an overridable default
+ */
+
+/* default or user defined dev (by hwclock --rtc=<path>); both are set by open_rtc() */
+static const char *rtc_dev_name;
+static int rtc_dev_fd = -1; /* -1 means "not open"; cached so the device is opened only once */
+
+static void close_rtc(void) /* registered with atexit() by open_rtc() */
+{
+ if (rtc_dev_fd != -1)
+ close(rtc_dev_fd);
+ rtc_dev_fd = -1; /* mark as closed so a later open_rtc() reopens the device */
+}
+
+static int open_rtc(const struct hwclock_control *ctl) /* returns the cached or newly opened descriptor, negative on failure */
+{
+ static const char *fls[] = { /* candidate device nodes, tried in this order */
+#ifdef __ia64__
+ "/dev/efirtc",
+ "/dev/misc/efirtc",
+#endif
+ "/dev/rtc0",
+ "/dev/rtc",
+ "/dev/misc/rtc"
+ };
+ size_t i;
+
+ if (rtc_dev_fd != -1) /* already open: reuse the descriptor */
+ return rtc_dev_fd;
+
+ /* --rtc option has been given */
+ if (ctl->rtc_dev_name) {
+ rtc_dev_name = ctl->rtc_dev_name;
+ rtc_dev_fd = open(rtc_dev_name, O_RDONLY);
+ } else {
+ for (i = 0; i < ARRAY_SIZE(fls); i++) {
+ if (ctl->verbose)
+ printf(_("Trying to open: %s\n"), fls[i]);
+ rtc_dev_fd = open(fls[i], O_RDONLY);
+
+ if (rtc_dev_fd < 0
+ && (errno == ENOENT || errno == ENODEV))
+ continue; /* node missing: try the next candidate */
+ rtc_dev_name = fls[i]; /* opened, or failed for another reason: stop searching here */
+ break;
+ }
+ if (rtc_dev_fd < 0)
+ rtc_dev_name = *fls; /* default for error messages */
+ }
+ if (rtc_dev_fd != -1)
+ atexit(close_rtc);
+ return rtc_dev_fd;
+}
+
+static int open_rtc_or_exit(const struct hwclock_control *ctl) /* like open_rtc(), but calls hwclock_exit() on failure */
+{
+ int rtc_fd = open_rtc(ctl);
+
+ if (rtc_fd < 0) {
+ warn(_("cannot open rtc device"));
+ hwclock_exit(ctl, EXIT_FAILURE);
+ }
+ return rtc_fd;
+}
+
+static int do_rtc_read_ioctl(int rtc_fd, struct tm *tm) /* fills *tm from the RTC; returns 0 on success, -1 after a warning */
+{
+ int rc = -1;
+ char *ioctlname; /* name of the last ioctl tried, used in the error message */
+#ifdef __sparc__
+ /* some but not all sparcs use a different ioctl and struct */
+ struct sparc_rtc_time stm;
+#endif
+
+ ioctlname = "RTC_RD_TIME";
+ rc = ioctl(rtc_fd, RTC_RD_TIME, tm);
+
+#ifdef __sparc__
+ if (rc == -1) { /* sparc sbus */
+ ioctlname = "RTCGET";
+ rc = ioctl(rtc_fd, RTCGET, &stm);
+ if (rc == 0) {
+ tm->tm_sec = stm.sec;
+ tm->tm_min = stm.min;
+ tm->tm_hour = stm.hour;
+ tm->tm_mday = stm.dom;
+ tm->tm_mon = stm.month - 1;
+ tm->tm_year = stm.year - 1900;
+ tm->tm_wday = stm.dow - 1;
+ tm->tm_yday = -1; /* day in the year */
+ }
+ }
+#endif
+
+ if (rc == -1) {
+ warn(_("ioctl(%s) to %s to read the time failed"),
+ ioctlname, rtc_dev_name);
+ return -1;
+ }
+
+ tm->tm_isdst = -1; /* don't know whether it's dst */
+ return 0;
+}
+
+/*
+ * Wait for the top of a clock tick by reading /dev/rtc in a busy loop
+ * until we see it. This function is used for rtc drivers without ioctl
+ * interrupts. This is typical on an Alpha, where the Hardware Clock
+ * interrupts are used by the kernel for the system clock, so aren't at
+ * the user's disposal.
+ *
+ * Returns 0 once the seconds value changed, 1 on read failure or timeout.
+ */
+static int busywait_for_rtc_clock_tick(const struct hwclock_control *ctl,
+ const int rtc_fd)
+{
+ struct tm start_time;
+ /* The time when we were called (and started waiting) */
+ struct tm nowtime;
+ int rc;
+ struct timeval begin, now;
+
+ if (ctl->verbose) { /* errno here is still the one left by the caller's failed RTC_UIE_ON ioctl */
+ printf("ioctl(%d, RTC_UIE_ON, 0): %s\n",
+ rtc_fd, strerror(errno));
+ printf(_("Waiting in loop for time from %s to change\n"),
+ rtc_dev_name);
+ }
+
+ if (do_rtc_read_ioctl(rtc_fd, &start_time))
+ return 1;
+
+ /*
+ * Wait for change. Should be within a second, but in case
+ * something weird happens, we have a time limit (1.5s) on this loop
+ * to reduce the impact of this failure.
+ */
+ gettimeofday(&begin, NULL);
+ do {
+ rc = do_rtc_read_ioctl(rtc_fd, &nowtime);
+ if (rc || start_time.tm_sec != nowtime.tm_sec) /* stop on read error or when the second rolled over */
+ break;
+ gettimeofday(&now, NULL);
+ if (time_diff(now, begin) > 1.5) {
+ warnx(_("Timed out waiting for time change."));
+ return 1;
+ }
+ } while (1);
+
+ if (rc)
+ return 1;
+ return 0;
+}
+
+/*
+ * Same as synchronize_to_clock_tick(), but just for /dev/rtc.
+ * Returns 0 when a tick was seen, 1 otherwise.
+ */
+static int synchronize_to_clock_tick_rtc(const struct hwclock_control *ctl)
+{
+ int rtc_fd; /* File descriptor of /dev/rtc */
+ int ret = 1; /* failure until a tick is actually observed */
+
+ rtc_fd = open_rtc(ctl);
+ if (rtc_fd == -1) {
+ warn(_("cannot open rtc device"));
+ return ret;
+ } else {
+ /* Turn on update interrupts (one per second) */
+ int rc = ioctl(rtc_fd, RTC_UIE_ON, 0);
+
+ if (rc != -1) {
+ /*
+ * Just reading rtc_fd fails on broken hardware: no
+ * update interrupt comes and a bootscript with a
+ * hwclock call hangs
+ */
+ fd_set rfds;
+ struct timeval tv;
+
+ /*
+ * Wait up to ten seconds for the next update
+ * interrupt
+ */
+ FD_ZERO(&rfds);
+ FD_SET(rtc_fd, &rfds);
+ tv.tv_sec = 10;
+ tv.tv_usec = 0;
+ rc = select(rtc_fd + 1, &rfds, NULL, NULL, &tv);
+ if (0 < rc)
+ ret = 0;
+ else if (rc == 0) {
+ warnx(_("select() to %s to wait for clock tick timed out"),
+ rtc_dev_name);
+ } else
+ warn(_("select() to %s to wait for clock tick failed"),
+ rtc_dev_name);
+ /* Turn off update interrupts */
+ rc = ioctl(rtc_fd, RTC_UIE_OFF, 0);
+ if (rc == -1)
+ warn(_("ioctl() to %s to turn off update interrupts failed"),
+ rtc_dev_name);
+ } else if (errno == ENOTTY || errno == EINVAL) {
+ /* rtc ioctl interrupts are unimplemented */
+ ret = busywait_for_rtc_clock_tick(ctl, rtc_fd);
+ } else
+ warn(_("ioctl(%d, RTC_UIE_ON, 0) to %s failed"),
+ rtc_fd, rtc_dev_name);
+ }
+ return ret;
+}
+
+static int read_hardware_clock_rtc(const struct hwclock_control *ctl,
+ struct tm *tm)
+{
+ int rtc_fd, rc;
+
+ rtc_fd = open_rtc_or_exit(ctl); /* does not return with a negative descriptor unless hwclock_exit() returns */
+
+ /* Read the RTC time/date, return answer via tm */
+ rc = do_rtc_read_ioctl(rtc_fd, tm);
+
+ return rc; /* 0 on success, -1 when the read ioctl failed */
+}
+
+/*
+ * Set the Hardware Clock to the broken down time <new_broken_time>. Use
+ * ioctls to "rtc" device /dev/rtc. Returns 0; failures go through
+ * hwclock_exit().
+ */
+static int set_hardware_clock_rtc(const struct hwclock_control *ctl,
+ const struct tm *new_broken_time)
+{
+ int rc = -1;
+ int rtc_fd;
+ char *ioctlname; /* name of the last ioctl tried, used in messages */
+
+ rtc_fd = open_rtc_or_exit(ctl);
+
+ ioctlname = "RTC_SET_TIME";
+ rc = ioctl(rtc_fd, RTC_SET_TIME, new_broken_time);
+
+#ifdef __sparc__
+ if (rc == -1) { /* sparc sbus */
+ struct sparc_rtc_time stm;
+
+ stm.sec = new_broken_time->tm_sec;
+ stm.min = new_broken_time->tm_min;
+ stm.hour = new_broken_time->tm_hour;
+ stm.dom = new_broken_time->tm_mday;
+ stm.month = new_broken_time->tm_mon + 1;
+ stm.year = new_broken_time->tm_year + 1900;
+ stm.dow = new_broken_time->tm_wday + 1;
+
+ ioctlname = "RTCSET";
+ rc = ioctl(rtc_fd, RTCSET, &stm);
+ }
+#endif
+
+ if (rc == -1) {
+ warn(_("ioctl(%s) to %s to set the time failed"),
+ ioctlname, rtc_dev_name);
+ hwclock_exit(ctl, EXIT_FAILURE);
+ }
+
+ if (ctl->verbose)
+ printf(_("ioctl(%s) was successful.\n"), ioctlname);
+
+ return 0;
+}
+
+static int get_permissions_rtc(void)
+{
+ return 0; /* nothing to acquire for this backend; always reports success */
+}
+
+static const char *get_device_path(void)
+{
+ return rtc_dev_name; /* set by open_rtc(); NULL if open_rtc() has not run yet */
+}
+
+static struct clock_ops rtc_interface = { /* positional initializer; field order comes from struct clock_ops in hwclock.h */
+ N_("Using the rtc interface to the clock."),
+ get_permissions_rtc,
+ read_hardware_clock_rtc,
+ set_hardware_clock_rtc,
+ synchronize_to_clock_tick_rtc,
+ get_device_path,
+};
+
+/* return &rtc_interface if an rtc device can be opened by open_rtc(), NULL otherwise */
+struct clock_ops *probe_for_rtc_clock(const struct hwclock_control *ctl)
+{
+ const int rtc_fd = open_rtc(ctl); /* the descriptor stays open and cached for later calls */
+
+ if (rtc_fd < 0)
+ return NULL;
+ return &rtc_interface;
+}
+
+#ifdef __alpha__
+/*
+ * Get the Hardware Clock epoch setting from the kernel.
+ * Stores it in *epoch_p; returns 0 on success, 1 after a warning.
+ */
+int get_epoch_rtc(const struct hwclock_control *ctl, unsigned long *epoch_p)
+{
+ int rtc_fd;
+
+ rtc_fd = open_rtc(ctl);
+ if (rtc_fd < 0) {
+ warn(_("cannot open %s"), rtc_dev_name);
+ return 1;
+ }
+
+ if (ioctl(rtc_fd, RTC_EPOCH_READ, epoch_p) == -1) {
+ warn(_("ioctl(%d, RTC_EPOCH_READ, epoch_p) to %s failed"),
+ rtc_fd, rtc_dev_name);
+ return 1;
+ }
+
+ if (ctl->verbose)
+ printf(_("ioctl(%d, RTC_EPOCH_READ, epoch_p) to %s succeeded.\n"),
+ rtc_fd, rtc_dev_name);
+
+ return 0;
+}
+
+/*
+ * Set the Hardware Clock epoch in the kernel from ctl->epoch_option.
+ * Returns 0 on success, 1 after a warning.
+ */
+int set_epoch_rtc(const struct hwclock_control *ctl)
+{
+ int rtc_fd;
+ unsigned long epoch;
+
+ epoch = strtoul(ctl->epoch_option, NULL, 10); /* no end pointer is passed, so trailing non-digits are not detected here */
+
+ /* There were no RTC clocks before 1900. */
+ if (epoch < 1900 || epoch == ULONG_MAX) {
+ warnx(_("invalid epoch '%s'."), ctl->epoch_option);
+ return 1;
+ }
+
+ rtc_fd = open_rtc(ctl);
+ if (rtc_fd < 0) {
+ warn(_("cannot open %s"), rtc_dev_name);
+ return 1;
+ }
+
+ if (ioctl(rtc_fd, RTC_EPOCH_SET, epoch) == -1) { /* the epoch is passed by value, unlike RTC_EPOCH_READ which takes a pointer */
+ warn(_("ioctl(%d, RTC_EPOCH_SET, %lu) to %s failed"),
+ rtc_fd, epoch, rtc_dev_name);
+ return 1;
+ }
+
+ if (ctl->verbose)
+ printf(_("ioctl(%d, RTC_EPOCH_SET, %lu) to %s succeeded.\n"),
+ rtc_fd, epoch, rtc_dev_name);
+
+ return 0;
+}
+#endif /* __alpha__ */
diff --git a/sys-utils/hwclock.8 b/sys-utils/hwclock.8
new file mode 100644
index 0000000..8a10e7a
--- /dev/null
+++ b/sys-utils/hwclock.8
@@ -0,0 +1,998 @@
+.\" hwclock.8.in -- man page for util-linux' hwclock
+.\"
+.\" 2015-01-07 J William Piggott
+.\" Authored new section: DATE-TIME CONFIGURATION.
+.\" Subsections: Keeping Time..., LOCAL vs UTC, POSIX vs 'RIGHT'.
+.\"
+.TH HWCLOCK 8 "July 2017" "util-linux" "System Administration"
+.SH NAME
+hwclock \- time clocks utility
+.SH SYNOPSIS
+.B hwclock
+.RI [ function ]
+.RI [ option ...]
+.
+.SH DESCRIPTION
+.B hwclock
+is an administration tool for the time clocks. It can: display the
+Hardware Clock time; set the Hardware Clock to a specified time; set the
+Hardware Clock from the System Clock; set the System Clock from the
+Hardware Clock; compensate for Hardware Clock drift; correct the System
+Clock timescale; set the kernel's timezone, NTP timescale, and epoch
+(Alpha only); and predict future
+Hardware Clock values based on its drift rate.
+.PP
+Since v2.26 important changes were made to the
+.B \-\-hctosys
+function and the
+.B \-\-directisa
+option, and a new option
+.B \-\-update\-drift
+was added. See their respective descriptions below.
+.
+.SH FUNCTIONS
+The following functions are mutually exclusive; only one can be given at
+a time. If none is given, the default is \fB\-\-show\fR.
+.TP
+.B \-a, \-\-adjust
+Add or subtract time from the Hardware Clock to account for systematic
+drift since the last time the clock was set or adjusted. See the
+discussion below, under
+.BR "The Adjust Function" .
+.
+.TP
+.B \-\-getepoch
+.TQ
+.B \-\-setepoch
+These functions are for Alpha machines only, and are only available
+through the Linux kernel RTC driver.
+.sp
+They are used to read and set the kernel's Hardware Clock epoch value.
+Epoch is the number of years into AD to which a zero year value in the
+Hardware Clock refers. For example, if the machine's BIOS sets the year
+counter in the Hardware Clock to contain the number of full years since
+1952, then the kernel's Hardware Clock epoch value must be 1952.
+.sp
+The \fB\%\-\-setepoch\fR function requires using the
+.B \%\-\-epoch
+option to specify the year. For example:
+.RS
+.IP "" 4
+.B hwclock\ \-\-setepoch\ \-\-epoch=1952
+.PP
+The RTC driver attempts to guess the correct epoch value, so setting it
+may not be required.
+.PP
+This epoch value is used whenever
+.B \%hwclock
+reads or sets the Hardware Clock on an Alpha machine. For ISA machines
+the kernel uses the fixed Hardware Clock epoch of 1900.
+.RE
+.
+.TP
+.B \-\-predict
+Predict what the Hardware Clock will read in the future based upon the
+time given by the
+.B \-\-date
+option and the information in
+.IR /etc/adjtime .
+This is useful, for example, to account for drift when setting a
+Hardware Clock wakeup (aka alarm). See
+.BR \%rtcwake (8).
+.sp
+Do not use this function if the Hardware Clock is being modified by
+anything other than the current operating system's
+.B \%hwclock
+command, such as \%'11\ minute\ mode' or from dual-booting another OS.
+.
+.TP
+.BR \-r , \ \-\-show
+.TQ
+.B \-\-get
+.br
+Read the Hardware Clock and print its time to standard output in the
+.B ISO 8601
+format.
+The time shown is always in local time, even if you keep your Hardware Clock
+in UTC. See the
+.B \%\-\-localtime
+option.
+.sp
+Showing the Hardware Clock time is the default when no function is specified.
+.sp
+The
+.B \-\-get
+function also applies drift correction to the time read, based upon the
+information in
+.IR /etc/adjtime .
+Do not use this function if the Hardware Clock is being modified by
+anything other than the current operating system's
+.B \%hwclock
+command, such as \%'11\ minute\ mode' or from dual-booting another OS.
+.
+.TP
+.BR \-s , \ \-\-hctosys
+Set the System Clock from the Hardware Clock. The time read from the Hardware
+Clock is compensated to account for systematic drift before using it to set the
+System Clock. See the discussion below, under
+.BR "The Adjust Function" .
+.sp
+The System Clock must be kept in the UTC timescale for date-time
+applications to work correctly in conjunction with the timezone configured
+for the system. If the Hardware Clock is kept in local time then the time read
+from it must be shifted to the UTC timescale before using it to set the System
+Clock. The
+.B \%\-\-hctosys
+function does this based upon the information in the
+.I /etc/adjtime
+file or the command line arguments
+.BR \%\-\-localtime " and " \-\-utc .
+Note: no daylight saving adjustment is made. See the discussion below, under
+.BR "LOCAL vs UTC" .
+.sp
+The kernel also keeps a timezone value, the
+.B \%\-\-hctosys
+function sets it to the timezone configured for the system. The system
+timezone is configured by the TZ environment variable or the
+.I \%/etc/localtime
+file, as
+.BR \%tzset (3)
+would interpret them.
+The obsolete tz_dsttime field of the kernel's timezone value is set
+to zero. (For details on what this field used to mean, see
+.BR \%settimeofday (2).)
+.sp
+When used in a startup script, making the
+.B \%\-\-hctosys
+function the first caller of
+.BR \%settimeofday (2)
+from boot, it will set the NTP \%'11\ minute\ mode' timescale via the
+.I \%persistent_clock_is_local
+kernel variable. If the Hardware Clock's timescale configuration is
+changed then a reboot is required to inform the kernel. See the
+discussion below, under
+.BR "Automatic Hardware Clock Synchronization by the Kernel" .
+.sp
+This is a good function to use in one of the system startup scripts before the
+file systems are mounted read/write.
+.sp
+This function should never be used on a running system. Jumping system time
+will cause problems, such as corrupted filesystem timestamps. Also, if
+something has changed the Hardware Clock, like NTP's \%'11\ minute\ mode', then
+.B \%\-\-hctosys
+will set the time incorrectly by including drift compensation.
+.sp
+Drift compensation can be inhibited by setting the drift factor in
+.I /etc/adjtime
+to zero. This setting will be persistent as long as the
+.BR \%\-\-update\-drift " option is not used with " \%\-\-systohc
+at shutdown (or anywhere else). Another way to inhibit this is by using the
+.BR \%\-\-noadjfile " option when calling the " \%\-\-hctosys
+function. A third method is to delete the
+.IR /etc/adjtime " file."
+.B Hwclock
+will then default to using the UTC timescale for the Hardware Clock. If
+the Hardware Clock is ticking local time it will need to be defined in
+the file. This can be done by calling
+.BR hwclock\ \-\-localtime\ \-\-adjust ;
+when the file is not present this command will not actually
+adjust the Clock, but it will create the file with local time
+configured, and a drift factor of zero.
+.sp
+A condition under which inhibiting
+.BR hwclock 's
+drift correction may be desired is when dual-booting multiple operating
+systems. If while this instance of Linux is stopped, another OS changes
+the Hardware Clock's value, then when this instance is started again the
+drift correction applied will be incorrect.
+.sp
+.RB "For " hwclock 's
+drift correction to work properly it is imperative that nothing changes
+the Hardware Clock while its Linux instance is not running.
+.
+.TP
+.B \-\-set
+Set the Hardware Clock to the time given by the
+.B \-\-date
+option, and update the timestamps in
+.IR /etc/adjtime .
+With the
+.B \%\-\-update\-drift
+option also (re)calculate the drift factor. Try it without the option if
+.BR \%\-\-set " fails. See " \%\-\-update\-drift " below."
+.
+.TP
+.B \-\-systz
+This is an alternative to the
+.B \%\-\-hctosys
+function that does not read the Hardware Clock nor set the System Clock;
+consequently there is not any drift correction. It is intended to be
+used in a startup script on systems with kernels above version 2.6 where
+you know the System Clock has been set from the Hardware Clock by the
+kernel during boot.
+.sp
+It does the following things that are detailed above in the
+.BR \%\-\-hctosys " function:"
+.RS
+.IP \(bu 2
+Corrects the System Clock timescale to UTC as needed. Only instead of
+accomplishing this by setting the System Clock,
+.B hwclock
+simply informs the kernel and it handles the change.
+.IP \(bu 2
+Sets the kernel's NTP \%'11\ minute\ mode' timescale.
+.IP \(bu 2
+Sets the kernel's timezone.
+.PP
+The first two are only available on the first call of
+.BR \%settimeofday (2)
+after boot. Consequently this option only makes sense when used in a
+startup script. If the Hardware Clock's timescale configuration is
+changed then a reboot would be required to inform the kernel.
+.RE
+.
+.TP
+.BR \-w , \ \-\-systohc
+Set the Hardware Clock from the System Clock, and update the timestamps in
+.IR /etc/adjtime .
+With the
+.B \%\-\-update\-drift
+option also (re)calculate the drift factor. Try it without the option if
+.BR \%\-\-systohc " fails. See " \%\-\-update\-drift " below."
+.
+.TP
+.BR \-V , \ \-\-version
+Display version information and exit.
+.
+.TP
+.BR \-h , \ \-\-help
+Display help text and exit.
+.
+.SH OPTIONS
+.
+.TP
+.BI \-\-adjfile= filename
+.RI "Override the default " /etc/adjtime " file path."
+.
+.TP
+.BI \%\-\-date= date_string
+This option must be used with the
+.B \-\-set
+or
+.B \%\-\-predict
+functions, otherwise it is ignored.
+.RS
+.IP "" 4
+.B "hwclock\ \-\-set\ \-\-date='16:45'"
+.IP "" 4
+.B "hwclock\ \-\-predict\ \-\-date='2525-08-14\ 07:11:05'"
+.PP
+The argument must be in local time, even if you keep your Hardware Clock in
+UTC. See the
+.B \%\-\-localtime
+option. Therefore, the argument should not include any timezone information.
+It also should not be a relative time like "+5 minutes", because
+.BR \%hwclock 's
+precision depends upon correlation between the argument's value and when the
+enter key is pressed. Fractional seconds are silently dropped. This option is
+capable of understanding many time and date formats, but the previous
+parameters should be observed.
+.RE
+.
+.TP
+.BI \%\-\-delay= seconds
+This option overrides the delay that is used internally when setting the
+clock time. The default is 0.5 (500ms) for rtc_cmos; for other RTC types
+the delay is 0. If the RTC type cannot be determined (from sysfs), the
+delay also defaults to 0.5 for backward compatibility.
+.RS
+.PP
+The 500ms default is based on the commonly used MC146818A-compatible (x86)
+hardware clock. This
+Hardware Clock can only be set to any integer time plus one half second. The
+integer time is required because there is no interface to set or get a
+fractional second. The additional half second delay is because the Hardware
+Clock updates to the following second precisely 500 ms after setting the new
+time. Unfortunately, this behavior is hardware specific and in some cases
+another delay is required.
+.RE
+.
+.TP
+.BR \-D ", " \-\-debug
+.RB Use\ \-\-verbose .
+.RB The\ \%\-\-debug\ option
+has been deprecated and may be repurposed or removed in a future release.
+.
+.TP
+.B \-\-directisa
+This option is meaningful for ISA compatible machines in the x86 and
+x86_64 family. For other machines, it has no effect. This option tells
+.B \%hwclock
+to use explicit I/O instructions to access the Hardware Clock.
+Without this option,
+.B \%hwclock
+will use the rtc device file, which it assumes to be driven by the Linux
+RTC device driver. As of v2.26 it will no longer automatically use
+directisa when the rtc driver is unavailable; this was causing an unsafe
+condition that could allow two processes to access the Hardware Clock at
+the same time. Direct hardware access from userspace should only be
+used for testing, troubleshooting, and as a last resort when all other
+methods fail. See the
+.BR \-\-rtc " option."
+.
+.TP
+.BI \-\-epoch= year
+This option is required when using the
+.BR \%\-\-setepoch \ function.
+.RI "The minimum " year
+value is 1900. The maximum is system dependent
+.RB ( ULONG_MAX\ -\ 1 ).
+.
+.TP
+.BR \-f , \ \-\-rtc=\fIfilename\fR
+.RB "Override " \%hwclock 's
+default rtc device file name. Otherwise it will
+use the first one found in this order:
+.in +4
+.br
+.I /dev/rtc0
+.br
+.I /dev/rtc
+.br
+.I /dev/misc/rtc
+.br
+.in
+.RB "For " IA-64:
+.in +4
+.br
+.I /dev/efirtc
+.br
+.I /dev/misc/efirtc
+.in
+.
+.TP
+.BR \-l , \ \-\-localtime
+.TQ
+.BR \-u ", " \-\-utc
+Indicate which timescale the Hardware Clock is set to.
+.sp
+The Hardware Clock may be configured to use either the UTC or the local
+timescale, but nothing in the clock itself says which alternative is
+being used. The
+.BR \%\-\-localtime " or " \-\-utc
+options give this information to the
+.B \%hwclock
+command. If you specify the wrong one (or specify neither and take a
+wrong default), both setting and reading the Hardware Clock will be
+incorrect.
+.sp
+If you specify neither
+.BR \-\-utc " nor " \%\-\-localtime
+then the one last given with a set function
+.RB ( \-\-set ", " \%\-\-systohc ", or " \%\-\-adjust ),
+as recorded in
+.IR /etc/adjtime ,
+will be used. If the adjtime file doesn't exist, the default is UTC.
+.sp
+Note: daylight saving time changes may be inconsistent when the
+Hardware Clock is kept in local time. See the discussion below, under
+.BR "LOCAL vs UTC" .
+.
+.TP
+.B \-\-noadjfile
+Disable the facilities provided by
+.IR /etc/adjtime .
+.B \%hwclock
+will not read nor write to that file with this option. Either
+.BR \-\-utc " or " \%\-\-localtime
+must be specified when using this option.
+.
+.TP
+.B \-\-test
+Do not actually change anything on the system, that is, the Clocks or
+.I /etc/adjtime
+.RB ( \%\-\-verbose
+is implicit with this option).
+.
+.TP
+.B \-\-update\-drift
+Update the Hardware Clock's drift factor in
+.IR /etc/adjtime .
+It can only be used with
+.BR \-\-set " or " \%\-\-systohc .
+.sp
+A minimum four hour period between settings is required. This is to
+avoid invalid calculations. The longer the period, the more precise the
+resulting drift factor will be.
+.sp
+This option was added in v2.26, because
+it is typical for systems to call
+.B \%hwclock\ \-\-systohc
+at shutdown; with the old behaviour this would automatically
+(re)calculate the drift factor which caused several problems:
+.RS
+.IP \(bu 2
+When using NTP with an \%'11\ minute\ mode' kernel the drift factor
+would be clobbered to near zero.
+.IP \(bu 2
+It would not allow the use of 'cold' drift correction. With most
+configurations using 'cold' drift will yield favorable results. Cold,
+means when the machine is turned off which can have a significant impact
+on the drift factor.
+.IP \(bu 2
+(Re)calculating drift factor on every shutdown delivers suboptimal
+results. For example, if ephemeral conditions cause the machine to be
+abnormally hot the drift factor calculation would be out of range.
+.IP \(bu 2
+Significantly increased system shutdown times (as of v2.31 when not
+using
+.B \%\-\-update\-drift
+the RTC is not read).
+.PP
+.RB "Having " \%hwclock
+calculate the drift factor is a good starting point, but for optimal
+results it will likely need to be adjusted by directly editing the
+.I /etc/adjtime
+file. For most configurations once a machine's optimal drift factor is
+crafted it should not need to be changed. Therefore, the old behavior to
+automatically (re)calculate drift was changed and now requires this
+option to be used. See the discussion below, under
+.BR "The Adjust Function" .
+.PP
+This option requires reading the Hardware Clock before setting it. If
+it cannot be read, then this option will cause the set functions to fail.
+This can happen, for example, if the Hardware Clock is corrupted by a
+power failure. In that case, the clock must first be set without this
+option. Nothing is lost by doing so, because a drift correction factor
+calculated from a corrupted clock would be invalid anyway.
+.RE
+.
+.TP
+.BR \-v ", " \-\-verbose
+Display more details about what
+.B \%hwclock
+is doing internally.
+.
+.SH NOTES
+.
+.SS Clocks in a Linux System
+.PP
+There are two types of date-time clocks:
+.PP
+.B The Hardware Clock:
+This clock is an independent hardware device, with its own power domain
+(battery, capacitor, etc), that operates when the machine is powered off,
+or even unplugged.
+.PP
+On an ISA compatible system, this clock is specified as part of the ISA
+standard. A control program can read or set this clock only to a whole
+second, but it can also detect the edges of the 1 second clock ticks, so
+the clock actually has virtually infinite precision.
+.PP
+This clock is commonly called the hardware clock, the real time clock,
+the RTC, the BIOS clock, and the CMOS clock. Hardware Clock, in its
+capitalized form, was coined for use by
+.BR \%hwclock .
+The Linux kernel also refers to it as the persistent clock.
+.PP
+Some non-ISA systems have a few real time clocks with
+only one of them having its own power domain.
+A very low power external I2C or SPI clock chip might be used with a
+backup battery as the hardware clock to initialize a more functional
+integrated real-time clock which is used for most other purposes.
+.PP
+.B The System Clock:
+This clock is part of the Linux kernel and is driven by
+a timer interrupt. (On an ISA machine, the timer interrupt is part of
+the ISA standard.) It has meaning only while Linux is running on the
+machine. The System Time is the number of seconds since 00:00:00
+January 1, 1970 UTC (or more succinctly, the number of seconds since
+1969 UTC). The System Time is not an integer, though. It has virtually
+infinite precision.
+.PP
+The System Time is the time that matters. The Hardware Clock's basic
+purpose is to keep time when Linux is not running so that the System
+Clock can be initialized from it at boot. Note that in DOS, for which
+ISA was designed, the Hardware Clock is the only real time clock.
+.PP
+It is important that the System Time not have any discontinuities such as
+would happen if you used the
+.BR \%date (1)
+program to set it while the system is running. You can, however, do whatever
+you want to the Hardware Clock while the system is running, and the next
+time Linux starts up, it will do so with the adjusted time from the Hardware
+Clock. Note: currently this is not possible on most systems because
+.B \%hwclock\ \-\-systohc
+is called at shutdown.
+.PP
+The Linux kernel's timezone is set by
+.BR hwclock .
+But don't be misled -- almost nobody cares what timezone the kernel
+thinks it is in. Instead, programs that care about the timezone
+(perhaps because they want to display a local time for you) almost
+always use a more traditional method of determining the timezone: They
+use the TZ environment variable or the
+.I \%/etc/localtime
+file, as explained in the man page for
+.BR \%tzset (3).
+However, some programs and fringe parts of the Linux kernel such as filesystems
+use the kernel's timezone value. An example is the vfat filesystem. If the
+kernel timezone value is wrong, the vfat filesystem will report and set the
+wrong timestamps on files. Another example is the kernel's NTP \%'11\ minute\ mode'.
+If the kernel's timezone value and/or the
+.I \%persistent_clock_is_local
+variable are wrong, then the Hardware Clock will be set incorrectly
+by \%'11\ minute\ mode'. See the discussion below, under
+.BR "Automatic Hardware Clock Synchronization by the Kernel" .
+.PP
+.B \%hwclock
+sets the kernel's timezone to the value indicated by TZ or
+.IR \%/etc/localtime " with the"
+.BR \%\-\-hctosys " or " \%\-\-systz " functions."
+.PP
+The kernel's timezone value actually consists of two parts: 1) a field
+tz_minuteswest indicating how many minutes local time (not adjusted
+for DST) lags behind UTC, and 2) a field tz_dsttime indicating
+the type of Daylight Saving Time (DST) convention that is in effect
+in the locality at the present time.
+This second field is not used under Linux and is always zero.
+See also
+.BR \%settimeofday (2).
+.
+.SS Hardware Clock Access Methods
+.PP
+.B \%hwclock
+uses many different ways to get and set Hardware Clock values. The most
+normal way is to do I/O to the rtc device special file, which is
+presumed to be driven by the rtc device driver. Also, Linux systems
+using the rtc framework with udev, are capable of supporting multiple
+Hardware Clocks. This may bring about the need to override the default
+rtc device by specifying one with the
+.BR \-\-rtc " option."
+.PP
+However, this method is not always available as older systems do not
+have an rtc driver. On these systems, the method of accessing the
+Hardware Clock depends on the system hardware.
+.PP
+On an ISA compatible system,
+.B \%hwclock
+can directly access the "CMOS memory" registers that
+constitute the clock, by doing I/O to Ports 0x70 and 0x71. It does
+this with actual I/O instructions and consequently can only do it if
+running with superuser effective userid. This method may be used by
+specifying the
+.BR \%\-\-directisa " option."
+.PP
+This is a really poor method of accessing the clock, for all the
+reasons that userspace programs are generally not supposed to do
+direct I/O and disable interrupts.
+.B \%hwclock
+provides it for testing, troubleshooting, and because it may be the
+only method available on ISA systems which do not have a working rtc
+device driver.
+.SS The Adjust Function
+.PP
+The Hardware Clock is usually not very accurate. However, much of its
+inaccuracy is completely predictable - it gains or loses the same amount
+of time every day. This is called systematic drift.
+.BR \%hwclock "'s " \%\-\-adjust
+function lets you apply systematic drift corrections to the
+Hardware Clock.
+.PP
+It works like this:
+.BR \%hwclock " keeps a file,"
+.IR /etc/adjtime ,
+that keeps some historical information. This is called the adjtime file.
+.PP
+Suppose you start with no adjtime file. You issue a
+.B \%hwclock\ \-\-set
+command to set the Hardware Clock to the true current time.
+.B \%hwclock
+creates the adjtime file and records in it the current time as the
+last time the clock was calibrated.
+Five days later, the clock has gained 10 seconds, so you issue a
+.B \%hwclock\ \-\-set\ \-\-update\-drift
+command to set it back 10 seconds.
+.B \%hwclock
+updates the adjtime file to show the current time as the last time the
+clock was calibrated, and records 2 seconds per day as the systematic
+drift rate. 24 hours go by, and then you issue a
+.B \%hwclock\ \-\-adjust
+command.
+.B \%hwclock
+consults the adjtime file and sees that the clock gains 2 seconds per
+day when left alone and that it has been left alone for exactly one
+day. So it subtracts 2 seconds from the Hardware Clock. It then
+records the current time as the last time the clock was adjusted.
+Another 24 hours go by and you issue another
+.BR \%hwclock\ \-\-adjust .
+.B \%hwclock
+does the same thing: subtracts 2 seconds and updates the adjtime file
+with the current time as the last time the clock was adjusted.
+.PP
+When you use the
+.BR \%\-\-update\-drift " option with " \-\-set " or " \%\-\-systohc ,
+the systematic drift rate is (re)calculated by comparing the fully drift
+corrected current Hardware Clock time with the new set time, from that
+it derives the 24 hour drift rate based on the last calibrated timestamp
+from the adjtime file. This updated drift factor is then saved in
+.IR /etc/adjtime .
+.PP
+A small amount of error creeps in when
+the Hardware Clock is set, so
+.B \%\-\-adjust
+refrains from making any adjustment that is less
+than 1 second. Later on, when you request an adjustment again, the accumulated
+drift will be more than 1 second and
+.B \%\-\-adjust
+will make the adjustment including any fractional amount.
+.PP
+.B \%hwclock\ \-\-hctosys
+also uses the adjtime file data to compensate the value read from the Hardware
+Clock before using it to set the System Clock. It does not share the 1 second
+limitation of
+.BR \%\-\-adjust ,
+and will correct sub-second drift values immediately. It does not
+change the Hardware Clock time nor the adjtime file. This may eliminate
+the need to use
+.BR \%\-\-adjust ,
+unless something else on the system needs the Hardware Clock to be
+compensated.
+.
+.SS The Adjtime File
+While named for its historical purpose of controlling adjustments only,
+it actually contains other information used by
+.B hwclock
+from one invocation to the next.
+.PP
+The format of the adjtime file is, in ASCII:
+.PP
+Line 1: Three numbers, separated by blanks: 1) the systematic drift rate
+in seconds per day, floating point decimal; 2) the resulting number of
+seconds since 1969 UTC of most recent adjustment or calibration,
+decimal integer; 3) zero (for compatibility with
+.BR \%clock (8))
+as a decimal integer.
+.PP
+Line 2: One number: the resulting number of seconds since 1969 UTC of most
+recent calibration. Zero if there has been no calibration yet or it
+is known that any previous calibration is moot (for example, because
+the Hardware Clock has been found, since that calibration, not to
+contain a valid time). This is a decimal integer.
+.PP
+Line 3: "UTC" or "LOCAL". Tells whether the Hardware Clock is set to
+Coordinated Universal Time or local time. You can always override this
+value with options on the
+.B \%hwclock
+command line.
+.PP
+You can use an adjtime file that was previously used with the
+.BR \%clock "(8) program with " \%hwclock .
+.
+.SS Automatic Hardware Clock Synchronization by the Kernel
+.PP
+You should be aware of another way that the Hardware Clock is kept
+synchronized in some systems. The Linux kernel has a mode wherein it
+copies the System Time to the Hardware Clock every 11 minutes. This mode
+is a compile time option, so not all kernels will have this capability.
+This is a good mode to use when you are using something sophisticated
+like NTP to keep your System Clock synchronized. (NTP is a way to keep
+your System Time synchronized either to a time server somewhere on the
+network or to a radio clock hooked up to your system. See RFC 1305.)
+.PP
+If the kernel is compiled with the \%'11\ minute\ mode' option it will
+be active when the kernel's clock discipline is in a synchronized state.
+When in this state, bit 6 (the bit that is set in the mask 0x0040)
+of the kernel's
+.I \%time_status
+variable is unset. This value is output as the 'status' line of the
+.BR \%adjtimex\ \-\-print " or " \%ntptime " commands."
+.PP
+It takes an outside influence, like the NTP daemon
+to put the kernel's clock discipline into a synchronized state, and
+therefore turn on \%'11\ minute\ mode'.
+It can be turned off by running anything that sets the System Clock the old
+fashioned way, including
+.BR "\%hwclock\ \-\-hctosys" .
+However, if the NTP daemon is still running, it will turn \%'11\ minute\ mode'
+back on again the next time it synchronizes the System Clock.
+.PP
+If your system runs with \%'11\ minute\ mode' on, it may need to use either
+.BR \%\-\-hctosys " or " \%\-\-systz
+in a startup script, especially if the Hardware Clock is configured to use
+the local timescale. Unless the kernel is informed of what timescale the
+Hardware Clock is using, it may clobber it with the wrong one. The kernel
+uses UTC by default.
+.PP
+The first userspace command to set the System Clock informs the
+kernel what timescale the Hardware Clock is using. This happens via the
+.I \%persistent_clock_is_local
+kernel variable. If
+.BR \%\-\-hctosys " or " \%\-\-systz
+is the first, it will set this variable according to the adjtime file or the
+appropriate command-line argument. Note that when using this capability and the
+Hardware Clock timescale configuration is changed, then a reboot is required to
+notify the kernel.
+.PP
+.B \%hwclock\ \-\-adjust
+should not be used with NTP \%'11\ minute\ mode'.
+.
+.SS ISA Hardware Clock Century value
+.PP
+There is some sort of standard that defines CMOS memory Byte 50 on an ISA
+machine as an indicator of what century it is.
+.B \%hwclock
+does not use or set that byte because there are some machines that
+don't define the byte that way, and it really isn't necessary anyway,
+since the year-of-century does a good job of implying which century it
+is.
+.PP
+If you have a bona fide use for a CMOS century byte, contact the
+.B \%hwclock
+maintainer; an option may be appropriate.
+.PP
+Note that this section is only relevant when you are using the "direct
+ISA" method of accessing the Hardware Clock.
+ACPI provides a standard way to access century values, when they
+are supported by the hardware.
+.
+.SH DATE-TIME CONFIGURATION
+.in +4
+.SS Keeping Time without External Synchronization
+.in
+.PP
+This discussion is based on the following conditions:
+.IP \(bu 2
+Nothing is running that alters the date-time clocks, such as an NTP daemon or a cron job.
+.IP \(bu 2
+The system timezone is configured for the correct local time. See below, under
+.BR "POSIX vs 'RIGHT'" .
+.IP \(bu 2
+Early during startup the following are called, in this order:
+.br
+.BI \%adjtimex\ \-\-tick \ value\ \-\-frequency \ value
+.br
+.B \%hwclock\ \-\-hctosys
+.IP \(bu 2
+During shutdown the following is called:
+.br
+.B \%hwclock\ \-\-systohc
+.PP
+.in +4
+.BR * " Systems without " adjtimex " may use " ntptime .
+.in
+.PP
+Whether maintaining precision time with an NTP daemon
+or not, it makes sense to configure the system to keep reasonably good
+date-time on its own.
+.PP
+The first step in making that happen is having a clear understanding of
+the big picture. There are two completely separate hardware devices
+running at their own speed and drifting away from the 'correct' time at
+their own rates. The methods and software for drift correction are
+different for each of them. However, most systems are configured to
+exchange values between these two clocks at startup and shutdown. Now
+the individual device's time keeping errors are transferred back and
+forth between each other. Attempt to configure drift correction for only
+one of them, and the other's drift will be overlaid upon it.
+.PP
+This problem can be avoided when configuring drift correction for the
+System Clock by simply not shutting down the machine. This, plus the
+fact that all of
+.BR \%hwclock 's
+precision (including calculating drift factors) depends upon the System
+Clock's rate being correct, means that configuration of the System Clock
+should be done first.
+.PP
+The System Clock drift is corrected with the
+.BR \%adjtimex "(8) command's " \-\-tick " and " \%\-\-frequency
+options. These two work together: tick is the coarse adjustment and
+frequency is the fine adjustment. (For systems that do not have an
+.BR \%adjtimex " package,"
+.BI \%ntptime\ \-f\ ppm
+may be used instead.)
+.PP
+Some Linux distributions attempt to automatically calculate the System
+Clock drift with
+.BR \%adjtimex 's
+compare operation. Trying to correct one
+drifting clock by using another drifting clock as a reference is akin to
+a dog trying to catch its own tail. Success may happen eventually, but
+great effort and frustration will likely precede it. This automation may
+yield an improvement over no configuration, but expecting optimum
+results would be in error. A better choice for manual configuration
+would be
+.BR \%adjtimex 's " \-\-log " options.
+.PP
+It may be more effective to simply track the System Clock drift with
+.BR \%sntp ", or " \%date\ \-Ins
+and a precision timepiece, and then calculate the correction manually.
+.PP
+After setting the tick and frequency values, continue to test and refine the
+adjustments until the System Clock keeps good time. See
+.BR \%adjtimex (8)
+for more information and the example demonstrating manual drift
+calculations.
+.PP
+Once the System Clock is ticking smoothly, move on to the Hardware Clock.
+.PP
+As a rule, cold drift will work best for most use cases. This should be
+true even for 24/7 machines whose normal downtime consists of a reboot.
+In that case the drift factor value makes little difference. But on the
+rare occasion that the machine is shut down for an extended period, then
+cold drift should yield better results.
+.PP
+.B Steps to calculate cold drift:
+.IP 1 2
+.B "Ensure that the NTP daemon will not be launched at startup."
+.IP 2 2
+.RI The " System Clock " "time must be correct at shutdown!"
+.IP 3 2
+Shut down the system.
+.IP 4 2
+Let an extended period pass without changing the Hardware Clock.
+.IP 5 2
+Start the system.
+.IP 6 2
+.RB "Immediately use " hwclock " to set the correct time, adding the"
+.BR \%\-\-update\-drift " option."
+.PP
+Note: if step 6 uses
+.BR \%\-\-systohc ,
+then the System Clock must be set correctly (step 6a) just before doing so.
+.PP
+.RB "Having " hwclock
+calculate the drift factor is a good starting point, but for optimal
+results it will likely need to be adjusted by directly editing the
+.I /etc/adjtime
+file. Continue to test and refine the drift factor until the Hardware
+Clock is corrected properly at startup. To check this, first make sure
+that the System Time is correct before shutdown and then use
+.BR \%sntp ", or " \%date\ \-Ins
+and a precision timepiece, immediately after startup.
+.SS LOCAL vs UTC
+Keeping the Hardware Clock in a local timescale causes inconsistent
+daylight saving time results:
+.IP \(bu 2
+If Linux is running during a daylight saving time change, the time
+written to the Hardware Clock will be adjusted for the change.
+.IP \(bu 2
+If Linux is NOT running during a daylight saving time change, the time
+read from the Hardware Clock will NOT be adjusted for the change.
+.PP
+The Hardware Clock on an ISA compatible system keeps only a date and time;
+it has no concept of timezone or daylight saving. Therefore, when
+.B hwclock
+is told that it is in local time, it assumes it is in the 'correct'
+local time and makes no adjustments to the time read from it.
+.PP
+Linux handles daylight saving time changes transparently only when the
+Hardware Clock is kept in the UTC timescale. Doing so is made easy for
+system administrators as
+.B \%hwclock
+uses local time for its output and as the argument to the
+.BR \%\-\-date " option."
+.PP
+POSIX systems, like Linux, are designed to have the System Clock operate
+in the UTC timescale. The Hardware Clock's purpose is to initialize the
+System Clock, so also keeping it in UTC makes sense.
+.PP
+Linux does, however, attempt to accommodate the Hardware Clock being in
+the local timescale. This is primarily for dual-booting with older
+versions of MS Windows. From Windows 7 on, the RealTimeIsUniversal
+registry key is supposed to be working properly so that its Hardware
+Clock can be kept in UTC.
+.
+.SS POSIX vs 'RIGHT'
+A discussion on date-time configuration would be incomplete without
+addressing timezones; this is mostly well covered by
+.BR tzset (3).
+One area that seems to have no documentation is the 'right'
+directory of the Time Zone Database, sometimes called tz or zoneinfo.
+.PP
+There are two separate databases in the zoneinfo system, posix
+and 'right'. 'Right' (now named zoneinfo\-leaps) includes leap seconds and posix
+does not. To use the 'right' database the System Clock must be set to
+\%(UTC\ +\ leap seconds), which is equivalent to \%(TAI\ \-\ 10). This
+allows calculating the
+exact number of seconds between two dates that cross a leap second
+epoch. The System Clock is then converted to the correct civil time,
+including UTC, by using the 'right' timezone files which subtract the
+leap seconds. Note: this configuration is considered experimental and is
+known to have issues.
+.PP
+To configure a system to use a particular database all of the files
+located in its directory must be copied to the root of
+.IR \%/usr/share/zoneinfo .
+Files should never be used directly from the posix or 'right' subdirectories, e.g.,
+.RI \%TZ=' right/Europe/Dublin '.
+This habit was becoming so common that the upstream zoneinfo project
+restructured the system's file tree by moving the posix and 'right'
+subdirectories out of the zoneinfo directory and into sibling directories:
+.PP
+.in +2
+.I /usr/share/zoneinfo
+.br
+.I /usr/share/zoneinfo\-posix
+.br
+.I /usr/share/zoneinfo\-leaps
+.PP
+Unfortunately, some Linux distributions are changing it back to the old
+tree structure in their packages. So the problem of system
+administrators reaching into the 'right' subdirectory persists. This
+causes the system timezone to be configured to include leap seconds
+while the zoneinfo database is still configured to exclude them. Then
+when an application such as a World Clock needs the South_Pole timezone
+file; or an email MTA, or
+.B hwclock
+needs the UTC timezone file; they fetch it from the root of
+.IR \%/usr/share/zoneinfo ,
+because that is what they are supposed to do. Those files exclude leap
+seconds, but the System Clock now includes them, causing an incorrect
+time conversion.
+.PP
+Attempting to mix and match files from these separate databases will not
+work, because they each require the System Clock to use a different
+timescale. The zoneinfo database must be configured to use either posix
+or 'right', as described above, or by assigning a database path to the
+.SB TZDIR
+environment variable.
+.SH EXIT STATUS
+One of the following exit values will be returned:
+.TP
+.BR EXIT_SUCCESS " ('0' on POSIX systems)"
+Successful program execution.
+.TP
+.BR EXIT_FAILURE " ('1' on POSIX systems)"
+The operation failed or the command syntax was not valid.
+.SH ENVIRONMENT
+.TP
+.B TZ
+If this variable is set its value takes precedence over the system
+configured timezone.
+.TP
+.B TZDIR
+If this variable is set its value takes precedence over the system
+configured timezone database directory path.
+.SH FILES
+.TP
+.I /etc/adjtime
+The configuration and state file for hwclock.
+.TP
+.I /etc/localtime
+The system timezone file.
+.TP
+.I /usr/share/zoneinfo/
+The system timezone database directory.
+.PP
+Device files
+.B hwclock
+may try for Hardware Clock access:
+.br
+.I /dev/rtc0
+.br
+.I /dev/rtc
+.br
+.I /dev/misc/rtc
+.br
+.I /dev/efirtc
+.br
+.I /dev/misc/efirtc
+.SH "SEE ALSO"
+.BR date (1),
+.BR adjtimex (8),
+.BR gettimeofday (2),
+.BR settimeofday (2),
+.BR crontab (1),
+.BR tzset (3)
+.
+.SH AUTHORS
+Written by Bryan Henderson, September 1996 (bryanh@giraffe-data.com),
+based on work done on the
+.BR \%clock (8)
+program by Charles Hedrick, Rob Hooft, and Harald Koenig.
+See the source code for complete history and credits.
+.
+.SH AVAILABILITY
+The hwclock command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/hwclock.8.in b/sys-utils/hwclock.8.in
new file mode 100644
index 0000000..dacdd27
--- /dev/null
+++ b/sys-utils/hwclock.8.in
@@ -0,0 +1,998 @@
+.\" hwclock.8.in -- man page for util-linux' hwclock
+.\"
+.\" 2015-01-07 J William Piggott
+.\" Authored new section: DATE-TIME CONFIGURATION.
+.\" Subsections: Keeping Time..., LOCAL vs UTC, POSIX vs 'RIGHT'.
+.\"
+.TH HWCLOCK 8 "July 2017" "util-linux" "System Administration"
+.SH NAME
+hwclock \- time clocks utility
+.SH SYNOPSIS
+.B hwclock
+.RI [ function ]
+.RI [ option ...]
+.
+.SH DESCRIPTION
+.B hwclock
+is an administration tool for the time clocks. It can: display the
+Hardware Clock time; set the Hardware Clock to a specified time; set the
+Hardware Clock from the System Clock; set the System Clock from the
+Hardware Clock; compensate for Hardware Clock drift; correct the System
+Clock timescale; set the kernel's timezone, NTP timescale, and epoch
+(Alpha only); and predict future
+Hardware Clock values based on its drift rate.
+.PP
+Since v2.26 important changes were made to the
+.B \-\-hctosys
+function and the
+.B \-\-directisa
+option, and a new option
+.B \-\-update\-drift
+was added. See their respective descriptions below.
+.
+.SH FUNCTIONS
+The following functions are mutually exclusive; only one can be given at
+a time. If none is given, the default is \fB\-\-show\fR.
+.TP
+.B \-a, \-\-adjust
+Add or subtract time from the Hardware Clock to account for systematic
+drift since the last time the clock was set or adjusted. See the
+discussion below, under
+.BR "The Adjust Function" .
+.
+.TP
+.B \-\-getepoch
+.TQ
+.B \-\-setepoch
+These functions are for Alpha machines only, and are only available
+through the Linux kernel RTC driver.
+.sp
+They are used to read and set the kernel's Hardware Clock epoch value.
+Epoch is the number of years into AD to which a zero year value in the
+Hardware Clock refers. For example, if the machine's BIOS sets the year
+counter in the Hardware Clock to contain the number of full years since
+1952, then the kernel's Hardware Clock epoch value must be 1952.
+.sp
+The \fB\%\-\-setepoch\fR function requires using the
+.B \%\-\-epoch
+option to specify the year. For example:
+.RS
+.IP "" 4
+.B hwclock\ \-\-setepoch\ \-\-epoch=1952
+.PP
+The RTC driver attempts to guess the correct epoch value, so setting it
+may not be required.
+.PP
+This epoch value is used whenever
+.B \%hwclock
+reads or sets the Hardware Clock on an Alpha machine. For ISA machines
+the kernel uses the fixed Hardware Clock epoch of 1900.
+.RE
+.
+.TP
+.B \-\-predict
+Predict what the Hardware Clock will read in the future based upon the
+time given by the
+.B \-\-date
+option and the information in
+.IR @ADJTIME_PATH@ .
+This is useful, for example, to account for drift when setting a
+Hardware Clock wakeup (aka alarm). See
+.BR \%rtcwake (8).
+.sp
+Do not use this function if the Hardware Clock is being modified by
+anything other than the current operating system's
+.B \%hwclock
+command, such as \%'11\ minute\ mode' or from dual-booting another OS.
+.
+.TP
+.BR \-r , \ \-\-show
+.TQ
+.B \-\-get
+.br
+Read the Hardware Clock and print its time to standard output in the
+.B ISO 8601
+format.
+The time shown is always in local time, even if you keep your Hardware Clock
+in UTC. See the
+.B \%\-\-localtime
+option.
+.sp
+Showing the Hardware Clock time is the default when no function is specified.
+.sp
+The
+.B \-\-get
+function also applies drift correction to the time read, based upon the
+information in
+.IR @ADJTIME_PATH@ .
+Do not use this function if the Hardware Clock is being modified by
+anything other than the current operating system's
+.B \%hwclock
+command, such as \%'11\ minute\ mode' or from dual-booting another OS.
+.
+.TP
+.BR \-s , \ \-\-hctosys
+Set the System Clock from the Hardware Clock. The time read from the Hardware
+Clock is compensated to account for systematic drift before using it to set the
+System Clock. See the discussion below, under
+.BR "The Adjust Function" .
+.sp
+The System Clock must be kept in the UTC timescale for date-time
+applications to work correctly in conjunction with the timezone configured
+for the system. If the Hardware Clock is kept in local time then the time read
+from it must be shifted to the UTC timescale before using it to set the System
+Clock. The
+.B \%\-\-hctosys
+function does this based upon the information in the
+.I @ADJTIME_PATH@
+file or the command line arguments
+.BR \%\-\-localtime " and " \-\-utc .
+Note: no daylight saving adjustment is made. See the discussion below, under
+.BR "LOCAL vs UTC" .
+.sp
+The kernel also keeps a timezone value, the
+.B \%\-\-hctosys
+function sets it to the timezone configured for the system. The system
+timezone is configured by the TZ environment variable or the
+.I \%/etc/localtime
+file, as
+.BR \%tzset (3)
+would interpret them.
+The obsolete tz_dsttime field of the kernel's timezone value is set
+to zero. (For details on what this field used to mean, see
+.BR \%settimeofday (2).)
+.sp
+When used in a startup script, making the
+.B \%\-\-hctosys
+function the first caller of
+.BR \%settimeofday (2)
+from boot, it will set the NTP \%'11\ minute\ mode' timescale via the
+.I \%persistent_clock_is_local
+kernel variable. If the Hardware Clock's timescale configuration is
+changed then a reboot is required to inform the kernel. See the
+discussion below, under
+.BR "Automatic Hardware Clock Synchronization by the Kernel" .
+.sp
+This is a good function to use in one of the system startup scripts before the
+file systems are mounted read/write.
+.sp
+This function should never be used on a running system. Jumping system time
+will cause problems, such as corrupted filesystem timestamps. Also, if
+something has changed the Hardware Clock, like NTP's \%'11\ minute\ mode', then
+.B \%\-\-hctosys
+will set the time incorrectly by including drift compensation.
+.sp
+Drift compensation can be inhibited by setting the drift factor in
+.I @ADJTIME_PATH@
+to zero. This setting will be persistent as long as the
+.BR \%\-\-update\-drift " option is not used with " \%\-\-systohc
+at shutdown (or anywhere else). Another way to inhibit this is by using the
+.BR \%\-\-noadjfile " option when calling the " \%\-\-hctosys
+function. A third method is to delete the
+.IR @ADJTIME_PATH@ " file."
+.B Hwclock
+will then default to using the UTC timescale for the Hardware Clock. If
+the Hardware Clock is ticking local time it will need to be defined in
+the file. This can be done by calling
+.BR hwclock\ \-\-localtime\ \-\-adjust ;
+when the file is not present this command will not actually
+adjust the Clock, but it will create the file with local time
+configured, and a drift factor of zero.
+.sp
+A condition under which inhibiting
+.BR hwclock 's
+drift correction may be desired is when dual-booting multiple operating
+systems. If while this instance of Linux is stopped, another OS changes
+the Hardware Clock's value, then when this instance is started again the
+drift correction applied will be incorrect.
+.sp
+.RB "For " hwclock 's
+drift correction to work properly it is imperative that nothing changes
+the Hardware Clock while its Linux instance is not running.
+.
+.TP
+.B \-\-set
+Set the Hardware Clock to the time given by the
+.B \-\-date
+option, and update the timestamps in
+.IR @ADJTIME_PATH@ .
+With the
+.B \%\-\-update\-drift
+option also (re)calculate the drift factor. Try it without the option if
+.BR \%\-\-set " fails. See " \%\-\-update\-drift " below."
+.
+.TP
+.B \-\-systz
+This is an alternative to the
+.B \%\-\-hctosys
+function that does not read the Hardware Clock nor set the System Clock;
+consequently there is not any drift correction. It is intended to be
+used in a startup script on systems with kernels above version 2.6 where
+you know the System Clock has been set from the Hardware Clock by the
+kernel during boot.
+.sp
+It does the following things that are detailed above in the
+.BR \%\-\-hctosys " function:"
+.RS
+.IP \(bu 2
+Corrects the System Clock timescale to UTC as needed. Only instead of
+accomplishing this by setting the System Clock,
+.B hwclock
+simply informs the kernel and it handles the change.
+.IP \(bu 2
+Sets the kernel's NTP \%'11\ minute\ mode' timescale.
+.IP \(bu 2
+Sets the kernel's timezone.
+.PP
+The first two are only available on the first call of
+.BR \%settimeofday (2)
+after boot. Consequently this option only makes sense when used in a
+startup script. If the Hardware Clock's timescale configuration is
+changed then a reboot would be required to inform the kernel.
+.RE
+.
+.TP
+.BR \-w , \ \-\-systohc
+Set the Hardware Clock from the System Clock, and update the timestamps in
+.IR @ADJTIME_PATH@ .
+With the
+.B \%\-\-update\-drift
+option also (re)calculate the drift factor. Try it without the option if
+.BR \%\-\-systohc " fails. See " \%\-\-update\-drift " below."
+.
+.TP
+.BR \-V , \ \-\-version
+Display version information and exit.
+.
+.TP
+.BR \-h , \ \-\-help
+Display help text and exit.
+.
+.SH OPTIONS
+.
+.TP
+.BI \-\-adjfile= filename
+.RI "Override the default " @ADJTIME_PATH@ " file path."
+.
+.TP
+.BI \%\-\-date= date_string
+This option must be used with the
+.B \-\-set
+or
+.B \%\-\-predict
+functions, otherwise it is ignored.
+.RS
+.IP "" 4
+.B "hwclock\ \-\-set\ \-\-date='16:45'"
+.IP "" 4
+.B "hwclock\ \-\-predict\ \-\-date='2525-08-14\ 07:11:05'"
+.PP
+The argument must be in local time, even if you keep your Hardware Clock in
+UTC. See the
+.B \%\-\-localtime
+option. Therefore, the argument should not include any timezone information.
+It also should not be a relative time like "+5 minutes", because
+.BR \%hwclock 's
+precision depends upon correlation between the argument's value and when the
+enter key is pressed. Fractional seconds are silently dropped. This option is
+capable of understanding many time and date formats, but the previous
+parameters should be observed.
+.RE
+.
+.TP
+.BI \%\-\-delay= seconds
+This option allows overriding the internally used delay when setting the clock
+time. The default is 0.5 (500ms) for rtc_cmos; for other RTC types the delay is
+0. If the RTC type cannot be determined (from sysfs), then it also defaults to
+0.5 to be backward compatible.
+.RS
+.PP
+The 500ms default is based on the commonly used MC146818A-compatible (x86) hardware clock. This
+Hardware Clock can only be set to an integer time plus one half second. The
+integer time is required because there is no interface to set or get a
+fractional second. The additional half second delay is because the Hardware
+Clock updates to the following second precisely 500 ms after setting the new
+time. Unfortunately, this behavior is hardware specific and in some cases
+a different delay is required.
+.RE
+.
+.
+.TP
+.BR \-D ", " \-\-debug
+.RB Use\ \-\-verbose .
+.RB The\ \%\-\-debug\ option
+has been deprecated and may be repurposed or removed in a future release.
+.
+.TP
+.B \-\-directisa
+This option is meaningful for ISA compatible machines in the x86 and
+x86_64 family. For other machines, it has no effect. This option tells
+.B \%hwclock
+to use explicit I/O instructions to access the Hardware Clock.
+Without this option,
+.B \%hwclock
+will use the rtc device file, which it assumes to be driven by the Linux
+RTC device driver. As of v2.26 it will no longer automatically use
+directisa when the rtc driver is unavailable; this was causing an unsafe
+condition that could allow two processes to access the Hardware Clock at
+the same time. Direct hardware access from userspace should only be
+used for testing, troubleshooting, and as a last resort when all other
+methods fail. See the
+.BR \-\-rtc " option."
+.
+.TP
+.BI \-\-epoch= year
+This option is required when using the
+.BR \%\-\-setepoch \ function.
+.RI "The minimum " year
+value is 1900. The maximum is system dependent
+.RB ( ULONG_MAX\ \-\ 1 ).
+.
+.TP
+.BR \-f , \ \-\-rtc=\fIfilename\fR
+.RB "Override " \%hwclock 's
+default rtc device file name. Otherwise it will
+use the first one found in this order:
+.in +4
+.br
+.I /dev/rtc0
+.br
+.I /dev/rtc
+.br
+.I /dev/misc/rtc
+.br
+.in
+.RB "For " IA-64:
+.in +4
+.br
+.I /dev/efirtc
+.br
+.I /dev/misc/efirtc
+.in
+.
+.TP
+.BR \-l , \ \-\-localtime
+.TQ
+.BR \-u ", " \-\-utc
+Indicate which timescale the Hardware Clock is set to.
+.sp
+The Hardware Clock may be configured to use either the UTC or the local
+timescale, but nothing in the clock itself says which alternative is
+being used. The
+.BR \%\-\-localtime " or " \-\-utc
+options give this information to the
+.B \%hwclock
+command. If you specify the wrong one (or specify neither and take a
+wrong default), both setting and reading the Hardware Clock will be
+incorrect.
+.sp
+If you specify neither
+.BR \-\-utc " nor " \%\-\-localtime
+then the one last given with a set function
+.RB ( \-\-set ", " \%\-\-systohc ", or " \%\-\-adjust ),
+as recorded in
+.IR @ADJTIME_PATH@ ,
+will be used. If the adjtime file doesn't exist, the default is UTC.
+.sp
+Note: daylight saving time changes may be inconsistent when the
+Hardware Clock is kept in local time. See the discussion below, under
+.BR "LOCAL vs UTC" .
+.
+.TP
+.B \-\-noadjfile
+Disable the facilities provided by
+.IR @ADJTIME_PATH@ .
+.B \%hwclock
+will not read nor write to that file with this option. Either
+.BR \-\-utc " or " \%\-\-localtime
+must be specified when using this option.
+.
+.TP
+.B \-\-test
+Do not actually change anything on the system, that is, the Clocks or
+.I @ADJTIME_PATH@
+.RB ( \%\-\-verbose
+is implicit with this option).
+.
+.TP
+.B \-\-update\-drift
+Update the Hardware Clock's drift factor in
+.IR @ADJTIME_PATH@ .
+It can only be used with
+.BR \-\-set " or " \%\-\-systohc .
+.sp
+A minimum four hour period between settings is required. This is to
+avoid invalid calculations. The longer the period, the more precise the
+resulting drift factor will be.
+.sp
+This option was added in v2.26, because
+it is typical for systems to call
+.B \%hwclock\ \-\-systohc
+at shutdown; with the old behaviour this would automatically
+(re)calculate the drift factor which caused several problems:
+.RS
+.IP \(bu 2
+When using NTP with an \%'11\ minute\ mode' kernel the drift factor
+would be clobbered to near zero.
+.IP \(bu 2
+It would not allow the use of 'cold' drift correction. With most
+configurations using 'cold' drift will yield favorable results. Cold
+means when the machine is turned off, which can have a significant impact
+on the drift factor.
+.IP \(bu 2
+(Re)calculating drift factor on every shutdown delivers suboptimal
+results. For example, if ephemeral conditions cause the machine to be
+abnormally hot the drift factor calculation would be out of range.
+.IP \(bu 2
+Significantly increased system shutdown times (as of v2.31 when not
+using
+.B \%\-\-update\-drift
+the RTC is not read).
+.PP
+.RB "Having " \%hwclock
+calculate the drift factor is a good starting point, but for optimal
+results it will likely need to be adjusted by directly editing the
+.I @ADJTIME_PATH@
+file. For most configurations once a machine's optimal drift factor is
+crafted it should not need to be changed. Therefore, the old behavior to
+automatically (re)calculate drift was changed and now requires this
+option to be used. See the discussion below, under
+.BR "The Adjust Function" .
+.PP
+This option requires reading the Hardware Clock before setting it. If
+it cannot be read, then this option will cause the set functions to fail.
+This can happen, for example, if the Hardware Clock is corrupted by a
+power failure. In that case, the clock must first be set without this
+option. Even if it did work, the resulting drift correction factor
+would be invalid anyway.
+.RE
+.
+.TP
+.BR \-v ", " \-\-verbose
+Display more details about what
+.B \%hwclock
+is doing internally.
+.
+.SH NOTES
+.
+.SS Clocks in a Linux System
+.PP
+There are two types of date-time clocks:
+.PP
+.B The Hardware Clock:
+This clock is an independent hardware device, with its own power domain
+(battery, capacitor, etc), that operates when the machine is powered off,
+or even unplugged.
+.PP
+On an ISA compatible system, this clock is specified as part of the ISA
+standard. A control program can read or set this clock only to a whole
+second, but it can also detect the edges of the 1 second clock ticks, so
+the clock actually has virtually infinite precision.
+.PP
+This clock is commonly called the hardware clock, the real time clock,
+the RTC, the BIOS clock, and the CMOS clock. Hardware Clock, in its
+capitalized form, was coined for use by
+.BR \%hwclock .
+The Linux kernel also refers to it as the persistent clock.
+.PP
+Some non-ISA systems have a few real time clocks with
+only one of them having its own power domain.
+A very low power external I2C or SPI clock chip might be used with a
+backup battery as the hardware clock to initialize a more functional
+integrated real-time clock which is used for most other purposes.
+.PP
+.B The System Clock:
+This clock is part of the Linux kernel and is driven by
+a timer interrupt. (On an ISA machine, the timer interrupt is part of
+the ISA standard.) It has meaning only while Linux is running on the
+machine. The System Time is the number of seconds since 00:00:00
+January 1, 1970 UTC (or more succinctly, the number of seconds since
+1969 UTC). The System Time is not an integer, though. It has virtually
+infinite precision.
+.PP
+The System Time is the time that matters. The Hardware Clock's basic
+purpose is to keep time when Linux is not running so that the System
+Clock can be initialized from it at boot. Note that in DOS, for which
+ISA was designed, the Hardware Clock is the only real time clock.
+.PP
+It is important that the System Time not have any discontinuities such as
+would happen if you used the
+.BR \%date (1)
+program to set it while the system is running. You can, however, do whatever
+you want to the Hardware Clock while the system is running, and the next
+time Linux starts up, it will do so with the adjusted time from the Hardware
+Clock. Note: currently this is not possible on most systems because
+.B \%hwclock\ \-\-systohc
+is called at shutdown.
+.PP
+The Linux kernel's timezone is set by
+.BR hwclock .
+But don't be misled -- almost nobody cares what timezone the kernel
+thinks it is in. Instead, programs that care about the timezone
+(perhaps because they want to display a local time for you) almost
+always use a more traditional method of determining the timezone: They
+use the TZ environment variable or the
+.I \%/etc/localtime
+file, as explained in the man page for
+.BR \%tzset (3).
+However, some programs and fringe parts of the Linux kernel such as filesystems
+use the kernel's timezone value. An example is the vfat filesystem. If the
+kernel timezone value is wrong, the vfat filesystem will report and set the
+wrong timestamps on files. Another example is the kernel's NTP \%'11\ minute\ mode'.
+If the kernel's timezone value and/or the
+.I \%persistent_clock_is_local
+variable are wrong, then the Hardware Clock will be set incorrectly
+by \%'11\ minute\ mode'. See the discussion below, under
+.BR "Automatic Hardware Clock Synchronization by the Kernel" .
+.PP
+.B \%hwclock
+sets the kernel's timezone to the value indicated by TZ or
+.IR \%/etc/localtime " with the"
+.BR \%\-\-hctosys " or " \%\-\-systz " functions."
+.PP
+The kernel's timezone value actually consists of two parts: 1) a field
+tz_minuteswest indicating how many minutes local time (not adjusted
+for DST) lags behind UTC, and 2) a field tz_dsttime indicating
+the type of Daylight Saving Time (DST) convention that is in effect
+in the locality at the present time.
+This second field is not used under Linux and is always zero.
+See also
+.BR \%settimeofday (2).
+.
+.SS Hardware Clock Access Methods
+.PP
+.B \%hwclock
+uses many different ways to get and set Hardware Clock values. The most
+normal way is to do I/O to the rtc device special file, which is
+presumed to be driven by the rtc device driver. Also, Linux systems
+using the rtc framework with udev, are capable of supporting multiple
+Hardware Clocks. This may bring about the need to override the default
+rtc device by specifying one with the
+.BR \-\-rtc " option."
+.PP
+However, this method is not always available as older systems do not
+have an rtc driver. On these systems, the method of accessing the
+Hardware Clock depends on the system hardware.
+.PP
+On an ISA compatible system,
+.B \%hwclock
+can directly access the "CMOS memory" registers that
+constitute the clock, by doing I/O to Ports 0x70 and 0x71. It does
+this with actual I/O instructions and consequently can only do it if
+running with superuser effective userid. This method may be used by
+specifying the
+.BR \%\-\-directisa " option."
+.PP
+This is a really poor method of accessing the clock, for all the
+reasons that userspace programs are generally not supposed to do
+direct I/O and disable interrupts.
+.B \%hwclock
+provides it for testing, troubleshooting, and because it may be the
+only method available on ISA systems which do not have a working rtc
+device driver.
+.SS The Adjust Function
+.PP
+The Hardware Clock is usually not very accurate. However, much of its
+inaccuracy is completely predictable - it gains or loses the same amount
+of time every day. This is called systematic drift.
+.BR \%hwclock "'s " \%\-\-adjust
+function lets you apply systematic drift corrections to the
+Hardware Clock.
+.PP
+It works like this:
+.BR \%hwclock " keeps a file,"
+.IR @ADJTIME_PATH@ ,
+that keeps some historical information. This is called the adjtime file.
+.PP
+Suppose you start with no adjtime file. You issue a
+.B \%hwclock\ \-\-set
+command to set the Hardware Clock to the true current time.
+.B \%hwclock
+creates the adjtime file and records in it the current time as the
+last time the clock was calibrated.
+Five days later, the clock has gained 10 seconds, so you issue a
+.B \%hwclock\ \-\-set\ \-\-update\-drift
+command to set it back 10 seconds.
+.B \%hwclock
+updates the adjtime file to show the current time as the last time the
+clock was calibrated, and records 2 seconds per day as the systematic
+drift rate. 24 hours go by, and then you issue a
+.B \%hwclock\ \-\-adjust
+command.
+.B \%hwclock
+consults the adjtime file and sees that the clock gains 2 seconds per
+day when left alone and that it has been left alone for exactly one
+day. So it subtracts 2 seconds from the Hardware Clock. It then
+records the current time as the last time the clock was adjusted.
+Another 24 hours go by and you issue another
+.BR \%hwclock\ \-\-adjust .
+.B \%hwclock
+does the same thing: subtracts 2 seconds and updates the adjtime file
+with the current time as the last time the clock was adjusted.
+.PP
+When you use the
+.BR \%\-\-update\-drift " option with " \-\-set " or " \%\-\-systohc ,
+the systematic drift rate is (re)calculated by comparing the fully drift
+corrected current Hardware Clock time with the new set time; from that
+it derives the 24 hour drift rate based on the last calibrated timestamp
+from the adjtime file. This updated drift factor is then saved in
+.IR @ADJTIME_PATH@ .
+.PP
+A small amount of error creeps in when
+the Hardware Clock is set, so
+.B \%\-\-adjust
+refrains from making any adjustment that is less
+than 1 second. Later on, when you request an adjustment again, the accumulated
+drift will be more than 1 second and
+.B \%\-\-adjust
+will make the adjustment including any fractional amount.
+.PP
+.B \%hwclock\ \-\-hctosys
+also uses the adjtime file data to compensate the value read from the Hardware
+Clock before using it to set the System Clock. It does not share the 1 second
+limitation of
+.BR \%\-\-adjust ,
+and will correct sub-second drift values immediately. It does not
+change the Hardware Clock time nor the adjtime file. This may eliminate
+the need to use
+.BR \%\-\-adjust ,
+unless something else on the system needs the Hardware Clock to be
+compensated.
+.
+.SS The Adjtime File
+While named for its historical purpose of controlling adjustments only,
+it actually contains other information used by
+.B hwclock
+from one invocation to the next.
+.PP
+The format of the adjtime file is, in ASCII:
+.PP
+Line 1: Three numbers, separated by blanks: 1) the systematic drift rate
+in seconds per day, floating point decimal; 2) the resulting number of
+seconds since 1969 UTC of most recent adjustment or calibration,
+decimal integer; 3) zero (for compatibility with
+.BR \%clock (8))
+as a decimal integer.
+.PP
+Line 2: One number: the resulting number of seconds since 1969 UTC of most
+recent calibration. Zero if there has been no calibration yet or it
+is known that any previous calibration is moot (for example, because
+the Hardware Clock has been found, since that calibration, not to
+contain a valid time). This is a decimal integer.
+.PP
+Line 3: "UTC" or "LOCAL". Tells whether the Hardware Clock is set to
+Coordinated Universal Time or local time. You can always override this
+value with options on the
+.B \%hwclock
+command line.
+.PP
+You can use an adjtime file that was previously used with the
+.BR \%clock "(8) program with " \%hwclock .
+.
+.SS Automatic Hardware Clock Synchronization by the Kernel
+.PP
+You should be aware of another way that the Hardware Clock is kept
+synchronized in some systems. The Linux kernel has a mode wherein it
+copies the System Time to the Hardware Clock every 11 minutes. This mode
+is a compile time option, so not all kernels will have this capability.
+This is a good mode to use when you are using something sophisticated
+like NTP to keep your System Clock synchronized. (NTP is a way to keep
+your System Time synchronized either to a time server somewhere on the
+network or to a radio clock hooked up to your system. See RFC 1305.)
+.PP
+If the kernel is compiled with the \%'11\ minute\ mode' option it will
+be active when the kernel's clock discipline is in a synchronized state.
+When in this state, bit 6 (the bit that is set in the mask 0x0040)
+of the kernel's
+.I \%time_status
+variable is unset. This value is output as the 'status' line of the
+.BR \%adjtimex\ --print " or " \%ntptime " commands."
+.PP
+It takes an outside influence, like the NTP daemon
+to put the kernel's clock discipline into a synchronized state, and
+therefore turn on \%'11\ minute\ mode'.
+It can be turned off by running anything that sets the System Clock the old
+fashioned way, including
+.BR "\%hwclock\ \-\-hctosys" .
+However, if the NTP daemon is still running, it will turn \%'11\ minute\ mode'
+back on again the next time it synchronizes the System Clock.
+.PP
+If your system runs with \%'11\ minute\ mode' on, it may need to use either
+.BR \%\-\-hctosys " or " \%\-\-systz
+in a startup script, especially if the Hardware Clock is configured to use
+the local timescale. Unless the kernel is informed of what timescale the
+Hardware Clock is using, it may clobber it with the wrong one. The kernel
+uses UTC by default.
+.PP
+The first userspace command to set the System Clock informs the
+kernel what timescale the Hardware Clock is using. This happens via the
+.I \%persistent_clock_is_local
+kernel variable. If
+.BR \%\-\-hctosys " or " \%\-\-systz
+is the first, it will set this variable according to the adjtime file or the
+appropriate command-line argument. Note that when using this capability and the
+Hardware Clock timescale configuration is changed, then a reboot is required to
+notify the kernel.
+.PP
+.B \%hwclock\ \-\-adjust
+should not be used with NTP \%'11\ minute\ mode'.
+.
+.SS ISA Hardware Clock Century value
+.PP
+There is some sort of standard that defines CMOS memory Byte 50 on an ISA
+machine as an indicator of what century it is.
+.B \%hwclock
+does not use or set that byte because there are some machines that
+don't define the byte that way, and it really isn't necessary anyway,
+since the year-of-century does a good job of implying which century it
+is.
+.PP
+If you have a bona fide use for a CMOS century byte, contact the
+.B \%hwclock
+maintainer; an option may be appropriate.
+.PP
+Note that this section is only relevant when you are using the "direct
+ISA" method of accessing the Hardware Clock.
+ACPI provides a standard way to access century values, when they
+are supported by the hardware.
+.
+.SH DATE-TIME CONFIGURATION
+.in +4
+.SS Keeping Time without External Synchronization
+.in
+.PP
+This discussion is based on the following conditions:
+.IP \(bu 2
+Nothing is running that alters the date-time clocks, such as an NTP daemon or a cron job.
+.IP \(bu 2
+The system timezone is configured for the correct local time. See below, under
+.BR "POSIX vs 'RIGHT'" .
+.IP \(bu 2
+Early during startup the following are called, in this order:
+.br
+.BI \%adjtimex\ \-\-tick \ value\ \-\-frequency \ value
+.br
+.B \%hwclock\ \-\-hctosys
+.IP \(bu 2
+During shutdown the following is called:
+.br
+.B \%hwclock\ \-\-systohc
+.PP
+.in +4
+.BR * " Systems without " adjtimex " may use " ntptime .
+.in
+.PP
+Whether maintaining precision time with an NTP daemon
+or not, it makes sense to configure the system to keep reasonably good
+date-time on its own.
+.PP
+The first step in making that happen is having a clear understanding of
+the big picture. There are two completely separate hardware devices
+running at their own speed and drifting away from the 'correct' time at
+their own rates. The methods and software for drift correction are
+different for each of them. However, most systems are configured to
+exchange values between these two clocks at startup and shutdown. Now
+the individual device's time keeping errors are transferred back and
+forth between each other. Attempt to configure drift correction for only
+one of them, and the other's drift will be overlaid upon it.
+.PP
+This problem can be avoided when configuring drift correction for the
+System Clock by simply not shutting down the machine. This, plus the
+fact that all of
+.BR \%hwclock 's
+precision (including calculating drift factors) depends upon the System
+Clock's rate being correct, means that configuration of the System Clock
+should be done first.
+.PP
+The System Clock drift is corrected with the
+.BR \%adjtimex "(8) command's " \-\-tick " and " \%\-\-frequency
+options. These two work together: tick is the coarse adjustment and
+frequency is the fine adjustment. (For systems that do not have an
+.BR \%adjtimex " package,"
+.BI \%ntptime\ \-f\ ppm
+may be used instead.)
+.PP
+Some Linux distributions attempt to automatically calculate the System
+Clock drift with
+.BR \%adjtimex 's
+compare operation. Trying to correct one
+drifting clock by using another drifting clock as a reference is akin to
+a dog trying to catch its own tail. Success may happen eventually, but
+great effort and frustration will likely precede it. This automation may
+yield an improvement over no configuration, but expecting optimum
+results would be in error. A better choice for manual configuration
+would be
+.BR \%adjtimex 's " \-\-log " options.
+.PP
+It may be more effective to simply track the System Clock drift with
+.BR \%sntp ", or " \%date\ \-Ins
+and a precision timepiece, and then calculate the correction manually.
+.PP
+After setting the tick and frequency values, continue to test and refine the
+adjustments until the System Clock keeps good time. See
+.BR \%adjtimex (8)
+for more information and the example demonstrating manual drift
+calculations.
+.PP
+Once the System Clock is ticking smoothly, move on to the Hardware Clock.
+.PP
+As a rule, cold drift will work best for most use cases. This should be
+true even for 24/7 machines whose normal downtime consists of a reboot.
+In that case the drift factor value makes little difference. But on the
+rare occasion that the machine is shut down for an extended period, then
+cold drift should yield better results.
+.PP
+.B Steps to calculate cold drift:
+.IP 1 2
+.B "Ensure that NTP daemon will not be launched at startup."
+.IP 2 2
+.RI The " System Clock " "time must be correct at shutdown!"
+.IP 3 2
+Shut down the system.
+.IP 4 2
+Let an extended period pass without changing the Hardware Clock.
+.IP 5 2
+Start the system.
+.IP 6 2
+.RB "Immediately use " hwclock " to set the correct time, adding the"
+.BR \%\-\-update\-drift " option."
+.PP
+Note: if step 6 uses
+.BR \%\-\-systohc ,
+then the System Clock must be set correctly (step 6a) just before doing so.
+.PP
+.RB "Having " hwclock
+calculate the drift factor is a good starting point, but for optimal
+results it will likely need to be adjusted by directly editing the
+.I @ADJTIME_PATH@
+file. Continue to test and refine the drift factor until the Hardware
+Clock is corrected properly at startup. To check this, first make sure
+that the System Time is correct before shutdown and then use
+.BR \%sntp ", or " \%date\ \-Ins
+and a precision timepiece, immediately after startup.
+.SS LOCAL vs UTC
+Keeping the Hardware Clock in a local timescale causes inconsistent
+daylight saving time results:
+.IP \(bu 2
+If Linux is running during a daylight saving time change, the time
+written to the Hardware Clock will be adjusted for the change.
+.IP \(bu 2
+If Linux is NOT running during a daylight saving time change, the time
+read from the Hardware Clock will NOT be adjusted for the change.
+.PP
+The Hardware Clock on an ISA compatible system keeps only a date and time;
+it has no concept of timezone or daylight saving. Therefore, when
+.B hwclock
+is told that it is in local time, it assumes it is in the 'correct'
+local time and makes no adjustments to the time read from it.
+.PP
+Linux handles daylight saving time changes transparently only when the
+Hardware Clock is kept in the UTC timescale. Doing so is made easy for
+system administrators as
+.B \%hwclock
+uses local time for its output and as the argument to the
+.BR \%\-\-date " option."
+.PP
+POSIX systems, like Linux, are designed to have the System Clock operate
+in the UTC timescale. The Hardware Clock's purpose is to initialize the
+System Clock, so also keeping it in UTC makes sense.
+.PP
+Linux does, however, attempt to accommodate the Hardware Clock being in
+the local timescale. This is primarily for dual-booting with older
+versions of MS Windows. From Windows 7 on, the RealTimeIsUniversal
+registry key is supposed to be working properly so that its Hardware
+Clock can be kept in UTC.
+.
+.SS POSIX vs 'RIGHT'
+A discussion on date-time configuration would be incomplete without
+addressing timezones; this is mostly well covered by
+.BR tzset (3).
+One area that seems to have no documentation is the 'right'
+directory of the Time Zone Database, sometimes called tz or zoneinfo.
+.PP
+There are two separate databases in the zoneinfo system, posix
+and 'right'. 'Right' (now named zoneinfo\-leaps) includes leap seconds and posix
+does not. To use the 'right' database the System Clock must be set to
+\%(UTC\ +\ leap seconds), which is equivalent to \%(TAI\ \-\ 10). This
+allows calculating the
+exact number of seconds between two dates that cross a leap second
+epoch. The System Clock is then converted to the correct civil time,
+including UTC, by using the 'right' timezone files which subtract the
+leap seconds. Note: this configuration is considered experimental and is
+known to have issues.
+.PP
+To configure a system to use a particular database all of the files
+located in its directory must be copied to the root of
+.IR \%/usr/share/zoneinfo .
+Files should never be used directly from the posix or 'right' subdirectories, e.g.,
+.RI \%TZ=' right/Europe/Dublin '.
+This habit was becoming so common that the upstream zoneinfo project
+restructured the system's file tree by moving the posix and 'right'
+subdirectories out of the zoneinfo directory and into sibling directories:
+.PP
+.in +2
+.I /usr/share/zoneinfo
+.br
+.I /usr/share/zoneinfo\-posix
+.br
+.I /usr/share/zoneinfo\-leaps
+.PP
+Unfortunately, some Linux distributions are changing it back to the old
+tree structure in their packages. So the problem of system
+administrators reaching into the 'right' subdirectory persists. This
+causes the system timezone to be configured to include leap seconds
+while the zoneinfo database is still configured to exclude them. Then
+when an application such as a World Clock needs the South_Pole timezone
+file; or an email MTA, or
+.B hwclock
+needs the UTC timezone file; they fetch it from the root of
+.IR \%/usr/share/zoneinfo ,
+because that is what they are supposed to do. Those files exclude leap
+seconds, but the System Clock now includes them, causing an incorrect
+time conversion.
+.PP
+Attempting to mix and match files from these separate databases will not
+work, because they each require the System Clock to use a different
+timescale. The zoneinfo database must be configured to use either posix
+or 'right', as described above, or by assigning a database path to the
+.SB TZDIR
+environment variable.
+.SH EXIT STATUS
+One of the following exit values will be returned:
+.TP
+.BR EXIT_SUCCESS " ('0' on POSIX systems)"
+Successful program execution.
+.TP
+.BR EXIT_FAILURE " ('1' on POSIX systems)"
+The operation failed or the command syntax was not valid.
+.SH ENVIRONMENT
+.TP
+.B TZ
+If this variable is set its value takes precedence over the system
+configured timezone.
+.TP
+.B TZDIR
+If this variable is set its value takes precedence over the system
+configured timezone database directory path.
+.SH FILES
+.TP
+.I @ADJTIME_PATH@
+The configuration and state file for hwclock.
+.TP
+.I /etc/localtime
+The system timezone file.
+.TP
+.I /usr/share/zoneinfo/
+The system timezone database directory.
+.PP
+Device files
+.B hwclock
+may try for Hardware Clock access:
+.br
+.I /dev/rtc0
+.br
+.I /dev/rtc
+.br
+.I /dev/misc/rtc
+.br
+.I /dev/efirtc
+.br
+.I /dev/misc/efirtc
+.SH "SEE ALSO"
+.BR date (1),
+.BR adjtimex (8),
+.BR gettimeofday (2),
+.BR settimeofday (2),
+.BR crontab (1),
+.BR tzset (3)
+.
+.SH AUTHORS
+Written by Bryan Henderson, September 1996 (bryanh@giraffe-data.com),
+based on work done on the
+.BR \%clock (8)
+program by Charles Hedrick, Rob Hooft, and Harald Koenig.
+See the source code for complete history and credits.
+.
+.SH AVAILABILITY
+The hwclock command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/hwclock.c b/sys-utils/hwclock.c
new file mode 100644
index 0000000..d9acbaf
--- /dev/null
+++ b/sys-utils/hwclock.c
@@ -0,0 +1,1551 @@
+/*
+ * hwclock.c
+ *
+ * clock.c was written by Charles Hedrick, hedrick@cs.rutgers.edu, Apr 1992
+ * Modified for clock adjustments - Rob Hooft <hooft@chem.ruu.nl>, Nov 1992
+ * Improvements by Harald Koenig <koenig@nova.tat.physik.uni-tuebingen.de>
+ * and Alan Modra <alan@spri.levels.unisa.edu.au>.
+ *
+ * Major rewrite by Bryan Henderson <bryanh@giraffe-data.com>, 96.09.19.
+ * The new program is called hwclock. New features:
+ *
+ * - You can set the hardware clock without also modifying the system
+ * clock.
+ * - You can read and set the clock with finer than 1 second precision.
+ * - When you set the clock, hwclock automatically refigures the drift
+ * rate, based on how far off the clock was before you set it.
+ *
+ * Reshuffled things, added sparc code, and re-added alpha stuff
+ * by David Mosberger <davidm@azstarnet.com>
+ * and Jay Estabrook <jestabro@amt.tay1.dec.com>
+ * and Martin Ostermann <ost@coments.rwth-aachen.de>, aeb@cwi.nl, 990212.
+ *
+ * Fix for Award 2094 bug, Dave Coffin (dcoffin@shore.net) 11/12/98
+ * Change of local time handling, Stefan Ring <e9725446@stud3.tuwien.ac.at>
+ * Change of adjtime handling, James P. Rutledge <ao112@rgfn.epcc.edu>.
+ *
+ * Distributed under GPL
+ */
+/*
+ * Explanation of `adjusting' (Rob Hooft):
+ *
+ * The problem with my machine is that its CMOS clock is 10 seconds
+ * per day slow. With this version of clock.c, and my '/etc/rc.local'
+ * reading '/etc/clock -au' instead of '/etc/clock -u -s', this error
+ * is automatically corrected at every boot.
+ *
+ * To do this job, the program reads and writes the file '/etc/adjtime'
+ * to determine the correction, and to save its data. In this file are
+ * three numbers:
+ *
+ * 1) the correction in seconds per day. (So if your clock runs 5
+ * seconds per day fast, the first number should read -5.0)
+ * 2) the number of seconds since 1/1/1970 the last time the program
+ * was used
+ * 3) the remaining part of a second which was leftover after the last
+ * adjustment
+ *
+ * Installation and use of this program:
+ *
+ * a) create a file '/etc/adjtime' containing as the first and only
+ * line: '0.0 0 0.0'
+ * b) run 'clock -au' or 'clock -a', depending on whether your cmos is
+ * in universal or local time. This updates the second number.
+ * c) set your system time using the 'date' command.
+ * d) update your cmos time using 'clock -wu' or 'clock -w'
+ * e) replace the first number in /etc/adjtime by your correction.
+ * f) put the command 'clock -au' or 'clock -a' in your '/etc/rc.local'
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "closestream.h"
+#include "nls.h"
+#include "optutils.h"
+#include "pathnames.h"
+#include "hwclock.h"
+#include "timeutils.h"
+#include "env.h"
+#include "xalloc.h"
+#include "path.h"
+#include "strutils.h"
+
+#ifdef HAVE_LIBAUDIT
+#include <libaudit.h>
+static int hwaudit_fd = -1;
+#endif
+
+UL_DEBUG_DEFINE_MASK(hwclock);
+UL_DEBUG_DEFINE_MASKNAMES(hwclock) = UL_DEBUG_EMPTY_MASKNAMES;
+
+/* The struct that holds our hardware access routines */
+static struct clock_ops *ur;
+
+/* Maximal clock adjustment in seconds per day.
+ (The glibc adjtime() call has a 2145 second limit on i386, so it is good enough for us as well;
+ 43219 is the maximal safe value preventing exact_adjustment overflow.) */
+#define MAX_DRIFT 2145.0
+
+/*
+ * In-memory image of the adjtime file, which tells us how to do drift
+ * corrections. Apart from <dirty>, every member comes straight from the
+ * adjtime file, so see the documentation of that file for details.
+ */
+struct adjtime {
+ /*
+ * Indication that what's in this structure is not what's in the disk
+ * file (because it has been updated since read from the disk file).
+ */
+ int dirty;
+ /*
+ * line 1: the systematic drift rate in seconds per day, the time of the
+ * most recent adjustment or calibration, and a third number that the
+ * file format documents as zero (kept for compatibility with clock(8)).
+ */
+ double drift_factor;
+ time_t last_adj_time;
+ double not_adjusted;
+ /*
+ * line 2: the most recent time that we set the clock from an external
+ * authority (as opposed to just doing a drift adjustment).
+ */
+ time_t last_calib_time;
+ /*
+ * line 3: to which time zone, local or UTC, we most recently set the
+ * hardware clock. read_adjtime() stores UNKNOWN when the third line is
+ * neither "UTC" nor "LOCAL".
+ */
+ enum a_local_utc { UTC = 0, LOCAL, UNKNOWN } local_utc;
+};
+
+/*
+ * Initialize the hwclock debug mask from <str> and report the resulting
+ * mask and the package version on the INIT debug channel.
+ *
+ * NOTE(review): the accepted syntax of <str> and the handling of a NULL
+ * <str> are defined by the __UL_INIT_DEBUG_FROM_STRING() helper, which is
+ * not visible here -- confirm against the debug helper header.
+ */
+static void hwclock_init_debug(const char *str)
+{
+ __UL_INIT_DEBUG_FROM_STRING(hwclock, HWCLOCK_DEBUG_, 0, str);
+
+ DBG(INIT, ul_debug("hwclock debug mask: 0x%04x", hwclock_debug_mask));
+ DBG(INIT, ul_debug("hwclock version: %s", PACKAGE_STRING));
+}
+
+/*
+ * FOR TESTING ONLY: pause for a pseudo-random interval of 0..999999
+ * microseconds (i.e. less than one second) to perturb timing.
+ */
+static void up_to_1000ms_sleep(void)
+{
+	const int pause_usec = random() % 1000000;
+
+	DBG(RANDOM_SLEEP, ul_debug("sleeping ~%d usec", pause_usec));
+	xusleep(pause_usec);
+}
+
+/*
+ * Widen a time_t to a struct timeval: whole seconds are copied and the
+ * microsecond part is zero.
+ */
+static struct timeval t2tv(time_t timet)
+{
+	return (struct timeval){ .tv_sec = timet, .tv_usec = 0 };
+}
+
+/*
+ * Return, in seconds, the first time minus the second time
+ * (<subtrahend> - <subtractor>), both given in "timeval" format. The
+ * result is negative when <subtrahend> is the earlier of the two.
+ */
+double time_diff(struct timeval subtrahend, struct timeval subtractor)
+{
+	const double whole_secs = subtrahend.tv_sec - subtractor.tv_sec;
+	const double micro_secs = subtrahend.tv_usec - subtractor.tv_usec;
+
+	return whole_secs + micro_secs / 1E6;
+}
+
+/*
+ * The time, in "timeval" format, which is <increment> seconds after the
+ * time <addend>. Of course, <increment> may be negative.
+ *
+ * The whole-second part of <increment> is taken as a time_t rather than an
+ * int: converting a double outside the range of int is undefined, and an
+ * int would also cap the increment at 32 bits where time_t is wider.
+ * The fractional part is truncated (not rounded) to whole microseconds
+ * when it is stored in tv_usec.
+ */
+static struct timeval time_inc(struct timeval addend, double increment)
+{
+	const time_t whole_secs = (time_t)increment;	/* truncates toward zero */
+	struct timeval newtime;
+
+	newtime.tv_sec = addend.tv_sec + whole_secs;
+	newtime.tv_usec = addend.tv_usec + (increment - whole_secs) * 1E6;
+
+	/*
+	 * Now adjust it so that the microsecond value is between 0 and 1
+	 * million. The fractional part is strictly within (-1, 1) second,
+	 * so a single carry or borrow is enough for a normalized <addend>.
+	 */
+	if (newtime.tv_usec < 0) {
+		newtime.tv_usec += 1000000;
+		newtime.tv_sec -= 1;
+	} else if (newtime.tv_usec >= 1000000) {
+		newtime.tv_usec -= 1000000;
+		newtime.tv_sec += 1;
+	}
+	return newtime;
+}
+
+/*
+ * Decide whether the Hardware Clock is to be treated as UTC (returns 1)
+ * or local time (returns 0).
+ *
+ * Precedence: ctl->utc, then ctl->local_opt, then the adjtime data, where
+ * anything other than LOCAL counts as UTC. The decision is printed when
+ * ctl->verbose is set.
+ */
+static int
+hw_clock_is_utc(const struct hwclock_control *ctl,
+		const struct adjtime adjtime)
+{
+	int is_utc;
+
+	if (ctl->utc) {
+		/* --utc explicitly given on command line */
+		is_utc = 1;
+	} else if (ctl->local_opt) {
+		/* --localtime explicitly given */
+		is_utc = 0;
+	} else {
+		/* fall back to the adjtime file - default is UTC */
+		is_utc = (adjtime.local_utc == LOCAL) ? 0 : 1;
+	}
+
+	if (ctl->verbose)
+		printf(_("Assuming hardware clock is kept in %s time.\n"),
+		       is_utc ? _("UTC") : _("local"));
+
+	return is_utc;
+}
+
+/*
+ * Read the adjustment parameters out of the adjtime file named by
+ * ctl->adj_file_name and store them in <*adjtime_p>.
+ *
+ * Numeric members that cannot be parsed keep whatever value the caller
+ * placed in <*adjtime_p> beforehand; local_utc is always overwritten
+ * (UNKNOWN when the third line is missing or unrecognized).
+ *
+ * Returns EXIT_SUCCESS when the file was read, and also when it is not
+ * readable at all (the structure is then left untouched). Returns
+ * EXIT_FAILURE only if the file passed the access() check but could not be
+ * opened.
+ */
+static int read_adjtime(const struct hwclock_control *ctl,
+			struct adjtime *adjtime_p)
+{
+	FILE *adjfile;
+	char line1[81];		/* String: first line of adjtime file */
+	char line2[81];		/* String: second line of adjtime file */
+	char line3[81];		/* String: third line of adjtime file */
+	/*
+	 * time_t is not necessarily 'long', so scanning straight into the
+	 * time_t members with "%ld" is a type mismatch. Scan into long long
+	 * temporaries instead; they start out as the current values so that
+	 * a failed conversion still leaves the member unchanged.
+	 */
+	long long last_adj_time = adjtime_p->last_adj_time;
+	long long last_calib_time = adjtime_p->last_calib_time;
+
+	if (access(ctl->adj_file_name, R_OK) != 0)
+		return EXIT_SUCCESS;
+
+	adjfile = fopen(ctl->adj_file_name, "r");	/* open file for reading */
+	if (adjfile == NULL) {
+		warn(_("cannot open %s"), ctl->adj_file_name);
+		return EXIT_FAILURE;
+	}
+
+	if (!fgets(line1, sizeof(line1), adjfile))
+		line1[0] = '\0';	/* In case fgets fails */
+	if (!fgets(line2, sizeof(line2), adjfile))
+		line2[0] = '\0';	/* In case fgets fails */
+	if (!fgets(line3, sizeof(line3), adjfile))
+		line3[0] = '\0';	/* In case fgets fails */
+
+	fclose(adjfile);
+
+	sscanf(line1, "%lf %lld %lf",
+	       &adjtime_p->drift_factor,
+	       &last_adj_time,
+	       &adjtime_p->not_adjusted);
+	adjtime_p->last_adj_time = (time_t)last_adj_time;
+
+	sscanf(line2, "%lld", &last_calib_time);
+	adjtime_p->last_calib_time = (time_t)last_calib_time;
+
+	/*
+	 * Accept the keyword with or without its trailing newline: a file
+	 * whose last line is not newline-terminated would otherwise have
+	 * "LOCAL" rejected and the clock silently treated as UTC.
+	 */
+	if (!strcmp(line3, "UTC\n") || !strcmp(line3, "UTC")) {
+		adjtime_p->local_utc = UTC;
+	} else if (!strcmp(line3, "LOCAL\n") || !strcmp(line3, "LOCAL")) {
+		adjtime_p->local_utc = LOCAL;
+	} else {
+		adjtime_p->local_utc = UNKNOWN;
+		if (line3[0]) {
+			warnx(_("Warning: unrecognized third line in adjtime file\n"
+				"(Expected: `UTC' or `LOCAL' or nothing.)"));
+		}
+	}
+
+	if (ctl->verbose) {
+		printf(_
+		       ("Last drift adjustment done at %ld seconds after 1969\n"),
+		       (long)adjtime_p->last_adj_time);
+		printf(_("Last calibration done at %ld seconds after 1969\n"),
+		       (long)adjtime_p->last_calib_time);
+		printf(_("Hardware clock is on %s time\n"),
+		       (adjtime_p->local_utc ==
+			LOCAL) ? _("local") : (adjtime_p->local_utc ==
+					       UTC) ? _("UTC") : _("unknown"));
+	}
+
+	return EXIT_SUCCESS;
+}
+
+/*
+ * Block until the falling edge of the Hardware Clock's update flag, so
+ * that a time read from the clock right after we return is exact.
+ *
+ * The clock has only 1 second precision, so it shows the exact time just
+ * once per second, right on the falling edge of the update flag.
+ *
+ * The wait (up to one second) is delegated to the active clock access
+ * method; it is either blocked on an rtc device or a CPU spin loop. The
+ * former is probably not very accurate.
+ *
+ * Return 0 if it worked, nonzero if it didn't. Progress is reported when
+ * ctl->verbose is set.
+ */
+static int synchronize_to_clock_tick(const struct hwclock_control *ctl)
+{
+	int status;
+
+	if (ctl->verbose)
+		printf(_("Waiting for clock tick...\n"));
+
+	status = ur->synchronize_to_clock_tick(ctl);
+
+	if (!ctl->verbose)
+		return status;
+
+	if (status == 0)
+		printf(_("...got clock tick\n"));
+	else
+		printf(_("...synchronization failed\n"));
+
+	return status;
+}
+
+/*
+ * Convert a time in broken down format (hours, minutes, etc.) into standard
+ * unix time (seconds into epoch) and store it in *systime_p.
+ *
+ * The broken down time <tm> is interpreted as UTC when ctl->universal is
+ * set (timegm() is used) and as local time otherwise (mktime() is used).
+ *
+ * Returns 1 if the conversion produced a value and 0 if the conversion
+ * returned -1, which is taken to mean that <tm> does not hold valid values;
+ * *systime_p is then -1. Note that the conversion sometimes goes ahead and
+ * computes a fictional time "as if" the input values were valid, e.g. the
+ * 31st day of April may become May 1. In such a case the fictional value is
+ * stored in *systime_p and 1 is returned.
+ *
+ * The outcome is printed when ctl->verbose is set.
+ */
+static int
+mktime_tz(const struct hwclock_control *ctl, struct tm tm,
+	  time_t *systime_p)
+{
+	*systime_p = ctl->universal ? timegm(&tm) : mktime(&tm);
+
+	if (*systime_p != -1) {
+		if (ctl->verbose)
+			printf(_
+			       ("Hw clock time : %4d/%.2d/%.2d %.2d:%.2d:%.2d = "
+				"%ld seconds since 1969\n"), tm.tm_year + 1900,
+			       tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min,
+			       tm.tm_sec, (long)*systime_p);
+		return 1;
+	}
+
+	/*
+	 * A result of -1 apparently (not specified in mktime()
+	 * documentation) means the 'tm' structure does not contain valid
+	 * values; the converse does not hold.
+	 */
+	if (ctl->verbose)
+		printf(_("Invalid values in hardware clock: "
+			 "%4d/%.2d/%.2d %.2d:%.2d:%.2d\n"),
+		       tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+		       tm.tm_hour, tm.tm_min, tm.tm_sec);
+	return 0;
+}
+
+/*
+ * Read the Hardware Clock through the active clock access method and
+ * convert the result to unix time.
+ *
+ * On success returns 0, stores the converted time in *systime_p and stores
+ * in *valid_p whether that conversion succeeded (see mktime_tz()). If the
+ * clock could not be read, the access method's nonzero error code is
+ * returned and neither output is written.
+ */
+static int
+read_hardware_clock(const struct hwclock_control *ctl,
+		    int *valid_p, time_t *systime_p)
+{
+	struct tm hw_tm;
+	const int rc = ur->read_hardware_clock(ctl, &hw_tm);
+
+	if (rc != 0)
+		return rc;
+
+	if (ctl->verbose)
+		printf(_
+		       ("Time read from Hardware Clock: %4d/%.2d/%.2d %02d:%02d:%02d\n"),
+		       hw_tm.tm_year + 1900, hw_tm.tm_mon + 1, hw_tm.tm_mday,
+		       hw_tm.tm_hour, hw_tm.tm_min, hw_tm.tm_sec);
+
+	*valid_p = mktime_tz(ctl, hw_tm, systime_p);
+	return 0;
+}
+
+/*
+ * Set the Hardware Clock to the time <newtime>, expressed in UTC when
+ * ctl->universal is set and in the local time zone otherwise.
+ *
+ * The clock is not actually written when ctl->testing is set. The value
+ * being set is printed when ctl->verbose is set.
+ */
+static void
+set_hardware_clock(const struct hwclock_control *ctl, const time_t newtime)
+{
+	/* <newtime> in broken down format, in the time zone chosen above */
+	struct tm broken;
+
+	if (!ctl->universal)
+		localtime_r(&newtime, &broken);
+	else
+		gmtime_r(&newtime, &broken);
+
+	if (ctl->verbose)
+		printf(_("Setting Hardware Clock to %.2d:%.2d:%.2d "
+			 "= %ld seconds since 1969\n"),
+		       broken.tm_hour, broken.tm_min,
+		       broken.tm_sec, (long)newtime);
+
+	if (ctl->testing)
+		return;
+
+	ur->set_hardware_clock(ctl, &broken);
+}
+
+/*
+ * Choose the delay, in seconds, to apply when setting the Hardware Clock,
+ * based on the RTC type published as /sys/class/rtc/<device>/name, where
+ * <device> is the last path component of the active access method's
+ * device path.
+ *
+ * Returns 0.5 for "rtc_cmos" (MC146818A-compatible, x86) and 0 for any
+ * other RTC type. If the type cannot be determined (no device path, no
+ * file name component, sysfs lookup failed, or empty name) 0.5 is returned
+ * for backward compatibility. The detected type is printed when
+ * ctl->verbose is set.
+ */
+static double
+get_hardware_delay(const struct hwclock_control *ctl)
+{
+	const char *devpath, *rtcname;
+	char name[128 + 1];
+	struct path_cxt *pc;
+	int rc;
+
+	devpath = ur->get_device_path();
+	if (!devpath)
+		goto unknown;
+
+	rtcname = strrchr(devpath, '/');
+	if (!rtcname || !*(rtcname + 1))
+		goto unknown;
+	rtcname++;
+
+	pc = ul_new_path("/sys/class/rtc/%s", rtcname);
+	if (!pc)
+		goto unknown;
+	/*
+	 * The %[ conversion takes a 'char *'; pass the array itself (which
+	 * decays to one) rather than its address. The field width of 128
+	 * leaves room for the terminating NUL in name[].
+	 */
+	rc = ul_path_scanf(pc, "name", "%128[^\n ]", name);
+	ul_unref_path(pc);
+
+	/* exactly one field must have been converted */
+	if (rc != 1 || !*name)
+		goto unknown;
+
+	if (ctl->verbose)
+		printf(_("RTC type: '%s'\n"), name);
+
+	/* MC146818A-compatible (x86) */
+	if (strcmp(name, "rtc_cmos") == 0)
+		return 0.5;
+
+	/* Another HW */
+	return 0;
+unknown:
+	/* Let's be backwardly compatible */
+	return 0.5;
+}
+
+
+/*
+ * Set the Hardware Clock to the time "sethwtime", in local time zone or
+ * UTC, according to ctl->universal.
+ *
+ * Wait for a fraction of a second so that "sethwtime" is the value of the
+ * Hardware Clock as of system time "refsystime", which is in the past. For
+ * example, if "sethwtime" is 14:03:05 and "refsystime" is 12:10:04.5 and
+ * the current system time is 12:10:06.0: Wait .5 seconds (to make exactly 2
+ * seconds since "refsystime") and then set the Hardware Clock to 14:03:07,
+ * thus getting a precise and retroactive setting of the clock. The .5 delay is
+ * default on x86, see --delay and get_hardware_delay().
+ *
+ * (Don't be confused by the fact that the system clock and the Hardware
+ * Clock differ by two hours in the above example. That's just to remind you
+ * that there are two independent time scales here).
+ *
+ * The delay is ctl->rtc_delay unless that is -1.0, in which case it is
+ * chosen by get_hardware_delay(). Progress is printed when ctl->verbose is
+ * set.
+ *
+ * This function ought to be able to accept set times as fractional times.
+ * Idea for future enhancement.
+ */
+static void
+set_hardware_clock_exact(const struct hwclock_control *ctl,
+			 const time_t sethwtime,
+			 const struct timeval refsystime)
+{
+	/*
+	 * The Hardware Clock can only be set to any integer time plus one
+	 * half second. The integer time is required because there is no
+	 * interface to set or get a fractional second. The additional half
+	 * second is because the Hardware Clock updates to the following
+	 * second precisely 500 ms (not 1 second!) after you release the
+	 * divider reset (after setting the new time) - see description of
+	 * DV2, DV1, DV0 in Register A in the MC146818A data sheet (and note
+	 * that although that document doesn't say so, real-world code seems
+	 * to expect that the SET bit in Register B functions the same way).
+	 * That means that, e.g., when you set the clock to 1:02:03, it
+	 * effectively really sets it to 1:02:03.5, because it will update to
+	 * 1:02:04 only half a second later. Our caller passes the desired
+	 * integer Hardware Clock time in sethwtime, and the corresponding
+	 * system time (which may have a fractional part, and which may or may
+	 * not be the same!) in refsystime. In an ideal situation, we would
+	 * then apply sethwtime to the Hardware Clock at refsystime+500ms, so
+	 * that when the Hardware Clock ticks forward to sethwtime+1s half a
+	 * second later at refsystime+1000ms, everything is in sync. So we
+	 * spin, waiting for gettimeofday() to return a time at or after that
+	 * time (refsystime+500ms) up to a tolerance value, initially 1ms. If
+	 * we miss that time due to being preempted for some other process,
+	 * then we increase the margin a little bit (initially 1ms, doubling
+	 * each time), add 1 second (or more, if needed to get a time that is
+	 * in the future) to both the time for which we are waiting and the
+	 * time that we will apply to the Hardware Clock, and start waiting
+	 * again.
+	 *
+	 * For example, the caller requests that we set the Hardware Clock to
+	 * 1:02:03, with reference time (current system time) = 6:07:08.250.
+	 * We want the Hardware Clock to update to 1:02:04 at 6:07:09.250 on
+	 * the system clock, and the first such update will occur 0.500
+	 * seconds after we write to the Hardware Clock, so we spin until the
+	 * system clock reads 6:07:08.750. If we get there, great, but let's
+	 * imagine the system is so heavily loaded that our process is
+	 * preempted and by the time we get to run again, the system clock
+	 * reads 6:07:11.990. We now want to wait until the next xx:xx:xx.750
+	 * time, which is 6:07:12.750 (4.5 seconds after the reference time),
+	 * at which point we will set the Hardware Clock to 1:02:07 (4 seconds
+	 * after the originally requested time). If we do that successfully,
+	 * then at 6:07:13.250 (5 seconds after the reference time), the
+	 * Hardware Clock will update to 1:02:08 (5 seconds after the
+	 * originally requested time), and all is well thereafter.
+	 */
+
+	time_t newhwtime = sethwtime;
+	double target_time_tolerance_secs = 0.001; /* initial value */
+	double tolerance_incr_secs = 0.001; /* initial value */
+	double delay;
+	struct timeval rtc_set_delay_tv;
+
+	struct timeval targetsystime;
+	struct timeval nowsystime;
+	struct timeval prevsystime = refsystime;
+	double deltavstarget;
+
+	if (ctl->rtc_delay != -1.0) /* --delay specified */
+		delay = ctl->rtc_delay;
+	else
+		delay = get_hardware_delay(ctl);
+
+	if (ctl->verbose)
+		printf(_("Using delay: %.6f seconds\n"), delay);
+
+	rtc_set_delay_tv.tv_sec = 0;
+	rtc_set_delay_tv.tv_usec = delay * 1E6;
+
+	timeradd(&refsystime, &rtc_set_delay_tv, &targetsystime);
+
+	/*
+	 * NOTE: time_t and suseconds_t are not necessarily 'long'; every
+	 * value printed with %ld below is cast to long, as is done for the
+	 * other time_t values printed in this file.
+	 */
+	while (1) {
+		double ticksize;
+
+		ON_DBG(RANDOM_SLEEP, up_to_1000ms_sleep());
+
+		gettimeofday(&nowsystime, NULL);
+		deltavstarget = time_diff(nowsystime, targetsystime);
+		ticksize = time_diff(nowsystime, prevsystime);
+		prevsystime = nowsystime;
+
+		if (ticksize < 0) {
+			if (ctl->verbose)
+				printf(_("time jumped backward %.6f seconds "
+					 "to %ld.%06ld - retargeting\n"),
+				       ticksize, (long)nowsystime.tv_sec,
+				       (long)nowsystime.tv_usec);
+			/* The retarget is handled at the end of the loop. */
+		} else if (deltavstarget < 0) {
+			/* deltavstarget < 0 if current time < target time */
+			DBG(DELTA_VS_TARGET,
+			    ul_debug("%ld.%06ld < %ld.%06ld (%.6f)",
+				     (long)nowsystime.tv_sec,
+				     (long)nowsystime.tv_usec,
+				     (long)targetsystime.tv_sec,
+				     (long)targetsystime.tv_usec,
+				     deltavstarget));
+			continue;	/* not there yet - keep spinning */
+		} else if (deltavstarget <= target_time_tolerance_secs) {
+			/* Close enough to the target time; done waiting. */
+			break;
+		} else /* (deltavstarget > target_time_tolerance_secs) */ {
+			/*
+			 * We missed our window. Increase the tolerance and
+			 * aim for the next opportunity.
+			 */
+			if (ctl->verbose)
+				printf(_("missed it - %ld.%06ld is too far "
+					 "past %ld.%06ld (%.6f > %.6f)\n"),
+				       (long)nowsystime.tv_sec,
+				       (long)nowsystime.tv_usec,
+				       (long)targetsystime.tv_sec,
+				       (long)targetsystime.tv_usec,
+				       deltavstarget,
+				       target_time_tolerance_secs);
+			target_time_tolerance_secs += tolerance_incr_secs;
+			tolerance_incr_secs *= 2;
+		}
+
+		/*
+		 * Aim for the same offset (tv_usec) within the second in
+		 * either the current second (if that offset hasn't arrived
+		 * yet), or the next second.
+		 */
+		if (nowsystime.tv_usec < targetsystime.tv_usec)
+			targetsystime.tv_sec = nowsystime.tv_sec;
+		else
+			targetsystime.tv_sec = nowsystime.tv_sec + 1;
+	}
+
+	/*
+	 * Advance the requested time by the whole seconds that have elapsed
+	 * since refsystime, not counting the RTC delay itself.
+	 */
+	newhwtime = sethwtime
+		    + (int)(time_diff(nowsystime, refsystime)
+			    - delay /* don't count this */
+			    + 0.5 /* for rounding */);
+	if (ctl->verbose)
+		printf(_("%ld.%06ld is close enough to %ld.%06ld (%.6f < %.6f)\n"
+			 "Set RTC to %ld (%ld + %d; refsystime = %ld.%06ld)\n"),
+		       (long)nowsystime.tv_sec, (long)nowsystime.tv_usec,
+		       (long)targetsystime.tv_sec, (long)targetsystime.tv_usec,
+		       deltavstarget, target_time_tolerance_secs,
+		       (long)newhwtime, (long)sethwtime,
+		       (int)(newhwtime - sethwtime),
+		       (long)refsystime.tv_sec, (long)refsystime.tv_usec);
+
+	set_hardware_clock(ctl, newhwtime);
+}
+
+/* Print <hwctime> via strtimeval_iso(); EXIT_FAILURE if it cannot be formatted. */
+static int
+display_time(struct timeval hwctime)
+{
+	char stamp[ISO_BUFSIZ];
+	if (strtimeval_iso(&hwctime, ISO_TIMESTAMP_DOT, stamp, sizeof(stamp)) == 0) {
+		printf("%s\n", stamp);
+		return EXIT_SUCCESS;
+	}
+	return EXIT_FAILURE;
+}
+
+/*
+ * Adjusts System time, sets the kernel's timezone and RTC timescale.
+ *
+ * The kernel warp_clock function adjusts the System time according to the
+ * tz.tz_minuteswest argument and sets PCIL (see below). At boot settimeofday(2)
+ * has one-shot access to this function as shown in the table below.
+ *
+ * +-------------------------------------------------------------------+
+ * | settimeofday(tv, tz) |
+ * |-------------------------------------------------------------------|
+ * | Arguments | System Time | PCIL | | warp_clock |
+ * | tv | tz | set | warped | set | firsttime | locked |
+ * |---------|---------|---------------|------|-----------|------------|
+ * | pointer | NULL | yes | no | no | 1 | no |
+ * | pointer | pointer | yes | no | no | 0 | yes |
+ * | NULL | ptr2utc | no | no | no | 0 | yes |
+ * | NULL | pointer | no | yes | yes | 0 | yes |
+ * +-------------------------------------------------------------------+
+ * ptr2utc: tz.tz_minuteswest is zero (UTC).
+ * PCIL: persistent_clock_is_local, sets the "11 minute mode" timescale.
+ * firsttime: locks the warp_clock function (initialized to 1 at boot).
+ *
+ * +---------------------------------------------------------------------------+
+ * | op | RTC scale | settimeofday calls |
+ * |---------|-----------|-----------------------------------------------------|
+ * | systz | Local | 1) warps system time*, sets PCIL* and kernel tz |
+ * | systz | UTC | 1st) locks warp_clock* 2nd) sets kernel tz |
+ * | hctosys | Local | 1st) sets PCIL* 2nd) sets system time and kernel tz |
+ * | hctosys | UTC | 1) sets system time and kernel tz |
+ * +---------------------------------------------------------------------------+
+ * * only on first call after boot
+ */
+static int
+set_system_clock(const struct hwclock_control *ctl,
+		 const struct timeval newtime)
+{
+	struct tm broken;			/* <newtime> broken down by localtime_r() */
+	int minuteswest;			/* tz_minuteswest handed to the kernel */
+	int rc = 0;				/* result of the latest settimeofday() */
+	const struct timezone tz_utc = { 0 };
+	/* minuteswest comes from get_gmtoff() applied to <newtime> in local time. */
+	localtime_r(&newtime.tv_sec, &broken);
+	minuteswest = -get_gmtoff(&broken) / 60;
+	/* Announce the calls made below; with --test only these messages appear. */
+	if (ctl->verbose) {
+		if (ctl->hctosys && !ctl->universal)
+			printf(_("Calling settimeofday(NULL, %d) to set "
+				 "persistent_clock_is_local.\n"), minuteswest);
+		if (ctl->systz && ctl->universal)
+			puts(_("Calling settimeofday(NULL, 0) "
+			       "to lock the warp function."));
+		if (ctl->hctosys)	/* time_t/suseconds_t need not be long: cast for %ld */
+			printf(_("Calling settimeofday(%ld.%06ld, %d)\n"),
+			       (long) newtime.tv_sec, (long) newtime.tv_usec, minuteswest);
+		else {
+			printf(_("Calling settimeofday(NULL, %d) "), minuteswest);
+			if (ctl->universal)
+				puts(_("to set the kernel timezone."));
+			else
+				puts(_("to warp System time."));
+		}
+	}
+	/* Returns EXIT_FAILURE as soon as one settimeofday() call has failed. */
+	if (!ctl->testing) {
+		const struct timezone tz = { minuteswest };
+		/* The preparatory call (if any) must come before the final one. */
+		if (ctl->hctosys && !ctl->universal)	/* set PCIL */
+			rc = settimeofday(NULL, &tz);
+		if (ctl->systz && ctl->universal)	/* lock warp_clock */
+			rc = settimeofday(NULL, &tz_utc);
+		if (!rc && ctl->hctosys)		/* system time + kernel tz */
+			rc = settimeofday(&newtime, &tz);
+		else if (!rc)				/* kernel tz only */
+			rc = settimeofday(NULL, &tz);
+
+		if (rc) {
+			warn(_("settimeofday() failed"));
+			return EXIT_FAILURE;
+		}
+	}
+	return EXIT_SUCCESS;
+}
+
+/*
+ * Record <nowtime> as the moment the Hardware Clock was last calibrated and
+ * last adjusted, clear the pending adjustment, and mark <*adjtime_p> dirty,
+ * so that future drift calculations start from this set point.
+ *
+ * With the --update-drift option, and provided a previous calibration exists
+ * that lies at least four hours before <hclocktime>, the drift factor in
+ * <*adjtime_p> is refined first: <hclocktime> is the Hardware Clock time
+ * already corrected with the current factor, so its distance from <nowtime>
+ * is drift that the current factor did not account for.
+ */
+static void
+adjust_drift_factor(const struct hwclock_control *ctl,
+		    struct adjtime *adjtime_p,
+		    const struct timeval nowtime,
+		    const struct timeval hclocktime)
+{
+	/* Minimum distance from the last calibration for a drift update. */
+	const time_t min_calib_gap = 4 * 60 * 60;
+
+	if (!ctl->update) {
+		if (ctl->verbose)
+			printf(_("Not adjusting drift factor because the "
+				 "--update-drift option was not used.\n"));
+	} else if (!adjtime_p->last_calib_time) {
+		if (ctl->verbose)
+			printf(_("Not adjusting drift factor because last "
+				 "calibration time is zero,\n"
+				 "so history is bad and calibration startover "
+				 "is necessary.\n"));
+	} else if (hclocktime.tv_sec - adjtime_p->last_calib_time < min_calib_gap) {
+		if (ctl->verbose)
+			printf(_("Not adjusting drift factor because it has "
+				 "been less than four hours since the last "
+				 "calibration.\n"));
+	} else {
+		/*
+		 * correction = uncorrected drift / days since calibration.
+		 *
+		 * All arithmetic is done in doubles; floats almost suffice,
+		 * but 195 days + 1 second equals 195 days in floats.
+		 */
+		const struct timeval calibrated = t2tv(adjtime_p->last_calib_time);
+		const double missed_drift = time_diff(nowtime, hclocktime);
+		const double calib_age = time_diff(nowtime, calibrated);
+		const double correction =
+			missed_drift / (calib_age / (24.0 * 60.0 * 60.0));
+		double updated = adjtime_p->drift_factor + correction;
+
+		if (fabs(updated) > MAX_DRIFT) {
+			if (ctl->verbose)
+				printf(_("Clock drift factor was calculated as "
+					 "%f seconds/day.\n"
+					 "It is far too much. Resetting to zero.\n"),
+				       updated);
+			updated = 0;
+		} else if (ctl->verbose) {
+			printf(_("Clock drifted %f seconds in the past "
+				 "%f seconds\nin spite of a drift factor of "
+				 "%f seconds/day.\n"
+				 "Adjusting drift factor by %f seconds/day\n"),
+			       missed_drift, calib_age,
+			       adjtime_p->drift_factor, correction);
+		}
+
+		adjtime_p->drift_factor = updated;
+	}
+
+	/* Whatever happened above, <nowtime> becomes the new set point. */
+	adjtime_p->last_adj_time = nowtime.tv_sec;
+	adjtime_p->last_calib_time = nowtime.tv_sec;
+	adjtime_p->not_adjusted = 0;
+	adjtime_p->dirty = 1;
+}
+
+/*
+ * Calculate the drift correction currently needed for the
+ * Hardware Clock based on the last time it was adjusted,
+ * and the current drift factor, as stored in the adjtime file.
+ *
+ * <factor> is the drift factor in seconds per day, <last_time> the time of
+ * the last adjustment, <not_adjusted> an amount in seconds that is added to
+ * the result as-is, and <systime> the time the correction is computed for.
+ *
+ * The total drift adjustment needed is stored at tdrift_p.  tv_sec is the
+ * floor() of the exact value, so tv_usec carries the non-negative remainder.
+ */
+static void
+calculate_adjustment(const struct hwclock_control *ctl,
+		     const double factor,
+		     const time_t last_time,
+		     const double not_adjusted,
+		     const time_t systime, struct timeval *tdrift_p)
+{
+	/*
+	 * time_t and suseconds_t are not guaranteed to be long; convert
+	 * explicitly so the arguments match the %ld conversions below
+	 * without touching the translated format strings.
+	 */
+	const long elapsed = (long)(systime - last_time);
+	double exact_adjustment;
+
+	exact_adjustment =
+	    ((double)(systime - last_time)) * factor / (24 * 60 * 60)
+	    + not_adjusted;
+	tdrift_p->tv_sec = (time_t) floor(exact_adjustment);
+	tdrift_p->tv_usec = (exact_adjustment -
+			     (double)tdrift_p->tv_sec) * 1E6;
+	if (ctl->verbose) {
+		printf(P_("Time since last adjustment is %ld second\n",
+			  "Time since last adjustment is %ld seconds\n",
+			  elapsed),
+		       elapsed);
+		printf(_("Calculated Hardware Clock drift is %ld.%06ld seconds\n"),
+		       (long) tdrift_p->tv_sec, (long) tdrift_p->tv_usec);
+	}
+}
+
+/*
+ * Write the contents of the <adjtime> structure to its disk file
+ * (ctl->adj_file_name).
+ *
+ * The write is unconditional: skipping it when the contents are clean
+ * (unchanged since read from disk) is left to the caller.  In test mode the
+ * file is not touched; in verbose mode the new contents are printed first.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if the file cannot be opened or
+ * updated.
+ */
+static int save_adjtime(const struct hwclock_control *ctl,
+			const struct adjtime *adjtime)
+{
+	char *content;		/* Stuff to write to disk file */
+	int rc = EXIT_SUCCESS;
+
+	/* time_t is not necessarily long; print it through long long. */
+	xasprintf(&content, "%f %lld %f\n%lld\n%s\n",
+		  adjtime->drift_factor,
+		  (long long) adjtime->last_adj_time,
+		  adjtime->not_adjusted,
+		  (long long) adjtime->last_calib_time,
+		  (adjtime->local_utc == LOCAL) ? "LOCAL" : "UTC");
+
+	if (ctl->verbose)
+		printf(_("New %s data:\n%s"),
+		       ctl->adj_file_name, content);
+
+	if (!ctl->testing) {
+		FILE *fp = fopen(ctl->adj_file_name, "w");
+
+		if (fp == NULL) {
+			warn(_("cannot open %s"), ctl->adj_file_name);
+			rc = EXIT_FAILURE;
+		} else {
+			int failed = fputs(content, fp) < 0;
+
+			/*
+			 * Close the stream even after a failed write so
+			 * that it is not leaked.
+			 */
+			if (close_stream(fp) != 0)
+				failed = 1;
+			if (failed) {
+				warn(_("cannot update %s"), ctl->adj_file_name);
+				rc = EXIT_FAILURE;
+			}
+		}
+	}
+
+	/* xasprintf() allocated the buffer; release it on every path. */
+	free(content);
+	return rc;
+}
+
+/*
+ * Apply the requested adjustment: set the Hardware Clock to <hclocktime>
+ * and record the adjustment in <*adjtime_p> (last-adjusted time, cleared
+ * not_adjusted, dirty flag).
+ *
+ * Nothing is changed when the adjtime history is unusable (last adjustment
+ * time of zero) or when the stored drift factor exceeds MAX_DRIFT.
+ *
+ * <hclocktime> is the drift corrected time read from the Hardware Clock.
+ *
+ * <read_time> was the system time when <hclocktime> was read.  The Hardware
+ * Clock can only be set to a whole second, so the fractional part of
+ * <hclocktime> is subtracted from <read_time> to 'refer back' the trigger
+ * point used for setting the clock to hclocktime.tv_sec.
+ */
+static void
+do_adjustment(const struct hwclock_control *ctl, struct adjtime *adjtime_p,
+	      const struct timeval hclocktime,
+	      const struct timeval read_time)
+{
+	struct timeval trigger;
+
+	if (!adjtime_p->last_adj_time) {
+		if (ctl->verbose)
+			printf(_("Not setting clock because last adjustment time is zero, "
+				 "so history is bad.\n"));
+		return;
+	}
+
+	if (fabs(adjtime_p->drift_factor) > MAX_DRIFT) {
+		if (ctl->verbose)
+			printf(_("Not setting clock because drift factor %f is far too high.\n"),
+			       adjtime_p->drift_factor);
+		return;
+	}
+
+	trigger = time_inc(read_time, -(hclocktime.tv_usec / 1E6));
+	set_hardware_clock_exact(ctl, hclocktime.tv_sec, trigger);
+
+	adjtime_p->last_adj_time = hclocktime.tv_sec;
+	adjtime_p->not_adjusted = 0;
+	adjtime_p->dirty = 1;
+}
+
+/*
+ * Select the clock access method and store it in <ur>.
+ *
+ * The CMOS interface is probed only when --directisa was given; on Linux
+ * the rtc interface is probed when that left <ur> unset.  If no interface
+ * is found the program terminates through hwclock_exit() with EXIT_FAILURE.
+ */
+static void determine_clock_access_method(const struct hwclock_control *ctl)
+{
+	ur = ctl->directisa ? probe_for_cmos_clock() : NULL;
+#ifdef __linux__
+	if (ur == NULL)
+		ur = probe_for_rtc_clock(ctl);
+#endif
+	if (ur != NULL) {
+		if (ctl->verbose)
+			puts(ur->interface_name);
+		return;
+	}
+
+	if (ctl->verbose)
+		printf(_("No usable clock interface found.\n"));
+	warnx(_("Cannot access the Hardware Clock via "
+		"any known method."));
+	if (!ctl->verbose)
+		warnx(_("Use the --verbose option to see the "
+			"details of our search for an access "
+			"method."));
+	hwclock_exit(ctl, EXIT_FAILURE);
+}
+
+/* Do all the normal work of hwclock - read, set clock, etc.
+ *
+ * <set_time> is the time given with --date, <startup_time> the system time
+ * taken when the program started, and <adjtime> the adjtime data, which is
+ * handed to save_adjtime() at the end if it is dirty and --noadjfile was
+ * not given.
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+static int
+manipulate_clock(const struct hwclock_control *ctl, const time_t set_time,
+ const struct timeval startup_time, struct adjtime *adjtime)
+{
+ /* The time at which we read the Hardware Clock */
+ struct timeval read_time;
+ /*
+ * The Hardware Clock gives us a valid time, or at
+ * least something close enough to fool mktime().
+ */
+ int hclock_valid = 0;
+ /*
+ * Tick synchronized time read from the Hardware Clock and
+ * then drift corrected for all operations except --show.
+ */
+ struct timeval hclocktime = { 0 };
+ /*
+ * hclocktime correlated to startup_time. That is, what drift
+ * corrected Hardware Clock time would have been at start up.
+ */
+ struct timeval startup_hclocktime = { 0 };
+ /* Total Hardware Clock drift correction needed.
+ * Only assigned by calculate_adjustment(), i.e. on the --predict path
+ * and inside the RTC read block below. */
+ struct timeval tdrift;
+
+ if ((ctl->set || ctl->systohc || ctl->adjust) &&
+ (adjtime->local_utc == UTC) != ctl->universal) {
+ adjtime->local_utc = ctl->universal ? UTC : LOCAL; /* keep the stored timescale in step with the one in use */
+ adjtime->dirty = 1;
+ }
+ /*
+ * Negate the drift correction, because we want to 'predict' a
+ * Hardware Clock time that includes drift.
+ */
+ if (ctl->predict) {
+ hclocktime = t2tv(set_time);
+ calculate_adjustment(ctl, adjtime->drift_factor,
+ adjtime->last_adj_time,
+ adjtime->not_adjusted,
+ hclocktime.tv_sec, &tdrift);
+ hclocktime = time_inc(hclocktime, (double)
+ -(tdrift.tv_sec + tdrift.tv_usec / 1E6));
+ if (ctl->verbose) {
+ printf(_ ("Target date: %ld\n"), set_time); /* NOTE(review): %ld assumes time_t is long */
+ printf(_ ("Predicted RTC: %ld\n"), hclocktime.tv_sec);
+ }
+ return display_time(hclocktime);
+ }
+
+ if (ctl->systz) /* handled before any use of <ur>: no RTC access here */
+ return set_system_clock(ctl, startup_time);
+
+ if (ur->get_permissions()) /* non-zero means failure */
+ return EXIT_FAILURE;
+
+ /*
+ * Read and drift correct RTC time; except for RTC set functions
+ * without the --update-drift option because: 1) it's not needed;
+ * 2) it enables setting a corrupted RTC without reading it first;
+ * 3) it significantly reduces system shutdown time.
+ */
+ if ( ! ((ctl->set || ctl->systohc) && !ctl->update)) {
+ /*
+ * Timing critical - do not change the order of, or put
+ * anything between the following three statements.
+ * Synchronization failure MUST exit, because all drift
+ * operations are invalid without it.
+ */
+ if (synchronize_to_clock_tick(ctl))
+ return EXIT_FAILURE;
+ read_hardware_clock(ctl, &hclock_valid, &hclocktime.tv_sec);
+ gettimeofday(&read_time, NULL);
+
+ if (!hclock_valid) {
+ warnx(_("RTC read returned an invalid value."));
+ return EXIT_FAILURE;
+ }
+ /*
+ * Calculate and apply drift correction to the Hardware Clock
+ * time for everything except --show
+ */
+ calculate_adjustment(ctl, adjtime->drift_factor,
+ adjtime->last_adj_time,
+ adjtime->not_adjusted,
+ hclocktime.tv_sec, &tdrift);
+ if (!ctl->show)
+ hclocktime = time_inc(tdrift, hclocktime.tv_sec); /* i.e. tdrift + whole RTC seconds */
+
+ startup_hclocktime =
+ time_inc(hclocktime, time_diff(startup_time, read_time));
+ }
+ if (ctl->show || ctl->get) {
+ return display_time(startup_hclocktime);
+ } else if (ctl->set) {
+ set_hardware_clock_exact(ctl, set_time, startup_time);
+ if (!ctl->noadjfile)
+ adjust_drift_factor(ctl, adjtime, t2tv(set_time),
+ startup_hclocktime);
+ } else if (ctl->adjust) {
+ if (tdrift.tv_sec > 0 || tdrift.tv_sec < -1) /* tv_sec is floor()ed, so -1 still means less than a second */
+ do_adjustment(ctl, adjtime, hclocktime, read_time);
+ else
+ printf(_("Needed adjustment is less than one second, "
+ "so not setting clock.\n"));
+ } else if (ctl->systohc) {
+ struct timeval nowtime, reftime;
+ /*
+ * We can only set_hardware_clock_exact to a
+ * whole seconds time, so we set it with
+ * reference to the most recent whole
+ * seconds time.
+ */
+ gettimeofday(&nowtime, NULL);
+ reftime.tv_sec = nowtime.tv_sec;
+ reftime.tv_usec = 0;
+ set_hardware_clock_exact(ctl, (time_t) reftime.tv_sec, reftime);
+ if (!ctl->noadjfile)
+ adjust_drift_factor(ctl, adjtime, nowtime,
+ hclocktime);
+ } else if (ctl->hctosys) {
+ return set_system_clock(ctl, hclocktime);
+ }
+ if (!ctl->noadjfile && adjtime->dirty) /* only rewrite the adjtime file when something changed */
+ return save_adjtime(ctl, adjtime);
+ return EXIT_SUCCESS;
+}
+
+/**
+ * Get or set the kernel RTC driver's epoch on Alpha machines.
+ * ISA machines are hard coded for 1900.
+ *
+ * --getepoch prints the epoch read with get_epoch_rtc(); --setepoch needs
+ * --epoch and calls set_epoch_rtc() unless this is a test run.  Failures
+ * are only reported with warnx(); nothing is returned to the caller.
+ */
+#if defined(__linux__) && defined(__alpha__)
+static void
+manipulate_epoch(const struct hwclock_control *ctl)
+{
+	unsigned long epoch;
+
+	if (ctl->getepoch) {
+		if (get_epoch_rtc(ctl, &epoch) == 0)
+			printf(_("The RTC epoch is set to %lu.\n"), epoch);
+		else
+			warnx(_("unable to read the RTC epoch."));
+		return;
+	}
+	if (!ctl->setepoch)
+		return;
+
+	if (!ctl->epoch_option)
+		warnx(_("--epoch is required for --setepoch."));
+	else if (!ctl->testing && set_epoch_rtc(ctl))
+		warnx(_("unable to set the RTC epoch."));
+}
+#endif /* __linux__ __alpha__ */
+
+static void out_version(void) /* print the version line on stdout */
+{
+ printf(UTIL_LINUX_VERSION); /* NOTE(review): the macro presumably expands to a format string plus its arguments - defined outside this file, confirm */
+}
+
+static void __attribute__((__noreturn__))
+usage(void) /* print the help text on stdout, then exit(EXIT_SUCCESS) */
+{
+ fputs(USAGE_HEADER, stdout);
+ printf(_(" %s [function] [option...]\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, stdout);
+ puts(_("Time clocks utility."));
+
+ fputs(USAGE_FUNCTIONS, stdout); /* functions: mutually exclusive operations (see excl[] in main) */
+ puts(_(" -r, --show display the RTC time"));
+ puts(_(" --get display drift corrected RTC time"));
+ puts(_(" --set set the RTC according to --date"));
+ puts(_(" -s, --hctosys set the system time from the RTC"));
+ puts(_(" -w, --systohc set the RTC from the system time"));
+ puts(_(" --systz send timescale configurations to the kernel"));
+ puts(_(" -a, --adjust adjust the RTC to account for systematic drift"));
+#if defined(__linux__) && defined(__alpha__)
+ puts(_(" --getepoch display the RTC epoch"));
+ puts(_(" --setepoch set the RTC epoch according to --epoch"));
+#endif
+ puts(_(" --predict predict the drifted RTC time according to --date"));
+ fputs(USAGE_OPTIONS, stdout); /* options: modifiers for the functions above */
+ puts(_(" -u, --utc the RTC timescale is UTC"));
+ puts(_(" -l, --localtime the RTC timescale is Local"));
+#ifdef __linux__
+ printf(_(
+ " -f, --rtc <file> use an alternate file to %1$s\n"), _PATH_RTC_DEV);
+#endif
+ printf(_(
+ " --directisa use the ISA bus instead of %1$s access\n"), _PATH_RTC_DEV);
+ puts(_(" --date <time> date/time input for --set and --predict"));
+ puts(_(" --delay <sec> delay used when set new RTC time"));
+#if defined(__linux__) && defined(__alpha__)
+ puts(_(" --epoch <year> epoch input for --setepoch"));
+#endif
+ puts(_(" --update-drift update the RTC drift factor"));
+ printf(_(
+ " --noadjfile do not use %1$s\n"), _PATH_ADJTIME);
+ printf(_(
+ " --adjfile <file> use an alternate file to %1$s\n"), _PATH_ADJTIME);
+ puts(_(" --test dry run; implies --verbose"));
+ puts(_(" -v, --verbose display more details"));
+ fputs(USAGE_SEPARATOR, stdout);
+ printf(USAGE_HELP_OPTIONS(22));
+ printf(USAGE_MAN_TAIL("hwclock(8)"));
+ exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char **argv) /* parse and validate the options, then run the requested clock operation */
+{
+ struct hwclock_control ctl = {
+ .show = 1, /* default op is show */
+ .rtc_delay = -1.0 /* unspecified */
+ };
+ struct timeval startup_time;
+ struct adjtime adjtime = { 0 };
+ struct timespec when = { 0 };
+ /*
+ * startup_time (above) is the time we started up, in seconds into
+ * the epoch, including fractions.
+ */
+ time_t set_time = 0; /* Time to which user said to set Hardware Clock */
+ int rc, c;
+
+ /* Long only options. */
+ enum {
+ OPT_ADJFILE = CHAR_MAX + 1,
+ OPT_DATE,
+ OPT_DELAY,
+ OPT_DIRECTISA,
+ OPT_EPOCH,
+ OPT_GET,
+ OPT_GETEPOCH,
+ OPT_NOADJFILE,
+ OPT_PREDICT,
+ OPT_SET,
+ OPT_SETEPOCH,
+ OPT_SYSTZ,
+ OPT_TEST,
+ OPT_UPDATE
+ };
+
+ static const struct option longopts[] = {
+ { "adjust", no_argument, NULL, 'a' },
+ { "help", no_argument, NULL, 'h' },
+ { "localtime", no_argument, NULL, 'l' },
+ { "show", no_argument, NULL, 'r' },
+ { "hctosys", no_argument, NULL, 's' },
+ { "utc", no_argument, NULL, 'u' },
+ { "version", no_argument, NULL, 'V' },
+ { "systohc", no_argument, NULL, 'w' },
+ { "debug", no_argument, NULL, 'D' },
+ { "ul-debug", required_argument, NULL, 'd' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "set", no_argument, NULL, OPT_SET },
+#if defined(__linux__) && defined(__alpha__)
+ { "getepoch", no_argument, NULL, OPT_GETEPOCH },
+ { "setepoch", no_argument, NULL, OPT_SETEPOCH },
+ { "epoch", required_argument, NULL, OPT_EPOCH },
+#endif
+ { "noadjfile", no_argument, NULL, OPT_NOADJFILE },
+ { "directisa", no_argument, NULL, OPT_DIRECTISA },
+ { "test", no_argument, NULL, OPT_TEST },
+ { "date", required_argument, NULL, OPT_DATE },
+ { "delay", required_argument, NULL, OPT_DELAY },
+#ifdef __linux__
+ { "rtc", required_argument, NULL, 'f' },
+#endif
+ { "adjfile", required_argument, NULL, OPT_ADJFILE },
+ { "systz", no_argument, NULL, OPT_SYSTZ },
+ { "predict", no_argument, NULL, OPT_PREDICT },
+ { "get", no_argument, NULL, OPT_GET },
+ { "update-drift", no_argument, NULL, OPT_UPDATE },
+ { NULL, 0, NULL, 0 }
+ };
+
+ static const ul_excl_t excl[] = { /* rows and cols in ASCII order */
+ { 'a','r','s','w',
+ OPT_GET, OPT_GETEPOCH, OPT_PREDICT,
+ OPT_SET, OPT_SETEPOCH, OPT_SYSTZ },
+ { 'l', 'u' },
+ { OPT_ADJFILE, OPT_NOADJFILE },
+ { OPT_NOADJFILE, OPT_UPDATE },
+ { 0 }
+ };
+ int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+ /* Remember what time we were invoked */
+ gettimeofday(&startup_time, NULL);
+
+#ifdef HAVE_LIBAUDIT
+ hwaudit_fd = audit_open();
+ if (hwaudit_fd < 0 && !(errno == EINVAL || errno == EPROTONOSUPPORT ||
+ errno == EAFNOSUPPORT)) {
+ /*
+ * You get these error codes only when the kernel doesn't
+ * have audit compiled in.
+ */
+ warnx(_("Unable to connect to audit system"));
+ return EXIT_FAILURE;
+ }
+#endif
+ setlocale(LC_ALL, "");
+#ifdef LC_NUMERIC
+ /*
+ * We need LC_CTYPE and LC_TIME and LC_MESSAGES, but must avoid
+ * LC_NUMERIC since it gives problems when we write to /etc/adjtime.
+ * - gqueri@mail.dotcom.fr
+ */
+ setlocale(LC_NUMERIC, "C");
+#endif
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ while ((c = getopt_long(argc, argv,
+ "hvVDd:alrsuwf:", longopts, NULL)) != -1) {
+
+ err_exclusive_options(c, longopts, excl, excl_st);
+
+ switch (c) { /* every function other than --show clears ctl.show, the default */
+ case 'D':
+ warnx(_("use --verbose, --debug has been deprecated."));
+ break;
+ case 'v':
+ ctl.verbose = 1;
+ break;
+ case 'd':
+ hwclock_init_debug(optarg);
+ break;
+ case 'a':
+ ctl.adjust = 1;
+ ctl.show = 0;
+ ctl.hwaudit_on = 1;
+ break;
+ case 'l':
+ ctl.local_opt = 1; /* --localtime */
+ break;
+ case 'r':
+ ctl.show = 1;
+ break;
+ case 's':
+ ctl.hctosys = 1;
+ ctl.show = 0;
+ ctl.hwaudit_on = 1;
+ break;
+ case 'u':
+ ctl.utc = 1;
+ break;
+ case 'w':
+ ctl.systohc = 1;
+ ctl.show = 0;
+ ctl.hwaudit_on = 1;
+ break;
+ case OPT_SET:
+ ctl.set = 1;
+ ctl.show = 0;
+ ctl.hwaudit_on = 1;
+ break;
+#if defined(__linux__) && defined(__alpha__)
+ case OPT_GETEPOCH:
+ ctl.getepoch = 1;
+ ctl.show = 0;
+ break;
+ case OPT_SETEPOCH:
+ ctl.setepoch = 1;
+ ctl.show = 0;
+ ctl.hwaudit_on = 1;
+ break;
+ case OPT_EPOCH:
+ ctl.epoch_option = optarg; /* --epoch */
+ break;
+#endif
+ case OPT_NOADJFILE:
+ ctl.noadjfile = 1;
+ break;
+ case OPT_DIRECTISA:
+ ctl.directisa = 1;
+ break;
+ case OPT_TEST:
+ ctl.testing = 1; /* --test */
+ ctl.verbose = 1;
+ break;
+ case OPT_DATE:
+ ctl.date_opt = optarg; /* --date */
+ break;
+ case OPT_DELAY: /* NOTE(review): -1.0 is the "unspecified" sentinel, so "--delay -1" reads as no --delay; other negative values are not rejected here */
+ ctl.rtc_delay = strtod_or_err(optarg, "invalid --delay argument");
+ break;
+ case OPT_ADJFILE:
+ ctl.adj_file_name = optarg; /* --adjfile */
+ break;
+ case OPT_SYSTZ:
+ ctl.systz = 1; /* --systz */
+ ctl.show = 0;
+ ctl.hwaudit_on = 1;
+ break;
+ case OPT_PREDICT:
+ ctl.predict = 1; /* --predict */
+ ctl.show = 0;
+ break;
+ case OPT_GET:
+ ctl.get = 1; /* --get */
+ ctl.show = 0;
+ break;
+ case OPT_UPDATE:
+ ctl.update = 1; /* --update-drift */
+ break;
+#ifdef __linux__
+ case 'f':
+ ctl.rtc_dev_name = optarg; /* --rtc */
+ break;
+#endif
+ case 'V': /* --version */
+ out_version();
+ return 0;
+ case 'h': /* --help */
+ usage();
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+ if (argc -= optind) { /* anything left after the options is an error */
+ warnx(_("%d too many arguments given"), argc);
+ errtryhelp(EXIT_FAILURE);
+ }
+
+ if (!ctl.adj_file_name)
+ ctl.adj_file_name = _PATH_ADJTIME;
+
+ if (ctl.update && !ctl.set && !ctl.systohc) {
+ warnx(_("--update-drift requires --set or --systohc"));
+ exit(EXIT_FAILURE);
+ }
+
+ if (ctl.noadjfile && !ctl.utc && !ctl.local_opt) {
+ warnx(_("With --noadjfile, you must specify "
+ "either --utc or --localtime"));
+ exit(EXIT_FAILURE);
+ }
+
+ if (ctl.set || ctl.predict) {
+ if (!ctl.date_opt) {
+ warnx(_("--date is required for --set or --predict"));
+ exit(EXIT_FAILURE);
+ }
+ if (parse_date(&when, ctl.date_opt, NULL))
+ set_time = when.tv_sec; /* only whole seconds of --date are used */
+ else {
+ warnx(_("invalid date '%s'"), ctl.date_opt);
+ exit(EXIT_FAILURE);
+ }
+ }
+
+#if defined(__linux__) && defined(__alpha__)
+ if (ctl.getepoch || ctl.setepoch) {
+ manipulate_epoch(&ctl);
+ hwclock_exit(&ctl, EXIT_SUCCESS);
+ }
+#endif
+
+ if (ctl.verbose) {
+ out_version();
+ printf(_("System Time: %ld.%06ld\n"), /* NOTE(review): %ld assumes time_t and suseconds_t are long */
+ startup_time.tv_sec, startup_time.tv_usec);
+ }
+
+ if (!ctl.systz && !ctl.predict) /* manipulate_clock() returns for these two before it uses the access method */
+ determine_clock_access_method(&ctl);
+
+ if (!ctl.noadjfile && !(ctl.systz && (ctl.utc || ctl.local_opt))) { /* --systz with an explicit timescale does not read the adjtime file */
+ if ((rc = read_adjtime(&ctl, &adjtime)) != 0)
+ hwclock_exit(&ctl, rc);
+ } else
+ /* Avoid writing adjtime file if we don't have to. */
+ adjtime.dirty = 0;
+ ctl.universal = hw_clock_is_utc(&ctl, adjtime);
+ rc = manipulate_clock(&ctl, set_time, startup_time, &adjtime);
+ if (ctl.testing)
+ puts(_("Test mode: nothing was changed."));
+ hwclock_exit(&ctl, rc);
+ return rc; /* Not reached */
+}
+
+/*
+ * Terminate the program with exit status <status>.
+ *
+ * When built with libaudit, a "change-system-time" audit message carrying
+ * <status> is logged first if the operation was flagged for auditing and
+ * this is not a test run; the audit descriptor is closed in either case.
+ */
+void
+hwclock_exit(const struct hwclock_control *ctl
+#ifndef HAVE_LIBAUDIT
+	     __attribute__((__unused__))
+#endif
+	     , int status)
+{
+#ifdef HAVE_LIBAUDIT
+	const int audit_wanted = ctl->hwaudit_on && !ctl->testing;
+
+	if (audit_wanted)
+		audit_log_user_message(hwaudit_fd, AUDIT_USYS_CONFIG,
+				       "op=change-system-time", NULL, NULL, NULL,
+				       status);
+	close(hwaudit_fd);
+#endif
+	exit(status);
+}
+
+/*
+ * History of this program:
+ *
+ * 98.08.12 BJH Version 2.4
+ *
+ * Don't use century byte from Hardware Clock. Add comments telling why.
+ *
+ * 98.06.20 BJH Version 2.3.
+ *
+ * Make --hctosys set the kernel timezone from TZ environment variable
+ * and/or /usr/lib/zoneinfo. From Klaus Ripke (klaus@ripke.com).
+ *
+ * 98.03.05 BJH. Version 2.2.
+ *
+ * Add --getepoch and --setepoch.
+ *
+ * Fix some word length things so it works on Alpha.
+ *
+ * Make it work when /dev/rtc doesn't have the interrupt functions. In this
+ * case, busywait for the top of a second instead of blocking and waiting
+ * for the update complete interrupt.
+ *
+ * Fix a bunch of bugs too numerous to mention.
+ *
+ * 97.06.01: BJH. Version 2.1. Read and write the century byte (Byte 50) of
+ * the ISA Hardware Clock when using direct ISA I/O. Problem discovered by
+ * job (jei@iclnl.icl.nl).
+ *
+ * Use the rtc clock access method in preference to the KDGHWCLK method.
+ * Problem discovered by Andreas Schwab <schwab@LS5.informatik.uni-dortmund.de>.
+ *
+ * November 1996: Version 2.0.1. Modifications by Nicolai Langfeldt
+ * (janl@math.uio.no) to make it compile on linux 1.2 machines as well as
+ * more recent versions of the kernel. Introduced the NO_CLOCK access method
+ * and wrote feature test code to detect absence of rtc headers.
+ *
+ ***************************************************************************
+ * Maintenance notes
+ *
+ * To compile this, you must use GNU compiler optimization (-O option) in
+ * order to make the "extern inline" functions from asm/io.h (inb(), etc.)
+ * compile. If you don't optimize, which means the compiler will generate no
+ * inline functions, the references to these functions in this program will
+ * be compiled as external references. Since you probably won't be linking
+ * with any functions by these names, you will have unresolved external
+ * references when you link.
+ *
+ * Here's some info on how we must deal with the time that elapses while
+ * this program runs: There are two major delays as we run:
+ *
+ * 1) Waiting up to 1 second for a transition of the Hardware Clock so
+ * we are synchronized to the Hardware Clock.
+ * 2) Running the "date" program to interpret the value of our --date
+ * option.
+ *
+ * Reading the /etc/adjtime file is the next biggest source of delay and
+ * uncertainty.
+ *
+ * The user wants to know what time it was at the moment he invoked us, not
+ * some arbitrary time later. And in setting the clock, he is giving us the
+ * time at the moment we are invoked, so if we set the clock some time
+ * later, we have to add some time to that.
+ *
+ * So we check the system time as soon as we start up, then run "date" and
+ * do file I/O if necessary, then wait to synchronize with a Hardware Clock
+ * edge, then check the system time again to see how much time we spent. We
+ * immediately read the clock then and (if appropriate) report that time,
+ * and additionally, the delay we measured.
+ *
+ * If we're setting the clock to a time given by the user, we wait some more
+ * so that the total delay is an integral number of seconds, then set the
+ * Hardware Clock to the time the user requested plus that integral number
+ * of seconds. N.B. The Hardware Clock can only be set in integral seconds.
+ *
+ * If we're setting the clock to the system clock value, we wait for the
+ * system clock to reach the top of a second, and then set the Hardware
+ * Clock to the system clock's value.
+ *
+ * Here's an interesting point about setting the Hardware Clock: On my
+ * machine, when you set it, it sets to that precise time. But one can
+ * imagine another clock whose update oscillator marches on a steady one
+ * second period, so updating the clock between any two oscillator ticks is
+ * the same as updating it right at the earlier tick. To avoid any
+ * complications that might cause, we set the clock as soon as possible
+ * after an oscillator tick.
+ *
+ * About synchronizing to the Hardware Clock when reading the time: The
+ * precision of the Hardware Clock counters themselves is one second. You
+ * can't read the counters and find out that it is 12:01:02.5. But if you
+ * consider the location in time of the counter's ticks as part of its
+ * value, then its precision is as infinite as time is continuous! What I'm
+ * saying is this: To find out the _exact_ time in the hardware clock, we
+ * wait until the next clock tick (the next time the second counter changes)
+ * and measure how long we had to wait. We then read the value of the clock
+ * counters and subtract the wait time and we know precisely what time it
+ * was when we set out to query the time.
+ *
+ * hwclock uses this method, and considers the Hardware Clock to have
+ * infinite precision.
+ */
diff --git a/sys-utils/hwclock.h b/sys-utils/hwclock.h
new file mode 100644
index 0000000..92fdb5f
--- /dev/null
+++ b/sys-utils/hwclock.h
@@ -0,0 +1,80 @@
+#ifndef HWCLOCK_CLOCK_H
+#define HWCLOCK_CLOCK_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+#include "c.h"
+#include "debug.h"
+
+#define HWCLOCK_DEBUG_INIT (1 << 0)
+#define HWCLOCK_DEBUG_RANDOM_SLEEP (1 << 1)
+#define HWCLOCK_DEBUG_DELTA_VS_TARGET (1 << 2)
+#define HWCLOCK_DEBUG_ALL 0xFFFF
+
+UL_DEBUG_DECLARE_MASK(hwclock);
+#define DBG(m, x) __UL_DBG(hwclock, HWCLOCK_DEBUG_, m, x)
+#define ON_DBG(m, x) __UL_DBG_CALL(hwclock, HWCLOCK_DEBUG_, m, x)
+
+struct hwclock_control { /* options and state shared by all hwclock operations; filled in by main() */
+ char *date_opt; /* --date <time> */
+ char *adj_file_name; /* --adjfile <file>, else _PATH_ADJTIME */
+ double rtc_delay; /* --delay <seconds>; -1.0 means unspecified */
+#if defined(__linux__) && defined(__alpha__)
+ char *epoch_option; /* --epoch <year> */
+#endif
+#ifdef __linux__
+ char *rtc_dev_name; /* -f, --rtc <file> */
+#endif
+ unsigned int
+ hwaudit_on:1, /* operation is reported by hwclock_exit() when built with libaudit */
+ adjust:1, /* -a, --adjust */
+ show:1, /* -r, --show (the default operation) */
+ hctosys:1, /* -s, --hctosys */
+ utc:1, /* -u, --utc */
+ systohc:1, /* -w, --systohc */
+#if defined(__linux__) && defined(__alpha__)
+ getepoch:1, /* --getepoch */
+ setepoch:1, /* --setepoch */
+#endif
+ noadjfile:1, /* --noadjfile */
+ local_opt:1, /* -l, --localtime */
+ directisa:1, /* --directisa */
+ testing:1, /* --test; also turns on verbose */
+ systz:1, /* --systz */
+ predict:1, /* --predict */
+ get:1, /* --get */
+ set:1, /* --set */
+ update:1, /* --update-drift */
+ universal:1, /* will store hw_clock_is_utc() return value */
+ verbose:1; /* -v, --verbose */
+};
+
+struct clock_ops { /* one clock access method, as returned by the probe_for_*_clock() functions */
+ char *interface_name; /* printed in verbose mode once the method is selected */
+ int (*get_permissions) (void); /* non-zero return is treated as failure by the caller */
+ int (*read_hardware_clock) (const struct hwclock_control *ctl, struct tm * tm); /* presumably fills <tm> from the RTC - confirm in the implementations */
+ int (*set_hardware_clock) (const struct hwclock_control *ctl, const struct tm * tm); /* presumably writes <tm> to the RTC - confirm in the implementations */
+ int (*synchronize_to_clock_tick) (const struct hwclock_control *ctl); /* presumably waits for the next RTC tick - confirm in the implementations */
+ const char *(*get_device_path) (void); /* NOTE(review): not used in hwclock.c as far as visible here */
+};
+
+extern struct clock_ops *probe_for_cmos_clock(void);
+extern struct clock_ops *probe_for_rtc_clock(const struct hwclock_control *ctl);
+
+/* hwclock.c */
+extern double time_diff(struct timeval subtrahend, struct timeval subtractor);
+
+/* rtc.c */
+#if defined(__linux__) && defined(__alpha__)
+extern int get_epoch_rtc(const struct hwclock_control *ctl, unsigned long *epoch);
+extern int set_epoch_rtc(const struct hwclock_control *ctl);
+#endif
+
+extern void __attribute__((__noreturn__))
+hwclock_exit(const struct hwclock_control *ctl, int status);
+
+#endif /* HWCLOCK_CLOCK_H */
diff --git a/sys-utils/ipcmk.1 b/sys-utils/ipcmk.1
new file mode 100644
index 0000000..e6ed434
--- /dev/null
+++ b/sys-utils/ipcmk.1
@@ -0,0 +1,54 @@
+.\" Copyright 2008 Hayden A. James (hayden.james@gmail.com)
+.\" May be distributed under the GNU General Public License
+.TH IPCMK "1" "July 2014" "util-linux" "User Commands"
+.SH "NAME"
+ipcmk \- make various IPC resources
+.SH "SYNOPSIS"
+.B ipcmk
+[options]
+.SH "DESCRIPTION"
+.B ipcmk
+allows you to create shared memory segments, message queues,
+and semaphore arrays.
+.SH "OPTIONS"
+.PP
+Resources can be specified with these options:
+.TP
+.BR \-M , " \-\-shmem " \fIsize
+Create a shared memory segment of
+.I size
+bytes.
+The \fIsize\fR argument may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, etc. (the
+"iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, etc.
+.TP
+.BR \-Q , " \-\-queue"
+Create a message queue.
+.TP
+.BR \-S , " \-\-semaphore " \fInumber
+Create a semaphore array with
+.I number
+of elements.
+.PP
+Other options are:
+.TP
+.BR \-p , " \-\-mode " \fImode
+Access permissions for the resource. Default is 0644.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.PP
+.SH "SEE ALSO"
+.BR ipcrm (1),
+.BR ipcs (1)
+.SH "AUTHOR"
+.MT hayden.james@gmail.com
+Hayden A. James
+.ME
+.SH "AVAILABILITY"
+The ipcmk command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/ipcmk.c b/sys-utils/ipcmk.c
new file mode 100644
index 0000000..df83652
--- /dev/null
+++ b/sys-utils/ipcmk.c
@@ -0,0 +1,163 @@
+/*
+ * ipcmk.c - used to create ad-hoc IPC segments
+ *
+ * Copyright (C) 2008 Hayden A. James (hayden.james@gmail.com)
+ * Copyright (C) 2008 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ipc.h>
+#include <sys/msg.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/time.h>
+
+#include "c.h"
+#include "nls.h"
+#include "randutils.h"
+#include "strutils.h"
+#include "closestream.h"
+
+/*
+ * Ask the kernel for a shared memory segment of @size bytes.
+ *
+ * The key is filled from random_get_bytes() and handed to shmget() with
+ * IPC_CREAT OR-ed into @permission.  The shmget() result (segment id, or
+ * -1 with errno set) is returned unchanged.
+ */
+static int create_shm(size_t size, int permission)
+{
+	key_t random_key;
+	const int shmflg = permission | IPC_CREAT;
+
+	random_get_bytes(&random_key, sizeof(random_key));
+
+	return shmget(random_key, size, shmflg);
+}
+
+/*
+ * Ask the kernel for a message queue.
+ *
+ * The key is filled from random_get_bytes() and handed to msgget() with
+ * IPC_CREAT OR-ed into @permission.  The msgget() result (queue id, or
+ * -1 with errno set) is returned unchanged.
+ */
+static int create_msg(int permission)
+{
+	key_t random_key;
+	const int msgflg = permission | IPC_CREAT;
+
+	random_get_bytes(&random_key, sizeof(random_key));
+
+	return msgget(random_key, msgflg);
+}
+
+/*
+ * Ask the kernel for a semaphore array with @nsems elements.
+ *
+ * The key is filled from random_get_bytes() and handed to semget() with
+ * IPC_CREAT OR-ed into @permission.  The semget() result (array id, or
+ * -1 with errno set) is returned unchanged.
+ */
+static int create_sem(int nsems, int permission)
+{
+	key_t random_key;
+	const int semflg = permission | IPC_CREAT;
+
+	random_get_bytes(&random_key, sizeof(random_key));
+
+	return semget(random_key, nsems, semflg);
+}
+
+/*
+ * Print the ipcmk help text to stdout and exit with EXIT_SUCCESS.
+ * Never returns.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _(" %s [options]\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Create various IPC resources.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -M, --shmem <size> create shared memory segment of size <size>\n"), out);
+ fputs(_(" -S, --semaphore <number> create semaphore array with <number> elements\n"), out);
+ fputs(_(" -Q, --queue create message queue\n"), out);
+ fputs(_(" -p, --mode <mode> permission for the resource (default is 0644)\n"), out);
+
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(26));
+ printf(USAGE_MAN_TAIL("ipcmk(1)"));
+
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * ipcmk entry point.
+ *
+ * Parses the command line, then creates the requested resources in the
+ * fixed order shared memory, message queue, semaphore array and prints
+ * the id of each one.  At least one of -M, -Q or -S is required.
+ *
+ * Returns EXIT_SUCCESS when everything requested was created.  A parse
+ * error or the first creation failure terminates with EXIT_FAILURE.
+ */
+int main(int argc, char **argv)
+{
+	int permission = 0644;
+	int opt;
+	size_t size = 0;
+	int nsems = 0;
+	int ask_shm = 0, ask_msg = 0, ask_sem = 0;
+	static const struct option longopts[] = {
+		{"shmem", required_argument, NULL, 'M'},
+		{"semaphore", required_argument, NULL, 'S'},
+		{"queue", no_argument, NULL, 'Q'},
+		{"mode", required_argument, NULL, 'p'},
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((opt = getopt_long(argc, argv, "hM:QS:p:V", longopts, NULL)) != -1) {
+		switch (opt) {
+		case 'M':
+			size = strtosize_or_err(optarg, _("failed to parse size"));
+			ask_shm = 1;
+			break;
+		case 'Q':
+			ask_msg = 1;
+			break;
+		case 'S':
+			nsems = strtos32_or_err(optarg, _("failed to parse elements"));
+			ask_sem = 1;
+			break;
+		case 'p':
+		{
+			char *end = NULL;
+
+			/*
+			 * The mode is octal.  Reject empty, non-numeric and
+			 * trailing-garbage input instead of silently
+			 * creating the resource with mode 0.
+			 */
+			errno = 0;
+			permission = strtoul(optarg, &end, 8);
+			if (errno)
+				err(EXIT_FAILURE, "%s: '%s'",
+				    _("failed to parse mode"), optarg);
+			if (end == optarg || *end != '\0')
+				errx(EXIT_FAILURE, "%s: '%s'",
+				     _("failed to parse mode"), optarg);
+			break;
+		}
+		case 'h':
+			usage();
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (!ask_shm && !ask_msg && !ask_sem) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	if (ask_shm) {
+		int shmid;
+		if (-1 == (shmid = create_shm(size, permission)))
+			err(EXIT_FAILURE, _("create share memory failed"));
+		else
+			printf(_("Shared memory id: %d\n"), shmid);
+	}
+
+	if (ask_msg) {
+		int msgid;
+		if (-1 == (msgid = create_msg(permission)))
+			err(EXIT_FAILURE, _("create message queue failed"));
+		else
+			printf(_("Message queue id: %d\n"), msgid);
+	}
+
+	if (ask_sem) {
+		int semid;
+		if (-1 == (semid = create_sem(nsems, permission)))
+			err(EXIT_FAILURE, _("create semaphore failed"));
+		else
+			printf(_("Semaphore id: %d\n"), semid);
+	}
+
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/ipcrm.1 b/sys-utils/ipcrm.1
new file mode 100644
index 0000000..be73dff
--- /dev/null
+++ b/sys-utils/ipcrm.1
@@ -0,0 +1,117 @@
+.\" Copyright 2002 Andre C. Mazzone (linuxdev@karagee.com)
+.\" May be distributed under the GNU General Public License
+.TH IPCRM "1" "July 2014" "util-linux" "User Commands"
+.SH NAME
+ipcrm \- remove certain IPC resources
+.SH SYNOPSIS
+.B ipcrm
+[options]
+.sp
+.B ipcrm
+.RB { shm | msg | sem }
+.IR id ...
+.SH DESCRIPTION
+.B ipcrm
+removes System V inter-process communication (IPC) objects
+and associated data structures from the system.
+In order to delete such objects, you must be superuser, or
+the creator or owner of the object.
+.PP
+System V IPC objects are of three types: shared memory,
+message queues, and semaphores.
+Deletion of a message queue or semaphore object is immediate
+(regardless of whether any process still holds an IPC
+identifier for the object).
+A shared memory object is only removed
+after all currently attached processes have detached
+.RB ( shmdt (2))
+the object from their virtual address space.
+.PP
+Two syntax styles are supported. The old Linux historical syntax specifies
+a three-letter keyword indicating which class of object is to be deleted,
+followed by one or more IPC identifiers for objects of this type.
+.PP
+The SUS-compliant syntax allows the specification of
+zero or more objects of all three types in a single command line,
+with objects specified either by key or by identifier (see below).
+Both keys and identifiers may be specified in decimal, hexadecimal
+(specified with an initial '0x' or '0X'), or octal (specified with
+an initial '0').
+.PP
+The details of the removals are described in
+.BR shmctl (2),
+.BR msgctl (2),
+and
+.BR semctl (2).
+The identifiers and keys can be found by using
+.BR ipcs (1).
+.SH OPTIONS
+.TP
+\fB\-a\fR, \fB\-\-all\fR [\fBshm\fR] [\fBmsg\fR] [\fBsem\fR]
+Remove all resources. When an option argument is provided, the removal is
+performed only for the specified resource types. \fIWarning!\fR Do not use
+.B \-a
+if you are unsure how the software using the resources might react to missing
+objects. Some programs create these resources at startup and may not have
+any code to deal with an unexpected disappearance.
+.TP
+.BR \-M , " \-\-shmem\-key " \fIshmkey
+Remove the shared memory segment created with
+.I shmkey
+after the last detach is performed.
+.TP
+.BR \-m , " \-\-shmem\-id " \fIshmid
+Remove the shared memory segment identified by
+.I shmid
+after the last detach is performed.
+.TP
+.BR \-Q , " \-\-queue\-key " \fImsgkey
+Remove the message queue created with
+.IR msgkey .
+.TP
+.BR \-q , " \-\-queue\-id " \fImsgid
+Remove the message queue identified by
+.IR msgid .
+.TP
+.BR \-S , " \-\-semaphore\-key " \fIsemkey
+Remove the semaphore created with
+.IR semkey .
+.TP
+.BR \-s , " \-\-semaphore\-id " \fIsemid
+Remove the semaphore identified by
+.IR semid .
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH NOTES
+In its first Linux implementation, \fBipcrm\fR used the deprecated syntax
+shown in the second line of the
+.BR SYNOPSIS .
+Functionality present in other *nix implementations of \fBipcrm\fR has since
+been added, namely the ability to delete resources by key (not just
+identifier), and to respect the same command-line syntax. For backward
+compatibility the previous syntax is still supported.
+.\" .SH AUTHORS
+.\" Andre C. Mazzone (linuxdev@karagee.com)
+.\" .br
+.\" Krishna Balasubramanian (balasub@cis.ohio-state.edu)
+.SH SEE ALSO
+.nh
+.BR ipcmk (1),
+.BR ipcs (1),
+.BR msgctl (2),
+.BR msgget (2),
+.BR semctl (2),
+.BR semget (2),
+.BR shmctl (2),
+.BR shmdt (2),
+.BR shmget (2),
+.BR ftok (3)
+.SH AVAILABILITY
+The ipcrm command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/ipcrm.c b/sys-utils/ipcrm.c
new file mode 100644
index 0000000..a9f2d1b
--- /dev/null
+++ b/sys-utils/ipcrm.c
@@ -0,0 +1,423 @@
+/*
+ * krishna balasubramanian 1993
+ *
+ * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ *
+ * 1999-04-02 frank zago
+ * - can now remove several id's in the same call
+ *
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ipc.h>
+#include <sys/msg.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include "c.h"
+#include "nls.h"
+#include "strutils.h"
+#include "closestream.h"
+
+#ifndef HAVE_UNION_SEMUN
+/* according to X/OPEN we have to define it ourselves */
+/*
+ * Type of the optional fourth semctl() argument.  This file passes it with
+ * IPC_RMID (val), SEM_INFO (array, pointing at a struct seminfo) and
+ * SEM_STAT (buf).
+ */
+union semun {
+ int val;
+ struct semid_ds *buf;
+ unsigned short int *array;
+ struct seminfo *__buf;
+};
+#endif
+
+/*
+ * IPC resource classes handled by ipcrm.  ALL is accepted only by
+ * remove_all(); key_to_id() aborts on it and remove_id() treats it as an
+ * impossible value.
+ */
+typedef enum type_id {
+ SHM,
+ SEM,
+ MSG,
+ ALL
+} type_id;
+
+/* set by -v/--verbose; makes remove_id() announce each removal on stdout */
+static int verbose = 0;
+
+/*
+ * Print the ipcrm help text (both the option syntax and the old
+ * "shm|msg|sem <id>..." syntax) to stdout and exit with EXIT_SUCCESS.
+ * Never returns.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _(" %1$s [options]\n"
+ " %1$s shm|msg|sem <id>...\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Remove certain IPC resources.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -m, --shmem-id <id> remove shared memory segment by id\n"), out);
+ fputs(_(" -M, --shmem-key <key> remove shared memory segment by key\n"), out);
+ fputs(_(" -q, --queue-id <id> remove message queue by id\n"), out);
+ fputs(_(" -Q, --queue-key <key> remove message queue by key\n"), out);
+ fputs(_(" -s, --semaphore-id <id> remove semaphore by id\n"), out);
+ fputs(_(" -S, --semaphore-key <key> remove semaphore by key\n"), out);
+ fputs(_(" -a, --all[=shm|msg|sem] remove all (in the specified category)\n"), out);
+ fputs(_(" -v, --verbose explain what is being done\n"), out);
+
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(28));
+ printf(USAGE_MAN_TAIL("ipcrm(1)"));
+
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * Remove one IPC object of class @type through the matching
+ * shmctl()/msgctl()/semctl() IPC_RMID call.
+ *
+ * @iskey only selects the wording of a failure message ("key" vs. "id");
+ * @id is always passed to the kernel as an identifier.
+ *
+ * Returns 0 on success and 1 after warning about EACCES, EPERM, EINVAL or
+ * EIDRM.  Any other errno, or an unknown @type, terminates the program.
+ */
+static int remove_id(int type, int iskey, int id)
+{
+	const char *reason;
+	union semun arg;	/* only the semctl() call takes this */
+	int rc;
+
+	arg.val = 0;
+
+	switch (type) {
+	case SHM:
+		if (verbose)
+			printf(_("removing shared memory segment id `%d'\n"), id);
+		rc = shmctl(id, IPC_RMID, NULL);
+		break;
+	case MSG:
+		if (verbose)
+			printf(_("removing message queue id `%d'\n"), id);
+		rc = msgctl(id, IPC_RMID, NULL);
+		break;
+	case SEM:
+		if (verbose)
+			printf(_("removing semaphore id `%d'\n"), id);
+		rc = semctl(id, 0, IPC_RMID, arg);
+		break;
+	default:
+		errx(EXIT_FAILURE, "impossible occurred");
+	}
+
+	if (rc >= 0)
+		return 0;
+
+	/* the removal failed: warn for the expected errors, die otherwise */
+	switch (errno) {
+	case EACCES:
+	case EPERM:
+		reason = iskey ? _("permission denied for key") : _("permission denied for id");
+		break;
+	case EINVAL:
+		reason = iskey ? _("invalid key") : _("invalid id");
+		break;
+	case EIDRM:
+		reason = iskey ? _("already removed key") : _("already removed id");
+		break;
+	default:
+		err(EXIT_FAILURE, "%s", iskey ? _("key failed") : _("id failed"));
+	}
+
+	warnx("%s (%d)", reason, id);
+	return 1;
+}
+
+/*
+ * Remove every object of class @type named by the decimal identifiers in
+ * argv[0..argc-1] (old "ipcrm shm|msg|sem <id>..." syntax).
+ *
+ * An argument is rejected with a warning when it is empty, is not a plain
+ * decimal number, or does not fit into a non-negative int.  The remaining
+ * arguments are still processed.
+ *
+ * Returns the number of arguments that were rejected or whose removal
+ * failed; 0 means everything was removed (or @argc was 0).
+ */
+static int remove_arg_list(type_id type, int argc, char **argv)
+{
+	int nb_errors = 0;
+	int i;
+
+	for (i = 0; i < argc; i++) {
+		char *end = NULL;
+		unsigned long val;
+
+		errno = 0;
+		val = strtoul(argv[i], &end, 10);
+
+		/*
+		 * end == argv[i] catches the empty string, which would
+		 * otherwise be taken as id 0.  The INT_MAX check catches
+		 * overflow and negative input, which strtoul() wraps to a
+		 * huge value.
+		 */
+		if (errno || end == argv[i] || *end != '\0' || val > INT_MAX) {
+			warnx(_("invalid id: %s"), argv[i]);
+			nb_errors++;
+		} else if (remove_id(type, 0, (int) val))
+			nb_errors++;
+	}
+	return nb_errors;
+}
+
+/*
+ * Handle the old "ipcrm shm|msg|sem <id>..." syntax.
+ *
+ * Returns 0 without doing anything when argv[1] is not one of the three
+ * keywords, so the caller can go on with option parsing.  Otherwise
+ * removes the listed ids and returns 1 after printing a confirmation.
+ * Terminates the program when no id follows the keyword or when any
+ * removal failed.  The caller must guarantee argc >= 2.
+ */
+static int deprecated_main(int argc, char **argv)
+{
+	static const struct {
+		const char *keyword;
+		type_id type;
+	} classes[] = {
+		{ "shm", SHM },
+		{ "msg", MSG },
+		{ "sem", SEM }
+	};
+	const size_t nclasses = sizeof(classes) / sizeof(classes[0]);
+	size_t i;
+
+	for (i = 0; i < nclasses; i++)
+		if (strcmp(argv[1], classes[i].keyword) == 0)
+			break;
+
+	/* not the old syntax at all */
+	if (i == nclasses)
+		return 0;
+
+	if (argc < 3) {
+		warnx(_("not enough arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	if (remove_arg_list(classes[i].type, argc - 2, argv + 2) != 0)
+		exit(EXIT_FAILURE);
+
+	printf(_("resource(s) deleted\n"));
+	return 1;
+}
+
+/*
+ * Parse @str as an IPC key and return its value.
+ *
+ * strtoul() is called with base 0, so decimal, octal (leading "0") and
+ * hexadecimal (leading "0x"/"0X") notations are accepted.  A NULL or
+ * empty string, trailing garbage, or a conversion error terminates the
+ * program with "@errmesg: '@str'".
+ */
+static unsigned long strtokey(const char *str, const char *errmesg)
+{
+	unsigned long num;
+	char *end = NULL;
+
+	/*
+	 * Clear errno before any check can jump to the failure path;
+	 * otherwise an error left behind by an earlier call would be
+	 * reported for a merely empty argument.
+	 */
+	errno = 0;
+	if (str == NULL || *str == '\0')
+		goto err;
+
+	num = strtoul(str, &end, 0);
+
+	if (errno || str == end || (end && *end))
+		goto err;
+
+	return num;
+ err:
+	if (errno)
+		err(EXIT_FAILURE, "%s: '%s'", errmesg, str);
+	else
+		errx(EXIT_FAILURE, "%s: '%s'", errmesg, str ? str : "");
+	return 0;
+}
+
+/*
+ * Translate the key given as text in @s into the identifier of the
+ * existing object of class @type, using shmget()/msgget()/semget()
+ * without IPC_CREAT.
+ *
+ * Returns the identifier, or a negative value after printing a warning
+ * when the key is IPC_PRIVATE or the lookup failed with EACCES, EIDRM or
+ * ENOENT.  An unparsable key or any other lookup error terminates the
+ * program.  @type must not be ALL.
+ */
+static int key_to_id(type_id type, char *s)
+{
+	int id;
+	/* strtokey() accepts decimal, octal and hexadecimal notation */
+	key_t key = strtokey(s, _("failed to parse argument"));
+
+	if (key == IPC_PRIVATE) {
+		warnx(_("illegal key (%s)"), s);
+		return -1;
+	}
+	switch (type) {
+	case SHM:
+		id = shmget(key, 0, 0);
+		break;
+	case MSG:
+		id = msgget(key, 0);
+		break;
+	case SEM:
+		id = semget(key, 0, 0);
+		break;
+	case ALL:
+		abort();
+	default:
+		errx(EXIT_FAILURE, "impossible occurred");
+	}
+	if (id < 0) {
+		const char *errmsg;
+
+		switch (errno) {
+		case EACCES:
+			errmsg = _("permission denied for key");
+			break;
+		case EIDRM:
+			errmsg = _("already removed key");
+			break;
+		case ENOENT:
+			errmsg = _("invalid key");
+			break;
+		default:
+			err(EXIT_FAILURE, _("key failed"));
+		}
+		warnx("%s (%s)", errmsg, s);
+	}
+	return id;
+}
+
+/*
+ * Remove every object of class @type, or of all three classes when @type
+ * is ALL.
+ *
+ * For each class the *_INFO control call supplies the upper bound of the
+ * scan; every slot from 0 up to that bound is then queried with *_STAT and
+ * each non-negative result is passed to remove_id() as the identifier.
+ *
+ * Returns non-zero when at least one remove_id() call reported a failure.
+ * Terminates the program when an *_INFO call fails.
+ */
+static int remove_all(type_id type)
+{
+ int ret = 0;
+ int id, rm_me, maxid;
+
+ struct shmid_ds shmseg;
+
+ struct semid_ds semary;
+ struct seminfo seminfo;
+ union semun arg;
+
+ struct msqid_ds msgque;
+ struct msginfo msginfo;
+
+ if (type == SHM || type == ALL) {
+ /* shmseg is only scratch space here; just the return value is used */
+ maxid = shmctl(0, SHM_INFO, &shmseg);
+ if (maxid < 0)
+ errx(EXIT_FAILURE,
+ _("kernel not configured for shared memory"));
+ for (id = 0; id <= maxid; id++) {
+ rm_me = shmctl(id, SHM_STAT, &shmseg);
+ /* negative result: nothing to remove in this slot */
+ if (rm_me < 0)
+ continue;
+ ret |= remove_id(SHM, 0, rm_me);
+ }
+ }
+ if (type == SEM || type == ALL) {
+ /* SEM_INFO writes into seminfo through the union's array member */
+ arg.array = (ushort *) (void *)&seminfo;
+ maxid = semctl(0, 0, SEM_INFO, arg);
+ if (maxid < 0)
+ errx(EXIT_FAILURE,
+ _("kernel not configured for semaphores"));
+ for (id = 0; id <= maxid; id++) {
+ arg.buf = (struct semid_ds *)&semary;
+ rm_me = semctl(id, 0, SEM_STAT, arg);
+ if (rm_me < 0)
+ continue;
+ ret |= remove_id(SEM, 0, rm_me);
+ }
+ }
+/* kFreeBSD hackery -- ah 20140723 */
+/* fallback values for platforms whose headers lack these two commands */
+#ifndef MSG_STAT
+#define MSG_STAT 11
+#endif
+#ifndef MSG_INFO
+#define MSG_INFO 12
+#endif
+ if (type == MSG || type == ALL) {
+ /* MSG_INFO fills a struct msginfo, passed through the msqid_ds slot */
+ maxid =
+ msgctl(0, MSG_INFO, (struct msqid_ds *)(void *)&msginfo);
+ if (maxid < 0)
+ errx(EXIT_FAILURE,
+ _("kernel not configured for message queues"));
+ for (id = 0; id <= maxid; id++) {
+ rm_me = msgctl(id, MSG_STAT, &msgque);
+ if (rm_me < 0)
+ continue;
+ ret |= remove_id(MSG, 0, rm_me);
+ }
+ }
+ return ret;
+}
+
+/*
+ * Handle one -m/-M/-q/-Q/-s/-S option: turn @arg into an identifier and
+ * remove that object.  With @iskey set, @arg is a key resolved through
+ * key_to_id(); otherwise it is parsed with strtos32_or_err(), which
+ * terminates the program on bad input.
+ *
+ * Returns 1 when the key lookup or the removal failed, 0 otherwise.
+ */
+static int remove_by_option(type_id type, int iskey, char *arg)
+{
+	int id;
+
+	if (iskey) {
+		id = key_to_id(type, arg);
+		if (id < 0)
+			return 1;
+	} else
+		id = strtos32_or_err(arg, _("failed to parse argument"));
+
+	return remove_id(type, iskey, id) ? 1 : 0;
+}
+
+/*
+ * ipcrm entry point.
+ *
+ * Without arguments nothing is done.  The old "shm|msg|sem <id>..." syntax
+ * is tried first; otherwise the options are processed in command-line
+ * order, each removal happening as soon as its option is parsed.  A
+ * requested --all sweep runs after option parsing.
+ *
+ * Returns EXIT_SUCCESS when every requested removal succeeded.
+ */
+int main(int argc, char **argv)
+{
+	int opt;
+	int failures = 0;
+	int rm_all = 0;
+	type_id what_all = ALL;
+
+	static const struct option longopts[] = {
+		{"shmem-id", required_argument, NULL, 'm'},
+		{"shmem-key", required_argument, NULL, 'M'},
+		{"queue-id", required_argument, NULL, 'q'},
+		{"queue-key", required_argument, NULL, 'Q'},
+		{"semaphore-id", required_argument, NULL, 's'},
+		{"semaphore-key", required_argument, NULL, 'S'},
+		{"all", optional_argument, NULL, 'a'},
+		{"verbose", no_argument, NULL, 'v'},
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	/* if the command is executed without parameters, do nothing */
+	if (argc == 1)
+		return 0;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	/* the old syntax handles its whole argument list by itself */
+	if (deprecated_main(argc, argv))
+		return EXIT_SUCCESS;
+
+	/* process new syntax to conform with SYSV ipcrm */
+	while ((opt = getopt_long(argc, argv, "q:m:s:Q:M:S:a::vhV", longopts, NULL)) != -1) {
+		switch (opt) {
+		case 'M':
+			failures += remove_by_option(SHM, 1, optarg);
+			break;
+		case 'm':
+			failures += remove_by_option(SHM, 0, optarg);
+			break;
+		case 'Q':
+			failures += remove_by_option(MSG, 1, optarg);
+			break;
+		case 'q':
+			failures += remove_by_option(MSG, 0, optarg);
+			break;
+		case 'S':
+			failures += remove_by_option(SEM, 1, optarg);
+			break;
+		case 's':
+			failures += remove_by_option(SEM, 0, optarg);
+			break;
+		case 'a':
+			rm_all = 1;
+			if (!optarg)
+				what_all = ALL;
+			else if (strcmp(optarg, "shm") == 0)
+				what_all = SHM;
+			else if (strcmp(optarg, "msg") == 0)
+				what_all = MSG;
+			else if (strcmp(optarg, "sem") == 0)
+				what_all = SEM;
+			else
+				errx(EXIT_FAILURE,
+				     _("unknown argument: %s"), optarg);
+			break;
+		case 'v':
+			verbose = 1;
+			break;
+		case 'h':
+			usage();
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (rm_all && remove_all(what_all))
+		failures++;
+
+	/* print usage if we still have some arguments left over */
+	if (optind < argc) {
+		warnx(_("unknown argument: %s"), argv[optind]);
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	return failures ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/sys-utils/ipcs.1 b/sys-utils/ipcs.1
new file mode 100644
index 0000000..93c35e3
--- /dev/null
+++ b/sys-utils/ipcs.1
@@ -0,0 +1,116 @@
+.\" Copyright 1993 Rickard E. Faith (faith@cs.unc.edu)
+.\" May be distributed under the GNU General Public License
+.TH IPCS "1" "July 2014" "util-linux" "User Commands"
+.SH NAME
+ipcs \- show information on IPC facilities
+.SH SYNOPSIS
+.B ipcs
+[options]
+.SH DESCRIPTION
+.B ipcs
+shows information on the inter-process communication facilities
+for which the calling process has read access.
+By default it shows information about all three resources:
+shared memory segments, message queues, and semaphore arrays.
+.SH OPTIONS
+.TP
+\fB\-i\fR, \fB\-\-id\fR \fIid\fR
+Show full details on just the one resource element identified by
+.IR id .
+This option needs to be combined with one of the three resource options:
+.BR \-m ,
+.BR \-q " or"
+.BR \-s .
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Display help text and exit.
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Display version information and exit.
+.SS "Resource options"
+.TP
+\fB\-m\fR, \fB\-\-shmems\fR
+Write information about active shared memory segments.
+.TP
+\fB\-q\fR, \fB\-\-queues\fR
+Write information about active message queues.
+.TP
+\fB\-s\fR, \fB\-\-semaphores\fR
+Write information about active semaphore sets.
+.TP
+\fB\-a\fR, \fB\-\-all\fR
+Write information about all three resources (default).
+.SS "Output formats"
+Of these options only one takes effect: the last one specified.
+.TP
+\fB\-c\fR, \fB\-\-creator\fR
+Show creator and owner.
+.TP
+\fB\-l\fR, \fB\-\-limits\fR
+Show resource limits.
+.TP
+\fB\-p\fR, \fB\-\-pid\fR
+Show PIDs of creator and last operator.
+.TP
+\fB\-t\fR, \fB\-\-time\fR
+Write time information. The time of the last control operation that changed
+the access permissions for all facilities, the time of the last
+.BR msgsnd (2)
+and
+.BR msgrcv (2)
+operations on message queues, the time of the last
+.BR shmat (2)
+and
+.BR shmdt (2)
+operations on shared memory, and the time of the last
+.BR semop (2)
+operation on semaphores.
+.TP
+\fB\-u\fR, \fB\-\-summary\fR
+Show status summary.
+.SS "Representation"
+These affect only the \fB\-l\fR (\fB\-\-limits\fR) option.
+.TP
+\fB\-b\fR, \fB\-\-bytes\fR
+Print sizes in bytes.
+.TP
+.B \-\-human
+Print sizes in human-readable format.
+.SH SEE ALSO
+.BR ipcmk (1),
+.BR ipcrm (1),
+.BR msgrcv (2),
+.BR msgsnd (2),
+.BR semget (2),
+.BR semop (2),
+.BR shmat (2),
+.BR shmdt (2),
+.BR shmget (2)
+.SH CONFORMING TO
+The Linux ipcs utility is not fully compatible with the POSIX ipcs utility.
+The Linux version does not support the POSIX
+.BR \-a ,
+.B \-b
+and
+.B \-o
+options, but does support the
+.B \-l
+and
+.B \-u
+options not defined by POSIX. A portable application shall not use the
+.BR \-a ,
+.BR \-b ,
+.BR \-o ,
+.BR \-l ,
+and
+.B \-u
+options.
+.SH AUTHOR
+.UR balasub@cis.ohio-state.edu
+Krishna Balasubramanian
+.UE
+.SH AVAILABILITY
+The ipcs command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/ipcs.c b/sys-utils/ipcs.c
new file mode 100644
index 0000000..73cf28a
--- /dev/null
+++ b/sys-utils/ipcs.c
@@ -0,0 +1,668 @@
+/* Original author unknown, may be "krishna balasub@cis.ohio-state.edu" */
+/*
+ * Modified Sat Oct 9 10:55:28 1993 for 0.99.13
+ *
+ * Patches from Mike Jagdis (jaggy@purplet.demon.co.uk) applied Wed Feb 8
+ * 12:12:21 1995 by faith@cs.unc.edu to print numeric uids if no passwd file
+ * entry.
+ *
+ * Patch from arnolds@ifns.de (Heinz-Ado Arnolds) applied Mon Jul 1 19:30:41
+ * 1996 by janl@math.uio.no to add code missing in case PID: clauses.
+ *
+ * Patched to display the key field -- hy@picksys.com 12/18/96
+ *
+ * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ */
+
+#include <errno.h>
+#include <getopt.h>
+
+#include "c.h"
+#include "nls.h"
+#include "closestream.h"
+
+#include "ipcutils.h"
+
+/*
+ * Report layouts.  main() selects one with -l (LIMITS), -u (STATUS),
+ * -c (CREATOR), -t (TIME) or -p (PID); NOTSPECIFIED is the default listing.
+ */
+enum output_formats {
+ NOTSPECIFIED,
+ LIMITS,
+ STATUS,
+ CREATOR,
+ TIME,
+ PID
+};
+/* getopt_long() codes for long-only options, kept above the char range */
+enum {
+ OPT_HUMAN = CHAR_MAX + 1
+};
+
+/*
+ * Printers defined after main().  The do_* functions print a report in the
+ * given enum output_formats layout; the print_* functions are called from
+ * main() for a single id.  @unit is one of the IPC_UNIT_* values.
+ */
+static void do_shm (char format, int unit);
+static void print_shm (int id, int unit);
+static void do_sem (char format);
+static void print_sem (int id);
+static void do_msg (char format, int unit);
+static void print_msg (int id, int unit);
+
+/* we read time as int64_t from /proc, so cast... */
+/* NOTE(review): the pointer cast assumes time_t is as wide as the stored
+ * value -- confirm this holds on targets with a 32-bit time_t. */
+#define xctime(_x) ctime((time_t *) (_x))
+
+/*
+ * Print the ipcs help text to stdout and exit with EXIT_SUCCESS.
+ * Never returns.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _(" %1$s [resource-option...] [output-option]\n"
+ " %1$s -m|-q|-s -i <id>\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Show information on IPC facilities.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -i, --id <id> print details on resource identified by <id>\n"), out);
+ printf(USAGE_HELP_OPTIONS(16));
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Resource options:\n"), out);
+ fputs(_(" -m, --shmems shared memory segments\n"), out);
+ fputs(_(" -q, --queues message queues\n"), out);
+ fputs(_(" -s, --semaphores semaphores\n"), out);
+ fputs(_(" -a, --all all (default)\n"), out);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Output options:\n"), out);
+ fputs(_(" -t, --time show attach, detach and change times\n"), out);
+ fputs(_(" -p, --pid show PIDs of creator and last operator\n"), out);
+ fputs(_(" -c, --creator show creator and owner\n"), out);
+ fputs(_(" -l, --limits show resource limits\n"), out);
+ fputs(_(" -u, --summary show status summary\n"), out);
+ fputs(_(" --human show sizes in human-readable format\n"), out);
+ fputs(_(" -b, --bytes show sizes in bytes\n"), out);
+ printf(USAGE_MAN_TAIL("ipcs(1)"));
+
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * ipcs entry point.
+ *
+ * Resource options (-q, -m, -s, -a) choose what to report and default to
+ * all three classes.  Of the output-format options the last one given
+ * wins.  With -i <id> exactly one resource class must be selected and
+ * only that single object is printed.
+ *
+ * Returns EXIT_SUCCESS; bad usage terminates with EXIT_FAILURE.
+ */
+int main (int argc, char **argv)
+{
+	int opt, msg = 0, shm = 0, sem = 0, id = 0, specific = 0;
+	char format = NOTSPECIFIED;
+	int unit = IPC_UNIT_DEFAULT;
+	static const struct option longopts[] = {
+		{"id", required_argument, NULL, 'i'},
+		{"queues", no_argument, NULL, 'q'},
+		{"shmems", no_argument, NULL, 'm'},
+		{"semaphores", no_argument, NULL, 's'},
+		{"all", no_argument, NULL, 'a'},
+		{"time", no_argument, NULL, 't'},
+		{"pid", no_argument, NULL, 'p'},
+		{"creator", no_argument, NULL, 'c'},
+		{"limits", no_argument, NULL, 'l'},
+		{"summary", no_argument, NULL, 'u'},
+		{"human", no_argument, NULL, OPT_HUMAN},
+		{"bytes", no_argument, NULL, 'b'},
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+	char options[] = "i:qmsatpclubVh";
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((opt = getopt_long(argc, argv, options, longopts, NULL)) != -1) {
+		switch (opt) {
+		case 'i':
+		{
+			char *end = NULL;
+			long val;
+
+			/*
+			 * Reject anything that is not a complete decimal
+			 * number fitting an int, instead of silently
+			 * reporting on id 0.
+			 */
+			errno = 0;
+			val = strtol(optarg, &end, 10);
+			if (errno || end == optarg || *end != '\0' ||
+			    val < INT_MIN || val > INT_MAX)
+				errx(EXIT_FAILURE, "%s: '%s'",
+				     _("failed to parse id argument"), optarg);
+			id = (int) val;
+			specific = 1;
+			break;
+		}
+		case 'a':
+			msg = shm = sem = 1;
+			break;
+		case 'q':
+			msg = 1;
+			break;
+		case 'm':
+			shm = 1;
+			break;
+		case 's':
+			sem = 1;
+			break;
+		case 't':
+			format = TIME;
+			break;
+		case 'c':
+			format = CREATOR;
+			break;
+		case 'p':
+			format = PID;
+			break;
+		case 'l':
+			format = LIMITS;
+			break;
+		case 'u':
+			format = STATUS;
+			break;
+		case OPT_HUMAN:
+			unit = IPC_UNIT_HUMAN;
+			break;
+		case 'b':
+			unit = IPC_UNIT_BYTES;
+			break;
+		case 'h':
+			usage();
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (specific && (msg + shm + sem != 1))
+		errx (EXIT_FAILURE,
+		      _("when using an ID, a single resource must be specified"));
+	if (specific) {
+		if (msg)
+			print_msg (id, unit);
+		if (shm)
+			print_shm (id, unit);
+		if (sem)
+			print_sem (id);
+	} else {
+		/* no resource option given: report all three classes */
+		if (!msg && !shm && !sem)
+			msg = shm = sem = 1;
+		printf ("\n");
+		if (msg) {
+			do_msg (format, unit);
+			printf ("\n");
+		}
+		if (shm) {
+			do_shm (format, unit);
+			printf ("\n");
+		}
+		if (sem) {
+			do_sem (format);
+			printf ("\n");
+		}
+	}
+	return EXIT_SUCCESS;
+}
+
+/*
+ * Print the shared memory report in layout @format (an enum output_formats
+ * value) using size unit @unit (an IPC_UNIT_* value).
+ *
+ * LIMITS and STATUS print a fixed summary and return.  Every other layout
+ * prints a header line and then one row per segment obtained from
+ * ipc_shm_get_info(-1, ...).
+ */
+static void do_shm (char format, int unit)
+{
+ struct passwd *pw;
+ struct shm_data *shmds, *shmdsp;
+
+ switch (format) {
+ case LIMITS:
+ {
+ struct ipc_limits lim;
+ uint64_t tmp, pgsz = getpagesize();
+
+ if (ipc_shm_get_limits(&lim)) {
+ printf (_("unable to fetch shared memory limits\n"));
+ return;
+ }
+ printf (_("------ Shared Memory Limits --------\n"));
+ printf (_("max number of segments = %ju\n"), lim.shmmni);
+ ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_KB : unit,
+ _("max seg size"), lim.shmmax, "\n", 0);
+
+ /* shmall is multiplied by the page size to get a byte count */
+ tmp = (uint64_t) lim.shmall * pgsz;
+ /* overflow handling, at least we don't print ridiculous small values */
+ if (lim.shmall != 0 && tmp / lim.shmall != pgsz) {
+ tmp = UINT64_MAX - (UINT64_MAX % pgsz);
+ }
+ ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_KB : unit,
+ _("max total shared memory"), tmp, "\n", 0);
+ ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_BYTES : unit,
+ _("min seg size"), lim.shmmin, "\n", 0);
+ return;
+ }
+ case STATUS:
+ {
+ int maxid;
+ struct shmid_ds shmbuf;
+ struct shm_info *shm_info;
+
+ /* SHM_INFO fills the buffer, which is then read as a struct shm_info */
+ maxid = shmctl (0, SHM_INFO, &shmbuf);
+ shm_info = (struct shm_info *) &shmbuf;
+ if (maxid < 0) {
+ printf (_("kernel not configured for shared memory\n"));
+ return;
+ }
+
+ printf (_("------ Shared Memory Status --------\n"));
+ /*
+ * TRANSLATORS: This output format is maintained for backward
+ * compatibility as ipcs is used in scripts. For consistency
+ * with the rest, the translated form can follow this model:
+ *
+ * "segments allocated = %d\n"
+ * "pages allocated = %ld\n"
+ * "pages resident = %ld\n"
+ * "pages swapped = %ld\n"
+ * "swap performance = %ld attempts, %ld successes\n"
+ */
+ printf (_("segments allocated %d\n"
+ "pages allocated %ld\n"
+ "pages resident %ld\n"
+ "pages swapped %ld\n"
+ "Swap performance: %ld attempts\t %ld successes\n"),
+ shm_info->used_ids,
+ shm_info->shm_tot,
+ shm_info->shm_rss,
+ shm_info->shm_swp,
+ shm_info->swap_attempts, shm_info->swap_successes);
+ return;
+ }
+
+ /*
+ * Headers only
+ */
+ case CREATOR:
+ printf (_("------ Shared Memory Segment Creators/Owners --------\n"));
+ printf ("%-10s %-10s %-10s %-10s %-10s %-10s\n",
+ _("shmid"),_("perms"),_("cuid"),_("cgid"),_("uid"),_("gid"));
+ break;
+
+ case TIME:
+ printf (_("------ Shared Memory Attach/Detach/Change Times --------\n"));
+ printf ("%-10s %-10s %-20s %-20s %-20s\n",
+ _("shmid"),_("owner"),_("attached"),_("detached"),
+ _("changed"));
+ break;
+
+ case PID:
+ printf (_("------ Shared Memory Creator/Last-op PIDs --------\n"));
+ printf ("%-10s %-10s %-10s %-10s\n",
+ _("shmid"),_("owner"),_("cpid"),_("lpid"));
+ break;
+
+ default:
+ printf (_("------ Shared Memory Segments --------\n"));
+ printf ("%-10s %-10s %-10s %-10s %-10s %-10s %-12s\n",
+ _("key"),_("shmid"),_("owner"),_("perms"),
+ unit == IPC_UNIT_HUMAN ? _("size") : _("bytes"),
+ _("nattch"),_("status"));
+ break;
+ }
+
+ /*
+ * Print data
+ */
+ if (ipc_shm_get_info(-1, &shmds) < 1)
+ return;
+
+ /*
+ * The walk stops at the node whose next pointer is NULL, so that final
+ * node is never printed.  NOTE(review): presumably it is an unused
+ * trailing element allocated by ipc_shm_get_info() -- confirm there.
+ */
+ for (shmdsp = shmds; shmdsp->next != NULL; shmdsp = shmdsp->next) {
+ if (format == CREATOR) {
+ ipc_print_perms(stdout, &shmdsp->shm_perm);
+ continue;
+ }
+ /* owner column: user name when known, numeric uid otherwise */
+ pw = getpwuid(shmdsp->shm_perm.uid);
+ switch (format) {
+ case TIME:
+ if (pw)
+ printf ("%-10d %-10.10s", shmdsp->shm_perm.id, pw->pw_name);
+ else
+ printf ("%-10d %-10u", shmdsp->shm_perm.id, shmdsp->shm_perm.uid);
+ /* ctime uses static buffer: use separate calls */
+ /* "+ 4" skips the leading day-of-week field of the ctime() string */
+ printf(" %-20.16s", shmdsp->shm_atim
+ ? xctime(&shmdsp->shm_atim) + 4 : _("Not set"));
+ printf(" %-20.16s", shmdsp->shm_dtim
+ ? xctime(&shmdsp->shm_dtim) + 4 : _("Not set"));
+ printf(" %-20.16s\n", shmdsp->shm_ctim
+ ? xctime(&shmdsp->shm_ctim) + 4 : _("Not set"));
+ break;
+ case PID:
+ if (pw)
+ printf ("%-10d %-10.10s", shmdsp->shm_perm.id, pw->pw_name);
+ else
+ printf ("%-10d %-10u", shmdsp->shm_perm.id, shmdsp->shm_perm.uid);
+ printf (" %-10u %-10u\n",
+ shmdsp->shm_cprid, shmdsp->shm_lprid);
+ break;
+
+ default:
+ printf("0x%08x ", shmdsp->shm_perm.key);
+ if (pw)
+ printf ("%-10d %-10.10s", shmdsp->shm_perm.id, pw->pw_name);
+ else
+ printf ("%-10d %-10u", shmdsp->shm_perm.id, shmdsp->shm_perm.uid);
+ /* only the nine permission bits are shown, in octal */
+ printf (" %-10o ", shmdsp->shm_perm.mode & 0777);
+
+ if (unit == IPC_UNIT_HUMAN)
+ ipc_print_size(unit, NULL, shmdsp->shm_segsz, " ", 6);
+ else
+ ipc_print_size(unit, NULL, shmdsp->shm_segsz, NULL, -10);
+
+ printf (" %-10ju %-6s %-6s\n",
+ shmdsp->shm_nattch,
+ shmdsp->shm_perm.mode & SHM_DEST ? _("dest") : " ",
+ shmdsp->shm_perm.mode & SHM_LOCKED ? _("locked") : " ");
+ break;
+ }
+ }
+
+ ipc_shm_free_info(shmds);
+ return;
+}
+
+/*
+ * Print the semaphore report selected by @format to stdout.
+ *
+ * LIMITS and STATUS print a fixed summary and return right away.  The other
+ * formats print a table header first and then one row per entry of the list
+ * returned by ipc_sem_get_info().
+ */
+static void do_sem (char format)
+{
+ struct passwd *pw;
+ struct sem_data *semds, *semdsp;
+
+ switch (format) {
+ case LIMITS:
+ {
+ struct ipc_limits lim;
+
+ if (ipc_sem_get_limits(&lim)) {
+ printf (_("unable to fetch semaphore limits\n"));
+ return;
+ }
+ printf (_("------ Semaphore Limits --------\n"));
+ printf (_("max number of arrays = %d\n"), lim.semmni);
+ printf (_("max semaphores per array = %d\n"), lim.semmsl);
+ printf (_("max semaphores system wide = %d\n"), lim.semmns);
+ printf (_("max ops per semop call = %d\n"), lim.semopm);
+ printf (_("semaphore max value = %u\n"), lim.semvmx);
+ return;
+ }
+ case STATUS:
+ {
+ struct seminfo seminfo;
+ union semun arg;
+ /* SEM_INFO writes into seminfo; it is handed over through the
+  * array member of the semun argument, hence the cast */
+ arg.array = (ushort *) (void *) &seminfo;
+ if (semctl (0, 0, SEM_INFO, arg) < 0) {
+ printf (_("kernel not configured for semaphores\n"));
+ return;
+ }
+ printf (_("------ Semaphore Status --------\n"));
+ printf (_("used arrays = %d\n"), seminfo.semusz);
+ printf (_("allocated semaphores = %d\n"), seminfo.semaem);
+ return;
+ }
+
+ case CREATOR:
+ printf (_("------ Semaphore Arrays Creators/Owners --------\n"));
+ printf ("%-10s %-10s %-10s %-10s %-10s %-10s\n",
+ _("semid"),_("perms"),_("cuid"),_("cgid"),_("uid"),_("gid"));
+ break;
+
+ case TIME:
+ printf (_("------ Semaphore Operation/Change Times --------\n"));
+ printf ("%-8s %-10s %-26.24s %-26.24s\n",
+ _("semid"),_("owner"),_("last-op"),_("last-changed"));
+ break;
+
+ case PID:
+ /* no header for the PID format; no rows are printed for it either */
+ break;
+
+ default:
+ printf (_("------ Semaphore Arrays --------\n"));
+ printf ("%-10s %-10s %-10s %-10s %-10s\n",
+ _("key"),_("semid"),_("owner"),_("perms"),_("nsems"));
+ break;
+ }
+
+ /*
+ * Print data
+ */
+ /* id -1 asks for all arrays; a result below 1 means there is nothing to print */
+ if (ipc_sem_get_info(-1, &semds) < 1)
+ return;
+
+ /* the walk stops at the node whose next pointer is NULL, so that
+  * final node itself is never printed */
+ for (semdsp = semds; semdsp->next != NULL; semdsp = semdsp->next) {
+ if (format == CREATOR) {
+ ipc_print_perms(stdout, &semdsp->sem_perm);
+ continue;
+ }
+ /* owner is shown by name when the uid resolves, numerically otherwise */
+ pw = getpwuid(semdsp->sem_perm.uid);
+ switch (format) {
+ case TIME:
+ if (pw)
+ printf ("%-8d %-10.10s", semdsp->sem_perm.id, pw->pw_name);
+ else
+ printf ("%-8d %-10u", semdsp->sem_perm.id, semdsp->sem_perm.uid);
+ printf (" %-26.24s", semdsp->sem_otime
+ ? xctime(&semdsp->sem_otime) : _("Not set"));
+ printf (" %-26.24s\n", semdsp->sem_ctime
+ ? xctime( &semdsp->sem_ctime) : _("Not set"));
+ break;
+ case PID:
+ break;
+
+ default:
+ printf("0x%08x ", semdsp->sem_perm.key);
+ if (pw)
+ printf ("%-10d %-10.10s", semdsp->sem_perm.id, pw->pw_name);
+ else
+ printf ("%-10d %-10u", semdsp->sem_perm.id, semdsp->sem_perm.uid);
+ printf (" %-10o %-10ju\n",
+ semdsp->sem_perm.mode & 0777,
+ semdsp->sem_nsems);
+ break;
+ }
+ }
+
+ ipc_sem_free_info(semds);
+ return;
+}
+
+/*
+ * Print the message queue report selected by @format to stdout.
+ *
+ * @unit is one of the IPC_UNIT_* values and controls how sizes are shown.
+ * LIMITS and STATUS print a fixed summary and return right away.  The other
+ * formats print a table header first and then one row per entry of the list
+ * returned by ipc_msg_get_info().
+ */
+static void do_msg (char format, int unit)
+{
+ struct passwd *pw;
+ struct msg_data *msgds, *msgdsp;
+
+ switch (format) {
+ case LIMITS:
+ {
+ struct ipc_limits lim;
+
+ if (ipc_msg_get_limits(&lim)) {
+ printf (_("unable to fetch message limits\n"));
+ return;
+ }
+ printf (_("------ Messages Limits --------\n"));
+ printf (_("max queues system wide = %d\n"), lim.msgmni);
+ /* the default unit is shown as bytes in the limits report */
+ ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_BYTES : unit,
+ _("max size of message"), lim.msgmax, "\n", 0);
+ ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_BYTES : unit,
+ _("default max size of queue"), lim.msgmnb, "\n", 0);
+ return;
+ }
+ case STATUS:
+ {
+ struct msginfo msginfo;
+ /* MSG_INFO writes into msginfo; msgctl() is declared with a
+  * struct msqid_ds pointer, hence the cast */
+ if (msgctl (0, MSG_INFO, (struct msqid_ds *) (void *) &msginfo) < 0) {
+ printf (_("kernel not configured for message queues\n"));
+ return;
+ }
+ printf (_("------ Messages Status --------\n"));
+#ifndef __FreeBSD_kernel__
+ printf (_("allocated queues = %d\n"), msginfo.msgpool);
+ printf (_("used headers = %d\n"), msginfo.msgmap);
+#endif
+ /* with the default unit the " bytes" suffix is appended here */
+ ipc_print_size(unit, _("used space"), msginfo.msgtql,
+ unit == IPC_UNIT_DEFAULT ? _(" bytes\n") : "\n", 0);
+ return;
+ }
+ case CREATOR:
+ printf (_("------ Message Queues Creators/Owners --------\n"));
+ printf ("%-10s %-10s %-10s %-10s %-10s %-10s\n",
+ _("msqid"),_("perms"),_("cuid"),_("cgid"),_("uid"),_("gid"));
+ break;
+
+ case TIME:
+ printf (_("------ Message Queues Send/Recv/Change Times --------\n"));
+ printf ("%-8s %-10s %-20s %-20s %-20s\n",
+ _("msqid"),_("owner"),_("send"),_("recv"),_("change"));
+ break;
+
+ case PID:
+ printf (_("------ Message Queues PIDs --------\n"));
+ printf ("%-10s %-10s %-10s %-10s\n",
+ _("msqid"),_("owner"),_("lspid"),_("lrpid"));
+ break;
+
+ default:
+ printf (_("------ Message Queues --------\n"));
+ printf ("%-10s %-10s %-10s %-10s %-12s %-12s\n",
+ _("key"), _("msqid"), _("owner"), _("perms"),
+ unit == IPC_UNIT_HUMAN ? _("size") : _("used-bytes"),
+ _("messages"));
+ break;
+ }
+
+ /*
+ * Print data
+ */
+ /* id -1 asks for all queues; a result below 1 means there is nothing to print */
+ if (ipc_msg_get_info(-1, &msgds) < 1)
+ return;
+
+ /* the walk stops at the node whose next pointer is NULL, so that
+  * final node itself is never printed */
+ for (msgdsp = msgds; msgdsp->next != NULL; msgdsp = msgdsp->next) {
+ if (format == CREATOR) {
+ ipc_print_perms(stdout, &msgdsp->msg_perm);
+ continue;
+ }
+ /* owner is shown by name when the uid resolves, numerically otherwise */
+ pw = getpwuid(msgdsp->msg_perm.uid);
+ switch (format) {
+ case TIME:
+ if (pw)
+ printf ("%-8d %-10.10s", msgdsp->msg_perm.id, pw->pw_name);
+ else
+ printf ("%-8d %-10u", msgdsp->msg_perm.id, msgdsp->msg_perm.uid);
+ /* "+ 4" skips the first four characters of the xctime() string */
+ printf (" %-20.16s", msgdsp->q_stime
+ ? xctime(&msgdsp->q_stime) + 4 : _("Not set"));
+ printf (" %-20.16s", msgdsp->q_rtime
+ ? xctime(&msgdsp->q_rtime) + 4 : _("Not set"));
+ printf (" %-20.16s\n", msgdsp->q_ctime
+ ? xctime(&msgdsp->q_ctime) + 4 : _("Not set"));
+ break;
+ case PID:
+ if (pw)
+ printf ("%-8d %-10.10s", msgdsp->msg_perm.id, pw->pw_name);
+ else
+ printf ("%-8d %-10u", msgdsp->msg_perm.id, msgdsp->msg_perm.uid);
+ printf (" %5d %5d\n",
+ msgdsp->q_lspid, msgdsp->q_lrpid);
+ break;
+
+ default:
+ printf( "0x%08x ",msgdsp->msg_perm.key );
+ if (pw)
+ printf ("%-10d %-10.10s", msgdsp->msg_perm.id, pw->pw_name);
+ else
+ printf ("%-10d %-10u", msgdsp->msg_perm.id, msgdsp->msg_perm.uid);
+ printf (" %-10o ", msgdsp->msg_perm.mode & 0777);
+
+ /* human-readable sizes are right-aligned in 6 columns, plain
+  * numbers left-aligned in 12 (negative width) */
+ if (unit == IPC_UNIT_HUMAN)
+ ipc_print_size(unit, NULL, msgdsp->q_cbytes, " ", 6);
+ else
+ ipc_print_size(unit, NULL, msgdsp->q_cbytes, NULL, -12);
+
+ printf (" %-12ju\n", msgdsp->q_qnum);
+ break;
+ }
+ }
+
+ ipc_msg_free_info(msgds);
+ return;
+}
+
+/*
+ * Print a detailed report about the single shared memory segment @shmid:
+ * ownership, mode, size (formatted according to @unit), pids, attach count
+ * and the attach/detach/change times.  A warning is issued when the id
+ * cannot be found.
+ */
+static void print_shm(int shmid, int unit)
+{
+	struct shm_data *seg;
+	struct ipc_stat *perm;
+	const char *when;
+
+	if (ipc_shm_get_info(shmid, &seg) < 1) {
+		warnx(_("id %d not found"), shmid);
+		return;
+	}
+	perm = &seg->shm_perm;
+
+	printf(_("\nShared memory Segment shmid=%d\n"), shmid);
+	printf(_("uid=%u\tgid=%u\tcuid=%u\tcgid=%u\n"),
+	       perm->uid, perm->gid, perm->cuid, perm->cgid);
+	printf(_("mode=%#o\taccess_perms=%#o\n"), perm->mode, perm->mode & 0777);
+
+	/* the label depends on whether the size is scaled for humans */
+	if (unit == IPC_UNIT_HUMAN)
+		ipc_print_size(unit, _("size="), seg->shm_segsz, "\t", 0);
+	else
+		ipc_print_size(unit, _("bytes="), seg->shm_segsz, "\t", 0);
+
+	printf(_("lpid=%u\tcpid=%u\tnattch=%jd\n"),
+	       seg->shm_lprid, seg->shm_cprid, seg->shm_nattch);
+
+	/* a zero attach/detach time is reported as "Not set" */
+	when = seg->shm_atim ? xctime(&(seg->shm_atim)) : _("Not set");
+	printf(_("att_time=%-26.24s\n"), when);
+
+	when = seg->shm_dtim ? xctime(&seg->shm_dtim) : _("Not set");
+	printf(_("det_time=%-26.24s\n"), when);
+
+	/* the change time is always printed */
+	printf(_("change_time=%-26.24s\n"), xctime(&seg->shm_ctim));
+	printf("\n");
+
+	ipc_shm_free_info(seg);
+}
+
+/*
+ * Print a detailed report about the single message queue @msgid: ownership,
+ * mode, current and maximum size (formatted according to @unit), message
+ * count, last sender/receiver pids and the send/receive/change times.
+ * A warning is issued when the id cannot be found.
+ */
+static void print_msg(int msgid, int unit)
+{
+	struct msg_data *q;
+	struct ipc_stat *perm;
+	int human = (unit == IPC_UNIT_HUMAN);
+	const char *when;
+
+	if (ipc_msg_get_info(msgid, &q) < 1) {
+		warnx(_("id %d not found"), msgid);
+		return;
+	}
+	perm = &q->msg_perm;
+
+	printf(_("\nMessage Queue msqid=%d\n"), msgid);
+	printf(_("uid=%u\tgid=%u\tcuid=%u\tcgid=%u\tmode=%#o\n"),
+	       perm->uid, perm->gid, perm->cuid, perm->cgid, perm->mode);
+
+	/* labels differ between scaled and plain output */
+	if (human) {
+		ipc_print_size(unit, _("csize="), q->q_cbytes, "\t", 0);
+		ipc_print_size(unit, _("qsize="), q->q_qbytes, "\t", 0);
+	} else {
+		ipc_print_size(unit, _("cbytes="), q->q_cbytes, "\t", 0);
+		ipc_print_size(unit, _("qbytes="), q->q_qbytes, "\t", 0);
+	}
+
+	printf("qnum=%jd\tlspid=%d\tlrpid=%d\n",
+	       q->q_qnum, q->q_lspid, q->q_lrpid);
+
+	/* a zero timestamp is reported as "Not set" */
+	when = q->q_stime ? xctime(&q->q_stime) : _("Not set");
+	printf(_("send_time=%-26.24s\n"), when);
+
+	when = q->q_rtime ? xctime(&q->q_rtime) : _("Not set");
+	printf(_("rcv_time=%-26.24s\n"), when);
+
+	when = q->q_ctime ? xctime(&q->q_ctime) : _("Not set");
+	printf(_("change_time=%-26.24s\n"), when);
+	printf("\n");
+
+	ipc_msg_free_info(q);
+}
+
+/*
+ * Print a detailed report about the single semaphore array @semid:
+ * ownership, mode, number of semaphores, the last-operation and change
+ * times, followed by one table row per semaphore in the array.
+ * A warning is issued when the id cannot be found.
+ */
+static void print_sem(int semid)
+{
+	struct sem_data *arr;
+	struct ipc_stat *perm;
+	const char *when;
+	size_t idx;
+
+	if (ipc_sem_get_info(semid, &arr) < 1) {
+		warnx(_("id %d not found"), semid);
+		return;
+	}
+	perm = &arr->sem_perm;
+
+	printf(_("\nSemaphore Array semid=%d\n"), semid);
+	printf(_("uid=%u\t gid=%u\t cuid=%u\t cgid=%u\n"),
+	       perm->uid, perm->gid, perm->cuid, perm->cgid);
+	printf(_("mode=%#o, access_perms=%#o\n"), perm->mode, perm->mode & 0777);
+	printf(_("nsems = %ju\n"), arr->sem_nsems);
+
+	/* a zero last-operation time is reported as "Not set" */
+	when = arr->sem_otime ? xctime(&arr->sem_otime) : _("Not set");
+	printf(_("otime = %-26.24s\n"), when);
+	printf(_("ctime = %-26.24s\n"), xctime(&arr->sem_ctime));
+
+	printf("%-10s %-10s %-10s %-10s %-10s\n",
+	       _("semnum"), _("value"), _("ncount"), _("zcount"), _("pid"));
+
+	idx = 0;
+	while (idx < arr->sem_nsems) {
+		struct sem_elem *elem = &arr->elements[idx];
+
+		printf("%-10zu %-10d %-10d %-10d %-10d\n",
+		       idx, elem->semval, elem->ncount, elem->zcount, elem->pid);
+		idx++;
+	}
+	printf("\n");
+	ipc_sem_free_info(arr);
+}
diff --git a/sys-utils/ipcutils.c b/sys-utils/ipcutils.c
new file mode 100644
index 0000000..5fe297f
--- /dev/null
+++ b/sys-utils/ipcutils.c
@@ -0,0 +1,533 @@
+#include <inttypes.h>
+
+#include "c.h"
+#include "nls.h"
+#include "xalloc.h"
+#include "path.h"
+#include "pathnames.h"
+#include "ipcutils.h"
+#include "strutils.h"
+
+#ifndef SEMVMX
+# define SEMVMX 32767 /* <= 32767 semaphore maximum value */
+#endif
+#ifndef SHMMIN
+# define SHMMIN 1 /* min shared segment size in bytes */
+#endif
+
+
+/*
+ * Fill the message queue members of @lim (msgmni, msgmnb, msgmax).
+ *
+ * The values are read from the _PATH_PROC_IPC_MSG* files when all three
+ * exist, otherwise they are requested with msgctl(IPC_INFO).
+ *
+ * Returns 0 on success and 1 when the limits could not be obtained.
+ */
+int ipc_msg_get_limits(struct ipc_limits *lim)
+{
+	if (access(_PATH_PROC_IPC_MSGMNI, F_OK) == 0 &&
+	    access(_PATH_PROC_IPC_MSGMNB, F_OK) == 0 &&
+	    access(_PATH_PROC_IPC_MSGMAX, F_OK) == 0) {
+
+		/*
+		 * An existing file may still be unreadable or unparsable;
+		 * do not report success with members left unset.
+		 * NOTE(review): relies on ul_path_read_*() returning 0 on
+		 * success -- confirm against lib/path.c.
+		 */
+		if (ul_path_read_s32(NULL, &lim->msgmni, _PATH_PROC_IPC_MSGMNI) != 0 ||
+		    ul_path_read_s32(NULL, &lim->msgmnb, _PATH_PROC_IPC_MSGMNB) != 0 ||
+		    ul_path_read_u64(NULL, &lim->msgmax, _PATH_PROC_IPC_MSGMAX) != 0)
+			return 1;
+	} else {
+		struct msginfo msginfo;
+
+		/* IPC_INFO fills a struct msginfo, hence the cast */
+		if (msgctl(0, IPC_INFO, (struct msqid_ds *) &msginfo) < 0)
+			return 1;
+		lim->msgmni = msginfo.msgmni;
+		lim->msgmnb = msginfo.msgmnb;
+		lim->msgmax = msginfo.msgmax;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill the semaphore members of @lim.
+ *
+ * semvmx is always the compile-time SEMVMX constant.  The four remaining
+ * limits are parsed from _PATH_PROC_IPC_SEM; unless all four were read,
+ * semctl(IPC_INFO) is used instead.
+ *
+ * Returns 0 on success and 1 when the semctl() fallback fails.
+ */
+int ipc_sem_get_limits(struct ipc_limits *lim)
+{
+	int parsed = 0;
+	FILE *fp;
+
+	lim->semvmx = SEMVMX;
+
+	fp = fopen(_PATH_PROC_IPC_SEM, "r");
+	if (fp != NULL) {
+		parsed = fscanf(fp, "%d\t%d\t%d\t%d",
+				&lim->semmsl, &lim->semmns,
+				&lim->semopm, &lim->semmni);
+		fclose(fp);
+	}
+
+	/* everything came from the file, nothing else to do */
+	if (parsed == 4)
+		return 0;
+
+	{
+		struct seminfo seminfo = { .semmni = 0 };
+		union semun arg = { .array = (ushort *) &seminfo };
+
+		if (semctl(0, 0, IPC_INFO, arg) < 0)
+			return 1;
+
+		lim->semmni = seminfo.semmni;
+		lim->semmsl = seminfo.semmsl;
+		lim->semmns = seminfo.semmns;
+		lim->semopm = seminfo.semopm;
+	}
+
+	return 0;
+}
+
+/*
+ * Fill the shared memory members of @lim.
+ *
+ * shmmin is always the compile-time SHMMIN constant.  shmall, shmmax and
+ * shmmni are read from the _PATH_PROC_IPC_SHM* files when all three exist,
+ * otherwise they are requested with shmctl(IPC_INFO).
+ *
+ * Returns 0 on success and 1 when the limits could not be obtained.
+ */
+int ipc_shm_get_limits(struct ipc_limits *lim)
+{
+	lim->shmmin = SHMMIN;
+
+	if (access(_PATH_PROC_IPC_SHMALL, F_OK) == 0 &&
+	    access(_PATH_PROC_IPC_SHMMAX, F_OK) == 0 &&
+	    access(_PATH_PROC_IPC_SHMMNI, F_OK) == 0) {
+
+		/*
+		 * An existing file may still be unreadable or unparsable;
+		 * do not report success with members left unset.
+		 * NOTE(review): relies on ul_path_read_u64() returning 0 on
+		 * success -- confirm against lib/path.c.
+		 */
+		if (ul_path_read_u64(NULL, &lim->shmall, _PATH_PROC_IPC_SHMALL) != 0 ||
+		    ul_path_read_u64(NULL, &lim->shmmax, _PATH_PROC_IPC_SHMMAX) != 0 ||
+		    ul_path_read_u64(NULL, &lim->shmmni, _PATH_PROC_IPC_SHMMNI) != 0)
+			return 1;
+
+	} else {
+		struct shminfo *shminfo;
+		struct shmid_ds shmbuf;
+
+		if (shmctl(0, IPC_INFO, &shmbuf) < 0)
+			return 1;
+		/* the IPC_INFO result is read back as a struct shminfo */
+		shminfo = (struct shminfo *) &shmbuf;
+		lim->shmmni = shminfo->shmmni;
+		lim->shmall = shminfo->shmall;
+		lim->shmmax = shminfo->shmmax;
+	}
+
+	return 0;
+}
+
+/*
+ * Collect information about System V shared memory segments.
+ *
+ * @id:    segment id to look up, or a negative value to list all segments
+ * @shmds: receives the head of a newly allocated list
+ *
+ * The data are parsed from _PATH_PROC_SYSV_SHM when that file can be opened,
+ * otherwise they are gathered with shmctl(SHM_INFO / SHM_STAT).  When
+ * listing, one extra zeroed node follows the last filled entry, so callers
+ * walk the list while ->next is not NULL.
+ *
+ * Returns the number of filled entries.  When 0 is returned the list has
+ * already been freed and *shmds is NULL; otherwise the caller releases it
+ * with ipc_shm_free_info().
+ */
+int ipc_shm_get_info(int id, struct shm_data **shmds)
+{
+	FILE *f;
+	int i = 0, maxid, c;
+	char buf[BUFSIZ];
+	struct shm_data *p;
+	struct shmid_ds dummy;
+
+	p = *shmds = xcalloc(1, sizeof(struct shm_data));
+	p->next = NULL;
+
+	f = fopen(_PATH_PROC_SYSV_SHM, "r");
+	if (!f)
+		goto shm_fallback;
+
+	/* skip header; stop at EOF as well, an empty or truncated file
+	 * must not make this loop spin forever */
+	while ((c = fgetc(f)) != EOF && c != '\n')
+		;
+
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+		/* scan for the first 14-16 columns (e.g. Linux 2.6.32 has 14) */
+		p->shm_rss = 0xdead;
+		p->shm_swp = 0xdead;
+		if (sscanf(buf,
+			  "%d %d  %o %"SCNu64 " %u %u  "
+			  "%"SCNu64 " %u %u %u %u %"SCNi64 " %"SCNi64 " %"SCNi64
+			  " %"SCNu64 " %"SCNu64 "\n",
+			   &p->shm_perm.key,
+			   &p->shm_perm.id,
+			   &p->shm_perm.mode,
+			   &p->shm_segsz,
+			   &p->shm_cprid,
+			   &p->shm_lprid,
+			   &p->shm_nattch,
+			   &p->shm_perm.uid,
+			   &p->shm_perm.gid,
+			   &p->shm_perm.cuid,
+			   &p->shm_perm.cgid,
+			   &p->shm_atim,
+			   &p->shm_dtim,
+			   &p->shm_ctim,
+			   &p->shm_rss,
+			   &p->shm_swp) < 14)
+			continue; /* invalid line, skipped */
+
+		if (id > -1) {
+			/* ID specified */
+			if (id == p->shm_perm.id) {
+				i = 1;
+				break;
+			} else
+				continue;
+		}
+
+		/* listing: keep this entry and append a fresh node for the next line */
+		p->next = xcalloc(1, sizeof(struct shm_data));
+		p = p->next;
+		p->next = NULL;
+		i++;
+	}
+
+	if (i == 0) {
+		free(*shmds);
+		*shmds = NULL;	/* do not hand a dangling pointer to the caller */
+	}
+	fclose(f);
+	return i;
+
+	/* Fallback; /proc or /sys file(s) missing. */
+shm_fallback:
+	maxid = shmctl(0, SHM_INFO, &dummy);
+
+	/* a negative maxid (shmctl failure) simply skips the loop */
+	for (int j = 0; j <= maxid; j++) {
+		int shmid;
+		struct shmid_ds shmseg;
+		struct ipc_perm *ipcp = &shmseg.shm_perm;
+
+		shmid = shmctl(j, SHM_STAT, &shmseg);
+		if (shmid < 0 || (id > -1 && shmid != id)) {
+			continue;
+		}
+
+		i++;
+		p->shm_perm.key = ipcp->KEY;
+		p->shm_perm.id = shmid;
+		p->shm_perm.mode = ipcp->mode;
+		p->shm_segsz = shmseg.shm_segsz;
+		p->shm_cprid = shmseg.shm_cpid;
+		p->shm_lprid = shmseg.shm_lpid;
+		p->shm_nattch = shmseg.shm_nattch;
+		p->shm_perm.uid = ipcp->uid;
+		p->shm_perm.gid = ipcp->gid;
+		p->shm_perm.cuid = ipcp->cuid;
+		/* creator gid comes from cgid (was mistakenly copied from cuid) */
+		p->shm_perm.cgid = ipcp->cgid;
+		p->shm_atim = shmseg.shm_atime;
+		p->shm_dtim = shmseg.shm_dtime;
+		p->shm_ctim = shmseg.shm_ctime;
+		p->shm_rss = 0xdead;
+		p->shm_swp = 0xdead;
+
+		if (id < 0) {
+			p->next = xcalloc(1, sizeof(struct shm_data));
+			p = p->next;
+			p->next = NULL;
+		} else
+			break;
+	}
+
+	if (i == 0) {
+		free(*shmds);
+		*shmds = NULL;
+	}
+	return i;
+}
+
+/*
+ * Release every node of the list starting at @shmds.
+ * A NULL list is accepted and nothing is done for it.
+ */
+void ipc_shm_free_info(struct shm_data *shmds)
+{
+	struct shm_data *node, *following;
+
+	for (node = shmds; node != NULL; node = following) {
+		/* remember the successor before the node goes away */
+		following = node->next;
+		free(node);
+	}
+}
+
+/*
+ * Allocate p->elements and fill one entry per semaphore of the array
+ * described by @p (value, ncount, zcount and pid, each fetched with its own
+ * semctl() call).  Nothing is done for a NULL @p, an array without
+ * semaphores or a negative id.  A failing semctl() terminates the program
+ * through err().
+ */
+static void get_sem_elements(struct sem_data *p)
+{
+	size_t idx;
+	struct sem_elem *elem;
+
+	if (p == NULL)
+		return;
+	if (p->sem_nsems == 0 || p->sem_perm.id < 0)
+		return;
+
+	p->elements = xcalloc(p->sem_nsems, sizeof(struct sem_elem));
+
+	idx = 0;
+	while (idx < p->sem_nsems) {
+		union semun arg = { .val = 0 };
+
+		elem = &p->elements[idx];
+
+		if ((elem->semval = semctl(p->sem_perm.id, idx, GETVAL, arg)) < 0)
+			err(EXIT_FAILURE, _("%s failed"), "semctl(GETVAL)");
+
+		if ((elem->ncount = semctl(p->sem_perm.id, idx, GETNCNT, arg)) < 0)
+			err(EXIT_FAILURE, _("%s failed"), "semctl(GETNCNT)");
+
+		if ((elem->zcount = semctl(p->sem_perm.id, idx, GETZCNT, arg)) < 0)
+			err(EXIT_FAILURE, _("%s failed"), "semctl(GETZCNT)");
+
+		if ((elem->pid = semctl(p->sem_perm.id, idx, GETPID, arg)) < 0)
+			err(EXIT_FAILURE, _("%s failed"), "semctl(GETPID)");
+
+		idx++;
+	}
+}
+
+/*
+ * Collect information about System V semaphore arrays.
+ *
+ * @id:    array id to look up, or a negative value to list all arrays
+ * @semds: receives the head of a newly allocated list
+ *
+ * The data are parsed from _PATH_PROC_SYSV_SEM when that file can be opened,
+ * otherwise they are gathered with semctl(SEM_INFO / SEM_STAT).  When a
+ * single id is requested its per-semaphore elements are loaded as well.
+ * When listing, one extra zeroed node follows the last filled entry, so
+ * callers walk the list while ->next is not NULL.
+ *
+ * Returns the number of filled entries.  When 0 is returned the list has
+ * already been freed and *semds is NULL; otherwise the caller releases it
+ * with ipc_sem_free_info().
+ */
+int ipc_sem_get_info(int id, struct sem_data **semds)
+{
+	FILE *f;
+	int i = 0, maxid, c;
+	char buf[BUFSIZ];
+	struct sem_data *p;
+	struct seminfo dummy;
+	union semun arg;
+
+	p = *semds = xcalloc(1, sizeof(struct sem_data));
+	p->next = NULL;
+
+	f = fopen(_PATH_PROC_SYSV_SEM, "r");
+	if (!f)
+		goto sem_fallback;
+
+	/* skip header; stop at EOF as well, an empty or truncated file
+	 * must not make this loop spin forever */
+	while ((c = fgetc(f)) != EOF && c != '\n')
+		;
+
+	/*
+	 * Parse line by line: a malformed line is consumed by fgets() and
+	 * skipped, whereas fscanf() on the stream would stop in front of the
+	 * offending character and never make progress.
+	 */
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+		if (sscanf(buf,
+			   "%d %d  %o %" SCNu64 " %u %u %u %u %"
+			    SCNi64 " %" SCNi64 "\n",
+			   &p->sem_perm.key,
+			   &p->sem_perm.id,
+			   &p->sem_perm.mode,
+			   &p->sem_nsems,
+			   &p->sem_perm.uid,
+			   &p->sem_perm.gid,
+			   &p->sem_perm.cuid,
+			   &p->sem_perm.cgid,
+			   &p->sem_otime,
+			   &p->sem_ctime) != 10)
+			continue; /* invalid line, skipped */
+
+		if (id > -1) {
+			/* ID specified */
+			if (id == p->sem_perm.id) {
+				get_sem_elements(p);
+				i = 1;
+				break;
+			} else
+				continue;
+		}
+
+		/* listing: keep this entry and append a fresh node for the next line */
+		p->next = xcalloc(1, sizeof(struct sem_data));
+		p = p->next;
+		p->next = NULL;
+		i++;
+	}
+
+	if (i == 0) {
+		free(*semds);
+		*semds = NULL;	/* do not hand a dangling pointer to the caller */
+	}
+	fclose(f);
+	return i;
+
+	/* Fallback; /proc or /sys file(s) missing. */
+sem_fallback:
+	arg.array = (ushort *) (void *)&dummy;
+	maxid = semctl(0, 0, SEM_INFO, arg);
+
+	/* a negative maxid (semctl failure) simply skips the loop */
+	for (int j = 0; j <= maxid; j++) {
+		int semid;
+		struct semid_ds semseg;
+		struct ipc_perm *ipcp = &semseg.sem_perm;
+		arg.buf = (struct semid_ds *)&semseg;
+
+		semid = semctl(j, 0, SEM_STAT, arg);
+		if (semid < 0 || (id > -1 && semid != id)) {
+			continue;
+		}
+
+		/* count every entry exactly once (listing used to count twice) */
+		i++;
+		p->sem_perm.key = ipcp->KEY;
+		p->sem_perm.id = semid;
+		p->sem_perm.mode = ipcp->mode;
+		p->sem_nsems = semseg.sem_nsems;
+		p->sem_perm.uid = ipcp->uid;
+		p->sem_perm.gid = ipcp->gid;
+		p->sem_perm.cuid = ipcp->cuid;
+		/* creator gid comes from cgid (was mistakenly copied from cuid) */
+		p->sem_perm.cgid = ipcp->cgid;
+		p->sem_otime = semseg.sem_otime;
+		p->sem_ctime = semseg.sem_ctime;
+
+		if (id < 0) {
+			p->next = xcalloc(1, sizeof(struct sem_data));
+			p = p->next;
+			p->next = NULL;
+		} else {
+			get_sem_elements(p);
+			break;
+		}
+	}
+
+	if (i == 0) {
+		free(*semds);
+		*semds = NULL;
+	}
+	return i;
+}
+
+/*
+ * Release every node of the list starting at @semds, including the
+ * per-semaphore element array attached to each node.
+ * A NULL list is accepted and nothing is done for it.
+ */
+void ipc_sem_free_info(struct sem_data *semds)
+{
+	struct sem_data *node, *following;
+
+	for (node = semds; node != NULL; node = following) {
+		/* remember the successor before the node goes away */
+		following = node->next;
+		free(node->elements);
+		free(node);
+	}
+}
+
+/*
+ * Collect information about System V message queues.
+ *
+ * @id:    queue id to look up, or a negative value to list all queues
+ * @msgds: receives the head of a newly allocated list
+ *
+ * The data are parsed from _PATH_PROC_SYSV_MSG when that file can be opened,
+ * otherwise they are gathered with msgctl(MSG_INFO / MSG_STAT).  On the
+ * /proc path q_qbytes is only filled (via IPC_STAT) when a single id is
+ * requested.  When listing, one extra zeroed node follows the last filled
+ * entry, so callers walk the list while ->next is not NULL.
+ *
+ * Returns the number of filled entries.  When 0 is returned the list has
+ * already been freed and *msgds is NULL; otherwise the caller releases it
+ * with ipc_msg_free_info().
+ */
+int ipc_msg_get_info(int id, struct msg_data **msgds)
+{
+	FILE *f;
+	int i = 0, maxid, c;
+	char buf[BUFSIZ];
+	struct msg_data *p;
+	struct msqid_ds dummy;
+	struct msqid_ds msgseg;
+
+	p = *msgds = xcalloc(1, sizeof(struct msg_data));
+	p->next = NULL;
+
+	f = fopen(_PATH_PROC_SYSV_MSG, "r");
+	if (!f)
+		goto msg_fallback;
+
+	/* skip header; stop at EOF as well, an empty or truncated file
+	 * must not make this loop spin forever */
+	while ((c = fgetc(f)) != EOF && c != '\n')
+		;
+
+	/*
+	 * Parse line by line: a malformed line is consumed by fgets() and
+	 * skipped, whereas fscanf() on the stream would stop in front of the
+	 * offending character and never make progress.
+	 */
+	while (fgets(buf, sizeof(buf), f) != NULL) {
+		if (sscanf(buf,
+			   "%d %d  %o  %" SCNu64 " %" SCNu64
+			   " %u %u %u %u %u %u %" SCNi64 " %" SCNi64 " %" SCNi64 "\n",
+			   &p->msg_perm.key,
+			   &p->msg_perm.id,
+			   &p->msg_perm.mode,
+			   &p->q_cbytes,
+			   &p->q_qnum,
+			   &p->q_lspid,
+			   &p->q_lrpid,
+			   &p->msg_perm.uid,
+			   &p->msg_perm.gid,
+			   &p->msg_perm.cuid,
+			   &p->msg_perm.cgid,
+			   &p->q_stime,
+			   &p->q_rtime,
+			   &p->q_ctime) != 14)
+			continue; /* invalid line, skipped */
+
+		if (id > -1) {
+			/* ID specified */
+			if (id == p->msg_perm.id) {
+				/* the /proc line has no qbytes column; ask the kernel */
+				if (msgctl(id, IPC_STAT, &msgseg) != -1)
+					p->q_qbytes = msgseg.msg_qbytes;
+				i = 1;
+				break;
+			} else
+				continue;
+		}
+
+		/* listing: keep this entry and append a fresh node for the next line */
+		p->next = xcalloc(1, sizeof(struct msg_data));
+		p = p->next;
+		p->next = NULL;
+		i++;
+	}
+
+	if (i == 0) {
+		free(*msgds);
+		*msgds = NULL;	/* do not hand a dangling pointer to the caller */
+	}
+	fclose(f);
+	return i;
+
+	/* Fallback; /proc or /sys file(s) missing. */
+msg_fallback:
+	maxid = msgctl(0, MSG_INFO, &dummy);
+
+	/* a negative maxid (msgctl failure) simply skips the loop */
+	for (int j = 0; j <= maxid; j++) {
+		int msgid;
+		struct ipc_perm *ipcp = &msgseg.msg_perm;
+
+		msgid = msgctl(j, MSG_STAT, &msgseg);
+		if (msgid < 0 || (id > -1 && msgid != id)) {
+			continue;
+		}
+
+		i++;
+		p->msg_perm.key = ipcp->KEY;
+		p->msg_perm.id = msgid;
+		p->msg_perm.mode = ipcp->mode;
+		p->q_cbytes = msgseg.msg_cbytes;
+		p->q_qnum = msgseg.msg_qnum;
+		p->q_lspid = msgseg.msg_lspid;
+		p->q_lrpid = msgseg.msg_lrpid;
+		p->msg_perm.uid = ipcp->uid;
+		p->msg_perm.gid = ipcp->gid;
+		p->msg_perm.cuid = ipcp->cuid;
+		p->msg_perm.cgid = ipcp->cgid;
+		p->q_stime = msgseg.msg_stime;
+		p->q_rtime = msgseg.msg_rtime;
+		p->q_ctime = msgseg.msg_ctime;
+		p->q_qbytes = msgseg.msg_qbytes;
+
+		if (id < 0) {
+			p->next = xcalloc(1, sizeof(struct msg_data));
+			p = p->next;
+			p->next = NULL;
+		} else
+			break;
+	}
+
+	if (i == 0) {
+		free(*msgds);
+		*msgds = NULL;
+	}
+	return i;
+}
+
+/*
+ * Release every node of the list starting at @msgds.
+ * A NULL list is accepted and nothing is done for it.
+ */
+void ipc_msg_free_info(struct msg_data *msgds)
+{
+	struct msg_data *node, *following;
+
+	for (node = msgds; node != NULL; node = following) {
+		/* remember the successor before the node goes away */
+		following = node->next;
+		free(node);
+	}
+}
+
+/*
+ * Write one table row describing @is to @f: id, permission bits, creator
+ * user and group, then current user and group.  Each user/group is printed
+ * by name when it can be resolved and as a number otherwise.  The row ends
+ * with a newline.
+ */
+void ipc_print_perms(FILE *f, struct ipc_stat *is)
+{
+	struct passwd *user;
+	struct group *grp;
+
+	fprintf(f, "%-10d %-10o", is->id, is->mode & 0777);
+
+	/* creator user */
+	user = getpwuid(is->cuid);
+	if (user != NULL)
+		fprintf(f, " %-10s", user->pw_name);
+	else
+		fprintf(f, " %-10u", is->cuid);
+
+	/* creator group */
+	grp = getgrgid(is->cgid);
+	if (grp != NULL)
+		fprintf(f, " %-10s", grp->gr_name);
+	else
+		fprintf(f, " %-10u", is->cgid);
+
+	/* current user */
+	user = getpwuid(is->uid);
+	if (user != NULL)
+		fprintf(f, " %-10s", user->pw_name);
+	else
+		fprintf(f, " %-10u", is->uid);
+
+	/* current group, last column of the row */
+	grp = getgrgid(is->gid);
+	if (grp != NULL)
+		fprintf(f, " %-10s\n", grp->gr_name);
+	else
+		fprintf(f, " %-10u\n", is->gid);
+}
+
+/*
+ * Print @size to stdout, formatted according to @unit.
+ *
+ * @unit:  one of the IPC_UNIT_* values; any other value aborts
+ * @msg:   optional label printed in front of the value.  A label ending
+ *         in '=' is printed as is, otherwise " = " (with the unit name
+ *         for bytes/kbytes) is appended.  NULL prints no label.
+ * @size:  the value in bytes
+ * @end:   optional string printed after the value
+ * @width: minimum field width; a negative value left-justifies, 0 means
+ *         no padding
+ */
+void ipc_print_size(int unit, char *msg, uint64_t size, const char *end,
+		    int width)
+{
+	if (!msg)
+		/* NULL */ ;
+	/* only look at the last character when there is one */
+	else if (*msg && msg[strlen(msg) - 1] == '=')
+		printf("%s", msg);
+	else if (unit == IPC_UNIT_BYTES)
+		printf(_("%s (bytes) = "), msg);
+	else if (unit == IPC_UNIT_KB)
+		printf(_("%s (kbytes) = "), msg);
+	else
+		printf("%s = ", msg);
+
+	/*
+	 * The width goes through '*' instead of a format string assembled at
+	 * run time: a negative '*' width is taken as the '-' flag plus the
+	 * positive width, and a zero width means "no minimum width".
+	 */
+	switch (unit) {
+	case IPC_UNIT_DEFAULT:
+	case IPC_UNIT_BYTES:
+		printf("%*ju", width, (uintmax_t) size);
+		break;
+	case IPC_UNIT_KB:
+		printf("%*ju", width, (uintmax_t) (size / 1024));
+		break;
+	case IPC_UNIT_HUMAN:
+	{
+		char *tmp = size_to_human_string(SIZE_SUFFIX_1LETTER, size);
+
+		/* never hand a NULL pointer to %s */
+		printf("%*s", width, tmp ? tmp : "");
+		free(tmp);
+		break;
+	}
+	default:
+		/* impossible occurred */
+		abort();
+	}
+
+	if (end)
+		printf("%s", end);
+}
diff --git a/sys-utils/ipcutils.h b/sys-utils/ipcutils.h
new file mode 100644
index 0000000..db85f57
--- /dev/null
+++ b/sys-utils/ipcutils.h
@@ -0,0 +1,187 @@
+#ifndef UTIL_LINUX_IPCUTILS_H
+#define UTIL_LINUX_IPCUTILS_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/ipc.h>
+#include <sys/msg.h>
+#include <sys/sem.h>
+#include <sys/shm.h>
+#include <sys/types.h>
+#include <time.h>
+#include <unistd.h>
+#include <grp.h>
+#include <pwd.h>
+#include <stdint.h>
+
+/*
+ * SHM_DEST and SHM_LOCKED are defined in kernel headers, but inside
+ * #ifdef __KERNEL__ ... #endif
+ */
+#ifndef SHM_DEST
+ /* shm_mode upper byte flags */
+# define SHM_DEST 01000 /* segment will be destroyed on last detach */
+# define SHM_LOCKED 02000 /* segment will not be swapped */
+#endif
+
+/* For older kernels the same holds for the defines below */
+#ifndef MSG_STAT
+# define MSG_STAT 11
+# define MSG_INFO 12
+#endif
+
+#ifndef SHM_STAT
+# define SHM_STAT 13
+# define SHM_INFO 14
+/* Local definition, only compiled when the system headers lack SHM_STAT. */
+struct shm_info {
+ int used_ids;
+ unsigned long shm_tot; /* total allocated shm */
+ unsigned long shm_rss; /* total resident shm */
+ unsigned long shm_swp; /* total swapped shm */
+ unsigned long swap_attempts;
+ unsigned long swap_successes;
+};
+#endif
+
+#ifndef SEM_STAT
+# define SEM_STAT 18
+# define SEM_INFO 19
+#endif
+
+/* Some versions of libc only define IPC_INFO when __USE_GNU is defined. */
+#ifndef IPC_INFO
+# define IPC_INFO 3
+#endif
+
+/*
+ * The last argument of semctl() is a union semun, but where is it defined?
+ * X/OPEN tells us to define it ourselves, but until recently the Linux
+ * include files would also define it.
+ */
+#ifndef HAVE_UNION_SEMUN
+/* according to X/OPEN we have to define it ourselves */
+/* Argument passed as the last parameter of semctl(); which member is
+ * used depends on the command. */
+union semun {
+ int val;
+ struct semid_ds *buf;
+ unsigned short int *array;
+ struct seminfo *__buf;
+};
+#endif
+
+/*
+ * X/OPEN (Jan 1987) does not define fields key, seq in struct ipc_perm;
+ * glibc-1.09 has no support for sysv ipc.
+ * glibc 2 uses __key, __seq
+ */
+#if defined (__GLIBC__) && __GLIBC__ >= 2
+# define KEY __key
+#else
+# define KEY key
+#endif
+
+/* Size printing in ipcs is using these.
+ * They are the values accepted as the 'unit' argument of ipc_print_size(). */
+enum {
+ IPC_UNIT_DEFAULT,
+ IPC_UNIT_BYTES,
+ IPC_UNIT_KB,
+ IPC_UNIT_HUMAN
+};
+
+/*
+ * System-wide IPC limits.  Each group of members is filled by the matching
+ * ipc_{shm,sem,msg}_get_limits() function declared below.
+ */
+struct ipc_limits {
+ uint64_t shmmni; /* max number of segments */
+ uint64_t shmmax; /* max segment size */
+ uint64_t shmall; /* max total shared memory */
+ uint64_t shmmin; /* min segment size */
+
+ int semmni; /* max number of arrays */
+ int semmsl; /* max semaphores per array */
+ int semmns; /* max semaphores system wide */
+ int semopm; /* max ops per semop call */
+ unsigned int semvmx; /* semaphore max value (constant) */
+
+ int msgmni; /* max queues system wide */
+ uint64_t msgmax; /* max size of message */
+ int msgmnb; /* default max size of queue */
+};
+
+extern int ipc_msg_get_limits(struct ipc_limits *lim);
+extern int ipc_sem_get_limits(struct ipc_limits *lim);
+extern int ipc_shm_get_limits(struct ipc_limits *lim);
+
+/*
+ * Identification and permission data shared by all three IPC object kinds;
+ * embedded as the first member of struct shm_data, sem_data and msg_data.
+ */
+struct ipc_stat {
+ int id;
+ key_t key;
+ uid_t uid; /* current uid */
+ gid_t gid; /* current gid */
+ uid_t cuid; /* creator uid */
+ gid_t cgid; /* creator gid */
+ unsigned int mode;
+};
+
+extern void ipc_print_perms(FILE *f, struct ipc_stat *is);
+extern void ipc_print_size(int unit, char *msg, uint64_t size, const char *end, int width);
+
+/* See 'struct shmid_kernel' in kernel sources
+ *
+ * One node of the singly linked list produced by ipc_shm_get_info().
+ */
+struct shm_data {
+ struct ipc_stat shm_perm;
+
+ uint64_t shm_nattch;
+ uint64_t shm_segsz;
+ int64_t shm_atim; /* __kernel_time_t is signed long */
+ int64_t shm_dtim;
+ int64_t shm_ctim;
+ pid_t shm_cprid;
+ pid_t shm_lprid;
+ uint64_t shm_rss;
+ uint64_t shm_swp;
+
+ struct shm_data *next; /* next node, NULL on the last one */
+};
+
+extern int ipc_shm_get_info(int id, struct shm_data **shmds);
+extern void ipc_shm_free_info(struct shm_data *shmds);
+
+/* See 'struct sem_array' in kernel sources
+ */
+/* State of one semaphore inside an array. */
+struct sem_elem {
+ int semval;
+ int ncount; /* processes waiting on increase semval */
+ int zcount; /* processes waiting on semval set to zero */
+ pid_t pid; /* process last executed semop(2) call */
+};
+/* One node of the singly linked list produced by ipc_sem_get_info(). */
+struct sem_data {
+ struct ipc_stat sem_perm;
+
+ int64_t sem_ctime;
+ int64_t sem_otime;
+ uint64_t sem_nsems;
+
+ struct sem_elem *elements; /* released by ipc_sem_free_info() */
+ struct sem_data *next; /* next node, NULL on the last one */
+};
+
+extern int ipc_sem_get_info(int id, struct sem_data **semds);
+extern void ipc_sem_free_info(struct sem_data *semds);
+
+/* See 'struct msg_queue' in kernel sources
+ *
+ * One node of the singly linked list produced by ipc_msg_get_info().
+ */
+struct msg_data {
+ struct ipc_stat msg_perm;
+
+ int64_t q_stime;
+ int64_t q_rtime;
+ int64_t q_ctime;
+ uint64_t q_cbytes;
+ uint64_t q_qnum;
+ uint64_t q_qbytes;
+ pid_t q_lspid;
+ pid_t q_lrpid;
+
+ struct msg_data *next; /* next node, NULL on the last one */
+};
+
+extern int ipc_msg_get_info(int id, struct msg_data **msgds);
+extern void ipc_msg_free_info(struct msg_data *msgds);
+
+#endif /* UTIL_LINUX_IPCUTILS_H */
diff --git a/sys-utils/ldattach.8 b/sys-utils/ldattach.8
new file mode 100644
index 0000000..1b4683d
--- /dev/null
+++ b/sys-utils/ldattach.8
@@ -0,0 +1,155 @@
+.\" Copyright 2008 Tilman Schmidt (tilman@imap.cc)
+.\" May be distributed under the GNU General Public License version 2 or later
+.TH LDATTACH 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+ldattach \- attach a line discipline to a serial line
+.SH SYNOPSIS
+.B ldattach
+.RB [ \-1278denoVh ]
+.RB [ \-i
+.IR iflag ]
+.RB [ \-s
+.IR speed ]
+.RB [ \-c
+.IR intro-command ]
+.RB [ \-p
+.IR pause ]
+.I ldisc device
+.SH DESCRIPTION
+The
+.B ldattach
+daemon opens the specified
+.I device
+file
+(which should refer to a serial device)
+and attaches the line discipline
+.I ldisc
+to it for processing of the sent and/or received data.
+It then goes into the background keeping the device open so that the
+line discipline stays loaded.
+.sp
+The line discipline
+.I ldisc
+may be specified either by name
+or by number.
+.sp
+In order to detach the line discipline,
+.BR kill (1)
+the
+.B ldattach
+process.
+.sp
+With no arguments,
+.B ldattach
+prints usage information.
+.SH LINE DISCIPLINES
+Depending on the kernel release, the following line disciplines are supported:
+.TP
+.BR TTY ( 0 )
+The default line discipline,
+providing transparent operation (raw mode)
+as well as the habitual terminal line editing capabilities (cooked mode).
+.TP
+.BR SLIP ( 1 )
+Serial Line IP (SLIP) protocol processor
+for transmitting TCP/IP packets over serial lines.
+.TP
+.BR MOUSE ( 2 )
+Device driver for RS232 connected pointing devices (serial mice).
+.TP
+.BR PPP ( 3 )
+Point to Point Protocol (PPP) processor
+for transmitting network packets over serial lines.
+.TP
+.BR STRIP ( 4 )
+.TP
+.BR AX25 ( 5 )
+.TP
+.BR X25 ( 6 )
+Line driver for transmitting X.25 packets over asynchronous serial lines.
+.TP
+.BR 6PACK ( 7 )
+.TP
+.BR R3964 ( 9 )
+Driver for Simatic R3964 module.
+.TP
+.BR IRDA ( 11 )
+Linux IrDA (infrared data transmission) driver -
+see http://irda.sourceforge.net/
+.TP
+.BR HDLC ( 13 )
+Synchronous HDLC driver.
+.TP
+.BR SYNC_PPP ( 14 )
+Synchronous PPP driver.
+.TP
+.BR HCI ( 15 )
+Bluetooth HCI UART driver.
+.TP
+.BR GIGASET_M101 ( 16 )
+Driver for Siemens Gigaset M101 serial DECT adapter.
+.TP
+.BR PPS ( 18 )
+Driver for serial line Pulse Per Second (PPS) source.
+.TP
+.BR GSM0710 ( 21 )
+Driver for GSM 07.10 multiplexing protocol modem (CMUX).
+.SH OPTIONS
+.TP
+.BR \-1 , " \-\-onestopbit"
+Set the number of stop bits of the serial line to one.
+.TP
+.BR \-2 , " \-\-twostopbits"
+Set the number of stop bits of the serial line to two.
+.TP
+.BR \-7 , " \-\-sevenbits"
+Set the character size of the serial line to 7 bits.
+.TP
+.BR \-8 , " \-\-eightbits"
+Set the character size of the serial line to 8 bits.
+.TP
+.BR \-d , " \-\-debug"
+Keep
+.B ldattach
+in the foreground so that it can be interrupted or debugged,
+and to print verbose messages about its progress to standard error output.
+.TP
+.BR \-e , " \-\-evenparity"
+Set the parity of the serial line to even.
+.TP
+.BR \-i , " \-\-iflag " [ \- ] \fIvalue\fR...
+Set the specified bits in the c_iflag word of the serial line.
+The given \fIvalue\fP may be a number or a symbolic name.
+If \fIvalue\fP is prefixed by a minus sign, the specified bits are cleared
+instead. Several comma-separated values may be given in order to
+set and clear multiple bits.
+.TP
+.BR \-n , " \-\-noparity"
+Set the parity of the serial line to none.
+.TP
+.BR \-o , " \-\-oddparity"
+Set the parity of the serial line to odd.
+.TP
+.BR \-s , " \-\-speed " \fIvalue
+Set the speed (the baud rate) of the serial line to the specified \fIvalue\fR.
+.TP
+.BR \-c , " \-\-intro\-command " \fIstring
+Define an intro command that is sent through the serial line before the invocation
+of ldattach. E.g. in conjunction with line discipline GSM0710, the command
+\'AT+CMUX=0\\r\' is commonly suitable to switch the modem into the CMUX mode.
+.TP
+.BR \-p , " \-\-pause " \fIvalue
+Sleep for \fIvalue\fR seconds before the invocation of ldattach. Default is one second.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH "SEE ALSO"
+.BR inputattach (1),
+.BR ttys (4)
+.SH AUTHOR
+.nf
+Tilman Schmidt (tilman@imap.cc)
+.fi
+.SH AVAILABILITY
+The ldattach command is part of the util-linux package
+and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/ldattach.c b/sys-utils/ldattach.c
new file mode 100644
index 0000000..d33d685
--- /dev/null
+++ b/sys-utils/ldattach.c
@@ -0,0 +1,489 @@
+/* line discipline loading daemon
+ * open a serial device and attach a line discipline on it
+ *
+ * Usage:
+ * ldattach GIGASET_M101 /dev/ttyS0
+ *
+ * =====================================================================
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ * =====================================================================
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "all-io.h"
+#include "nls.h"
+#include "strutils.h"
+#include "closestream.h"
+
+#include <signal.h>
+#include <sys/socket.h>
+#include <linux/if.h>
+
+#include <linux/tty.h> /* for N_GSM0710 */
+
+#ifdef LINUX_GSMMUX_H
+# include <linux/gsmmux.h> /* Add by guowenxue */
+#else
+struct gsm_config
+{
+ unsigned int adaption;
+ unsigned int encapsulation; /* gsm0710_set_conf() stores 0 here ("basic" per its comment) */
+ unsigned int initiator; /* gsm0710_set_conf() stores 1 here ("we are initiator") */
+ unsigned int t1;
+ unsigned int t2;
+ unsigned int t3;
+ unsigned int n2;
+ unsigned int mru; /* gsm0710_set_conf() stores 127 here */
+ unsigned int mtu; /* gsm0710_set_conf() stores 127 here */
+ unsigned int k;
+ unsigned int i;
+ unsigned int unused[8]; /* Padding for expansion without
+ breaking stuff */
+};
+# define GSMIOC_GETCONF _IOR('G', 0, struct gsm_config)
+# define GSMIOC_SETCONF _IOW('G', 1, struct gsm_config)
+#endif
+
+#ifndef N_GIGASET_M101
+# define N_GIGASET_M101 16
+#endif
+
+#ifndef N_PPS
+# define N_PPS 18
+#endif
+
+#ifndef N_GSM0710
+# define N_GSM0710 21
+#endif
+
+#define MAXINTROPARMLEN 32
+
+/* attach a line discipline ioctl */
+#ifndef TIOCSETD
+# define TIOCSETD 0x5423
+#endif
+
+static int debug = 0;
+
+struct ld_table {
+ const char *name; /* symbolic name, matched case-insensitively by lookup_table() */
+ int value; /* number returned by lookup_table() for that name */
+};
+
+/* currently supported line disciplines, plus some aliases */
+static const struct ld_table ld_discs[] = {
+ { "TTY", N_TTY },
+ { "SLIP", N_SLIP },
+ { "MOUSE", N_MOUSE },
+ { "PPP", N_PPP },
+ { "STRIP", N_STRIP },
+ { "AX25", N_AX25 },
+ { "X25", N_X25 },
+ { "6PACK", N_6PACK },
+ { "R3964", N_R3964 },
+ { "IRDA", N_IRDA },
+ { "HDLC", N_HDLC },
+ { "SYNC_PPP", N_SYNC_PPP },
+ { "SYNCPPP", N_SYNC_PPP }, /* alias of SYNC_PPP */
+ { "HCI", N_HCI },
+ { "GIGASET_M101", N_GIGASET_M101 },
+ { "M101", N_GIGASET_M101 }, /* alias of GIGASET_M101 */
+ { "GIGASET", N_GIGASET_M101 }, /* alias of GIGASET_M101 */
+ { "PPS", N_PPS },
+ { "GSM0710", N_GSM0710},
+ { NULL, 0 } /* terminator: lookup_table() and print_table() stop at a NULL name */
+};
+
+/* known c_iflag names */
+static const struct ld_table ld_iflags[] =
+{
+ { "IGNBRK", IGNBRK },
+ { "BRKINT", BRKINT },
+ { "IGNPAR", IGNPAR },
+ { "PARMRK", PARMRK },
+ { "INPCK", INPCK },
+ { "ISTRIP", ISTRIP },
+ { "INLCR", INLCR },
+ { "IGNCR", IGNCR },
+ { "ICRNL", ICRNL },
+ { "IUCLC", IUCLC },
+ { "IXON", IXON },
+ { "IXANY", IXANY },
+ { "IXOFF", IXOFF },
+ { "IMAXBEL", IMAXBEL },
+ { "IUTF8", IUTF8 },
+ { NULL, 0 } /* terminator: lookup_table() and print_table() stop at a NULL name */
+};
+
+/*
+ * Print a printf-style diagnostic to standard error, but only when the
+ * global 'debug' flag is set.  All stdio streams are flushed before and
+ * after the message so that it is not interleaved with buffered output.
+ *
+ * The format pointer is const-qualified and the function carries the printf
+ * format attribute so the compiler can check the arguments at each call.
+ */
+static void __attribute__((__format__(__printf__, 1, 2))) dbg(const char *fmt, ...)
+{
+	va_list args;
+
+	if (!debug)
+		return;
+
+	fflush(NULL);
+	va_start(args, fmt);
+#ifdef HAVE_VWARNX
+	vwarnx(fmt, args);
+#else
+	/* no vwarnx(): emulate its "progname: message\n" layout by hand */
+	fprintf(stderr, "%s: ", program_invocation_short_name);
+	vfprintf(stderr, fmt, args);
+	fprintf(stderr, "\n");
+#endif
+	va_end(args);
+	fflush(NULL);
+}
+
+/*
+ * Case-insensitively search a NULL-name-terminated table for 'str'.
+ * Returns the matching entry's value, or -1 when there is no match
+ * (or no table at all).
+ */
+static int lookup_table(const struct ld_table *tab, const char *str)
+{
+	if (!tab)
+		return -1;
+
+	while (tab->name) {
+		if (strcasecmp(tab->name, str) == 0)
+			return tab->value;
+		tab++;
+	}
+	return -1;
+}
+
+/*
+ * Write every name of a NULL-name-terminated table to 'out', each padded
+ * to a 12-character column, starting a new line after every fifth name.
+ */
+static void print_table(FILE * out, const struct ld_table *tab)
+{
+	int printed = 0;
+
+	if (!tab)
+		return;
+
+	for (; tab->name; tab++) {
+		fprintf(out, " %-12s", tab->name);
+		if (++printed % 5 == 0)
+			fputc('\n', out);
+	}
+}
+
+/*
+ * Split the comma-separated list 'str' (modified in place by strtok()) into
+ * c_iflag items.  Each item is either a name known to ld_iflags or a number;
+ * an item prefixed by '-' is OR-ed into *clr_iflag, any other item into
+ * *set_iflag.  An unparsable item terminates the program through
+ * strtos32_or_err().  Always returns 0.
+ */
+static int parse_iflag(char *str, int *set_iflag, int *clr_iflag)
+{
+	char *tok = strtok(str, ",");
+
+	while (tok != NULL) {
+		int clear = (*tok == '-');
+		const char *item = clear ? tok + 1 : tok;
+		int bits = lookup_table(ld_iflags, item);
+
+		if (bits < 0)
+			bits = strtos32_or_err(item, _("invalid iflag"));
+
+		if (clear)
+			*clr_iflag |= bits;
+		else
+			*set_iflag |= bits;
+
+		tok = strtok(NULL, ",");
+	}
+
+	dbg("iflag (set/clear): %d/%d", *set_iflag, *clr_iflag);
+	return 0;
+}
+
+
+/*
+ * Print the help text, including the known line discipline and iflag
+ * names, to standard output and exit successfully.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *fp = stdout;
+
+	fputs(USAGE_HEADER, fp);
+	fprintf(fp, _(" %s [options] <ldisc> <device>\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, fp);
+	fputs(_("Attach a line discipline to a serial line.\n"), fp);
+
+	fputs(USAGE_OPTIONS, fp);
+	fputs(_(" -d, --debug print verbose messages to stderr\n"), fp);
+	fputs(_(" -s, --speed <value> set serial line speed\n"), fp);
+	fputs(_(" -c, --intro-command <string> intro sent before ldattach\n"), fp);
+	fputs(_(" -p, --pause <seconds> pause between intro and ldattach\n"), fp);
+	fputs(_(" -7, --sevenbits set character size to 7 bits\n"), fp);
+	fputs(_(" -8, --eightbits set character size to 8 bits\n"), fp);
+	fputs(_(" -n, --noparity set parity to none\n"), fp);
+	fputs(_(" -e, --evenparity set parity to even\n"), fp);
+	fputs(_(" -o, --oddparity set parity to odd\n"), fp);
+	fputs(_(" -1, --onestopbit set stop bits to one\n"), fp);
+	fputs(_(" -2, --twostopbits set stop bits to two\n"), fp);
+	fputs(_(" -i, --iflag [-]<iflag> set input mode flag\n"), fp);
+
+	fputs(USAGE_SEPARATOR, fp);
+	printf(USAGE_HELP_OPTIONS(25));
+
+	/* table of line discipline names, five per row */
+	fputs(_("\nKnown <ldisc> names:\n"), fp);
+	print_table(fp, ld_discs);
+	fputs(USAGE_SEPARATOR, fp);
+
+	/* table of c_iflag names, five per row */
+	fputs(_("\nKnown <iflag> names:\n"), fp);
+	print_table(fp, ld_iflags);
+
+	printf(USAGE_MAN_TAIL("ldattach(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Set both input and output speed of 'ts' to 'speed' (in baud).
+ *
+ * Standard rates are handled by cfsetspeed(), which translates the number
+ * to the matching Bxxx constant.  For any other rate the speed fields are
+ * filled in directly and CBAUD is replaced by BOTHER, provided struct
+ * termios exposes c_ispeed/c_ospeed.
+ *
+ * Returns 0 on success, -1 if the rate cannot be represented.
+ */
+static int my_cfsetspeed(struct termios *ts, int speed)
+{
+	/* Standard speeds
+	 * -- cfsetspeed() is able to translate number to Bxxx constants
+	 */
+	if (cfsetspeed(ts, speed) == 0)
+		return 0;
+
+	/* Nonstandard speeds
+	 * -- we have to bypass glibc and set the speed manually (because glibc
+	 *    checks for speed and supports Bxxx bit rates only)...
+	 */
+#ifdef _HAVE_STRUCT_TERMIOS_C_ISPEED
+	/* only supply a fallback; do not clash with a header-provided BOTHER */
+# ifndef BOTHER
+#  define BOTHER 0010000	/* non standard rate */
+# endif
+	dbg("using non-standard speeds");
+	ts->c_ospeed = ts->c_ispeed = speed;
+	ts->c_cflag &= ~CBAUD;
+	ts->c_cflag |= BOTHER;
+	return 0;
+#else
+	return -1;
+#endif
+}
+
+static void handler(int s) /* signal handler installed by main(); s is the signal number */
+{
+ dbg("got SIG %i -> exiting", s); /* NOTE(review): dbg() uses stdio, which is not async-signal-safe */
+ exit(EXIT_SUCCESS); /* runs the atexit handlers registered in main(); exit() is not async-signal-safe either */
+}
+
+/*
+ * Configure the n_gsm multiplexer attached to 'tty_fd': read the current
+ * configuration, mark this side as initiator using basic encoding with a
+ * 127 byte MRU/MTU, and write the configuration back.
+ *
+ * Failures are reported with warn() but are not fatal.  If the current
+ * configuration cannot be read, nothing is written back, so an
+ * uninitialised structure is never handed to the kernel.
+ */
+static void gsm0710_set_conf(int tty_fd)
+{
+	struct gsm_config c;
+
+	/* Add by guowenxue */
+	/* get n_gsm configuration */
+	if (ioctl(tty_fd, GSMIOC_GETCONF, &c) < 0) {
+		warn(_("cannot get GSM0710 configuration"));
+		return;
+	}
+
+	/* we are initiator and need encoding 0 (basic) */
+	c.initiator = 1;
+	c.encapsulation = 0;
+	/* our modem defaults to a maximum size of 127 bytes */
+	c.mru = 127;
+	c.mtu = 127;
+
+	/* set the new configuration */
+	if (ioctl(tty_fd, GSMIOC_SETCONF, &c) < 0)
+		warn(_("cannot set GSM0710 configuration"));
+	/* Add by guowenxue end*/
+}
+
+/*
+ * ldattach <ldisc> <device>
+ *
+ * Parse the command line, open the serial device, put it into raw mode with
+ * the requested speed/character size/parity/stop bits/iflags, optionally
+ * send an intro command and wait, attach the line discipline with TIOCSETD,
+ * then (unless --debug) daemonize and sleep in pause() so the descriptor,
+ * and with it the line discipline, stays open.
+ */
+int main(int argc, char **argv)
+{
+	int tty_fd;
+	struct termios ts;
+	int speed = 0, bits = '-', parity = '-', stop = '-';	/* '-' means "leave unchanged" */
+	int set_iflag = 0, clr_iflag = 0;
+	int ldisc;
+	int optc;
+	char *dev;
+	int intropause = 1;
+	char *introparm = NULL;
+
+	static const struct option opttbl[] = {
+		{"speed", required_argument, NULL, 's'},
+		{"sevenbits", no_argument, NULL, '7'},
+		{"eightbits", no_argument, NULL, '8'},
+		{"noparity", no_argument, NULL, 'n'},
+		{"evenparity", no_argument, NULL, 'e'},
+		{"oddparity", no_argument, NULL, 'o'},
+		{"onestopbit", no_argument, NULL, '1'},
+		{"twostopbits", no_argument, NULL, '2'},
+		{"iflag", required_argument, NULL, 'i'},
+		{"help", no_argument, NULL, 'h'},
+		{"version", no_argument, NULL, 'V'},
+		{"debug", no_argument, NULL, 'd'},
+		/* both take a value, matching "c:" and "p:" in the short option string */
+		{"intro-command", required_argument, NULL, 'c'},
+		{"pause", required_argument, NULL, 'p'},
+		{NULL, 0, NULL, 0}
+	};
+
+	/* SIGKILL cannot be caught, so handle the catchable termination request */
+	signal(SIGTERM, handler);
+	signal(SIGINT, handler);
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	/* parse options */
+	if (argc == 0)
+		errx(EXIT_FAILURE, _("bad usage"));
+
+	while ((optc =
+		getopt_long(argc, argv, "dhV78neo12s:i:c:p:", opttbl,
+			    NULL)) >= 0) {
+		switch (optc) {
+		case 'd':
+			debug = 1;
+			break;
+		case '1':
+		case '2':
+			stop = optc;
+			break;
+		case '7':
+		case '8':
+			bits = optc;
+			break;
+		case 'n':
+		case 'e':
+		case 'o':
+			parity = optc;
+			break;
+		case 's':
+			speed = strtos32_or_err(optarg, _("invalid speed argument"));
+			break;
+		case 'p':
+		{
+			/* range-check the unsigned value before narrowing it to int,
+			 * otherwise values above INT_MAX wrap negative and slip
+			 * past the limit */
+			unsigned int secs = strtou32_or_err(optarg, _("invalid pause argument"));
+
+			if (secs > 10)
+				errx(EXIT_FAILURE, "invalid pause: %s", optarg);
+			intropause = (int) secs;
+			break;
+		}
+		case 'c':
+			introparm = optarg;
+			break;
+		case 'i':
+			parse_iflag(optarg, &set_iflag, &clr_iflag);
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (argc - optind != 2) {
+		warnx(_("not enough arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+	/* parse line discipline specification */
+	ldisc = lookup_table(ld_discs, argv[optind]);
+	if (ldisc < 0)
+		ldisc = strtos32_or_err(argv[optind], _("invalid line discipline argument"));
+
+	/* ldisc specific option settings */
+	if (ldisc == N_GIGASET_M101) {
+		/* device specific defaults for line speed and data format */
+		if (speed == 0)
+			speed = 115200;
+		if (bits == '-')
+			bits = '8';
+		if (parity == '-')
+			parity = 'n';
+		if (stop == '-')
+			stop = '1';
+	}
+
+	/* open device */
+	dev = argv[optind + 1];
+	if ((tty_fd = open(dev, O_RDWR | O_NOCTTY)) < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), dev);
+	if (!isatty(tty_fd))
+		errx(EXIT_FAILURE, _("%s is not a serial line"), dev);
+
+	dbg("opened %s", dev);
+
+	/* set line speed and format */
+	if (tcgetattr(tty_fd, &ts) < 0)
+		err(EXIT_FAILURE,
+		    _("cannot get terminal attributes for %s"), dev);
+	cfmakeraw(&ts);
+	if (speed && my_cfsetspeed(&ts, speed) < 0)
+		errx(EXIT_FAILURE, _("speed %d unsupported"), speed);
+
+	switch (stop) {
+	case '1':
+		ts.c_cflag &= ~CSTOPB;
+		break;
+	case '2':
+		ts.c_cflag |= CSTOPB;
+		break;
+	case '-':
+		break;
+	default:
+		abort();
+	}
+	switch (bits) {
+	case '7':
+		ts.c_cflag = (ts.c_cflag & ~CSIZE) | CS7;
+		break;
+	case '8':
+		ts.c_cflag = (ts.c_cflag & ~CSIZE) | CS8;
+		break;
+	case '-':
+		break;
+	default:
+		abort();
+	}
+	switch (parity) {
+	case 'n':
+		ts.c_cflag &= ~(PARENB | PARODD);
+		break;
+	case 'e':
+		ts.c_cflag |= PARENB;
+		ts.c_cflag &= ~PARODD;
+		break;
+	case 'o':
+		ts.c_cflag |= (PARENB | PARODD);
+		break;
+	case '-':
+		break;
+	default:
+		abort();
+	}
+
+	ts.c_cflag |= CREAD;	/* just to be on the safe side */
+	ts.c_iflag |= set_iflag;
+	ts.c_iflag &= ~clr_iflag;
+
+	if (tcsetattr(tty_fd, TCSAFLUSH, &ts) < 0)
+		err(EXIT_FAILURE,
+		    _("cannot set terminal attributes for %s"), dev);
+
+	dbg("set to raw %d %c%c%c: cflag=0x%x",
+	    speed, bits, parity, stop, ts.c_cflag);
+
+	if (introparm && *introparm)
+	{
+		dbg("intro command is '%s'", introparm);
+		if (write_all(tty_fd, introparm, strlen(introparm)) != 0)
+			err(EXIT_FAILURE,
+			    _("cannot write intro command to %s"), dev);
+
+		if (intropause) {
+			dbg("waiting for %d seconds", intropause);
+			sleep(intropause);
+		}
+	}
+
+	/* Attach the line discipline. */
+	if (ioctl(tty_fd, TIOCSETD, &ldisc) < 0)
+		err(EXIT_FAILURE, _("cannot set line discipline"));
+
+	dbg("line discipline set to %d", ldisc);
+
+	/* ldisc specific post-attach actions */
+	if (ldisc == N_GSM0710)
+		gsm0710_set_conf(tty_fd);
+
+	/* Go into background if not in debug mode. */
+	if (!debug && daemon(0, 0) < 0)
+		err(EXIT_FAILURE, _("cannot daemonize"));
+
+	/* Sleep to keep the line discipline active. */
+	pause();
+
+	exit(EXIT_SUCCESS);
+}
diff --git a/sys-utils/losetup.8 b/sys-utils/losetup.8
new file mode 100644
index 0000000..c31d747
--- /dev/null
+++ b/sys-utils/losetup.8
@@ -0,0 +1,208 @@
+.TH LOSETUP 8 "November 2015" "util-linux" "System Administration"
+.SH NAME
+losetup \- set up and control loop devices
+.SH SYNOPSIS
+.ad l
+Get info:
+.sp
+.in +5
+.B losetup
+[\fIloopdev\fP]
+.sp
+.B losetup -l
+.RB [ \-a ]
+.sp
+.B losetup -j
+.I file
+.RB [ \-o
+.IR offset ]
+.sp
+.in -5
+Detach a loop device:
+.sp
+.in +5
+.B "losetup \-d"
+.IR loopdev ...
+.sp
+.in -5
+Detach all associated loop devices:
+.sp
+.in +5
+.B "losetup \-D"
+.sp
+.in -5
+Set up a loop device:
+.sp
+.in +5
+.B losetup
+.RB [ \-o
+.IR offset ]
+.RB [ \-\-sizelimit
+.IR size ]
+.RB [ \-\-sector\-size
+.IR size ]
+.in +8
+.RB [ \-Pr ]
+.RB [ \-\-show ] " \-f" | \fIloopdev\fP
+.I file
+.sp
+.in -13
+Resize a loop device:
+.sp
+.in +5
+.B "losetup \-c"
+.I loopdev
+.in -5
+.ad b
+.SH DESCRIPTION
+.B losetup
+is used to associate loop devices with regular files or block devices,
+to detach loop devices, and to query the status of a loop device. If only the
+\fIloopdev\fP argument is given, the status of the corresponding loop
+device is shown. If no option is given, all loop devices are shown.
+.sp
+Note that the old output format (i.e., \fBlosetup -a\fR) with comma-delimited
+strings is deprecated in favour of the \fB--list\fR output format.
+.sp
+It is possible to create several independent loop devices for the same backing
+file.
+.B This setup may be dangerous, can cause data loss, corruption and overwrites.
+Use \fB\-\-nooverlap\fR with \fB\-\-find\fR during setup to avoid this problem.
+
+.SH OPTIONS
+The \fIsize\fR and \fIoffset\fR
+arguments may be followed by the multiplicative suffixes KiB (=1024),
+MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is
+optional, e.g., "K" has the same meaning as "KiB") or the suffixes
+KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB.
+
+.TP
+.BR \-a , " \-\-all"
+Show the status of all loop devices. Note that not all information is accessible
+for non-root users. See also \fB\-\-list\fR. The old output format (as printed
+without \fB\-\-list\fR) is deprecated.
+.TP
+.BR \-d , " \-\-detach " \fIloopdev\fR...
+Detach the file or device associated with the specified loop device(s). Note
+that since Linux v3.7 the kernel uses "lazy device destruction". The detach
+operation no longer returns an EBUSY error if the device is actively used by
+the system; instead the device is marked with the autoclear flag and destroyed later.
+.TP
+.BR \-D , " \-\-detach\-all"
+Detach all associated loop devices.
+.TP
+.BR \-f , " \-\-find " "\fR[\fIfile\fR]"
+Find the first unused loop device. If a \fIfile\fR argument is present, use
+the found device as loop device. Otherwise, just print its name.
+.IP "\fB\-\-show\fP"
+Display the name of the assigned loop device if the \fB\-f\fP option and a
+\fIfile\fP argument are present.
+.TP
+.BR \-L , " \-\-nooverlap"
+Check for conflicts between loop devices to avoid the situation where the same
+backing file is shared between several loop devices. If the file is already used
+by another device, then re-use that device rather than creating a new one. The option
+makes sense only with \fB\-\-find\fP.
+.TP
+.BR \-j , " \-\-associated " \fIfile\fR " \fR[\fB\-o \fIoffset\fR]"
+Show the status of all loop devices associated with the given \fIfile\fR.
+.TP
+.BR \-o , " \-\-offset " \fIoffset
+The data start is moved \fIoffset\fP bytes into the specified file or device. The \fIoffset\fP
+may be followed by the multiplicative suffixes; see above.
+.IP "\fB\-\-sizelimit \fIsize\fP"
+The data end is set to no more than \fIsize\fP bytes after the data start. The \fIsize\fP
+may be followed by the multiplicative suffixes; see above.
+.TP
+.BR \-b , " \-\-sector-size " \fIsize
+Set the logical sector size of the loop device in bytes (since Linux 4.14). The
+option may be used when creating a new loop device as well as a stand-alone command
+to modify the sector size of an already existing loop device.
+.TP
+.BR \-c , " \-\-set\-capacity " \fIloopdev
+Force the loop driver to reread the size of the file associated with the
+specified loop device.
+.TP
+.BR \-P , " \-\-partscan"
+Force the kernel to scan the partition table on a newly created loop device.
+.TP
+.BR \-r , " \-\-read\-only"
+Set up a read-only loop device.
+.TP
+.BR \-\-direct\-io [ =on | off ]
+Enable or disable direct I/O for the backing file. The optional argument
+can be either \fBon\fR or \fBoff\fR. If the argument is omitted, it defaults
+to \fBon\fR.
+.TP
+.BR \-v , " \-\-verbose"
+Verbose mode.
+.TP
+.BR \-l , " \-\-list"
+If a loop device or the \fB-a\fR option is specified, print the default columns
+for either the specified loop device or all loop devices; the default is to
+print info about all devices. See also \fB\-\-output\fP, \fB\-\-noheadings\fP,
+\fB\-\-raw\fP, and \fB\-\-json\fP.
+.TP
+.BR \-O , " \-\-output " \fIcolumn\fR[,\fIcolumn\fR]...
+Specify the columns that are to be printed for the \fB\-\-list\fP output.
+Use \fB\-\-help\fR to get a list of all supported columns.
+.TP
+.B \-\-output\-all
+Output all available columns.
+.TP
+.BR \-n , " \-\-noheadings"
+Don't print headings for \fB\-\-list\fP output format.
+.IP "\fB\-\-raw\fP"
+Use the raw \fB\-\-list\fP output format.
+.TP
+.BR \-J , " \-\-json"
+Use JSON format for \fB\-\-list\fP output.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+
+.SH ENCRYPTION
+.B Cryptoloop is no longer supported in favor of dm-crypt.
+.B For more details see cryptsetup(8).
+
+.SH RETURN VALUE
+.B losetup
+returns 0 on success, nonzero on failure. When
+.B losetup
+displays the status of a loop device, it returns 1 if the device
+is not configured and 2 if an error occurred which prevented
+determining the status of the device.
+
+.SH FILES
+.TP
+.I /dev/loop[0..N]
+loop block devices
+.TP
+.I /dev/loop-control
+loop control device
+
+.SH EXAMPLE
+The following commands can be used as an example of using the loop device.
+.nf
+.IP
+# dd if=/dev/zero of=~/file.img bs=1024k count=10
+# losetup --find --show ~/file.img
+/dev/loop0
+# mkfs -t ext2 /dev/loop0
+# mount /dev/loop0 /mnt
+ ...
+# umount /dev/loop0
+# losetup --detach /dev/loop0
+.fi
+.SH ENVIRONMENT
+.IP LOOPDEV_DEBUG=all
+enables debug output.
+.SH AUTHORS
+Karel Zak <kzak@redhat.com>, based on the original version from
+Theodore Ts'o <tytso@athena.mit.edu>
+.SH AVAILABILITY
+The losetup command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/losetup.c b/sys-utils/losetup.c
new file mode 100644
index 0000000..7d14f56
--- /dev/null
+++ b/sys-utils/losetup.c
@@ -0,0 +1,917 @@
+/*
+ * Copyright (C) 2011 Karel Zak <kzak@redhat.com>
+ * Originally from Ted's losetup.c
+ *
+ * losetup.c - setup and control loop devices
+ */
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <inttypes.h>
+#include <getopt.h>
+
+#include <libsmartcols.h>
+
+#include "c.h"
+#include "nls.h"
+#include "strutils.h"
+#include "loopdev.h"
+#include "closestream.h"
+#include "optutils.h"
+#include "xalloc.h"
+#include "canonicalize.h"
+#include "pathnames.h"
+
+enum {
+ A_CREATE = 1, /* set up a new device */
+ A_DELETE, /* delete given device(s) */
+ A_DELETE_ALL, /* delete all devices */
+ A_SHOW, /* list devices */
+ A_SHOW_ONE, /* print info about one device */
+ A_FIND_FREE, /* find first unused */
+ A_SET_CAPACITY, /* set device capacity */
+ A_SET_DIRECT_IO, /* set direct I/O access to the backing file */
+ A_SET_BLOCKSIZE, /* set logical block size of the loop device */
+};
+
+enum {
+ COL_NAME = 0, /* the COL_* values are the designated indexes of infos[] */
+ COL_AUTOCLR,
+ COL_BACK_FILE,
+ COL_BACK_INO,
+ COL_BACK_MAJMIN,
+ COL_MAJMIN,
+ COL_OFFSET,
+ COL_PARTSCAN,
+ COL_RO,
+ COL_SIZELIMIT,
+ COL_DIO,
+ COL_LOGSEC,
+};
+
+/* basic output flags */
+static int no_headings; /* handed to scols_table_enable_noheadings() in show_table() */
+static int raw; /* handed to scols_table_enable_raw() in show_table() */
+static int json; /* handed to scols_table_enable_json(); also enables the table name and per-column JSON types */
+
+struct colinfo {
+ const char *name; /* column header; also matched by column_name_to_id() */
+ double whint; /* passed as the third argument of scols_table_new_column() */
+ int flags; /* SCOLS_FL_* flags passed to scols_table_new_column() */
+ const char *help; /* description printed by usage() */
+
+ int json_type; /* default is string */
+};
+
+static struct colinfo infos[] = { /* one entry per COL_* constant, placed by designated initializer */
+ [COL_AUTOCLR] = { "AUTOCLEAR", 1, SCOLS_FL_RIGHT, N_("autoclear flag set"), SCOLS_JSON_BOOLEAN},
+ [COL_BACK_FILE] = { "BACK-FILE", 0.3, 0, N_("device backing file")},
+ [COL_BACK_INO] = { "BACK-INO", 4, SCOLS_FL_RIGHT, N_("backing file inode number"), SCOLS_JSON_NUMBER},
+ [COL_BACK_MAJMIN] = { "BACK-MAJ:MIN", 6, 0, N_("backing file major:minor device number")},
+ [COL_NAME] = { "NAME", 0.25, 0, N_("loop device name")},
+ [COL_OFFSET] = { "OFFSET", 5, SCOLS_FL_RIGHT, N_("offset from the beginning"), SCOLS_JSON_NUMBER},
+ [COL_PARTSCAN] = { "PARTSCAN", 1, SCOLS_FL_RIGHT, N_("partscan flag set"), SCOLS_JSON_BOOLEAN},
+ [COL_RO] = { "RO", 1, SCOLS_FL_RIGHT, N_("read-only device"), SCOLS_JSON_BOOLEAN},
+ [COL_SIZELIMIT] = { "SIZELIMIT", 5, SCOLS_FL_RIGHT, N_("size limit of the file in bytes"), SCOLS_JSON_NUMBER},
+ [COL_MAJMIN] = { "MAJ:MIN", 3, 0, N_("loop device major:minor number")},
+ [COL_DIO] = { "DIO", 1, SCOLS_FL_RIGHT, N_("access backing file with direct-io"), SCOLS_JSON_BOOLEAN},
+ [COL_LOGSEC] = { "LOG-SEC", 4, SCOLS_FL_RIGHT, N_("logical sector size in bytes"), SCOLS_JSON_NUMBER},
+};
+
+static int columns[ARRAY_SIZE(infos) * 2] = {-1}; /* selected COL_* ids in output order; only element 0 starts as -1, the rest are zero */
+static size_t ncolumns; /* number of valid entries in columns[], see get_column_id() */
+
+/*
+ * Translate the position 'num' of an output column into its COL_* id.
+ * The position must lie in [0, ncolumns) and the stored id must be a
+ * valid index of infos[]; both are enforced with assert().
+ */
+static int get_column_id(int num)
+{
+	int id;
+
+	assert(num >= 0);
+	assert((size_t) num < ncolumns);
+
+	id = columns[num];
+	assert(id < (int) ARRAY_SIZE(infos));
+
+	return id;
+}
+
+/* Return the infos[] description of the output column at position 'num'. */
+static struct colinfo *get_column_info(int num)
+{
+	int id = get_column_id(num);
+
+	return infos + id;
+}
+
+/*
+ * Map a column name (the first 'namesz' bytes of 'name', compared without
+ * regard to case) to its index in infos[].  Prints a warning and returns -1
+ * when no column has exactly that name.
+ */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	size_t idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(infos); idx++) {
+		const char *known = infos[idx].name;
+
+		if (strncasecmp(name, known, namesz) != 0)
+			continue;
+		/* the prefix matched; accept only if the known name ends here too */
+		if (known[namesz] != '\0')
+			continue;
+		return idx;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/*
+ * Print the one-line (old, non --list) description of the loop device
+ * currently selected in 'lc':
+ *
+ *   <device>: [<devno>]:<inode> (<backing file>)[, offset N][, sizelimit N]
+ *             [, encryption NAME (type T)]
+ *
+ * When neither the backing device number nor the inode is available, the
+ * bracket is left empty and the encryption part is omitted.
+ *
+ * Returns 0 on success, -EINVAL if the backing file name is unavailable.
+ */
+static int printf_loopdev(struct loopdev_cxt *lc)
+{
+	uint64_t x;
+	dev_t dev = 0;
+	ino_t ino = 0;
+	char *fname;
+	uint32_t type;
+	int have_ids;
+
+	fname = loopcxt_get_backing_file(lc);	/* allocated; freed below */
+	if (!fname)
+		return -EINVAL;
+
+	if (loopcxt_get_backing_devno(lc, &dev) == 0)
+		loopcxt_get_backing_inode(lc, &ino);
+	have_ids = dev || ino;
+
+	if (!have_ids)
+		/*
+		 * Probably non-root user (no permissions to
+		 * call LOOP_GET_STATUS ioctls).
+		 */
+		printf("%s: []: (%s)",
+			loopcxt_get_device(lc), fname);
+	else
+		/* ino_t is not guaranteed to be 64 bits wide: convert for PRIu64 */
+		printf("%s: [%04d]:%" PRIu64 " (%s)",
+			loopcxt_get_device(lc), (int) dev, (uint64_t) ino, fname);
+
+	/* %ju expects uintmax_t, hence the explicit conversions */
+	if (loopcxt_get_offset(lc, &x) == 0 && x)
+		printf(_(", offset %ju"), (uintmax_t) x);
+
+	if (loopcxt_get_sizelimit(lc, &x) == 0 && x)
+		printf(_(", sizelimit %ju"), (uintmax_t) x);
+
+	if (have_ids && loopcxt_get_encrypt_type(lc, &type) == 0) {
+		const char *e = loopcxt_get_crypt_name(lc);
+
+		if ((!e || !*e) && type == 1)
+			e = "XOR";
+		if (e && *e)
+			printf(_(", encryption %s (type %u)"), e, type);
+	}
+
+	free(fname);
+	printf("\n");
+	return 0;
+}
+
+/*
+ * Print every used loop device with printf_loopdev().  When 'file' is
+ * given, only devices for which loopcxt_is_used() reports a match for that
+ * file (and 'offset', subject to 'flags') are printed; if the name as given
+ * does not match, the canonicalized path is tried as well.
+ *
+ * Returns -1 if the iterator cannot be initialized, otherwise 0.
+ */
+static int show_all_loops(struct loopdev_cxt *lc, const char *file,
+			  uint64_t offset, int flags)
+{
+	struct stat statbuf;
+	struct stat *st = NULL;
+	char *cn_file = NULL;		/* canonicalized 'file', computed on demand */
+
+	if (loopcxt_init_iterator(lc, LOOPITER_FL_USED))
+		return -1;
+
+	if (file && stat(file, &statbuf) == 0)
+		st = &statbuf;
+
+	while (loopcxt_next(lc) == 0) {
+		if (file) {
+			int used;
+
+			used = loopcxt_is_used(lc, st, cn_file ? cn_file : file,
+					       offset, 0, flags);
+			if (!used && !cn_file) {
+				cn_file = canonicalize_path(file);
+				used = loopcxt_is_used(lc, st, cn_file,
+						       offset, 0, flags);
+			}
+			if (!used)
+				continue;
+		}
+		printf_loopdev(lc);
+	}
+
+	loopcxt_deinit_iterator(lc);
+	free(cn_file);
+	return 0;
+}
+
+/*
+ * Detach the loop device selected in 'lc'.
+ * Returns 0 on success; on failure prints a warning and returns -1.
+ */
+static int delete_loop(struct loopdev_cxt *lc)
+{
+	if (!loopcxt_delete_device(lc))
+		return 0;
+
+	warn(_("%s: detach failed"), loopcxt_get_device(lc));
+	return -1;
+}
+
+/*
+ * Detach every used loop device.
+ * Returns -1 if the iterator cannot be initialized, otherwise the sum of
+ * the delete_loop() results (0 when every detach succeeded).
+ */
+static int delete_all_loops(struct loopdev_cxt *lc)
+{
+	int total = 0;
+
+	if (loopcxt_init_iterator(lc, LOOPITER_FL_USED) != 0)
+		return -1;
+
+	for (;;) {
+		if (loopcxt_next(lc) != 0)
+			break;
+		total += delete_loop(lc);
+	}
+
+	loopcxt_deinit_iterator(lc);
+	return total;
+}
+
+/*
+ * Fill the table line 'ln' with one cell per selected output column,
+ * describing the loop device currently selected in 'lc'.
+ *
+ * Cell text comes in two forms:
+ *   p  - storage not owned here; copied by scols_line_set_data()
+ *   np - allocated for this cell; handed over via scols_line_refer_data()
+ *
+ * A cell is left empty when its value is unavailable.  Returns 0 on
+ * success and -EINVAL for an unknown column id; failure to store a cell
+ * terminates the program.
+ */
+static int set_scols_data(struct loopdev_cxt *lc, struct libscols_line *ln)
+{
+	size_t i;
+
+	for (i = 0; i < ncolumns; i++) {
+		const char *p = NULL;			/* external data */
+		char *np = NULL;			/* allocated here */
+		uint64_t x = 0;
+		int rc = 0;
+
+		switch(get_column_id(i)) {
+		case COL_NAME:
+			p = loopcxt_get_device(lc);
+			break;
+		case COL_BACK_FILE:
+			/* the returned string is allocated (printf_loopdev() frees
+			 * it), so pass ownership on instead of copying and leaking */
+			np = loopcxt_get_backing_file(lc);
+			break;
+		case COL_OFFSET:
+			/* unsigned value: print with %ju and a matching uintmax_t */
+			if (loopcxt_get_offset(lc, &x) == 0)
+				xasprintf(&np, "%ju", (uintmax_t) x);
+			break;
+		case COL_SIZELIMIT:
+			if (loopcxt_get_sizelimit(lc, &x) == 0)
+				xasprintf(&np, "%ju", (uintmax_t) x);
+			break;
+		case COL_BACK_MAJMIN:
+		{
+			dev_t dev = 0;
+			if (loopcxt_get_backing_devno(lc, &dev) == 0 && dev)
+				xasprintf(&np, "%8u:%-3u", major(dev), minor(dev));
+			break;
+		}
+		case COL_MAJMIN:
+		{
+			struct stat st;
+
+			if (loopcxt_get_device(lc)
+			    && stat(loopcxt_get_device(lc), &st) == 0
+			    && S_ISBLK(st.st_mode)
+			    && major(st.st_rdev) == LOOPDEV_MAJOR)
+				xasprintf(&np, "%3u:%-3u", major(st.st_rdev),
+						minor(st.st_rdev));
+			break;
+		}
+		case COL_BACK_INO:
+		{
+			ino_t ino = 0;
+			/* ino_t width varies between platforms: convert for %ju */
+			if (loopcxt_get_backing_inode(lc, &ino) == 0 && ino)
+				xasprintf(&np, "%ju", (uintmax_t) ino);
+			break;
+		}
+		case COL_AUTOCLR:
+			p = loopcxt_is_autoclear(lc) ? "1" : "0";
+			break;
+		case COL_RO:
+			p = loopcxt_is_readonly(lc) ? "1" : "0";
+			break;
+		case COL_DIO:
+			p = loopcxt_is_dio(lc) ? "1" : "0";
+			break;
+		case COL_PARTSCAN:
+			p = loopcxt_is_partscan(lc) ? "1" : "0";
+			break;
+		case COL_LOGSEC:
+			if (loopcxt_get_blocksize(lc, &x) == 0)
+				xasprintf(&np, "%ju", (uintmax_t) x);
+			break;
+		default:
+			return -EINVAL;
+		}
+
+
+		if (p)
+			rc = scols_line_set_data(ln, i, p);	/* calls strdup() */
+		else if (np)
+			rc = scols_line_refer_data(ln, i, np);	/* only refers */
+
+		if (rc)
+			err(EXIT_FAILURE, _("failed to add output data"));
+	}
+
+	return 0;
+}
+
+/*
+ * Print loop devices as a libsmartcols table (the --list format).
+ *
+ * When a device is already selected in 'lc', only that device is listed.
+ * Otherwise all used devices are listed; if 'file' is given, the listing is
+ * restricted to devices for which loopcxt_is_used() reports a match for
+ * that file and 'offset' (trying the canonicalized path as a fallback).
+ *
+ * Returns 0 on success or a non-zero code from the first failing step;
+ * allocation failures terminate the program.
+ */
+static int show_table(struct loopdev_cxt *lc,
+		      const char *file,
+		      uint64_t offset,
+		      int flags)
+{
+	struct stat statbuf, *st = &statbuf;
+	struct libscols_table *table;
+	struct libscols_line *line;
+	char *cn_file = NULL;		/* canonicalized 'file', computed on demand */
+	size_t col;
+	int rc = 0;
+
+	scols_init_debug(0);
+
+	table = scols_new_table();
+	if (!table)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+	scols_table_enable_raw(table, raw);
+	scols_table_enable_json(table, json);
+	scols_table_enable_noheadings(table, no_headings);
+
+	if (json)
+		scols_table_set_name(table, "loopdevices");
+
+	/* one table column per selected output column */
+	for (col = 0; col < ncolumns; col++) {
+		struct colinfo *info = get_column_info(col);
+		struct libscols_column *cl;
+
+		cl = scols_table_new_column(table, info->name, info->whint, info->flags);
+		if (!cl)
+			err(EXIT_FAILURE, _("failed to allocate output column"));
+		if (json)
+			scols_column_set_json_type(cl, info->json_type);
+	}
+
+	/* only one loopdev requested (already assigned to loopdev_cxt) */
+	if (loopcxt_get_device(lc)) {
+		line = scols_table_new_line(table, NULL);
+		if (!line)
+			err(EXIT_FAILURE, _("failed to allocate output line"));
+		rc = set_scols_data(lc, line);
+		goto out;
+	}
+
+	/* list all loopdevs */
+	rc = loopcxt_init_iterator(lc, LOOPITER_FL_USED);
+	if (rc)
+		goto out;
+	if (!file || stat(file, st))
+		st = NULL;
+
+	while (loopcxt_next(lc) == 0) {
+		if (file) {
+			int used;
+
+			used = loopcxt_is_used(lc, st, cn_file ? cn_file : file,
+					       offset, 0, flags);
+			if (!used && !cn_file) {
+				cn_file = canonicalize_path(file);
+				used = loopcxt_is_used(lc, st, cn_file,
+						       offset, 0, flags);
+			}
+			if (!used)
+				continue;
+		}
+
+		line = scols_table_new_line(table, NULL);
+		if (!line)
+			err(EXIT_FAILURE, _("failed to allocate output line"));
+		rc = set_scols_data(lc, line);
+		if (rc)
+			break;
+	}
+
+	loopcxt_deinit_iterator(lc);
+	free(cn_file);
+out:
+	if (rc == 0)
+		rc = scols_print_table(table);
+	scols_unref_table(table);
+	return rc;
+}
+
+/*
+ * Print the help text, including the list of available output columns,
+ * to standard output and exit successfully.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	size_t i;
+
+	fputs(USAGE_HEADER, out);
+
+	fprintf(out,
+	      _(" %1$s [options] [<loopdev>]\n"
+		" %1$s [options] -f | <loopdev> <file>\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Set up and control loop devices.\n"), out);
+
+	/* commands */
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -a, --all list all used devices\n"), out);
+	fputs(_(" -d, --detach <loopdev>... detach one or more devices\n"), out);
+	fputs(_(" -D, --detach-all detach all used devices\n"), out);
+	fputs(_(" -f, --find find first unused device\n"), out);
+	fputs(_(" -c, --set-capacity <loopdev> resize the device\n"), out);
+	fputs(_(" -j, --associated <file> list all devices associated with <file>\n"), out);
+	fputs(_(" -L, --nooverlap avoid possible conflict between devices\n"), out);
+
+	/* commands options */
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_(" -o, --offset <num> start at offset <num> into file\n"), out);
+	fputs(_(" --sizelimit <num> device is limited to <num> bytes of the file\n"), out);
+	/* short and long form are separated by a comma, like every other entry */
+	fputs(_(" -b, --sector-size <num> set the logical sector size to <num>\n"), out);
+	fputs(_(" -P, --partscan create a partitioned loop device\n"), out);
+	fputs(_(" -r, --read-only set up a read-only loop device\n"), out);
+	fputs(_(" --direct-io[=<on|off>] open backing file with O_DIRECT\n"), out);
+	fputs(_(" --show print device name after setup (with -f)\n"), out);
+	fputs(_(" -v, --verbose verbose mode\n"), out);
+
+	/* output options */
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_(" -J, --json use JSON --list output format\n"), out);
+	fputs(_(" -l, --list list info about all or specified (default)\n"), out);
+	fputs(_(" -n, --noheadings don't print headings for --list output\n"), out);
+	fputs(_(" -O, --output <cols> specify columns to output for --list\n"), out);
+	fputs(_(" --output-all output all columns\n"), out);
+	fputs(_(" --raw use raw --list output format\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(31));
+
+	/* one line per known column: name and translated description */
+	fputs(USAGE_COLUMNS, out);
+	for (i = 0; i < ARRAY_SIZE(infos); i++)
+		fprintf(out, " %12s %s\n", infos[i].name, _(infos[i].help));
+
+	printf(USAGE_MAN_TAIL("losetup(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Warn when the size backing a loop device is awkward for 512-byte
+ * sectors.  A 'size' of zero means "use the size of 'filename'"; in that
+ * case nothing is reported if the file cannot be stat()ed or is a block
+ * device.
+ */
+static void warn_size(const char *filename, uint64_t size)
+{
+	if (size == 0) {
+		struct stat sb;
+
+		if (stat(filename, &sb) != 0)
+			return;
+		if (S_ISBLK(sb.st_mode))
+			return;
+		size = sb.st_size;
+	}
+
+	if (size < 512) {
+		warnx(_("%s: Warning: file is smaller than 512 bytes; the loop device "
+			"may be useless or invisible for system tools."),
+			filename);
+		return;
+	}
+
+	if (size % 512 != 0)
+		warnx(_("%s: Warning: file does not fit into a 512-byte sector; "
+			"the end of the file will be ignored."),
+			filename);
+}
+
+static int create_loop(struct loopdev_cxt *lc,
+ int nooverlap, int lo_flags, int flags,
+ const char *file, uint64_t offset, uint64_t sizelimit)
+{
+ int hasdev = loopcxt_has_device(lc); /* non-zero: a loop device is already selected in lc; zero: one is searched for (--find) */
+ int rc = 0;
+
+ /* losetup --find --nooverlap file.img */
+ if (!hasdev && nooverlap) {
+ rc = loopcxt_find_overlap(lc, file, offset, sizelimit);
+ switch (rc) {
+ case 0: /* not found */
+ break;
+
+ case 1: /* overlap */
+ loopcxt_deinit(lc);
+ errx(EXIT_FAILURE, _("%s: overlapping loop device exists"), file);
+
+ case 2: /* overlap -- full size and offset match (reuse) */
+ {
+ uint32_t lc_encrypt_type;
+
+ /* Once a loop is initialized RO, there is no
+ * way to change its parameters. */
+ if (loopcxt_is_readonly(lc)
+ && !(lo_flags & LO_FLAGS_READ_ONLY)) {
+ loopcxt_deinit(lc);
+ errx(EXIT_FAILURE, _("%s: overlapping read-only loop device exists"), file);
+ }
+
+ /* This is no longer supported, but check to be safe. */
+ if (loopcxt_get_encrypt_type(lc, &lc_encrypt_type) == 0
+ && lc_encrypt_type != LO_CRYPT_NONE) {
+ loopcxt_deinit(lc);
+ errx(EXIT_FAILURE, _("%s: overlapping encrypted loop device exists"), file);
+ }
+
+ lc->info.lo_flags &= ~LO_FLAGS_AUTOCLEAR; /* clear autoclear on the device that is about to be re-used */
+ if (loopcxt_set_status(lc)) {
+ loopcxt_deinit(lc);
+ errx(EXIT_FAILURE, _("%s: failed to re-use loop device"), file);
+ }
+ return 0; /* success, re-use */
+ }
+ default: /* error */
+ loopcxt_deinit(lc);
+ errx(EXIT_FAILURE, _("failed to inspect loop devices"));
+ return -errno; /* NOTE(review): presumably unreachable, errx() is expected to exit */
+ }
+ }
+
+ if (hasdev && !is_loopdev(loopcxt_get_device(lc)))
+ loopcxt_add_device(lc);
+
+ /* losetup --nooverlap /dev/loopN file.img */
+ if (hasdev && nooverlap) {
+ struct loopdev_cxt lc2; /* separate context so the overlap search does not disturb lc */
+
+ if (loopcxt_init(&lc2, 0)) {
+ loopcxt_deinit(lc);
+ err(EXIT_FAILURE, _("failed to initialize loopcxt"));
+ }
+ rc = loopcxt_find_overlap(&lc2, file, offset, sizelimit);
+ loopcxt_deinit(&lc2);
+
+ if (rc) {
+ loopcxt_deinit(lc);
+ if (rc > 0)
+ errx(EXIT_FAILURE, _("%s: overlapping loop device exists"), file);
+ err(EXIT_FAILURE, _("%s: failed to check for conflicting loop devices"), file);
+ }
+ }
+
+ /* Create a new device */
+ do {
+ const char *errpre;
+
+ /* Note that loopcxt_{find_unused,set_device}() resets
+ * loopcxt struct.
+ */
+ if (!hasdev && (rc = loopcxt_find_unused(lc))) {
+ warnx(_("cannot find an unused loop device"));
+ break;
+ }
+ if (flags & LOOPDEV_FL_OFFSET)
+ loopcxt_set_offset(lc, offset);
+ if (flags & LOOPDEV_FL_SIZELIMIT)
+ loopcxt_set_sizelimit(lc, sizelimit);
+ if (lo_flags)
+ loopcxt_set_flags(lc, lo_flags);
+ if ((rc = loopcxt_set_backing_file(lc, file))) {
+ warn(_("%s: failed to use backing file"), file);
+ break;
+ }
+ errno = 0;
+ rc = loopcxt_setup_device(lc);
+ if (rc == 0)
+ break; /* success */
+ if (errno == EBUSY && !hasdev)
+ continue; /* re-evaluates the loop condition, so the search for an unused device starts over */
+
+ /* errors */
+ errpre = hasdev && loopcxt_get_fd(lc) < 0 ?
+ loopcxt_get_device(lc) : file;
+ warn(_("%s: failed to set up loop device"), errpre);
+ break;
+ } while (hasdev == 0); /* only the --find case can loop; an explicitly given device is tried once */
+
+ return rc;
+}
+
+/*
+ * losetup entry point: parse the command line, derive the requested action
+ * (A_*) from the options and the remaining arguments, and execute it.
+ *
+ * Returns EXIT_SUCCESS when the action reported no error and EXIT_FAILURE
+ * otherwise; usage errors terminate through err()/errx()/errtryhelp().
+ */
+int main(int argc, char **argv)
+{
+	struct loopdev_cxt lc;
+	int act = 0, flags = 0, no_overlap = 0, c;
+	char *file = NULL;
+	uint64_t offset = 0, sizelimit = 0, blocksize = 0;
+	int res = 0, showdev = 0, lo_flags = 0;
+	char *outarg = NULL;
+	int list = 0;
+	unsigned long use_dio = 0, set_dio = 0, set_blocksize = 0;
+
+	enum {
+		OPT_SIZELIMIT = CHAR_MAX + 1,
+		OPT_SHOW,
+		OPT_RAW,
+		OPT_DIO,
+		OPT_OUTPUT_ALL
+	};
+	static const struct option longopts[] = {
+		{ "all",          no_argument,       NULL, 'a'           },
+		{ "set-capacity", required_argument, NULL, 'c'           },
+		{ "detach",       required_argument, NULL, 'd'           },
+		{ "detach-all",   no_argument,       NULL, 'D'           },
+		{ "find",         no_argument,       NULL, 'f'           },
+		{ "nooverlap",    no_argument,       NULL, 'L'           },
+		{ "help",         no_argument,       NULL, 'h'           },
+		{ "associated",   required_argument, NULL, 'j'           },
+		{ "json",         no_argument,       NULL, 'J'           },
+		{ "list",         no_argument,       NULL, 'l'           },
+		{ "sector-size",  required_argument, NULL, 'b'           },
+		{ "noheadings",   no_argument,       NULL, 'n'           },
+		{ "offset",       required_argument, NULL, 'o'           },
+		{ "output",       required_argument, NULL, 'O'           },
+		{ "output-all",   no_argument,       NULL, OPT_OUTPUT_ALL },
+		{ "sizelimit",    required_argument, NULL, OPT_SIZELIMIT },
+		{ "partscan",     no_argument,       NULL, 'P'           },
+		{ "read-only",    no_argument,       NULL, 'r'           },
+		{ "direct-io",    optional_argument, NULL, OPT_DIO       },
+		{ "raw",          no_argument,       NULL, OPT_RAW       },
+		{ "show",         no_argument,       NULL, OPT_SHOW      },
+		{ "verbose",      no_argument,       NULL, 'v'           },
+		{ "version",      no_argument,       NULL, 'V'           },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'D','a','c','d','f','j' },
+		{ 'D','c','d','f','l' },
+		{ 'D','c','d','f','O' },
+		{ 'J',OPT_RAW },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	if (loopcxt_init(&lc, 0))
+		err(EXIT_FAILURE, _("failed to initialize loopcxt"));
+
+	while ((c = getopt_long(argc, argv, "ab:c:d:Dfhj:JlLno:O:PrvV",
+				longopts, NULL)) != -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch (c) {
+		case 'a':
+			act = A_SHOW;
+			break;
+		case 'b':
+			set_blocksize = 1;
+			blocksize = strtosize_or_err(optarg, _("failed to parse logical block size"));
+			break;
+		case 'c':
+			act = A_SET_CAPACITY;
+			if (!is_loopdev(optarg) ||
+			    loopcxt_set_device(&lc, optarg))
+				err(EXIT_FAILURE, _("%s: failed to use device"),
+						optarg);
+			break;
+		case 'r':
+			lo_flags |= LO_FLAGS_READ_ONLY;
+			break;
+		case 'd':
+			act = A_DELETE;
+			if (!is_loopdev(optarg) ||
+			    loopcxt_set_device(&lc, optarg))
+				err(EXIT_FAILURE, _("%s: failed to use device"),
+						optarg);
+			break;
+		case 'D':
+			act = A_DELETE_ALL;
+			break;
+		case 'f':
+			act = A_FIND_FREE;
+			break;
+		case 'h':
+			usage();
+			break;
+		case 'J':
+			json = 1;
+			break;
+		case 'j':
+			act = A_SHOW;
+			file = optarg;
+			break;
+		case 'l':
+			list = 1;
+			break;
+		case 'L':
+			no_overlap = 1;
+			break;
+		case 'n':
+			no_headings = 1;
+			break;
+		case OPT_RAW:
+			raw = 1;
+			break;
+		case 'o':
+			offset = strtosize_or_err(optarg, _("failed to parse offset"));
+			flags |= LOOPDEV_FL_OFFSET;
+			break;
+		case 'O':
+			outarg = optarg;
+			list = 1;
+			break;
+		case OPT_OUTPUT_ALL:
+			for (ncolumns = 0; ncolumns < ARRAY_SIZE(infos); ncolumns++)
+				columns[ncolumns] = ncolumns;
+			break;
+		case 'P':
+			lo_flags |= LO_FLAGS_PARTSCAN;
+			break;
+		case OPT_SHOW:
+			showdev = 1;
+			break;
+		case OPT_DIO:
+			/* a bare --direct-io means "on" */
+			use_dio = set_dio = 1;
+			if (optarg)
+				use_dio = parse_switch(optarg, _("argument error"), "on", "off", NULL);
+			break;
+		case 'v':
+			/* accepted, has no effect */
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case OPT_SIZELIMIT:			/* --sizelimit */
+			sizelimit = strtosize_or_err(optarg, _("failed to parse size"));
+			flags |= LOOPDEV_FL_SIZELIMIT;
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	ul_path_init_debug();
+	ul_sysfs_init_debug();
+
+	/* default is --list --all */
+	if (argc == 1) {
+		act = A_SHOW;
+		list = 1;
+	}
+
+	/* a lone --raw or --json also means --list --all */
+	if (!act && argc == 2 && (raw || json)) {
+		act = A_SHOW;
+		list = 1;
+	}
+
+	/* default --list output columns */
+	if (list && !ncolumns) {
+		columns[ncolumns++] = COL_NAME;
+		columns[ncolumns++] = COL_SIZELIMIT;
+		columns[ncolumns++] = COL_OFFSET;
+		columns[ncolumns++] = COL_AUTOCLR;
+		columns[ncolumns++] = COL_RO;
+		columns[ncolumns++] = COL_BACK_FILE;
+		columns[ncolumns++] = COL_DIO;
+		columns[ncolumns++] = COL_LOGSEC;
+	}
+
+	if (act == A_FIND_FREE && optind < argc) {
+		/*
+		 * losetup -f <backing_file>
+		 */
+		act = A_CREATE;
+		file = argv[optind++];
+
+		if (optind < argc)
+			errx(EXIT_FAILURE, _("unexpected arguments"));
+	}
+
+	if (list && !act && optind == argc)
+		/*
+		 * losetup --list	defaults to --all
+		 */
+		act = A_SHOW;
+
+	if (!act && optind + 1 == argc) {
+		/*
+		 * losetup [--list] <device>
+		 * OR
+		 * losetup {--direct-io[=off]|--logical-blocksize=size}... <device>
+		 */
+		if (!(set_dio || set_blocksize))
+			act = A_SHOW_ONE;
+		if (set_dio)
+			act = A_SET_DIRECT_IO;
+		if (set_blocksize)
+			act = A_SET_BLOCKSIZE;
+		if (!is_loopdev(argv[optind]) ||
+		    loopcxt_set_device(&lc, argv[optind]))
+			err(EXIT_FAILURE, _("%s: failed to use device"),
+					argv[optind]);
+		optind++;
+	}
+	if (!act) {
+		/*
+		 * losetup <loopdev> <backing_file>
+		 */
+		act = A_CREATE;
+
+		if (optind >= argc)
+			errx(EXIT_FAILURE, _("no loop device specified"));
+		/* don't use is_loopdev() here, the device does not have to exist yet */
+		if (loopcxt_set_device(&lc, argv[optind]))
+			err(EXIT_FAILURE, _("%s: failed to use device"),
+					argv[optind]);
+		optind++;
+
+		if (optind >= argc)
+			errx(EXIT_FAILURE, _("no file specified"));
+		file = argv[optind++];
+	}
+
+	if (act != A_CREATE &&
+	    (sizelimit || lo_flags || showdev))
+		errx(EXIT_FAILURE,
+			_("the options %s are allowed during loop device setup only"),
+			"--{sizelimit,read-only,show}");
+
+	if ((flags & LOOPDEV_FL_OFFSET) &&
+	    act != A_CREATE && (act != A_SHOW || !file))
+		errx(EXIT_FAILURE, _("the option --offset is not allowed in this context"));
+
+	if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns),
+					 &ncolumns, column_name_to_id) < 0)
+		return EXIT_FAILURE;
+
+	switch (act) {
+	case A_CREATE:
+		res = create_loop(&lc, no_overlap, lo_flags, flags, file, offset, sizelimit);
+		if (res == 0) {
+			if (showdev)
+				printf("%s\n", loopcxt_get_device(&lc));
+			warn_size(file, sizelimit);
+			if (set_dio || set_blocksize)
+				goto lo_set_post;
+		}
+		break;
+	case A_DELETE:
+		res = delete_loop(&lc);
+		while (optind < argc) {
+			const char *dev = argv[optind++];
+			int rc;
+
+			/*
+			 * An unusable argument must not fall through to
+			 * delete_loop(): the context would still name the
+			 * previously processed device.
+			 */
+			if (!is_loopdev(dev) ||
+			    loopcxt_set_device(&lc, dev)) {
+				warn(_("%s: failed to use device"), dev);
+				rc = -1;
+			} else
+				rc = delete_loop(&lc);
+
+			/* keep the failure sticky; summing could cancel out */
+			if (rc)
+				res = rc;
+		}
+		break;
+	case A_DELETE_ALL:
+		res = delete_all_loops(&lc);
+		break;
+	case A_FIND_FREE:
+		res = loopcxt_find_unused(&lc);
+		if (res) {
+			int errsv = errno;
+
+			/*
+			 * If the loop-control node exists but is not writable,
+			 * keep the errno set by access(); otherwise restore
+			 * the one from loopcxt_find_unused().
+			 */
+			if (access(_PATH_DEV_LOOPCTL, F_OK) == 0 &&
+			    access(_PATH_DEV_LOOPCTL, W_OK) != 0)
+				;
+			else
+				errno = errsv;
+
+			warn(_("cannot find an unused loop device"));
+		} else
+			printf("%s\n", loopcxt_get_device(&lc));
+		break;
+	case A_SHOW:
+		if (list)
+			res = show_table(&lc, file, offset, flags);
+		else
+			res = show_all_loops(&lc, file, offset, flags);
+		break;
+	case A_SHOW_ONE:
+		if (list)
+			res = show_table(&lc, NULL, 0, 0);
+		else
+			res = printf_loopdev(&lc);
+		if (res)
+			warn("%s", loopcxt_get_device(&lc));
+		break;
+	case A_SET_CAPACITY:
+		res = loopcxt_set_capacity(&lc);
+		if (res)
+			warn(_("%s: set capacity failed"),
+				loopcxt_get_device(&lc));
+		break;
+	case A_SET_DIRECT_IO:
+	case A_SET_BLOCKSIZE:
+ lo_set_post:
+		if (set_dio) {
+			res = loopcxt_set_dio(&lc, use_dio);
+			if (res)
+				warn(_("%s: set direct io failed"),
+					loopcxt_get_device(&lc));
+		}
+		if (set_blocksize) {
+			int rc = loopcxt_set_blocksize(&lc, blocksize);
+
+			/*
+			 * Overwrite res only on failure so that an earlier
+			 * direct-io error still yields EXIT_FAILURE.
+			 */
+			if (rc) {
+				warn(_("%s: set logical block size failed"),
+					loopcxt_get_device(&lc));
+				res = rc;
+			}
+		}
+		break;
+	default:
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+		break;
+	}
+
+	loopcxt_deinit(&lc);
+	return res ? EXIT_FAILURE : EXIT_SUCCESS;
+}
+
diff --git a/sys-utils/lscpu-arm.c b/sys-utils/lscpu-arm.c
new file mode 100644
index 0000000..37b8f66
--- /dev/null
+++ b/sys-utils/lscpu-arm.c
@@ -0,0 +1,252 @@
+/*
+ * lscpu-arm.c - ARM CPU identification tables
+ *
+ * Copyright (C) 2018 Riku Voipio <riku.voipio@iki.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The information here is gathered from
+ * - ARM manuals
+ * - Linux kernel: arch/armX/include/asm/cputype.h
+ * - GCC sources: config/arch/arch-cores.def
+ * - Ancient wisdom
+ */
+#include "lscpu.h"
+
+struct id_part { /* one row of a per-implementer CPU part table */
+ const int id; /* part number; -1 marks the end of a table */
+ const char* name; /* model name stored in desc->modelname by arm_cpu_decode() */
+};
+
+static const struct id_part arm_part[] = { /* parts of implementer 0x41 (ARM), see hw_implementer[] */
+ { 0x810, "ARM810" },
+ { 0x920, "ARM920" },
+ { 0x922, "ARM922" },
+ { 0x926, "ARM926" },
+ { 0x940, "ARM940" },
+ { 0x946, "ARM946" },
+ { 0x966, "ARM966" },
+ { 0xa20, "ARM1020" },
+ { 0xa22, "ARM1022" },
+ { 0xa26, "ARM1026" },
+ { 0xb02, "ARM11 MPCore" },
+ { 0xb36, "ARM1136" },
+ { 0xb56, "ARM1156" },
+ { 0xb76, "ARM1176" },
+ { 0xc05, "Cortex-A5" },
+ { 0xc07, "Cortex-A7" },
+ { 0xc08, "Cortex-A8" },
+ { 0xc09, "Cortex-A9" },
+ { 0xc0d, "Cortex-A17" }, /* Originally A12 */
+ { 0xc0f, "Cortex-A15" },
+ { 0xc0e, "Cortex-A17" },
+ { 0xc14, "Cortex-R4" },
+ { 0xc15, "Cortex-R5" },
+ { 0xc17, "Cortex-R7" },
+ { 0xc18, "Cortex-R8" },
+ { 0xc20, "Cortex-M0" },
+ { 0xc21, "Cortex-M1" },
+ { 0xc23, "Cortex-M3" },
+ { 0xc24, "Cortex-M4" },
+ { 0xc27, "Cortex-M7" },
+ { 0xc60, "Cortex-M0+" },
+ { 0xd01, "Cortex-A32" },
+ { 0xd03, "Cortex-A53" },
+ { 0xd04, "Cortex-A35" },
+ { 0xd05, "Cortex-A55" },
+ { 0xd07, "Cortex-A57" },
+ { 0xd08, "Cortex-A72" },
+ { 0xd09, "Cortex-A73" },
+ { 0xd0a, "Cortex-A75" },
+ { 0xd13, "Cortex-R52" },
+ { 0xd20, "Cortex-M23" },
+ { 0xd21, "Cortex-M33" },
+ { -1, "unknown" }, /* sentinel: lookups stop here */
+};
+
+static const struct id_part brcm_part[] = { /* parts of implementer 0x42 (Broadcom) */
+ { 0x0f, "Brahma B15" },
+ { 0x100, "Brahma B53" },
+ { 0x516, "ThunderX2" },
+ { -1, "unknown" }, /* sentinel */
+};
+
+static const struct id_part dec_part[] = { /* parts of implementer 0x44 (DEC) */
+ { 0xa10, "SA110" },
+ { 0xa11, "SA1100" },
+ { -1, "unknown" }, /* sentinel */
+};
+
+static const struct id_part cavium_part[] = { /* parts of implementer 0x43 (Cavium) */
+ { 0x0a0, "ThunderX" },
+ { 0x0a1, "ThunderX 88XX" },
+ { 0x0a2, "ThunderX 81XX" },
+ { 0x0a3, "ThunderX 83XX" },
+ { 0x0af, "ThunderX2 99xx" },
+ { -1, "unknown" }, /* sentinel */
+};
+
+static const struct id_part apm_part[] = { /* parts of implementer 0x50 (APM) */
+ { 0x000, "X-Gene" },
+ { -1, "unknown" }, /* sentinel */
+};
+
+static const struct id_part qcom_part[] = { /* parts of implementer 0x51 (Qualcomm) */
+ { 0x00f, "Scorpion" },
+ { 0x02d, "Scorpion" },
+ { 0x04d, "Krait" },
+ { 0x06f, "Krait" },
+ { 0x201, "Kryo" },
+ { 0x205, "Kryo" },
+ { 0x211, "Kryo" },
+ { 0x800, "Falkor V1/Kryo" },
+ { 0x801, "Kryo V2" },
+ { 0xc00, "Falkor" },
+ { 0xc01, "Saphira" },
+ { -1, "unknown" }, /* sentinel */
+};
+
+static const struct id_part samsung_part[] = { /* parts of implementer 0x53 (Samsung) */
+ { 0x001, "exynos-m1" },
+ { -1, "unknown" }, /* sentinel */
+};
+
+static const struct id_part nvidia_part[] = { /* parts of implementer 0x4e (Nvidia) */
+ { 0x000, "Denver" },
+ { 0x003, "Denver 2" },
+ { -1, "unknown" }, /* sentinel */
+};
+
+static const struct id_part marvell_part[] = { /* parts of implementer 0x56 (Marvell) */
+ { 0x131, "Feroceon 88FR131" },
+ { 0x581, "PJ4/PJ4b" },
+ { 0x584, "PJ4B-MP" },
+ { -1, "unknown" }, /* sentinel */
+};
+
+static const struct id_part faraday_part[] = { /* parts of implementer 0x66 (Faraday) */
+ { 0x526, "FA526" },
+ { 0x626, "FA626" },
+ { -1, "unknown" }, /* sentinel */
+};
+
+static const struct id_part intel_part[] = { /* parts of implementer 0x69 (Intel) */
+ { 0x200, "i80200" },
+ { 0x210, "PXA250A" },
+ { 0x212, "PXA210A" },
+ { 0x242, "i80321-400" },
+ { 0x243, "i80321-600" },
+ { 0x290, "PXA250B/PXA26x" },
+ { 0x292, "PXA210B" },
+ { 0x2c2, "i80321-400-B0" },
+ { 0x2c3, "i80321-600-B0" },
+ { 0x2d0, "PXA250C/PXA255/PXA26x" },
+ { 0x2d2, "PXA210C" },
+ { 0x411, "PXA27x" },
+ { 0x41c, "IPX425-533" },
+ { 0x41d, "IPX425-400" },
+ { 0x41f, "IPX425-266" },
+ { 0x682, "PXA32x" },
+ { 0x683, "PXA930/PXA935" },
+ { 0x688, "PXA30x" },
+ { 0x689, "PXA31x" },
+ { 0xb11, "SA1110" },
+ { 0xc12, "IPX1200" },
+ { -1, "unknown" }, /* sentinel */
+};
+
+static const struct id_part unknown_part[] = { /* empty table attached to the hw_implementer[] sentinel */
+ { -1, "unknown" },
+};
+
+struct hw_impl { /* one CPU implementer (vendor) and its part table */
+ const int id; /* implementer code; -1 marks the end of hw_implementer[] */
+ const struct id_part *parts; /* -1 terminated part table of this implementer */
+ const char *name; /* vendor name stored in desc->vendor by arm_cpu_decode() */
+};
+
+static const struct hw_impl hw_implementer[] = { /* searched by arm_cpu_decode() with the number parsed from desc->vendor */
+ { 0x41, arm_part, "ARM" },
+ { 0x42, brcm_part, "Broadcom" },
+ { 0x43, cavium_part, "Cavium" },
+ { 0x44, dec_part, "DEC" },
+ { 0x4e, nvidia_part, "Nvidia" },
+ { 0x50, apm_part, "APM" },
+ { 0x51, qcom_part, "Qualcomm" },
+ { 0x53, samsung_part, "Samsung" },
+ { 0x56, marvell_part, "Marvell" },
+ { 0x66, faraday_part, "Faraday" },
+ { 0x69, intel_part, "Intel" },
+ { -1, unknown_part, "unknown" }, /* sentinel: the search stops here without matching */
+};
+
+/*
+ * Translate the raw numeric ARM identification strings in @desc into
+ * readable names.
+ *
+ * desc->vendor and desc->model must both be "0x"-prefixed numbers; anything
+ * else leaves @desc untouched.  A known implementer replaces desc->vendor
+ * with its name, a known part sets desc->modelname, and for ARM Ltd. cores
+ * (implementer 0x41) desc->stepping is replaced by an "rXpY" string built
+ * from the variant (old desc->stepping) and desc->revision.
+ */
+void arm_cpu_decode(struct lscpu_desc *desc)
+{
+	const struct hw_impl *hw;
+	const struct id_part *entry, *parts = NULL;
+	int impl, part, revision, variant;
+	char rxpy[8];
+	char *tail;
+
+	if (!desc->vendor || !desc->model)
+		return;
+	if (strncmp(desc->vendor, "0x", 2) != 0 ||
+	    strncmp(desc->model, "0x", 2) != 0)
+		return;
+
+	errno = 0;
+	impl = (int) strtol(desc->vendor, &tail, 0);
+	if (errno || tail == desc->vendor)
+		return;
+
+	errno = 0;
+	part = (int) strtol(desc->model, &tail, 0);
+	if (errno || tail == desc->model)
+		return;
+
+	/* implementer -> vendor name and part table */
+	for (hw = hw_implementer; hw->id != -1; hw++) {
+		if (hw->id != impl)
+			continue;
+		parts = hw->parts;
+		desc->vendor = (char *) hw->name;
+		break;
+	}
+	if (!parts)
+		return;
+
+	/* part number -> model name; unknown parts keep modelname as it is */
+	for (entry = parts; entry->id != -1; entry++) {
+		if (entry->id != part)
+			continue;
+		desc->modelname = (char *) entry->name;
+		break;
+	}
+
+	/* Print out the rXpY string for ARM cores */
+	if (impl != 0x41 || !desc->revision || !desc->stepping)
+		return;
+
+	errno = 0;
+	revision = (int) strtol(desc->revision, &tail, 10);
+	if (errno || tail == desc->revision)
+		return;
+
+	errno = 0;
+	variant = (int) strtol(desc->stepping, &tail, 0);
+	if (errno || tail == desc->stepping)
+		return;
+
+	snprintf(rxpy, sizeof(rxpy), "r%dp%d", variant, revision);
+	desc->stepping = xstrdup(rxpy);
+}
diff --git a/sys-utils/lscpu-dmi.c b/sys-utils/lscpu-dmi.c
new file mode 100644
index 0000000..29bd2e4
--- /dev/null
+++ b/sys-utils/lscpu-dmi.c
@@ -0,0 +1,305 @@
+/*
+ * lscpu-dmi - Module to parse SMBIOS information
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Code originally taken from the dmidecode utility and slightly rewritten
+ * to suit the needs of lscpu
+ */
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "lscpu.h"
+
+#define _PATH_SYS_DMI "/sys/firmware/dmi/tables/DMI"
+
+/*
+ * Fetch a 16-bit / 32-bit value stored at an arbitrary byte address.
+ *
+ * DMI structures are packed, so the address is not necessarily aligned for
+ * uint16_t/uint32_t.  Copying the bytes avoids the misaligned (and
+ * aliasing-violating) load that a cast-and-dereference would perform.  The
+ * value is interpreted in host byte order.
+ */
+static inline uint16_t dmi_get_word(const void *p)
+{
+	uint16_t v;
+
+	memcpy(&v, p, sizeof(v));
+	return v;
+}
+
+static inline uint32_t dmi_get_dword(const void *p)
+{
+	uint32_t v;
+
+	memcpy(&v, p, sizeof(v));
+	return v;
+}
+
+#define WORD(x)		dmi_get_word(x)
+#define DWORD(x)	dmi_get_dword(x)
+
+struct dmi_header /* decoded view of one DMI structure, filled by to_dmi_header() */
+{
+ uint8_t type; /* byte 0 of the structure */
+ uint8_t length; /* byte 1; dmi_string() starts its string walk at data + length */
+ uint16_t handle; /* 16-bit value read at byte offset 2 */
+ uint8_t *data; /* start of the raw structure (not a copy) */
+};
+
+/*
+ * Return 1 when the @len bytes at @buf add up to zero modulo 256,
+ * otherwise 0.  An empty range counts as valid.
+ */
+static int checksum(const uint8_t *buf, size_t len)
+{
+	uint8_t total = 0;
+
+	while (len--)
+		total += *buf++;
+
+	return total == 0;
+}
+
+/*
+ * Read @len bytes starting at offset @base of the file @devmem.
+ *
+ * Returns a newly allocated buffer that the caller must free(), or NULL
+ * when the file cannot be opened, memory cannot be allocated, or the
+ * seek/read fails.
+ */
+static void *get_mem_chunk(size_t base, size_t len, const char *devmem)
+{
+	void *p = NULL;
+	int fd;
+
+	if ((fd = open(devmem, O_RDONLY)) < 0)
+		return NULL;
+
+	/*
+	 * Zero-fill the buffer: only -1 from read_all() is treated as an
+	 * error, so after a short read the callers would otherwise parse
+	 * uninitialized heap memory.
+	 */
+	if (!(p = calloc(1, len)))
+		goto nothing;
+	if (lseek(fd, base, SEEK_SET) == -1)
+		goto nothing;
+	if (read_all(fd, p, len) == -1)
+		goto nothing;
+
+	close(fd);
+	return p;
+
+nothing:
+	free(p);
+	close(fd);
+	return NULL;
+}
+
+/*
+ * Fill @h from the raw structure at @data: type (byte 0), length (byte 1)
+ * and handle (16-bit value at offset 2).  @h keeps a pointer to @data.
+ */
+static void to_dmi_header(struct dmi_header *h, uint8_t *data)
+{
+	*h = (struct dmi_header) {
+		.type   = data[0],
+		.length = data[1],
+		.handle = WORD(data + 2),
+		.data   = data
+	};
+}
+
+/*
+ * Return the @s-th (1-based) NUL-terminated string that follows the
+ * formatted area of @dm, i.e. the strings starting at data + length.
+ *
+ * Returns NULL for index 0 and when the string list ends before @s is
+ * reached; otherwise a pointer into the structure's own buffer.
+ */
+static char *dmi_string(const struct dmi_header *dm, uint8_t s)
+{
+	char *str;
+
+	if (!s)
+		return NULL;
+
+	/* skip s - 1 strings, stopping early at the empty terminator */
+	str = (char *) dm->data + dm->length;
+	for (; s > 1 && *str != '\0'; s--)
+		str += strlen(str) + 1;
+
+	return *str ? str : NULL;
+}
+
+/*
+ * Scan a DMI structure table for strings that identify a hypervisor.
+ *
+ * @base:   offset of the table inside @devmem
+ * @len:    table length in bytes
+ * @num:    maximum number of structures to visit
+ * @devmem: file to read the table from
+ *
+ * The first string referenced by type 0 structures (vendor) and the strings
+ * referenced by bytes 4 and 5 of type 1 structures (manufacturer, product)
+ * are matched against known values.  Returns a HYPER_* code; HYPER_NONE
+ * when the table cannot be read, is malformed, or nothing matches.
+ */
+static int hypervisor_from_dmi_table(uint32_t base, uint16_t len,
+				uint16_t num, const char *devmem)
+{
+	uint8_t *buf;
+	uint8_t *data;
+	int i = 0;
+	char *vendor = NULL;
+	char *product = NULL;
+	char *manufacturer = NULL;
+	int rc = HYPER_NONE;
+
+	data = buf = get_mem_chunk(base, len, devmem);
+	if (!buf)
+		goto done;
+
+	/* 4 is the length of an SMBIOS structure header */
+	while (i < num && data + 4 <= buf + len) {
+		uint8_t *next;
+		struct dmi_header h;
+
+		to_dmi_header(&h, data);
+
+		/*
+		 * If a short entry is found (less than 4 bytes), not only it
+		 * is invalid, but we cannot reliably locate the next entry.
+		 * Better stop at this point.
+		 */
+		if (h.length < 4)
+			goto done;
+
+		/* look for the next handle */
+		next = data + h.length;
+		while (next - buf + 1 < len && (next[0] != 0 || next[1] != 0))
+			next++;
+		next += 2;
+
+		/*
+		 * The whole structure, including its string area and the
+		 * terminating double NUL, has to fit into the table.  A
+		 * truncated entry must not be decoded: the field accesses
+		 * and dmi_string() below would read past the buffer.
+		 */
+		if (next - buf > len)
+			break;
+
+		switch (h.type) {
+		case 0:
+			/* string index lives at offset 4 of the formatted area */
+			if (h.length > 0x04)
+				vendor = dmi_string(&h, data[0x04]);
+			break;
+		case 1:
+			/* string indexes live at offsets 4 and 5 */
+			if (h.length > 0x05) {
+				manufacturer = dmi_string(&h, data[0x04]);
+				product = dmi_string(&h, data[0x05]);
+			}
+			break;
+		default:
+			break;
+		}
+
+		data = next;
+		i++;
+	}
+	if (manufacturer && !strcmp(manufacturer, "innotek GmbH"))
+		rc = HYPER_INNOTEK;
+	else if (manufacturer && strstr(manufacturer, "HITACHI") &&
+					product && strstr(product, "LPAR"))
+		rc = HYPER_HITACHI;
+	else if (vendor && !strcmp(vendor, "Parallels"))
+		rc = HYPER_PARALLELS;
+done:
+	free(buf);
+	return rc;
+}
+
+#if defined(__x86_64__) || defined(__i386__)
+/*
+ * Decode the 15-byte entry point at @buf (the caller has matched its
+ * "_DMI_" anchor) and scan the table it describes.
+ *
+ * Returns -1 when the entry point checksum is wrong, otherwise the result
+ * of hypervisor_from_dmi_table().
+ */
+static int hypervisor_decode_legacy(uint8_t *buf, const char *devmem)
+{
+	uint32_t base;
+	uint16_t len, num;
+
+	if (!checksum(buf, 0x0F))
+		return -1;
+
+	base = DWORD(buf + 0x08);	/* table offset in @devmem */
+	len  = WORD(buf + 0x06);	/* table length in bytes */
+	num  = WORD(buf + 0x0C);	/* number of structures */
+
+	return hypervisor_from_dmi_table(base, len, num, devmem);
+}
+#endif
+
+/*
+ * Validate the SMBIOS entry point at @buf and scan the DMI table it
+ * describes.
+ *
+ * @buf must provide at least 0x20 readable bytes.  Byte 0x05 holds the
+ * entry point length used for the first checksum; the intermediate "_DMI_"
+ * anchor with its own 15-byte checksum is expected at offset 0x10.
+ *
+ * Returns -1 when the entry point is invalid, otherwise the result of
+ * hypervisor_from_dmi_table().
+ */
+static int hypervisor_decode_smbios(uint8_t *buf, const char *devmem)
+{
+	/*
+	 * Don't let the checksum run beyond the buffer: callers hand in
+	 * exactly 0x20 bytes (EFI path) or guarantee only that many before
+	 * the end of the scanned area.
+	 */
+	if (buf[0x05] > 0x20)
+		return -1;
+
+	if (!checksum(buf, buf[0x05])
+	    || memcmp(buf + 0x10, "_DMI_", 5) != 0
+	    || !checksum(buf + 0x10, 0x0F))
+		return -1;
+
+	return hypervisor_from_dmi_table(DWORD(buf + 0x18), WORD(buf + 0x16),
+					 WORD(buf + 0x1C),
+					 devmem);
+}
+
+/*
+ * Scan the DMI table exported by the kernel in _PATH_SYS_DMI.
+ *
+ * Returns -1 when the file does not exist (stat() fails), otherwise the
+ * result of hypervisor_from_dmi_table().
+ */
+static int hypervisor_decode_sysfw(void)
+{
+	static char const sys_fw_dmi_tables[] = _PATH_SYS_DMI;
+	struct stat st;
+	uint16_t len, num;
+
+	if (stat(sys_fw_dmi_tables, &st))
+		return -1;
+
+	/*
+	 * hypervisor_from_dmi_table() takes 16-bit length and count.  Clamp
+	 * instead of letting a table larger than 64 KiB wrap around to an
+	 * arbitrary small length.  Every structure is at least 4 bytes long,
+	 * hence size / 4 as the upper bound for the number of structures.
+	 */
+	len = st.st_size > 0xFFFF ? 0xFFFF : (uint16_t) st.st_size;
+	num = st.st_size / 4 > 0xFFFF ? 0xFFFF : (uint16_t) (st.st_size / 4);
+
+	return hypervisor_from_dmi_table(0, len, num, sys_fw_dmi_tables);
+}
+
+/*
+ * Look up the SMBIOS entry point address in the EFI system table file.
+ *
+ * Returns 0 and stores the address in @address on success, EFI_NOT_FOUND
+ * when no systab file can be opened, EFI_NO_SMBIOS when the file has no
+ * "SMBIOS=" line.  @address is always initialized (to 0).
+ */
+#define EFI_NOT_FOUND (-1)
+#define EFI_NO_SMBIOS (-2)
+static int address_from_efi(size_t *address)
+{
+	int ret = EFI_NO_SMBIOS;
+	char linebuf[64];
+	FILE *tab;
+
+	*address = 0; /* Prevent compiler warning */
+
+	/*
+	 * Linux up to 2.6.6: /proc/efi/systab
+	 * Linux 2.6.7 and up: /sys/firmware/efi/systab
+	 */
+	tab = fopen("/sys/firmware/efi/systab", "r");
+	if (!tab)
+		tab = fopen("/proc/efi/systab", "r");
+	if (!tab)
+		return EFI_NOT_FOUND;		/* No EFI interface */
+
+	while (fgets(linebuf, sizeof(linebuf) - 1, tab)) {
+		char *sep = strchr(linebuf, '=');
+
+		if (!sep)
+			continue;
+
+		/* split "KEY=value" in place */
+		*sep = '\0';
+		if (strcmp(linebuf, "SMBIOS") != 0)
+			continue;
+
+		*address = strtoul(sep + 1, NULL, 0);
+		ret = 0;
+		break;
+	}
+
+	fclose(tab);
+	return ret;
+}
+
+int read_hypervisor_dmi(void)
+{ /*
+ * Detect a hypervisor from DMI/SMBIOS strings.  Sources are tried in
+ * this order: the sysfs DMI table, the SMBIOS entry point whose address
+ * address_from_efi() reports, and (x86 only) a scan of the 64 KiB at
+ * 0xF0000 in _PATH_DEV_MEM.  Returns a HYPER_* code; every failure is
+ * reported as HYPER_NONE.
+ */
+ int rc = HYPER_NONE;
+ uint8_t *buf = NULL;
+ size_t fp = 0;
+
+ if (sizeof(uint8_t) != 1
+ || sizeof(uint16_t) != 2
+ || sizeof(uint32_t) != 4
+ || '\0' != 0)
+ goto done; /* the decoders rely on these exact type sizes */
+
+ /* -1 : no DMI in /sys,
+ * 0 : DMI exist, nothing detected (HYPER_NONE)
+ * >0 : hypervisor detected
+ */
+ rc = hypervisor_decode_sysfw();
+ if (rc >= HYPER_NONE)
+ goto done;
+
+ /* First try EFI (ia64, Intel-based Mac) */
+ switch (address_from_efi(&fp)) {
+ case EFI_NOT_FOUND:
+ goto memory_scan; /* no systab file: fall back to scanning memory */
+ case EFI_NO_SMBIOS:
+ goto done; /* systab present but has no SMBIOS entry */
+ }
+
+ buf = get_mem_chunk(fp, 0x20, _PATH_DEV_MEM); /* 0x20 bytes: all that hypervisor_decode_smbios() may read */
+ if (!buf)
+ goto done;
+
+ rc = hypervisor_decode_smbios(buf, _PATH_DEV_MEM);
+ if (rc >= HYPER_NONE)
+ goto done;
+
+ free(buf);
+ buf = NULL; /* keeps free() at "done" safe when the scan below is compiled out */
+memory_scan:
+#if defined(__x86_64__) || defined(__i386__)
+ /* Fallback to memory scan (x86, x86_64) */
+ buf = get_mem_chunk(0xF0000, 0x10000, _PATH_DEV_MEM);
+ if (!buf)
+ goto done;
+
+ for (fp = 0; fp <= 0xFFF0; fp += 16) { /* anchors are looked for on 16-byte boundaries */
+ if (memcmp(buf + fp, "_SM_", 4) == 0 && fp <= 0xFFE0) {
+ rc = hypervisor_decode_smbios(buf + fp, _PATH_DEV_MEM);
+ if (rc < 0)
+ fp += 16; /* skip the next slot too: it belongs to this rejected entry (its _DMI_ anchor sits at +0x10) */
+
+ } else if (memcmp(buf + fp, "_DMI_", 5) == 0)
+ rc = hypervisor_decode_legacy(buf + fp, _PATH_DEV_MEM);
+
+ if (rc >= HYPER_NONE)
+ break;
+ }
+#endif
+done:
+ free(buf);
+ return rc < 0 ? HYPER_NONE : rc; /* negative codes mean "could not decode" */
+}
diff --git a/sys-utils/lscpu.1 b/sys-utils/lscpu.1
new file mode 100644
index 0000000..23dee9b
--- /dev/null
+++ b/sys-utils/lscpu.1
@@ -0,0 +1,184 @@
+.TH LSCPU 1 "November 2015" "util-linux" "User Commands"
+.SH NAME
+lscpu \- display information about the CPU architecture
+.SH SYNOPSIS
+.B lscpu
+.RB [ \-a | \-b | \-c | \-J "] [" \-x "] [" \-y "] [" \-s " \fIdirectory\fP] [" \-e [=\fIlist\fP]| \-p [=\fIlist\fP]]
+.br
+.B lscpu
+.BR \-h | \-V
+.SH DESCRIPTION
+.B lscpu
+gathers CPU architecture information from sysfs, /proc/cpuinfo and any
+applicable architecture-specific libraries (e.g.\& librtas on PowerPC). The
+command output can be optimized for parsing or for easy readability by humans.
+The information includes, for example, the number of CPUs, threads, cores,
+sockets, and Non-Uniform Memory Access (NUMA) nodes. There is also information
+about the CPU caches and cache sharing, family, model, bogoMIPS, byte order,
+and stepping.
+.sp
+In virtualized environments, the CPU architecture information displayed
+reflects the configuration of the guest operating system which is
+typically different from the physical (host) system. On architectures that
+support retrieving physical topology information,
+.B lscpu
+also displays the number of physical sockets, chips, cores in the host system.
+.sp
+Options that result in an output table have a \fIlist\fP argument. Use this
+argument to customize the command output. Specify a comma-separated list of
+column labels to limit the output table to only the specified columns, arranged
+in the specified order. See \fBCOLUMNS\fP for a list of valid column labels. The
+column labels are not case sensitive.
+.sp
+Not all columns are supported on all architectures. If an unsupported column is
+specified, \fBlscpu\fP prints the column but does not provide any data for it.
+
+.SS COLUMNS
+Note that topology elements (core, socket, etc.) use a sequential unique ID
+starting from zero, but CPU logical numbers follow the kernel where there is
+no guarantee of sequential numbering.
+.TP
+.B CPU
+The logical CPU number of a CPU as used by the Linux kernel.
+.TP
+.B CORE
+The logical core number. A core can contain several CPUs.
+.TP
+.B SOCKET
+The logical socket number. A socket can contain several cores.
+.TP
+.B BOOK
+The logical book number. A book can contain several sockets.
+.TP
+.B DRAWER
+The logical drawer number. A drawer can contain several books.
+.TP
+.B NODE
+The logical NUMA node number. A node can contain several drawers.
+.TP
+.B CACHE
+Information about how caches are shared between CPUs.
+.TP
+.B ADDRESS
+The physical address of a CPU.
+.TP
+.B ONLINE
+Indicator that shows whether the Linux instance currently makes use of the CPU.
+.TP
+.B CONFIGURED
+Indicator that shows if the hypervisor has allocated the CPU to the virtual
+hardware on which the Linux instance runs. CPUs that are configured can be set
+online by the Linux instance.
+This column contains data only if your hardware system and hypervisor support
+dynamic CPU resource allocation.
+.TP
+.B POLARIZATION
+This column contains data for Linux instances that run on virtual hardware with
+a hypervisor that can switch the CPU dispatching mode (polarization). The
+polarization can be:
+.RS
+.TP 12
+.B horizontal
+The workload is spread across all available CPUs.
+.TP 12
+.B vertical
+The workload is concentrated on few CPUs.
+.P
+For vertical polarization, the column also shows the degree of concentration,
+high, medium, or low. This column contains data only if your hardware system
+and hypervisor support CPU polarization.
+.RE
+.TP
+.B MAXMHZ
+Maximum megahertz value for the CPU. Useful when \fBlscpu\fP is used as a hardware
+inventory information gathering tool. Notice that the megahertz value is
+dynamic, and driven by the CPU governor depending on current resource need.
+.TP
+.B MINMHZ
+Minimum megahertz value for the CPU.
+.SH OPTIONS
+.TP
+.BR \-a , " \-\-all"
+Include lines for online and offline CPUs in the output (default for \fB-e\fR).
+This option may only be specified together with option \fB-e\fR or \fB-p\fR.
+.TP
+.BR \-b , " \-\-online"
+Limit the output to online CPUs (default for \fB-p\fR).
+This option may only be specified together with option \fB-e\fR or \fB-p\fR.
+.TP
+.BR \-c , " \-\-offline"
+Limit the output to offline CPUs.
+This option may only be specified together with option \fB-e\fR or \fB-p\fR.
+.TP
+.BR \-e , " \-\-extended" [=\fIlist\fP]
+Display the CPU information in human-readable format.
+
+If the \fIlist\fP argument is omitted, all columns for which data is available
+are included in the command output.
+
+When specifying the \fIlist\fP argument, the string of option, equal sign (=), and
+\fIlist\fP must not contain any blanks or other whitespace.
+Examples: '\fB-e=cpu,node\fP' or '\fB--extended=cpu,node\fP'.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.TP
+.BR \-J , " \-\-json"
+Use JSON output format for the default summary or extended output (see \fB\-\-extended\fP).
+.TP
+.BR \-p , " \-\-parse" [=\fIlist\fP]
+Optimize the command output for easy parsing.
+
+If the \fIlist\fP argument is omitted, the command output is compatible with earlier
+versions of \fBlscpu\fP. In this compatible format, two commas are used to separate
+CPU cache columns. If no CPU caches are identified the cache column is omitted.
+.br
+If the \fIlist\fP argument is used, cache columns are separated with a colon (:).
+
+When specifying the \fIlist\fP argument, the string of option, equal sign (=), and
+\fIlist\fP must not contain any blanks or other whitespace.
+Examples: '\fB-p=cpu,node\fP' or '\fB--parse=cpu,node\fP'.
+.TP
+.BR \-s , " \-\-sysroot " \fIdirectory\fP
+Gather CPU data for a Linux instance other than the instance from which the
+\fBlscpu\fP command is issued. The specified \fIdirectory\fP is the system root
+of the Linux instance to be inspected.
+.TP
+.BR \-x , " \-\-hex"
+Use hexadecimal masks for CPU sets (for example 0x3). The default is to print
+the sets in list format (for example 0,1).
+.TP
+.BR \-y , " \-\-physical"
+Display physical IDs for all columns with topology elements (core, socket, etc.).
+Unlike logical IDs, which are assigned by \fBlscpu\fP, physical IDs are
+platform-specific values that are provided by the kernel. Physical IDs are not
+necessarily unique and they might not be arranged sequentially.
+If the kernel could not retrieve a physical ID for an element \fBlscpu\fP prints
+the dash (-) character.
+
+The CPU logical numbers are not affected by this option.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.B \-\-output\-all
+Output all available columns. This option must be combined with either
+.BR \-\-extended " or " \-\-parse .
+.SH BUGS
+The basic overview of CPU family, model, etc. is always based on the first
+CPU only.
+
+Sometimes in Xen Dom0 the kernel reports wrong data.
+
+On virtual hardware the number of cores per socket, etc. can be wrong.
+.SH AUTHOR
+.nf
+Cai Qian <qcai@redhat.com>
+Karel Zak <kzak@redhat.com>
+Heiko Carstens <heiko.carstens@de.ibm.com>
+.fi
+.SH "SEE ALSO"
+.BR chcpu (8)
+.SH AVAILABILITY
+The lscpu command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/lscpu.c b/sys-utils/lscpu.c
new file mode 100644
index 0000000..1ff9069
--- /dev/null
+++ b/sys-utils/lscpu.c
@@ -0,0 +1,2134 @@
+/*
+ * lscpu - CPU architecture information helper
+ *
+ * Copyright (C) 2008 Cai Qian <qcai@redhat.com>
+ * Copyright (C) 2008 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#if (defined(__x86_64__) || defined(__i386__))
+# if !defined( __SANITIZE_ADDRESS__)
+# define INCLUDE_VMWARE_BDOOR
+# else
+# warning VMWARE detection disabled by __SANITIZE_ADDRESS__
+# endif
+#endif
+
+#ifdef INCLUDE_VMWARE_BDOOR
+# include <stdint.h>
+# include <signal.h>
+# include <strings.h>
+# include <setjmp.h>
+# ifdef HAVE_SYS_IO_H
+# include <sys/io.h>
+# endif
+#endif
+
+#if defined(HAVE_LIBRTAS)
+#include <librtas.h>
+#endif
+
+#include <libsmartcols.h>
+
+#include "closestream.h"
+#include "optutils.h"
+
+#include "lscpu.h"
+
+/* NOTE(review): not referenced in this part of the file; presumably an upper
+ * bound for cache-related buffers -- confirm against its users. */
+#define CACHE_MAX 100
+
+/* /sys paths */
+#define _PATH_SYS_SYSTEM "/sys/devices/system"
+#define _PATH_SYS_HYP_FEATURES "/sys/hypervisor/properties/features"
+#define _PATH_SYS_CPU _PATH_SYS_SYSTEM "/cpu"
+#define _PATH_SYS_NODE _PATH_SYS_SYSTEM "/node"
+
+/* Xen Domain feature flag used for /sys/hypervisor/properties/features */
+/* The values are bit positions; they are turned into masks below. */
+#define XENFEAT_supervisor_mode_kernel 3
+#define XENFEAT_mmu_pt_update_preserve_ad 5
+#define XENFEAT_hvm_callback_vector 8
+
+/*
+ * Masks tested by read_hypervisor() against the hexadecimal features word:
+ * a domain is treated as PV when the PV bit is set, and as PVH when both
+ * PVH bits are set at the same time.
+ */
+#define XEN_FEATURES_PV_MASK (1U << XENFEAT_mmu_pt_update_preserve_ad)
+#define XEN_FEATURES_PVH_MASK ( (1U << XENFEAT_supervisor_mode_kernel) \
+ | (1U << XENFEAT_hvm_callback_vector) )
+
+/* Virtualization type names, indexed by the VIRT_* constants. The strings are
+ * only marked for translation here (N_()). */
+static const char *virt_types[] = {
+ [VIRT_NONE] = N_("none"),
+ [VIRT_PARA] = N_("para"),
+ [VIRT_FULL] = N_("full"),
+ [VIRT_CONT] = N_("container"),
+};
+
+/* Hypervisor vendor names, indexed by the HYPER_* constants; HYPER_NONE has
+ * no name. */
+static const char *hv_vendors[] = {
+ [HYPER_NONE] = NULL,
+ [HYPER_XEN] = "Xen",
+ [HYPER_KVM] = "KVM",
+ [HYPER_MSHV] = "Microsoft",
+ [HYPER_VMWARE] = "VMware",
+ [HYPER_IBM] = "IBM",
+ [HYPER_VSERVER] = "Linux-VServer",
+ [HYPER_UML] = "User-mode Linux",
+ [HYPER_INNOTEK] = "Innotek GmbH",
+ [HYPER_HITACHI] = "Hitachi",
+ [HYPER_PARALLELS] = "Parallels",
+ [HYPER_VBOX] = "Oracle",
+ [HYPER_OS400] = "OS/400",
+ [HYPER_PHYP] = "pHyp",
+ [HYPER_SPAR] = "Unisys s-Par",
+ [HYPER_WSL] = "Windows Subsystem for Linux"
+};
+
+/*
+ * PCI vendor IDs, indexed by HYPER_*. Passed together with the matching
+ * hv_graphics_pci[] entry to has_pci_device() by read_hypervisor().
+ * Only the hypervisors listed here have an entry.
+ */
+static const int hv_vendor_pci[] = {
+ [HYPER_NONE] = 0x0000,
+ [HYPER_XEN] = 0x5853,
+ [HYPER_KVM] = 0x0000,
+ [HYPER_MSHV] = 0x1414,
+ [HYPER_VMWARE] = 0x15ad,
+ [HYPER_VBOX] = 0x80ee,
+};
+
+/* PCI device IDs paired with hv_vendor_pci[], indexed by HYPER_*. */
+static const int hv_graphics_pci[] = {
+ [HYPER_NONE] = 0x0000,
+ [HYPER_XEN] = 0x0001,
+ [HYPER_KVM] = 0x0000,
+ [HYPER_MSHV] = 0x5353,
+ [HYPER_VMWARE] = 0x0710,
+ [HYPER_VBOX] = 0xbeef,
+};
+
+
+/* dispatching modes */
+/* Names indexed by the DISP_* constants; marked for translation only. */
+static const char *disp_modes[] = {
+ [DISP_HORIZONTAL] = N_("horizontal"),
+ [DISP_VERTICAL] = N_("vertical")
+};
+
+/* Polarization labels indexed by the POLAR_* constants.
+ * NOTE(review): each entry looks like a short form followed by a long form;
+ * confirm against struct polarization_modes in lscpu.h. */
+static struct polarization_modes polar_modes[] = {
+ [POLAR_UNKNOWN] = {"U", "-"},
+ [POLAR_VLOW] = {"VL", "vert-low"},
+ [POLAR_VMEDIUM] = {"VM", "vert-medium"},
+ [POLAR_VHIGH] = {"VH", "vert-high"},
+ [POLAR_HORIZONTAL] = {"H", "horizontal"},
+};
+
+/* Set by read_basicinfo(); every CPU_*_S() call in this file derives its set
+ * size from it via CPU_ALLOC_SIZE(maxcpus). */
+static int maxcpus; /* size in bits of kernel cpu mask */
+
+/* Evaluate to 0 when the descriptor or the respective mask is missing,
+ * otherwise to the CPU_ISSET_S() result for CPU number _cpu. */
+#define is_cpu_online(_d, _cpu) \
+ ((_d) && (_d)->online ? \
+ CPU_ISSET_S((_cpu), CPU_ALLOC_SIZE(maxcpus), (_d)->online) : 0)
+#define is_cpu_present(_d, _cpu) \
+ ((_d) && (_d)->present ? \
+ CPU_ISSET_S((_cpu), CPU_ALLOC_SIZE(maxcpus), (_d)->present) : 0)
+
+/* Map an index into the per-CPU arrays (0..ncpuspos-1) to the kernel CPU
+ * number stored in idx2cpunum[] by read_basicinfo(). No bounds check. */
+#define real_cpu_num(_d, _i) ((_d)->idx2cpunum[(_i)])
+
+/*
+ * IDs
+ */
+/* Column identifiers; each value is also the index of the matching entry in
+ * coldescs[] below. */
+enum {
+ COL_CPU,
+ COL_CORE,
+ COL_SOCKET,
+ COL_NODE,
+ COL_BOOK,
+ COL_DRAWER,
+ COL_CACHE,
+ COL_POLARIZATION,
+ COL_ADDRESS,
+ COL_CONFIGURED,
+ COL_ONLINE,
+ COL_MAXMHZ,
+ COL_MINMHZ,
+};
+
+/* column description
+ */
+struct lscpu_coldesc {
+ const char *name; /* column name, matched case-insensitively by column_name_to_id() */
+ const char *help; /* one-line description, marked for translation */
+
+ unsigned int is_abbr:1; /* name is abbreviation */
+};
+
+/* Column table indexed by COL_*; only "CPU" is flagged as an abbreviation. */
+static struct lscpu_coldesc coldescs[] =
+{
+ [COL_CPU] = { "CPU", N_("logical CPU number"), 1 },
+ [COL_CORE] = { "CORE", N_("logical core number") },
+ [COL_SOCKET] = { "SOCKET", N_("logical socket number") },
+ [COL_NODE] = { "NODE", N_("logical NUMA node number") },
+ [COL_BOOK] = { "BOOK", N_("logical book number") },
+ [COL_DRAWER] = { "DRAWER", N_("logical drawer number") },
+ [COL_CACHE] = { "CACHE", N_("shows how caches are shared between CPUs") },
+ [COL_POLARIZATION] = { "POLARIZATION", N_("CPU dispatching mode on virtual hardware") },
+ [COL_ADDRESS] = { "ADDRESS", N_("physical address of a CPU") },
+ [COL_CONFIGURED] = { "CONFIGURED", N_("shows if the hypervisor has allocated the CPU") },
+ [COL_ONLINE] = { "ONLINE", N_("shows if Linux currently makes use of the CPU") },
+ [COL_MAXMHZ] = { "MAXMHZ", N_("shows the maximum MHz of the CPU") },
+ [COL_MINMHZ] = { "MINMHZ", N_("shows the minimum MHz of the CPU") }
+};
+
+/*
+ * Translate a column name to its COL_* identifier.
+ *
+ * @name need not be NUL-terminated at @namesz; only the first @namesz
+ * characters are compared (case-insensitively) and the table name has to end
+ * right after them. Returns the index into coldescs[], or -1 after printing a
+ * warning when no column matches.
+ */
+static int
+column_name_to_id(const char *name, size_t namesz)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(coldescs)) {
+		const char *colname = coldescs[idx].name;
+
+		/* same leading characters and the column name stops there */
+		if (strncasecmp(name, colname, namesz) == 0 && colname[namesz] == '\0')
+			return idx;
+		idx++;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/* Lookup a pattern and get the value from cpuinfo.
+ * Format is:
+ *
+ *	"<pattern>   : <value>"
+ *
+ * @line is modified in place (trailing white space is cut off). On a match
+ * a newly allocated copy of the value is stored in @value and 1 is returned.
+ * 0 is returned when the line is empty, @value is already set, the pattern
+ * is not followed by optional blanks and ':', or the value is empty.
+ */
+static int
+lookup(char *line, char *pattern, char **value)
+{
+	char *p, *v, *end;
+	size_t len = strlen(pattern);
+
+	/* don't re-fill already found tags, first one wins */
+	if (!*line || *value)
+		return 0;
+
+	/* pattern */
+	if (strncmp(line, pattern, len))
+		return 0;
+
+	/* white spaces; <ctype.h> functions need an unsigned char value */
+	for (p = line + len; isspace((unsigned char) *p); p++);
+
+	/* separator */
+	if (*p != ':')
+		return 0;
+
+	/* white spaces */
+	for (++p; isspace((unsigned char) *p); p++);
+
+	/* value */
+	if (!*p)
+		return 0;
+	v = p;
+
+	/* end of value: drop trailing white space only, so a last line
+	 * without '\n' keeps its final character; *v is not a blank, hence
+	 * the loop always stops behind it */
+	end = v + strlen(v);
+	while (end > v && isspace((unsigned char) end[-1]))
+		end--;
+	*end = '\0';
+
+	*value = xstrdup(v);
+	return 1;
+}
+
+/* Parse extra cache lines contained within /proc/cpuinfo but which are not
+ * part of the cache topology information within the sysfs filesystem.
+ * This is true for all shared caches on e.g. s390. When there are layers of
+ * hypervisors in between it is not known which CPUs share which caches.
+ * Therefore information about shared caches is only available in
+ * /proc/cpuinfo.
+ * Format is:
+ * "cache<nr> : level=<lvl> type=<type> scope=<scope> size=<size> line_size=<lsz> associativity=<as>"
+ *
+ * On success a new entry with name ("L<lvl>[d|i]") and size ("<size>K") is
+ * appended to desc->ecaches and 1 is returned. Lines which do not match,
+ * lack one of the scope/level/type/size keys, or describe a private cache
+ * return 0 and leave @desc untouched.
+ */
+static int
+lookup_cache(char *line, struct lscpu_desc *desc)
+{
+	struct cpu_cache *cache;
+	long long size;
+	char *p, type;
+	int level;
+
+	/* Make sure line starts with "cache<nr> :" */
+	if (strncmp(line, "cache", 5))
+		return 0;
+	for (p = line + 5; isdigit((unsigned char) *p); p++);
+	for (; isspace((unsigned char) *p); p++);
+	if (*p != ':')
+		return 0;
+
+	/* test the strstr() result before skipping over the key, otherwise
+	 * a missing key yields a bogus non-NULL pointer */
+	p = strstr(line, "scope=");
+	/* Skip private caches, also present in sysfs */
+	if (!p || strncmp(p + 6, "Private", 7) == 0)
+		return 0;
+	p = strstr(line, "level=");
+	if (!p || sscanf(p, "level=%d", &level) != 1)
+		return 0;
+	p = strstr(line, "type=");
+	if (!p || !*(p + 5))
+		return 0;
+	p += 5;
+	type = 0;
+	if (strncmp(p, "Data", 4) == 0)
+		type = 'd';
+	if (strncmp(p, "Instruction", 11) == 0)
+		type = 'i';
+	p = strstr(line, "size=");
+	if (!p || sscanf(p, "size=%lld", &size) != 1)
+		return 0;
+
+	desc->necaches++;
+	desc->ecaches = xrealloc(desc->ecaches,
+				 desc->necaches * sizeof(struct cpu_cache));
+	cache = &desc->ecaches[desc->necaches - 1];
+	memset(cache, 0 , sizeof(*cache));
+	if (type)
+		xasprintf(&cache->name, "L%d%c", level, type);
+	else
+		xasprintf(&cache->name, "L%d", level);
+	xasprintf(&cache->size, "%lldK", size);
+	return 1;
+}
+
+/*
+ * Compile-time default for the CPU op-mode bit mask.
+ *
+ * Nothing is preset for platforms where 64-bit support cannot be detected,
+ * nor when a /{sys,proc} snapshot is inspected, because the build platform
+ * says nothing about the CPU described by the dump.
+ */
+static int
+init_mode(struct lscpu_modifier *mod)
+{
+	int mode = 0;
+
+	/* snapshot data must not be mixed with facts about our real CPU */
+	if (mod->system == SYSTEM_SNAPSHOT)
+		return 0;
+
+#if defined(__alpha__) || defined(__ia64__)
+	/* 64bit platforms only */
+	mode = mode | MODE_64BIT;
+#endif
+#if defined(__i386__) || defined(__x86_64__) || \
+    defined(__s390x__) || defined(__s390__) || defined(__sparc_v9__)
+	/* these platforms report a 64bit flag in /proc/cpuinfo, so only the
+	 * 32bit default is defined here */
+	mode = mode | MODE_32BIT;
+#endif
+	return mode;
+}
+
+#if defined(HAVE_LIBRTAS)
+#define PROCESSOR_MODULE_INFO	43
+
+/*
+ * Decode the big-endian 16-bit value stored at @buf + @offset.
+ *
+ * The bytes are read as unsigned char so that values >= 0x80 are not
+ * sign-extended on platforms where plain char is signed.
+ */
+static int strbe16toh(const char *buf, int offset)
+{
+	const unsigned char *b = (const unsigned char *) buf + offset;
+
+	return (b[0] << 8) | b[1];
+}
+
+/*
+ * Fill desc->physsockets, desc->physchips and desc->physcoresperchip from
+ * the RTAS system parameter PROCESSOR_MODULE_INFO. All three stay 0 when
+ * the parameter cannot be read, is shorter than 8 bytes or lists no module
+ * type.
+ */
+static void read_physical_info_powerpc(struct lscpu_desc *desc)
+{
+	char buf[BUFSIZ];
+	int rc, len, ntypes;
+
+	desc->physsockets = desc->physchips = desc->physcoresperchip = 0;
+
+	rc = rtas_get_sysparm(PROCESSOR_MODULE_INFO, sizeof(buf), buf);
+	if (rc < 0)
+		return;
+
+	/* the buffer starts with a 16-bit length of the data that follows */
+	len = strbe16toh(buf, 0);
+	if (len < 8)
+		return;
+
+	ntypes = strbe16toh(buf, 2);
+
+	assert(ntypes <= 1);
+	if (!ntypes)
+		return;
+
+	desc->physsockets = strbe16toh(buf, 4);
+	desc->physchips = strbe16toh(buf, 6);
+	desc->physcoresperchip = strbe16toh(buf, 8);
+}
+#else
+/* Without librtas there is no source for the physical layout; no-op. */
+static void read_physical_info_powerpc(
+		struct lscpu_desc *desc __attribute__((__unused__)))
+{
+}
+#endif
+
+
+/*
+ * Collect the basic CPU description.
+ *
+ * Reads the architecture from uname(), the per-key details from "cpuinfo"
+ * below desc->procfs, derives the op-mode and virtualization flags from the
+ * flags string, determines the global maxcpus, builds desc->idx2cpunum from
+ * the "possible" CPU list, and reads the "present" and "online" masks, the
+ * dispatching mode and (from "sysinfo") the machine type.
+ *
+ * Terminates the program through err() when uname() fails, when cpuinfo
+ * cannot be opened or when the "possible" list cannot be read.
+ */
+static void
+read_basicinfo(struct lscpu_desc *desc, struct lscpu_modifier *mod)
+{
+ FILE *fp;
+ char buf[BUFSIZ];
+ struct utsname utsbuf;
+ size_t setsize;
+ cpu_set_t *cpuset = NULL;
+
+ /* architecture */
+ if (uname(&utsbuf) == -1)
+ err(EXIT_FAILURE, _("error: uname failed"));
+
+ fp = ul_path_fopen(desc->procfs, "r", "cpuinfo");
+ if (!fp)
+ err(EXIT_FAILURE, _("cannot open %s"), "/proc/cpuinfo");
+ desc->arch = xstrdup(utsbuf.machine);
+
+ /* details */
+ /* lookup() never overwrites a field which is already set, so for every
+ * field the first matching line of the file wins; several keys feed the
+ * same field because the key names differ between architectures. */
+ while (fgets(buf, sizeof(buf), fp) != NULL) {
+ if (lookup(buf, "vendor", &desc->vendor)) ;
+ else if (lookup(buf, "vendor_id", &desc->vendor)) ;
+ else if (lookup(buf, "CPU implementer", &desc->vendor)) ; /* ARM and aarch64 */
+ else if (lookup(buf, "family", &desc->family)) ;
+ else if (lookup(buf, "cpu family", &desc->family)) ;
+ else if (lookup(buf, "model", &desc->model)) ;
+ else if (lookup(buf, "CPU part", &desc->model)) ; /* ARM and aarch64 */
+ else if (lookup(buf, "model name", &desc->modelname)) ;
+ else if (lookup(buf, "stepping", &desc->stepping)) ;
+ else if (lookup(buf, "CPU variant", &desc->stepping)) ; /* aarch64 */
+ else if (lookup(buf, "cpu MHz", &desc->mhz)) ;
+ else if (lookup(buf, "cpu MHz dynamic", &desc->dynamic_mhz)) ; /* s390 */
+ else if (lookup(buf, "cpu MHz static", &desc->static_mhz)) ; /* s390 */
+ else if (lookup(buf, "flags", &desc->flags)) ; /* x86 */
+ else if (lookup(buf, "features", &desc->flags)) ; /* s390 */
+ else if (lookup(buf, "Features", &desc->flags)) ; /* aarch64 */
+ else if (lookup(buf, "type", &desc->flags)) ; /* sparc64 */
+ else if (lookup(buf, "bogomips", &desc->bogomips)) ;
+ else if (lookup(buf, "BogoMIPS", &desc->bogomips)) ; /* aarch64 */
+ else if (lookup(buf, "bogomips per cpu", &desc->bogomips)) ; /* s390 */
+ else if (lookup(buf, "cpu", &desc->cpu)) ;
+ else if (lookup(buf, "revision", &desc->revision)) ;
+ else if (lookup(buf, "CPU revision", &desc->revision)) ; /* aarch64 */
+ else if (lookup(buf, "max thread id", &desc->mtid)) ; /* s390 */
+ else if (lookup(buf, "address sizes", &desc->addrsz)) ; /* x86 */
+ else if (lookup_cache(buf, desc)) ;
+ else
+ continue;
+ }
+
+ desc->mode = init_mode(mod);
+
+ if (desc->flags) {
+ /* pad with blanks so that every flag can be searched as " name " */
+ snprintf(buf, sizeof(buf), " %s ", desc->flags);
+ if (strstr(buf, " svm "))
+ desc->virtflag = xstrdup("svm");
+ else if (strstr(buf, " vmx "))
+ desc->virtflag = xstrdup("vmx");
+ if (strstr(buf, " lm "))
+ desc->mode |= MODE_32BIT | MODE_64BIT; /* x86_64 */
+ if (strstr(buf, " zarch "))
+ desc->mode |= MODE_32BIT | MODE_64BIT; /* s390x */
+ if (strstr(buf, " sun4v ") || strstr(buf, " sun4u "))
+ desc->mode |= MODE_32BIT | MODE_64BIT; /* sparc64 */
+ }
+
+ /* desc->arch comes from uname(), i.e. from the running system, so it is
+ * not used to derive the mode of a snapshot */
+ if (desc->arch && mod->system != SYSTEM_SNAPSHOT) {
+ if (strcmp(desc->arch, "ppc64") == 0)
+ desc->mode |= MODE_32BIT | MODE_64BIT;
+ else if (strcmp(desc->arch, "ppc") == 0)
+ desc->mode |= MODE_32BIT;
+ }
+
+ fclose(fp);
+
+ if (ul_path_read_s32(desc->syscpu, &maxcpus, "kernel_max") == 0)
+ /* note that kernel_max is maximum index [NR_CPUS-1] */
+ maxcpus += 1;
+
+ else if (mod->system == SYSTEM_LIVE)
+ /* the root is '/' so we are working with data from the current kernel */
+ maxcpus = get_max_number_of_cpus();
+
+ if (maxcpus <= 0)
+ /* error or we are reading some /sys snapshot instead of the
+ * real /sys, let's use any crazy number... */
+ maxcpus = 2048;
+
+ setsize = CPU_ALLOC_SIZE(maxcpus);
+
+ if (ul_path_readf_cpulist(desc->syscpu, &cpuset, maxcpus, "possible") == 0) {
+ int num, idx;
+
+ desc->ncpuspos = CPU_COUNT_S(setsize, cpuset);
+ desc->idx2cpunum = xcalloc(desc->ncpuspos, sizeof(int));
+
+ /* idx2cpunum[] lists the possible CPU numbers in ascending order */
+ for (num = 0, idx = 0; num < maxcpus; num++) {
+ if (CPU_ISSET_S(num, setsize, cpuset))
+ desc->idx2cpunum[idx++] = num;
+ }
+ cpuset_free(cpuset);
+ cpuset = NULL;
+ } else
+ err(EXIT_FAILURE, _("failed to determine number of CPUs: %s"),
+ _PATH_SYS_CPU "/possible");
+
+
+ /* get mask for present CPUs */
+ if (ul_path_readf_cpulist(desc->syscpu, &desc->present, maxcpus, "present") == 0)
+ desc->ncpus = CPU_COUNT_S(setsize, desc->present);
+
+ /* get mask for online CPUs */
+ if (ul_path_readf_cpulist(desc->syscpu, &desc->online, maxcpus, "online") == 0)
+ desc->nthreads = CPU_COUNT_S(setsize, desc->online);
+
+ /* get dispatching mode */
+ if (ul_path_read_s32(desc->syscpu, &desc->dispatching, "dispatching") != 0)
+ desc->dispatching = -1;
+
+ if (mod->system == SYSTEM_LIVE)
+ read_physical_info_powerpc(desc);
+
+ /* machine type: the first "Type" line of sysinfo, if that file exists */
+ if ((fp = ul_path_fopen(desc->procfs, "r", "sysinfo"))) {
+ while (fgets(buf, sizeof(buf), fp) != NULL && !desc->machinetype)
+ lookup(buf, "Type", &desc->machinetype);
+ fclose(fp);
+ }
+}
+
+/*
+ * Report whether "bus/pci/devices" below desc->procfs lists a device with
+ * the given @vendor and @device IDs.
+ *
+ * Returns 1 when such a line is found, 0 otherwise (including the case that
+ * the file cannot be opened). Scanning stops at the first line which does
+ * not match the expected layout.
+ */
+static int
+has_pci_device(struct lscpu_desc *desc, unsigned int vendor, unsigned int device)
+{
+	unsigned int busnum, devfn, ven_id, dev_id;
+	int found = 0;
+	FILE *devs = ul_path_fopen(desc->procfs, "r", "bus/pci/devices");
+
+	if (!devs)
+		return 0;
+
+	/* for more details about bus/pci/devices format see
+	 * drivers/pci/proc.c in linux kernel
+	 */
+	while (!found &&
+	       fscanf(devs, "%02x%02x\t%04x%04x\t%*[^\n]",
+		      &busnum, &devfn, &ven_id, &dev_id) == 4)
+		found = (ven_id == vendor && dev_id == device);
+
+	fclose(devs);
+	return found;
+}
+
+#if defined(__x86_64__) || defined(__i386__)
+
+/*
+ * This CPUID leaf returns the information about the hypervisor.
+ * EAX : maximum input value for CPUID supported by the hypervisor.
+ * EBX, ECX, EDX : Hypervisor vendor ID signature. E.g. VMwareVMware.
+ */
+#define HYPERVISOR_INFO_LEAF 0x40000000
+
+/*
+ * Execute the CPUID instruction with @op in EAX and 0 in ECX and store the
+ * resulting EAX/EBX/ECX/EDX values through the four pointers.
+ */
+static inline void
+cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ __asm__(
+#if defined(__PIC__) && defined(__i386__)
+ /* x86 PIC cannot clobber ebx -- gcc rejects it; EBX is therefore
+ * swapped with ESI around the instruction and the result is read
+ * from ESI */
+ "xchg %%ebx, %%esi;"
+ "cpuid;"
+ "xchg %%esi, %%ebx;"
+ : "=S" (*ebx),
+#else
+ "cpuid;"
+ : "=b" (*ebx),
+#endif
+ "=a" (*eax),
+ "=c" (*ecx),
+ "=d" (*edx)
+ /* "1" ties the input to output operand 1, i.e. EAX */
+ : "1" (op), "c"(0));
+}
+
+/*
+ * Set desc->hyper from the vendor signature of the hypervisor CPUID leaf.
+ * desc->hyper is left untouched when the signature is empty or unknown.
+ */
+static void
+read_hypervisor_cpuid(struct lscpu_desc *desc)
+{
+ unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
+ char hyper_vendor_id[13];
+
+ memset(hyper_vendor_id, 0, sizeof(hyper_vendor_id));
+
+ cpuid(HYPERVISOR_INFO_LEAF, &eax, &ebx, &ecx, &edx);
+ /* the 12-byte signature is the concatenation of EBX, ECX and EDX */
+ memcpy(hyper_vendor_id + 0, &ebx, 4);
+ memcpy(hyper_vendor_id + 4, &ecx, 4);
+ memcpy(hyper_vendor_id + 8, &edx, 4);
+ hyper_vendor_id[12] = '\0';
+
+ if (!hyper_vendor_id[0])
+ return;
+
+ if (!strncmp("XenVMMXenVMM", hyper_vendor_id, 12))
+ desc->hyper = HYPER_XEN;
+ /* only the first 9 bytes are compared for KVM */
+ else if (!strncmp("KVMKVMKVM", hyper_vendor_id, 9))
+ desc->hyper = HYPER_KVM;
+ else if (!strncmp("Microsoft Hv", hyper_vendor_id, 12))
+ desc->hyper = HYPER_MSHV;
+ else if (!strncmp("VMwareVMware", hyper_vendor_id, 12))
+ desc->hyper = HYPER_VMWARE;
+ else if (!strncmp("UnisysSpar64", hyper_vendor_id, 12))
+ desc->hyper = HYPER_SPAR;
+}
+
+#else /* ! (__x86_64__ || __i386__) */
+/* CPUID does not exist on other architectures; nothing to detect. */
+static void
+read_hypervisor_cpuid(struct lscpu_desc *desc __attribute__((__unused__)))
+{
+}
+#endif
+
+/*
+ * Check whether @str is one of the NUL-separated entries of
+ * "device-tree/compatible" below desc->procfs.
+ *
+ * At most the first 255 bytes of the file are examined. Returns 1 on an
+ * exact match, otherwise 0 (also when the file cannot be opened).
+ */
+static int is_devtree_compatible(struct lscpu_desc *desc, const char *str)
+{
+	char buf[256];
+	size_t pos = 0, nread;
+	FILE *fd = ul_path_fopen(desc->procfs, "r", "device-tree/compatible");
+
+	if (!fd)
+		return 0;
+
+	/* zero-filled, so the last entry is terminated even if truncated */
+	memset(buf, 0, sizeof(buf));
+	nread = fread(buf, 1, sizeof(buf) - 1, fd);
+	fclose(fd);
+
+	while (pos < nread) {
+		const char *entry = &buf[pos];
+
+		if (strcmp(entry, str) == 0)
+			return 1;
+		/* step over the entry and its terminating NUL */
+		pos += strlen(entry) + 1;
+	}
+	return 0;
+}
+
+/*
+ * PowerPC specific hypervisor detection based on files below desc->procfs.
+ *
+ * Must be called with desc->hyper unset. Sets desc->hyper and desc->virtype
+ * for the first matching platform and returns the resulting desc->hyper
+ * (which is HYPER_NONE when nothing matched and for PowerNV).
+ */
+static int
+read_hypervisor_powerpc(struct lscpu_desc *desc)
+{
+	assert(!desc->hyper);
+
+	/* IBM iSeries: legacy, para-virtualized on top of OS/400 */
+	if (ul_path_access(desc->procfs, F_OK, "iSeries") == 0) {
+		desc->hyper = HYPER_OS400;
+		desc->virtype = VIRT_PARA;
+		return desc->hyper;
+	}
+
+	/* PowerNV (POWER Non-Virtualized, bare-metal) */
+	if (is_devtree_compatible(desc, "ibm,powernv")) {
+		desc->hyper = HYPER_NONE;
+		desc->virtype = VIRT_NONE;
+		return desc->hyper;
+	}
+
+	/* PowerVM (IBM's proprietary hypervisor, aka pHyp) */
+	if (ul_path_access(desc->procfs, F_OK, "device-tree/ibm,partition-name") == 0
+	    && ul_path_access(desc->procfs, F_OK, "device-tree/hmc-managed?") == 0
+	    && ul_path_access(desc->procfs, F_OK, "device-tree/chosen/qemu,graphic-width") != 0) {
+		char name[256];
+		FILE *fd;
+
+		desc->hyper = HYPER_PHYP;
+		desc->virtype = VIRT_PARA;
+
+		/* a partition named "full" is reported as not virtualized */
+		fd = ul_path_fopen(desc->procfs, "r", "device-tree/ibm,partition-name");
+		if (fd) {
+			if (fscanf(fd, "%255s", name) == 1 && !strcmp(name, "full"))
+				desc->virtype = VIRT_NONE;
+			fclose(fd);
+		}
+		return desc->hyper;
+	}
+
+	/* Qemu */
+	if (is_devtree_compatible(desc, "qemu,pseries")) {
+		desc->hyper = HYPER_KVM;
+		desc->virtype = VIRT_PARA;
+	}
+	return desc->hyper;
+}
+
+#ifdef INCLUDE_VMWARE_BDOOR
+
+#define VMWARE_BDOOR_MAGIC          0x564D5868
+#define VMWARE_BDOOR_PORT           0x5658
+#define VMWARE_BDOOR_CMD_GETVERSION 10
+
+/*
+ * Issue the "get version" command on the VMware backdoor I/O port and store
+ * the register values found afterwards through the four pointers. The
+ * registers are loaded with the magic (EAX), 0 (EBX), the command (ECX)
+ * and the port (EDX) before the "inl" instruction.
+ */
+static UL_ASAN_BLACKLIST
+void vmware_bdoor(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+	__asm__(
+#if defined(__PIC__) && defined(__i386__)
+		/* x86 PIC cannot clobber ebx -- gcc rejects it; EBX is
+		 * swapped with ESI around the instruction instead */
+		"xchg %%ebx, %%esi;"
+		"inl (%%dx), %%eax;"
+		"xchg %%esi, %%ebx;"
+		: "=S" (*ebx),
+#else
+		"inl (%%dx), %%eax;"
+		: "=b" (*ebx),
+#endif
+		  "=a" (*eax),
+		  "=c" (*ecx),
+		  "=d" (*edx)
+		: "0" (VMWARE_BDOOR_MAGIC),
+		  "1" (VMWARE_BDOOR_CMD_GETVERSION),
+		  "2" (VMWARE_BDOOR_PORT),
+		  "3" (0)
+		: "memory");
+}
+
+/* sigsetjmp()/siglongjmp() operate on sigjmp_buf, not on jmp_buf */
+static sigjmp_buf segv_handler_env;
+
+/* SIGSEGV handler: jump back into is_vmware_platform(). */
+static void
+segv_handler(__attribute__((__unused__)) int sig,
+	     __attribute__((__unused__)) siginfo_t *info,
+	     __attribute__((__unused__)) void *ignored)
+{
+	siglongjmp(segv_handler_env, 1);
+}
+
+/*
+ * Probe the VMware backdoor port.
+ *
+ * Returns 1 when the backdoor answers with the expected magic, 0 otherwise
+ * (also for non-root callers and when the probe raises SIGSEGV). The
+ * previous SIGSEGV disposition is restored on every path once the temporary
+ * handler has been installed. Terminates through err() when the signal
+ * handler cannot be set or restored.
+ */
+static int
+is_vmware_platform(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+	struct sigaction act, oact;
+	volatile int detected = 0;
+
+	/*
+	 * FIXME: Not reliable for non-root users. Note it works as expected if
+	 * vmware_bdoor() is not optimized for PIE, but then it fails to build
+	 * on 32bit x86 systems. See lscpu git log for more details (commit
+	 * 7845b91dbc7690064a2be6df690e4aaba728fb04).  kzak [3-Nov-2016]
+	 */
+	if (getuid() != 0)
+		return 0;
+
+	/*
+	 * The assembly routine for vmware detection works
+	 * fine under vmware, even if ran as regular user. But
+	 * on real HW or under other hypervisors, it segfaults (which is
+	 * expected). So we temporarily install SIGSEGV handler to catch
+	 * the signal. All this magic is needed because lscpu
+	 * isn't supposed to require root privileges.
+	 *
+	 * The handler is installed before sigsetjmp() so that 'oact' is not
+	 * modified between sigsetjmp() and siglongjmp() and can be used to
+	 * restore the old disposition after a fault as well.
+	 */
+	memset(&act, 0, sizeof(act));
+	act.sa_sigaction = segv_handler;
+	act.sa_flags = SA_SIGINFO;
+
+	if (sigaction(SIGSEGV, &act, &oact))
+		err(EXIT_FAILURE, _("cannot set signal handler"));
+
+	if (sigsetjmp(segv_handler_env, 1) == 0) {
+		vmware_bdoor(&eax, &ebx, &ecx, &edx);
+		detected = eax != (uint32_t)-1 && ebx == VMWARE_BDOOR_MAGIC;
+	}
+	/* else: the probe faulted, 'detected' is still 0 */
+
+	if (sigaction(SIGSEGV, &oact, NULL))
+		err(EXIT_FAILURE, _("cannot restore signal handler"));
+
+	return detected;
+}
+
+#else /* ! INCLUDE_VMWARE_BDOOR */
+
+/* The backdoor probe is not built in; never report VMware. */
+static int
+is_vmware_platform(void)
+{
+	return 0;
+}
+
+#endif /* INCLUDE_VMWARE_BDOOR */
+
+/*
+ * Detect the hypervisor vendor (desc->hyper), the virtualization type
+ * (desc->virtype) and, for IBM systems, the hypervisor name
+ * (desc->hypervisor).
+ *
+ * The checks run in a fixed order and the first hit wins: WSL, then (not
+ * for snapshots) CPUID, DMI and the VMware backdoor, then PowerPC, Xen,
+ * known PCI devices, sysinfo (PR/SM and z/VM style control programs),
+ * OpenVZ, IBM vendor strings, UML and finally Linux-VServer.
+ */
+static void
+read_hypervisor(struct lscpu_desc *desc, struct lscpu_modifier *mod)
+{
+	FILE *fd;
+
+	/* We have to detect WSL first. is_vmware_platform() crashes on Windows 10. */
+
+	if ((fd = ul_path_fopen(desc->procfs, "r", "sys/kernel/osrelease"))) {
+		char buf[256];
+
+		if (fgets(buf, sizeof(buf), fd) != NULL) {
+			if (strstr(buf, "Microsoft")) {
+				desc->hyper = HYPER_WSL;
+				desc->virtype = VIRT_CONT;
+			}
+		}
+		fclose(fd);
+		if (desc->virtype)
+			return;
+	}
+
+	/* the following probes look at the running machine, not at a dump */
+	if (mod->system != SYSTEM_SNAPSHOT) {
+		read_hypervisor_cpuid(desc);
+		if (!desc->hyper)
+			desc->hyper = read_hypervisor_dmi();
+		if (!desc->hyper && is_vmware_platform())
+			desc->hyper = HYPER_VMWARE;
+	}
+
+	if (desc->hyper) {
+		desc->virtype = VIRT_FULL;
+
+		if (desc->hyper == HYPER_XEN) {
+			/* "%x" stores into an unsigned int */
+			unsigned int features;
+
+			fd = ul_prefix_fopen(desc->prefix, "r", _PATH_SYS_HYP_FEATURES);
+
+			if (fd && fscanf(fd, "%x", &features) == 1) {
+				/* Xen PV domain */
+				if (features & XEN_FEATURES_PV_MASK)
+					desc->virtype = VIRT_PARA;
+				/* Xen PVH domain */
+				else if ((features & XEN_FEATURES_PVH_MASK)
+							== XEN_FEATURES_PVH_MASK)
+					desc->virtype = VIRT_PARA;
+			}
+			if (fd)
+				fclose(fd);
+		}
+	} else if (read_hypervisor_powerpc(desc) > 0) {}
+
+	/* Xen para-virt or dom0 */
+	else if (ul_path_access(desc->procfs, F_OK, "xen") == 0) {
+		int dom0 = 0;
+
+		fd = ul_path_fopen(desc->procfs, "r", "xen/capabilities");
+		if (fd) {
+			char buf[256];
+
+			if (fscanf(fd, "%255s", buf) == 1 &&
+			    !strcmp(buf, "control_d"))
+				dom0 = 1;
+			fclose(fd);
+		}
+		desc->virtype = dom0 ? VIRT_NONE : VIRT_PARA;
+		desc->hyper = HYPER_XEN;
+
+	/* Xen full-virt on non-x86_64 */
+	} else if (has_pci_device(desc, hv_vendor_pci[HYPER_XEN], hv_graphics_pci[HYPER_XEN])) {
+		desc->hyper = HYPER_XEN;
+		desc->virtype = VIRT_FULL;
+	} else if (has_pci_device(desc, hv_vendor_pci[HYPER_VMWARE], hv_graphics_pci[HYPER_VMWARE])) {
+		desc->hyper = HYPER_VMWARE;
+		desc->virtype = VIRT_FULL;
+	} else if (has_pci_device(desc, hv_vendor_pci[HYPER_VBOX], hv_graphics_pci[HYPER_VBOX])) {
+		desc->hyper = HYPER_VBOX;
+		desc->virtype = VIRT_FULL;
+
+	/* IBM PR/SM */
+	} else if ((fd = ul_path_fopen(desc->procfs, "r", "sysinfo"))) {
+		char buf[BUFSIZ];
+
+		desc->hyper = HYPER_IBM;
+		desc->hypervisor = "PR/SM";
+		desc->virtype = VIRT_FULL;
+		while (fgets(buf, sizeof(buf), fd) != NULL) {
+			char *str, *p;
+
+			if (!strstr(buf, "Control Program:"))
+				continue;
+			if (!strstr(buf, "KVM"))
+				desc->hyper = HYPER_IBM;
+			else
+				desc->hyper = HYPER_KVM;
+			p = strchr(buf, ':');
+			if (!p)
+				continue;
+			xasprintf(&str, "%s", p + 1);
+
+			/* remove leading, trailing and repeating whitespace */
+			while (*str == ' ')
+				str++;
+			desc->hypervisor = str;
+			/* trim from the end, but never walk in front of the
+			 * first character (empty or all-blank value) */
+			str += strlen(str);
+			while (str > desc->hypervisor &&
+			       (str[-1] == '\n' || str[-1] == ' '))
+				*(--str) = '\0';
+			/* collapse every run of blanks to a single blank:
+			 * search for two adjacent blanks and drop one */
+			while ((str = strstr(desc->hypervisor, "  ")))
+				memmove(str, str + 1, strlen(str));
+			break;
+		}
+		fclose(fd);
+	}
+
+	/* OpenVZ/Virtuozzo - /proc/vz dir should exist
+	 *		      /proc/bc should not */
+	else if (ul_path_access(desc->procfs, F_OK, "vz") == 0 &&
+		 ul_path_access(desc->procfs, F_OK, "bc") != 0) {
+		desc->hyper = HYPER_PARALLELS;
+		desc->virtype = VIRT_CONT;
+
+	/* IBM */
+	} else if (desc->vendor &&
+		 (strcmp(desc->vendor, "PowerVM Lx86") == 0 ||
+		  strcmp(desc->vendor, "IBM/S390") == 0)) {
+		desc->hyper = HYPER_IBM;
+		desc->virtype = VIRT_FULL;
+
+	/* User-mode-linux */
+	} else if (desc->modelname && strstr(desc->modelname, "UML")) {
+		desc->hyper = HYPER_UML;
+		desc->virtype = VIRT_PARA;
+
+	/* Linux-VServer */
+	} else if ((fd = ul_path_fopen(desc->procfs, "r", "self/status"))) {
+		char buf[BUFSIZ];
+		char *val = NULL;
+
+		while (fgets(buf, sizeof(buf), fd) != NULL) {
+			if (lookup(buf, "VxID", &val))
+				break;
+		}
+		fclose(fd);
+
+		if (val) {
+			char *org = val;
+
+			/* a VxID made of digits only means Linux-VServer */
+			while (isdigit((unsigned char) *val))
+				++val;
+			if (!*val) {
+				desc->hyper = HYPER_VSERVER;
+				desc->virtype = VIRT_CONT;
+			}
+			free(org);
+		}
+	}
+}
+
+/*
+ * Store @set in @ary unless an equal set is already among its first *@items
+ * entries.
+ *
+ * Returns 0 when @set was appended (and *@items incremented), 1 when it was
+ * a duplicate (in which case @set is released) and -1 when @ary is NULL.
+ */
+static int add_cpuset_to_array(cpu_set_t **ary, int *items, cpu_set_t *set)
+{
+	size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+	int pos = 0;
+
+	if (ary == NULL)
+		return -1;
+
+	/* stop at the first stored set which equals @set */
+	while (pos < *items && !CPU_EQUAL_S(setsize, set, ary[pos]))
+		pos++;
+
+	if (pos < *items) {
+		/* already known, the new copy is not needed */
+		CPU_FREE(set);
+		return 1;
+	}
+
+	ary[*items] = set;
+	++*items;
+	return 0;
+}
+
+/*
+ * Read the topology information of the CPU stored at index @idx.
+ *
+ * Does nothing when the CPU has no topology/thread_siblings file. On the
+ * first call which gets past that check, the per-level maps and ID arrays
+ * are allocated (ncpuspos entries each) and desc->nthreads gets a computed
+ * fallback value if it is still 0. Afterwards the sibling masks of this CPU
+ * are registered in the maps and its core/socket/book/drawer IDs are stored
+ * at @idx (-1 when an ID cannot be read).
+ */
+static void
+read_topology(struct lscpu_desc *desc, int idx)
+{
+ cpu_set_t *thread_siblings, *core_siblings;
+ cpu_set_t *book_siblings, *drawer_siblings;
+ int coreid, socketid, bookid, drawerid;
+ int i, num = real_cpu_num(desc, idx);
+
+ if (ul_path_accessf(desc->syscpu, F_OK, "cpu%d/topology/thread_siblings", num) != 0)
+ return;
+
+ /* NOTE(review): the results are not checked and the pointers are not
+ * initialized here; the code below relies on the helper storing NULL
+ * when a file is missing (book/drawer) -- confirm in lib/path.c */
+ ul_path_readf_cpuset(desc->syscpu, &thread_siblings, maxcpus,
+ "cpu%d/topology/thread_siblings", num);
+ ul_path_readf_cpuset(desc->syscpu, &core_siblings, maxcpus,
+ "cpu%d/topology/core_siblings", num);
+ ul_path_readf_cpuset(desc->syscpu, &book_siblings, maxcpus,
+ "cpu%d/topology/book_siblings", num);
+ ul_path_readf_cpuset(desc->syscpu, &drawer_siblings, maxcpus,
+ "cpu%d/topology/drawer_siblings", num);
+
+ if (ul_path_readf_s32(desc->syscpu, &coreid, "cpu%d/topology/core_id", num) != 0)
+ coreid = -1;
+
+ if (ul_path_readf_s32(desc->syscpu, &socketid, "cpu%d/topology/physical_package_id", num) != 0)
+ socketid = -1;
+
+ if (ul_path_readf_s32(desc->syscpu, &bookid, "cpu%d/topology/book_id", num) != 0)
+ bookid = -1;
+
+ if (ul_path_readf_s32(desc->syscpu, &drawerid, "cpu%d/topology/drawer_id", num) != 0)
+ drawerid = -1;
+
+ /* one-time setup; desc->coremaps doubles as the "initialized" marker */
+ if (!desc->coremaps) {
+ int ndrawers, nbooks, nsockets, ncores, nthreads;
+ size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+
+ /* threads within one core */
+ nthreads = CPU_COUNT_S(setsize, thread_siblings);
+ if (!nthreads)
+ nthreads = 1;
+
+ /* cores within one socket */
+ ncores = CPU_COUNT_S(setsize, core_siblings) / nthreads;
+ if (!ncores)
+ ncores = 1;
+
+ /* number of sockets within one book. Because of odd /
+ * non-present cpu maps and to keep calculation easy we make
+ * sure that nsockets and nbooks is at least 1.
+ */
+ nsockets = desc->ncpus / nthreads / ncores;
+ if (!nsockets)
+ nsockets = 1;
+
+ /* number of books */
+ nbooks = desc->ncpus / nthreads / ncores / nsockets;
+ if (!nbooks)
+ nbooks = 1;
+
+ /* number of drawers */
+ ndrawers = desc->ncpus / nbooks / nthreads / ncores / nsockets;
+ if (!ndrawers)
+ ndrawers = 1;
+
+ /* all threads, see also read_basicinfo()
+ * -- fallback for kernels without
+ * /sys/devices/system/cpu/online.
+ */
+ if (!desc->nthreads)
+ desc->nthreads = ndrawers * nbooks * nsockets * ncores * nthreads;
+
+ /* For each map we make sure that it can have up to ncpuspos
+ * entries. This is because we cannot reliably calculate the
+ * number of cores, sockets and books on all architectures.
+ * E.g. completely virtualized architectures like s390 may
+ * have multiple sockets of different sizes.
+ */
+ desc->coremaps = xcalloc(desc->ncpuspos, sizeof(cpu_set_t *));
+ desc->socketmaps = xcalloc(desc->ncpuspos, sizeof(cpu_set_t *));
+ /* NOTE(review): sizeof(*desc->drawerids) is used as the element size
+ * of every ID array; presumably all of them share one element type
+ * -- confirm in lscpu.h */
+ desc->coreids = xcalloc(desc->ncpuspos, sizeof(*desc->drawerids));
+ desc->socketids = xcalloc(desc->ncpuspos, sizeof(*desc->drawerids));
+ for (i = 0; i < desc->ncpuspos; i++)
+ desc->coreids[i] = desc->socketids[i] = -1;
+ if (book_siblings) {
+ desc->bookmaps = xcalloc(desc->ncpuspos, sizeof(cpu_set_t *));
+ desc->bookids = xcalloc(desc->ncpuspos, sizeof(*desc->drawerids));
+ for (i = 0; i < desc->ncpuspos; i++)
+ desc->bookids[i] = -1;
+ }
+ if (drawer_siblings) {
+ desc->drawermaps = xcalloc(desc->ncpuspos, sizeof(cpu_set_t *));
+ desc->drawerids = xcalloc(desc->ncpuspos, sizeof(*desc->drawerids));
+ for (i = 0; i < desc->ncpuspos; i++)
+ desc->drawerids[i] = -1;
+ }
+ }
+
+ /* add_cpuset_to_array() takes ownership of the masks: a mask is either
+ * stored in the map or released when an equal one is already there */
+ add_cpuset_to_array(desc->socketmaps, &desc->nsockets, core_siblings);
+ desc->coreids[idx] = coreid;
+ add_cpuset_to_array(desc->coremaps, &desc->ncores, thread_siblings);
+ desc->socketids[idx] = socketid;
+ if (book_siblings) {
+ add_cpuset_to_array(desc->bookmaps, &desc->nbooks, book_siblings);
+ desc->bookids[idx] = bookid;
+ }
+ if (drawer_siblings) {
+ add_cpuset_to_array(desc->drawermaps, &desc->ndrawers, drawer_siblings);
+ desc->drawerids[idx] = drawerid;
+ }
+}
+
+/*
+ * Read the polarization of the CPU stored at index @idx into
+ * desc->polarization[idx].
+ *
+ * Nothing is done when no dispatching mode is known or when the CPU has no
+ * polarization file. The array is allocated on first use. A file which
+ * cannot be read, is empty or holds an unknown keyword yields POLAR_UNKNOWN.
+ */
+static void
+read_polarization(struct lscpu_desc *desc, int idx)
+{
+	char mode[64] = "";
+	int num = real_cpu_num(desc, idx);
+
+	if (desc->dispatching < 0)
+		return;
+	if (ul_path_accessf(desc->syscpu, F_OK, "cpu%d/polarization", num) != 0)
+		return;
+	if (!desc->polarization)
+		desc->polarization = xcalloc(desc->ncpuspos, sizeof(int));
+
+	/* never compare a buffer the helper did not fill */
+	if (ul_path_readf_buffer(desc->syscpu, mode, sizeof(mode),
+				 "cpu%d/polarization", num) <= 0)
+		mode[0] = '\0';
+
+	if (strncmp(mode, "vertical:low", sizeof(mode)) == 0)
+		desc->polarization[idx] = POLAR_VLOW;
+	else if (strncmp(mode, "vertical:medium", sizeof(mode)) == 0)
+		desc->polarization[idx] = POLAR_VMEDIUM;
+	else if (strncmp(mode, "vertical:high", sizeof(mode)) == 0)
+		desc->polarization[idx] = POLAR_VHIGH;
+	else if (strncmp(mode, "horizontal", sizeof(mode)) == 0)
+		desc->polarization[idx] = POLAR_HORIZONTAL;
+	else
+		desc->polarization[idx] = POLAR_UNKNOWN;
+}
+
+/*
+ * Store the content of the "address" file of the CPU at index @idx in
+ * desc->addresses[idx]; the array is allocated on first use. CPUs without
+ * that file are skipped.
+ */
+static void
+read_address(struct lscpu_desc *desc, int idx)
+{
+	const int cpunum = real_cpu_num(desc, idx);
+
+	if (ul_path_accessf(desc->syscpu, F_OK, "cpu%d/address", cpunum) == 0) {
+		if (desc->addresses == NULL)
+			desc->addresses = xcalloc(desc->ncpuspos, sizeof(int));
+		ul_path_readf_s32(desc->syscpu, &desc->addresses[idx], "cpu%d/address", cpunum);
+	}
+}
+
+/*
+ * Store the content of the "configure" file of the CPU at index @idx in
+ * desc->configured[idx]; the array is allocated on first use. CPUs without
+ * that file are skipped.
+ */
+static void
+read_configured(struct lscpu_desc *desc, int idx)
+{
+	const int cpunum = real_cpu_num(desc, idx);
+
+	if (ul_path_accessf(desc->syscpu, F_OK, "cpu%d/configure", cpunum) == 0) {
+		if (desc->configured == NULL)
+			desc->configured = xcalloc(desc->ncpuspos, sizeof(int));
+		ul_path_readf_s32(desc->syscpu, &desc->configured[idx], "cpu%d/configure", cpunum);
+	}
+}
+
+/*
+ * Format the highest per-CPU maximum frequency among all present CPUs into
+ * @buf ("%.4f") and return @buf. The result is "0.0000" when there is no
+ * present mask or no present CPU has a value.
+ */
+static char *
+cpu_max_mhz(struct lscpu_desc *desc, char *buf, size_t bufsz)
+{
+	const size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+	float highest = 0.0;
+	int i;
+
+	for (i = 0; desc->present && i < desc->ncpuspos; i++) {
+		float freq;
+
+		if (!CPU_ISSET_S(real_cpu_num(desc, i), setsize, desc->present))
+			continue;
+		if (!desc->maxmhz[i])
+			continue;
+
+		freq = atof(desc->maxmhz[i]);
+		if (freq > highest)
+			highest = freq;
+	}
+
+	snprintf(buf, bufsz, "%.4f", highest);
+	return buf;
+}
+
+/*
+ * Format the lowest per-CPU minimum frequency among all present CPUs into
+ * @buf ("%.4f") and return @buf. The result is "-1.0000" when there is no
+ * present mask or no present CPU has a value.
+ */
+static char *
+cpu_min_mhz(struct lscpu_desc *desc, char *buf, size_t bufsz)
+{
+	const size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+	float lowest = -1.0;	/* negative: nothing seen yet */
+	int i;
+
+	for (i = 0; desc->present && i < desc->ncpuspos; i++) {
+		float freq;
+
+		if (!CPU_ISSET_S(real_cpu_num(desc, i), setsize, desc->present))
+			continue;
+		if (!desc->minmhz[i])
+			continue;
+
+		freq = atof(desc->minmhz[i]);
+		if (lowest < 0.0 || freq < lowest)
+			lowest = freq;
+	}
+
+	snprintf(buf, bufsz, "%.4f", lowest);
+	return buf;
+}
+
+
+/*
+ * Read cpufreq/cpuinfo_max_freq of the CPU at index @idx, divide it by 1000
+ * and store the result as a "%.4f" string in desc->maxmhz[idx]. The array
+ * is allocated on first use; nothing happens when the file cannot be read.
+ */
+static void
+read_max_mhz(struct lscpu_desc *desc, int idx)
+{
+	int freq;
+	int cpunum = real_cpu_num(desc, idx);
+
+	if (ul_path_readf_s32(desc->syscpu, &freq, "cpu%d/cpufreq/cpuinfo_max_freq", cpunum) != 0)
+		return;
+
+	if (desc->maxmhz == NULL)
+		desc->maxmhz = xcalloc(desc->ncpuspos, sizeof(char *));
+
+	xasprintf(&desc->maxmhz[idx], "%.4f", (float) freq / 1000);
+}
+
+/*
+ * Read cpufreq/cpuinfo_min_freq of the CPU at index @idx, divide it by 1000
+ * and store the result as a "%.4f" string in desc->minmhz[idx]. The array
+ * is allocated on first use; nothing happens when the file cannot be read.
+ */
+static void
+read_min_mhz(struct lscpu_desc *desc, int idx)
+{
+	int freq;
+	int cpunum = real_cpu_num(desc, idx);
+
+	if (ul_path_readf_s32(desc->syscpu, &freq, "cpu%d/cpufreq/cpuinfo_min_freq", cpunum) != 0)
+		return;
+
+	if (desc->minmhz == NULL)
+		desc->minmhz = xcalloc(desc->ncpuspos, sizeof(char *));
+
+	xasprintf(&desc->minmhz[idx], "%.4f", (float) freq / 1000);
+}
+
+/*
+ * qsort() callback for arrays of struct cpu_cache: compares the names with
+ * the operands swapped, i.e. the result is strcmp(b->name, a->name).
+ */
+static int
+cachecmp(const void *a, const void *b)
+{
+	const char *lhs = ((const struct cpu_cache *) a)->name;
+	const char *rhs = ((const struct cpu_cache *) b)->name;
+
+	return strcmp(rhs, lhs);
+}
+
+/*
+ * Collect cache information for the CPU at array position idx.
+ *
+ * On the first call that finds any cpu<N>/cache/index<M> entry the number of
+ * caches is counted and desc->caches is allocated.  For every cache index
+ * that exists for this CPU the name ("L<level>", optionally followed by 'd'
+ * or 'i') and size are filled in once, and the shared_cpu_map is added to
+ * the cache's list of shared maps.
+ */
+static void
+read_cache(struct lscpu_desc *desc, int idx)
+{
+	char buf[256];
+	int i;
+	int num = real_cpu_num(desc, idx);
+
+	if (!desc->ncaches) {
+		while (ul_path_accessf(desc->syscpu, F_OK,
+					"cpu%d/cache/index%d",
+					num, desc->ncaches) == 0)
+			desc->ncaches++;
+
+		if (!desc->ncaches)
+			return;
+		desc->caches = xcalloc(desc->ncaches, sizeof(*desc->caches));
+	}
+	for (i = 0; i < desc->ncaches; i++) {
+		struct cpu_cache *ca = &desc->caches[i];
+		cpu_set_t *map = NULL;
+
+		if (ul_path_accessf(desc->syscpu, F_OK,
+					"cpu%d/cache/index%d", num, i) != 0)
+			continue;
+		if (!ca->name) {
+			/* level starts at 0 so a failed read never prints garbage */
+			int type = 0, level = 0;
+
+			/* cache type */
+			if (ul_path_readf_buffer(desc->syscpu, buf, sizeof(buf),
+					"cpu%d/cache/index%d/type", num, i) > 0) {
+				if (!strcmp(buf, "Data"))
+					type = 'd';
+				else if (!strcmp(buf, "Instruction"))
+					type = 'i';
+			}
+
+			/* cache level */
+			ul_path_readf_s32(desc->syscpu, &level,
+					"cpu%d/cache/index%d/level", num, i);
+			if (type)
+				snprintf(buf, sizeof(buf), "L%d%c", level, type);
+			else
+				snprintf(buf, sizeof(buf), "L%d", level);
+
+			ca->name = xstrdup(buf);
+
+			/* cache size */
+			if (ul_path_readf_string(desc->syscpu, &ca->size,
+					"cpu%d/cache/index%d/size", num, i) < 0)
+				ca->size = xstrdup("unknown size");
+		}
+
+		/*
+		 * Information about how CPUs share different caches.
+		 *
+		 * NOTE(review): assumes ul_path_readf_cpuset() reports failure
+		 * with a negative return like ul_path_readf_string() above --
+		 * confirm.  An unreadable map is skipped instead of being
+		 * passed on uninitialized.
+		 */
+		if (ul_path_readf_cpuset(desc->syscpu, &map, maxcpus,
+				"cpu%d/cache/index%d/shared_cpu_map", num, i) < 0
+		    || !map)
+			continue;
+
+		if (!ca->sharedmaps)
+			ca->sharedmaps = xcalloc(desc->ncpuspos, sizeof(cpu_set_t *));
+		add_cpuset_to_array(ca->sharedmaps, &ca->nsharedmaps, map);
+	}
+}
+
+/*
+ * Returns 1 when d is a directory entry named "node<digits>", otherwise 0.
+ * Where the platform provides d_type, only directories and entries of
+ * unknown type are accepted.
+ */
+static inline int is_node_dirent(struct dirent *d)
+{
+	if (!d)
+		return 0;
+#ifdef _DIRENT_HAVE_D_TYPE
+	if (d->d_type != DT_DIR && d->d_type != DT_UNKNOWN)
+		return 0;
+#endif
+	if (strncmp(d->d_name, "node", 4) != 0)
+		return 0;
+
+	return isdigit_string(d->d_name + 4) != 0;
+}
+
+/*
+ * qsort() callback: ascending order of int values.
+ *
+ * Uses a three-way comparison rather than subtraction, so the result is
+ * well defined for any pair of ints (a - b can overflow).
+ */
+static int
+nodecmp(const void *ap, const void *bp)
+{
+	int a = *(const int *) ap;
+	int b = *(const int *) bp;
+
+	return (a > b) - (a < b);
+}
+
+/*
+ * Discover NUMA nodes below _PATH_SYS_NODE (honouring desc->prefix).
+ *
+ * Fills desc->nnodes, desc->idx2nodenum (node numbers, sorted ascending) and
+ * desc->nodemaps (the cpumap of each node, in the same order).  When the
+ * directory cannot be opened or holds no node entries, desc->nnodes stays 0.
+ */
+static void
+read_nodes(struct lscpu_desc *desc)
+{
+	int i = 0;
+	DIR *dir;
+	struct dirent *d;
+	struct path_cxt *sysnode;
+
+	desc->nnodes = 0;
+
+	sysnode = ul_new_path(_PATH_SYS_NODE);
+	if (!sysnode)
+		err(EXIT_FAILURE, _("failed to initialize %s handler"), _PATH_SYS_NODE);
+	ul_path_set_prefix(sysnode, desc->prefix);
+
+	dir = ul_path_opendir(sysnode, NULL);
+	if (!dir)
+		goto done;
+
+	/* first pass: count the node entries */
+	while ((d = readdir(dir))) {
+		if (is_node_dirent(d))
+			desc->nnodes++;
+	}
+
+	if (!desc->nnodes) {
+		closedir(dir);
+		goto done;
+	}
+
+	desc->nodemaps = xcalloc(desc->nnodes, sizeof(cpu_set_t *));
+	desc->idx2nodenum = xmalloc(desc->nnodes * sizeof(int));
+
+	/* second pass: record the node numbers */
+	rewinddir(dir);
+	while ((d = readdir(dir)) && i < desc->nnodes) {
+		if (is_node_dirent(d))
+			desc->idx2nodenum[i++] = strtol_or_err(((d->d_name) + 4),
+						_("Failed to extract the node number"));
+	}
+	closedir(dir);
+
+	/*
+	 * The directory may have changed between the two passes; only the
+	 * entries actually parsed are initialized, so never sort or use more.
+	 */
+	desc->nnodes = i;
+	qsort(desc->idx2nodenum, desc->nnodes, sizeof(int), nodecmp);
+
+	/* information about how nodes share different CPUs */
+	for (i = 0; i < desc->nnodes; i++)
+		ul_path_readf_cpuset(sysnode, &desc->nodemaps[i], maxcpus,
+				"node%d/cpumap", desc->idx2nodenum[i]);
+done:
+	ul_unref_path(sysnode);
+}
+
+/*
+ * Helper for get_cell_data(): format one topology level (core, socket,
+ * book or drawer) of a CPU into buf.
+ *
+ * In physical mode the id from ids[idx] is printed, or "-" when it is -1.
+ * Otherwise the index of the map in maps[] that contains cpu is printed;
+ * buf is left untouched when no map contains it.
+ */
+static void
+format_topology_id(char *buf, size_t bufsz, int physical,
+		   const int *ids, int idx, int cpu,
+		   cpu_set_t **maps, int nmaps, size_t setsize)
+{
+	size_t pos;
+
+	if (physical) {
+		/*
+		 * NOTE(review): the id arrays are allocated outside this
+		 * function; a missing array is treated like an unknown id.
+		 */
+		if (!ids || ids[idx] == -1)
+			snprintf(buf, bufsz, "-");
+		else
+			snprintf(buf, bufsz, "%d", ids[idx]);
+	} else if (cpuset_ary_isset(cpu, maps, nmaps, setsize, &pos) == 0)
+		snprintf(buf, bufsz, "%zu", pos);
+}
+
+/*
+ * Format the value of column col for the CPU at array position idx.
+ *
+ * The text is written to buf (bufsz bytes), which is emptied first; columns
+ * without data leave it empty.  Returns buf, or NULL when the CACHE column
+ * does not fit into the buffer.
+ */
+static char *
+get_cell_data(struct lscpu_desc *desc, int idx, int col,
+	      struct lscpu_modifier *mod,
+	      char *buf, size_t bufsz)
+{
+	size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+	size_t i;
+	int cpu = real_cpu_num(desc, idx);
+
+	*buf = '\0';
+
+	switch (col) {
+	case COL_CPU:
+		snprintf(buf, bufsz, "%d", cpu);
+		break;
+	case COL_CORE:
+		format_topology_id(buf, bufsz, mod->physical,
+				desc->coreids, idx, cpu,
+				desc->coremaps, desc->ncores, setsize);
+		break;
+	case COL_SOCKET:
+		format_topology_id(buf, bufsz, mod->physical,
+				desc->socketids, idx, cpu,
+				desc->socketmaps, desc->nsockets, setsize);
+		break;
+	case COL_NODE:
+		if (cpuset_ary_isset(cpu, desc->nodemaps,
+				     desc->nnodes, setsize, &i) == 0)
+			snprintf(buf, bufsz, "%d", desc->idx2nodenum[i]);
+		break;
+	case COL_DRAWER:
+		format_topology_id(buf, bufsz, mod->physical,
+				desc->drawerids, idx, cpu,
+				desc->drawermaps, desc->ndrawers, setsize);
+		break;
+	case COL_BOOK:
+		format_topology_id(buf, bufsz, mod->physical,
+				desc->bookids, idx, cpu,
+				desc->bookmaps, desc->nbooks, setsize);
+		break;
+	case COL_CACHE:
+	{
+		/* one field per cache, last array entry first */
+		char *p = buf;
+		size_t sz = bufsz;
+		int j;
+
+		for (j = desc->ncaches - 1; j >= 0; j--) {
+			struct cpu_cache *ca = &desc->caches[j];
+
+			if (cpuset_ary_isset(cpu, ca->sharedmaps,
+					     ca->nsharedmaps, setsize, &i) == 0) {
+				int x = snprintf(p, sz, "%zu", i);
+				if (x < 0 || (size_t) x >= sz)
+					return NULL;
+				p += x;
+				sz -= x;
+			}
+			if (j != 0) {
+				/* room for the separator and the terminator */
+				if (sz < 2)
+					return NULL;
+				*p++ = mod->compat ? ',' : ':';
+				*p = '\0';
+				sz--;
+			}
+		}
+		break;
+	}
+	case COL_POLARIZATION:
+		if (desc->polarization) {
+			int x = desc->polarization[idx];
+
+			snprintf(buf, bufsz, "%s",
+				 mod->mode == OUTPUT_PARSABLE ?
+						polar_modes[x].parsable :
+						polar_modes[x].readable);
+		}
+		break;
+	case COL_ADDRESS:
+		if (desc->addresses)
+			snprintf(buf, bufsz, "%d", desc->addresses[idx]);
+		break;
+	case COL_CONFIGURED:
+		if (!desc->configured)
+			break;
+		if (mod->mode == OUTPUT_PARSABLE)
+			snprintf(buf, bufsz, "%s",
+				 desc->configured[idx] ? _("Y") : _("N"));
+		else
+			snprintf(buf, bufsz, "%s",
+				 desc->configured[idx] ? _("yes") : _("no"));
+		break;
+	case COL_ONLINE:
+		if (!desc->online)
+			break;
+		if (mod->mode == OUTPUT_PARSABLE)
+			snprintf(buf, bufsz, "%s",
+				 is_cpu_online(desc, cpu) ? _("Y") : _("N"));
+		else
+			snprintf(buf, bufsz, "%s",
+				 is_cpu_online(desc, cpu) ? _("yes") : _("no"));
+		break;
+	case COL_MAXMHZ:
+		if (desc->maxmhz && desc->maxmhz[idx])
+			xstrncpy(buf, desc->maxmhz[idx], bufsz);
+		break;
+	case COL_MINMHZ:
+		if (desc->minmhz && desc->minmhz[idx])
+			xstrncpy(buf, desc->minmhz[idx], bufsz);
+		break;
+	default:
+		/* unknown column: keep the empty string */
+		break;
+	}
+	return buf;
+}
+
+/*
+ * Produce the header text of column col in buf and return buf.
+ *
+ * The CACHE column is special when caches are known: its header is the list
+ * of cache names, last array entry first, joined by ',' (compat mode) or
+ * ':'.  NULL is returned if that list does not fit into bufsz bytes.  Every
+ * other case uses the static name from coldescs[].
+ */
+static char *
+get_cell_header(struct lscpu_desc *desc, int col,
+		struct lscpu_modifier *mod,
+		char *buf, size_t bufsz)
+{
+	*buf = '\0';
+
+	if (col == COL_CACHE && desc->ncaches) {
+		char sep = mod->compat ? ',' : ':';
+		char *out = buf;
+		size_t left = bufsz;
+		int n = desc->ncaches;
+
+		while (n-- > 0) {
+			int len = snprintf(out, left, "%s", desc->caches[n].name);
+
+			if (len < 0 || (size_t) len >= left)
+				return NULL;
+			left -= len;
+			out += len;
+
+			if (n == 0)
+				break;
+			/* room for the separator and the terminator */
+			if (left < 2)
+				return NULL;
+			*out++ = sep;
+			*out = '\0';
+			left--;
+		}
+		return buf;
+	}
+
+	snprintf(buf, bufsz, "%s", coldescs[col].name);
+	return buf;
+}
+
+/*
+ * [-p] backend, we support two parsable formats:
+ *
+ * 1) "compatible" -- this format is compatible with the original lscpu(1)
+ * output and it contains a fixed set of columns. The CACHE columns are at
+ * the end of the line and the CACHE is not printed if the number of caches
+ * is zero. The CACHE columns are separated by two commas, for example:
+ *
+ *	$ lscpu --parse
+ *	# CPU,Core,Socket,Node,,L1d,L1i,L2
+ *	0,0,0,0,,0,0,0
+ *	1,1,0,0,,1,1,0
+ *
+ * 2) "user defined output" -- this format always prints all columns without
+ * a special prefix for the CACHE column. If there are no caches then the
+ * column is empty and the header "Cache" is printed rather than a real name
+ * of the cache. The CACHE columns are separated by ':'.
+ *
+ *	$ lscpu --parse=CPU,CORE,SOCKET,NODE,CACHE
+ *	# CPU,Core,Socket,Node,L1d:L1i:L2
+ *	0,0,0,0,0:0:0
+ *	1,1,0,0,1:1:0
+ *
+ * cols[] holds ncols column ids; mod selects the compat format and which
+ * CPUs (online/offline) are listed.  Output goes to stdout.
+ */
+static void
+print_parsable(struct lscpu_desc *desc, int cols[], int ncols,
+	       struct lscpu_modifier *mod)
+{
+	char buf[BUFSIZ], *data;
+	int i;
+
+	/*
+	 * Header
+	 */
+	printf(_(
+	"# The following is the parsable format, which can be fed to other\n"
+	"# programs. Each different item in every column has an unique ID\n"
+	"# starting from zero.\n"));
+
+	fputs("# ", stdout);
+	for (i = 0; i < ncols; i++) {
+		int col = cols[i];
+
+		if (col == COL_CACHE) {
+			if (mod->compat && !desc->ncaches)
+				continue;
+			/* compat format: extra comma in front of the caches */
+			if (mod->compat && i != 0)
+				putchar(',');
+		}
+		if (i > 0)
+			putchar(',');
+
+		data = get_cell_header(desc, col, mod, buf, sizeof(buf));
+
+		if (data && * data && col != COL_CACHE &&
+		    !coldescs[col].is_abbr) {
+			/*
+			 * For normal column names use mixed case (e.g. "Socket")
+			 */
+			char *p;
+
+			/* <ctype.h> functions need an unsigned char value */
+			for (p = data + 1; *p != '\0'; p++)
+				*p = tolower((unsigned char) *p);
+		}
+		fputs(data && *data ? data : "", stdout);
+	}
+	putchar('\n');
+
+	/*
+	 * Data
+	 */
+	for (i = 0; i < desc->ncpuspos; i++) {
+		int c;
+		int cpu = real_cpu_num(desc, i);
+
+		if (!mod->offline && desc->online && !is_cpu_online(desc, cpu))
+			continue;
+		if (!mod->online && desc->online && is_cpu_online(desc, cpu))
+			continue;
+		if (desc->present && !is_cpu_present(desc, cpu))
+			continue;
+		for (c = 0; c < ncols; c++) {
+			if (mod->compat && cols[c] == COL_CACHE) {
+				if (!desc->ncaches)
+					continue;
+				if (c > 0)
+					putchar(',');
+			}
+			if (c > 0)
+				putchar(',');
+
+			data = get_cell_data(desc, i, cols[c], mod,
+					     buf, sizeof(buf));
+			fputs(data && *data ? data : "", stdout);
+		}
+		putchar('\n');
+	}
+}
+
+/*
+ * [-e] backend
+ */
+/*
+ * Print the per-CPU table through libsmartcols: one table column per entry
+ * of cols[], one line per CPU that passes the online/offline/present
+ * filters.  Cells without data are shown as "-".  JSON output is enabled
+ * when mod->json is set.
+ */
+static void
+print_readable(struct lscpu_desc *desc, int cols[], int ncols,
+	       struct lscpu_modifier *mod)
+{
+	char cell[BUFSIZ];
+	struct libscols_table *table;
+	int n;
+
+	scols_init_debug(0);
+
+	table = scols_new_table();
+	if (table == NULL)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+	if (mod->json) {
+		scols_table_enable_json(table, 1);
+		scols_table_set_name(table, "cpus");
+	}
+
+	/* table header */
+	n = 0;
+	while (n < ncols) {
+		const char *title = get_cell_header(desc, cols[n], mod,
+						    cell, sizeof(cell));
+
+		if (scols_table_new_column(table, title, 0, 0) == NULL)
+			err(EXIT_FAILURE, _("failed to allocate output column"));
+		n++;
+	}
+
+	/* table body */
+	for (n = 0; n < desc->ncpuspos; n++) {
+		struct libscols_line *line;
+		int cpu = real_cpu_num(desc, n);
+		int c;
+
+		if (desc->online) {
+			if (!mod->offline && !is_cpu_online(desc, cpu))
+				continue;
+			if (!mod->online && is_cpu_online(desc, cpu))
+				continue;
+		}
+		if (desc->present && !is_cpu_present(desc, cpu))
+			continue;
+
+		line = scols_table_new_line(table, NULL);
+		if (line == NULL)
+			err(EXIT_FAILURE, _("failed to allocate output line"));
+
+		for (c = 0; c < ncols; c++) {
+			const char *text = get_cell_data(desc, n, cols[c], mod,
+							 cell, sizeof(cell));
+
+			if (text == NULL || *text == '\0')
+				text = "-";
+			if (scols_line_set_data(line, c, text) != 0)
+				err(EXIT_FAILURE, _("failed to add output data"));
+		}
+	}
+
+	scols_print_table(table);
+	scols_unref_table(table);
+}
+
+
+/*
+ * Append one line to the summary table: txt goes to column 0, the
+ * printf-style formatted value to column 1.  The formatted string is handed
+ * over to the line by reference.  Any failure terminates the program.
+ */
+static void __attribute__ ((__format__(printf, 3, 4)))
+	add_summary_sprint(struct libscols_table *tb,
+			const char *txt,
+			const char *fmt,
+			...)
+{
+	struct libscols_line *ln = scols_table_new_line(tb, NULL);
+	char *data;
+	va_list args;
+
+	if (!ln)
+		err(EXIT_FAILURE, _("failed to allocate output line"));
+
+	/* description column; checked like the data cells in print_readable() */
+	if (scols_line_set_data(ln, 0, txt))
+		err(EXIT_FAILURE, _("failed to add output data"));
+
+	/* data column */
+	va_start(args, fmt);
+	xvasprintf(&data, fmt, args);
+	va_end(args);
+
+	if (data && scols_line_refer_data(ln, 1, data))
+		err(EXIT_FAILURE, _("failed to add output data"));
+}
+
+/* Shortcuts for add_summary_sprint(): a single "%d" or "%s" value. */
+#define add_summary_n(tb, txt, num) add_summary_sprint(tb, txt, "%d", num)
+#define add_summary_s(tb, txt, str) add_summary_sprint(tb, txt, "%s", str)
+
+/*
+ * Add a summary line "key: <cpus>" where the CPU set is rendered either as
+ * a hexadecimal mask (hex != 0) or as a CPU list.
+ */
+static void
+print_cpuset(struct libscols_table *tb,
+	     const char *key, cpu_set_t *set, int hex)
+{
+	size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+	size_t setbuflen = 7 * maxcpus;
+	char setbuf[setbuflen];
+	char *text;
+
+	text = hex ? cpumask_create(setbuf, setbuflen, set, setsize)
+		   : cpulist_create(setbuf, setbuflen, set, setsize);
+
+	add_summary_s(tb, key, text);
+}
+
+/*
+ * default output
+ *
+ * Builds a two-column ("field", "data") libsmartcols table without headings
+ * and adds one line per known fact about the system; most lines are only
+ * added when the corresponding lscpu_desc member is set.  JSON output is
+ * enabled when mod->json is set; mod->hex selects CPU masks instead of CPU
+ * lists.  The table is printed and released at the end.
+ */
+static void
+print_summary(struct lscpu_desc *desc, struct lscpu_modifier *mod)
+{
+ char buf[BUFSIZ];
+ int i = 0;
+ size_t setsize = CPU_ALLOC_SIZE(maxcpus);
+ struct libscols_table *tb;
+
+ scols_init_debug(0);
+
+ tb = scols_new_table();
+ if (!tb)
+ err(EXIT_FAILURE, _("failed to allocate output table"));
+
+ scols_table_enable_noheadings(tb, 1);
+ if (mod->json) {
+ scols_table_enable_json(tb, 1);
+ scols_table_set_name(tb, "lscpu");
+ }
+
+ if (scols_table_new_column(tb, "field", 0, 0) == NULL ||
+ scols_table_new_column(tb, "data", 0, SCOLS_FL_NOEXTREMES) == NULL)
+ err(EXIT_FAILURE, _("failed to initialize output column"));
+
+ add_summary_s(tb, _("Architecture:"), desc->arch);
+ if (desc->mode) {
+ char *p = buf;
+
+ /* each piece is 8 characters long, including the trailing ", " */
+ if (desc->mode & MODE_32BIT) {
+ strcpy(p, "32-bit, ");
+ p += 8;
+ }
+ if (desc->mode & MODE_64BIT) {
+ strcpy(p, "64-bit, ");
+ p += 8;
+ }
+ /* cut off the final ", " */
+ *(p - 2) = '\0';
+ add_summary_s(tb, _("CPU op-mode(s):"), buf);
+ }
+#if !defined(WORDS_BIGENDIAN)
+ add_summary_s(tb, _("Byte Order:"), "Little Endian");
+#else
+ add_summary_s(tb, _("Byte Order:"), "Big Endian");
+#endif
+
+ if (desc->addrsz)
+ add_summary_s(tb, _("Address sizes:"), desc->addrsz);
+
+ add_summary_n(tb, _("CPU(s):"), desc->ncpus);
+
+ if (desc->online)
+ print_cpuset(tb, mod->hex ? _("On-line CPU(s) mask:") :
+ _("On-line CPU(s) list:"),
+ desc->online, mod->hex);
+
+ if (desc->online && CPU_COUNT_S(setsize, desc->online) != desc->ncpus) {
+ cpu_set_t *set;
+
+ /* Linux kernel provides cpuset of off-line CPUs that contains
+ * all configured CPUs (see /sys/devices/system/cpu/offline),
+ * but want to print real (present in system) off-line CPUs only.
+ */
+ set = cpuset_alloc(maxcpus, NULL, NULL);
+ if (!set)
+ err(EXIT_FAILURE, _("failed to callocate cpu set"));
+ CPU_ZERO_S(setsize, set);
+ for (i = 0; i < desc->ncpuspos; i++) {
+ int cpu = real_cpu_num(desc, i);
+ if (!is_cpu_online(desc, cpu) && is_cpu_present(desc, cpu))
+ CPU_SET_S(cpu, setsize, set);
+ }
+ print_cpuset(tb, mod->hex ? _("Off-line CPU(s) mask:") :
+ _("Off-line CPU(s) list:"),
+ set, mod->hex);
+ cpuset_free(set);
+ }
+
+ if (desc->nsockets) {
+ int threads_per_core, cores_per_socket, sockets_per_book;
+ int books_per_drawer, drawers;
+ FILE *fd;
+
+ threads_per_core = cores_per_socket = sockets_per_book = 0;
+ books_per_drawer = drawers = 0;
+ /* s390 detects its cpu topology via /proc/sysinfo, if present.
+ * Using simply the cpu topology masks in sysfs will not give
+ * usable results since everything is virtualized. E.g.
+ * virtual core 0 may have only 1 cpu, but virtual core 2 may
+ * five cpus.
+ * If the cpu topology is not exported (e.g. 2nd level guest)
+ * fall back to old calculation scheme.
+ */
+ if ((fd = ul_path_fopen(desc->procfs, "r", "sysinfo"))) {
+ int t0, t1;
+
+ while (fd && fgets(buf, sizeof(buf), fd) != NULL) {
+ if (sscanf(buf, "CPU Topology SW:%d%d%d%d%d%d",
+ &t0, &t1, &drawers, &books_per_drawer,
+ &sockets_per_book,
+ &cores_per_socket) == 6)
+ break;
+ }
+ if (fd)
+ fclose(fd);
+ }
+ if (desc->mtid)
+ threads_per_core = atoi(desc->mtid) + 1;
+ /* values still 0 here fall back to the counts derived from the maps */
+ add_summary_n(tb, _("Thread(s) per core:"),
+ threads_per_core ?: desc->nthreads / desc->ncores);
+ add_summary_n(tb, _("Core(s) per socket:"),
+ cores_per_socket ?: desc->ncores / desc->nsockets);
+ if (desc->nbooks) {
+ add_summary_n(tb, _("Socket(s) per book:"),
+ sockets_per_book ?: desc->nsockets / desc->nbooks);
+ if (desc->ndrawers) {
+ add_summary_n(tb, _("Book(s) per drawer:"),
+ books_per_drawer ?: desc->nbooks / desc->ndrawers);
+ add_summary_n(tb, _("Drawer(s):"), drawers ?: desc->ndrawers);
+ } else {
+ add_summary_n(tb, _("Book(s):"), books_per_drawer ?: desc->nbooks);
+ }
+ } else {
+ add_summary_n(tb, _("Socket(s):"), sockets_per_book ?: desc->nsockets);
+ }
+ }
+ if (desc->nnodes)
+ add_summary_n(tb, _("NUMA node(s):"), desc->nnodes);
+ if (desc->vendor)
+ add_summary_s(tb, _("Vendor ID:"), desc->vendor);
+ if (desc->machinetype)
+ add_summary_s(tb, _("Machine type:"), desc->machinetype);
+ if (desc->family)
+ add_summary_s(tb, _("CPU family:"), desc->family);
+ if (desc->model || desc->revision)
+ add_summary_s(tb, _("Model:"), desc->revision ? desc->revision : desc->model);
+ if (desc->modelname || desc->cpu)
+ add_summary_s(tb, _("Model name:"), desc->cpu ? desc->cpu : desc->modelname);
+ if (desc->stepping)
+ add_summary_s(tb, _("Stepping:"), desc->stepping);
+ if (desc->mhz)
+ add_summary_s(tb, _("CPU MHz:"), desc->mhz);
+ if (desc->dynamic_mhz)
+ add_summary_s(tb, _("CPU dynamic MHz:"), desc->dynamic_mhz);
+ if (desc->static_mhz)
+ add_summary_s(tb, _("CPU static MHz:"), desc->static_mhz);
+ if (desc->maxmhz)
+ add_summary_s(tb, _("CPU max MHz:"), cpu_max_mhz(desc, buf, sizeof(buf)));
+ if (desc->minmhz)
+ add_summary_s(tb, _("CPU min MHz:"), cpu_min_mhz(desc, buf, sizeof(buf)));
+ if (desc->bogomips)
+ add_summary_s(tb, _("BogoMIPS:"), desc->bogomips);
+ if (desc->virtflag) {
+ if (!strcmp(desc->virtflag, "svm"))
+ add_summary_s(tb, _("Virtualization:"), "AMD-V");
+ else if (!strcmp(desc->virtflag, "vmx"))
+ add_summary_s(tb, _("Virtualization:"), "VT-x");
+ }
+ if (desc->hypervisor)
+ add_summary_s(tb, _("Hypervisor:"), desc->hypervisor);
+ if (desc->hyper) {
+ add_summary_s(tb, _("Hypervisor vendor:"), hv_vendors[desc->hyper]);
+ add_summary_s(tb, _("Virtualization type:"), _(virt_types[desc->virtype]));
+ }
+ if (desc->dispatching >= 0)
+ add_summary_s(tb, _("Dispatching mode:"), _(disp_modes[desc->dispatching]));
+ /* caches are listed from the last array entry to the first */
+ if (desc->ncaches) {
+ for (i = desc->ncaches - 1; i >= 0; i--) {
+ snprintf(buf, sizeof(buf),
+ _("%s cache:"), desc->caches[i].name);
+ add_summary_s(tb, buf, desc->caches[i].size);
+ }
+ }
+ if (desc->necaches) {
+ for (i = desc->necaches - 1; i >= 0; i--) {
+ snprintf(buf, sizeof(buf),
+ _("%s cache:"), desc->ecaches[i].name);
+ add_summary_s(tb, buf, desc->ecaches[i].size);
+ }
+ }
+
+ for (i = 0; i < desc->nnodes; i++) {
+ snprintf(buf, sizeof(buf), _("NUMA node%d CPU(s):"), desc->idx2nodenum[i]);
+ print_cpuset(tb, buf, desc->nodemaps[i], mod->hex);
+ }
+
+ if (desc->physsockets) {
+ add_summary_n(tb, _("Physical sockets:"), desc->physsockets);
+ add_summary_n(tb, _("Physical chips:"), desc->physchips);
+ add_summary_n(tb, _("Physical cores/chip:"), desc->physcoresperchip);
+ }
+
+ if (desc->flags)
+ add_summary_s(tb, _("Flags:"), desc->flags);
+
+ scols_print_table(tb);
+ scols_unref_table(tb);
+}
+
+/*
+ * Print the help text (synopsis, options, available columns) to stdout and
+ * exit with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	const char *optlines[] = {
+		_(" -a, --all print both online and offline CPUs (default for -e)\n"),
+		_(" -b, --online print online CPUs only (default for -p)\n"),
+		_(" -c, --offline print offline CPUs only\n"),
+		_(" -J, --json use JSON for default or extended format\n"),
+		_(" -e, --extended[=<list>] print out an extended readable format\n"),
+		_(" -p, --parse[=<list>] print out a parsable format\n"),
+		_(" -s, --sysroot <dir> use specified directory as system root\n"),
+		_(" -x, --hex print hexadecimal masks rather than lists of CPUs\n"),
+		_(" -y, --physical print physical instead of logical IDs\n"),
+	};
+	size_t n;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Display information about the CPU architecture.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	for (n = 0; n < ARRAY_SIZE(optlines); n++)
+		fputs(optlines[n], out);
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(25));
+
+	/* one line per column known to coldescs[] */
+	fputs(USAGE_COLUMNS, out);
+	n = 0;
+	while (n < ARRAY_SIZE(coldescs)) {
+		fprintf(out, " %13s %s\n", coldescs[n].name, _(coldescs[n].help));
+		n++;
+	}
+
+	printf(USAGE_MAN_TAIL("lscpu(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * lscpu entry point: parse the command line, gather CPU information from
+ * sysfs/procfs (optionally below the --sysroot prefix) and print it in the
+ * summary, parsable (-p) or extended readable (-e) format.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE on bad usage.
+ */
+int main(int argc, char *argv[])
+{
+	struct lscpu_modifier _mod = { .mode = OUTPUT_SUMMARY }, *mod = &_mod;
+	struct lscpu_desc _desc = { .flags = NULL }, *desc = &_desc;
+	int c, i;
+	int columns[ARRAY_SIZE(coldescs)], ncolumns = 0;
+	int cpu_modifier_specified = 0;
+	size_t setsize;
+
+	enum {
+		OPT_OUTPUT_ALL = CHAR_MAX + 1,
+	};
+	static const struct option longopts[] = {
+		{ "all",        no_argument,       NULL, 'a' },
+		{ "online",     no_argument,       NULL, 'b' },
+		{ "offline",    no_argument,       NULL, 'c' },
+		{ "help",	no_argument,       NULL, 'h' },
+		{ "extended",	optional_argument, NULL, 'e' },
+		{ "json",       no_argument,       NULL, 'J' },
+		{ "parse",	optional_argument, NULL, 'p' },
+		{ "sysroot",	required_argument, NULL, 's' },
+		{ "physical",	no_argument,	   NULL, 'y' },
+		{ "hex",	no_argument,	   NULL, 'x' },
+		{ "version",	no_argument,	   NULL, 'V' },
+		{ "output-all",	no_argument,	   NULL, OPT_OUTPUT_ALL },
+		{ NULL,		0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'a','b','c' },
+		{ 'e','p' },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv, "abce::hJp::s:xyV", longopts, NULL)) != -1) {
+
+		err_exclusive_options(c, longopts, excl, excl_st);
+
+		switch (c) {
+		case 'a':
+			mod->online = mod->offline = 1;
+			cpu_modifier_specified = 1;
+			break;
+		case 'b':
+			mod->online = 1;
+			cpu_modifier_specified = 1;
+			break;
+		case 'c':
+			mod->offline = 1;
+			cpu_modifier_specified = 1;
+			break;
+		case 'h':
+			usage();
+		case 'J':
+			mod->json = 1;
+			break;
+		case 'p':
+		case 'e':
+			if (optarg) {
+				if (*optarg == '=')
+					optarg++;
+				ncolumns = string_to_idarray(optarg,
+						columns, ARRAY_SIZE(columns),
+						column_name_to_id);
+				if (ncolumns < 0)
+					return EXIT_FAILURE;
+			}
+			mod->mode = c == 'p' ? OUTPUT_PARSABLE : OUTPUT_READABLE;
+			break;
+		case 's':
+			desc->prefix = optarg;
+			mod->system = SYSTEM_SNAPSHOT;
+			break;
+		case 'x':
+			mod->hex = 1;
+			break;
+		case 'y':
+			mod->physical = 1;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case OPT_OUTPUT_ALL:
+		{
+			size_t sz;
+
+			/*
+			 * Select every column: a column id is its index in
+			 * coldescs[], and ncolumns has to be set, otherwise
+			 * the default column lists below are used.
+			 */
+			ncolumns = 0;
+			for (sz = 0; sz < ARRAY_SIZE(coldescs); sz++)
+				columns[ncolumns++] = (int) sz;
+			break;
+		}
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (cpu_modifier_specified && mod->mode == OUTPUT_SUMMARY) {
+		fprintf(stderr,
+			_("%s: options --all, --online and --offline may only "
+			  "be used with options --extended or --parse.\n"),
+			program_invocation_short_name);
+		return EXIT_FAILURE;
+	}
+
+	if (argc != optind) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	/* set default cpu display mode if none was specified */
+	if (!mod->online && !mod->offline) {
+		mod->online = 1;
+		mod->offline = mod->mode == OUTPUT_READABLE ? 1 : 0;
+	}
+
+	ul_path_init_debug();
+
+	/* /sys/devices/system/cpu */
+	desc->syscpu = ul_new_path(_PATH_SYS_CPU);
+	if (!desc->syscpu)
+		err(EXIT_FAILURE, _("failed to initialize CPUs sysfs handler"));
+	if (desc->prefix)
+		ul_path_set_prefix(desc->syscpu, desc->prefix);
+
+	/* /proc */
+	desc->procfs = ul_new_path("/proc");
+	if (!desc->procfs)
+		err(EXIT_FAILURE, _("failed to initialize procfs handler"));
+	if (desc->prefix)
+		ul_path_set_prefix(desc->procfs, desc->prefix);
+
+	read_basicinfo(desc, mod);
+
+	setsize = CPU_ALLOC_SIZE(maxcpus);
+
+	for (i = 0; i < desc->ncpuspos; i++) {
+		/* only consider present CPUs */
+		if (desc->present &&
+		    !CPU_ISSET_S(real_cpu_num(desc, i), setsize, desc->present))
+			continue;
+		read_topology(desc, i);
+		read_cache(desc, i);
+		read_polarization(desc, i);
+		read_address(desc, i);
+		read_configured(desc, i);
+		read_max_mhz(desc, i);
+		read_min_mhz(desc, i);
+	}
+
+	if (desc->caches)
+		qsort(desc->caches, desc->ncaches,
+				sizeof(struct cpu_cache), cachecmp);
+
+	if (desc->ecaches)
+		qsort(desc->ecaches, desc->necaches,
+				sizeof(struct cpu_cache), cachecmp);
+
+	read_nodes(desc);
+	read_hypervisor(desc, mod);
+	arm_cpu_decode(desc);
+
+	switch(mod->mode) {
+	case OUTPUT_SUMMARY:
+		print_summary(desc, mod);
+		break;
+	case OUTPUT_PARSABLE:
+		if (!ncolumns) {
+			/* no column list: fixed columns of the compat format */
+			columns[ncolumns++] = COL_CPU;
+			columns[ncolumns++] = COL_CORE;
+			columns[ncolumns++] = COL_SOCKET;
+			columns[ncolumns++] = COL_NODE;
+			columns[ncolumns++] = COL_CACHE;
+			mod->compat = 1;
+		}
+		print_parsable(desc, columns, ncolumns, mod);
+		break;
+	case OUTPUT_READABLE:
+		if (!ncolumns) {
+			/* No list was given. Just print whatever is there. */
+			columns[ncolumns++] = COL_CPU;
+			if (desc->nodemaps)
+				columns[ncolumns++] = COL_NODE;
+			if (desc->drawermaps)
+				columns[ncolumns++] = COL_DRAWER;
+			if (desc->bookmaps)
+				columns[ncolumns++] = COL_BOOK;
+			if (desc->socketmaps)
+				columns[ncolumns++] = COL_SOCKET;
+			if (desc->coremaps)
+				columns[ncolumns++] = COL_CORE;
+			if (desc->caches)
+				columns[ncolumns++] = COL_CACHE;
+			if (desc->online)
+				columns[ncolumns++] = COL_ONLINE;
+			if (desc->configured)
+				columns[ncolumns++] = COL_CONFIGURED;
+			if (desc->polarization)
+				columns[ncolumns++] = COL_POLARIZATION;
+			if (desc->addresses)
+				columns[ncolumns++] = COL_ADDRESS;
+			if (desc->maxmhz)
+				columns[ncolumns++] = COL_MAXMHZ;
+			if (desc->minmhz)
+				columns[ncolumns++] = COL_MINMHZ;
+		}
+		print_readable(desc, columns, ncolumns, mod);
+		break;
+	}
+
+	ul_unref_path(desc->syscpu);
+	ul_unref_path(desc->procfs);
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/lscpu.h b/sys-utils/lscpu.h
new file mode 100644
index 0000000..24bc11e
--- /dev/null
+++ b/sys-utils/lscpu.h
@@ -0,0 +1,194 @@
+#ifndef LSCPU_H
+#define LSCPU_H
+
+#include "c.h"
+#include "nls.h"
+#include "cpuset.h"
+#include "xalloc.h"
+#include "strutils.h"
+#include "bitops.h"
+#include "path.h"
+#include "pathnames.h"
+#include "all-io.h"
+
+/* virtualization types (values of lscpu_desc.virtype) */
+enum {
+ VIRT_NONE = 0,
+ VIRT_PARA,
+ VIRT_FULL,
+ VIRT_CONT
+};
+
+/* hypervisor vendors (values of lscpu_desc.hyper) */
+enum {
+ HYPER_NONE = 0,
+ HYPER_XEN,
+ HYPER_KVM,
+ HYPER_MSHV,
+ HYPER_VMWARE,
+ HYPER_IBM, /* sys-z powervm */
+ HYPER_VSERVER,
+ HYPER_UML,
+ HYPER_INNOTEK, /* VBOX */
+ HYPER_HITACHI,
+ HYPER_PARALLELS, /* OpenVZ/Virtuozzo */
+ HYPER_VBOX,
+ HYPER_OS400,
+ HYPER_PHYP,
+ HYPER_SPAR,
+ HYPER_WSL,
+};
+
+/* CPU modes: bit flags that are OR-ed together in lscpu_desc.mode */
+enum {
+ MODE_32BIT = (1 << 1),
+ MODE_64BIT = (1 << 2)
+};
+
+/* cache(s) description */
+struct cpu_cache {
+ char *name; /* cache name */
+ char *size; /* cache size as a string */
+
+ int nsharedmaps; /* number of used items in sharedmaps[] */
+ cpu_set_t **sharedmaps; /* CPU sets of the CPUs sharing the cache */
+};
+
+/* dispatching modes (values of lscpu_desc.dispatching) */
+enum {
+ DISP_HORIZONTAL = 0,
+ DISP_VERTICAL = 1
+};
+
+/* cpu polarization (values stored in lscpu_desc.polarization[]) */
+enum {
+ POLAR_UNKNOWN = 0,
+ POLAR_VLOW,
+ POLAR_VMEDIUM,
+ POLAR_VHIGH,
+ POLAR_HORIZONTAL
+};
+
+/* the two textual forms of one polarization mode */
+struct polarization_modes {
+ char *parsable; /* text for the parsable output */
+ char *readable; /* text for the other outputs */
+};
+
+
+/* global description */
+struct lscpu_desc {
+ const char *prefix; /* path to /sys and /proc snapshot or NULL */
+
+ struct path_cxt *syscpu; /* _PATH_SYS_CPU path handler */
+ struct path_cxt *procfs; /* /proc path handler */
+
+ char *arch;
+ char *vendor;
+ char *machinetype; /* s390 */
+ char *family;
+ char *model;
+ char *modelname;
+ char *revision; /* alternative for model (ppc) */
+ char *cpu; /* alternative for modelname (ppc, sparc) */
+ char *virtflag; /* virtualization flag (vmx, svm) */
+ char *hypervisor; /* hypervisor software */
+ int hyper; /* hypervisor vendor ID (HYPER_*) */
+ int virtype; /* VIRT_PARA|FULL|NONE ? */
+ char *mhz;
+ char *dynamic_mhz; /* dynamic mega hertz (s390) */
+ char *static_mhz; /* static mega hertz (s390) */
+ char **maxmhz; /* maximum mega hertz */
+ char **minmhz; /* minimum mega hertz */
+ char *stepping;
+ char *bogomips;
+ char *flags;
+ char *mtid; /* maximum thread id (s390) */
+ char *addrsz; /* address sizes */
+ int dispatching; /* none, horizontal or vertical */
+ int mode; /* rm, lm or/and tm */
+
+ int ncpuspos; /* maximal possible CPUs */
+ int ncpus; /* number of present CPUs */
+ cpu_set_t *present; /* mask with present CPUs */
+ cpu_set_t *online; /* mask with online CPUs */
+
+ int nthreads; /* number of online threads */
+
+ int ncaches;
+ struct cpu_cache *caches;
+
+ int necaches; /* extra caches (s390) */
+ struct cpu_cache *ecaches;
+
+ /*
+ * All maps are sequentially indexed (0..ncpuspos-1), the array index
+ * does not have to match the cpuX number as presented by the kernel.
+ * You have to use real_cpu_num() to get the real cpuX number.
+ *
+ * For example, if the possible system CPUs are 1,3,5, then
+ * ncpuspos=3 and all arrays are indexed in the range 0..2.
+ */
+ int *idx2cpunum; /* mapping index to CPU num */
+
+ int nnodes; /* number of NUMA nodes */
+ int *idx2nodenum; /* Support for discontinuous nodes */
+ cpu_set_t **nodemaps; /* array with NUMA nodes */
+
+ /* drawers -- based on drawer_siblings (internal kernel map of cpuX's
+ * hardware threads within the same drawer) */
+ int ndrawers; /* number of all online drawers */
+ cpu_set_t **drawermaps; /* unique drawer_siblings */
+ int *drawerids; /* physical drawer ids */
+
+ /* books -- based on book_siblings (internal kernel map of cpuX's
+ * hardware threads within the same book) */
+ int nbooks; /* number of all online books */
+ cpu_set_t **bookmaps; /* unique book_siblings */
+ int *bookids; /* physical book ids */
+
+ /* sockets -- based on core_siblings (internal kernel map of cpuX's
+ * hardware threads within the same physical_package_id (socket)) */
+ int nsockets; /* number of all online sockets */
+ cpu_set_t **socketmaps; /* unique core_siblings */
+ int *socketids; /* physical socket ids */
+
+ /* cores -- based on thread_siblings (internal kernel map of cpuX's
+ * hardware threads within the same core as cpuX) */
+ int ncores; /* number of all online cores */
+ cpu_set_t **coremaps; /* unique thread_siblings */
+ int *coreids; /* physical core ids */
+
+ int *polarization; /* cpu polarization */
+ int *addresses; /* physical cpu addresses */
+ int *configured; /* cpu configured */
+ int physsockets; /* Physical sockets (modules) */
+ int physchips; /* Physical chips */
+ int physcoresperchip; /* Physical cores per chip */
+};
+
+/* output formats (values of lscpu_modifier.mode) */
+enum {
+ OUTPUT_SUMMARY = 0, /* default */
+ OUTPUT_PARSABLE, /* -p */
+ OUTPUT_READABLE, /* -e */
+};
+
+/* kind of analyzed system (values of lscpu_modifier.system) */
+enum {
+ SYSTEM_LIVE = 0, /* analyzing a live system */
+ SYSTEM_SNAPSHOT, /* analyzing a snapshot of a different system */
+};
+
+/* settings that control what is printed and how */
+struct lscpu_modifier {
+ int mode; /* OUTPUT_* */
+ int system; /* SYSTEM_* */
+ unsigned int hex:1, /* print CPU masks rather than CPU lists */
+ compat:1, /* use backwardly compatible format */
+ online:1, /* print online CPUs */
+ offline:1, /* print offline CPUs */
+ json:1, /* JSON output format */
+ physical:1; /* use physical numbers */
+};
+
+/* helpers that are defined outside of this header */
+extern int read_hypervisor_dmi(void);
+extern void arm_cpu_decode(struct lscpu_desc *desc);
+
+#endif /* LSCPU_H */
diff --git a/sys-utils/lsipc.1 b/sys-utils/lsipc.1
new file mode 100644
index 0000000..9bb1dce
--- /dev/null
+++ b/sys-utils/lsipc.1
@@ -0,0 +1,139 @@
+.\" Copyright 2015 Ondrej Oprala(ooprala@redhat.com)
+.\" May be distributed under the GNU General Public License
+.TH LSIPC "1" "November 2015" "util-linux" "User Commands"
+.SH NAME
+lsipc \- show information on IPC facilities currently employed in the system
+.SH SYNOPSIS
+.B lsipc
+[options]
+.SH DESCRIPTION
+.B lsipc
+shows information on the inter-process communication facilities
+for which the calling process has read access.
+.SH OPTIONS
+.TP
+\fB\-i\fR, \fB\-\-id\fR \fIid\fR
+Show full details on just the one resource element identified by
+.IR id .
+This option needs to be combined with one of the three resource options:
+.BR \-m ,
+.BR \-q " or"
+.BR \-s .
+It is possible to override the default output format for this option with the
+\fB\-\-list\fR, \fB\-\-raw\fR, \fB\-\-json\fR or \fB\-\-export\fR option.
+.TP
+\fB\-g\fR, \fB\-\-global\fR
+Show system-wide usage and limits of IPC resources.
+This option may be combined with one of the three resource options:
+.BR \-m ,
+.BR \-q " or"
+.BR \-s .
+The default is to show information about all resources.
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Display help text and exit.
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Display version information and exit.
+.SS "Resource options"
+.TP
+\fB\-m\fR, \fB\-\-shmems\fR
+Write information about active shared memory segments.
+.TP
+\fB\-q\fR, \fB\-\-queues\fR
+Write information about active message queues.
+.TP
+\fB\-s\fR, \fB\-\-semaphores\fR
+Write information about active semaphore sets.
+.SS "Output formatting"
+.TP
+\fB\-c\fR, \fB\-\-creator\fR
+Show creator and owner.
+.TP
+\fB\-e\fR, \fB\-\-export\fR
+Output data in the format of NAME=VALUE.
+.TP
+\fB\-J\fR, \fB\-\-json\fR
+Use the JSON output format.
+.TP
+\fB\-l\fR, \fB\-\-list\fR
+Use the list output format. This is the default, except when \fB\-\-id\fR
+is used.
+.TP
+\fB\-n\fR, \fB\-\-newline\fR
+Display each piece of information on a separate line.
+.TP
+\fB\-\-noheadings\fR
+Do not print a header line.
+.TP
+\fB\-\-notruncate\fR
+Don't truncate output.
+.TP
+\fB\-o\fR, \fB\-\-output\fR \fIlist\fP
+Specify which output columns to print. Use
+.B \-\-help
+to get a list of all supported columns.
+.TP
+\fB\-b\fR, \fB\-\-bytes\fR
+Print size in bytes rather than in human-readable format.
+.TP
+\fB\-r\fR, \fB\-\-raw\fR
+Raw output (no columnation).
+.TP
+\fB\-t\fR, \fB\-\-time\fR
+Write time information. The time of the last control operation that changed
+the access permissions for all facilities, the time of the last
+.BR msgsnd (2)
+and
+.BR msgrcv (2)
+operations on message queues, the time of the last
+.BR shmat (2)
+and
+.BR shmdt (2)
+operations on shared memory, and the time of the last
+.BR semop (2)
+operation on semaphores.
+.TP
+\fB\-\-time\-format\fR \fItype\fP
+Display dates in short, full or iso format. The default is short; this time
+format is designed to be space-efficient and human-readable.
+.TP
+\fB\-P\fR, \fB\-\-numeric\-perms\fR
+Print numeric permissions in PERMS column.
+
+.SH EXIT STATUS
+.TP
+0
+if OK,
+.TP
+1
+if incorrect arguments specified,
+.TP
+2
+if a serious error occurs.
+.SH SEE ALSO
+.BR ipcmk (1),
+.BR ipcrm (1),
+.BR msgrcv (2),
+.BR msgsnd (2),
+.BR semget (2),
+.BR semop (2),
+.BR shmat (2),
+.BR shmdt (2),
+.BR shmget (2)
+.SH HISTORY
+The \fBlsipc\fP utility is inspired by the \fBipcs\fP utility.
+.SH AUTHORS
+.MT ooprala@redhat.com
+Ondrej Oprala
+.ME
+.br
+.MT kzak@redhat.com
+Karel Zak
+.ME
+
+.SH AVAILABILITY
+The lsipc command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/lsipc.c b/sys-utils/lsipc.c
new file mode 100644
index 0000000..e8ada57
--- /dev/null
+++ b/sys-utils/lsipc.c
@@ -0,0 +1,1338 @@
+/*
+ * lsipc - List information about IPC instances employed in the system
+ *
+ * Copyright (C) 2015 Ondrej Oprala <ooprala@redhat.com>
+ * Copyright (C) 2015 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ *
+ * lsipc is inspired by the ipcs utility. The aim is to create
+ * a utility unencumbered by a standard to provide more flexible
+ * means of controlling the output.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include <libsmartcols.h>
+
+#include "c.h"
+#include "nls.h"
+#include "closestream.h"
+#include "strutils.h"
+#include "optutils.h"
+#include "xalloc.h"
+#include "procutils.h"
+#include "ipcutils.h"
+#include "timeutils.h"
+
+/*
+ * Time modes -- select how make_time() formats a timestamp (chosen with
+ * --time-format, see parse_time_mode()). The value is kept in the 2-bit
+ * lsipc_control.time_mode bit-field, so it has to stay within 0..3.
+ */
+enum {
+ TIME_INVALID = 0,
+ TIME_SHORT,
+ TIME_FULL,
+ TIME_ISO
+};
+
+/*
+ * Column IDs
+ *
+ * The IDs are indexes into coldescs[]. They are grouped by the resource they
+ * apply to; every group is delimited by a COLDESC_IDX_*_FIRST and
+ * COLDESC_IDX_*_LAST alias (both inclusive) so that a whole group can be
+ * iterated (see usage()).
+ */
+enum {
+ /* generic */
+ COLDESC_IDX_GEN_FIRST = 0,
+ COL_KEY = COLDESC_IDX_GEN_FIRST,
+ COL_ID,
+ COL_OWNER,
+ COL_PERMS,
+ COL_CUID,
+ COL_CUSER,
+ COL_CGID,
+ COL_CGROUP,
+ COL_UID,
+ COL_USER,
+ COL_GID,
+ COL_GROUP,
+ COL_CTIME,
+ COLDESC_IDX_GEN_LAST = COL_CTIME,
+
+ /* msgq-specific */
+ COLDESC_IDX_MSG_FIRST,
+ COL_USEDBYTES = COLDESC_IDX_MSG_FIRST,
+ COL_MSGS,
+ COL_SEND,
+ COL_RECV,
+ COL_LSPID,
+ COL_LRPID,
+ COLDESC_IDX_MSG_LAST = COL_LRPID,
+
+ /* shm-specific */
+ COLDESC_IDX_SHM_FIRST,
+ COL_SIZE = COLDESC_IDX_SHM_FIRST,
+ COL_NATTCH,
+ COL_STATUS,
+ COL_ATTACH,
+ COL_DETACH,
+ COL_COMMAND,
+ COL_CPID,
+ COL_LPID,
+ COLDESC_IDX_SHM_LAST = COL_LPID,
+
+ /* sem-specific */
+ COLDESC_IDX_SEM_FIRST,
+ COL_NSEMS = COLDESC_IDX_SEM_FIRST,
+ COL_OTIME,
+ COLDESC_IDX_SEM_LAST = COL_OTIME,
+
+ /* summary (--global) */
+ COLDESC_IDX_SUM_FIRST,
+ COL_RESOURCE = COLDESC_IDX_SUM_FIRST,
+ COL_DESC,
+ COL_LIMIT,
+ COL_USED,
+ COL_USEPERC,
+ COLDESC_IDX_SUM_LAST = COL_USEPERC
+};
+
+/* Not all columns apply to all options, so we specify a legal range for each:
+ * column_name_to_id() accepts a non-generic column (ID above COL_CTIME) only
+ * if its ID lies within [LOWER, UPPER] (both inclusive). */
+static size_t LOWER, UPPER;
+
+/*
+ * Output modes (stored in lsipc_control.outmode). new_table() configures the
+ * libsmartcols table according to the mode and print_table() handles
+ * OUT_PRETTY with print_pretty() instead of scols_print_table().
+ */
+enum {
+ OUT_EXPORT = 1,
+ OUT_NEWLINE,
+ OUT_RAW,
+ OUT_JSON,
+ OUT_PRETTY,
+ OUT_LIST
+};
+
+/* Run-time output settings passed to the table and do_*() helpers. */
+struct lsipc_control {
+ int outmode; /* OUT_* */
+ unsigned int noheadings : 1, /* don't print header line */
+ notrunc : 1, /* don't truncate columns */
+ bytes : 1, /* SIZE in bytes */
+ numperms : 1, /* numeric permissions */
+ time_mode : 2; /* TIME_* */
+};
+
+/* Static description of one output column (an entry of coldescs[]). */
+struct lsipc_coldesc {
+ const char *name; /* column name; matched by column_name_to_id() and used as the table header */
+ const char *help; /* description printed by usage() */
+ const char *pretty_name; /* label printed by print_pretty() */
+
+ double whint; /* width hint */
+ long flag; /* SCOLS_FL_* flags passed to scols_table_new_column() */
+};
+
+/*
+ * Descriptions of all known columns, indexed by the COL_* IDs. The help and
+ * pretty-name strings are only marked for translation with N_() here.
+ */
+static const struct lsipc_coldesc coldescs[] =
+{
+ /* common */
+ [COL_KEY] = { "KEY", N_("Resource key"), N_("Key"), 1},
+ [COL_ID] = { "ID", N_("Resource ID"), N_("ID"), 1},
+ [COL_OWNER] = { "OWNER", N_("Owner's username or UID"), N_("Owner"), 1, SCOLS_FL_RIGHT},
+ [COL_PERMS] = { "PERMS", N_("Permissions"), N_("Permissions"), 1, SCOLS_FL_RIGHT},
+ [COL_CUID] = { "CUID", N_("Creator UID"), N_("Creator UID"), 1, SCOLS_FL_RIGHT},
+ [COL_CUSER] = { "CUSER", N_("Creator user"), N_("Creator user"), 1 },
+ [COL_CGID] = { "CGID", N_("Creator GID"), N_("Creator GID"), 1, SCOLS_FL_RIGHT},
+ [COL_CGROUP] = { "CGROUP", N_("Creator group"), N_("Creator group"), 1 },
+ [COL_UID] = { "UID", N_("User ID"), N_("UID"), 1, SCOLS_FL_RIGHT},
+ [COL_USER] = { "USER", N_("User name"), N_("User name"), 1},
+ [COL_GID] = { "GID", N_("Group ID"), N_("GID"), 1, SCOLS_FL_RIGHT},
+ [COL_GROUP] = { "GROUP", N_("Group name"), N_("Group name"), 1},
+ [COL_CTIME] = { "CTIME", N_("Time of the last change"), N_("Last change"), 1, SCOLS_FL_RIGHT},
+
+ /* msgq-specific */
+ [COL_USEDBYTES] = { "USEDBYTES",N_("Bytes used"), N_("Bytes used"), 1, SCOLS_FL_RIGHT},
+ [COL_MSGS] = { "MSGS", N_("Number of messages"), N_("Messages"), 1},
+ [COL_SEND] = { "SEND", N_("Time of last msg sent"), N_("Msg sent"), 1, SCOLS_FL_RIGHT},
+ [COL_RECV] = { "RECV", N_("Time of last msg received"), N_("Msg received"), 1, SCOLS_FL_RIGHT},
+ [COL_LSPID] = { "LSPID", N_("PID of the last msg sender"), N_("Msg sender"), 1, SCOLS_FL_RIGHT},
+ [COL_LRPID] = { "LRPID", N_("PID of the last msg receiver"), N_("Msg receiver"), 1, SCOLS_FL_RIGHT},
+
+ /* shm-specific */
+ [COL_SIZE] = { "SIZE", N_("Segment size"), N_("Segment size"), 1, SCOLS_FL_RIGHT},
+ [COL_NATTCH] = { "NATTCH", N_("Number of attached processes"), N_("Attached processes"), 1, SCOLS_FL_RIGHT},
+ [COL_STATUS] = { "STATUS", N_("Status"), N_("Status"), 1, SCOLS_FL_NOEXTREMES},
+ [COL_ATTACH] = { "ATTACH", N_("Attach time"), N_("Attach time"), 1, SCOLS_FL_RIGHT},
+ [COL_DETACH] = { "DETACH", N_("Detach time"), N_("Detach time"), 1, SCOLS_FL_RIGHT},
+ [COL_COMMAND] = { "COMMAND", N_("Creator command line"), N_("Creator command"), 0, SCOLS_FL_TRUNC},
+ [COL_CPID] = { "CPID", N_("PID of the creator"), N_("Creator PID"), 1, SCOLS_FL_RIGHT},
+ [COL_LPID] = { "LPID", N_("PID of last user"), N_("Last user PID"), 1, SCOLS_FL_RIGHT},
+
+ /* sem-specific */
+ [COL_NSEMS] = { "NSEMS", N_("Number of semaphores"), N_("Semaphores"), 1, SCOLS_FL_RIGHT},
+ [COL_OTIME] = { "OTIME", N_("Time of the last operation"), N_("Last operation"), 1, SCOLS_FL_RIGHT},
+
+ /* cols for summarized information */
+ [COL_RESOURCE] = { "RESOURCE", N_("Resource name"), N_("Resource"), 1 },
+ [COL_DESC] = { "DESCRIPTION",N_("Resource description"), N_("Description"), 1 },
+ [COL_USED] = { "USED", N_("Currently used"), N_("Used"), 1, SCOLS_FL_RIGHT },
+ [COL_USEPERC] = { "USE%", N_("Currently use percentage"), N_("Use"), 1, SCOLS_FL_RIGHT },
+ [COL_LIMIT] = { "LIMIT", N_("System-wide limit"), N_("Limit"), 1, SCOLS_FL_RIGHT },
+};
+
+
+/* The columns[] array holds the IDs (indexes into coldescs[]) of all currently
+ * wanted output columns; ncolumns is the number of used slots. The array is
+ * sized so that each column defined in coldescs[] can be specified (on the
+ * command line) twice. That is enough; a dynamically allocated array of
+ * columns would be unnecessary over-engineering in this case. */
+static int columns[ARRAY_SIZE(coldescs) * 2];
+static size_t ncolumns;
+
+/*
+ * Validates the slot index @idx for a column array of @arysz elements.
+ * Returns @idx unchanged when it is in range; otherwise the program is
+ * terminated with a "too many columns" error.
+ */
+static inline size_t err_columns_index(size_t arysz, size_t idx)
+{
+	const int in_range = idx < arysz;
+
+	if (!in_range)
+		errx(EXIT_FAILURE, _("too many columns specified, "
+				     "the limit is %zu columns"),
+				arysz - 1);
+	return idx;
+}
+
+/* Stores the column @id into slot @n of the array @ary. The slot index is
+ * checked by err_columns_index(), which exits the program when @n is out of
+ * range. @ary has to be a real array (ARRAY_SIZE() is applied to it). */
+#define add_column(ary, n, id) \
+ ((ary)[ err_columns_index(ARRAY_SIZE(ary), (n)) ] = (id))
+
+/*
+ * Translates a column name (the first @namesz bytes of @name, compared
+ * case-insensitively) to its COL_* ID.
+ *
+ * Generic columns (up to COL_CTIME) are always accepted; any other column is
+ * accepted only when its ID is within the [LOWER, UPPER] range. Returns the
+ * ID, or -1 after printing a warning when the name is unknown or the column
+ * does not apply.
+ */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	size_t idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(coldescs); idx++) {
+		const char *colname = coldescs[idx].name;
+
+		/* need a full-length match, not just a common prefix */
+		if (strncasecmp(name, colname, namesz) != 0 || colname[namesz] != '\0')
+			continue;
+
+		if (idx <= COL_CTIME || (idx >= LOWER && idx <= UPPER))
+			return idx;
+
+		warnx(_("column %s does not apply to the specified IPC"), name);
+		return -1;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/*
+ * Returns the COL_* ID stored in slot @num of columns[]. The slot number and
+ * the stored ID are sanity-checked by assertions.
+ */
+static int get_column_id(int num)
+{
+	int id;
+
+	assert(num >= 0);
+	assert((size_t) num < ncolumns);
+
+	id = columns[num];
+	assert((size_t) id < ARRAY_SIZE(coldescs));
+
+	return id;
+}
+
+/* Returns the coldescs[] entry describing the column in slot @num of columns[]. */
+static const struct lsipc_coldesc *get_column_desc(int num)
+{
+	const int id = get_column_id(num);
+
+	return coldescs + id;
+}
+
+/*
+ * Returns a newly allocated copy of the user name for @id, or NULL when
+ * getpwuid() does not return an entry.
+ *
+ * @pw is a caller-owned one-entry cache: getpwuid() is called only when the
+ * cache is empty or holds an entry for a different UID.
+ */
+static char *get_username(struct passwd **pw, uid_t id)
+{
+	struct passwd *entry = *pw;
+
+	if (entry == NULL || entry->pw_uid != id) {
+		entry = getpwuid(id);
+		*pw = entry;
+	}
+	if (entry == NULL)
+		return NULL;
+
+	return xstrdup(entry->pw_name);
+}
+
+/*
+ * Returns a newly allocated copy of the group name for @id, or NULL when
+ * getgrgid() does not return an entry.
+ *
+ * @gr is a caller-owned one-entry cache: getgrgid() is called only when the
+ * cache is empty or holds an entry for a different GID.
+ */
+static char *get_groupname(struct group **gr, gid_t id)
+{
+	struct group *entry = *gr;
+
+	if (entry == NULL || entry->gr_gid != id) {
+		entry = getgrgid(id);
+		*gr = entry;
+	}
+	if (entry == NULL)
+		return NULL;
+
+	return xstrdup(entry->gr_name);
+}
+
+/*
+ * Converts the --time-format argument @s ("iso", "full" or "short", matched
+ * case-sensitively) to the corresponding TIME_* value. Any other string
+ * terminates the program with an error.
+ */
+static int parse_time_mode(const char *s)
+{
+	if (!strcmp(s, "iso"))
+		return TIME_ISO;
+	if (!strcmp(s, "full"))
+		return TIME_FULL;
+	if (!strcmp(s, "short"))
+		return TIME_SHORT;
+
+	errx(EXIT_FAILURE, _("unknown time format: %s"), s);
+}
+
+/*
+ * Prints one section of the column overview: the (already translated) @title
+ * followed by the name and translated help text of every column with an ID
+ * in [@first, @last].
+ */
+static void usage_print_columns(FILE *out, const char *title, size_t first, size_t last)
+{
+	size_t id = first;
+
+	fputs(title, out);
+	while (id <= last) {
+		fprintf(out, " %14s %s\n", coldescs[id].name, _(coldescs[id].help));
+		id++;
+	}
+}
+
+/*
+ * Prints the help text (options plus the list of columns available for each
+ * resource type) to stdout and exits with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Show information on IPC facilities.\n"), out);
+
+	/* what to list */
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Resource options:\n"), out);
+	fputs(_(" -m, --shmems shared memory segments\n"), out);
+	fputs(_(" -q, --queues message queues\n"), out);
+	fputs(_(" -s, --semaphores semaphores\n"), out);
+	fputs(_(" -g, --global info about system-wide usage (may be used with -m, -q and -s)\n"), out);
+	fputs(_(" -i, --id <id> print details on resource identified by <id>\n"), out);
+
+	/* how to print it */
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" --noheadings don't print headings\n"), out);
+	fputs(_(" --notruncate don't truncate output\n"), out);
+	fputs(_(" --time-format=<type> display dates in short, full or iso format\n"), out);
+	fputs(_(" -b, --bytes print SIZE in bytes rather than in human readable format\n"), out);
+	fputs(_(" -c, --creator show creator and owner\n"), out);
+	fputs(_(" -e, --export display in an export-able output format\n"), out);
+	fputs(_(" -J, --json use the JSON output format\n"), out);
+	fputs(_(" -n, --newline display each piece of information on a new line\n"), out);
+	fputs(_(" -l, --list force list output format (for example with --id)\n"), out);
+	fputs(_(" -o, --output[=<list>] define the columns to output\n"), out);
+	fputs(_(" -P, --numeric-perms print numeric permissions (PERMS column)\n"), out);
+	fputs(_(" -r, --raw display in raw mode\n"), out);
+	fputs(_(" -t, --time show attach, detach and change times\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(26));
+
+	/* available columns, one section per resource type */
+	usage_print_columns(out, _("\nGeneric columns:\n"),
+			COLDESC_IDX_GEN_FIRST, COLDESC_IDX_GEN_LAST);
+	usage_print_columns(out, _("\nShared-memory columns (--shmems):\n"),
+			COLDESC_IDX_SHM_FIRST, COLDESC_IDX_SHM_LAST);
+	usage_print_columns(out, _("\nMessage-queue columns (--queues):\n"),
+			COLDESC_IDX_MSG_FIRST, COLDESC_IDX_MSG_LAST);
+	usage_print_columns(out, _("\nSemaphore columns (--semaphores):\n"),
+			COLDESC_IDX_SEM_FIRST, COLDESC_IDX_SEM_LAST);
+	usage_print_columns(out, _("\nSummary columns (--global):\n"),
+			COLDESC_IDX_SUM_FIRST, COLDESC_IDX_SUM_LAST);
+
+	printf(USAGE_MAN_TAIL("lsipc(1)"));
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Allocates a libsmartcols table and configures it according to @ctl
+ * (headings and output mode). Exits the program when the table cannot be
+ * allocated; no columns are added here.
+ */
+static struct libscols_table *new_table(struct lsipc_control *ctl)
+{
+	const int mode = ctl->outmode;
+	struct libscols_table *table;
+
+	table = scols_new_table();
+	if (table == NULL)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+
+	if (ctl->noheadings)
+		scols_table_enable_noheadings(table, 1);
+
+	if (mode == OUT_NEWLINE || mode == OUT_EXPORT) {
+		/* "newline" is the export format with one item per line */
+		if (mode == OUT_NEWLINE)
+			scols_table_set_column_separator(table, "\n");
+		scols_table_enable_export(table, 1);
+	} else if (mode == OUT_RAW) {
+		scols_table_enable_raw(table, 1);
+	} else if (mode == OUT_PRETTY) {
+		scols_table_enable_noheadings(table, 1);
+	} else if (mode == OUT_JSON) {
+		scols_table_enable_json(table, 1);
+	}
+
+	return table;
+}
+
+/*
+ * Creates a table (see new_table()) with one column for every entry of
+ * columns[]. With ctl->notrunc the SCOLS_FL_TRUNC flag is removed from the
+ * column flags. Returns the table, or NULL (after releasing the table) when
+ * a column cannot be added.
+ */
+static struct libscols_table *setup_table(struct lsipc_control *ctl)
+{
+	struct libscols_table *table = new_table(ctl);
+	size_t idx;
+
+	for (idx = 0; idx < ncolumns; idx++) {
+		const struct lsipc_coldesc *cd = get_column_desc(idx);
+		int fl = cd->flag;
+
+		if (ctl->notrunc)
+			fl &= ~SCOLS_FL_TRUNC;
+
+		if (scols_table_new_column(table, cd->name, cd->whint, fl))
+			continue;
+
+		/* failed to add the column */
+		scols_unref_table(table);
+		return NULL;
+	}
+
+	return table;
+}
+
+/*
+ * Prints the first line of @table as a list of "Label: value" rows, using the
+ * pretty name of each column as the label. Cells without data are skipped.
+ * When a sub-table is attached to the line as userdata (done by do_sem() for
+ * the semaphore elements), it is printed below under "Elements:".
+ *
+ * Always returns 0.
+ */
+static int print_pretty(struct libscols_table *table)
+{
+	struct libscols_iter *itr = scols_new_iter(SCOLS_ITER_FORWARD);
+	struct libscols_column *col;
+	struct libscols_cell *data;
+	struct libscols_line *ln;
+	const char *hstr, *dstr;
+	int n = 0;
+
+	ln = scols_table_get_line(table, 0);
+	while (!scols_table_next_column(table, itr, &col)) {
+
+		data = scols_line_get_cell(ln, n);
+
+		/* The pretty names are only marked with N_() in coldescs[],
+		 * so they have to be translated here at run time. */
+		hstr = _(get_column_desc(n)->pretty_name);
+		dstr = scols_cell_get_data(data);
+
+		if (dstr)
+			printf("%s:%*c%-36s\n", hstr, 35 - (int)strlen(hstr), ' ', dstr);
+		++n;
+	}
+
+	/* this is used to pretty-print detailed info about a semaphore array */
+	if (ln) {
+		struct libscols_table *subtab = scols_line_get_userdata(ln);
+		if (subtab) {
+			printf(_("Elements:\n\n"));
+			scols_print_table(subtab);
+		}
+	}
+
+	scols_free_iter(itr);
+	return 0;
+
+}
+
+/*
+ * Prints @tb: with print_pretty() in OUT_PRETTY mode, with
+ * scols_print_table() otherwise. The result of the printing call is not
+ * propagated; the function always returns 0.
+ */
+static int print_table(struct lsipc_control *ctl, struct libscols_table *tb)
+{
+	if (ctl->outmode != OUT_PRETTY) {
+		scols_print_table(tb);
+		return 0;
+	}
+
+	print_pretty(tb);
+	return 0;
+}
+/* Reference time handed to strtime_short() by make_time().
+ * NOTE(review): not assigned in the visible part of the file -- presumably
+ * initialized in main(); confirm. */
+static struct timeval now;
+
+/*
+ * Formats @time according to @mode (TIME_FULL, TIME_SHORT or TIME_ISO) and
+ * returns the result as a newly allocated string owned by the caller.
+ * An unsupported @mode terminates the program.
+ *
+ * For TIME_FULL an empty string is returned when the time cannot be
+ * converted (localtime_r() or asctime_r() fails).
+ */
+static char *make_time(int mode, time_t time)
+{
+	char buf[64] = {0};
+
+	switch(mode) {
+	case TIME_FULL:
+	{
+		struct tm tm;
+		size_t len;
+
+		/* both calls return NULL on failure; do not format garbage */
+		if (!localtime_r(&time, &tm) || !asctime_r(&tm, buf)) {
+			buf[0] = '\0';
+			break;
+		}
+
+		/* strip the trailing newline added by asctime_r(); the length
+		 * check avoids looking in front of an empty buffer */
+		len = strlen(buf);
+		if (len > 0 && buf[len - 1] == '\n')
+			buf[len - 1] = '\0';
+		break;
+	}
+	case TIME_SHORT:
+		strtime_short(&time, &now, 0, buf, sizeof(buf));
+		break;
+	case TIME_ISO:
+		strtime_iso(&time, ISO_TIMESTAMP_T, buf, sizeof(buf));
+		break;
+	default:
+		errx(EXIT_FAILURE, _("unsupported time type"));
+	}
+	return xstrdup(buf);
+}
+
+/*
+ * Appends one line describing a system-wide IPC limit to @tb.
+ *
+ * @resource: resource name (RESOURCE column)
+ * @desc:     description (DESCRIPTION column)
+ * @used:     currently used amount
+ * @limit:    system-wide limit (LIMIT column)
+ * @usage:    non-zero if @used is meaningful; otherwise USED and USE% are
+ *            printed as "-"
+ *
+ * USE% is also printed as "-" when @limit is zero, because the percentage is
+ * undefined in that case. Only the columns listed in columns[] are filled.
+ * The program is terminated when the line or its data cannot be stored.
+ */
+static void global_set_data(struct libscols_table *tb, const char *resource,
+			    const char *desc, uintmax_t used, uintmax_t limit, int usage)
+{
+	struct libscols_line *ln;
+	size_t n;
+
+	ln = scols_table_new_line(tb, NULL);
+	if (!ln)
+		err(EXIT_FAILURE, _("failed to allocate output line"));
+
+	for (n = 0; n < ncolumns; n++) {
+		int rc = 0;
+		char *arg = NULL;
+
+		switch (get_column_id(n)) {
+		case COL_RESOURCE:
+			rc = scols_line_set_data(ln, n, resource);
+			break;
+		case COL_DESC:
+			rc = scols_line_set_data(ln, n, desc);
+			break;
+		case COL_USED:
+			if (usage) {
+				xasprintf(&arg, "%ju", used);
+				rc = scols_line_refer_data(ln, n, arg);
+			} else
+				rc = scols_line_set_data(ln, n, "-");
+			break;
+		case COL_USEPERC:
+			/* a zero limit would yield "inf%" or "nan%" */
+			if (usage && limit != 0) {
+				xasprintf(&arg, "%2.2f%%", (double) used / limit * 100);
+				rc = scols_line_refer_data(ln, n, arg);
+			} else
+				rc = scols_line_set_data(ln, n, "-");
+			break;
+		case COL_LIMIT:
+			xasprintf(&arg, "%ju", limit);
+			rc = scols_line_refer_data(ln, n, arg);
+			break;
+		default:
+			break;
+		}
+
+		if (rc != 0)
+			err(EXIT_FAILURE, _("failed to add output data"));
+	}
+}
+
+/*
+ * Names @tb "elements" and adds the right-aligned columns used for the
+ * per-semaphore details (SEMNUM, VALUE, NCOUNT, ZCOUNT, PID, COMMAND).
+ * Calls err_oom() when a column cannot be created.
+ */
+static void setup_sem_elements_columns(struct libscols_table *tb)
+{
+	static const char *const names[] = {
+		"SEMNUM", "VALUE", "NCOUNT", "ZCOUNT", "PID", "COMMAND"
+	};
+	size_t k;
+
+	scols_table_set_name(tb, "elements");
+
+	for (k = 0; k < sizeof(names) / sizeof(names[0]); k++) {
+		if (!scols_table_new_column(tb, names[k], 0, SCOLS_FL_RIGHT))
+			err_oom();
+	}
+}
+
+/*
+ * Fills @tb with information about semaphore sets.
+ *
+ * @id:  ID of the only set to report, or a negative number to report all sets
+ * @ctl: output settings (numeric permissions, time format, output mode)
+ * @tb:  table with the columns from columns[] already set up
+ *
+ * One line is added per semaphore set and only the columns listed in
+ * columns[] are filled. In single-ID mode a sub-table with the individual
+ * semaphores is attached to the line as userdata (printed by print_pretty()).
+ * When no data is available the function returns without adding a line (a
+ * warning is printed if a specific @id was requested). Failures to allocate
+ * or store data terminate the program.
+ */
+static void do_sem(int id, struct lsipc_control *ctl, struct libscols_table *tb)
+{
+	struct libscols_line *ln;
+	struct passwd *pw = NULL, *cpw = NULL;
+	struct group *gr = NULL, *cgr = NULL;
+	struct sem_data *semds, *semdsp;
+	char *arg = NULL;
+
+	scols_table_set_name(tb, "semaphores");
+
+	if (ipc_sem_get_info(id, &semds) < 1) {
+		if (id > -1)
+			warnx(_("id %d not found"), id);
+		return;
+	}
+	for (semdsp = semds; semdsp->next != NULL || id > -1; semdsp = semdsp->next) {
+		size_t n;
+
+		ln = scols_table_new_line(tb, NULL);
+		if (!ln)
+			err(EXIT_FAILURE, _("failed to allocate output line"));
+
+		for (n = 0; n < ncolumns; n++) {
+			int rc = 0;
+
+			switch (get_column_id(n)) {
+			case COL_KEY:
+				xasprintf(&arg, "0x%08x",semdsp->sem_perm.key);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_ID:
+				xasprintf(&arg, "%d",semdsp->sem_perm.id);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_OWNER:
+				/* fall back to the numeric UID for unknown users */
+				arg = get_username(&pw, semdsp->sem_perm.uid);
+				if (!arg)
+					xasprintf(&arg, "%u", semdsp->sem_perm.uid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_PERMS:
+				if (ctl->numperms)
+					xasprintf(&arg, "%#o", semdsp->sem_perm.mode & 0777);
+				else {
+					arg = xmalloc(11);
+					xstrmode(semdsp->sem_perm.mode & 0777, arg);
+				}
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CUID:
+				xasprintf(&arg, "%u", semdsp->sem_perm.cuid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CUSER:
+				arg = get_username(&cpw, semdsp->sem_perm.cuid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CGID:
+				xasprintf(&arg, "%u", semdsp->sem_perm.cgid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CGROUP:
+				arg = get_groupname(&cgr, semdsp->sem_perm.cgid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_UID:
+				xasprintf(&arg, "%u", semdsp->sem_perm.uid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_USER:
+				arg = get_username(&pw, semdsp->sem_perm.uid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_GID:
+				xasprintf(&arg, "%u", semdsp->sem_perm.gid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_GROUP:
+				arg = get_groupname(&gr, semdsp->sem_perm.gid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CTIME:
+				/* a zero time stamp leaves the cell empty */
+				if (semdsp->sem_ctime != 0) {
+					rc = scols_line_refer_data(ln, n,
+							make_time(ctl->time_mode,
+							  (time_t)semdsp->sem_ctime));
+				}
+				break;
+			case COL_NSEMS:
+				xasprintf(&arg, "%ju", semdsp->sem_nsems);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_OTIME:
+				if (semdsp->sem_otime != 0) {
+					rc = scols_line_refer_data(ln, n,
+							make_time(ctl->time_mode,
+							  (time_t)semdsp->sem_otime));
+				}
+				break;
+			default:
+				break;
+			}
+			if (rc != 0)
+				err(EXIT_FAILURE, _("failed to add output data"));
+			/* the string is owned by the line now */
+			arg = NULL;
+		}
+
+		if (id < 0)
+			continue;
+
+		if (semdsp->sem_nsems) {
+			/* Create extra table with ID specific semaphore elements */
+			struct libscols_table *sub = new_table(ctl);
+			size_t i;
+			int rc = 0;
+
+			scols_table_enable_noheadings(sub, 0);
+			setup_sem_elements_columns(sub);
+
+			for (i = 0; i < semdsp->sem_nsems; i++) {
+				struct sem_elem *e = &semdsp->elements[i];
+				struct libscols_line *sln = scols_table_new_line(sub, NULL);
+
+				if (!sln)
+					err(EXIT_FAILURE, _("failed to allocate output line"));
+
+				/* SEMNUM */
+				xasprintf(&arg, "%zu", i);
+				rc = scols_line_refer_data(sln, 0, arg);
+				if (rc)
+					break;
+
+				/* VALUE */
+				xasprintf(&arg, "%d", e->semval);
+				rc = scols_line_refer_data(sln, 1, arg);
+				if (rc)
+					break;
+
+				/* NCOUNT */
+				xasprintf(&arg, "%d", e->ncount);
+				rc = scols_line_refer_data(sln, 2, arg);
+				if (rc)
+					break;
+
+				/* ZCOUNT */
+				xasprintf(&arg, "%d", e->zcount);
+				rc = scols_line_refer_data(sln, 3, arg);
+				if (rc)
+					break;
+
+				/* PID */
+				xasprintf(&arg, "%d", e->pid);
+				rc = scols_line_refer_data(sln, 4, arg);
+				if (rc)
+					break;
+
+				/* COMMAND */
+				arg = proc_get_command(e->pid);
+				rc = scols_line_refer_data(sln, 5, arg);
+				if (rc)
+					break;
+			}
+
+			if (rc != 0)
+				err(EXIT_FAILURE, _("failed to set data"));
+
+			scols_line_set_userdata(ln, (void *)sub);
+		}
+
+		/* Single-ID mode reports exactly one set. Always stop here (as
+		 * do_msg() and do_shm() do), otherwise the loop would advance
+		 * past the end of the list when the set has no elements. */
+		break;
+	}
+	ipc_sem_free_info(semds);
+}
+
+/*
+ * Adds the system-wide semaphore limits to @tb. For SEMMNI and SEMMNS the
+ * current usage (number of sets, total number of semaphores) is reported as
+ * well; it is computed by walking the list returned by ipc_sem_get_info().
+ */
+static void do_sem_global(struct libscols_table *tb)
+{
+	struct sem_data *semds, *semdsp;
+	struct ipc_limits lim;
+	int nsems = 0, nsets = 0;
+
+	ipc_sem_get_limits(&lim);
+
+	if (ipc_sem_get_info(-1, &semds) > 0) {
+		for (semdsp = semds; semdsp->next != NULL; semdsp = semdsp->next) {
+			++nsets;
+			/* count the semaphores of the current set, not of
+			 * the list head */
+			nsems += semdsp->sem_nsems;
+		}
+		ipc_sem_free_info(semds);
+	}
+
+	global_set_data(tb, "SEMMNI", _("Number of semaphore identifiers"), nsets, lim.semmni, 1);
+	global_set_data(tb, "SEMMNS", _("Total number of semaphores"), nsems, lim.semmns, 1);
+	global_set_data(tb, "SEMMSL", _("Max semaphores per semaphore set."), 0, lim.semmsl, 0);
+	global_set_data(tb, "SEMOPM", _("Max number of operations per semop(2)"), 0, lim.semopm, 0);
+	global_set_data(tb, "SEMVMX", _("Semaphore max value"), 0, lim.semvmx, 0);
+}
+
+/*
+ * Fills @tb with information about message queues.
+ *
+ * @id:  ID of the only queue to report, or a negative number to report all
+ * @ctl: output settings (numeric permissions, time format)
+ * @tb:  table with the columns from columns[] already set up
+ *
+ * One line is added per queue and only the columns listed in columns[] are
+ * filled. When no data is available the function returns without adding a
+ * line (a warning is printed if a specific @id was requested). Failures to
+ * allocate or store data terminate the program.
+ */
+static void do_msg(int id, struct lsipc_control *ctl, struct libscols_table *tb)
+{
+	struct libscols_line *ln;
+	struct passwd *pw = NULL;
+	struct group *gr = NULL;
+	struct msg_data *msgds, *msgdsp;
+	char *arg = NULL;
+
+	if (ipc_msg_get_info(id, &msgds) < 1) {
+		if (id > -1)
+			warnx(_("id %d not found"), id);
+		return;
+	}
+	scols_table_set_name(tb, "messages");
+
+	for (msgdsp = msgds; msgdsp->next != NULL || id > -1 ; msgdsp = msgdsp->next) {
+		size_t n;
+		ln = scols_table_new_line(tb, NULL);
+
+		if (!ln)
+			err(EXIT_FAILURE, _("failed to allocate output line"));
+
+		/* no need to call getpwuid() for the same user */
+		if (!(pw && pw->pw_uid == msgdsp->msg_perm.uid))
+			pw = getpwuid(msgdsp->msg_perm.uid);
+
+		/* no need to call getgrgid() for the same user */
+		if (!(gr && gr->gr_gid == msgdsp->msg_perm.gid))
+			gr = getgrgid(msgdsp->msg_perm.gid);
+
+		for (n = 0; n < ncolumns; n++) {
+			int rc = 0;
+
+			switch (get_column_id(n)) {
+			case COL_KEY:
+				xasprintf(&arg, "0x%08x",msgdsp->msg_perm.key);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_ID:
+				xasprintf(&arg, "%d",msgdsp->msg_perm.id);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_OWNER:
+				/* fall back to the numeric UID for unknown users */
+				arg = get_username(&pw, msgdsp->msg_perm.uid);
+				if (!arg)
+					xasprintf(&arg, "%u", msgdsp->msg_perm.uid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_PERMS:
+				if (ctl->numperms)
+					xasprintf(&arg, "%#o", msgdsp->msg_perm.mode & 0777);
+				else {
+					arg = xmalloc(11);
+					xstrmode(msgdsp->msg_perm.mode & 0777, arg);
+				}
+				/* store the string for both representations */
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CUID:
+				xasprintf(&arg, "%u", msgdsp->msg_perm.cuid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CUSER:
+				arg = get_username(&pw, msgdsp->msg_perm.cuid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CGID:
+				/* creator GID (not the creator UID) */
+				xasprintf(&arg, "%u", msgdsp->msg_perm.cgid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CGROUP:
+				arg = get_groupname(&gr, msgdsp->msg_perm.cgid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_UID:
+				xasprintf(&arg, "%u", msgdsp->msg_perm.uid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_USER:
+				arg = get_username(&pw, msgdsp->msg_perm.uid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_GID:
+				xasprintf(&arg, "%u", msgdsp->msg_perm.gid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_GROUP:
+				arg = get_groupname(&gr,msgdsp->msg_perm.gid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CTIME:
+				/* a zero time stamp leaves the cell empty */
+				if (msgdsp->q_ctime != 0)
+					rc = scols_line_refer_data(ln, n,
+						make_time(ctl->time_mode,
+							  (time_t)msgdsp->q_ctime));
+				break;
+			case COL_USEDBYTES:
+				xasprintf(&arg, "%ju", msgdsp->q_cbytes);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_MSGS:
+				xasprintf(&arg, "%ju", msgdsp->q_qnum);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_SEND:
+				if (msgdsp->q_stime != 0)
+					rc = scols_line_refer_data(ln, n,
+						make_time(ctl->time_mode,
+							  (time_t)msgdsp->q_stime));
+				break;
+			case COL_RECV:
+				if (msgdsp->q_rtime != 0)
+					rc = scols_line_refer_data(ln, n,
+						make_time(ctl->time_mode,
+							  (time_t)msgdsp->q_rtime));
+				break;
+			case COL_LSPID:
+				xasprintf(&arg, "%u", msgdsp->q_lspid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_LRPID:
+				xasprintf(&arg, "%u", msgdsp->q_lrpid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			default:
+				break;
+			}
+			if (rc != 0)
+				err(EXIT_FAILURE, _("failed to set data"));
+			/* the string is owned by the line now */
+			arg = NULL;
+		}
+		/* single-ID mode reports exactly one queue */
+		if (id > -1)
+			break;
+	}
+	ipc_msg_free_info(msgds);
+}
+
+
+/*
+ * Adds the system-wide message queue limits to @tb. For MSGMNI the number of
+ * queues currently in use is reported as well; it is obtained by walking the
+ * list returned by ipc_msg_get_info().
+ */
+static void do_msg_global(struct libscols_table *tb)
+{
+	struct ipc_limits lim;
+	struct msg_data *head, *cur;
+	int nqueues = 0;
+
+	ipc_msg_get_limits(&lim);
+
+	/* count number of used queues */
+	if (ipc_msg_get_info(-1, &head) > 0) {
+		cur = head;
+		while (cur->next != NULL) {
+			nqueues++;
+			cur = cur->next;
+		}
+		ipc_msg_free_info(head);
+	}
+
+	global_set_data(tb, "MSGMNI", _("Number of message queues"), nqueues, lim.msgmni, 1);
+	global_set_data(tb, "MSGMAX", _("Max size of message (bytes)"), 0, lim.msgmax, 0);
+	global_set_data(tb, "MSGMNB", _("Default max size of queue (bytes)"), 0, lim.msgmnb, 0);
+}
+
+
+/*
+ * Fills @tb with information about shared memory segments.
+ *
+ * @id:  ID of the only segment to report, or a negative number to report all
+ * @ctl: output settings (numeric permissions, sizes in bytes, time format)
+ * @tb:  table with the columns from columns[] already set up
+ *
+ * One line is added per segment and only the columns listed in columns[] are
+ * filled. When no data is available the function returns without adding a
+ * line (a warning is printed if a specific @id was requested). Failures to
+ * allocate or store data terminate the program.
+ */
+static void do_shm(int id, struct lsipc_control *ctl, struct libscols_table *tb)
+{
+	struct libscols_line *ln;
+	struct passwd *pw = NULL;
+	struct group *gr = NULL;
+	struct shm_data *shmds, *shmdsp;
+	char *arg = NULL;
+
+	if (ipc_shm_get_info(id, &shmds) < 1) {
+		if (id > -1)
+			warnx(_("id %d not found"), id);
+		return;
+	}
+
+	scols_table_set_name(tb, "sharedmemory");
+
+	for (shmdsp = shmds; shmdsp->next != NULL || id > -1 ; shmdsp = shmdsp->next) {
+		size_t n;
+		ln = scols_table_new_line(tb, NULL);
+
+		if (!ln)
+			err(EXIT_FAILURE, _("failed to allocate output line"));
+
+		for (n = 0; n < ncolumns; n++) {
+			int rc = 0;
+
+			switch (get_column_id(n)) {
+			case COL_KEY:
+				xasprintf(&arg, "0x%08x",shmdsp->shm_perm.key);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_ID:
+				xasprintf(&arg, "%d",shmdsp->shm_perm.id);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_OWNER:
+				/* fall back to the numeric UID for unknown users */
+				arg = get_username(&pw, shmdsp->shm_perm.uid);
+				if (!arg)
+					xasprintf(&arg, "%u", shmdsp->shm_perm.uid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_PERMS:
+				if (ctl->numperms)
+					xasprintf(&arg, "%#o", shmdsp->shm_perm.mode & 0777);
+				else {
+					arg = xmalloc(11);
+					xstrmode(shmdsp->shm_perm.mode & 0777, arg);
+				}
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CUID:
+				xasprintf(&arg, "%u", shmdsp->shm_perm.cuid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CUSER:
+				arg = get_username(&pw, shmdsp->shm_perm.cuid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CGID:
+				/* creator GID (not the creator UID) */
+				xasprintf(&arg, "%u", shmdsp->shm_perm.cgid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CGROUP:
+				arg = get_groupname(&gr, shmdsp->shm_perm.cgid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_UID:
+				xasprintf(&arg, "%u", shmdsp->shm_perm.uid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_USER:
+				arg = get_username(&pw, shmdsp->shm_perm.uid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_GID:
+				xasprintf(&arg, "%u", shmdsp->shm_perm.gid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_GROUP:
+				arg = get_groupname(&gr, shmdsp->shm_perm.gid);
+				if (arg)
+					rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_CTIME:
+				/* a zero time stamp leaves the cell empty */
+				if (shmdsp->shm_ctim != 0)
+					rc = scols_line_refer_data(ln, n,
+						make_time(ctl->time_mode,
+							  (time_t)shmdsp->shm_ctim));
+				break;
+			case COL_SIZE:
+				if (ctl->bytes)
+					xasprintf(&arg, "%ju", shmdsp->shm_segsz);
+				else
+					arg = size_to_human_string(SIZE_SUFFIX_1LETTER, shmdsp->shm_segsz);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_NATTCH:
+				xasprintf(&arg, "%ju", shmdsp->shm_nattch);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_STATUS: {
+				/* number of bytes written so far; non-zero means
+				 * that a separator is needed before the next flag */
+				size_t offt = 0;
+
+				/* room for all four names, three commas and
+				 * the terminating NUL */
+				arg = xcalloc(1, strlen(_("dest"))
+						+ strlen(_("locked"))
+						+ strlen(_("hugetlb"))
+						+ strlen(_("noreserve")) + 4);
+#ifdef SHM_DEST
+				if (shmdsp->shm_perm.mode & SHM_DEST)
+					offt += sprintf(arg + offt, "%s", _("dest"));
+#endif
+#ifdef SHM_LOCKED
+				if (shmdsp->shm_perm.mode & SHM_LOCKED) {
+					if (offt)
+						arg[offt++] = ',';
+					offt += sprintf(arg + offt, "%s", _("locked"));
+				}
+#endif
+#ifdef SHM_HUGETLB
+				if (shmdsp->shm_perm.mode & SHM_HUGETLB) {
+					if (offt)
+						arg[offt++] = ',';
+					offt += sprintf(arg + offt, "%s", _("hugetlb"));
+				}
+#endif
+#ifdef SHM_NORESERVE
+				if (shmdsp->shm_perm.mode & SHM_NORESERVE) {
+					if (offt)
+						arg[offt++] = ',';
+					offt += sprintf(arg + offt, "%s", _("noreserve"));
+				}
+#endif
+				rc = scols_line_refer_data(ln, n, arg);
+			}
+				break;
+			case COL_ATTACH:
+				if (shmdsp->shm_atim != 0)
+					rc = scols_line_refer_data(ln, n,
+							make_time(ctl->time_mode,
+							  (time_t)shmdsp->shm_atim));
+				break;
+			case COL_DETACH:
+				if (shmdsp->shm_dtim != 0)
+					rc = scols_line_refer_data(ln, n,
+							make_time(ctl->time_mode,
+							  (time_t)shmdsp->shm_dtim));
+				break;
+			case COL_CPID:
+				xasprintf(&arg, "%u", shmdsp->shm_cprid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_LPID:
+				xasprintf(&arg, "%u", shmdsp->shm_lprid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			case COL_COMMAND:
+				arg = proc_get_command(shmdsp->shm_cprid);
+				rc = scols_line_refer_data(ln, n, arg);
+				break;
+			default:
+				break;
+			}
+			if (rc != 0)
+				err(EXIT_FAILURE, _("failed to set data"));
+			/* the string is owned by the line now */
+			arg = NULL;
+		}
+		/* single-ID mode reports exactly one segment */
+		if (id > -1)
+			break;
+	}
+	ipc_shm_free_info(shmds);
+}
+
+ /*
+ * Add the shared memory rows (SHMMNI, SHMALL, SHMMAX, SHMMIN) to the
+ * global limits table.
+ *
+ * Usage numbers are collected by walking the list returned by
+ * ipc_shm_get_info(); only nodes that have a successor are counted, the
+ * node whose ->next is NULL ends the walk.
+ */
+ static void do_shm_global(struct libscols_table *tb)
+ {
+ 	struct ipc_limits limits;
+ 	struct shm_data *head;
+ 	uint64_t count = 0, total_bytes = 0;
+
+ 	ipc_shm_get_limits(&limits);
+
+ 	if (ipc_shm_get_info(-1, &head) > 0) {
+ 		struct shm_data *cur = head;
+
+ 		while (cur->next) {
+ 			count++;
+ 			total_bytes += cur->shm_segsz;
+ 			cur = cur->next;
+ 		}
+ 		ipc_shm_free_info(head);
+ 	}
+
+ 	/* SHMALL is expressed in pages, hence the division by the page size */
+ 	global_set_data(tb, "SHMMNI", _("Shared memory segments"), count, limits.shmmni, 1);
+ 	global_set_data(tb, "SHMALL", _("Shared memory pages"), total_bytes / getpagesize(), limits.shmall, 1);
+ 	global_set_data(tb, "SHMMAX", _("Max size of shared memory segment (bytes)"), 0, limits.shmmax, 0);
+ 	global_set_data(tb, "SHMMIN", _("Min size of shared memory segment (bytes)"), 0, limits.shmmin, 0);
+ }
+ /*
+ * lsipc entry point.
+ *
+ * Parses the command line, selects the resource type (message queues,
+ * shared memory, semaphores) or the global limits overview, builds the
+ * list of output columns, fills the libsmartcols table through the
+ * do_*() helpers and prints it.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE when the column list or the table
+ * cannot be set up; several helpers terminate the program themselves.
+ */
+ int main(int argc, char *argv[])
+ {
+ int opt, msg = 0, sem = 0, shm = 0, id = -1; /* id == -1: no --id given */
+ int show_time = 0, show_creat = 0, global = 0;
+ size_t i;
+ struct lsipc_control *ctl = xcalloc(1, sizeof(struct lsipc_control));
+ static struct libscols_table *tb;
+ char *outarg = NULL; /* argument of --output, applied after the defaults are known */
+
+ /* long only options. */
+ enum {
+ OPT_NOTRUNC = CHAR_MAX + 1,
+ OPT_NOHEAD,
+ OPT_TIME_FMT
+ };
+
+ static const struct option longopts[] = {
+ { "bytes", no_argument, NULL, 'b' },
+ { "creator", no_argument, NULL, 'c' },
+ { "export", no_argument, NULL, 'e' },
+ { "global", no_argument, NULL, 'g' },
+ { "help", no_argument, NULL, 'h' },
+ { "id", required_argument, NULL, 'i' },
+ { "json", no_argument, NULL, 'J' },
+ { "list", no_argument, NULL, 'l' },
+ { "newline", no_argument, NULL, 'n' },
+ { "noheadings", no_argument, NULL, OPT_NOHEAD },
+ { "notruncate", no_argument, NULL, OPT_NOTRUNC },
+ { "numeric-perms", no_argument, NULL, 'P' },
+ { "output", required_argument, NULL, 'o' },
+ { "queues", no_argument, NULL, 'q' },
+ { "raw", no_argument, NULL, 'r' },
+ { "semaphores", no_argument, NULL, 's' },
+ { "shmems", no_argument, NULL, 'm' },
+ { "time", no_argument, NULL, 't' },
+ { "time-format", required_argument, NULL, OPT_TIME_FMT },
+ { "version", no_argument, NULL, 'V' },
+ {NULL, 0, NULL, 0}
+ };
+
+ static const ul_excl_t excl[] = { /* rows and cols in ASCII order */
+ { 'J', 'e', 'l', 'n', 'r' },
+ { 'g', 'i' },
+ { 'c', 'o', 't' },
+ { 'm', 'q', 's' },
+ { 0 }
+ };
+ int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ ctl->time_mode = 0; /* 0 == not set; the default is chosen after option parsing */
+
+ scols_init_debug(0);
+
+ while ((opt = getopt_long(argc, argv, "bceghi:Jlmno:PqrstV", longopts, NULL)) != -1) {
+
+ err_exclusive_options(opt, longopts, excl, excl_st); /* presumably rejects option combinations listed in excl[] -- helper not visible here */
+
+ switch (opt) {
+ case 'b':
+ ctl->bytes = 1;
+ break;
+ case 'i':
+ id = strtos32_or_err(optarg, _("failed to parse IPC identifier"));
+ break;
+ case 'e':
+ ctl->outmode = OUT_EXPORT;
+ break;
+ case 'r':
+ ctl->outmode = OUT_RAW;
+ break;
+ case 'o':
+ outarg = optarg;
+ break;
+ case 'g':
+ global = 1;
+ break;
+ case 'q': /* message queues: default columns */
+ msg = 1;
+ add_column(columns, ncolumns++, COL_KEY);
+ add_column(columns, ncolumns++, COL_ID);
+ add_column(columns, ncolumns++, COL_PERMS);
+ add_column(columns, ncolumns++, COL_OWNER);
+ add_column(columns, ncolumns++, COL_USEDBYTES);
+ add_column(columns, ncolumns++, COL_MSGS);
+ add_column(columns, ncolumns++, COL_LSPID);
+ add_column(columns, ncolumns++, COL_LRPID);
+ LOWER = COLDESC_IDX_MSG_FIRST; /* LOWER/UPPER are defined outside this view; presumably the column descriptor range valid for this resource */
+ UPPER = COLDESC_IDX_MSG_LAST;
+ break;
+ case 'l':
+ ctl->outmode = OUT_LIST;
+ break;
+ case 'm': /* shared memory: default columns, COMMAND is the last one */
+ shm = 1;
+ add_column(columns, ncolumns++, COL_KEY);
+ add_column(columns, ncolumns++, COL_ID);
+ add_column(columns, ncolumns++, COL_PERMS);
+ add_column(columns, ncolumns++, COL_OWNER);
+ add_column(columns, ncolumns++, COL_SIZE);
+ add_column(columns, ncolumns++, COL_NATTCH);
+ add_column(columns, ncolumns++, COL_STATUS);
+ add_column(columns, ncolumns++, COL_CTIME);
+ add_column(columns, ncolumns++, COL_CPID);
+ add_column(columns, ncolumns++, COL_LPID);
+ add_column(columns, ncolumns++, COL_COMMAND);
+ LOWER = COLDESC_IDX_SHM_FIRST;
+ UPPER = COLDESC_IDX_SHM_LAST;
+ break;
+ case 'n':
+ ctl->outmode = OUT_NEWLINE;
+ break;
+ case 'P':
+ ctl->numperms = 1;
+ break;
+ case 's': /* semaphores: default columns */
+ sem = 1;
+ add_column(columns, ncolumns++, COL_KEY);
+ add_column(columns, ncolumns++, COL_ID);
+ add_column(columns, ncolumns++, COL_PERMS);
+ add_column(columns, ncolumns++, COL_OWNER);
+ add_column(columns, ncolumns++, COL_NSEMS);
+ LOWER = COLDESC_IDX_SEM_FIRST;
+ UPPER = COLDESC_IDX_SEM_LAST;
+ break;
+ case OPT_NOTRUNC:
+ ctl->notrunc = 1;
+ break;
+ case OPT_NOHEAD:
+ ctl->noheadings = 1;
+ break;
+ case OPT_TIME_FMT:
+ ctl->time_mode = parse_time_mode(optarg);
+ break;
+ case 'J':
+ ctl->outmode = OUT_JSON;
+ break;
+ case 't':
+ show_time = 1;
+ break;
+ case 'c':
+ show_creat = 1;
+ break;
+ case 'h':
+ usage(); /* NOTE(review): no break, falls into 'V' unless usage() never returns -- confirm it is declared noreturn */
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+ /* default is global */
+ if (msg + shm + sem == 0) {
+ msg = shm = sem = global = 1;
+ if (show_time || show_creat || id != -1)
+ errx(EXIT_FAILURE, _("--global is mutually exclusive with --creator, --id and --time"));
+ }
+ if (global) { /* the limits overview uses its own set of columns */
+ add_column(columns, ncolumns++, COL_RESOURCE);
+ add_column(columns, ncolumns++, COL_DESC);
+ add_column(columns, ncolumns++, COL_LIMIT);
+ add_column(columns, ncolumns++, COL_USED);
+ add_column(columns, ncolumns++, COL_USEPERC);
+ LOWER = COLDESC_IDX_SUM_FIRST;
+ UPPER = COLDESC_IDX_SUM_LAST;
+ }
+
+ /* default to pretty-print if --id specified */
+ if (id != -1 && !ctl->outmode)
+ ctl->outmode = OUT_PRETTY;
+
+ if (!ctl->time_mode) /* no --time-format: full timestamps for pretty output, short ones otherwise */
+ ctl->time_mode = ctl->outmode == OUT_PRETTY ? TIME_FULL : TIME_SHORT;
+
+ /* NOTE(review): the condition below tests the getopt global "optarg" after
+ * the parsing loop has finished, not the saved "outarg" -- verify which
+ * one is intended */
+ if (ctl->outmode == OUT_PRETTY && !(optarg || show_creat || show_time)) {
+ /* all columns for lsipc --<RESOURCE> --id <ID> */
+ for (ncolumns = 0, i = 0; i < ARRAY_SIZE(coldescs); i++)
+ columns[ncolumns++] = i;
+ } else {
+ if (show_creat) {
+ add_column(columns, ncolumns++, COL_CUID);
+ add_column(columns, ncolumns++, COL_CGID);
+ add_column(columns, ncolumns++, COL_UID);
+ add_column(columns, ncolumns++, COL_GID);
+ }
+ if (msg && show_time) {
+ add_column(columns, ncolumns++, COL_SEND);
+ add_column(columns, ncolumns++, COL_RECV);
+ add_column(columns, ncolumns++, COL_CTIME);
+ }
+ if (shm && show_time) {
+ /* keep "COMMAND" as last column */
+ size_t cmd = columns[ncolumns - 1] == COL_COMMAND;
+
+ if (cmd)
+ ncolumns--; /* drop COMMAND for now, it is appended again below */
+ add_column(columns, ncolumns++, COL_ATTACH);
+ add_column(columns, ncolumns++, COL_DETACH);
+ if (cmd)
+ add_column(columns, ncolumns++, COL_COMMAND);
+ }
+ if (sem && show_time) {
+ add_column(columns, ncolumns++, COL_OTIME);
+ add_column(columns, ncolumns++, COL_CTIME);
+ }
+ }
+
+ /* apply the user supplied --output list on top of the defaults */
+ if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns),
+ &ncolumns, column_name_to_id) < 0)
+ return EXIT_FAILURE;
+
+ tb = setup_table(ctl);
+ if (!tb)
+ return EXIT_FAILURE;
+
+ if (global)
+ scols_table_set_name(tb, "ipclimits");
+
+ /* fill the table: one helper per resource, either limits or object listing */
+ if (msg) {
+ if (global)
+ do_msg_global(tb);
+ else
+ do_msg(id, ctl, tb);
+ }
+ if (shm) {
+ if (global)
+ do_shm_global(tb);
+ else
+ do_shm(id, ctl, tb);
+ }
+ if (sem) {
+ if (global)
+ do_sem_global(tb);
+ else
+ do_sem(id, ctl, tb);
+ }
+
+ print_table(ctl, tb);
+
+ scols_unref_table(tb);
+ free(ctl);
+
+ return EXIT_SUCCESS;
+ }
+
diff --git a/sys-utils/lsmem.1 b/sys-utils/lsmem.1
new file mode 100644
index 0000000..4476d3e
--- /dev/null
+++ b/sys-utils/lsmem.1
@@ -0,0 +1,99 @@
+.TH LSMEM 1 "October 2016" "util-linux" "User Commands"
+.SH NAME
+lsmem \- list the ranges of available memory with their online status
+.SH SYNOPSIS
+.B lsmem
+[options]
+.SH DESCRIPTION
+The \fBlsmem\fP command lists the ranges of available memory with their online
+status. The listed memory blocks correspond to the memory block representation
+in sysfs. The command also shows the memory block size and the amount of memory
+in online and offline state.
+
+ The default output is compatible with the original implementation from s390-tools, but
+it's strongly recommended to avoid using default outputs in your scripts.
+Always explicitly define expected columns by using the \fB\-\-output\fR option
+together with a columns list in environments where a stable output is required.
+
+ The \fBlsmem\fP command starts a new memory range whenever the current memory
+ block differs from the previous block in some output column. This default
+ behavior can be overridden with the \fB\-\-split\fR option (e.g. \fBlsmem
+ \-\-split=ZONES\fR). The special word "none" may be used to ignore all
+ differences between memory blocks and to create the largest possible continuous
+ ranges. The opposite semantic is \fB\-\-all\fR to list individual memory
+ blocks.
+
+ Note that some output columns may provide inaccurate information if a split policy
+ forces \fBlsmem\fP to ignore differences in some attributes. For example, if you
+ merge removable and non-removable memory blocks into one range, then the whole
+ range will be marked as non-removable in the \fBlsmem\fP output.
+
+Not all columns are supported on all systems. If an unsupported column is
+specified, \fBlsmem\fP prints the column but does not provide any data for it.
+
+ Use the \fB\-\-help\fR option to see the column descriptions.
+
+.SH OPTIONS
+.TP
+.BR \-a ", " \-\-all
+List each individual memory block, instead of combining memory blocks with
+similar attributes.
+.TP
+.BR \-b , " \-\-bytes"
+Print the SIZE column in bytes rather than in a human-readable format.
+.TP
+.BR \-h ", " \-\-help
+Display help text and exit.
+.TP
+.BR \-J , " \-\-json"
+Use JSON output format.
+.TP
+.BR \-n , " \-\-noheadings"
+Do not print a header line.
+.TP
+.BR \-o , " \-\-output " \fIlist\fP
+Specify which output columns to print. Use \fB\-\-help\fR
+to get a list of all supported columns.
+The default list of columns may be extended if \fIlist\fP is
+specified in the format \fB+\fIlist\fP (e.g. \fBlsmem \-o +NODE\fP).
+.TP
+.B \-\-output\-all
+Output all available columns.
+.TP
+.BR \-P , " \-\-pairs"
+Produce output in the form of key="value" pairs.
+All potentially unsafe characters are hex-escaped (\\x<code>).
+.TP
+.BR \-r , " \-\-raw"
+Produce output in raw format. All potentially unsafe characters are hex-escaped
+(\\x<code>).
+.TP
+.BR \-S , " \-\-split " \fIlist\fP
+ Specify which columns (attributes) to use to split memory blocks into ranges. The
+ supported columns are STATE, REMOVABLE, NODE and ZONES, or "none". Other columns are
+ silently ignored. For more details see DESCRIPTION above.
+.TP
+.BR \-s , " \-\-sysroot " \fIdirectory\fP
+Gather memory data for a Linux instance other than the instance from which the
+\fBlsmem\fP command is issued. The specified \fIdirectory\fP is the system
+root of the Linux instance to be inspected.
+.TP
+.BR \-V ", " \-\-version
+Display version information and exit.
+.TP
+\fB\-\-summary\fR[=\fIwhen\fR]
+This option controls summary lines output. The optional argument \fIwhen\fP can be
+\fBnever\fR, \fBalways\fR or \fBonly\fR. If the \fIwhen\fR argument is
+omitted, it defaults to \fB"only"\fR. The summary output is suppressed for
+\fB\-\-raw\fR, \fB\-\-pairs\fR and \fB\-\-json\fR.
+.SH AUTHOR
+.B lsmem
+was originally written by Gerald Schaefer for s390-tools in Perl. The C version
+for util-linux was written by Clemens von Mann, Heiko Carstens and Karel Zak.
+.SH SEE ALSO
+.BR chmem (8)
+.SH AVAILABILITY
+The \fBlsmem\fP command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/lsmem.c b/sys-utils/lsmem.c
new file mode 100644
index 0000000..8638336
--- /dev/null
+++ b/sys-utils/lsmem.c
@@ -0,0 +1,747 @@
+/*
+ * lsmem - Show memory configuration
+ *
+ * Copyright IBM Corp. 2016
+ * Copyright (C) 2016 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <c.h>
+#include <nls.h>
+#include <path.h>
+#include <strutils.h>
+#include <closestream.h>
+#include <xalloc.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <assert.h>
+#include <optutils.h>
+#include <libsmartcols.h>
+ /* sysfs directory that holds block_size_bytes and the memory<N> block directories */
+ #define _PATH_SYS_MEMORY "/sys/devices/system/memory"
+
+ #define MEMORY_STATE_ONLINE 0 /* "state" attribute reads "online" */
+ #define MEMORY_STATE_OFFLINE 1 /* "state" attribute reads "offline" */
+ #define MEMORY_STATE_GOING_OFFLINE 2 /* "state" attribute reads "going-offline" */
+ #define MEMORY_STATE_UNKNOWN 3 /* attribute unreadable or not recognized */
+ /* Memory zone ids; the values index zone_names[] */
+ enum zone_id {
+ ZONE_DMA = 0,
+ ZONE_DMA32,
+ ZONE_NORMAL,
+ ZONE_HIGHMEM,
+ ZONE_MOVABLE,
+ ZONE_DEVICE,
+ ZONE_NONE,
+ ZONE_UNKNOWN, /* returned by zone_name_to_id() for unrecognized names */
+ MAX_NR_ZONES, /* number of ids above; sizes memory_block.zones[] */
+ };
+ /* One memory block, or a range of consecutive blocks merged into one output line */
+ struct memory_block {
+ uint64_t index; /* <num> parsed from the "memory<num>" directory name (first block of the range) */
+ uint64_t count; /* number of consecutive blocks in the range, starts at 1 */
+ int state; /* MEMORY_STATE_* */
+ int node; /* number from the block's "node<N>" entry, -1 if there is none */
+ int nr_zones; /* used items in zones[] */
+ int zones[MAX_NR_ZONES]; /* enum zone_id values parsed from "valid_zones" */
+ unsigned int removable:1; /* set when the "removable" attribute reads 1 */
+ };
+ /* Global state of one lsmem run: sysfs handles, collected data and output options */
+ struct lsmem {
+ struct path_cxt *sysmem; /* _PATH_SYS_MEMORY directory handler */
+ struct dirent **dirs; /* "memory<N>" entries returned by scandir() */
+ int ndirs; /* number of items in dirs */
+ struct memory_block *blocks; /* merged ranges, grown by read_info() */
+ int nblocks; /* number of items in blocks */
+ uint64_t block_size; /* parsed as hexadecimal from block_size_bytes */
+ uint64_t mem_online; /* block_size summed over all online blocks */
+ uint64_t mem_offline; /* block_size summed over all other blocks */
+
+ struct libscols_table *table;
+ unsigned int have_nodes : 1, /* first memory block directory has a node<N> entry */
+ raw : 1, /* --raw */
+ export : 1, /* --pairs */
+ json : 1, /* --json */
+ noheadings : 1, /* --noheadings */
+ summary : 1, /* NOTE(review): not referenced by the code in this file */
+ list_all : 1, /* --all: never merge blocks */
+ bytes : 1, /* --bytes */
+ want_summary : 1, /* print the summary lines */
+ want_table : 1, /* print the table */
+ split_by_node : 1, /* split_by_*: attributes that must match to merge blocks */
+ split_by_state : 1,
+ split_by_removable : 1,
+ split_by_zones : 1,
+ have_zones : 1; /* sysfs provides the valid_zones attribute */
+ };
+
+ /* Output column ids; the values index coldescs[] */
+ enum {
+ COL_RANGE,
+ COL_SIZE,
+ COL_STATE,
+ COL_REMOVABLE,
+ COL_BLOCK,
+ COL_NODE,
+ COL_ZONES,
+ };
+
+ /*
+ * Printable zone names indexed by enum zone_id. The table is read-only:
+ * it is used to translate the sysfs valid_zones tokens to ids and to
+ * print the ZONES column.
+ */
+ static const char *const zone_names[] = {
+ 	[ZONE_DMA] = "DMA",
+ 	[ZONE_DMA32] = "DMA32",
+ 	[ZONE_NORMAL] = "Normal",
+ 	[ZONE_HIGHMEM] = "Highmem",
+ 	[ZONE_MOVABLE] = "Movable",
+ 	[ZONE_DEVICE] = "Device",
+ 	[ZONE_NONE] = "None", /* block contains more than one zone, can't be offlined */
+ 	[ZONE_UNKNOWN] = "Unknown",
+ };
+
+ /* column names: static description of one output column (an item of coldescs[]) */
+ struct coldesc {
+ const char *name; /* header */
+ double whint; /* width hint (N < 1 is in percent of termwidth) */
+ int flags; /* SCOLS_FL_* */
+ const char *help; /* untranslated description, printed by usage() */
+ };
+
+ /* columns descriptions, indexed by the COL_* ids; the help texts are only
+ * marked with N_() here and passed through _() when usage() prints them */
+ static struct coldesc coldescs[] = {
+ [COL_RANGE] = { "RANGE", 0, 0, N_("start and end address of the memory range")},
+ [COL_SIZE] = { "SIZE", 5, SCOLS_FL_RIGHT, N_("size of the memory range")},
+ [COL_STATE] = { "STATE", 0, SCOLS_FL_RIGHT, N_("online status of the memory range")},
+ [COL_REMOVABLE] = { "REMOVABLE", 0, SCOLS_FL_RIGHT, N_("memory is removable")},
+ [COL_BLOCK] = { "BLOCK", 0, SCOLS_FL_RIGHT, N_("memory block number or blocks range")},
+ [COL_NODE] = { "NODE", 0, SCOLS_FL_RIGHT, N_("numa node of memory")},
+ [COL_ZONES] = { "ZONES", 0, SCOLS_FL_RIGHT, N_("valid zones for the memory range")},
+ };
+
+ /* The columns[] array lists all currently wanted output columns, in output
+ * order. Each item is an index into the coldescs[] array. The array is large
+ * enough to specify (on the command line) each column twice. That's enough; a
+ * dynamically allocated array of the columns would be unnecessary
+ * over-engineering in this case. */
+ static int columns[ARRAY_SIZE(coldescs) * 2];
+ static size_t ncolumns; /* number of used items in columns[] */
+
+ /*
+ * Check a write position for a columns array of arysz items.
+ *
+ * Returns idx when it is a valid position; otherwise reports the error
+ * through errx(EXIT_FAILURE, ...).
+ */
+ static inline size_t err_columns_index(size_t arysz, size_t idx)
+ {
+ 	int fits = idx < arysz;
+
+ 	if (!fits)
+ 		errx(EXIT_FAILURE, _("too many columns specified, "
+ 				"the limit is %zu columns"),
+ 				arysz - 1);
+ 	return idx;
+ }
+
+/*
+ * name must be null-terminated
+ */
+static int zone_name_to_id(const char *name)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(zone_names); i++) {
+ if (!strcasecmp(name, zone_names[i]))
+ return i;
+ }
+ return ZONE_UNKNOWN;
+}
+ /* Store column id at position n of the array ary; the position is first
+ * checked by err_columns_index() against ARRAY_SIZE(ary) */
+ #define add_column(ary, n, id) \
+ ((ary)[ err_columns_index(ARRAY_SIZE(ary), (n)) ] = (id))
+
+ /*
+ * Translate a column name (the first namesz bytes of name) to its COL_* id.
+ * The comparison is case-insensitive and has to cover the whole column
+ * name. Prints a warning and returns -1 for an unknown column.
+ */
+ static int column_name_to_id(const char *name, size_t namesz)
+ {
+ 	size_t id;
+
+ 	for (id = 0; id < ARRAY_SIZE(coldescs); id++) {
+ 		const char *colname = coldescs[id].name;
+
+ 		if (strncasecmp(name, colname, namesz) != 0)
+ 			continue;
+ 		/* the prefix matches; accept only if the column name ends here */
+ 		if (colname[namesz] == '\0')
+ 			return id;
+ 	}
+ 	warnx(_("unknown column: %s"), name);
+ 	return -1;
+ }
+
+ /*
+ * Return the COL_* id stored at position num of columns[]. The position
+ * and the stored id are sanity-checked by assert().
+ */
+ static inline int get_column_id(int num)
+ {
+ 	int id;
+
+ 	assert(num >= 0);
+ 	assert((size_t) num < ncolumns);
+
+ 	id = columns[num];
+ 	assert(id < (int) ARRAY_SIZE(coldescs));
+
+ 	return id;
+ }
+
+ /* Return the description of the column at position num of columns[] */
+ static inline struct coldesc *get_column_desc(int num)
+ {
+ 	int id = get_column_id(num);
+
+ 	return coldescs + id;
+ }
+
+ /* Set all four split_by_* flags of l to the same value */
+ static inline void reset_split_policy(struct lsmem *l, int enable)
+ {
+ 	l->split_by_zones = enable;
+ 	l->split_by_removable = enable;
+ 	l->split_by_node = enable;
+ 	l->split_by_state = enable;
+ }
+
+ /*
+ * Derive the split policy from a list of column ids: all split_by_* flags
+ * are cleared first and then enabled for every STATE, NODE, REMOVABLE or
+ * ZONES item of cols[]. Other column ids are ignored.
+ */
+ static void set_split_policy(struct lsmem *l, int cols[], size_t ncols)
+ {
+ 	const int *col;
+ 	const int *end = cols + ncols;
+
+ 	reset_split_policy(l, 0);
+
+ 	for (col = cols; col < end; col++) {
+ 		if (*col == COL_STATE)
+ 			l->split_by_state = 1;
+ 		else if (*col == COL_NODE)
+ 			l->split_by_node = 1;
+ 		else if (*col == COL_REMOVABLE)
+ 			l->split_by_removable = 1;
+ 		else if (*col == COL_ZONES)
+ 			l->split_by_zones = 1;
+ 	}
+ }
+
+ /*
+ * Append one table line describing the memory range blk.
+ *
+ * A cell is formatted for every wanted column (columns[]). A cell stays
+ * empty when there is nothing to report: REMOVABLE for ranges that are not
+ * online, NODE without node information and ZONES without the valid_zones
+ * attribute. Terminates the program through err_oom() when the line or a
+ * cell cannot be stored.
+ */
+ static void add_scols_line(struct lsmem *lsmem, struct memory_block *blk)
+ {
+ 	size_t i;
+ 	struct libscols_line *line;
+
+ 	line = scols_table_new_line(lsmem->table, NULL);
+ 	if (!line)
+ 		err_oom();
+
+ 	for (i = 0; i < ncolumns; i++) {
+ 		char *str = NULL;
+
+ 		switch (get_column_id(i)) {
+ 		case COL_RANGE:
+ 		{
+ 			uint64_t start = blk->index * lsmem->block_size;
+ 			uint64_t size = blk->count * lsmem->block_size;
+ 			xasprintf(&str, "0x%016"PRIx64"-0x%016"PRIx64, start, start + size - 1);
+ 			break;
+ 		}
+ 		case COL_SIZE:
+ 			/* the size is an uint64_t, print it with the unsigned conversion */
+ 			if (lsmem->bytes)
+ 				xasprintf(&str, "%"PRIu64, (uint64_t) blk->count * lsmem->block_size);
+ 			else
+ 				str = size_to_human_string(SIZE_SUFFIX_1LETTER,
+ 						(uint64_t) blk->count * lsmem->block_size);
+ 			break;
+ 		case COL_STATE:
+ 			str = xstrdup(
+ 				blk->state == MEMORY_STATE_ONLINE ? _("online") :
+ 				blk->state == MEMORY_STATE_OFFLINE ? _("offline") :
+ 				blk->state == MEMORY_STATE_GOING_OFFLINE ? _("on->off") :
+ 				"?");
+ 			break;
+ 		case COL_REMOVABLE:
+ 			if (blk->state == MEMORY_STATE_ONLINE)
+ 				str = xstrdup(blk->removable ? _("yes") : _("no"));
+ 			break;
+ 		case COL_BLOCK:
+ 			/* index and count are uint64_t too */
+ 			if (blk->count == 1)
+ 				xasprintf(&str, "%"PRIu64, blk->index);
+ 			else
+ 				xasprintf(&str, "%"PRIu64"-%"PRIu64,
+ 					blk->index, blk->index + blk->count - 1);
+ 			break;
+ 		case COL_NODE:
+ 			if (lsmem->have_nodes)
+ 				xasprintf(&str, "%d", blk->node);
+ 			break;
+ 		case COL_ZONES:
+ 			if (lsmem->have_zones) {
+ 				char valid_zones[BUFSIZ];
+ 				int j, zone_id;
+
+ 				/* join the zone names by "/", never overflow the buffer */
+ 				valid_zones[0] = '\0';
+ 				for (j = 0; j < blk->nr_zones; j++) {
+ 					zone_id = blk->zones[j];
+ 					if (strlen(valid_zones) +
+ 					    strlen(zone_names[zone_id]) > BUFSIZ - 2)
+ 						break;
+ 					strcat(valid_zones, zone_names[zone_id]);
+ 					if (j + 1 < blk->nr_zones)
+ 						strcat(valid_zones, "/");
+ 				}
+ 				str = xstrdup(valid_zones);
+ 			}
+ 			break;
+ 		default:
+ 			break;
+ 		}
+
+ 		if (str && scols_line_refer_data(line, i, str) != 0)
+ 			err_oom();
+ 	}
+ }
+
+ /* Add one table line for every collected memory range */
+ static void fill_scols_table(struct lsmem *lsmem)
+ {
+ 	int n = 0;
+
+ 	while (n < lsmem->nblocks) {
+ 		add_scols_line(lsmem, &lsmem->blocks[n]);
+ 		n++;
+ 	}
+ }
+
+ /*
+ * Print the summary lines (memory block size, total online and total
+ * offline memory) to stdout, either as plain byte counts (--bytes) or in
+ * the human readable form.
+ */
+ static void print_summary(struct lsmem *lsmem)
+ {
+ 	if (lsmem->bytes) {
+ 		/* the counters are uint64_t, print them with the unsigned conversion */
+ 		printf("%-23s %15"PRIu64"\n",_("Memory block size:"), lsmem->block_size);
+ 		printf("%-23s %15"PRIu64"\n",_("Total online memory:"), lsmem->mem_online);
+ 		printf("%-23s %15"PRIu64"\n",_("Total offline memory:"), lsmem->mem_offline);
+ 	} else {
+ 		char *p;
+
+ 		/* a line is skipped when the size cannot be converted */
+ 		if ((p = size_to_human_string(SIZE_SUFFIX_1LETTER, lsmem->block_size)))
+ 			printf("%-23s %5s\n",_("Memory block size:"), p);
+ 		free(p);
+
+ 		if ((p = size_to_human_string(SIZE_SUFFIX_1LETTER, lsmem->mem_online)))
+ 			printf("%-23s %5s\n",_("Total online memory:"), p);
+ 		free(p);
+
+ 		if ((p = size_to_human_string(SIZE_SUFFIX_1LETTER, lsmem->mem_offline)))
+ 			printf("%-23s %5s\n",_("Total offline memory:"), p);
+ 		free(p);
+ 	}
+ }
+
+ /*
+ * Return the node number of the memory block directory name, taken from
+ * the first "node<digits>" entry readdir() returns for that directory, or
+ * -1 when there is no such entry. Terminates the program when the
+ * directory cannot be opened.
+ */
+ static int memory_block_get_node(struct lsmem *lsmem, char *name)
+ {
+ 	DIR *dir = ul_path_opendir(lsmem->sysmem, name);
+ 	struct dirent *ent;
+ 	int node = -1;
+
+ 	if (!dir)
+ 		err(EXIT_FAILURE, _("Failed to open %s"), name);
+
+ 	while ((ent = readdir(dir)) != NULL) {
+ 		if (strncmp("node", ent->d_name, 4) == 0 &&
+ 		    isdigit_string(ent->d_name + 4)) {
+ 			node = strtol(ent->d_name + 4, NULL, 10);
+ 			break;
+ 		}
+ 	}
+ 	closedir(dir);
+ 	return node;
+ }
+
+ /*
+ * Initialize blk from the sysfs attributes of the memory block directory
+ * name ("memory<num>"): index, removable flag, state, node and the list of
+ * valid zones. Attributes that cannot be read keep their defaults
+ * (state unknown, not removable, no zones).
+ */
+ static void memory_block_read_attrs(struct lsmem *lsmem, char *name,
+ 				struct memory_block *blk)
+ {
+ 	char *buf = NULL;
+ 	char *tok;
+ 	int removable = 0;
+
+ 	memset(blk, 0, sizeof(*blk));
+
+ 	blk->index = strtoumax(name + 6, NULL, 10); /* get <num> of "memory<num>" */
+ 	blk->count = 1;
+ 	blk->state = MEMORY_STATE_UNKNOWN;
+
+ 	if (ul_path_readf_s32(lsmem->sysmem, &removable, "%s/removable", name) == 0)
+ 		blk->removable = removable == 1;
+
+ 	if (ul_path_readf_string(lsmem->sysmem, &buf, "%s/state", name) > 0) {
+ 		if (!strcmp(buf, "offline"))
+ 			blk->state = MEMORY_STATE_OFFLINE;
+ 		else if (!strcmp(buf, "online"))
+ 			blk->state = MEMORY_STATE_ONLINE;
+ 		else if (!strcmp(buf, "going-offline"))
+ 			blk->state = MEMORY_STATE_GOING_OFFLINE;
+ 		free(buf);
+ 	}
+
+ 	if (lsmem->have_nodes)
+ 		blk->node = memory_block_get_node(lsmem, name);
+
+ 	blk->nr_zones = 0;
+ 	if (!lsmem->have_zones)
+ 		return;
+ 	if (ul_path_readf_string(lsmem->sysmem, &buf, "%s/valid_zones", name) <= 0)
+ 		return;
+
+ 	/* valid_zones is a space separated list of zone names */
+ 	for (tok = strtok(buf, " ");
+ 	     tok && blk->nr_zones < MAX_NR_ZONES;
+ 	     tok = strtok(NULL, " ")) {
+ 		blk->zones[blk->nr_zones] = zone_name_to_id(tok);
+ 		blk->nr_zones++;
+ 	}
+
+ 	free(buf);
+ }
+
+ /*
+ * Decide whether blk can be appended to the last collected range.
+ *
+ * Returns 1 when blk directly follows the last range and matches it in
+ * every attribute selected by the split policy, 0 otherwise (also when
+ * there is no range yet or --all is requested). Zones never match when
+ * the last range contains an unknown zone.
+ */
+ static int is_mergeable(struct lsmem *lsmem, struct memory_block *blk)
+ {
+ 	struct memory_block *last;
+ 	int z;
+
+ 	if (lsmem->nblocks == 0 || lsmem->list_all)
+ 		return 0;
+
+ 	last = &lsmem->blocks[lsmem->nblocks - 1];
+
+ 	if (last->index + last->count != blk->index)
+ 		return 0;
+ 	if (lsmem->split_by_state && last->state != blk->state)
+ 		return 0;
+ 	if (lsmem->split_by_removable && last->removable != blk->removable)
+ 		return 0;
+ 	if (lsmem->split_by_node && lsmem->have_nodes && last->node != blk->node)
+ 		return 0;
+
+ 	if (!(lsmem->split_by_zones && lsmem->have_zones))
+ 		return 1;
+
+ 	if (last->nr_zones != blk->nr_zones)
+ 		return 0;
+ 	for (z = 0; z < last->nr_zones; z++) {
+ 		if (last->zones[z] == ZONE_UNKNOWN)
+ 			return 0;
+ 		if (last->zones[z] != blk->zones[z])
+ 			return 0;
+ 	}
+ 	return 1;
+ }
+
+ /*
+ * Read the memory block size and the attributes of all memory block
+ * directories found by read_basic_info(). Updates the online/offline
+ * totals and builds lsmem->blocks, where consecutive mergeable blocks are
+ * collapsed into one range.
+ */
+ static void read_info(struct lsmem *lsmem)
+ {
+ 	char sizebuf[128];
+ 	int n;
+
+ 	if (ul_path_read_buffer(lsmem->sysmem, sizebuf, sizeof(sizebuf), "block_size_bytes") <= 0)
+ 		err(EXIT_FAILURE, _("failed to read memory block size"));
+ 	lsmem->block_size = strtoumax(sizebuf, NULL, 16);
+
+ 	for (n = 0; n < lsmem->ndirs; n++) {
+ 		struct memory_block cur;
+
+ 		memory_block_read_attrs(lsmem, lsmem->dirs[n]->d_name, &cur);
+
+ 		/* everything that is not online counts as offline */
+ 		if (cur.state != MEMORY_STATE_ONLINE)
+ 			lsmem->mem_offline += lsmem->block_size;
+ 		else
+ 			lsmem->mem_online += lsmem->block_size;
+
+ 		if (is_mergeable(lsmem, &cur)) {
+ 			/* extend the last range */
+ 			lsmem->blocks[lsmem->nblocks - 1].count++;
+ 		} else {
+ 			/* start a new range */
+ 			lsmem->nblocks++;
+ 			lsmem->blocks = xrealloc(lsmem->blocks, lsmem->nblocks * sizeof(cur));
+ 			lsmem->blocks[lsmem->nblocks - 1] = cur;
+ 		}
+ 	}
+ }
+
+ /*
+ * scandir() filter: accept only entries named "memory<digits>". Returns
+ * zero for a name without the "memory" prefix, otherwise the result of
+ * isdigit_string() for the rest of the name.
+ */
+ static int memory_block_filter(const struct dirent *de)
+ {
+ 	const char *name = de->d_name;
+
+ 	if (strncmp("memory", name, 6) == 0)
+ 		return isdigit_string(name + 6);
+ 	return 0;
+ }
+
+ /*
+ * Collect the list of memory block directories and detect which optional
+ * information (node numbers, valid zones) sysfs provides.
+ *
+ * Both features are probed on the first memory block that really exists
+ * (dirs[0]) rather than on a hard-coded "memory0", so the detection also
+ * works when there is no block number 0.
+ *
+ * Terminates the program when memory blocks are not supported or the
+ * directory cannot be read.
+ */
+ static void read_basic_info(struct lsmem *lsmem)
+ {
+ 	char dir[PATH_MAX];
+ 	char attr[PATH_MAX];
+ 	char *first;
+
+ 	if (ul_path_access(lsmem->sysmem, F_OK, "block_size_bytes") != 0)
+ 		errx(EXIT_FAILURE, _("This system does not support memory blocks"));
+
+ 	ul_path_get_abspath(lsmem->sysmem, dir, sizeof(dir), NULL);
+
+ 	lsmem->ndirs = scandir(dir, &lsmem->dirs, memory_block_filter, versionsort);
+ 	if (lsmem->ndirs <= 0)
+ 		err(EXIT_FAILURE, _("Failed to read %s"), dir);
+
+ 	first = lsmem->dirs[0]->d_name;
+
+ 	if (memory_block_get_node(lsmem, first) != -1)
+ 		lsmem->have_nodes = 1;
+
+ 	/* The valid_zones sysmem attribute was introduced with kernel 3.18 */
+ 	snprintf(attr, sizeof(attr), "%s/valid_zones", first);
+ 	if (ul_path_access(lsmem->sysmem, F_OK, attr) == 0)
+ 		lsmem->have_zones = 1;
+ }
+
+ /*
+ * Print the help text (synopsis, options and the list of columns) to
+ * stdout and exit with EXIT_SUCCESS.
+ */
+ static void __attribute__((__noreturn__)) usage(void)
+ {
+ 	/* option lines in output order; translated when printed */
+ 	static const char *const optlines[] = {
+ 		N_(" -J, --json use JSON output format\n"),
+ 		N_(" -P, --pairs use key=\"value\" output format\n"),
+ 		N_(" -a, --all list each individual memory block\n"),
+ 		N_(" -b, --bytes print SIZE in bytes rather than in human readable format\n"),
+ 		N_(" -n, --noheadings don't print headings\n"),
+ 		N_(" -o, --output <list> output columns\n"),
+ 		N_(" --output-all output all columns\n"),
+ 		N_(" -r, --raw use raw output format\n"),
+ 		N_(" -S, --split <list> split ranges by specified columns\n"),
+ 		N_(" -s, --sysroot <dir> use the specified directory as system root\n"),
+ 		N_(" --summary[=when] print summary information (never,always or only)\n"),
+ 	};
+ 	FILE *out = stdout;
+ 	size_t k;
+
+ 	fputs(USAGE_HEADER, out);
+ 	fprintf(out, _(" %s [options]\n"), program_invocation_short_name);
+
+ 	fputs(USAGE_SEPARATOR, out);
+ 	fputs(_("List the ranges of available memory with their online status.\n"), out);
+
+ 	fputs(USAGE_OPTIONS, out);
+ 	for (k = 0; k < ARRAY_SIZE(optlines); k++)
+ 		fputs(_(optlines[k]), out);
+
+ 	fputs(USAGE_SEPARATOR, out);
+ 	printf(USAGE_HELP_OPTIONS(22));
+
+ 	fputs(USAGE_COLUMNS, out);
+ 	for (k = 0; k < ARRAY_SIZE(coldescs); k++)
+ 		fprintf(out, " %10s %s\n", coldescs[k].name, _(coldescs[k].help));
+
+ 	printf(USAGE_MAN_TAIL("lsmem(1)"));
+
+ 	exit(EXIT_SUCCESS);
+ }
+ /*
+ * lsmem entry point.
+ *
+ * Parses the command line, opens the sysfs memory directory (optionally
+ * below a --sysroot prefix), sets up the output columns and the split
+ * policy, reads all memory blocks and prints the table and/or the summary.
+ *
+ * Returns 0 (EXIT_SUCCESS) on success and EXIT_FAILURE for an invalid
+ * --output or --split list; other errors terminate the program through
+ * err()/errx().
+ */
+ int main(int argc, char **argv)
+ {
+ struct lsmem _lsmem = {
+ .want_table = 1,
+ .want_summary = 1
+ }, *lsmem = &_lsmem; /* by default both the table and the summary are printed */
+
+ const char *outarg = NULL, *splitarg = NULL, *prefix = NULL;
+ int c;
+ size_t i;
+
+ enum {
+ LSMEM_OPT_SUMARRY = CHAR_MAX + 1, /* --summary (identifier spelling kept as is) */
+ OPT_OUTPUT_ALL
+ };
+
+ static const struct option longopts[] = {
+ {"all", no_argument, NULL, 'a'},
+ {"bytes", no_argument, NULL, 'b'},
+ {"help", no_argument, NULL, 'h'},
+ {"json", no_argument, NULL, 'J'},
+ {"noheadings", no_argument, NULL, 'n'},
+ {"output", required_argument, NULL, 'o'},
+ {"output-all", no_argument, NULL, OPT_OUTPUT_ALL},
+ {"pairs", no_argument, NULL, 'P'},
+ {"raw", no_argument, NULL, 'r'},
+ {"sysroot", required_argument, NULL, 's'},
+ {"split", required_argument, NULL, 'S'},
+ {"version", no_argument, NULL, 'V'},
+ {"summary", optional_argument, NULL, LSMEM_OPT_SUMARRY },
+ {NULL, 0, NULL, 0}
+ };
+ static const ul_excl_t excl[] = { /* rows and cols in ASCII order */
+ { 'J', 'P', 'r' },
+ { 'S', 'a' },
+ { 0 }
+ };
+ int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ while ((c = getopt_long(argc, argv, "abhJno:PrS:s:V", longopts, NULL)) != -1) {
+
+ err_exclusive_options(c, longopts, excl, excl_st); /* presumably rejects option combinations listed in excl[] -- helper not visible here */
+
+ switch (c) {
+ case 'a':
+ lsmem->list_all = 1;
+ break;
+ case 'b':
+ lsmem->bytes = 1;
+ break;
+ case 'h':
+ usage();
+ break;
+ case 'J': /* the machine readable formats (-J, -P, -r) disable the summary */
+ lsmem->json = 1;
+ lsmem->want_summary = 0;
+ break;
+ case 'n':
+ lsmem->noheadings = 1;
+ break;
+ case 'o':
+ outarg = optarg;
+ break;
+ case OPT_OUTPUT_ALL: /* select every column of coldescs[], in definition order */
+ for (ncolumns = 0; (size_t)ncolumns < ARRAY_SIZE(coldescs); ncolumns++)
+ columns[ncolumns] = ncolumns;
+ break;
+ case 'P':
+ lsmem->export = 1;
+ lsmem->want_summary = 0;
+ break;
+ case 'r':
+ lsmem->raw = 1;
+ lsmem->want_summary = 0;
+ break;
+ case 's':
+ prefix = optarg;
+ break;
+ case 'S':
+ splitarg = optarg;
+ break;
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return 0;
+ case LSMEM_OPT_SUMARRY:
+ if (optarg) {
+ if (strcmp(optarg, "never") == 0)
+ lsmem->want_summary = 0;
+ else if (strcmp(optarg, "only") == 0)
+ lsmem->want_table = 0;
+ else if (strcmp(optarg, "always") == 0)
+ lsmem->want_summary = 1;
+ else
+ errx(EXIT_FAILURE, _("unsupported --summary argument"));
+ } else
+ lsmem->want_table = 0; /* --summary without an argument behaves like "only" */
+ break;
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+ if (argc != optind) { /* lsmem takes no positional arguments */
+ warnx(_("bad usage"));
+ errtryhelp(EXIT_FAILURE);
+ }
+
+ if (lsmem->want_table + lsmem->want_summary == 0) /* nothing would be printed */
+ errx(EXIT_FAILURE, _("options --{raw,json,pairs} and --summary=only are mutually exclusive"));
+
+ ul_path_init_debug();
+
+ lsmem->sysmem = ul_new_path(_PATH_SYS_MEMORY);
+ if (!lsmem->sysmem)
+ err(EXIT_FAILURE, _("failed to initialize %s handler"), _PATH_SYS_MEMORY);
+ if (prefix && ul_path_set_prefix(lsmem->sysmem, prefix) != 0)
+ err(EXIT_FAILURE, _("invalid argument to --sysroot"));
+
+ /* Shortcut to avoid scols machinery on --summary=only */
+ if (lsmem->want_table == 0 && lsmem->want_summary) {
+ read_basic_info(lsmem);
+ read_info(lsmem);
+ print_summary(lsmem);
+ return EXIT_SUCCESS;
+ }
+
+ /*
+ * Default columns (used unless --output-all already filled columns[])
+ */
+ if (!ncolumns) {
+ add_column(columns, ncolumns++, COL_RANGE);
+ add_column(columns, ncolumns++, COL_SIZE);
+ add_column(columns, ncolumns++, COL_STATE);
+ add_column(columns, ncolumns++, COL_REMOVABLE);
+ add_column(columns, ncolumns++, COL_BLOCK);
+ }
+
+ if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns),
+ &ncolumns, column_name_to_id) < 0)
+ return EXIT_FAILURE;
+
+ /*
+ * Initialize output
+ */
+ scols_init_debug(0);
+
+ if (!(lsmem->table = scols_new_table()))
+ errx(EXIT_FAILURE, _("failed to initialize output table"));
+ scols_table_enable_raw(lsmem->table, lsmem->raw);
+ scols_table_enable_export(lsmem->table, lsmem->export);
+ scols_table_enable_json(lsmem->table, lsmem->json);
+ scols_table_enable_noheadings(lsmem->table, lsmem->noheadings);
+
+ if (lsmem->json)
+ scols_table_set_name(lsmem->table, "memory");
+
+ for (i = 0; i < ncolumns; i++) {
+ struct coldesc *ci = get_column_desc(i);
+ struct libscols_column *cl;
+
+ cl = scols_table_new_column(lsmem->table, ci->name, ci->whint, ci->flags);
+ if (!cl)
+ err(EXIT_FAILURE, _("Failed to initialize output column"));
+
+ if (lsmem->json) { /* JSON types: SIZE (only with --bytes) and NODE are numbers, REMOVABLE is a boolean */
+ int id = get_column_id(i);
+
+ switch (id) {
+ case COL_SIZE:
+ if (!lsmem->bytes)
+ break;
+ /* fallthrough */
+ case COL_NODE:
+ scols_column_set_json_type(cl, SCOLS_JSON_NUMBER);
+ break;
+ case COL_REMOVABLE:
+ scols_column_set_json_type(cl, SCOLS_JSON_BOOLEAN);
+ break;
+ }
+ }
+ }
+
+ if (splitarg) {
+ int split[ARRAY_SIZE(coldescs)] = { 0 };
+ static size_t nsplits = 0; /* NOTE(review): static storage is not needed by the code shown here */
+
+ if (strcasecmp(splitarg, "none") == 0)
+ ; /* "none": nsplits stays 0, so no split_by_* flag is set */
+ else if (string_add_to_idarray(splitarg, split, ARRAY_SIZE(split),
+ &nsplits, column_name_to_id) < 0)
+ return EXIT_FAILURE;
+
+ set_split_policy(lsmem, split, nsplits);
+
+ } else
+ /* follow output columns */
+ set_split_policy(lsmem, columns, ncolumns);
+
+ /*
+ * Read data and print output
+ */
+ read_basic_info(lsmem);
+ read_info(lsmem);
+
+ if (lsmem->want_table) {
+ fill_scols_table(lsmem);
+ scols_print_table(lsmem->table);
+
+ if (lsmem->want_summary)
+ fputc('\n', stdout); /* blank line between the table and the summary */
+ }
+
+ if (lsmem->want_summary)
+ print_summary(lsmem);
+
+ scols_unref_table(lsmem->table);
+ ul_unref_path(lsmem->sysmem);
+ return 0;
+ }
diff --git a/sys-utils/lsns.8 b/sys-utils/lsns.8
new file mode 100644
index 0000000..aba3726
--- /dev/null
+++ b/sys-utils/lsns.8
@@ -0,0 +1,93 @@
+.\" Man page for the lsns command.
+.\" Copyright 2015 Karel Zak <kzak@redhat.com>
+.\" May be distributed under the GNU General Public License
+
+.TH LSNS 8 "December 2015" "util-linux" "System Administration"
+.SH NAME
+lsns \- list namespaces
+.SH SYNOPSIS
+.B lsns
+[options]
+.RI [ namespace ]
+
+.SH DESCRIPTION
+.B lsns
+lists information about all the currently accessible namespaces or about the
+given \fInamespace\fP. The \fInamespace\fP identifier is an inode number.
+
+The default output is subject to change. So whenever possible, you should
+avoid using default outputs in your scripts. Always explicitly define expected
+columns by using the \fB\-\-output\fR option together with a columns list in
+environments where a stable output is required.
+
+The \fBNSFS\fP column, printed when \fBnet\fP is specified for the
+\fB\-\-type\fR option, is special; it uses multi-line cells.
+Use the \fB\-\-nowrap\fR option to switch to a ","-separated single-line
+representation.
+
+Note that \fBlsns\fR reads information directly from the /proc filesystem and
+for non-root users it may return incomplete information. The current /proc
+filesystem may be unshared and affected by a PID namespace
+(see \fBunshare \-\-mount\-proc\fP for more details).
+.B lsns
+is not able to see persistent namespaces without processes where the namespace
+instance is held by a bind mount to /proc/\fIpid\fR/ns/\fItype\fR.
+
+.SH OPTIONS
+.TP
+.BR \-J , " \-\-json"
+Use JSON output format.
+.TP
+.BR \-l , " \-\-list"
+Use list output format.
+.TP
+.BR \-n , " \-\-noheadings"
+Do not print a header line.
+.TP
+.BR \-o , " \-\-output " \fIlist\fP
+Specify which output columns to print. Use \fB\-\-help\fR
+to get a list of all supported columns.
+
+The default list of columns may be extended if \fIlist\fP is
+specified in the format \fB+\fIlist\fP (e.g. \fBlsns \-o +PATH\fP).
+.TP
+.B \-\-output\-all
+Output all available columns.
+.TP
+.BR \-p , " \-\-task " \fIpid\fP
+Display only the namespaces held by the process with this \fIpid\fR.
+.TP
+.BR \-r , " \-\-raw"
+Use the raw output format.
+.TP
+.BR \-t , " \-\-type " \fItype\fP
+Display the specified \fItype\fP of namespaces only. The supported types are
+\fBmnt\fP, \fBnet\fP, \fBipc\fP, \fBuser\fP, \fBpid\fP, \fButs\fP and
+\fBcgroup\fP. This option may be given more than once.
+.TP
+.BR \-u , " \-\-notruncate"
+Do not truncate text in columns.
+.TP
+.BR \-W , " \-\-nowrap"
+Do not use multi-line text in columns.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+
+.SH AUTHORS
+.nf
+Karel Zak <kzak@redhat.com>
+.fi
+
+.SH "SEE ALSO"
+.BR nsenter (1),
+.BR unshare (1),
+.BR clone (2),
+.BR namespaces (7)
+
+.SH AVAILABILITY
+The lsns command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/lsns.c b/sys-utils/lsns.c
new file mode 100644
index 0000000..38ea2e0
--- /dev/null
+++ b/sys-utils/lsns.c
@@ -0,0 +1,1100 @@
+/*
+ * lsns(8) - list system namespaces
+ *
+ * Copyright (C) 2015 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+#include <stdio.h>
+#include <string.h>
+#include <getopt.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <wchar.h>
+#include <libsmartcols.h>
+#include <libmount.h>
+
+#ifdef HAVE_LINUX_NET_NAMESPACE_H
+#include <stdbool.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/net_namespace.h>
+#endif
+
+#include "pathnames.h"
+#include "nls.h"
+#include "xalloc.h"
+#include "c.h"
+#include "list.h"
+#include "closestream.h"
+#include "optutils.h"
+#include "procutils.h"
+#include "strutils.h"
+#include "namespace.h"
+#include "idcache.h"
+
+#include "debug.h"
+
+/*
+ * Debug mask of this utility; it is set up by lsns_init_debug()
+ * (presumably from the LSNS_DEBUG environment variable -- confirm in debug.h).
+ */
+static UL_DEBUG_DEFINE_MASK(lsns);
+UL_DEBUG_DEFINE_MASKNAMES(lsns) = UL_DEBUG_EMPTY_MASKNAMES;
+
+/* debug categories; the part after LSNS_DEBUG_ is what DBG()/ON_DBG() take as "m" */
+#define LSNS_DEBUG_INIT (1 << 1)
+#define LSNS_DEBUG_PROC (1 << 2)
+#define LSNS_DEBUG_NS (1 << 3)
+#define LSNS_DEBUG_ALL 0xFFFF
+
+/*
+ * netnsid value used when the ID could not be obtained (no netlink socket,
+ * failed request, or built without linux/net_namespace.h). netnsid_xasputs()
+ * produces no string for it, so the NETNSID cell stays empty.
+ */
+#define LSNS_NETNS_UNUSABLE -2
+
+#define DBG(m, x) __UL_DBG(lsns, LSNS_DEBUG_, m, x)
+#define ON_DBG(m, x) __UL_DBG_CALL(lsns, LSNS_DEBUG_, m, x)
+
+/* must be defined before debugobj.h is included */
+#define UL_DEBUG_CURRENT_MASK UL_DEBUG_MASK(lsns)
+#include "debugobj.h"
+
+/*
+ * UID cache: allocated in main(), filled by read_process() and used by
+ * add_scols_line() for the USER column.
+ */
+static struct idcache *uid_cache = NULL;
+
+/* column IDs; each one is also the index of the column in infos[] */
+enum {
+ COL_NS = 0,
+ COL_TYPE,
+ COL_PATH,
+ COL_NPROCS,
+ COL_PID,
+ COL_PPID,
+ COL_COMMAND,
+ COL_UID,
+ COL_USER,
+ COL_NETNSID,
+ COL_NSFS,
+};
+
+/* description of one output column */
+struct colinfo {
+ const char *name; /* header */
+ double whint; /* width hint (N < 1 is in percent of termwidth) */
+ int flags; /* SCOLS_FL_* */
+ const char *help; /* one-line description printed by usage() */
+ int json_type; /* SCOLS_JSON_* type set on the column when --json is used */
+};
+
+/*
+ * columns descriptions
+ *
+ * Entries without an explicit json_type get 0 from the initializer
+ * (presumably the libsmartcols string type -- confirm in libsmartcols.h).
+ */
+static const struct colinfo infos[] = {
+ [COL_NS] = { "NS", 10, SCOLS_FL_RIGHT, N_("namespace identifier (inode number)"), SCOLS_JSON_NUMBER },
+ [COL_TYPE] = { "TYPE", 5, 0, N_("kind of namespace") },
+ [COL_PATH] = { "PATH", 0, 0, N_("path to the namespace")},
+ [COL_NPROCS] = { "NPROCS", 5, SCOLS_FL_RIGHT, N_("number of processes in the namespace"), SCOLS_JSON_NUMBER },
+ [COL_PID] = { "PID", 5, SCOLS_FL_RIGHT, N_("lowest PID in the namespace"), SCOLS_JSON_NUMBER },
+ [COL_PPID] = { "PPID", 5, SCOLS_FL_RIGHT, N_("PPID of the PID"), SCOLS_JSON_NUMBER },
+ [COL_COMMAND] = { "COMMAND", 0, SCOLS_FL_TRUNC, N_("command line of the PID")},
+ [COL_UID] = { "UID", 0, SCOLS_FL_RIGHT, N_("UID of the PID"), SCOLS_JSON_NUMBER},
+ [COL_USER] = { "USER", 0, 0, N_("username of the PID")},
+ [COL_NETNSID] = { "NETNSID", 0, SCOLS_FL_RIGHT, N_("namespace ID as used by network subsystem")},
+ [COL_NSFS] = { "NSFS", 0, SCOLS_FL_WRAP, N_("nsfs mountpoint (usually used network subsystem)")}
+};
+
+/*
+ * IDs (COL_*) of the columns to print, in output order, and how many of them
+ * are used. The array is twice as large as infos[]; presumably so that a
+ * list given by --output can be appended to an already filled selection.
+ */
+static int columns[ARRAY_SIZE(infos) * 2];
+static size_t ncolumns;
+
+/*
+ * Namespace types. The values index ns_names[] as well as the per-process
+ * ns_ids[]/ns_siblings[] arrays and the fltr_types[] filter.
+ */
+enum {
+ LSNS_ID_MNT = 0,
+ LSNS_ID_NET,
+ LSNS_ID_PID,
+ LSNS_ID_UTS,
+ LSNS_ID_IPC,
+ LSNS_ID_USER,
+ LSNS_ID_CGROUP
+};
+
+/*
+ * Namespace type names. The same strings are accepted by --type, printed in
+ * the TYPE column and used as file names below /proc/<pid>/ns/.
+ */
+static char *ns_names[] = {
+ [LSNS_ID_MNT] = "mnt",
+ [LSNS_ID_NET] = "net",
+ [LSNS_ID_PID] = "pid",
+ [LSNS_ID_UTS] = "uts",
+ [LSNS_ID_IPC] = "ipc",
+ [LSNS_ID_USER] = "user",
+ [LSNS_ID_CGROUP] = "cgroup"
+};
+
+/* one namespace, shared by all processes that reported the same inode */
+struct lsns_namespace {
+ ino_t id; /* inode number of the namespace */
+ int type; /* LSNS_ID_* */
+ int nprocs; /* number of processes added by add_process_to_namespace() */
+ int netnsid; /* not set or read anywhere in this file; the per-process netnsid is printed */
+
+ struct lsns_process *proc; /* member process with the lowest PID */
+
+ struct list_head namespaces; /* lsns->namespaces member */
+ struct list_head processes; /* head of lsns_process *siblings */
+};
+
+/* one process as read from /proc/<pid> */
+struct lsns_process {
+ pid_t pid; /* process PID */
+ pid_t ppid; /* parent's PID */
+ pid_t tpid; /* thread group */
+ char state; /* state character parsed from /proc/<pid>/stat */
+ uid_t uid; /* owner of the /proc/<pid> directory */
+
+ ino_t ns_ids[ARRAY_SIZE(ns_names)]; /* namespace inodes indexed by LSNS_ID_*; 0 if not read */
+ struct list_head ns_siblings[ARRAY_SIZE(ns_names)]; /* links into lsns_namespace->processes, one per type */
+
+ struct list_head processes; /* list of processes */
+
+ struct libscols_line *outline; /* output line, set once the process has been printed */
+ struct lsns_process *parent; /* parent process, if it has been read too */
+
+ int netnsid; /* network namespace ID or LSNS_NETNS_UNUSABLE */
+};
+
+/* global control struct: collected data, filters and output settings */
+struct lsns {
+ struct list_head processes; /* all lsns_process entries read from /proc */
+ struct list_head namespaces; /* all lsns_namespace entries, sorted by inode in read_namespaces() */
+
+ pid_t fltr_pid; /* filter out by PID */
+ ino_t fltr_ns; /* filter out by namespace */
+ int fltr_types[ARRAY_SIZE(ns_names)]; /* non-zero for each LSNS_ID_* type to be read */
+ int fltr_ntypes; /* number of --type options given */
+
+ unsigned int raw : 1,
+ json : 1,
+ tree : 1,
+ list : 1,
+ no_trunc : 1,
+ no_headings: 1,
+ no_wrap : 1;
+
+ struct libmnt_table *tab; /* parsed mountinfo; loaded only when the NSFS column is printed */
+};
+
+/* cache entry mapping a network namespace inode to its netnsid */
+struct netnsid_cache {
+ ino_t ino; /* inode of the network namespace */
+ int id; /* cached netnsid, may be LSNS_NETNS_UNUSABLE */
+ struct list_head netnsids; /* netnsids_cache member */
+};
+
+/* head of the netnsid_cache list; initialized in main() */
+static struct list_head netnsids_cache;
+
+/* rtnetlink socket; opened in main() only when the NETNSID column is printed */
+static int netlink_fd = -1;
+
+/*
+ * Initialize the lsns debug mask (presumably from the LSNS_DEBUG environment
+ * variable -- confirm in debug.h).
+ */
+static void lsns_init_debug(void)
+{
+ __UL_INIT_DEBUG_FROM_ENV(lsns, LSNS_DEBUG_, 0, LSNS_DEBUG);
+}
+
+/*
+ * Translate a namespace type name (for example "mnt") to its LSNS_ID_*
+ * value. The comparison is case-sensitive.
+ *
+ * Returns the index into ns_names[], or -1 if @name is not known.
+ */
+static int ns_name2type(const char *name)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(ns_names)) {
+		if (!strcmp(name, ns_names[idx]))
+			return idx;
+		idx++;
+	}
+
+	return -1;
+}
+
+/*
+ * Look up a column by name. @name does not have to be zero terminated, only
+ * the first @namesz bytes are compared (case-insensitively) and the column
+ * name has to end right after them.
+ *
+ * Returns the COL_* ID, or -1 (after printing a warning) for an unknown name.
+ */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	const struct colinfo *ci;
+
+	assert(name);
+
+	for (ci = infos; ci < infos + ARRAY_SIZE(infos); ci++) {
+		if (strncasecmp(name, ci->name, namesz) != 0)
+			continue;
+		if (ci->name[namesz] == '\0')
+			return ci - infos;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/* Returns 1 if the column @id is among the selected output columns, otherwise 0. */
+static int has_column(int id)
+{
+	size_t left = ncolumns;
+
+	while (left > 0) {
+		if (columns[--left] == id)
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Returns the COL_* ID of the @num-th selected output column. @num has to be
+ * a valid position in columns[]; this is checked by assert() only.
+ */
+static inline int get_column_id(int num)
+{
+	int id;
+
+	assert(num >= 0);
+	assert((size_t) num < ncolumns);
+
+	id = columns[num];
+	assert(id < (int) ARRAY_SIZE(infos));
+
+	return id;
+}
+
+/* Returns the infos[] description of the @num-th selected output column. */
+static inline const struct colinfo *get_column_info(unsigned num)
+{
+	return infos + get_column_id(num);
+}
+
+/*
+ * Read the inode number of the namespace file "ns/<nsname>" relative to the
+ * directory file descriptor @dir (an open /proc/<pid> directory).
+ *
+ * Returns 0 and stores the inode to @ino on success, or a negative errno
+ * value if fstatat() fails; @ino is left untouched in that case.
+ */
+static int get_ns_ino(int dir, const char *nsname, ino_t *ino)
+{
+	char relpath[16];
+	struct stat sb;
+	int rc = 0;
+
+	snprintf(relpath, sizeof(relpath), "ns/%s", nsname);
+
+	if (fstatat(dir, relpath, &sb, 0) == 0)
+		*ino = sb.st_ino;
+	else
+		rc = -errno;
+
+	return rc;
+}
+
+/*
+ * Parse the first line of a /proc/<pid>/stat stream: the PID in front of the
+ * "(command)" field and the state character and parent PID behind it. The
+ * last ')' on the line is taken as the end of the command, so commands that
+ * contain parentheses themselves are handled.
+ *
+ * Returns 0 on success, -errno if the line cannot be read, and -EINVAL if it
+ * does not have the expected format.
+ */
+static int parse_proc_stat(FILE *fp, pid_t *pid, char *state, pid_t *ppid)
+{
+	char *buf = NULL;
+	size_t bufsz = 0;
+	int rc = -EINVAL;
+
+	if (getline(&buf, &bufsz, fp) < 0) {
+		rc = -errno;
+	} else {
+		char *tail = strrchr(buf, ')');
+
+		if (tail
+		    && sscanf(buf, "%d (", pid) == 1
+		    && sscanf(tail, ") %c %d*[^\n]", state, ppid) == 2)
+			rc = 0;
+	}
+
+	free(buf);
+	return rc;
+}
+
+#ifdef HAVE_LINUX_NET_NAMESPACE_H
+/*
+ * Search the netnsid cache for the network namespace inode @netino.
+ *
+ * Returns 1 and stores the cached ID to @netnsid if found, otherwise 0.
+ */
+static int netnsid_cache_find(ino_t netino, int *netnsid)
+{
+	struct list_head *pos;
+	struct netnsid_cache *hit = NULL;
+
+	list_for_each(pos, &netnsids_cache) {
+		struct netnsid_cache *ent = list_entry(pos,
+						struct netnsid_cache,
+						netnsids);
+		if (ent->ino == netino) {
+			hit = ent;
+			break;
+		}
+	}
+
+	if (!hit)
+		return 0;
+
+	*netnsid = hit->id;
+	return 1;
+}
+
+/* Remember @netnsid as the ID of the network namespace with inode @netino. */
+static void netnsid_cache_add(ino_t netino, int netnsid)
+{
+	struct netnsid_cache *ent = xcalloc(1, sizeof(struct netnsid_cache));
+
+	INIT_LIST_HEAD(&ent->netnsids);
+	ent->id = netnsid;
+	ent->ino = netino;
+
+	list_add(&ent->netnsids, &netnsids_cache);
+}
+
+/*
+ * Send an RTM_GETNSID request for the network namespace that the file
+ * descriptor @target_fd refers to. The request is sent through the global
+ * netlink_fd socket.
+ *
+ * Returns 0 on success, -1 if send() fails.
+ */
+static int get_netnsid_via_netlink_send_request(int target_fd)
+{
+	unsigned char req[NLMSG_SPACE(sizeof(struct rtgenmsg))
+			  + RTA_SPACE(sizeof(int32_t))];
+
+	struct nlmsghdr *nlh = (struct nlmsghdr *)req;
+	struct rtgenmsg *rt = NLMSG_DATA(req);
+	struct rtattr *rta = (struct rtattr *)
+			     (req + NLMSG_SPACE(sizeof(struct rtgenmsg)));
+	int32_t *fd = RTA_DATA(rta);
+
+	/*
+	 * Start from an all-zero message. The header fields which are not
+	 * set below (nlmsg_seq, nlmsg_pid) and the alignment padding would
+	 * otherwise carry uninitialized stack bytes to the kernel.
+	 */
+	memset(req, 0, sizeof(req));
+
+	nlh->nlmsg_len = sizeof(req);
+	nlh->nlmsg_flags = NLM_F_REQUEST;
+	nlh->nlmsg_type = RTM_GETNSID;
+	rt->rtgen_family = AF_UNSPEC;
+	rta->rta_type = NETNSA_FD;
+	rta->rta_len = RTA_SPACE(sizeof(int32_t));
+	*fd = target_fd;
+
+	if (send(netlink_fd, req, sizeof(req), 0) < 0)
+		return -1;
+	return 0;
+}
+
+/*
+ * Receive the reply to an RTM_GETNSID request from the global netlink_fd
+ * socket. The buffer is sized for the larger of the expected reply
+ * attribute and a netlink error message.
+ *
+ * Returns 0 and stores the ID to @netnsid if the reply is an RTM_NEWNSID
+ * message whose first attribute is NETNSA_NSID; returns -1 otherwise and
+ * leaves @netnsid untouched.
+ */
+static int get_netnsid_via_netlink_recv_response(int *netnsid)
+{
+	unsigned char res[NLMSG_SPACE(sizeof(struct rtgenmsg))
+			  + ((RTA_SPACE(sizeof(int32_t))
+			      < RTA_SPACE(sizeof(struct nlmsgerr)))
+			     ? RTA_SPACE(sizeof(struct nlmsgerr))
+			     : RTA_SPACE(sizeof(int32_t)))];
+	struct nlmsghdr *nlh = (struct nlmsghdr *)res;
+	struct rtattr *rta;
+	ssize_t reslen;
+	int rtalen;
+
+	reslen = recv(netlink_fd, res, sizeof(res), 0);
+	if (reslen < 0)
+		return -1;
+
+	if (!NLMSG_OK(nlh, (size_t)reslen))
+		return -1;
+	if (nlh->nlmsg_type != RTM_NEWNSID)
+		return -1;
+
+	rtalen = NLMSG_PAYLOAD(nlh, sizeof(struct rtgenmsg));
+	rta = (struct rtattr *)(res + NLMSG_SPACE(sizeof(struct rtgenmsg)));
+
+	if (!RTA_OK(rta, rtalen))
+		return -1;
+	if (rta->rta_type != NETNSA_NSID)
+		return -1;
+
+	*netnsid = *(int *)RTA_DATA(rta);
+
+	return 0;
+}
+
+/*
+ * Ask the kernel for the ID of the network namespace file @path, opened
+ * relative to the directory file descriptor @dir.
+ *
+ * Returns the ID reported by the kernel, or LSNS_NETNS_UNUSABLE if there is
+ * no netlink socket, the file cannot be opened, or the request fails.
+ */
+static int get_netnsid_via_netlink(int dir, const char *path)
+{
+	int nsfd;
+	int id = LSNS_NETNS_UNUSABLE;
+
+	if (netlink_fd < 0)
+		return LSNS_NETNS_UNUSABLE;
+
+	nsfd = openat(dir, path, O_RDONLY);
+	if (nsfd < 0)
+		return LSNS_NETNS_UNUSABLE;
+
+	/* the reply is read only if the request has been sent */
+	if (get_netnsid_via_netlink_send_request(nsfd) < 0
+	    || get_netnsid_via_netlink_recv_response(&id) < 0)
+		id = LSNS_NETNS_UNUSABLE;
+
+	close(nsfd);
+	return id;
+}
+
+/*
+ * Returns the netnsid of the network namespace with inode @netino. @dir is
+ * the open /proc/<pid> directory of a process in that namespace. The kernel
+ * is asked only once per inode; the answer (including LSNS_NETNS_UNUSABLE)
+ * is cached.
+ */
+static int get_netnsid(int dir, ino_t netino)
+{
+	int id;
+
+	if (netnsid_cache_find(netino, &id))
+		return id;
+
+	id = get_netnsid_via_netlink(dir, "ns/net");
+	netnsid_cache_add(netino, id);
+
+	return id;
+}
+#else
+/* Fallback used when built without linux/net_namespace.h: the ID is never available. */
+static int get_netnsid(int dir __attribute__((__unused__)),
+ ino_t netino __attribute__((__unused__)))
+{
+ return LSNS_NETNS_UNUSABLE;
+}
+#endif /* HAVE_LINUX_NET_NAMESPACE_H */
+
+/*
+ * Read one process from /proc/<pid> and append it to ls->processes.
+ *
+ * The UID is taken from the owner of the /proc/<pid> directory, PID, state
+ * and PPID from the "stat" file, and the namespace inodes from "ns/<type>"
+ * for every type enabled in ls->fltr_types. A namespace file that cannot be
+ * read due to EACCES or ENOENT is skipped (its inode stays 0).
+ *
+ * Returns 0 on success or a negative errno value; nothing is added to the
+ * list on failure.
+ */
+static int read_process(struct lsns *ls, pid_t pid)
+{
+	struct lsns_process *p = NULL;
+	char buf[BUFSIZ];
+	DIR *dir;
+	int rc = 0, fd;
+	FILE *f = NULL;
+	size_t i;
+	struct stat st;
+
+	DBG(PROC, ul_debug("reading %d", (int) pid));
+
+	snprintf(buf, sizeof(buf), "/proc/%d", pid);
+	dir = opendir(buf);
+	if (!dir)
+		return -errno;
+
+	p = xcalloc(1, sizeof(*p));
+	p->netnsid = LSNS_NETNS_UNUSABLE;
+
+	if (fstat(dirfd(dir), &st) == 0) {
+		p->uid = st.st_uid;
+		add_uid(uid_cache, st.st_uid);
+	}
+
+	fd = openat(dirfd(dir), "stat", O_RDONLY);
+	if (fd < 0) {
+		rc = -errno;
+		goto done;
+	}
+	f = fdopen(fd, "r");
+	if (!f) {
+		rc = -errno;
+		/* no stream owns the descriptor yet, it would leak otherwise */
+		close(fd);
+		goto done;
+	}
+	rc = parse_proc_stat(f, &p->pid, &p->state, &p->ppid);
+	if (rc < 0)
+		goto done;
+	rc = 0;
+
+	for (i = 0; i < ARRAY_SIZE(p->ns_ids); i++) {
+		INIT_LIST_HEAD(&p->ns_siblings[i]);
+
+		if (!ls->fltr_types[i])
+			continue;
+
+		rc = get_ns_ino(dirfd(dir), ns_names[i], &p->ns_ids[i]);
+		if (rc && rc != -EACCES && rc != -ENOENT)
+			goto done;
+		if (i == LSNS_ID_NET)
+			p->netnsid = get_netnsid(dirfd(dir), p->ns_ids[i]);
+		rc = 0;
+	}
+
+	INIT_LIST_HEAD(&p->processes);
+
+	DBG(PROC, ul_debugobj(p, "new pid=%d", p->pid));
+	list_add_tail(&p->processes, &ls->processes);
+done:
+	if (f)
+		fclose(f);
+	closedir(dir);
+	if (rc)
+		free(p);
+	return rc;
+}
+
+/*
+ * Read all processes listed in /proc into ls->processes. Processes that
+ * cannot be read due to EACCES or ENOENT are silently skipped; any other
+ * error stops the scan.
+ *
+ * Returns 0 on success or a negative errno value.
+ */
+static int read_processes(struct lsns *ls)
+{
+	struct proc_processes *ps;
+	pid_t pid;
+	int rc = 0;
+
+	DBG(PROC, ul_debug("opening /proc"));
+
+	ps = proc_open_processes();
+	if (!ps)
+		rc = -errno;
+	else {
+		while (proc_next_pid(ps, &pid) == 0) {
+			int err = read_process(ls, pid);
+
+			if (err == 0 || err == -EACCES || err == -ENOENT)
+				continue;
+
+			rc = err;
+			break;
+		}
+	}
+
+	DBG(PROC, ul_debug("closing /proc"));
+	proc_close_processes(ps);
+	return rc;
+}
+
+/* Returns the already known namespace with inode @ino, or NULL if there is none. */
+static struct lsns_namespace *get_namespace(struct lsns *ls, ino_t ino)
+{
+	struct list_head *pos;
+	struct lsns_namespace *found = NULL;
+
+	list_for_each(pos, &ls->namespaces) {
+		struct lsns_namespace *cand = list_entry(pos, struct lsns_namespace, namespaces);
+
+		if (cand->id == ino) {
+			found = cand;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/* Returns 1 if the process @pid is a member of the namespace @ns, otherwise 0. */
+static int namespace_has_process(struct lsns_namespace *ns, pid_t pid)
+{
+	struct list_head *pos;
+	int found = 0;
+
+	list_for_each(pos, &ns->processes) {
+		struct lsns_process *member = list_entry(pos, struct lsns_process, ns_siblings[ns->type]);
+
+		if (member->pid == pid) {
+			found = 1;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/*
+ * Allocate a new namespace of the given @type (LSNS_ID_*) and inode @ino and
+ * append it to ls->namespaces.
+ *
+ * Returns the new namespace, or NULL if the allocation returned NULL.
+ */
+static struct lsns_namespace *add_namespace(struct lsns *ls, int type, ino_t ino)
+{
+	struct lsns_namespace *ns;
+
+	ns = xcalloc(1, sizeof(struct lsns_namespace));
+	if (ns == NULL)
+		return NULL;
+
+	DBG(NS, ul_debugobj(ns, "new %s[%ju]", ns_names[type], (uintmax_t)ino));
+
+	ns->id = ino;
+	ns->type = type;
+
+	INIT_LIST_HEAD(&ns->namespaces);
+	INIT_LIST_HEAD(&ns->processes);
+
+	list_add_tail(&ns->namespaces, &ls->namespaces);
+
+	return ns;
+}
+
+/*
+ * Make @proc a member of the namespace @ns.
+ *
+ * The parent/child pointers between @proc and all the other known processes
+ * are updated as a side effect, and @proc becomes the representative process
+ * of @ns if it has the lowest PID seen so far.
+ *
+ * Always returns 0.
+ */
+static int add_process_to_namespace(struct lsns *ls, struct lsns_namespace *ns, struct lsns_process *proc)
+{
+	struct list_head *pos;
+
+	DBG(NS, ul_debugobj(ns, "add process [%p] pid=%d to %s[%ju]",
+		proc, proc->pid, ns_names[ns->type], (uintmax_t)ns->id));
+
+	list_for_each(pos, &ls->processes) {
+		struct lsns_process *other = list_entry(pos, struct lsns_process, processes);
+
+		if (other->pid == proc->ppid) {
+			/* the other process is our parent */
+			proc->parent = other;
+			continue;
+		}
+		if (other->ppid == proc->pid)
+			/* the other process is our child */
+			other->parent = proc;
+	}
+
+	ns->nprocs++;
+	list_add_tail(&proc->ns_siblings[ns->type], &ns->processes);
+
+	if (ns->proc == NULL || proc->pid < ns->proc->pid)
+		ns->proc = proc;
+
+	return 0;
+}
+
+/* list_sort() callback: order the namespaces by their inode number. */
+static int cmp_namespaces(struct list_head *a, struct list_head *b,
+			  __attribute__((__unused__)) void *data)
+{
+	struct lsns_namespace *lhs, *rhs;
+
+	lhs = list_entry(a, struct lsns_namespace, namespaces);
+	rhs = list_entry(b, struct lsns_namespace, namespaces);
+
+	return cmp_numbers(lhs->id, rhs->id);
+}
+
+/*
+ * Format @netnsid for the NETNSID column into a newly allocated string
+ * stored to @str: the number for a non-negative ID, or "unassigned" for
+ * NETNSA_NSID_NOT_ASSIGNED (if the kernel headers define it).
+ *
+ * For any other value @str is left untouched and 0 is returned; otherwise
+ * the xasprintf() result is returned.
+ */
+static int netnsid_xasputs(char **str, int netnsid)
+{
+	if (netnsid >= 0)
+		return xasprintf(str, "%d", netnsid);
+
+#ifdef NETNSA_NSID_NOT_ASSIGNED
+	if (netnsid == NETNSA_NSID_NOT_ASSIGNED)
+		return xasprintf(str, "%s", "unassigned");
+#endif
+	return 0;
+}
+
+/*
+ * Build ls->namespaces from the already loaded ls->processes: every non-zero
+ * namespace inode of every process is turned into a namespace (created on
+ * first use) and the process is added as its member. The resulting list is
+ * sorted by inode number.
+ *
+ * Returns 0 on success, -ENOMEM if a namespace cannot be allocated.
+ */
+static int read_namespaces(struct lsns *ls)
+{
+	struct list_head *pos;
+
+	DBG(NS, ul_debug("reading namespace"));
+
+	list_for_each(pos, &ls->processes) {
+		struct lsns_process *proc = list_entry(pos, struct lsns_process, processes);
+		size_t type;
+
+		for (type = 0; type < ARRAY_SIZE(proc->ns_ids); type++) {
+			ino_t ino = proc->ns_ids[type];
+			struct lsns_namespace *ns;
+
+			if (!ino)
+				continue;
+
+			ns = get_namespace(ls, ino);
+			if (!ns)
+				ns = add_namespace(ls, type, ino);
+			if (!ns)
+				return -ENOMEM;
+
+			add_process_to_namespace(ls, ns, proc);
+		}
+	}
+
+	list_sort(&ls->namespaces, cmp_namespaces, NULL);
+
+	return 0;
+}
+
+/*
+ * Match callback for mnt_table_find_next_fs(): returns non-zero if @fs is an
+ * "nsfs" filesystem whose root is equal to the string passed in @data.
+ */
+static int is_nsfs_root(struct libmnt_fs *fs, void *data)
+{
+	const char *wanted = (char *)data;
+	const char *root;
+
+	if (!mnt_fs_match_fstype(fs, "nsfs"))
+		return 0;
+
+	root = mnt_fs_get_root(fs);
+	if (!root)
+		return 0;
+
+	return !strcmp(root, wanted);
+}
+
+/*
+ * Check whether @elt is one of the elements of @path_set, a list of paths
+ * separated by @sep.
+ *
+ * An occurrence of @elt counts only if it is a complete element: it has to
+ * start at the beginning of @path_set or right behind a separator, and it
+ * has to be followed by a separator or by the end of the string. All
+ * occurrences are inspected, so an element is found even if an earlier,
+ * longer element starts with the same text (e.g. "/a" in "/ab,/a").
+ *
+ * Returns 1 if @elt is included, otherwise 0.
+ */
+static int is_path_included(const char *path_set, const char *elt,
+			    const char sep)
+{
+	const size_t elt_len = strlen(elt);
+	const char *hit;
+
+	for (hit = strstr(path_set, elt); hit; hit = strstr(hit + 1, elt)) {
+		const char after = hit[elt_len];
+
+		/* hit[-1] is read only if hit is not the first byte of path_set */
+		if ((hit == path_set || hit[-1] == sep)
+		    && (after == sep || after == '\0'))
+			return 1;
+
+		/* the match ends the string, nothing can follow it */
+		if (after == '\0')
+			break;
+	}
+
+	return 0;
+}
+
+/*
+ * Collect the mountpoints of all nsfs filesystems in @tab whose root is
+ * "<type>:[<inode>]" of the namespace @ns. The mountpoints are joined by
+ * @sep, a mountpoint already present in the result is not added again.
+ *
+ * The newly allocated result is stored to @str; it is NULL if nothing has
+ * been found. Always returns 1.
+ */
+static int nsfs_xasputs(char **str,
+			struct lsns_namespace *ns,
+			struct libmnt_table *tab,
+			char sep)
+{
+	struct libmnt_iter *itr = mnt_new_iter(MNT_ITER_FORWARD);
+	struct libmnt_fs *fs = NULL;
+	char *root = NULL;
+
+	*str = NULL;
+	xasprintf(&root, "%s:[%ju]", ns_names[ns->type], (uintmax_t)ns->id);
+
+	while (mnt_table_find_next_fs(tab, itr, is_nsfs_root,
+				      root, &fs) == 0) {
+
+		const char *target = mnt_fs_get_target(fs);
+		char *joined = NULL;
+
+		if (*str == NULL) {
+			/* the first mountpoint */
+			xasprintf(str, "%s", target);
+			continue;
+		}
+		if (is_path_included(*str, target, sep))
+			continue;
+
+		xasprintf(&joined, "%s%c%s", *str, sep, target);
+		free(*str);
+		*str = joined;
+	}
+
+	mnt_free_iter(itr);
+	free(root);
+
+	return 1;
+}
+/*
+ * Add one output line describing the namespace @ns and the process @proc to
+ * @table and fill in all the selected columns.
+ *
+ * In the tree mode the line becomes a child of the line of the parent
+ * process (if the parent has one). The new line is remembered in
+ * proc->outline. A failure to allocate the line is reported by a warning
+ * only.
+ */
+static void add_scols_line(struct lsns *ls, struct libscols_table *table,
+ struct lsns_namespace *ns, struct lsns_process *proc)
+{
+ size_t i;
+ struct libscols_line *line;
+
+ assert(ns);
+ assert(table);
+
+ line = scols_table_new_line(table,
+ ls->tree && proc->parent ? proc->parent->outline : NULL);
+ if (!line) {
+ warn(_("failed to add line to output"));
+ return;
+ }
+
+ for (i = 0; i < ncolumns; i++) {
+ char *str = NULL; /* stays NULL if the cell has no data */
+
+ switch (get_column_id(i)) {
+ case COL_NS:
+ xasprintf(&str, "%ju", (uintmax_t)ns->id);
+ break;
+ case COL_PID:
+ xasprintf(&str, "%d", (int) proc->pid);
+ break;
+ case COL_PPID:
+ xasprintf(&str, "%d", (int) proc->ppid);
+ break;
+ case COL_TYPE:
+ xasprintf(&str, "%s", ns_names[ns->type]);
+ break;
+ case COL_NPROCS:
+ xasprintf(&str, "%d", ns->nprocs);
+ break;
+ case COL_COMMAND:
+ /* fall back to the command name if the command line is not available */
+ str = proc_get_command(proc->pid);
+ if (!str)
+ str = proc_get_command_name(proc->pid);
+ break;
+ case COL_PATH:
+ xasprintf(&str, "/proc/%d/ns/%s", (int) proc->pid, ns_names[ns->type]);
+ break;
+ case COL_UID:
+ xasprintf(&str, "%d", (int) proc->uid);
+ break;
+ case COL_USER:
+ /* NOTE(review): assumes get_id() finds the UID in the cache -- confirm for processes whose fstat() failed in read_process() */
+ xasprintf(&str, "%s", get_id(uid_cache, proc->uid)->name);
+ break;
+ case COL_NETNSID:
+ /* only network namespaces have a netnsid */
+ if (ns->type == LSNS_ID_NET)
+ netnsid_xasputs(&str, proc->netnsid);
+ break;
+ case COL_NSFS:
+ /* one mountpoint per line, or comma separated for --nowrap */
+ nsfs_xasputs(&str, ns, ls->tab, ls->no_wrap ? ',' : '\n');
+ break;
+ default:
+ break;
+ }
+
+ /* presumably the line takes ownership of str here -- confirm in libsmartcols docs */
+ if (str && scols_line_refer_data(line, i, str) != 0)
+ err_oom();
+ }
+
+ proc->outline = line;
+}
+
+/*
+ * Create the output table and its columns according to the output settings
+ * in @ls and the global list of selected columns.
+ *
+ * Returns the new table, or NULL (after printing a warning) on failure.
+ */
+static struct libscols_table *init_scols_table(struct lsns *ls)
+{
+ struct libscols_table *tab;
+ size_t i;
+
+ tab = scols_new_table();
+ if (!tab) {
+ warn(_("failed to initialize output table"));
+ return NULL;
+ }
+
+ scols_table_enable_raw(tab, ls->raw);
+ scols_table_enable_json(tab, ls->json);
+ scols_table_enable_noheadings(tab, ls->no_headings);
+
+ if (ls->json)
+ scols_table_set_name(tab, "namespaces");
+
+ for (i = 0; i < ncolumns; i++) {
+ const struct colinfo *col = get_column_info(i);
+ int flags = col->flags;
+ struct libscols_column *cl;
+
+ /* adjust the default column flags by the command line options */
+ if (ls->no_trunc)
+ flags &= ~SCOLS_FL_TRUNC;
+ if (ls->tree && get_column_id(i) == COL_COMMAND)
+ flags |= SCOLS_FL_TREE;
+ if (ls->no_wrap)
+ flags &= ~SCOLS_FL_WRAP;
+
+ cl = scols_table_new_column(tab, col->name, col->whint, flags);
+ if (cl == NULL) {
+ warnx(_("failed to initialize output column"));
+ goto err;
+ }
+ if (ls->json)
+ scols_column_set_json_type(cl, col->json_type);
+
+ /* NSFS cells hold newline separated mountpoints, wrap them at the newlines */
+ if (!ls->no_wrap && get_column_id(i) == COL_NSFS) {
+ scols_column_set_wrapfunc(cl,
+ scols_wrapnl_chunksize,
+ scols_wrapnl_nextchunk,
+ NULL);
+ scols_column_set_safechars(cl, "\n");
+ }
+ }
+
+ return tab;
+err:
+ scols_unref_table(tab);
+ return NULL;
+}
+
+/*
+ * Print the list of namespaces, one line per namespace. The line describes
+ * the process with the lowest PID in the namespace. If a PID filter is set,
+ * only the namespaces which contain that process are printed.
+ *
+ * Returns 0 on success, -ENOMEM if the output table cannot be created.
+ */
+static int show_namespaces(struct lsns *ls)
+{
+	struct libscols_table *tab = init_scols_table(ls);
+	struct list_head *pos;
+
+	if (!tab)
+		return -ENOMEM;
+
+	list_for_each(pos, &ls->namespaces) {
+		struct lsns_namespace *ns = list_entry(pos, struct lsns_namespace, namespaces);
+
+		if (ls->fltr_pid == 0 || namespace_has_process(ns, ls->fltr_pid))
+			add_scols_line(ls, tab, ns, ns->proc);
+	}
+
+	scols_print_table(tab);
+	scols_unref_table(tab);
+	return 0;
+}
+
+/*
+ * Add the line for @proc to @tab. In the tree mode the not yet printed
+ * parent is added first (recursively), but only if it lives in the same
+ * namespace of the type of @ns, so that the child can be attached to it.
+ */
+static void show_process(struct lsns *ls, struct libscols_table *tab,
+			 struct lsns_process *proc, struct lsns_namespace *ns)
+{
+	struct lsns_process *parent = proc->parent;
+
+	if (ls->tree && parent && !parent->outline) {
+		const int type = ns->type;
+
+		if (parent->ns_ids[type] == proc->ns_ids[type])
+			show_process(ls, tab, parent, ns);
+	}
+
+	add_scols_line(ls, tab, ns, proc);
+}
+
+
+/*
+ * Print all the processes which are members of the namespace @ns. A process
+ * that already has an output line (it has been added as a parent of another
+ * process) is not added again.
+ *
+ * Returns 0 on success, -ENOMEM if the output table cannot be created.
+ */
+static int show_namespace_processes(struct lsns *ls, struct lsns_namespace *ns)
+{
+	struct libscols_table *tab = init_scols_table(ls);
+	struct list_head *pos;
+
+	if (!tab)
+		return -ENOMEM;
+
+	list_for_each(pos, &ns->processes) {
+		struct lsns_process *proc = list_entry(pos, struct lsns_process, ns_siblings[ns->type]);
+
+		if (proc->outline)
+			continue;
+
+		show_process(ls, tab, proc, ns);
+	}
+
+	scols_print_table(tab);
+	scols_unref_table(tab);
+	return 0;
+}
+
+/*
+ * Print the help text, including the list of all supported output columns,
+ * to stdout and exit with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ size_t i;
+
+ fputs(USAGE_HEADER, out);
+
+ fprintf(out,
+ _(" %s [options] [<namespace>]\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("List system namespaces.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -J, --json use JSON output format\n"), out);
+ fputs(_(" -l, --list use list format output\n"), out);
+ fputs(_(" -n, --noheadings don't print headings\n"), out);
+ fputs(_(" -o, --output <list> define which output columns to use\n"), out);
+ fputs(_(" --output-all output all columns\n"), out);
+ fputs(_(" -p, --task <pid> print process namespaces\n"), out);
+ fputs(_(" -r, --raw use the raw output format\n"), out);
+ fputs(_(" -u, --notruncate don't truncate text in columns\n"), out);
+ fputs(_(" -W, --nowrap don't use multi-line representation\n"), out);
+ fputs(_(" -t, --type <name> namespace type (mnt, net, ipc, user, pid, uts, cgroup)\n"), out);
+
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(24));
+
+ /* column names and their translated descriptions from infos[] */
+ fputs(USAGE_COLUMNS, out);
+ for (i = 0; i < ARRAY_SIZE(infos); i++)
+ fprintf(out, " %11s %s\n", infos[i].name, _(infos[i].help));
+
+ printf(USAGE_MAN_TAIL("lsns(8)"));
+
+ exit(EXIT_SUCCESS);
+}
+
+
+/*
+ * lsns entry point: parse the command line, select the output columns, read
+ * the processes and their namespaces from /proc and print either the list of
+ * namespaces or, if a namespace inode is given as an argument, the processes
+ * in that namespace.
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+int main(int argc, char *argv[])
+{
+ struct lsns ls;
+ int c;
+ int r = 0;
+ char *outarg = NULL;
+ enum {
+ OPT_OUTPUT_ALL = CHAR_MAX + 1
+ };
+ static const struct option long_opts[] = {
+ { "json", no_argument, NULL, 'J' },
+ { "task", required_argument, NULL, 'p' },
+ { "help", no_argument, NULL, 'h' },
+ { "output", required_argument, NULL, 'o' },
+ { "output-all", no_argument, NULL, OPT_OUTPUT_ALL },
+ { "notruncate", no_argument, NULL, 'u' },
+ { "version", no_argument, NULL, 'V' },
+ { "noheadings", no_argument, NULL, 'n' },
+ { "nowrap", no_argument, NULL, 'W' },
+ { "list", no_argument, NULL, 'l' },
+ { "raw", no_argument, NULL, 'r' },
+ { "type", required_argument, NULL, 't' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ static const ul_excl_t excl[] = { /* rows and cols in ASCII order */
+ { 'J','r' },
+ { 0 }
+ };
+ int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+ int is_net = 0; /* set if "net" has been requested by --type */
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ lsns_init_debug();
+ memset(&ls, 0, sizeof(ls));
+
+ INIT_LIST_HEAD(&ls.processes);
+ INIT_LIST_HEAD(&ls.namespaces);
+ INIT_LIST_HEAD(&netnsids_cache);
+
+ while ((c = getopt_long(argc, argv,
+ "Jlp:o:nruhVt:W", long_opts, NULL)) != -1) {
+
+ err_exclusive_options(c, long_opts, excl, excl_st);
+
+ switch(c) {
+ case 'J':
+ ls.json = 1;
+ break;
+ case 'l':
+ ls.list = 1;
+ break;
+ case 'o':
+ /* the list is parsed later, after the default columns are set */
+ outarg = optarg;
+ break;
+ case OPT_OUTPUT_ALL:
+ for (ncolumns = 0; ncolumns < ARRAY_SIZE(infos); ncolumns++)
+ columns[ncolumns] = ncolumns;
+ break;
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+ case 'p':
+ ls.fltr_pid = strtos32_or_err(optarg, _("invalid PID argument"));
+ break;
+ case 'h':
+ usage();
+ case 'n':
+ ls.no_headings = 1;
+ break;
+ case 'r':
+ /* the raw output also disables the multi-line cells */
+ ls.no_wrap = ls.raw = 1;
+ break;
+ case 'u':
+ ls.no_trunc = 1;
+ break;
+ case 't':
+ {
+ int type = ns_name2type(optarg);
+ if (type < 0)
+ errx(EXIT_FAILURE, _("unknown namespace type: %s"), optarg);
+ ls.fltr_types[type] = 1;
+ ls.fltr_ntypes++;
+ if (type == LSNS_ID_NET)
+ is_net = 1;
+ break;
+ }
+ case 'W':
+ ls.no_wrap = 1;
+ break;
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+ /* no --type given: read all namespace types */
+ if (!ls.fltr_ntypes) {
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(ns_names); i++)
+ ls.fltr_types[i] = 1;
+ }
+
+ /* <namespace> argument: list the processes in the namespace, as a tree unless --list */
+ if (optind < argc) {
+ if (ls.fltr_pid)
+ errx(EXIT_FAILURE, _("--task is mutually exclusive with <namespace>"));
+ ls.fltr_ns = strtou64_or_err(argv[optind], _("invalid namespace argument"));
+ ls.tree = ls.list ? 0 : 1;
+
+ if (!ncolumns) {
+ columns[ncolumns++] = COL_PID;
+ columns[ncolumns++] = COL_PPID;
+ columns[ncolumns++] = COL_USER;
+ columns[ncolumns++] = COL_COMMAND;
+ }
+ }
+
+ /* default columns of the namespaces list */
+ if (!ncolumns) {
+ columns[ncolumns++] = COL_NS;
+ columns[ncolumns++] = COL_TYPE;
+ columns[ncolumns++] = COL_NPROCS;
+ columns[ncolumns++] = COL_PID;
+ columns[ncolumns++] = COL_USER;
+ if (is_net) {
+ columns[ncolumns++] = COL_NETNSID;
+ columns[ncolumns++] = COL_NSFS;
+ }
+ columns[ncolumns++] = COL_COMMAND;
+ }
+
+ if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns),
+ &ncolumns, column_name_to_id) < 0)
+ return EXIT_FAILURE;
+
+ scols_init_debug(0);
+
+ uid_cache = new_idcache();
+ if (!uid_cache)
+ err(EXIT_FAILURE, _("failed to allocate UID cache"));
+
+ /* the netlink socket and the mount table are needed for their columns only */
+#ifdef HAVE_LINUX_NET_NAMESPACE_H
+ if (has_column(COL_NETNSID))
+ netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+#endif
+ if (has_column(COL_NSFS)) {
+ ls.tab = mnt_new_table_from_file(_PATH_PROC_MOUNTINFO);
+ if (!ls.tab)
+ err(MNT_EX_FAIL, _("failed to parse %s"), _PATH_PROC_MOUNTINFO);
+ }
+
+ r = read_processes(&ls);
+ if (!r)
+ r = read_namespaces(&ls);
+ if (!r) {
+ if (ls.fltr_ns) {
+ struct lsns_namespace *ns = get_namespace(&ls, ls.fltr_ns);
+
+ if (!ns)
+ errx(EXIT_FAILURE, _("not found namespace: %ju"), (uintmax_t) ls.fltr_ns);
+ r = show_namespace_processes(&ls, ns);
+ } else
+ r = show_namespaces(&ls);
+ }
+
+ mnt_free_table(ls.tab);
+ if (netlink_fd >= 0)
+ close(netlink_fd);
+ free_idcache(uid_cache);
+ return r == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/sys-utils/mount.8 b/sys-utils/mount.8
new file mode 100644
index 0000000..da0ac5b
--- /dev/null
+++ b/sys-utils/mount.8
@@ -0,0 +1,2589 @@
+.\" Copyright (c) 1996-2004 Andries Brouwer
+.\" Copyright (C) 2006-2012 Karel Zak <kzak@redhat.com>
+.\"
+.\" This page is somewhat derived from a page that was
+.\" (c) 1980, 1989, 1991 The Regents of the University of California
+.\" and had been heavily modified by Rik Faith and myself.
+.\" (Probably no BSD text remains.)
+.\" Fragments of text were written by Werner Almesberger, Remy Card,
+.\" Stephen Tweedie and Eric Youngdale.
+.\"
+.\" This is free documentation; you can redistribute it and/or
+.\" modify it under the terms of the GNU General Public License as
+.\" published by the Free Software Foundation; either version 2 of
+.\" the License, or (at your option) any later version.
+.\"
+.\" The GNU General Public License's references to "object code"
+.\" and "executables" are to be interpreted as the output of any
+.\" document formatting or typesetting system, including
+.\" intermediate and printed output.
+.\"
+.\" This manual is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License along
+.\" with this program; if not, write to the Free Software Foundation, Inc.,
+.\" 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+.\"
+.TH MOUNT 8 "August 2015" "util-linux" "System Administration"
+.SH NAME
+mount \- mount a filesystem
+.SH SYNOPSIS
+.B mount
+.RB [ \-l | \-h | \-V ]
+.LP
+.B mount \-a
+.RB [ \-fFnrsvw ]
+.RB [ \-t
+.IR fstype ]
+.RB [ \-O
+.IR optlist ]
+.LP
+.B mount
+.RB [ \-fnrsvw ]
+.RB [ \-o
+.IR options ]
+.IR device | dir
+.LP
+.B mount
+.RB [ \-fnrsvw ]
+.RB [ \-t
+.IB fstype ]
+.RB [ \-o
+.IR options ]
+.I device dir
+.SH DESCRIPTION
+All files accessible in a Unix system are arranged in one big
+tree, the file hierarchy, rooted at
+.IR / .
+These files can be spread out over several devices. The
+.B mount
+command serves to attach the filesystem found on some device
+to the big file tree. Conversely, the
+.BR umount (8)
+command will detach it again. The filesystem is used to control how data is
+stored on the device or provided in a virtual way by network or other services.
+
+The standard form of the
+.B mount
+command is:
+.RS
+
+.br
+.BI "mount \-t" " type device dir"
+.br
+
+.RE
+This tells the kernel to attach the filesystem found on
+.I device
+(which is of type
+.IR type )
+at the directory
+.IR dir .
+The option \fB\-t \fItype\fR is optional. The
+.B mount
+command is usually able to detect a filesystem. The root permissions are necessary
+to mount a filesystem by default. See section "Non-superuser mounts" below for more details.
+The previous contents (if any) and owner and mode of
+.I dir
+become invisible, and as long as this filesystem remains mounted,
+the pathname
+.I dir
+refers to the root of the filesystem on
+.IR device .
+
+If only the directory or the device is given, for example:
+.RS
+.sp
+.B mount /dir
+.sp
+.RE
+then \fBmount\fR looks for a mountpoint (and if not found then for a device) in the
+.I /etc/fstab
+file. It's possible to use the
+.B \-\-target
+or
+.B \-\-source
+options to avoid ambiguous interpretation of the given argument. For example:
+.RS
+.sp
+.B mount \-\-target /mountpoint
+.sp
+.RE
+
+The same filesystem may be mounted more than once, and in some cases (e.g.
+network filesystems) the same filesystem may be mounted on the same
+mountpoint multiple times. The mount command does not implement any policy to
+control this behavior. All behavior is controlled by kernel and it is usually
+specific to filesystem driver. The exception is \fB\-\-all\fR, in this case
+already mounted filesystems are ignored (see \fB\-\-all\fR below for more details).
+
+.SS Listing the mounts
+The listing mode is maintained for backward compatibility only.
+
+For more robust and customizable output use
+.BR findmnt (8),
+\fBespecially in your scripts\fP. Note that control characters in the
+mountpoint name are replaced with '?'.
+
+The following command lists all mounted filesystems (of type
+.IR type ):
+.RS
+.sp
+.BR "mount " [ \-l "] [" "\-t \fItype\/\fP" ]
+.sp
+.RE
+The option \fB\-l\fR adds labels to this listing. See below.
+
+.SS Indicating the device and filesystem
+Most devices are indicated by a filename (of a block special device), like
+.IR /dev/sda1 ,
+but there are other possibilities. For example, in the case of an NFS mount,
+.I device
+may look like
+.IR knuth.cwi.nl:/dir .
+It is also possible to indicate a block special device using its filesystem label
+or UUID (see the \fB\-L\fR and \fB\-U\fR options below), or its partition label
+or UUID. Partition identifiers are supported for example for GUID Partition
+Tables (GPT).
+
+The device names of disk partitions are unstable; hardware reconfiguration,
+adding or removing a device can cause changes in names. This is the reason why it's
+strongly recommended to use filesystem or partition identifiers like UUID or
+LABEL.
+
+The command \fBlsblk --fs\fR provides overview of filesystems, LABELs and UUIDs
+on available block devices. The command \fBblkid -p <device>\fR provides details about
+a filesystem on the specified device.
+
+Don't forget that there is no guarantee that UUIDs and labels are really
+unique, especially if you move, share or copy the device. Use
+.B "lsblk \-o +UUID,PARTUUID"
+to verify that the UUIDs are really unique in your system.
+
+The recommended setup is to use tags (e.g.\& \fBUUID=\fIuuid\fR) rather than
+.I /dev/disk/by-{label,uuid,partuuid,partlabel}
+udev symlinks in the
+.I /etc/fstab
+file. Tags are
+more readable, robust and portable. The
+.BR mount (8)
+command internally uses udev
+symlinks, so the use of symlinks in /etc/fstab has no advantage over tags.
+For more details see
+.BR libblkid (3).
+
+Note that
+.BR mount (8)
+uses UUIDs as strings. The UUIDs from the command line or from
+.BR fstab (5)
+are not converted to internal binary representation. The string representation
+of the UUID should be based on lower case characters.
+
+The
+.I proc
+filesystem is not associated with a special device, and when
+mounting it, an arbitrary keyword, such as
+.I proc
+can be used instead of a device specification.
+(The customary choice
+.I none
+is less fortunate: the error message `none already mounted' from
+.B mount
+can be confusing.)
+
+.SS The files /etc/fstab, /etc/mtab and /proc/mounts
+The file
+.I /etc/fstab
+(see
+.BR fstab (5)),
+may contain lines describing what devices are usually
+mounted where, using which options. The default location of the
+.BR fstab (5)
+file can be overridden with the
+.BI \-\-fstab " path"
+command-line option (see below for more details).
+.LP
+The command
+.RS
+.sp
+.B mount \-a
+.RB [ \-t
+.IR type ]
+.RB [ \-O
+.IR optlist ]
+.sp
+.RE
+(usually given in a bootscript) causes all filesystems mentioned in
+.I fstab
+(of the proper type and/or having or not having the proper options)
+to be mounted as indicated, except for those whose line contains the
+.B noauto
+keyword. Adding the
+.B \-F
+option will make \fBmount\fR fork, so that the
+filesystems are mounted simultaneously.
+.LP
+When mounting a filesystem mentioned in
+.I fstab
+or
+.IR mtab ,
+it suffices to specify on the command line only the device, or only the mount point.
+.sp
+The programs
+.B mount
+and
+.B umount
+traditionally maintained a list of currently mounted filesystems in the file
+.IR /etc/mtab .
+The support for regular classic
+.I /etc/mtab
+is completely disabled in compile time by default, because on current Linux
+systems it is better to make it a symlink to
+.I /proc/mounts
+instead. The regular mtab file maintained in userspace cannot reliably
+work with namespaces, containers and other advanced Linux features.
+If the regular mtab support is enabled then it's possible to
+use the file as well as the symlink.
+.sp
+If no arguments are given to
+.BR mount ,
+the list of mounted filesystems is printed.
+.sp
+If you want to override mount options from
+.I /etc/fstab
+you have to use the \fB\-o\fR option:
+.RS
+.sp
+.BI mount " device" \fR| "dir " \-o " options"
+.sp
+.RE
+and then the mount options from the command line will be appended to
+the list of options from
+.IR /etc/fstab .
+This default behaviour is possible to change by command line
+option \fB\-\-options\-mode\fR.
+The usual behavior is that the last option wins if there are conflicting
+ones.
+.sp
+The
+.B mount
+program does not read the
+.I /etc/fstab
+file if both
+.I device
+(or LABEL, UUID, PARTUUID or PARTLABEL) and
+.I dir
+are specified. For example, to mount device
+.BR foo " at " /dir :
+.RS
+.sp
+.B "mount /dev/foo /dir"
+.sp
+.RE
+This default behaviour is possible to change by command line option
+\fB\-\-options\-source\-force\fR to always read configuration from fstab. For
+non-root users
+.B mount
+always reads fstab configuration.
+
+.SS Non-superuser mounts
+Normally, only the superuser can mount filesystems.
+However, when
+.I fstab
+contains the
+.B user
+option on a line, anybody can mount the corresponding filesystem.
+.LP
+Thus, given a line
+.RS
+.sp
+.B "/dev/cdrom /cd iso9660 ro,user,noauto,unhide"
+.sp
+.RE
+any user can mount the iso9660 filesystem found on an inserted CDROM
+using the command:
+.RS
+.B "mount /cd"
+.sp
+.RE
+Note that \fBmount\fR is very strict about non-root users and all paths
+specified on command line are verified before fstab is parsed or a helper
+program is executed. It's strongly recommended to use a valid mountpoint to
+specify filesystem, otherwise \fBmount\fR may fail. For example it's a bad idea
+to use NFS or CIFS source on command line.
+.PP
+For more details, see
+.BR fstab (5).
+Only the user that mounted a filesystem can unmount it again.
+If any user should be able to unmount it, then use
+.B users
+instead of
+.B user
+in the
+.I fstab
+line.
+The
+.B owner
+option is similar to the
+.B user
+option, with the restriction that the user must be the owner
+of the special file. This may be useful e.g.\& for
+.I /dev/fd
+if a login script makes the console user owner of this device.
+The
+.B group
+option is similar, with the restriction that the user must be
+member of the group of the special file.
+
+.SS Bind mount operation
+Remount part of the file hierarchy somewhere else. The call is:
+
+.RS
+.br
+.B mount \-\-bind
+.I olddir newdir
+.RE
+
+or by using this fstab entry:
+
+.RS
+.br
+.BI / olddir
+.BI / newdir
+.B none bind
+.RE
+
+After this call the same contents are accessible in two places.
+
+It is important to understand that "bind" does not create any second-class
+or special node in the kernel VFS. The "bind" is just another operation to
+attach a filesystem. No information is stored anywhere that the filesystem
+has been attached by a "bind" operation. The \fIolddir\fR and \fInewdir\fR are
+independent and the \fIolddir\fR may be unmounted.
+
+One can also remount a single file (on a single file). It's also
+possible to use the bind mount to create a mountpoint from a regular
+directory, for example:
+
+.RS
+.br
+.B mount \-\-bind foo foo
+.RE
+
+The bind mount call attaches only (part of) a single filesystem, not possible
+submounts. The entire file hierarchy including submounts is attached
+in a second place by using:
+
+.RS
+.br
+.B mount \-\-rbind
+.I olddir newdir
+.RE
+
+Note that the filesystem mount options maintained by kernel will remain the same as those
+on the original mount point. The userspace mount options (e.g. _netdev) will not be copied
+by
+.BR mount (8)
+and it's necessary to explicitly specify the options on the mount command line.
+
+.BR mount (8)
+since v2.27 allows to change the mount options by passing the
+relevant options along with
+.BR \-\-bind .
+For example:
+
+.RS
+.br
+.B mount -o bind,ro foo foo
+.RE
+
+This feature is not supported by the Linux kernel; it is implemented in userspace
+by an additional \fBmount\fR(2) remounting system call.
+This solution is not atomic.
+
+The alternative (classic) way to create a read-only bind mount is to use the remount
+operation, for example:
+
+.RS
+.br
+.B mount \-\-bind
+.I olddir newdir
+.br
+.B mount \-o remount,bind,ro
+.I olddir newdir
+.RE
+
+Note that a read-only bind will create a read-only mountpoint (VFS entry),
+but the original filesystem superblock will still be writable, meaning that the
+.I olddir
+will be writable, but the
+.I newdir
+will be read-only.
+
+It's also possible to change nosuid, nodev, noexec, noatime, nodiratime and
+relatime VFS entry flags by "remount,bind" operation. Other flags (for example
+filesystem-specific flags) are silently ignored. It's impossible to change mount
+options recursively (for example with \fB-o rbind,ro\fR).
+
+.BR mount (8)
+since v2.31 ignores the \fBbind\fR flag from
+.I /etc/fstab
+on
+.B remount operation
+(if "-o remount" specified on command line). This is necessary to fully control
+mount options on remount by command line. In the previous versions the bind
+flag has been always applied and it was impossible to re-define mount options
+without interaction with the bind semantic. This
+.BR mount (8)
+behavior does not affect situations when "remount,bind" is specified in the
+.I /etc/fstab
+file.
+.RE
+
+.SS The move operation
+Move a
+.B mounted tree
+to another place (atomically). The call is:
+
+.RS
+.br
+.B mount \-\-move
+.I olddir newdir
+.RE
+
+This will cause the contents which previously appeared under
+.I olddir
+to now be accessible under
+.IR newdir .
+The physical location of the files is not changed.
+Note that
+.I olddir
+has to be a mountpoint.
+
+Note also that moving a mount residing under a shared mount is invalid and
+unsupported. Use
+.B findmnt \-o TARGET,PROPAGATION
+to see the current propagation flags.
+
+.SS Shared subtree operations
+Since Linux 2.6.15 it is possible to mark a mount and its submounts as shared,
+private, slave or unbindable. A shared mount provides the ability to create mirrors
+of that mount such that mounts and unmounts within any of the mirrors propagate
+to the other mirror. A slave mount receives propagation from its master, but
+not vice versa. A private mount carries no propagation abilities. An
+unbindable mount is a private mount which cannot be cloned through a bind
+operation. The detailed semantics are documented in
+.I Documentation/filesystems/sharedsubtree.txt
+file in the kernel source tree.
+
+Supported operations are:
+
+.RS
+.nf
+.BI "mount \-\-make\-shared " mountpoint
+.BI "mount \-\-make\-slave " mountpoint
+.BI "mount \-\-make\-private " mountpoint
+.BI "mount \-\-make\-unbindable " mountpoint
+.fi
+.RE
+
+The following commands allow one to recursively change the type of all the
+mounts under a given mountpoint.
+
+.RS
+.nf
+.BI "mount \-\-make\-rshared " mountpoint
+.BI "mount \-\-make\-rslave " mountpoint
+.BI "mount \-\-make\-rprivate " mountpoint
+.BI "mount \-\-make\-runbindable " mountpoint
+.fi
+.RE
+
+.BR mount (8)
+.B does not read
+.BR fstab (5)
+when a \fB\-\-make-\fR* operation is requested. All necessary information has to be
+specified on the command line.
+
+Note that the Linux kernel does not allow to change multiple propagation flags
+with a single
+.BR mount (2)
+system call, and the flags cannot be mixed with other mount options and operations.
+
+Since util-linux 2.23 the \fBmount\fR command allows to do more propagation
+(topology) changes by one mount(8) call and do it also together with other
+mount operations. This feature is EXPERIMENTAL. The propagation flags are applied
+by additional \fBmount\fR(2) system calls when the preceding mount operations
+were successful. Note that this use case is not atomic. It is possible to
+specify the propagation flags in
+.BR fstab (5)
+as mount options
+.RB ( private ,
+.BR slave ,
+.BR shared ,
+.BR unbindable ,
+.BR rprivate ,
+.BR rslave ,
+.BR rshared ,
+.BR runbindable ).
+
+For example:
+
+.RS
+.nf
+.B mount \-\-make\-private \-\-make\-unbindable /dev/sda1 /foo
+.fi
+.RE
+
+is the same as:
+
+.RS
+.nf
+.B mount /dev/sda1 /foo
+.B mount \-\-make\-private /foo
+.B mount \-\-make\-unbindable /foo
+.fi
+.RE
+
+.SH COMMAND-LINE OPTIONS
+The full set of mount options used by an invocation of
+.B mount
+is determined by first extracting the
+mount options for the filesystem from the
+.I fstab
+table, then applying any options specified by the
+.B \-o
+argument, and finally applying a
+.BR \-r " or " \-w
+option, when present.
+
+The command \fBmount\fR does not pass all command-line options to the
+\fB/sbin/mount.\fIsuffix\fR mount helpers. The interface between \fBmount\fR
+and the mount helpers is described below in the section \fBEXTERNAL HELPERS\fR.
+.sp
+Command-line options available for the
+.B mount
+command are:
+.TP
+.BR \-a , " \-\-all"
+Mount all filesystems (of the given types) mentioned in
+.I fstab
+(except for those whose line contains the
+.B noauto
+keyword). The filesystems are mounted following their order in
+.IR fstab .
+The mount command compares filesystem source, target (and fs root for bind
+mount or btrfs) to detect already mounted filesystems. The kernel table with
+already mounted filesystems is cached during \fBmount \-\-all\fR. It means
+that all duplicated fstab entries will be mounted.
+.sp
+Note that it is a bad practice to use \fBmount \-a\fR for
+.I fstab
+checking. The recommended solution is \fBfindmnt \-\-verify\fR.
+.TP
+.BR \-B , " \-\-bind"
+Remount a subtree somewhere else (so that its contents are available
+in both places). See above, under \fBBind mount operation\fR.
+.TP
+.BR \-c , " \-\-no\-canonicalize"
+Don't canonicalize paths. The mount command canonicalizes all paths
+(from command line or fstab) by default. This option can be used
+together with the
+.B \-f
+flag for already canonicalized absolute paths. The option is designed for mount
+helpers which call \fBmount -i\fR. It is strongly recommended to not use this
+command-line option for normal mount operations.
+.sp
+Note that \fBmount\fR(8) does not pass this option to the
+\fB/sbin/mount.\fItype\fR helpers.
+.TP
+.BR \-F , " \-\-fork"
+(Used in conjunction with
+.BR \-a .)
+Fork off a new incarnation of \fBmount\fR for each device.
+This will do the mounts on different devices or different NFS servers
+in parallel.
+This has the advantage that it is faster; also NFS timeouts go in
+parallel. A disadvantage is that the mounts are done in undefined order.
+Thus, you cannot use this option if you want to mount both
+.I /usr
+and
+.IR /usr/spool .
+.IP "\fB\-f, \-\-fake\fP"
+Causes everything to be done except for the actual system call; if it's not
+obvious, this ``fakes'' mounting the filesystem. This option is useful in
+conjunction with the
+.B \-v
+flag to determine what the
+.B mount
+command is trying to do. It can also be used to add entries for devices
+that were mounted earlier with the \fB\-n\fR option. The \fB\-f\fR option
+checks for an existing record in /etc/mtab and fails when the record already
+exists (with a regular non-fake mount, this check is done by the kernel).
+.IP "\fB\-i, \-\-internal\-only\fP"
+Don't call the \fB/sbin/mount.\fIfilesystem\fR helper even if it exists.
+.TP
+.BR \-L , " \-\-label " \fIlabel
+Mount the partition that has the specified
+.IR label .
+.TP
+.BR \-l , " \-\-show\-labels"
+Add the labels in the mount output. \fBmount\fR must have
+permission to read the disk device (e.g.\& be set-user-ID root) for this to work.
+One can set such a label for ext2, ext3 or ext4 using the
+.BR e2label (8)
+utility, or for XFS using
+.BR xfs_admin (8),
+or for reiserfs using
+.BR reiserfstune (8).
+.TP
+.BR \-M , " \-\-move"
+Move a subtree to some other place. See above, the subsection
+\fBThe move operation\fR.
+.TP
+.BR \-n , " \-\-no\-mtab"
+Mount without writing in
+.IR /etc/mtab .
+This is necessary for example when
+.I /etc
+is on a read-only filesystem.
+.TP
+.BR \-N , " \-\-namespace " \fIns
+Perform mount in namespace specified by \fIns\fR.
+\fIns\fR is either PID of process running in that namespace
+or special file representing that namespace.
+.sp
+.BR mount (8)
+switches to the namespace when it reads /etc/fstab, writes /etc/mtab (or writes to /run/mount) and calls
+.BR mount (2)
+system call, otherwise it runs in the original namespace. It means that the target namespace does not have
+to contain any libraries or other requirements necessary to execute
+.BR mount (2)
+command.
+.sp
+See \fBnamespaces\fR(7) for more information.
+.TP
+.BR \-O , " \-\-test\-opts " \fIopts
+Limit the set of filesystems to which the
+.B \-a
+option applies. In this regard it is like the
+.B \-t
+option except that
+.B \-O
+is useless without
+.BR \-a .
+For example, the command:
+.RS
+.RS
+.sp
+.B "mount \-a \-O no_netdev"
+.sp
+.RE
+mounts all filesystems except those which have the option
+.I _netdev
+specified in the options field in the
+.I /etc/fstab
+file.
+
+It is different from
+.B \-t
+in that each option is matched exactly; a leading
+.B no
+at the beginning of one option does not negate the rest.
+
+The
+.B \-t
+and
+.B \-O
+options are cumulative in effect; that is, the command
+.RS
+.sp
+.B "mount \-a \-t ext2 \-O _netdev"
+.sp
+.RE
+mounts all ext2 filesystems with the _netdev option, not all filesystems
+that are either ext2 or have the _netdev option specified.
+.RE
+.TP
+.BR \-o , " \-\-options " \fIopts
+Use the specified mount options. The \fIopts\fR argument is
+a comma-separated list. For example:
+.RS
+.RS
+.sp
+.B "mount LABEL=mydisk \-o noatime,nodev,nosuid"
+.sp
+.RE
+
+For more details, see the
+.B FILESYSTEM-INDEPENDENT MOUNT OPTIONS
+and
+.B FILESYSTEM-SPECIFIC MOUNT OPTIONS
+sections.
+.RE
+
+.TP
+.BR "\-\-options\-mode " \fImode
+Controls how to combine options from fstab/mtab with options from command line.
+\fImode\fR can be one of
+.BR ignore ", " append ", " prepend " or " replace .
+For example \fBappend\fR means that options from fstab are appended to options from command line.
+Default value is \fBprepend\fR -- it means command line options are evaluated after fstab options.
+Note that the last option wins if there are conflicting ones.
+
+.TP
+.BR "\-\-options\-source " \fIsource
+Source of default options.
+\fIsource\fR is a comma-separated list of
+.BR fstab ", " mtab " and " disable .
+\fBdisable\fR disables
+.BR fstab " and " mtab
+and disables \fB\-\-options\-source\-force\fR.
+Default value is \fBfstab,mtab\fR.
+
+.TP
+.B \-\-options\-source\-force
+Use options from fstab/mtab even if both \fIdevice\fR and \fIdir\fR are specified.
+
+.TP
+.BR \-R , " \-\-rbind"
+Remount a subtree and all possible submounts somewhere else (so that its
+contents are available in both places). See above, the subsection
+\fBBind mount operation\fR.
+.TP
+.BR \-r , " \-\-read\-only"
+Mount the filesystem read-only. A synonym is
+.BR "\-o ro" .
+.sp
+Note that, depending on the filesystem type, state and kernel behavior, the
+system may still write to the device. For example, ext3 and ext4 will replay the
+journal if the filesystem is dirty. To prevent this kind of write access, you
+may want to mount an ext3 or ext4 filesystem with the \fBro,noload\fR mount
+options or set the block device itself to read-only mode, see the
+.BR blockdev (8)
+command.
+.TP
+.B \-s
+Tolerate sloppy mount options rather than failing. This will ignore mount
+options not supported by a filesystem type. Not all filesystems support this
+option. Currently it's supported by the \fBmount.nfs\fR mount helper only.
+.TP
+.BI \-\-source " device"
+If only one argument for the mount command is given then the argument might be
+interpreted as target (mountpoint) or source (device). This option allows to
+explicitly define that the argument is the mount source.
+.TP
+.BI \-\-target " directory"
+If only one argument for the mount command is given then the argument might be
+interpreted as target (mountpoint) or source (device). This option allows to
+explicitly define that the argument is the mount target.
+.TP
+.BR \-T , " \-\-fstab " \fIpath
+Specifies an alternative fstab file. If \fIpath\fP is a directory then the files
+in the directory are sorted by
+.BR strverscmp (3);
+files that start with "."\& or without an \&.fstab extension are ignored. The option
+can be specified more than once. This option is mostly designed for initramfs
+or chroot scripts where additional configuration is specified beyond standard
+system configuration.
+.sp
+Note that \fBmount\fR(8) does not pass the option \fB\-\-fstab\fP to the
+\fB/sbin/mount.\fItype\fR helpers, meaning that the alternative fstab files will be
+invisible for the helpers. This is no problem for normal mounts, but user
+(non-root) mounts always require fstab to verify the user's rights.
+.TP
+.BR \-t , " \-\-types " \fIfstype
+The argument following the
+.B \-t
+is used to indicate the filesystem type. The filesystem types which are
+currently supported depend on the running kernel. See
+.I /proc/filesystems
+and
+.I /lib/modules/$(uname -r)/kernel/fs
+for a complete list of the filesystems. The most common are ext2, ext3, ext4,
+xfs, btrfs, vfat, sysfs, proc, nfs and cifs.
+.sp
+The programs
+.B mount
+and
+.B umount
+support filesystem subtypes. The subtype is defined by a '.subtype' suffix. For
+example 'fuse.sshfs'. It's recommended to use subtype notation rather than add
+any prefix to the mount source (for example 'sshfs#example.com' is
+deprecated).
+
+If no
+.B \-t
+option is given, or if the
+.B auto
+type is specified, mount will try to guess the desired type.
+Mount uses the blkid library for guessing the filesystem
+type; if that does not turn up anything that looks familiar,
+mount will try to read the file
+.IR /etc/filesystems ,
+or, if that does not exist,
+.IR /proc/filesystems .
+All of the filesystem types listed there will be tried,
+except for those that are labeled "nodev" (e.g.\&
+.IR devpts ,
+.I proc
+and
+.IR nfs ).
+If
+.I /etc/filesystems
+ends in a line with a single *, mount will read
+.I /proc/filesystems
+afterwards. While trying, all filesystem types will be
+mounted with the mount option \fBsilent\fR.
+.sp
+The
+.B auto
+type may be useful for user-mounted floppies.
+Creating a file
+.I /etc/filesystems
+can be useful to change the probe order (e.g., to try vfat before msdos
+or ext3 before ext2) or if you use a kernel module autoloader.
+.sp
+More than one type may be specified in a comma-separated
+list, for option
+.B \-t
+as well as in an
+.I /etc/fstab
+entry. The list of filesystem types for option
+.B \-t
+can be prefixed with
+.B no
+to specify the filesystem types on which no action should be taken.
+The prefix
+.B no
+has no effect when specified in an
+.I /etc/fstab
+entry.
+.sp
+The prefix
+.B no
+can be meaningful with the
+.B \-a
+option. For example, the command
+.RS
+.RS
+.sp
+.B "mount \-a \-t nomsdos,smbfs"
+.sp
+.RE
+mounts all filesystems except those of type
+.I msdos
+and
+.IR smbfs .
+.sp
+For most types all the
+.B mount
+program has to do is issue a simple
+.BR mount (2)
+system call, and no detailed knowledge of the filesystem type is required.
+For a few types however (like nfs, nfs4, cifs, smbfs, ncpfs) ad hoc code is
+necessary. The nfs, nfs4, cifs, smbfs, and ncpfs filesystems
+have a separate mount program. In order to make it possible to
+treat all types in a uniform way, \fBmount\fR will execute the program
+.BI /sbin/mount. type
+(if that exists) when called with type
+.IR type .
+Since different versions of the
+.B smbmount
+program have different calling conventions,
+.B /sbin/mount.smbfs
+may have to be a shell script that sets up the desired call.
+.RE
+.TP
+.BR \-U , " \-\-uuid " \fIuuid
+Mount the partition that has the specified
+.IR uuid .
+.TP
+.BR \-v , " \-\-verbose"
+Verbose mode.
+.TP
+.BR \-w , " \-\-rw" , " \-\-read\-write"
+Mount the filesystem read/write. Read-write is the kernel default. A synonym is
+.BR "\-o rw" .
+
+Note that specifying \fB\-w\fR on the command line forces the \fBmount\fR command
+to never try a read-only mount on write-protected devices. The default is to
+try read-only if the previous mount syscall with read-write flags failed.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+
+.SH FILESYSTEM-INDEPENDENT MOUNT OPTIONS
+Some of these options are only useful when they appear in the
+.I /etc/fstab
+file.
+
+Some of these options could be enabled or disabled by default
+in the system kernel. To check the current setting see the options
+in /proc/mounts. Note that filesystems also have per-filesystem
+specific default mount options (see for example \fBtune2fs \-l\fP
+output for extN filesystems).
+
+The following options apply to any filesystem that is being
+mounted (but not every filesystem actually honors them \(en e.g.\&, the
+.B sync
+option today has an effect only for ext2, ext3, ext4, fat, vfat and ufs):
+
+.TP
+.B async
+All I/O to the filesystem should be done asynchronously. (See also the
+.B sync
+option.)
+.TP
+.B atime
+Do not use the \fBnoatime\fR feature, so the inode access time is controlled
+by kernel defaults. See also the descriptions of the \fB\%relatime\fR and
+.B strictatime
+mount options.
+.TP
+.B noatime
+Do not update inode access times on this filesystem (e.g.\& for faster
+access on the news spool to speed up news servers). This works for all
+inode types (directories too), so it implies \fB\%nodiratime\fR.
+.TP
+.B auto
+Can be mounted with the
+.B \-a
+option.
+.TP
+.B noauto
+Can only be mounted explicitly (i.e., the
+.B \-a
+option will not cause the filesystem to be mounted).
+.TP
+.na
+.BR context=\fIcontext ", " fscontext=\fIcontext ", " defcontext=\fIcontext ", and " \%rootcontext=\fIcontext
+.ad
+The
+.B context=
+option is useful when mounting filesystems that do not support
+extended attributes, such as a floppy or hard disk formatted with VFAT, or
+systems that are not normally running under SELinux, such as an ext3 or
+ext4 formatted disk from a non-SELinux workstation.
+You can also use
+.B context=
+on filesystems you do not trust, such as a floppy. It also helps in compatibility with
+xattr-supporting filesystems on earlier 2.4.<x> kernel versions. Even where
+xattrs are supported, you can save time not having to label every file by
+assigning the entire disk one security context.
+
+A commonly used option for removable media is
+.BR \%context="system_u:object_r:removable_t" .
+
+Two other options are
+.B fscontext=
+and
+.BR defcontext= ,
+both of which are mutually exclusive of the context option. This means you
+can use fscontext and defcontext with each other, but neither can be used with
+context.
+
+The
+.B fscontext=
+option works for all filesystems, regardless of their xattr
+support. The fscontext option sets the overarching filesystem label to a
+specific security context. This filesystem label is separate from the
+individual labels on the files. It represents the entire filesystem for
+certain kinds of permission checks, such as during mount or file creation.
+Individual file labels are still obtained from the xattrs on the files
+themselves. The context option actually sets the aggregate context that
+fscontext provides, in addition to supplying the same label for individual
+files.
+
+You can set the default security context for unlabeled files using
+.B defcontext=
+option. This overrides the value set for unlabeled files in the policy and requires a
+filesystem that supports xattr labeling.
+
+The
+.B rootcontext=
+option allows you to explicitly label the root inode of a FS being mounted
+before that FS or inode becomes visible to userspace. This was found to be
+useful for things like stateless linux.
+
+Note that the kernel rejects any remount request that includes the context
+option, \fBeven\fP when unchanged from the current context.
+
+.BR "Warning: the \fIcontext\fP value might contain commas" ,
+in which case the value has to be properly quoted, otherwise
+.BR mount (8)
+will interpret the comma as a separator between mount options. Don't forget that
+the shell strips off quotes and thus
+.BR "double quoting is required" .
+For example:
+.RS
+.RS
+.sp
+.nf
+.B mount \-t tmpfs none /mnt \-o \e
+.B \ \ 'context="system_u:object_r:tmp_t:s0:c127,c456",noexec'
+.fi
+.sp
+.RE
+For more details, see
+.BR selinux (8).
+.RE
+
+.TP
+.B defaults
+Use the default options:
+.BR rw ", " suid ", " dev ", " exec ", " auto ", " nouser ", and " async .
+
+Note that the real set of all default mount options depends on kernel
+and filesystem type. See the beginning of this section for more details.
+.TP
+.B dev
+Interpret character or block special devices on the filesystem.
+.TP
+.B nodev
+Do not interpret character or block special devices on the file
+system.
+.TP
+.B diratime
+Update directory inode access times on this filesystem. This is the default.
+(This option is ignored when \fBnoatime\fR is set.)
+.TP
+.B nodiratime
+Do not update directory inode access times on this filesystem.
+(This option is implied when \fBnoatime\fR is set.)
+.TP
+.B dirsync
+All directory updates within the filesystem should be done synchronously.
+This affects the following system calls: creat, link, unlink, symlink,
+mkdir, rmdir, mknod and rename.
+.TP
+.B exec
+Permit execution of binaries.
+.TP
+.B noexec
+Do not permit direct execution of any binaries on the mounted filesystem.
+.TP
+.B group
+Allow an ordinary user to mount the filesystem if one
+of that user's groups matches the group of the device.
+This option implies the options
+.BR nosuid " and " nodev
+(unless overridden by subsequent options, as in the option line
+.BR group,dev,suid ).
+.TP
+.B iversion
+Every time the inode is modified, the i_version field will be incremented.
+.TP
+.B noiversion
+Do not increment the i_version inode field.
+.TP
+.B mand
+Allow mandatory locks on this filesystem. See
+.BR fcntl (2).
+.TP
+.B nomand
+Do not allow mandatory locks on this filesystem.
+.TP
+.B _netdev
+The filesystem resides on a device that requires network access
+(used to prevent the system from attempting to mount these filesystems
+until the network has been enabled on the system).
+.TP
+.B nofail
+Do not report errors for this device if it does not exist.
+.TP
+.B relatime
+Update inode access times relative to modify or change time. Access
+time is only updated if the previous access time was earlier than the
+current modify or change time. (Similar to \fB\%noatime\fR, but it doesn't
+break \fBmutt\fR or other applications that need to know if a file has been
+read since the last time it was modified.)
+
+Since Linux 2.6.30, the kernel defaults to the behavior provided by this
+option (unless
+.B \%noatime
+was specified), and the
+.B \%strictatime
+option is required to obtain traditional semantics. In addition, since Linux
+2.6.30, the file's last access time is always updated if it is more than 1
+day old.
+.TP
+.B norelatime
+Do not use the
+.B relatime
+feature. See also the
+.B strictatime
+mount option.
+.TP
+.B strictatime
+Allows explicitly requesting full atime updates. This makes it
+possible for the kernel to default to
+.B \%relatime
+or
+.B \%noatime
+but still allow userspace to override it. For more details about the default
+system mount options see /proc/mounts.
+.TP
+.B nostrictatime
+Use the kernel's default behavior for inode access time updates.
+.TP
+.B lazytime
+Only update times (atime, mtime, ctime) on the in-memory version of the file inode.
+
+This mount option significantly reduces writes to the inode table for
+workloads that perform frequent random writes to preallocated files.
+
+The on-disk timestamps are updated only when:
+.sp
+.RS
+- the inode needs to be updated for some change unrelated to file timestamps
+.sp
+- the application employs
+.BR fsync (2),
+.BR syncfs (2),
+or
+.BR sync (2)
+.sp
+- an undeleted inode is evicted from memory
+.sp
+- more than 24 hours have passed since the inode was written to disk.
+.RE
+.sp
+.TP
+.B nolazytime
+Do not use the lazytime feature.
+.TP
+.B suid
+Allow set-user-ID or set-group-ID bits to take
+effect.
+.TP
+.B nosuid
+Do not allow set-user-ID or set-group-ID bits to take
+effect.
+.TP
+.B silent
+Turn on the silent flag.
+.TP
+.B loud
+Turn off the silent flag.
+.TP
+.B owner
+Allow an ordinary user to mount the filesystem if that
+user is the owner of the device.
+This option implies the options
+.BR nosuid " and " nodev
+(unless overridden by subsequent options, as in the option line
+.BR owner,dev,suid ).
+.TP
+.B remount
+Attempt to remount an already-mounted filesystem. This is commonly
+used to change the mount flags for a filesystem, especially to make a
+readonly filesystem writable. It does not change device or mount point.
+
+The remount operation together with the
+.B bind
+flag has special semantics. See above, the subsection \fBBind mounts\fR.
+
+The remount functionality follows the standard way the mount command works
+with options from fstab. This means that \fBmount\fR does not
+read fstab (or mtab) only when both
+.I device
+and
+.I dir
+are specified.
+.sp
+.in +4
+.B "mount \-o remount,rw /dev/foo /dir"
+.in
+.sp
+After this call all old mount options are replaced and arbitrary stuff from
+fstab (or mtab) is ignored, except the loop= option which is internally
+generated and maintained by the mount command.
+.sp
+.in +4
+.B "mount \-o remount,rw /dir"
+.in
+.sp
+After this call, mount reads fstab and merges these options with
+the options from the command line (\fB\-o\fR).
+If no mountpoint is found in fstab, then a remount with unspecified source is
+allowed.
+.TP
+.B ro
+Mount the filesystem read-only.
+.TP
+.B rw
+Mount the filesystem read-write.
+.TP
+.B sync
+All I/O to the filesystem should be done synchronously. In the case of
+media with a limited number of write cycles
+(e.g.\& some flash drives), \fBsync\fR may cause life-cycle shortening.
+.TP
+.B user
+Allow an ordinary user to mount the filesystem.
+The name of the mounting user is written to the mtab file (or to the private
+libmount file in /run/mount on systems without a regular mtab) so that this
+same user can unmount the filesystem again.
+This option implies the options
+.BR noexec ", " nosuid ", and " nodev
+(unless overridden by subsequent options, as in the option line
+.BR user,exec,dev,suid ).
+.TP
+.B nouser
+Forbid an ordinary user to mount the filesystem.
+This is the default; it does not imply any other options.
+.TP
+.B users
+Allow any user to mount and to unmount the filesystem, even
+when some other ordinary user mounted it.
+This option implies the options
+.BR noexec ", " nosuid ", and " nodev
+(unless overridden by subsequent options, as in the option line
+.BR users,exec,dev,suid ).
+.TP
+.B X-*
+All options prefixed with "X-" are interpreted as comments or as userspace
+application-specific options. These options are not stored in the user space (e.g. mtab file),
+nor sent to the mount.\fItype\fR helpers nor to the
+.BR mount (2)
+system call. The suggested format is \fBX-\fIappname\fR.\fIoption\fR.
+.TP
+.B x-*
+The same as \fBX-*\fR options, but stored permanently in the user space. It
+means the options are also available for umount or other operations. Note
+that maintaining mount options in user space is tricky, because it's necessary to use
+libmount-based tools and there is no guarantee that the options will always be
+available (for example after a move mount operation or in an unshared namespace).
+
+Note that before util-linux v2.30 the x-* options were not maintained by
+libmount and were not stored in user space (the functionality was the same as X-* has now),
+but due to the growing number of use-cases (in initrd, systemd etc.) the
+functionality has been extended to keep existing fstab configurations usable
+without a change.
+.TP
+.BR X-mount.mkdir [ = \fImode\fR ]
+Allows creating a target directory (mountpoint). The optional argument
+.I mode
+specifies the filesystem access mode used for
+.BR mkdir (2)
+in octal notation. The default mode is 0755. This functionality is supported
+only for root users. The option is also supported as x-mount.mkdir, but this notation
+has been deprecated for mount.mkdir since v2.30.
+
+.SH "FILESYSTEM-SPECIFIC MOUNT OPTIONS"
+You should consult the respective man page for the filesystem first.
+If you want to know what options the ext4 filesystem supports, then check the
+.BR ext4 (5)
+man page.
+If that doesn't exist, you can also check the corresponding mount page like
+.BR mount.cifs (8).
+Note that you might have to install the respective userland tools.
+.sp
+The following options apply only to certain filesystems.
+We sort them by filesystem. They all follow the
+.B \-o
+flag.
+.sp
+What options are supported depends a bit on the running kernel.
+More info may be found in the kernel source subdirectory
+.IR Documentation/filesystems .
+
+.SS "Mount options for adfs"
+.TP
+\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP
+Set the owner and group of the files in the filesystem (default: uid=gid=0).
+.TP
+\fBownmask=\fP\,\fIvalue\fP and \fBothmask=\fP\,\fIvalue\fP
+Set the permission mask for ADFS 'owner' permissions and 'other' permissions,
+respectively (default: 0700 and 0077, respectively).
+See also
+.IR /usr/src/linux/Documentation/filesystems/adfs.txt .
+
+.SS "Mount options for affs"
+.TP
+\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP
+Set the owner and group of the root of the filesystem (default: uid=gid=0,
+but with option
+.B uid
+or
+.B gid
+without specified value, the UID and GID of the current process are taken).
+.TP
+\fBsetuid=\fP\,\fIvalue\fP and \fBsetgid=\fP\,\fIvalue\fP
+Set the owner and group of all files.
+.TP
+.BI mode= value
+Set the mode of all files to
+.IR value " & 0777"
+disregarding the original permissions.
+Add search permission to directories that have read permission.
+The value is given in octal.
+.TP
+.B protect
+Do not allow any changes to the protection bits on the filesystem.
+.TP
+.B usemp
+Set UID and GID of the root of the filesystem to the UID and GID
+of the mount point upon the first sync or umount, and then
+clear this option. Strange...
+.TP
+.B verbose
+Print an informational message for each successful mount.
+.TP
+.BI prefix= string
+Prefix used before volume name, when following a link.
+.TP
+.BI volume= string
+Prefix (of length at most 30) used before '/' when following a symbolic link.
+.TP
+.BI reserved= value
+(Default: 2.) Number of unused blocks at the start of the device.
+.TP
+.BI root= value
+Give explicitly the location of the root block.
+.TP
+.BI bs= value
+Give blocksize. Allowed values are 512, 1024, 2048, 4096.
+.TP
+.BR grpquota | noquota | quota | usrquota
+These options are accepted but ignored.
+(However, quota utilities may react to such strings in
+.IR /etc/fstab .)
+
+.SS "Mount options for debugfs"
+The debugfs filesystem is a pseudo filesystem, traditionally mounted on
+.IR /sys/kernel/debug .
+.\" or just /debug
+.\" present since 2.6.11
+As of kernel version 3.4, debugfs has the following options:
+.TP
+.BI uid= n ", gid=" n
+Set the owner and group of the mountpoint.
+.TP
+.BI mode= value
+Sets the mode of the mountpoint.
+
+.SS "Mount options for devpts"
+The devpts filesystem is a pseudo filesystem, traditionally mounted on
+.IR /dev/pts .
+In order to acquire a pseudo terminal, a process opens
+.IR /dev/ptmx ;
+the number of the pseudo terminal is then made available to the process
+and the pseudo terminal slave can be accessed as
+.IR /dev/pts/ <number>.
+.TP
+\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP
+This sets the owner or the group of newly created PTYs to
+the specified values. When nothing is specified, they will
+be set to the UID and GID of the creating process.
+For example, if there is a tty group with GID 5, then
+.B gid=5
+will cause newly created PTYs to belong to the tty group.
+.TP
+.BI mode= value
+Set the mode of newly created PTYs to the specified value.
+The default is 0600.
+A value of
+.B mode=620
+and
+.B gid=5
+makes "mesg y" the default on newly created PTYs.
+.TP
+.B newinstance
+Create a private instance of devpts filesystem, such that
+indices of ptys allocated in this new instance are
+independent of indices created in other instances of devpts.
+
+All mounts of devpts without this
+.B newinstance
+option share the same set of pty indices (i.e. legacy mode).
+Each mount of devpts with the
+.B newinstance
+option has a private set of pty indices.
+
+This option is mainly used to support containers in the
+linux kernel. It is implemented in linux kernel versions
+starting with 2.6.29. Further, this mount option is valid
+only if CONFIG_DEVPTS_MULTIPLE_INSTANCES is enabled in the
+kernel configuration.
+
+To use this option effectively,
+.I /dev/ptmx
+must be a symbolic link to
+.IR pts/ptmx .
+See
+.I Documentation/filesystems/devpts.txt
+in the linux kernel source tree for details.
+.TP
+.BI ptmxmode= value
+
+Set the mode for the new
+.I ptmx
+device node in the devpts filesystem.
+
+With the support for multiple instances of devpts (see
+.B newinstance
+option above), each instance has a private
+.I ptmx
+node in the root of the devpts filesystem (typically
+.IR /dev/pts/ptmx ).
+
+For compatibility with older versions of the kernel, the
+default mode of the new
+.I ptmx
+node is 0000.
+.BI ptmxmode= value
+specifies a more useful mode for the
+.I ptmx
+node and is highly recommended when the
+.B newinstance
+option is specified.
+
+This option is only implemented in linux kernel versions
+starting with 2.6.29. Further, this option is valid only if
+CONFIG_DEVPTS_MULTIPLE_INSTANCES is enabled in the kernel
+configuration.
+
+.SS "Mount options for fat"
+(Note:
+.I fat
+is not a separate filesystem, but a common part of the
+.IR msdos ,
+.I umsdos
+and
+.I vfat
+filesystems.)
+.TP
+.BR blocksize= { 512 | 1024 | 2048 }
+Set blocksize (default 512). This option is obsolete.
+.TP
+\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP
+Set the owner and group of all files.
+(Default: the UID and GID of the current process.)
+.TP
+.BI umask= value
+Set the umask (the bitmask of the permissions that are
+.B not
+present). The default is the umask of the current process.
+The value is given in octal.
+.TP
+.BI dmask= value
+Set the umask applied to directories only.
+The default is the umask of the current process.
+The value is given in octal.
+.TP
+.BI fmask= value
+Set the umask applied to regular files only.
+The default is the umask of the current process.
+The value is given in octal.
+.TP
+.BI allow_utime= value
+This option controls the permission check of mtime/atime.
+.RS
+.TP
+.B 20
+If the current process is in the group of the file's group ID, you can change the timestamp.
+.TP
+.B 2
+Other users can change timestamp.
+.PP
+The default is set from `dmask' option. (If the directory is writable,
+.BR utime (2)
+is also allowed. I.e.\& \s+3~\s0dmask & 022)
+
+Normally
+.BR utime (2)
+checks that the current process is the owner of the file, or that it has the
+CAP_FOWNER capability. But the FAT filesystem doesn't have UID/GID on disk, so
+the normal check is too inflexible. With this option you can relax it.
+.RE
+.TP
+.BI check= value
+Three different levels of pickiness can be chosen:
+.RS
+.TP
+.BR r [ elaxed ]
+Upper and lower case are accepted and equivalent, long name parts are
+truncated (e.g.\&
+.I verylongname.foobar
+becomes
+.IR verylong.foo ),
+leading and embedded spaces are accepted in each name part (name and extension).
+.TP
+.BR n [ ormal ]
+Like "relaxed", but many special characters (*, ?, <, spaces, etc.) are
+rejected. This is the default.
+.TP
+.BR s [ trict ]
+Like "normal", but names that contain long parts or special characters
+that are sometimes used on Linux but are not accepted by MS-DOS
+(+, =, etc.) are rejected.
+.RE
+.TP
+.BI codepage= value
+Sets the codepage for converting to shortname characters on FAT
+and VFAT filesystems. By default, codepage 437 is used.
+.TP
+.BI conv= mode
+This option is obsolete and may fail or be ignored.
+.TP
+.BI cvf_format= module
+Forces the driver to use the CVF (Compressed Volume File) module
+.RI cvf_ module
+instead of auto-detection. If the kernel supports kmod, the
+cvf_format=xxx option also controls on-demand CVF module loading.
+This option is obsolete.
+.TP
+.BI cvf_option= option
+Option passed to the CVF module. This option is obsolete.
+.TP
+.B debug
+Turn on the
+.I debug
+flag. A version string and a list of filesystem parameters will be
+printed (these data are also printed if the parameters appear to be
+inconsistent).
+.TP
+.B discard
+If set, causes discard/TRIM commands to be issued to the block device
+when blocks are freed. This is useful for SSD devices and
+sparse/thinly-provisioned LUNs.
+.TP
+.B dos1xfloppy
+If set, use a fallback default BIOS Parameter Block configuration, determined
+by backing device size. These static parameters match defaults assumed by DOS
+1.x for 160 kiB, 180 kiB, 320 kiB, and 360 kiB floppies and floppy images.
+.TP
+.BR errors= { panic | continue | remount-ro }
+Specify FAT behavior on critical errors: panic, continue without doing
+anything, or remount the partition in read-only mode (default behavior).
+.TP
+.BR fat= { 12 | 16 | 32 }
+Specify a 12, 16 or 32 bit fat. This overrides
+the automatic FAT type detection routine. Use with caution!
+.TP
+.BI iocharset= value
+Character set to use for converting between 8 bit characters
+and 16 bit Unicode characters. The default is iso8859-1.
+Long filenames are stored on disk in Unicode format.
+.TP
+.BR nfs= { stale_rw | nostale_ro }
+Enable this only if you want to export the FAT filesystem over NFS.
+
+.BR stale_rw :
+This option maintains an index (cache) of directory inodes which is used by the
+nfs-related code to improve look-ups. Full file operations (read/write) over
+NFS are supported but with cache eviction at NFS server, this could result in
+spurious
+.B ESTALE
+errors.
+
+.BR nostale_ro :
+This option bases the inode number and file handle
+on the on-disk location of a file in the FAT directory entry.
+This ensures that
+.B ESTALE
+will not be returned after a file is
+evicted from the inode cache. However, it means that operations
+such as rename, create and unlink could cause file handles that
+previously pointed at one file to point at a different file,
+potentially causing data corruption. For this reason, this
+option also mounts the filesystem readonly.
+
+To maintain backward compatibility, '-o nfs' is also accepted,
+defaulting to
+.BR stale_rw .
+.TP
+.B tz=UTC
+This option disables the conversion of timestamps
+between local time (as used by Windows on FAT) and UTC
+(which Linux uses internally). This is particularly
+useful when mounting devices (like digital cameras)
+that are set to UTC in order to avoid the pitfalls of
+local time.
+.TP
+.BI time_offset= minutes
+Set offset for conversion of timestamps from local time used by FAT to UTC.
+I.e.,
+.I minutes
+will be subtracted from each timestamp to convert it to UTC used
+internally by Linux. This is useful when the time zone set in the kernel via
+.BR settimeofday (2)
+is not the time zone used by the filesystem. Note
+that this option still does not provide correct time stamps in all cases in
+presence of DST - time stamps in a different DST setting will be off by one
+hour.
+.TP
+.B quiet
+Turn on the
+.I quiet
+flag. Attempts to chown or chmod files do not return errors,
+although they fail. Use with caution!
+.TP
+.B rodir
+FAT has the ATTR_RO (read-only) attribute. On Windows, the ATTR_RO of the
+directory will just be ignored, and is used only by applications as a flag
+(e.g.\& it's set for the customized folder).
+
+If you want to use ATTR_RO as read-only flag even for the directory, set this
+option.
+.TP
+.B showexec
+If set, the execute permission bits of the file will be allowed only if
+the extension part of the name is \&.EXE, \&.COM, or \&.BAT. Not set by default.
+.TP
+.B sys_immutable
+If set, ATTR_SYS attribute on FAT is handled as IMMUTABLE flag on Linux.
+Not set by default.
+.TP
+.B flush
+If set, the filesystem will try to flush to disk earlier than normal.
+Not set by default.
+.TP
+.B usefree
+Use the "free clusters" value stored on FSINFO. It will
+be used to determine the number of free clusters without
+scanning the disk. But it's not used by default, because
+recent Windows versions don't update it correctly in some
+cases. If you are sure the "free clusters" value on FSINFO is
+correct, you can use this option to avoid scanning the disk.
+.TP
+.BR dots ", " nodots ", " dotsOK= [ yes | no ]
+Various misguided attempts to force Unix or DOS conventions
+onto a FAT filesystem.
+
+.SS "Mount options for hfs"
+.TP
+.BI creator= cccc ", type=" cccc
+Set the creator/type values as shown by the MacOS finder
+used for creating new files. Default values: '????'.
+.TP
+.BI uid= n ", gid=" n
+Set the owner and group of all files.
+(Default: the UID and GID of the current process.)
+.TP
+.BI dir_umask= n ", file_umask=" n ", umask=" n
+Set the umask used for all directories, all regular files, or all
+files and directories. Defaults to the umask of the current process.
+.TP
+.BI session= n
+Select the CDROM session to mount.
+Defaults to leaving that decision to the CDROM driver.
+This option will fail with anything but a CDROM as underlying device.
+.TP
+.BI part= n
+Select partition number n from the device.
+Only makes sense for CDROMs.
+Defaults to not parsing the partition table at all.
+.TP
+.B quiet
+Don't complain about invalid mount options.
+
+.SS "Mount options for hpfs"
+.TP
+\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP
+Set the owner and group of all files. (Default: the UID and GID
+of the current process.)
+.TP
+.BI umask= value
+Set the umask (the bitmask of the permissions that are
+.B not
+present). The default is the umask of the current process.
+The value is given in octal.
+.TP
+.BR case= { lower | asis }
+Convert all file names to lower case, or leave them.
+(Default:
+.BR case=lower .)
+.TP
+.BI conv= mode
+This option is obsolete and may fail or be ignored.
+.TP
+.B nocheck
+Do not abort mounting when certain consistency checks fail.
+
+.SS "Mount options for iso9660"
+ISO 9660 is a standard describing a filesystem structure to be used
+on CD-ROMs. (This filesystem type is also seen on some DVDs. See also the
+.I udf
+filesystem.)
+
+Normal
+.I iso9660
+filenames appear in an 8.3 format (i.e., DOS-like restrictions on filename
+length), and in addition all characters are in upper case. Also there is
+no field for file ownership, protection, number of links, provision for
+block/character devices, etc.
+
+Rock Ridge is an extension to iso9660 that provides all of these UNIX-like
+features. Basically there are extensions to each directory record that
+supply all of the additional information, and when Rock Ridge is in use,
+the filesystem is indistinguishable from a normal UNIX filesystem (except
+that it is read-only, of course).
+.TP
+.B norock
+Disable the use of Rock Ridge extensions, even if available. Cf.\&
+.BR map .
+.TP
+.B nojoliet
+Disable the use of Microsoft Joliet extensions, even if available. Cf.\&
+.BR map .
+.TP
+.BR check= { r [ elaxed ]| s [ trict ]}
+With
+.BR check=relaxed ,
+a filename is first converted to lower case before doing the lookup.
+This is probably only meaningful together with
+.B norock
+and
+.BR map=normal .
+(Default:
+.BR check=strict .)
+.TP
+\fBuid=\fP\,\fIvalue\fP and \fBgid=\fP\,\fIvalue\fP
+Give all files in the filesystem the indicated user or group id,
+possibly overriding the information found in the Rock Ridge extensions.
+(Default:
+.BR uid=0,gid=0 .)
+.TP
+.BR map= { n [ ormal ]| o [ ff ]| a [ corn ]}
+For non-Rock Ridge volumes, normal name translation maps upper
+to lower case ASCII, drops a trailing `;1', and converts `;' to `.'.
+With
+.B map=off
+no name translation is done. See
+.BR norock .
+(Default:
+.BR map=normal .)
+.B map=acorn
+is like
+.B map=normal
+but also applies Acorn extensions if present.
+.TP
+.BI mode= value
+For non-Rock Ridge volumes, give all files the indicated mode.
+(Default: read and execute permission for everybody.)
+Octal mode values require a leading 0.
+.TP
+.B unhide
+Also show hidden and associated files.
+(If the ordinary files and the associated or hidden files have
+the same filenames, this may make the ordinary files inaccessible.)
+.TP
+.BR block= { 512 | 1024 | 2048 }
+Set the block size to the indicated value.
+(Default:
+.BR block=1024 .)
+.TP
+.BI conv= mode
+This option is obsolete and may fail or be ignored.
+.TP
+.B cruft
+If the high byte of the file length contains other garbage,
+set this mount option to ignore the high order bits of the file length.
+This implies that a file cannot be larger than 16\ MB.
+.TP
+.BI session= x
+Select number of session on multisession CD.
+.TP
+.BI sbsector= xxx
+Session begins from sector xxx.
+.LP
+The following options are the same as for vfat and specifying them only makes
+sense when using discs encoded using Microsoft's Joliet extensions.
+.TP
+.BI iocharset= value
+Character set to use for converting 16 bit Unicode characters on CD
+to 8 bit characters. The default is iso8859-1.
+.TP
+.B utf8
+Convert 16 bit Unicode characters on CD to UTF-8.
+
+.SS "Mount options for jfs"
+.TP
+.BI iocharset= name
+Character set to use for converting from Unicode to ASCII. The default is
+to do no conversion. Use
+.B iocharset=utf8
+for UTF8 translations. This requires CONFIG_NLS_UTF8 to be set in
+the kernel
+.I ".config"
+file.
+.TP
+.BI resize= value
+Resize the volume to
+.I value
+blocks. JFS only supports growing a volume, not shrinking it. This option
+is only valid during a remount, when the volume is mounted read-write. The
+.B resize
+keyword with no value will grow the volume to the full size of the partition.
+.TP
+.B nointegrity
+Do not write to the journal. The primary use of this option is to allow
+for higher performance when restoring a volume from backup media. The
+integrity of the volume is not guaranteed if the system abnormally ends.
+.TP
+.B integrity
+Default. Commit metadata changes to the journal. Use this option to remount
+a volume where the
+.B nointegrity
+option was previously specified in order to restore normal behavior.
+.TP
+.BR errors= { continue | remount-ro | panic }
+Define the behavior when an error is encountered.
+(Either ignore errors and just mark the filesystem erroneous and continue,
+or remount the filesystem read-only, or panic and halt the system.)
+.TP
+.BR noquota | quota | usrquota | grpquota
+These options are accepted but ignored.
+
+.SS "Mount options for msdos"
+See mount options for fat.
+If the
+.I msdos
+filesystem detects an inconsistency, it reports an error and sets the file
+system read-only. The filesystem can be made writable again by remounting
+it.
+
+.SS "Mount options for ncpfs"
+Just like
+.IR nfs ", the " ncpfs
+implementation expects a binary argument (a
+.IR "struct ncp_mount_data" )
+to the mount system call. This argument is constructed by
+.BR ncpmount (8)
+and the current version of
+.B mount
+(2.12) does not know anything about ncpfs.
+
+.SS "Mount options for ntfs"
+.TP
+.BI iocharset= name
+Character set to use when returning file names.
+Unlike VFAT, NTFS suppresses names that contain
+nonconvertible characters. Deprecated.
+.TP
+.BI nls= name
+New name for the option earlier called
+.IR iocharset .
+.TP
+.B utf8
+Use UTF-8 for converting file names.
+.TP
+.BR uni_xlate= { 0 | 1 | 2 }
+For 0 (or `no' or `false'), do not use escape sequences
+for unknown Unicode characters.
+For 1 (or `yes' or `true') or 2, use vfat-style 4-byte escape sequences
+starting with ":". Here 2 gives a little-endian encoding
+and 1 a byteswapped bigendian encoding.
+.TP
+.B posix=[0|1]
+If enabled (posix=1), the filesystem distinguishes between
+upper and lower case. The 8.3 alias names are presented as
+hard links instead of being suppressed. This option is obsolete.
+.TP
+\fBuid=\fP\,\fIvalue\fP, \fBgid=\fP\,\fIvalue\fP and \fBumask=\fP\,\fIvalue\fP
+Set the file permission on the filesystem.
+The umask value is given in octal.
+By default, the files are owned by root and not readable by anybody else.
+
+.SS "Mount options for overlay"
+Since Linux 3.18 the overlay pseudo filesystem implements a union mount for
+other filesystems.
+
+An overlay filesystem combines two filesystems - an \fBupper\fR filesystem and
+a \fBlower\fR filesystem. When a name exists in both filesystems, the object
+in the upper filesystem is visible while the object in the lower filesystem is
+either hidden or, in the case of directories, merged with the upper object.
+
+The lower filesystem can be any filesystem supported by Linux and does not need
+to be writable. The lower filesystem can even be another overlayfs. The upper
+filesystem will normally be writable and if it is it must support the creation
+of trusted.* extended attributes, and must provide a valid d_type in readdir
+responses, so NFS is not suitable.
+
+A read-only overlay of two read-only filesystems may use any filesystem type.
+The options \fBlowerdir\fR and \fBupperdir\fR are combined into a merged
+directory by using:
+
+.RS
+.br
+.nf
+.B "mount \-t overlay overlay \e"
+.B " \-olowerdir=/lower,upperdir=/upper,workdir=/work /merged"
+.fi
+.br
+.RE
+
+.TP
+.BI lowerdir= directory
+Any filesystem, does not need to be on a writable filesystem.
+.TP
+.BI upperdir= directory
+The upperdir is normally on a writable filesystem.
+.TP
+.BI workdir= directory
+The workdir needs to be an empty directory on the same filesystem as upperdir.
+
+.SS "Mount options for reiserfs"
+Reiserfs is a journaling filesystem.
+.TP
+.B conv
+Instructs version 3.6 reiserfs software to mount a version 3.5 filesystem,
+using the 3.6 format for newly created objects. This filesystem will no
+longer be compatible with reiserfs 3.5 tools.
+.TP
+.BR hash= { rupasov | tea | r5 | detect }
+Choose which hash function reiserfs will use to find files within directories.
+.RS
+.TP
+.B rupasov
+A hash invented by Yury Yu.\& Rupasov. It is fast and preserves locality,
+mapping lexicographically close file names to close hash values.
+This option should not be used, as it causes a high probability of hash
+collisions.
+.TP
+.B tea
+A Davies-Meyer function implemented by Jeremy Fitzhardinge.
+It uses hash permuting bits in the name. It gets high randomness
+and, therefore, low probability of hash collisions at some CPU cost.
+This may be used if EHASHCOLLISION errors are experienced with the r5 hash.
+.TP
+.B r5
+A modified version of the rupasov hash. It is used by default and is
+the best choice unless the filesystem has huge directories and
+unusual file-name patterns.
+.TP
+.B detect
+Instructs
+.I mount
+to detect which hash function is in use by examining
+the filesystem being mounted, and to write this information into
+the reiserfs superblock. This is only useful on the first mount of
+an old format filesystem.
+.RE
+.TP
+.B hashed_relocation
+Tunes the block allocator. This may provide performance improvements
+in some situations.
+.TP
+.B no_unhashed_relocation
+Tunes the block allocator. This may provide performance improvements
+in some situations.
+.TP
+.B noborder
+Disable the border allocator algorithm invented by Yury Yu.\& Rupasov.
+This may provide performance improvements in some situations.
+.TP
+.B nolog
+Disable journaling. This will provide slight performance improvements in
+some situations at the cost of losing reiserfs's fast recovery from crashes.
+Even with this option turned on, reiserfs still performs all journaling
+operations, save for actual writes into its journaling area. Implementation
+of
+.I nolog
+is a work in progress.
+.TP
+.B notail
+By default, reiserfs stores small files and `file tails' directly into its
+tree. This confuses some utilities such as
+.BR LILO (8).
+This option is used to disable packing of files into the tree.
+.TP
+.B replayonly
+Replay the transactions which are in the journal, but do not actually
+mount the filesystem. Mainly used by
+.IR reiserfsck .
+.TP
+.BI resize= number
+A remount option which permits online expansion of reiserfs partitions.
+Instructs reiserfs to assume that the device has
+.I number
+blocks.
+This option is designed for use with devices which are under logical
+volume management (LVM).
+There is a special
+.I resizer
+utility which can be obtained from
+.IR ftp://ftp.namesys.com/pub/reiserfsprogs .
+.TP
+.B user_xattr
+Enable Extended User Attributes. See the
+.BR attr (5)
+manual page.
+.TP
+.B acl
+Enable POSIX Access Control Lists. See the
+.BR acl (5)
+manual page.
+.TP
+.BR barrier=none " / " barrier=flush
+This disables / enables the use of write barriers in the journaling code.
+barrier=none disables, barrier=flush enables (default). This also requires an
+IO stack which can support barriers, and if reiserfs gets an error on a barrier
+write, it will disable barriers again with a warning. Write barriers enforce
+proper on-disk ordering of journal commits, making volatile disk write caches
+safe to use, at some performance penalty. If your disks are battery-backed in
+one way or another, disabling barriers may safely improve performance.
+
+.SS "Mount options for ubifs"
+UBIFS is a flash filesystem which works on top of UBI volumes. Note that
+\fBatime\fR is not supported and is always turned off.
+.TP
+The device name may be specified as
+.RS
+.TP
+.B ubiX_Y
+UBI device number
+.BR X ,
+volume number
+.B Y
+.TP
+.B ubiY
+UBI device number
+.BR 0 ,
+volume number
+.B Y
+.TP
+.B ubiX:NAME
+UBI device number
+.BR X ,
+volume with name
+.B NAME
+.TP
+.B ubi:NAME
+UBI device number
+.BR 0 ,
+volume with name
+.B NAME
+.RE
+Alternative
+.B !
+separator may be used instead of
+.BR : .
+.TP
+The following mount options are available:
+.TP
+.B bulk_read
+Enable bulk-read. VFS read-ahead is disabled because it slows down the file
+system. Bulk-Read is an internal optimization. Some flashes may read faster if
+the data are read at one go, rather than at several read requests. For
+example, OneNAND can do "read-while-load" if it reads more than one NAND page.
+.TP
+.B no_bulk_read
+Do not bulk-read. This is the default.
+.TP
+.B chk_data_crc
+Check data CRC-32 checksums. This is the default.
+.TP
+.B no_chk_data_crc
+Do not check data CRC-32 checksums. With this option, the filesystem does not
+check CRC-32 checksum for data, but it does check it for the internal indexing
+information. This option only affects reading, not writing. CRC-32 is always
+calculated when writing the data.
+.TP
+.BR compr= { none | lzo | zlib }
+Select the default compressor which is used when new files are written. It is
+still possible to read compressed files if mounted with the
+.B none
+option.
+
+.SS "Mount options for udf"
+UDF is the "Universal Disk Format" filesystem defined by OSTA, the Optical
+Storage Technology Association, and is often used for DVD-ROM, frequently
+in the form of a hybrid UDF/ISO-9660 filesystem. It is, however,
+perfectly usable by itself on disk drives, flash drives and other block devices.
+See also
+.IR iso9660 .
+.TP
+.B uid=
+Make all files in the filesystem belong to the given user.
+uid=forget can be specified independently of (or usually in
+addition to) uid=<user> and results in UDF
+not storing uids to the media. In fact the recorded uid
+is the 32-bit overflow uid -1 as defined by the UDF standard.
+The value is given as either <user> which is a valid user name or the corresponding
+decimal user id, or the special string "forget".
+.TP
+.B gid=
+Make all files in the filesystem belong to the given group.
+gid=forget can be specified independently of (or usually in
+addition to) gid=<group> and results in UDF
+not storing gids to the media. In fact the recorded gid
+is the 32-bit overflow gid -1 as defined by the UDF standard.
+The value is given as either <group> which is a valid group name or the corresponding
+decimal group id, or the special string "forget".
+.TP
+.B umask=
+Mask out the given permissions from all inodes read from the filesystem.
+The value is given in octal.
+.TP
+.B mode=
+If mode= is set the permissions of all non-directory inodes read from the
+filesystem will be set to the given mode. The value is given in octal.
+.TP
+.B dmode=
+If dmode= is set the permissions of all directory inodes read from the
+filesystem will be set to the given dmode. The value is given in octal.
+.TP
+.B bs=
+Set the block size. Default value prior to kernel version 2.6.30 was
+2048. Since 2.6.30 and prior to 4.11 it was logical device block size with
+fallback to 2048. Since 4.11 it is logical block size with fallback to
+any valid block size between logical device block size and 4096.
+
+For other details see the \fBmkudffs\fP(8) 2.0+ manpage, sections
+\fBCOMPATIBILITY\fP and \fBBLOCK SIZE\fP.
+.TP
+.B unhide
+Show otherwise hidden files.
+.TP
+.B undelete
+Show deleted files in lists.
+.TP
+.B adinicb
+Embed data in the inode. (default)
+.TP
+.B noadinicb
+Don't embed data in the inode.
+.TP
+.B shortad
+Use short UDF address descriptors.
+.TP
+.B longad
+Use long UDF address descriptors. (default)
+.TP
+.B nostrict
+Unset strict conformance.
+.TP
+.B iocharset=
+Set the NLS character set. This requires kernel compiled with CONFIG_UDF_NLS option.
+.TP
+.B utf8
+Set the UTF-8 character set.
+.SS Mount options for debugging and disaster recovery
+.TP
+.B novrs
+Ignore the Volume Recognition Sequence and attempt to mount anyway.
+.TP
+.B session=
+Select the session number for multi-session recorded optical media. (default= last session)
+.TP
+.B anchor=
+Override standard anchor location. (default= 256)
+.TP
+.B lastblock=
+Set the last block of the filesystem.
+.SS Unused historical mount options that may be encountered and should be removed
+.TP
+.B uid=ignore
+Ignored, use uid=<user> instead.
+.TP
+.B gid=ignore
+Ignored, use gid=<group> instead.
+.TP
+.B volume=
+Unimplemented and ignored.
+.TP
+.B partition=
+Unimplemented and ignored.
+.TP
+.B fileset=
+Unimplemented and ignored.
+.TP
+.B rootdir=
+Unimplemented and ignored.
+
+.SS "Mount options for ufs"
+.TP
+.BI ufstype= value
+UFS is a filesystem widely used in different operating systems.
+The problem is the differences among implementations. Features of some
+implementations are undocumented, so it's hard to recognize the
+type of ufs automatically.
+That's why the user must specify the type of ufs by mount option.
+Possible values are:
+.RS
+.TP
+.B old
+Old format of ufs, this is the default, read only.
+(Don't forget to give the \-r option.)
+.TP
+.B 44bsd
+For filesystems created by a BSD-like system (NetBSD, FreeBSD, OpenBSD).
+.TP
+.B ufs2
+Used in FreeBSD 5.x; supported as read-write.
+.TP
+.B 5xbsd
+Synonym for ufs2.
+.TP
+.B sun
+For filesystems created by SunOS or Solaris on Sparc.
+.TP
+.B sunx86
+For filesystems created by Solaris on x86.
+.TP
+.B hp
+For filesystems created by HP-UX, read-only.
+.TP
+.B nextstep
+For filesystems created by NeXTStep (on NeXT station) (currently read only).
+.TP
+.B nextstep-cd
+For NextStep CDROMs (block_size == 2048), read-only.
+.TP
+.B openstep
+For filesystems created by OpenStep (currently read only).
+The same filesystem type is also used by Mac OS X.
+.RE
+
+.TP
+.BI onerror= value
+Set behavior on error:
+.RS
+.TP
+.B panic
+If an error is encountered, cause a kernel panic.
+.TP
+.RB [ lock | umount | repair ]
+These mount options don't do anything at present;
+when an error is encountered only a console message is printed.
+.RE
+
+.SS "Mount options for umsdos"
+See mount options for msdos.
+The
+.B dotsOK
+option is explicitly killed by
+.IR umsdos .
+
+.SS "Mount options for vfat"
+First of all, the mount options for
+.I fat
+are recognized.
+The
+.B dotsOK
+option is explicitly killed by
+.IR vfat .
+Furthermore, there are
+.TP
+.B uni_xlate
+Translate unhandled Unicode characters to special escaped sequences.
+This lets you back up and restore filenames that are created with any
+Unicode characters. Without this option, a '?' is used when no
+translation is possible. The escape character is ':' because it is
+otherwise invalid on the vfat filesystem. The escape sequence
+that gets used, where u is the Unicode character,
+is: ':', (u & 0x3f), ((u>>6) & 0x3f), (u>>12).
+.TP
+.B posix
+Allow two files with names that only differ in case.
+This option is obsolete.
+.TP
+.B nonumtail
+First try to make a short name without sequence number,
+before trying
+.IR name\s+3~\s0num.ext .
+.TP
+.B utf8
+UTF8 is the filesystem safe 8-bit encoding of Unicode that is used by the
+console. It can be enabled for the filesystem with this option or disabled
+with utf8=0, utf8=no or utf8=false. If `uni_xlate' gets set, UTF8 gets
+disabled.
+.TP
+.BI shortname= mode
+Defines the behavior for creation and display of filenames which fit into
+8.3 characters. If a long name for a file exists, it will always be the
+preferred one for display. There are four \fImode\fRs:
+.RS
+.TP
+.B lower
+Force the short name to lower case upon display; store a long name when
+the short name is not all upper case.
+.TP
+.B win95
+Force the short name to upper case upon display; store a long name when
+the short name is not all upper case.
+.TP
+.B winnt
+Display the short name as is; store a long name when the short name is
+not all lower case or all upper case.
+.TP
+.B mixed
+Display the short name as is; store a long name when the short name is not
+all upper case. This mode is the default since Linux 2.6.32.
+.RE
+
+.SS "Mount options for usbfs"
+.TP
+\fBdevuid=\fP\,\fIuid\fP and \fBdevgid=\fP\,\fIgid\fP and \fBdevmode=\fP\,\fImode\fP
+Set the owner and group and mode of the device files in the usbfs filesystem
+(default: uid=gid=0, mode=0644). The mode is given in octal.
+.TP
+\fBbusuid=\fP\,\fIuid\fP and \fBbusgid=\fP\,\fIgid\fP and \fBbusmode=\fP\,\fImode\fP
+Set the owner and group and mode of the bus directories in the usbfs
+filesystem (default: uid=gid=0, mode=0555). The mode is given in octal.
+.TP
+\fBlistuid=\fP\,\fIuid\fP and \fBlistgid=\fP\,\fIgid\fP and \fBlistmode=\fP\,\fImode\fP
+Set the owner and group and mode of the file
+.I devices
+(default: uid=gid=0, mode=0444). The mode is given in octal.
+
+.SH "THE LOOP DEVICE"
+One further possible type is a mount via the loop device. For example,
+the command
+.RS
+.sp
+.B "mount /tmp/disk.img /mnt \-t vfat \-o loop=/dev/loop3"
+.sp
+.RE
+will set up the loop device
+.I /dev/loop3
+to correspond to the file
+.IR /tmp/disk.img ,
+and then mount this device on
+.IR /mnt .
+
+If no explicit loop device is mentioned
+(but just an option `\fB\-o loop\fP' is given), then
+.B mount
+will try to find some unused loop device and use that, for example
+.RS
+.sp
+.B "mount /tmp/disk.img /mnt \-o loop"
+.sp
+.RE
+The mount command
+.B automatically
+creates a loop device from a regular file if a filesystem type is
+not specified or the filesystem is known to libblkid, for example:
+.RS
+.sp
+.B "mount /tmp/disk.img /mnt"
+.sp
+.B "mount \-t ext4 /tmp/disk.img /mnt"
+.sp
+.RE
+This type of mount knows about three options, namely
+.BR loop ", " offset " and " sizelimit ,
+that are really options to
+.BR \%losetup (8).
+(These options can be used in addition to those specific
+to the filesystem type.)
+
+Since Linux 2.6.25 auto-destruction of loop devices is supported,
+meaning that any loop device allocated by
+.B mount
+will be freed by
+.B umount
+independently of
+.IR /etc/mtab .
+
+You can also free a loop device by hand, using
+.BR "losetup \-d " or " umount \-d" .
+
+Since util-linux v2.29, the mount command re-uses the loop device rather than
+initializing a new device if the same backing file is already used for some loop
+device with the same offset and sizelimit. This is necessary to avoid
+filesystem corruption.
+
+.SH RETURN CODES
+.B mount
+has the following return codes (the bits can be ORed):
+.TP
+.B 0
+success
+.TP
+.B 1
+incorrect invocation or permissions
+.TP
+.B 2
+system error (out of memory, cannot fork, no more loop devices)
+.TP
+.B 4
+internal
+.B mount
+bug
+.TP
+.B 8
+user interrupt
+.TP
+.B 16
+problems writing or locking /etc/mtab
+.TP
+.B 32
+mount failure
+.TP
+.B 64
+some mount succeeded
+.PP
+
+The command \fBmount \-a\fR returns 0 (all succeeded), 32 (all failed), or 64 (some
+failed, some succeeded).
+
+.SH "EXTERNAL HELPERS"
+The syntax of external mount helpers is:
+.sp
+.in +4
+.BI /sbin/mount. suffix
+.I spec dir
+.RB [ \-sfnv ]
+.RB [ \-N
+.IR namespace ]
+.RB [ \-o
+.IR options ]
+.RB [ \-t
+.IR type \fB. subtype ]
+.in
+.sp
+where the \fIsuffix\fR is the filesystem type and the \fB\-sfnvoN\fR options have
+the same meaning as the normal mount options. The \fB\-t\fR option is used for
+filesystems with subtypes support (for example
+.BR "/sbin/mount.fuse \-t fuse.sshfs" ).
+
+The command \fBmount\fR does not pass the mount options
+.BR unbindable ,
+.BR runbindable ,
+.BR private ,
+.BR rprivate ,
+.BR slave ,
+.BR rslave ,
+.BR shared ,
+.BR rshared ,
+.BR auto ,
+.BR noauto ,
+.BR comment ,
+.BR x-* ,
+.BR loop ,
+.B offset
+and
+.B sizelimit
+to the mount.<suffix> helpers. All other options are used in a
+comma-separated list as argument to the \fB\-o\fR option.
+
+.SH FILES
+See also "\fBThe files /etc/fstab, /etc/mtab and /proc/mounts\fR" section above.
+.TP 18n
+.I /etc/fstab
+filesystem table
+.TP
+.I /run/mount
+libmount private runtime directory
+.TP
+.I /etc/mtab
+table of mounted filesystems or symlink to /proc/mounts
+.TP
+.I /etc/mtab\s+3~\s0
+lock file (unused on systems with mtab symlink)
+.TP
+.I /etc/mtab.tmp
+temporary file (unused on systems with mtab symlink)
+.TP
+.I /etc/filesystems
+a list of filesystem types to try
+.SH ENVIRONMENT
+.IP LIBMOUNT_FSTAB=<path>
+overrides the default location of the fstab file (ignored for suid)
+.IP LIBMOUNT_MTAB=<path>
+overrides the default location of the mtab file (ignored for suid)
+.IP LIBMOUNT_DEBUG=all
+enables libmount debug output
+.IP LIBBLKID_DEBUG=all
+enables libblkid debug output
+.IP LOOPDEV_DEBUG=all
+enables loop device setup debug output
+.SH "SEE ALSO"
+.na
+.BR mount (2),
+.BR umount (2),
+.BR umount (8),
+.BR fstab (5),
+.BR nfs (5),
+.BR xfs (5),
+.BR e2label (8),
+.BR findmnt (8),
+.BR losetup (8),
+.BR mke2fs (8),
+.BR mountd (8),
+.BR nfsd (8),
+.BR swapon (8),
+.BR tune2fs (8),
+.BR xfs_admin (8)
+.ad
+.SH BUGS
+It is possible for a corrupted filesystem to cause a crash.
+.PP
+Some Linux filesystems don't support
+.BR "\-o sync " nor " \-o dirsync"
+(the ext2, ext3, ext4, fat and vfat filesystems
+.I do
+support synchronous updates (a la BSD) when mounted with the
+.B sync
+option).
+.PP
+The
+.B "\-o remount"
+may not be able to change mount parameters (all
+.IR ext2fs -specific
+parameters, except
+.BR sb ,
+are changeable with a remount, for example, but you can't change
+.B gid
+or
+.B umask
+for the
+.IR fatfs ).
+.PP
+It is possible that the files
+.I /etc/mtab
+and
+.I /proc/mounts
+don't match on systems with a regular mtab file. The first file is based only on
+the mount command options, but the content of the second file also depends on
+the kernel and other settings (e.g.\& on a remote NFS server -- in certain cases
+the mount command may report unreliable information about an NFS mount point
+and the /proc/mounts file usually contains more reliable information.) This is
+another reason to replace the mtab file with a symlink to the
+.I /proc/mounts
+file.
+.PP
+Checking files on NFS filesystems referenced by file descriptors (i.e.\& the
+.B fcntl
+and
+.B ioctl
+families of functions) may lead to inconsistent results due to the lack of
+a consistency check in the kernel even if noac is used.
+.PP
+The
+.B loop
+option with the
+.B offset
+or
+.B sizelimit
+options used may fail when using older kernels if the
+.B mount
+command can't confirm that the size of the block device has been configured
+as requested. This situation can be worked around by using
+the
+.B losetup
+command manually before calling
+.B mount
+with the configured loop device.
+.SH HISTORY
+A
+.B mount
+command existed in Version 5 AT&T UNIX.
+.SH AUTHORS
+.nf
+Karel Zak <kzak@redhat.com>
+.fi
+.SH AVAILABILITY
+The mount command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/mount.c b/sys-utils/mount.c
new file mode 100644
index 0000000..5e139e8
--- /dev/null
+++ b/sys-utils/mount.c
@@ -0,0 +1,918 @@
+/*
+ * mount(8) -- mount a filesystem
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Written by Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <stdarg.h>
+#include <libmount.h>
+#include <ctype.h>
+
+#include "nls.h"
+#include "c.h"
+#include "env.h"
+#include "strutils.h"
+#include "closestream.h"
+#include "canonicalize.h"
+
+#define XALLOC_EXIT_CODE MNT_EX_SYSERR
+#include "xalloc.h"
+
+#define OPTUTILS_EXIT_CODE MNT_EX_USAGE
+#include "optutils.h"
+
+/*** TODO: DOCS:
+ *
+ * --options-mode={ignore,append,prepend,replace} MNT_OMODE_{IGNORE, ...}
+ * --options-source={fstab,mtab,disable} MNT_OMODE_{FSTAB,MTAB,NOTAB}
+ * --options-source-force MNT_OMODE_FORCE
+ */
+
+static int mk_exit_code(struct libmnt_context *cxt, int rc);
+
+/*
+ * Refuse an operation that is reserved for root and exit with MNT_EX_USAGE.
+ *
+ * @option is the long option name (without the leading dashes) to mention
+ * in the message, or NULL for the generic wording. When the real UID is
+ * root but the effective UID is not, the effective UID is reported too.
+ */
+static void __attribute__((__noreturn__)) exit_non_root(const char *option)
+{
+	const uid_t real_uid = getuid();
+	const uid_t eff_uid = geteuid();
+	/* user is root, but setuid to non-root */
+	const int root_dropped = (real_uid == 0 && eff_uid != 0);
+
+	if (root_dropped && option)
+		errx(MNT_EX_USAGE, _("only root can use \"--%s\" option "
+				 "(effective UID is %u)"),
+				option, eff_uid);
+	else if (root_dropped)
+		errx(MNT_EX_USAGE, _("only root can do that "
+				 "(effective UID is %u)"), eff_uid);
+	else if (option)
+		errx(MNT_EX_USAGE, _("only root can use \"--%s\" option"), option);
+
+	errx(MNT_EX_USAGE, _("only root can do that"));
+}
+
+/*
+ * Print "<program> from <package> (libmount <version>[: feature, ...])"
+ * to stdout and exit with MNT_EX_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) print_version(void)
+{
+	const char *libver = NULL;
+	const char **features = NULL;
+	const char **ft;
+
+	mnt_get_library_version(&libver);
+	mnt_get_library_features(&features);
+
+	printf(_("%s from %s (libmount %s"),
+			program_invocation_short_name,
+			PACKAGE_STRING,
+			libver);
+
+	/* the first feature is introduced by ": ", all following ones by ", " */
+	for (ft = features; ft && *ft; ft++) {
+		const char *sep = (ft == features) ? ": " : ", ";
+
+		fputs(sep, stdout);
+		fputs(*ft, stdout);
+	}
+
+	fputs(")\n", stdout);
+	exit(MNT_EX_SUCCESS);
+}
+
+/*
+ * Parse-error callback installed on libmount tables: warns about the
+ * offending line when the file name is known. Always returns 1.
+ */
+static int table_parser_errcb(struct libmnt_table *tb __attribute__((__unused__)),
+			const char *filename, int line)
+{
+	if (!filename)
+		return 1;
+
+	warnx(_("%s: parse error at line %d -- ignored"), filename, line);
+	return 1;
+}
+
+/*
+ * Replace control chars with '?' to be compatible with coreutils. For more
+ * robust solution use findmnt(1) where we use \x?? hex encoding.
+ */
+static void safe_fputs(const char *data)
+{
+	/* walk the string as unsigned bytes so iscntrl() gets a valid value */
+	const unsigned char *cur = (const unsigned char *) data;
+
+	if (!cur)
+		return;		/* nothing to print */
+
+	while (*cur) {
+		int ch = *cur++;
+
+		fputc(iscntrl(ch) ? '?' : ch, stdout);
+	}
+}
+
+/*
+ * List the mounted filesystems known to @cxt (its mtab table), one per line:
+ *
+ *	<source> on <target> [type <type>] [(<options>)] [[<label>]]
+ *
+ * @pattern, when not NULL, limits the output to entries whose filesystem
+ * type matches it. @show_label enables the LABEL lookup for each source.
+ * Exits with MNT_EX_SYSERR when the table or the iterator is unavailable.
+ */
+static void print_all(struct libmnt_context *cxt, char *pattern, int show_label)
+{
+	struct libmnt_table *tb;
+	struct libmnt_iter *itr = NULL;
+	struct libmnt_fs *fs;
+	struct libmnt_cache *cache = NULL;
+
+	if (mnt_context_get_mtab(cxt, &tb))
+		err(MNT_EX_SYSERR, _("failed to read mtab"));
+
+	itr = mnt_new_iter(MNT_ITER_FORWARD);
+	if (!itr)
+		err(MNT_EX_SYSERR, _("failed to initialize libmount iterator"));
+	if (show_label)
+		cache = mnt_new_cache();
+
+	while (mnt_table_next_fs(tb, itr, &fs) == 0) {
+		const char *type = mnt_fs_get_fstype(fs);
+		const char *src = mnt_fs_get_source(fs);
+		const char *optstr = mnt_fs_get_options(fs);
+		const char *shown;
+		char *xsrc = NULL;
+
+		if (type && pattern && !mnt_match_fstype(type, pattern))
+			continue;
+
+		if (!mnt_fs_is_pseudofs(fs))
+			xsrc = mnt_pretty_path(src, cache);
+
+		/*
+		 * Never hand a NULL pointer to the output functions, and
+		 * give the source the same control-character replacement
+		 * as the target so that neither can garble the listing.
+		 */
+		shown = xsrc ? xsrc : src;
+		if (shown)
+			safe_fputs(shown);
+		else
+			fputs("none", stdout);
+		fputs(" on ", stdout);
+		safe_fputs(mnt_fs_get_target(fs));
+
+		if (type)
+			printf (" type %s", type);
+		if (optstr)
+			printf (" (%s)", optstr);
+		if (show_label && src) {
+			/* the value is not freed here; the cache is released below */
+			char *lb = mnt_cache_find_tag_value(cache, src, "LABEL");
+			if (lb)
+				printf (" [%s]", lb);
+		}
+		fputc('\n', stdout);
+		free(xsrc);
+	}
+
+	mnt_unref_cache(cache);
+	mnt_free_iter(itr);
+}
+
+/*
+ * mount -a [-F]
+ */
+static int mount_all(struct libmnt_context *cxt)
+{
+	/*
+	 * Walks all entries offered by mnt_context_next_mount() and counts
+	 * successes and failures. Returns MNT_EX_SUCCESS when nothing
+	 * failed, MNT_EX_FAIL when nothing succeeded, MNT_EX_SOMEOK for a
+	 * mix, or MNT_EX_SYSERR when the iterator cannot be allocated.
+	 */
+	struct libmnt_iter *itr;
+	struct libmnt_fs *fs;
+	int mntrc, ignored, rc = MNT_EX_SUCCESS;
+
+	int nsucc = 0, nerrs = 0;
+
+	itr = mnt_new_iter(MNT_ITER_FORWARD);
+	if (!itr) {
+		warn(_("failed to initialize libmount iterator"));
+		return MNT_EX_SYSERR;
+	}
+
+	while (mnt_context_next_mount(cxt, itr, &fs, &mntrc, &ignored) == 0) {
+
+		const char *tgt = mnt_fs_get_target(fs);
+
+		if (ignored) {
+			/* ignored == 1: skipped entry, any other value: already mounted */
+			if (mnt_context_is_verbose(cxt))
+				printf(ignored == 1 ? _("%-25s: ignored\n") :
+						 _("%-25s: already mounted\n"),
+						tgt);
+		} else if (mnt_context_is_fork(cxt)) {
+			/* the result is collected from the children below */
+			if (mnt_context_is_verbose(cxt))
+				printf(_("%-25s: mount successfully forked\n"), tgt);
+		} else {
+			if (mk_exit_code(cxt, mntrc) == MNT_EX_SUCCESS) {
+				nsucc++;
+
+				/* Note that MNT_EX_SUCCESS return code does
+				 * not mean that FS has been really mounted
+				 * (e.g. nofail option) */
+				if (mnt_context_get_status(cxt)
+				    && mnt_context_is_verbose(cxt))
+					printf(_("%-25s: successfully mounted\n"), tgt);
+			} else
+				nerrs++;
+		}
+	}
+
+	if (mnt_context_is_parent(cxt)) {
+		/* wait for mount --fork children */
+		int nchildren = 0;
+
+		nerrs = 0, nsucc = 0;
+
+		rc = mnt_context_wait_for_children(cxt, &nchildren, &nerrs);
+		if (!rc && nchildren)
+			nsucc = nchildren - nerrs;
+	}
+
+	if (nerrs == 0)
+		rc = MNT_EX_SUCCESS;	/* all success */
+	else if (nsucc == 0)
+		rc = MNT_EX_FAIL;	/* all failed */
+	else
+		rc = MNT_EX_SOMEOK;	/* some success, some failed */
+
+	mnt_free_iter(itr);
+	return rc;
+}
+
+/*
+ * Report a finished mount operation on stdout. Nothing is printed when a
+ * helper was executed or when the context status is not 1.
+ */
+static void success_message(struct libmnt_context *cxt)
+{
+	const char *prog = program_invocation_short_name;
+	const char *source, *target;
+	unsigned long flags = 0;
+
+	if (mnt_context_helper_executed(cxt))
+		return;
+	if (mnt_context_get_status(cxt) != 1)
+		return;
+
+	mnt_context_get_mflags(cxt, &flags);
+	target = mnt_context_get_target(cxt);
+	source = mnt_context_get_source(cxt);
+
+	if (flags & MS_MOVE) {
+		printf(_("%s: %s moved to %s.\n"), prog, source, target);
+		return;
+	}
+	if (flags & MS_BIND) {
+		printf(_("%s: %s bound on %s.\n"), prog, source, target);
+		return;
+	}
+	if (!(flags & MS_PROPAGATION)) {
+		printf(_("%s: %s mounted on %s.\n"), prog, source, target);
+		return;
+	}
+
+	/* propagation change, optionally combined with a real mount */
+	if (source && target && strcmp(source, "none") != 0)
+		printf(_("%s: %s mounted on %s.\n"), prog, source, target);
+
+	printf(_("%s: %s propagation flags changed.\n"), prog, target);
+}
+
+#if defined(HAVE_LIBSELINUX) && defined(HAVE_SECURITY_GET_INITIAL_CONTEXT)
+#include <selinux/selinux.h>
+#include <selinux/context.h>
+
+/*
+ * In verbose mode on a system with SELinux enabled, warn when
+ * selinux_file_context_cmp() finds no difference between the context of
+ * @tgt and the initial "file" context. Does nothing when @tgt is NULL.
+ *
+ * Without libselinux support the call compiles to nothing (see the #else
+ * branch).
+ */
+static void selinux_warning(struct libmnt_context *cxt, const char *tgt)
+{
+
+	if (tgt && mnt_context_is_verbose(cxt) && is_selinux_enabled() > 0) {
+		/* plain char * instead of the deprecated security_context_t */
+		char *raw = NULL, *def = NULL;
+
+		if (getfilecon(tgt, &raw) > 0
+		    && security_get_initial_context("file", &def) == 0) {
+
+			if (!selinux_file_context_cmp(raw, def))
+				printf(_(
+ "mount: %s does not contain SELinux labels.\n"
+ " You just mounted a file system that supports labels which does not\n"
+ " contain labels, onto an SELinux box. It is likely that confined\n"
+ " applications will generate AVC messages and not be allowed access to\n"
+ " this file system. For more details see restorecon(8) and mount(8).\n"),
+					tgt);
+		}
+		/* both pointers may still be NULL here */
+		freecon(raw);
+		freecon(def);
+	}
+}
+#else
+# define selinux_warning(_x, _y)
+#endif
+
+/*
+ * Returns exit status (MNT_EX_*) and/or prints error message.
+ */
+static int mk_exit_code(struct libmnt_context *cxt, int rc)
+{
+	char msg[BUFSIZ] = { 0 };
+	const char *target;
+
+	/* translate the libmount result into MNT_EX_* plus an optional message */
+	rc = mnt_context_get_excode(cxt, rc, msg, sizeof(msg));
+	target = mnt_context_get_target(cxt);
+
+	if (msg[0] != '\0') {
+		/* name the mount by target, then by source, then by "???" */
+		const char *name = target ? target : mnt_context_get_source(cxt);
+
+		warnx("%s: %s.", name ? name : "???", msg);
+	}
+
+	if (rc != MNT_EX_SUCCESS || mnt_context_get_status(cxt) != 1)
+		return rc;
+
+	selinux_warning(cxt, target);
+	return rc;
+}
+
+/*
+ * Parse the fstab file @path into the table used by @cxt.
+ *
+ * @fstab is the table returned by a previous call, or NULL on the first
+ * call; in that case a new table is created and attached to @cxt. Returns
+ * the table. Exits with MNT_EX_SYSERR when the table cannot be allocated
+ * and with MNT_EX_USAGE when @path cannot be parsed.
+ */
+static struct libmnt_table *append_fstab(struct libmnt_context *cxt,
+					 struct libmnt_table *fstab,
+					 const char *path)
+{
+	struct libmnt_table *tab = fstab;
+
+	if (tab == NULL) {
+		tab = mnt_new_table();
+		if (tab == NULL)
+			err(MNT_EX_SYSERR, _("failed to initialize libmount table"));
+
+		mnt_table_set_parser_errcb(tab, table_parser_errcb);
+		mnt_context_set_fstab(cxt, tab);
+
+		/* reference is handled by @cxt now */
+		mnt_unref_table(tab);
+	}
+
+	if (mnt_table_parse_fstab(tab, path) != 0)
+		errx(MNT_EX_USAGE,_("%s: failed to parse"), path);
+
+	return tab;
+}
+
+/*
+ * Check source and target paths -- non-root user should not be able to
+ * resolve paths which are unreadable for him.
+ */
+static void sanitize_paths(struct libmnt_context *cxt)
+{
+	struct libmnt_fs *fs = mnt_context_get_fs(cxt);
+	const char *path;
+	char *canon;
+
+	if (fs == NULL)
+		return;
+
+	/* mountpoint */
+	path = mnt_fs_get_target(fs);
+	if (path != NULL) {
+		canon = canonicalize_path_restricted(path);
+		if (canon == NULL)
+			err(MNT_EX_USAGE, "%s", path);
+		mnt_fs_set_target(fs, canon);
+		free(canon);
+	}
+
+	/* source path */
+	path = mnt_fs_get_srcpath(fs);
+	if (path != NULL) {
+		canon = canonicalize_path_restricted(path);
+		if (canon == NULL)
+			err(MNT_EX_USAGE, "%s", path);
+		mnt_fs_set_source(fs, canon);
+		free(canon);
+	}
+}
+
+/*
+ * Append the option string @opt to the mount options of @cxt.
+ *
+ * Exits with MNT_EX_USAGE when @opt starts with '=', a quote character or
+ * a blank, and with MNT_EX_SYSERR when libmount cannot append the option.
+ */
+static void append_option(struct libmnt_context *cxt, const char *opt)
+{
+	/*
+	 * The <ctype.h> classifiers are only defined for unsigned char
+	 * values (and EOF); a plain char may be negative.
+	 */
+	if (opt && (*opt == '=' || *opt == '\'' || *opt == '\"'
+		    || isblank((unsigned char) *opt)))
+		errx(MNT_EX_USAGE, _("unsupported option format: %s"), opt);
+	if (mnt_context_append_options(cxt, opt))
+		err(MNT_EX_SYSERR, _("failed to append option '%s'"), opt);
+}
+
+/*
+ * Returns the MS_REMOUNT bit of the mount flags of @cxt (non-zero when
+ * set), or 0 when the flags cannot be read.
+ */
+static int has_remount_flag(struct libmnt_context *cxt)
+{
+	unsigned long flags = 0;
+	int failed = mnt_context_get_mflags(cxt, &flags);
+
+	return failed ? 0 : (flags & MS_REMOUNT);
+}
+
+static void __attribute__((__noreturn__)) usage(void)
+{ /*
+ * Print the complete help text (synopsis, options, source and operation
+ * descriptions) to stdout and exit with MNT_EX_SUCCESS.
+ */
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _( /* %1$s: every line reuses the one program-name argument */
+ " %1$s [-lhV]\n"
+ " %1$s -a [options]\n"
+ " %1$s [options] [--source] <source> | [--target] <directory>\n"
+ " %1$s [options] <source> <directory>\n"
+ " %1$s <operation> <mountpoint> [<target>]\n"),
+ program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Mount a filesystem.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fprintf(out, _(
+ " -a, --all mount all filesystems mentioned in fstab\n"
+ " -c, --no-canonicalize don't canonicalize paths\n"
+ " -f, --fake dry run; skip the mount(2) syscall\n"
+ " -F, --fork fork off for each device (use with -a)\n"
+ " -T, --fstab <path> alternative file to /etc/fstab\n"));
+ fprintf(out, _(
+ " -i, --internal-only don't call the mount.<type> helpers\n"));
+ fprintf(out, _(
+ " -l, --show-labels show also filesystem labels\n"));
+ fprintf(out, _(
+ " -n, --no-mtab don't write to /etc/mtab\n"));
+ fprintf(out, _(
+ " --options-mode <mode>\n"
+ " what to do with options loaded from fstab\n"
+ " --options-source <source>\n"
+ " mount options source\n"
+ " --options-source-force\n"
+ " force use of options from fstab/mtab\n"));
+ fprintf(out, _(
+ " -o, --options <list> comma-separated list of mount options\n"
+ " -O, --test-opts <list> limit the set of filesystems (use with -a)\n"
+ " -r, --read-only mount the filesystem read-only (same as -o ro)\n"
+ " -t, --types <list> limit the set of filesystem types\n"));
+ fprintf(out, _(
+ " --source <src> explicitly specifies source (path, label, uuid)\n"
+ " --target <target> explicitly specifies mountpoint\n"));
+ fprintf(out, _(
+ " -v, --verbose say what is being done\n"));
+ fprintf(out, _(
+ " -w, --rw, --read-write mount the filesystem read-write (default)\n"));
+ fprintf(out, _(
+ " -N, --namespace <ns> perform mount in another namespace\n"));
+
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(25)); /* printf() writes to stdout, the same stream as 'out' */
+
+ fprintf(out, _(
+ "\nSource:\n"
+ " -L, --label <label> synonym for LABEL=<label>\n"
+ " -U, --uuid <uuid> synonym for UUID=<uuid>\n"
+ " LABEL=<label> specifies device by filesystem label\n"
+ " UUID=<uuid> specifies device by filesystem UUID\n"
+ " PARTLABEL=<label> specifies device by partition label\n"
+ " PARTUUID=<uuid> specifies device by partition UUID\n"));
+
+ fprintf(out, _(
+ " <device> specifies device by path\n"
+ " <directory> mountpoint for bind mounts (see --bind/rbind)\n"
+ " <file> regular file for loopdev setup\n"));
+
+ fprintf(out, _(
+ "\nOperations:\n"
+ " -B, --bind mount a subtree somewhere else (same as -o bind)\n"
+ " -M, --move move a subtree to some other place\n"
+ " -R, --rbind mount a subtree and all submounts somewhere else\n"));
+ fprintf(out, _(
+ " --make-shared mark a subtree as shared\n"
+ " --make-slave mark a subtree as slave\n"
+ " --make-private mark a subtree as private\n"
+ " --make-unbindable mark a subtree as unbindable\n"));
+ fprintf(out, _(
+ " --make-rshared recursively mark a whole subtree as shared\n"
+ " --make-rslave recursively mark a whole subtree as slave\n"
+ " --make-rprivate recursively mark a whole subtree as private\n"
+ " --make-runbindable recursively mark a whole subtree as unbindable\n"));
+
+ printf(USAGE_MAN_TAIL("mount(8)"));
+
+ exit(MNT_EX_SUCCESS); /* asking for help is not an error */
+}
+
+/* Maps the textual name of a flag to its numeric value. */
+struct flag_str {
+	int value;		/* numeric flag value */
+	const char *str;	/* flag name; always a read-only string literal */
+};
+
+/*
+ * Translate an --options-mode name into its MNT_OMODE_* value.
+ * Returns -EINVAL for an unknown name.
+ */
+static int omode2mask(const char *str)
+{
+	static const struct flag_str modes[] = {
+		{ MNT_OMODE_IGNORE, "ignore" },
+		{ MNT_OMODE_APPEND, "append" },
+		{ MNT_OMODE_PREPEND, "prepend" },
+		{ MNT_OMODE_REPLACE, "replace" },
+	};
+	const struct flag_str *m;
+
+	for (m = modes; m < modes + ARRAY_SIZE(modes); m++) {
+		if (strcmp(str, m->str) == 0)
+			return m->value;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Translate one --options-source name into its MNT_OMODE_* value.
+ *
+ * @str does not have to be NUL-terminated after the name; only the first
+ * @len characters are compared and they have to match a known name
+ * exactly. Returns -EINVAL for an unknown name.
+ */
+static long osrc2mask(const char *str, size_t len)
+{
+	size_t i;
+
+	static const struct flag_str flags[] = {
+		{ MNT_OMODE_FSTAB, "fstab" },
+		{ MNT_OMODE_MTAB, "mtab" },
+		{ MNT_OMODE_NOTAB, "disable" },
+	};
+
+	for (i = 0; i < ARRAY_SIZE(flags); i++) {
+		/*
+		 * Check the length first: indexing the name with @len is an
+		 * out-of-bounds read when @len is larger than the name.
+		 */
+		if (strlen(flags[i].str) == len
+		    && !strncmp(str, flags[i].str, len))
+			return flags[i].value;
+	}
+	return -EINVAL;
+}
+
+/*
+ * Convert the decimal string @str to a process ID.
+ *
+ * Returns the PID, or 0 when @str is empty, has trailing characters, is not
+ * a positive number or does not fit into pid_t.
+ */
+static pid_t parse_pid(const char *str)
+{
+	char *end = NULL;
+	long num;
+	pid_t pid;
+
+	errno = 0;
+	num = strtol(str, &end, 10);
+
+	if (errno || end == str || *end != '\0' || num <= 0)
+		return 0;
+
+	/*
+	 * Reject values that do not survive the conversion to pid_t;
+	 * otherwise e.g. "4294967297" would silently become PID 1.
+	 */
+	pid = (pid_t) num;
+	if ((long) pid != num)
+		return 0;
+
+	return pid;
+}
+
+int main(int argc, char **argv) /* parse options, then list mounts, mount all, or do one mnt_context_mount() */
+{
+ int c, rc = MNT_EX_SUCCESS, all = 0, show_labels = 0;
+ struct libmnt_context *cxt;
+ struct libmnt_table *fstab = NULL;
+ char *srcbuf = NULL;
+ char *types = NULL;
+ int oper = 0, is_move = 0; /* oper: -B, -M or -R was given; is_move: -M was given */
+ int propa = 0; /* set by any of the --make-* options */
+ int optmode = 0, optmode_mode = 0, optmode_src = 0; /* pieces of the mask passed to mnt_context_set_optsmode() */
+
+ enum {
+ MOUNT_OPT_SHARED = CHAR_MAX + 1,
+ MOUNT_OPT_SLAVE,
+ MOUNT_OPT_PRIVATE,
+ MOUNT_OPT_UNBINDABLE,
+ MOUNT_OPT_RSHARED,
+ MOUNT_OPT_RSLAVE,
+ MOUNT_OPT_RPRIVATE,
+ MOUNT_OPT_RUNBINDABLE,
+ MOUNT_OPT_TARGET,
+ MOUNT_OPT_SOURCE,
+ MOUNT_OPT_OPTMODE,
+ MOUNT_OPT_OPTSRC,
+ MOUNT_OPT_OPTSRC_FORCE
+ };
+
+ static const struct option longopts[] = {
+ { "all", no_argument, NULL, 'a' },
+ { "fake", no_argument, NULL, 'f' },
+ { "fstab", required_argument, NULL, 'T' },
+ { "fork", no_argument, NULL, 'F' },
+ { "help", no_argument, NULL, 'h' },
+ { "no-mtab", no_argument, NULL, 'n' },
+ { "read-only", no_argument, NULL, 'r' },
+ { "ro", no_argument, NULL, 'r' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "version", no_argument, NULL, 'V' },
+ { "read-write", no_argument, NULL, 'w' },
+ { "rw", no_argument, NULL, 'w' },
+ { "options", required_argument, NULL, 'o' },
+ { "test-opts", required_argument, NULL, 'O' },
+ { "types", required_argument, NULL, 't' },
+ { "uuid", required_argument, NULL, 'U' },
+ { "label", required_argument, NULL, 'L' },
+ { "bind", no_argument, NULL, 'B' },
+ { "move", no_argument, NULL, 'M' },
+ { "rbind", no_argument, NULL, 'R' },
+ { "make-shared", no_argument, NULL, MOUNT_OPT_SHARED },
+ { "make-slave", no_argument, NULL, MOUNT_OPT_SLAVE },
+ { "make-private", no_argument, NULL, MOUNT_OPT_PRIVATE },
+ { "make-unbindable", no_argument, NULL, MOUNT_OPT_UNBINDABLE },
+ { "make-rshared", no_argument, NULL, MOUNT_OPT_RSHARED },
+ { "make-rslave", no_argument, NULL, MOUNT_OPT_RSLAVE },
+ { "make-rprivate", no_argument, NULL, MOUNT_OPT_RPRIVATE },
+ { "make-runbindable", no_argument, NULL, MOUNT_OPT_RUNBINDABLE },
+ { "no-canonicalize", no_argument, NULL, 'c' },
+ { "internal-only", no_argument, NULL, 'i' },
+ { "show-labels", no_argument, NULL, 'l' },
+ { "target", required_argument, NULL, MOUNT_OPT_TARGET },
+ { "source", required_argument, NULL, MOUNT_OPT_SOURCE },
+ { "options-mode", required_argument, NULL, MOUNT_OPT_OPTMODE },
+ { "options-source", required_argument, NULL, MOUNT_OPT_OPTSRC },
+ { "options-source-force", no_argument, NULL, MOUNT_OPT_OPTSRC_FORCE},
+ { "namespace", required_argument, NULL, 'N' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ static const ul_excl_t excl[] = { /* rows and cols in ASCII order */
+ { 'B','M','R' }, /* bind,move,rbind */
+ { 'L','U', MOUNT_OPT_SOURCE }, /* label,uuid,source */
+ { 0 }
+ };
+ int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+ sanitize_env();
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ strutils_set_exitcode(MNT_EX_USAGE);
+
+ mnt_init_debug(0);
+ cxt = mnt_new_context();
+ if (!cxt)
+ err(MNT_EX_SYSERR, _("libmount context allocation failed"));
+
+ mnt_context_set_tables_errcb(cxt, table_parser_errcb);
+
+ while ((c = getopt_long(argc, argv, "aBcfFhilL:Mno:O:rRsU:vVwt:T:N:",
+ longopts, NULL)) != -1) {
+
+ /* only a few options are allowed for non-root users */
+ if (mnt_context_is_restricted(cxt) &&
+ !strchr("hlLUVvrist", c) &&
+ c != MOUNT_OPT_TARGET &&
+ c != MOUNT_OPT_SOURCE)
+ exit_non_root(option_to_longopt(c, longopts));
+
+ err_exclusive_options(c, longopts, excl, excl_st);
+
+ switch(c) {
+ case 'a':
+ all = 1;
+ break;
+ case 'c':
+ mnt_context_disable_canonicalize(cxt, TRUE);
+ break;
+ case 'f':
+ mnt_context_enable_fake(cxt, TRUE);
+ break;
+ case 'F':
+ mnt_context_enable_fork(cxt, TRUE);
+ break;
+ case 'h':
+ usage();
+ break;
+ case 'i':
+ mnt_context_disable_helpers(cxt, TRUE);
+ break;
+ case 'n':
+ mnt_context_disable_mtab(cxt, TRUE);
+ break;
+ case 'r':
+ append_option(cxt, "ro");
+ mnt_context_enable_rwonly_mount(cxt, FALSE);
+ break;
+ case 'v':
+ mnt_context_enable_verbose(cxt, TRUE);
+ break;
+ case 'V':
+ print_version();
+ break;
+ case 'w':
+ append_option(cxt, "rw");
+ mnt_context_enable_rwonly_mount(cxt, TRUE);
+ break;
+ case 'o':
+ append_option(cxt, optarg);
+ break;
+ case 'O':
+ if (mnt_context_set_options_pattern(cxt, optarg))
+ err(MNT_EX_SYSERR, _("failed to set options pattern"));
+ break;
+ case 'L': /* the source becomes the tag LABEL="<arg>" */
+ xasprintf(&srcbuf, "LABEL=\"%s\"", optarg);
+ mnt_context_disable_swapmatch(cxt, 1);
+ mnt_context_set_source(cxt, srcbuf);
+ free(srcbuf);
+ break;
+ case 'U': /* the source becomes the tag UUID="<arg>" */
+ xasprintf(&srcbuf, "UUID=\"%s\"", optarg);
+ mnt_context_disable_swapmatch(cxt, 1);
+ mnt_context_set_source(cxt, srcbuf);
+ free(srcbuf);
+ break;
+ case 'l':
+ show_labels = 1;
+ break;
+ case 't':
+ types = optarg;
+ break;
+ case 'T':
+ fstab = append_fstab(cxt, fstab, optarg);
+ break;
+ case 's':
+ mnt_context_enable_sloppy(cxt, TRUE);
+ break;
+ case 'B':
+ oper = 1;
+ append_option(cxt, "bind");
+ break;
+ case 'M':
+ oper = 1;
+ is_move = 1;
+ break;
+ case 'R':
+ oper = 1;
+ append_option(cxt, "rbind");
+ break;
+ case 'N': /* the argument is either a PID or a path to a namespace file */
+ {
+ char path[PATH_MAX];
+ pid_t pid = parse_pid(optarg);
+
+ if (pid)
+ snprintf(path, sizeof(path), "/proc/%i/ns/mnt", pid);
+
+ if (mnt_context_set_target_ns(cxt, pid ? path : optarg))
+ err(MNT_EX_SYSERR, _("failed to set target namespace to %s"), pid ? path : optarg);
+ break;
+ }
+ case MOUNT_OPT_SHARED:
+ append_option(cxt, "shared");
+ propa = 1;
+ break;
+ case MOUNT_OPT_SLAVE:
+ append_option(cxt, "slave");
+ propa = 1;
+ break;
+ case MOUNT_OPT_PRIVATE:
+ append_option(cxt, "private");
+ propa = 1;
+ break;
+ case MOUNT_OPT_UNBINDABLE:
+ append_option(cxt, "unbindable");
+ propa = 1;
+ break;
+ case MOUNT_OPT_RSHARED:
+ append_option(cxt, "rshared");
+ propa = 1;
+ break;
+ case MOUNT_OPT_RSLAVE:
+ append_option(cxt, "rslave");
+ propa = 1;
+ break;
+ case MOUNT_OPT_RPRIVATE:
+ append_option(cxt, "rprivate");
+ propa = 1;
+ break;
+ case MOUNT_OPT_RUNBINDABLE:
+ append_option(cxt, "runbindable");
+ propa = 1;
+ break;
+ case MOUNT_OPT_TARGET:
+ mnt_context_disable_swapmatch(cxt, 1);
+ mnt_context_set_target(cxt, optarg);
+ break;
+ case MOUNT_OPT_SOURCE:
+ mnt_context_disable_swapmatch(cxt, 1);
+ mnt_context_set_source(cxt, optarg);
+ break;
+ case MOUNT_OPT_OPTMODE:
+ optmode_mode = omode2mask(optarg);
+ if (optmode_mode == -EINVAL) {
+ warnx(_("bad usage"));
+ errtryhelp(MNT_EX_USAGE);
+ }
+ break;
+ case MOUNT_OPT_OPTSRC:
+ {
+ unsigned long tmp = 0;
+ if (string_to_bitmask(optarg, &tmp, osrc2mask)) {
+ warnx(_("bad usage"));
+ errtryhelp(MNT_EX_USAGE);
+ }
+ optmode_src = tmp;
+ break;
+ }
+ case MOUNT_OPT_OPTSRC_FORCE:
+ optmode |= MNT_OMODE_FORCE;
+ break;
+ default:
+ errtryhelp(MNT_EX_USAGE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ optmode |= optmode_mode | optmode_src;
+ if (optmode) { /* at least one --options-* option was used; fill in the parts not given */
+ if (!optmode_mode)
+ optmode |= MNT_OMODE_PREPEND;
+ if (!optmode_src)
+ optmode |= MNT_OMODE_FSTAB | MNT_OMODE_MTAB;
+ mnt_context_set_optsmode(cxt, optmode);
+ }
+
+ if (fstab && !mnt_context_is_nocanonicalize(cxt)) {
+ /*
+ * We have external (context independent) fstab instance, let's
+ * make a connection between the fstab and the canonicalization
+ * cache.
+ */
+ mnt_table_set_cache(fstab, mnt_context_get_cache(cxt));
+ }
+
+ if (!mnt_context_get_source(cxt) &&
+ !mnt_context_get_target(cxt) &&
+ !argc &&
+ !all) { /* nothing to mount was specified: only list */
+ if (oper || mnt_context_get_options(cxt)) {
+ warnx(_("bad usage"));
+ errtryhelp(MNT_EX_USAGE);
+ }
+ print_all(cxt, types, show_labels);
+ goto done;
+ }
+
+ /* Non-root users are allowed to use -t to print_all(),
+ but not to mount */
+ if (mnt_context_is_restricted(cxt) && types)
+ exit_non_root("types");
+
+ if (oper && (types || all || mnt_context_get_source(cxt))) {
+ warnx(_("bad usage"));
+ errtryhelp(MNT_EX_USAGE);
+ }
+
+ if (types && (all || strchr(types, ',') ||
+ strncmp(types, "no", 2) == 0))
+ mnt_context_set_fstype_pattern(cxt, types);
+ else if (types)
+ mnt_context_set_fstype(cxt, types);
+
+ if (all) {
+ /*
+ * A) Mount all
+ */
+ rc = mount_all(cxt);
+ goto done;
+
+ } else if (argc == 0 && (mnt_context_get_source(cxt) ||
+ mnt_context_get_target(cxt))) {
+ /*
+ * B) mount -L|-U|--source|--target
+ *
+ * non-root may specify source *or* target, but not both
+ */
+ if (mnt_context_is_restricted(cxt) &&
+ mnt_context_get_source(cxt) &&
+ mnt_context_get_target(cxt))
+ exit_non_root(NULL);
+
+ } else if (argc == 1 && (!mnt_context_get_source(cxt) ||
+ !mnt_context_get_target(cxt))) {
+ /*
+ * C) mount [-L|-U|--source] <target>
+ * mount [--target <dir>] <source>
+ * mount <source|target>
+ *
+ * non-root may specify source *or* target, but not both
+ *
+ * It does not matter for libmount if we set source or target
+ * here (the library is able to swap it), but it matters for
+ * sanitize_paths().
+ */
+ int istag = mnt_tag_is_valid(argv[0]);
+
+ if (istag && mnt_context_get_source(cxt))
+ /* -L, -U or --source together with LABEL= or UUID= */
+ errx(MNT_EX_USAGE, _("source specified more than once"));
+ else if (istag || mnt_context_get_target(cxt))
+ mnt_context_set_source(cxt, argv[0]);
+ else
+ mnt_context_set_target(cxt, argv[0]);
+
+ if (mnt_context_is_restricted(cxt) &&
+ mnt_context_get_source(cxt) &&
+ mnt_context_get_target(cxt))
+ exit_non_root(NULL);
+
+ } else if (argc == 2 && !mnt_context_get_source(cxt)
+ && !mnt_context_get_target(cxt)) {
+ /*
+ * D) mount <source> <target>
+ */
+ if (mnt_context_is_restricted(cxt))
+ exit_non_root(NULL);
+
+ mnt_context_set_source(cxt, argv[0]);
+ mnt_context_set_target(cxt, argv[1]);
+
+ } else {
+ warnx(_("bad usage"));
+ errtryhelp(MNT_EX_USAGE);
+ }
+
+ if (mnt_context_is_restricted(cxt))
+ sanitize_paths(cxt);
+
+ if (is_move)
+ /* "move" as option string is not supported by libmount */
+ mnt_context_set_mflags(cxt, MS_MOVE);
+
+ if ((oper && !has_remount_flag(cxt)) || propa)
+ /* fstab/mtab is unnecessary for --make-* or --bind */
+ mnt_context_set_optsmode(cxt, MNT_OMODE_NOTAB);
+
+ rc = mnt_context_mount(cxt);
+ rc = mk_exit_code(cxt, rc); /* from here on rc is compared with and returned as an MNT_EX_* code */
+
+ if (rc == MNT_EX_SUCCESS && mnt_context_is_verbose(cxt))
+ success_message(cxt);
+done:
+ mnt_free_context(cxt);
+ return rc;
+}
+
diff --git a/sys-utils/mountpoint.1 b/sys-utils/mountpoint.1
new file mode 100644
index 0000000..afc469e
--- /dev/null
+++ b/sys-utils/mountpoint.1
@@ -0,0 +1,58 @@
+.TH MOUNTPOINT 1 "July 2014" "util-linux" "User Commands"
+.SH NAME
+mountpoint \- see if a directory or file is a mountpoint
+.SH SYNOPSIS
+.B mountpoint
+.RB [ \-d | \-q ]
+.I directory
+|
+.I file
+.sp
+.B mountpoint
+.B \-x
+.I device
+
+.SH DESCRIPTION
+.B mountpoint
+checks whether the given
+.I directory
+or
+.I file
+is mentioned in the /proc/self/mountinfo file.
+.SH OPTIONS
+.TP
+.BR \-d , " \-\-fs\-devno"
+Show the major/minor numbers of the device that is mounted on the given
+directory.
+.TP
+.BR \-q , " \-\-quiet"
+Be quiet - don't print anything.
+.TP
+.BR \-x , " \-\-devno"
+Show the major/minor numbers of the given block device on standard output.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH EXIT STATUS
+Zero if the directory or file is a mountpoint, non-zero if not.
+.SH AUTHOR
+.PP
+Karel Zak <kzak@redhat.com>
+.SH ENVIRONMENT
+.IP LIBMOUNT_DEBUG=all
+enables libmount debug output.
+.SH NOTES
+.PP
+The util-linux
+.B mountpoint
+implementation was written from scratch for libmount. The original version
+for sysvinit suite was written by Miquel van Smoorenburg.
+
+.SH SEE ALSO
+.BR mount (8)
+.SH AVAILABILITY
+The mountpoint command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/mountpoint.c b/sys-utils/mountpoint.c
new file mode 100644
index 0000000..00a74da
--- /dev/null
+++ b/sys-utils/mountpoint.c
@@ -0,0 +1,203 @@
+/*
+ * mountpoint(1) - see if a directory is a mountpoint
+ *
+ * This is libmount based reimplementation of the mountpoint(1)
+ * from sysvinit project.
+ *
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Written by Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include <libmount.h>
+
+#include "nls.h"
+#include "xalloc.h"
+#include "c.h"
+#include "closestream.h"
+#include "pathnames.h"
+
+struct mountpoint_control { /* state shared by main() and its helpers */
+ char *path; /* the path given on the command line */
+ dev_t dev; /* device number stored by dir_to_device() */
+ struct stat st; /* stat(2) result for path */
+ unsigned int
+ dev_devno:1, /* -x, --devno */
+ fs_devno:1, /* -d, --fs-devno */
+ quiet:1; /* -q, --quiet */
+};
+
+/*
+ * Traditional mountpoint detection, used when the mountinfo file cannot be
+ * read: compare ctl->st with the stat(2) result of "<path>/..". This way is
+ * independent of /proc, but it is not able to detect bind mounts.
+ *
+ * Returns 0 and sets ctl->dev when the path looks like a mountpoint,
+ * -1 otherwise (including on errors).
+ */
+static int dir_to_device_by_stat(struct mountpoint_control *ctl)
+{
+	struct stat parent;
+	char updir[PATH_MAX];
+	char *canonical = mnt_resolve_path(ctl->path, NULL);
+	int n = snprintf(updir, sizeof(updir), "%s/..",
+			 canonical ? canonical : ctl->path);
+
+	free(canonical);
+
+	if (n < 0 || (size_t) n >= sizeof(updir))
+		return -1;	/* formatting error or truncated path */
+	if (stat(updir, &parent) != 0)
+		return -1;
+
+	/* same device as the parent and a different inode: not a mountpoint */
+	if (ctl->st.st_dev == parent.st_dev && ctl->st.st_ino != parent.st_ino)
+		return -1;
+
+	ctl->dev = ctl->st.st_dev;
+	return 0;
+}
+
+/*
+ * Look up ctl->path among the mount targets listed in the mountinfo file
+ * and store the device number of the matching filesystem in ctl->dev.
+ * Falls back to dir_to_device_by_stat() when the file cannot be parsed.
+ *
+ * Returns 0 if the path is a mountpoint, -1 otherwise.
+ */
+static int dir_to_device(struct mountpoint_control *ctl)
+{
+	struct libmnt_table *tb = mnt_new_table_from_file(_PATH_PROC_MOUNTINFO);
+	struct libmnt_cache *cache;
+	struct libmnt_fs *fs;
+
+	if (tb == NULL)
+		return dir_to_device_by_stat(ctl);
+
+	/* the cache is needed to canonicalize all necessary paths */
+	cache = mnt_new_cache();
+	mnt_table_set_cache(tb, cache);
+	mnt_unref_cache(cache);
+
+	fs = mnt_table_find_target(tb, ctl->path, MNT_ITER_BACKWARD);
+	if (fs == NULL || mnt_fs_get_target(fs) == NULL) {
+		mnt_unref_table(tb);
+		return -1;
+	}
+
+	ctl->dev = mnt_fs_get_devno(fs);
+	mnt_unref_table(tb);
+	return 0;
+}
+
+/*
+ * Print "major:minor" of the block device described by ctl->st.
+ *
+ * Returns 0 on success, or -1 if ctl->st is not a block device; a warning
+ * is printed in that case unless ctl->quiet is set.
+ */
+static int print_devno(const struct mountpoint_control *ctl)
+{
+	if (S_ISBLK(ctl->st.st_mode)) {
+		printf("%u:%u\n", major(ctl->st.st_rdev), minor(ctl->st.st_rdev));
+		return 0;
+	}
+
+	if (!ctl->quiet)
+		warnx(_("%s: not a block device"), ctl->path);
+	return -1;
+}
+
+static void __attribute__((__noreturn__)) usage(void) /* print the help text to stdout and exit with EXIT_SUCCESS */
+{
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out,
+ _(" %1$s [-qd] /path/to/directory\n"
+ " %1$s -x /dev/device\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Check whether a directory or file is a mountpoint.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -q, --quiet quiet mode - don't print anything\n"
+ " -d, --fs-devno print maj:min device number of the filesystem\n"
+ " -x, --devno print maj:min device number of the block device\n"), out);
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(20));
+ printf(USAGE_MAN_TAIL("mountpoint(1)"));
+
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * mountpoint(1): report whether the given path is a mountpoint (default),
+ * print the device number of the filesystem mounted there (-d), or print
+ * the device number of the given block device (-x).
+ *
+ * Returns EXIT_SUCCESS or EXIT_FAILURE.
+ */
+int main(int argc, char **argv)
+{
+	static const struct option longopts[] = {
+		{ "quiet", no_argument, NULL, 'q' },
+		{ "fs-devno", no_argument, NULL, 'd' },
+		{ "devno", no_argument, NULL, 'x' },
+		{ "help", no_argument, NULL, 'h' },
+		{ "version", no_argument, NULL, 'V' },
+		{ NULL, 0, NULL, 0 }
+	};
+	struct mountpoint_control ctl = { NULL };
+	int c;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	mnt_init_debug(0);
+
+	while ((c = getopt_long(argc, argv, "qdxhV", longopts, NULL)) != -1) {
+		switch (c) {
+		case 'q':
+			ctl.quiet = 1;
+			break;
+		case 'd':
+			ctl.fs_devno = 1;
+			break;
+		case 'x':
+			ctl.dev_devno = 1;
+			break;
+		case 'h':
+			usage();
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* exactly one non-option argument is expected */
+	if (argc - optind != 1) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	ctl.path = argv[optind];
+
+	if (stat(ctl.path, &ctl.st) != 0) {
+		if (!ctl.quiet)
+			err(EXIT_FAILURE, "%s", ctl.path);
+		return EXIT_FAILURE;
+	}
+
+	/* -x: the argument is a device, not a possible mountpoint */
+	if (ctl.dev_devno) {
+		if (print_devno(&ctl) != 0)
+			return EXIT_FAILURE;
+		return EXIT_SUCCESS;
+	}
+
+	if (dir_to_device(&ctl) != 0) {
+		if (!ctl.quiet)
+			printf(_("%s is not a mountpoint\n"), ctl.path);
+		return EXIT_FAILURE;
+	}
+
+	if (ctl.fs_devno) {
+		printf("%u:%u\n", major(ctl.dev), minor(ctl.dev));
+		return EXIT_SUCCESS;
+	}
+	if (!ctl.quiet)
+		printf(_("%s is a mountpoint\n"), ctl.path);
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/nsenter.1 b/sys-utils/nsenter.1
new file mode 100644
index 0000000..aacae53
--- /dev/null
+++ b/sys-utils/nsenter.1
@@ -0,0 +1,269 @@
+.TH NSENTER 1 "June 2013" "util-linux" "User Commands"
+.SH NAME
+nsenter \- run program with namespaces of other processes
+.SH SYNOPSIS
+.B nsenter
+[options]
+.RI [ program
+.RI [ arguments ]]
+.SH DESCRIPTION
+Enters the namespaces of one or more other processes and then executes the specified
+\fIprogram\fP. If \fIprogram\fP is not given, then ``${SHELL}'' is run (default: /bin\:/sh).
+.PP
+Enterable namespaces are:
+.TP
+.B mount namespace
+Mounting and unmounting filesystems will not affect the rest of the system,
+except for filesystems which are explicitly marked as shared (with
+\fBmount --make-\:shared\fP; see \fI/proc\:/self\:/mountinfo\fP for the
+\fBshared\fP flag).
+For further details, see
+.BR mount_namespaces (7)
+and the discussion of the
+.B CLONE_NEWNS
+flag in
+.BR clone (2).
+.TP
+.B UTS namespace
+Setting hostname or domainname will not affect the rest of the system.
+For further details, see
+.BR namespaces (7)
+and the discussion of the
+.B CLONE_NEWUTS
+flag in
+.BR clone (2).
+.TP
+.B IPC namespace
+The process will have an independent namespace for POSIX message queues
+as well as System V message queues,
+semaphore sets and shared memory segments.
+For further details, see
+.BR namespaces (7)
+and the discussion of the
+.B CLONE_NEWIPC
+flag in
+.BR clone (2).
+.TP
+.B network namespace
+The process will have independent IPv4 and IPv6 stacks, IP routing tables,
+firewall rules, the
+.I /proc\:/net
+and
+.I /sys\:/class\:/net
+directory trees, sockets, etc.
+For further details, see
+.BR namespaces (7)
+and the discussion of the
+.B CLONE_NEWNET
+flag in
+.BR clone (2).
+.TP
+.B PID namespace
+Children will have a set of PID to process mappings separate from the
+.B nsenter
+process.
+.B nsenter
+will fork by default if changing the PID namespace, so that the new program
+and its children share the same PID namespace and are visible to each other.
+If \fB\-\-no\-fork\fP is used, the new program will be exec'ed without forking.
+For further details, see
+.BR pid_namespaces (7)
+and the discussion of the
+.B CLONE_NEWPID
+flag in
+.BR clone (2).
+.TP
+.B user namespace
+The process will have a distinct set of UIDs, GIDs and capabilities.
+For further details, see
+.BR user_namespaces (7)
+and the discussion of the
+.B CLONE_NEWUSER
+flag in
+.BR clone (2).
+.TP
+.B cgroup namespace
+The process will have a virtualized view of \fI/proc\:/self\:/cgroup\fP, and new
+cgroup mounts will be rooted at the namespace cgroup root.
+For further details, see
+.BR cgroup_namespaces (7)
+and the discussion of the
+.B CLONE_NEWCGROUP
+flag in
+.BR clone (2).
+.PP
+See \fBclone\fP(2) for the exact semantics of the flags.
+.SH OPTIONS
+Various of the options below that relate to namespaces take an optional
+.I file
+argument.
+This should be one of the
+.I /proc/[pid]/ns/*
+files described in
+.BR namespaces (7).
+.TP
+\fB\-a\fR, \fB\-\-all\fR
+Enter all namespaces of the target process by the default
+.I /proc/[pid]/ns/*
+namespace paths. The default paths to the target process namespaces may be
+overwritten by namespace specific options (e.g. --all --mount=[path]).
+
+The user namespace will be ignored if the same as the caller's current user
+namespace. It prevents a caller that has dropped capabilities from regaining
+those capabilities via a call to setns(). See
+.BR setns (2)
+for more details.
+.TP
+\fB\-t\fR, \fB\-\-target\fR \fIpid\fP
+Specify a target process to get contexts from. The paths to the contexts
+specified by
+.I pid
+are:
+.RS
+.PD 0
+.IP "" 20
+.TP
+/proc/\fIpid\fR/ns/mnt
+the mount namespace
+.TP
+/proc/\fIpid\fR/ns/uts
+the UTS namespace
+.TP
+/proc/\fIpid\fR/ns/ipc
+the IPC namespace
+.TP
+/proc/\fIpid\fR/ns/net
+the network namespace
+.TP
+/proc/\fIpid\fR/ns/pid
+the PID namespace
+.TP
+/proc/\fIpid\fR/ns/user
+the user namespace
+.TP
+/proc/\fIpid\fR/ns/cgroup
+the cgroup namespace
+.TP
+/proc/\fIpid\fR/root
+the root directory
+.TP
+/proc/\fIpid\fR/cwd
+the working directory respectively
+.PD
+.RE
+.TP
+\fB\-m\fR, \fB\-\-mount\fR[=\fIfile\fR]
+Enter the mount namespace. If no file is specified, enter the mount namespace
+of the target process.
+If
+.I file
+is specified, enter the mount namespace
+specified by
+.IR file .
+.TP
+\fB\-u\fR, \fB\-\-uts\fR[=\fIfile\fR]
+Enter the UTS namespace. If no file is specified, enter the UTS namespace of
+the target process.
+If
+.I file
+is specified, enter the UTS namespace specified by
+.IR file .
+.TP
+\fB\-i\fR, \fB\-\-ipc\fR[=\fIfile\fR]
+Enter the IPC namespace. If no file is specified, enter the IPC namespace of
+the target process.
+If
+.I file
+is specified, enter the IPC namespace specified by
+.IR file .
+.TP
+\fB\-n\fR, \fB\-\-net\fR[=\fIfile\fR]
+Enter the network namespace. If no file is specified, enter the network
+namespace of the target process.
+If
+.I file
+is specified, enter the network namespace specified by
+.IR file .
+.TP
+\fB\-p\fR, \fB\-\-pid\fR[=\fIfile\fR]
+Enter the PID namespace. If no file is specified, enter the PID namespace of
+the target process.
+If
+.I file
+is specified, enter the PID namespace specified by
+.IR file .
+.TP
+\fB\-U\fR, \fB\-\-user\fR[=\fIfile\fR]
+Enter the user namespace. If no file is specified, enter the user namespace of
+the target process.
+If
+.I file
+is specified, enter the user namespace specified by
+.IR file .
+See also the \fB\-\-setuid\fR and \fB\-\-setgid\fR options.
+.TP
+\fB\-C\fR, \fB\-\-cgroup\fR[=\fIfile\fR]
+Enter the cgroup namespace. If no file is specified, enter the cgroup namespace of
+the target process.
+If
+.I file
+is specified, enter the cgroup namespace specified by
+.IR file .
+.TP
+\fB\-G\fR, \fB\-\-setgid\fR \fIgid\fR
+Set the group ID which will be used in the entered namespace and drop
+supplementary groups.
+.BR nsenter (1)
+always sets GID for user namespaces, the default is 0.
+.TP
+\fB\-S\fR, \fB\-\-setuid\fR \fIuid\fR
+Set the user ID which will be used in the entered namespace.
+.BR nsenter (1)
+always sets UID for user namespaces, the default is 0.
+.TP
+\fB\-\-preserve\-credentials\fR
+Don't modify UID and GID when entering a user namespace. The default is to
+drop supplementary groups and set GID and UID to 0.
+.TP
+\fB\-r\fR, \fB\-\-root\fR[=\fIdirectory\fR]
+Set the root directory. If no directory is specified, set the root directory to
+the root directory of the target process. If directory is specified, set the
+root directory to the specified directory.
+.TP
+\fB\-w\fR, \fB\-\-wd\fR[=\fIdirectory\fR]
+Set the working directory. If no directory is specified, set the working
+directory to the working directory of the target process. If directory is
+specified, set the working directory to the specified directory.
+.TP
+\fB\-F\fR, \fB\-\-no\-fork\fR
+Do not fork before exec'ing the specified program. By default, when entering a
+PID namespace, \fBnsenter\fP calls \fBfork\fP before calling \fBexec\fP so that
+any children will also be in the newly entered PID namespace.
+.TP
+\fB\-Z\fR, \fB\-\-follow\-context\fR
+Set the SELinux security context used for executing a new process according to
+already running process specified by \fB\-\-target\fR PID. (The util-linux has
+to be compiled with SELinux support otherwise the option is unavailable.)
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Display version information and exit.
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Display help text and exit.
+.SH SEE ALSO
+.BR clone (2),
+.BR setns (2),
+.BR namespaces (7)
+.SH AUTHORS
+.UR ebiederm@xmission.com
+Eric Biederman
+.UE
+.br
+.UR kzak@redhat.com
+Karel Zak
+.UE
+.SH AVAILABILITY
+The nsenter command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/nsenter.c b/sys-utils/nsenter.c
new file mode 100644
index 0000000..fbfcf98
--- /dev/null
+++ b/sys-utils/nsenter.c
@@ -0,0 +1,484 @@
+/*
+ * nsenter(1) - command-line interface for setns(2)
+ *
+ * Copyright (C) 2012-2013 Eric Biederman <ebiederm@xmission.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <dirent.h>
+#include <errno.h>
+#include <getopt.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <grp.h>
+#include <sys/stat.h>
+
+#ifdef HAVE_LIBSELINUX
+# include <selinux/selinux.h>
+#endif
+
+#include "strutils.h"
+#include "nls.h"
+#include "c.h"
+#include "closestream.h"
+#include "namespace.h"
+#include "exec_shell.h"
+
+static struct namespace_file {
+ int nstype; /* CLONE_NEW* flag; 0 marks the end of the table */
+ const char *name; /* file name relative to /proc/<pid>/ */
+ int fd; /* descriptor of the opened namespace file, -1 if not opened */
+} namespace_files[] = {
+ /* Careful, the order is significant in this array.
+ *
+ * The user namespace comes either first or last: first if
+ * you're using it to increase your privilege and last if
+ * you're using it to decrease. We enter the namespaces in
+ * two passes starting initially from offset 1 and then offset
+ * 0 if that fails.
+ */
+ { .nstype = CLONE_NEWUSER, .name = "ns/user", .fd = -1 },
+ { .nstype = CLONE_NEWCGROUP,.name = "ns/cgroup", .fd = -1 },
+ { .nstype = CLONE_NEWIPC, .name = "ns/ipc", .fd = -1 },
+ { .nstype = CLONE_NEWUTS, .name = "ns/uts", .fd = -1 },
+ { .nstype = CLONE_NEWNET, .name = "ns/net", .fd = -1 },
+ { .nstype = CLONE_NEWPID, .name = "ns/pid", .fd = -1 },
+ { .nstype = CLONE_NEWNS, .name = "ns/mnt", .fd = -1 },
+ { .nstype = 0, .name = NULL, .fd = -1 }
+};
+
+static void __attribute__((__noreturn__)) usage(void) /* print the help text to stdout and exit with EXIT_SUCCESS */
+{
+ FILE *out = stdout;
+
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _(" %s [options] [<program> [<argument>...]]\n"),
+ program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Run a program with namespaces of other processes.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -a, --all enter all namespaces\n"), out);
+ fputs(_(" -t, --target <pid> target process to get namespaces from\n"), out);
+ fputs(_(" -m, --mount[=<file>] enter mount namespace\n"), out);
+ fputs(_(" -u, --uts[=<file>] enter UTS namespace (hostname etc)\n"), out);
+ fputs(_(" -i, --ipc[=<file>] enter System V IPC namespace\n"), out);
+ fputs(_(" -n, --net[=<file>] enter network namespace\n"), out);
+ fputs(_(" -p, --pid[=<file>] enter pid namespace\n"), out);
+ fputs(_(" -C, --cgroup[=<file>] enter cgroup namespace\n"), out);
+ fputs(_(" -U, --user[=<file>] enter user namespace\n"), out);
+ fputs(_(" -S, --setuid <uid> set uid in entered namespace\n"), out);
+ fputs(_(" -G, --setgid <gid> set gid in entered namespace\n"), out);
+ fputs(_(" --preserve-credentials do not touch uids or gids\n"), out);
+ fputs(_(" -r, --root[=<dir>] set the root directory\n"), out);
+ fputs(_(" -w, --wd[=<dir>] set the working directory\n"), out);
+ fputs(_(" -F, --no-fork do not fork before exec'ing <program>\n"), out);
+#ifdef HAVE_LIBSELINUX
+ fputs(_(" -Z, --follow-context set SELinux context according to --target PID\n"), out);
+#endif
+
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(24));
+ printf(USAGE_MAN_TAIL("nsenter(1)"));
+
+ exit(EXIT_SUCCESS);
+}
+
+static pid_t namespace_target_pid = 0; /* PID given by -t/--target; 0 if not given */
+static int root_fd = -1; /* opened by open_target_fd() for -r/--root; -1 if not opened */
+static int wd_fd = -1; /* opened by open_target_fd() for -w/--wd; -1 if not opened */
+
+/*
+ * Open @path read-only and store the descriptor in *@fd. When @path is NULL
+ * the file /proc/<namespace_target_pid>/<type> is opened instead. A
+ * descriptor already stored in *@fd is closed first.
+ *
+ * Exits with EXIT_FAILURE if neither @path nor a target PID is available,
+ * or if open(2) fails.
+ */
+static void open_target_fd(int *fd, const char *type, const char *path)
+{
+	char pathbuf[PATH_MAX];
+
+	if (path == NULL) {
+		if (namespace_target_pid == 0)
+			errx(EXIT_FAILURE,
+			     _("neither filename nor target pid supplied for %s"),
+			     type);
+
+		/* no explicit file; use the one of the target process */
+		snprintf(pathbuf, sizeof(pathbuf), "/proc/%u/%s",
+			 namespace_target_pid, type);
+		path = pathbuf;
+	}
+
+	/* the same option may be given more than once; the last one wins */
+	if (*fd >= 0)
+		close(*fd);
+
+	if ((*fd = open(path, O_RDONLY)) < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), path);
+}
+
+/*
+ * Find the namespace_files[] entry whose type is @nstype and open its
+ * namespace file (see open_target_fd() for the meaning of @path).
+ */
+static void open_namespace_fd(int nstype, const char *path)
+{
+	struct namespace_file *ns = namespace_files;
+
+	/* stop at the matching entry or at the terminator (nstype == 0) */
+	while (ns->nstype && ns->nstype != nstype)
+		ns++;
+
+	if (ns->nstype) {
+		open_target_fd(&ns->fd, ns->name, path);
+		return;
+	}
+
+	/* This should never happen */
+	assert(ns->nstype);
+}
+
+/*
+ * Store the inode number of @path in *@ino.
+ *
+ * Returns 0 on success, or the negated errno of the failed stat(2) call;
+ * *@ino is left untouched on failure.
+ */
+static int get_ns_ino(const char *path, ino_t *ino)
+{
+	struct stat sb;
+
+	if (stat(path, &sb) == 0) {
+		*ino = sb.st_ino;
+		return 0;
+	}
+	return -errno;
+}
+
+/*
+ * Compare the inode numbers of /proc/<a>/<type> and /proc/<b>/<type>.
+ *
+ * Returns non-zero if they are equal, zero otherwise. Exits with
+ * EXIT_FAILURE if one of the files cannot be stat'ed.
+ */
+static int is_same_namespace(pid_t a, pid_t b, const char *type)
+{
+	const pid_t pids[2] = { a, b };
+	ino_t inos[2] = { 0, 0 };
+	char path[PATH_MAX];
+	size_t i;
+
+	for (i = 0; i < 2; i++) {
+		snprintf(path, sizeof(path), "/proc/%u/%s", pids[i], type);
+		if (get_ns_ino(path, &inos[i]) != 0)
+			err(EXIT_FAILURE, _("stat of %s failed"), path);
+	}
+
+	return inos[0] == inos[1];
+}
+
+/*
+ * Fork and let only the child return to the caller.
+ *
+ * The parent stays here: whenever the child is stopped it stops itself as
+ * well and resumes the child once it is continued. When the child
+ * terminates, the parent exits with the child's exit status, or sends
+ * itself the signal that terminated the child.
+ *
+ * Exits with EXIT_FAILURE if fork(2) or waitpid(2) fails.
+ */
+static void continue_as_child(void)
+{
+	pid_t child = fork();
+	int status;
+	pid_t ret;
+
+	if (child < 0)
+		err(EXIT_FAILURE, _("fork failed"));
+
+	/* Only the child returns */
+	if (child == 0)
+		return;
+
+	for (;;) {
+		ret = waitpid(child, &status, WUNTRACED);
+		if (ret < 0) {
+			if (errno == EINTR)
+				continue;
+			/* status has not been written; do not interpret it */
+			err(EXIT_FAILURE, _("waitpid failed"));
+		}
+		if ((ret == child) && (WIFSTOPPED(status))) {
+			/* The child suspended so suspend us as well */
+			kill(getpid(), SIGSTOP);
+			kill(child, SIGCONT);
+		} else {
+			break;
+		}
+	}
+	/* Return the child's exit code if possible */
+	if (WIFEXITED(status)) {
+		exit(WEXITSTATUS(status));
+	} else if (WIFSIGNALED(status)) {
+		kill(getpid(), WTERMSIG(status));
+	}
+	exit(EXIT_FAILURE);
+}
+
+int main(int argc, char *argv[])
+{
+ enum {
+ OPT_PRESERVE_CRED = CHAR_MAX + 1
+ };
+ static const struct option longopts[] = {
+ { "all", no_argument, NULL, 'a' },
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V'},
+ { "target", required_argument, NULL, 't' },
+ { "mount", optional_argument, NULL, 'm' },
+ { "uts", optional_argument, NULL, 'u' },
+ { "ipc", optional_argument, NULL, 'i' },
+ { "net", optional_argument, NULL, 'n' },
+ { "pid", optional_argument, NULL, 'p' },
+ { "user", optional_argument, NULL, 'U' },
+ { "cgroup", optional_argument, NULL, 'C' },
+ { "setuid", required_argument, NULL, 'S' },
+ { "setgid", required_argument, NULL, 'G' },
+ { "root", optional_argument, NULL, 'r' },
+ { "wd", optional_argument, NULL, 'w' },
+ { "no-fork", no_argument, NULL, 'F' },
+ { "preserve-credentials", no_argument, NULL, OPT_PRESERVE_CRED },
+#ifdef HAVE_LIBSELINUX
+ { "follow-context", no_argument, NULL, 'Z' },
+#endif
+ { NULL, 0, NULL, 0 }
+ };
+
+ struct namespace_file *nsfile;
+ int c, pass, namespaces = 0, setgroups_nerrs = 0, preserve_cred = 0;
+ bool do_rd = false, do_wd = false, force_uid = false, force_gid = false;
+ bool do_all = false;
+ int do_fork = -1; /* unknown yet */
+ uid_t uid = 0;
+ gid_t gid = 0;
+#ifdef HAVE_LIBSELINUX
+ bool selinux = 0;
+#endif
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ while ((c =
+ getopt_long(argc, argv, "+ahVt:m::u::i::n::p::C::U::S:G:r::w::FZ",
+ longopts, NULL)) != -1) {
+ switch (c) {
+ case 'h':
+ usage();
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+ case 'a':
+ do_all = true;
+ break;
+ case 't':
+ namespace_target_pid =
+ strtoul_or_err(optarg, _("failed to parse pid"));
+ break;
+ case 'm':
+ if (optarg)
+ open_namespace_fd(CLONE_NEWNS, optarg);
+ else
+ namespaces |= CLONE_NEWNS;
+ break;
+ case 'u':
+ if (optarg)
+ open_namespace_fd(CLONE_NEWUTS, optarg);
+ else
+ namespaces |= CLONE_NEWUTS;
+ break;
+ case 'i':
+ if (optarg)
+ open_namespace_fd(CLONE_NEWIPC, optarg);
+ else
+ namespaces |= CLONE_NEWIPC;
+ break;
+ case 'n':
+ if (optarg)
+ open_namespace_fd(CLONE_NEWNET, optarg);
+ else
+ namespaces |= CLONE_NEWNET;
+ break;
+ case 'p':
+ if (optarg)
+ open_namespace_fd(CLONE_NEWPID, optarg);
+ else
+ namespaces |= CLONE_NEWPID;
+ break;
+ case 'C':
+ if (optarg)
+ open_namespace_fd(CLONE_NEWCGROUP, optarg);
+ else
+ namespaces |= CLONE_NEWCGROUP;
+ break;
+ case 'U':
+ if (optarg)
+ open_namespace_fd(CLONE_NEWUSER, optarg);
+ else
+ namespaces |= CLONE_NEWUSER;
+ break;
+ case 'S':
+ uid = strtoul_or_err(optarg, _("failed to parse uid"));
+ force_uid = true;
+ break;
+ case 'G':
+ gid = strtoul_or_err(optarg, _("failed to parse gid"));
+ force_gid = true;
+ break;
+ case 'F':
+ do_fork = 0;
+ break;
+ case 'r':
+ if (optarg)
+ open_target_fd(&root_fd, "root", optarg);
+ else
+ do_rd = true;
+ break;
+ case 'w':
+ if (optarg)
+ open_target_fd(&wd_fd, "cwd", optarg);
+ else
+ do_wd = true;
+ break;
+ case OPT_PRESERVE_CRED:
+ preserve_cred = 1;
+ break;
+#ifdef HAVE_LIBSELINUX
+ case 'Z':
+ selinux = 1;
+ break;
+#endif
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+#ifdef HAVE_LIBSELINUX
+ if (selinux && is_selinux_enabled() > 0) {
+ char *scon = NULL;
+
+ if (!namespace_target_pid)
+ errx(EXIT_FAILURE, _("no target PID specified for --follow-context"));
+ if (getpidcon(namespace_target_pid, &scon) < 0)
+ errx(EXIT_FAILURE, _("failed to get %d SELinux context"),
+ (int) namespace_target_pid);
+ if (setexeccon(scon) < 0)
+ errx(EXIT_FAILURE, _("failed to set exec context to '%s'"), scon);
+ freecon(scon);
+ }
+#endif
+
+ if (do_all) {
+ if (!namespace_target_pid)
+ errx(EXIT_FAILURE, _("no target PID specified for --all"));
+ for (nsfile = namespace_files; nsfile->nstype; nsfile++) {
+ if (nsfile->fd >= 0)
+ continue; /* namespace already specified */
+
+ /* It is not permitted to use setns(2) to reenter the caller's
+ * current user namespace; see setns(2) man page for more details.
+ */
+ if (nsfile->nstype & CLONE_NEWUSER
+ && is_same_namespace(getpid(), namespace_target_pid, nsfile->name))
+ continue;
+
+ namespaces |= nsfile->nstype;
+ }
+ }
+
+ /*
+ * Open remaining namespace and directory descriptors.
+ */
+ for (nsfile = namespace_files; nsfile->nstype; nsfile++)
+ if (nsfile->nstype & namespaces)
+ open_namespace_fd(nsfile->nstype, NULL);
+ if (do_rd)
+ open_target_fd(&root_fd, "root", NULL);
+ if (do_wd)
+ open_target_fd(&wd_fd, "cwd", NULL);
+
+ /*
+ * Update namespaces variable to contain all requested namespaces
+ */
+ for (nsfile = namespace_files; nsfile->nstype; nsfile++) {
+ if (nsfile->fd < 0)
+ continue;
+ namespaces |= nsfile->nstype;
+ }
+
+ /* for user namespaces we always set UID and GID (default is 0)
+ * and clear root's groups if --preserve-credentials is not specified */
+ if ((namespaces & CLONE_NEWUSER) && !preserve_cred) {
+ force_uid = true, force_gid = true;
+
+ /* We call setgroups() before and after we enter user namespace,
+ * let's complain only if both fail */
+ if (setgroups(0, NULL) != 0)
+ setgroups_nerrs++;
+ }
+
+ /*
+ * Now that we know which namespaces we want to enter, enter
+ * them. Do this in two passes, not entering the user
+ * namespace on the first pass. So if we're deprivileging the
+ * container we'll enter the user namespace last and if we're
+ * privileging it then we enter the user namespace first
+ * (because the initial setns will fail).
+ */
+ for (pass = 0; pass < 2; pass ++) {
+ for (nsfile = namespace_files + 1 - pass; nsfile->nstype; nsfile++) {
+ if (nsfile->fd < 0)
+ continue;
+ if (nsfile->nstype == CLONE_NEWPID && do_fork == -1)
+ do_fork = 1;
+ if (setns(nsfile->fd, nsfile->nstype)) {
+ if (pass != 0)
+ err(EXIT_FAILURE,
+ _("reassociate to namespace '%s' failed"),
+ nsfile->name);
+ else
+ continue;
+ }
+
+ close(nsfile->fd);
+ nsfile->fd = -1;
+ }
+ }
+
+ /* Remember the current working directory if I'm not changing it */
+ if (root_fd >= 0 && wd_fd < 0) {
+ wd_fd = open(".", O_RDONLY);
+ if (wd_fd < 0)
+ err(EXIT_FAILURE,
+ _("cannot open current working directory"));
+ }
+
+ /* Change the root directory */
+ if (root_fd >= 0) {
+ if (fchdir(root_fd) < 0)
+ err(EXIT_FAILURE,
+ _("change directory by root file descriptor failed"));
+
+ if (chroot(".") < 0)
+ err(EXIT_FAILURE, _("chroot failed"));
+
+ close(root_fd);
+ root_fd = -1;
+ }
+
+ /* Change the working directory */
+ if (wd_fd >= 0) {
+ if (fchdir(wd_fd) < 0)
+ err(EXIT_FAILURE,
+ _("change directory by working directory file descriptor failed"));
+
+ close(wd_fd);
+ wd_fd = -1;
+ }
+
+ if (do_fork == 1)
+ continue_as_child();
+
+ if (force_uid || force_gid) {
+ if (force_gid && setgroups(0, NULL) != 0 && setgroups_nerrs) /* drop supplementary groups */
+ err(EXIT_FAILURE, _("setgroups failed"));
+ if (force_gid && setgid(gid) < 0) /* change GID */
+ err(EXIT_FAILURE, _("setgid failed"));
+ if (force_uid && setuid(uid) < 0) /* change UID */
+ err(EXIT_FAILURE, _("setuid failed"));
+ }
+
+ if (optind < argc) {
+ execvp(argv[optind], argv + optind);
+ errexec(argv[optind]);
+ }
+ exec_shell();
+}
diff --git a/sys-utils/pivot_root.8 b/sys-utils/pivot_root.8
new file mode 100644
index 0000000..febedd0
--- /dev/null
+++ b/sys-utils/pivot_root.8
@@ -0,0 +1,75 @@
+.TH PIVOT_ROOT 8 "August 2011" "util-linux" "System Administration"
+.SH NAME
+pivot_root \- change the root filesystem
+.SH SYNOPSIS
+.B pivot_root
+.I new_root put_old
+.SH DESCRIPTION
+\fBpivot_root\fP moves the root file system of the current process to the
+directory \fIput_old\fP and makes \fInew_root\fP the new root file system.
+Since \fBpivot_root\fP(8) simply calls \fBpivot_root\fP(2), we refer to
+the man page of the latter for further details.
+
+Note that, depending on the implementation of \fBpivot_root\fP, root and
+cwd of the caller may or may not change. The following is a sequence for
+invoking \fBpivot_root\fP that works in either case, assuming that
+\fBpivot_root\fP and \fBchroot\fP are in the current \fBPATH\fP:
+.sp
+cd \fInew_root\fP
+.br
+pivot_root . \fIput_old\fP
+.br
+exec chroot . \fIcommand\fP
+.sp
+Note that \fBchroot\fP must be available under the old root and under the new
+root, because \fBpivot_root\fP may or may not have implicitly changed the
+root directory of the shell.
+
+Note that \fBexec chroot\fP changes the running executable, which is
+necessary if the old root directory should be unmounted afterwards.
+Also note that standard input, output, and error may still point to a
+device on the old root file system, keeping it busy. They can easily be
+changed when invoking \fBchroot\fP (see below; note the absence of
+leading slashes to make it work whether \fBpivot_root\fP has changed the
+shell's root or not).
+.SH OPTIONS
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Display version information and exit.
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Display help text and exit.
+.SH EXAMPLES
+Change the root file system to /dev/hda1 from an interactive shell:
+.sp
+.nf
+mount /dev/hda1 /new-root
+cd /new-root
+pivot_root . old-root
+exec chroot . sh <dev/console >dev/console 2>&1
+umount /old-root
+.fi
+.sp
+Mount the new root file system over NFS from 10.0.0.1:/my_root and run
+\fBinit\fP:
+.sp
+.nf
+ifconfig lo 127.0.0.1 up # for portmap
+# configure Ethernet or such
+portmap # for lockd (implicitly started by mount)
+mount -o ro 10.0.0.1:/my_root /mnt
+killall portmap # portmap keeps old root busy
+cd /mnt
+pivot_root . old_root
+exec chroot . sh -c 'umount /old_root; exec /sbin/init' \\
+ <dev/console >dev/console 2>&1
+.fi
+.SH "SEE ALSO"
+.BR chroot (1),
+.BR pivot_root (2),
+.BR mount (8),
+.BR switch_root (8),
+.BR umount (8)
+.SH AVAILABILITY
+The pivot_root command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/pivot_root.c b/sys-utils/pivot_root.c
new file mode 100644
index 0000000..ea76d94
--- /dev/null
+++ b/sys-utils/pivot_root.c
@@ -0,0 +1,80 @@
+/*
+ * pivot_root.c - Change the root file system
+ *
+ * Copyright (C) 2000 Werner Almesberger
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+#include <err.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "nls.h"
+#include "closestream.h"
+
+/* Issue the pivot_root system call directly through syscall(2). */
+#define pivot_root(new_root,put_old) syscall(SYS_pivot_root,new_root,put_old)
+
+/*
+ * Write the pivot_root help text to standard output and terminate the
+ * program with EXIT_SUCCESS.  Never returns.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	fputs(USAGE_HEADER, stdout);
+	fprintf(stdout, _(" %s [options] new_root put_old\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, stdout);
+	fputs(_("Change the root filesystem.\n"), stdout);
+
+	fputs(USAGE_OPTIONS, stdout);
+	printf(USAGE_HELP_OPTIONS(16));
+	printf(USAGE_MAN_TAIL("pivot_root(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Entry point: handle -V/--version and -h/--help, then call pivot_root()
+ * with the two positional operands <new_root> and <put_old>.
+ *
+ * Returns EXIT_SUCCESS when the system call succeeds.  A usage error or a
+ * failing system call terminates the program with EXIT_FAILURE.
+ */
+int main(int argc, char **argv)
+{
+	int ch;
+	static const struct option longopts[] = {
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((ch = getopt_long(argc, argv, "Vh", longopts, NULL)) != -1) {
+		switch (ch) {
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();	/* declared noreturn */
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/*
+	 * Locate the operands relative to optind instead of assuming they
+	 * are argv[1] and argv[2]: getopt consumes a "--" separator, so
+	 * "pivot_root -- new old" must be accepted and "pivot_root -- x"
+	 * must not treat "--" as the new root.
+	 */
+	if (argc - optind != 2) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+	if (pivot_root(argv[optind], argv[optind + 1]) < 0)
+		err(EXIT_FAILURE, _("failed to change root from `%s' to `%s'"),
+		    argv[optind], argv[optind + 1]);
+
+	return EXIT_SUCCESS;
+}
diff --git a/sys-utils/prlimit.1 b/sys-utils/prlimit.1
new file mode 100644
index 0000000..1230b3a
--- /dev/null
+++ b/sys-utils/prlimit.1
@@ -0,0 +1,120 @@
+.\" prlimit.1 --
+.\" Copyright 2011 Davidlohr Bueso <dave@gnu.org>
+.\" May be distributed under the GNU General Public License
+
+.TH PRLIMIT 1 "July 2014" "util-linux" "User Commands"
+.SH NAME
+prlimit \- get and set process resource limits
+.SH SYNOPSIS
+.BR prlimit " [options]"
+.RB [ \-\-\fIresource\fR [ =\fIlimits\fR]]
+.RB [ \-\-pid\ \fIPID\fR]
+
+.BR prlimit " [options]"
+.RB [ \-\-\fIresource\fR [ =\fIlimits\fR]]
+.IR "command " [ argument ...]
+
+.SH DESCRIPTION
+Given a process ID and one or more resources, \fBprlimit\fP tries to retrieve
+and/or modify the limits.
+
+When \fIcommand\fR is given,
+.B prlimit
+will run this command with the given arguments.
+
+The \fIlimits\fP parameter is composed of a soft and a hard value, separated
+by a colon (:), in order to modify the existing values. If no \fIlimits\fR are
+given, \fBprlimit\fP will display the current values. If one of the values
+is not given, then the existing one will be used. To specify the unlimited or
+infinity limit (RLIM_INFINITY), the -1 or 'unlimited' string can be passed.
+
+Because of the nature of limits, the soft limit must be lower than or equal to the
+hard limit (also called the ceiling). To see all available resource limits,
+refer to the RESOURCE OPTIONS section.
+
+.IP "\fIsoft\fP:\fIhard\fP Specify both limits."
+.IP "\fIsoft\fP: Specify only the soft limit."
+.IP ":\fIhard\fP Specify only the hard limit."
+.IP "\fIvalue\fP Specify both limits to the same value."
+
+.SH GENERAL OPTIONS
+.IP "\fB\-h, \-\-help\fP"
+Display help text and exit.
+.IP "\fB\-\-noheadings\fP"
+Do not print a header line.
+.IP "\fB\-o, \-\-output \fIlist\fP"
+Define the output columns to use. If no output arrangement is specified,
+then a default set is used.
+Use \fB\-\-help\fP to get a list of all supported columns.
+.IP "\fB\-p, \-\-pid\fP"
+Specify the process id; if none is given, the running process will be used.
+.IP "\fB\-\-raw\fP"
+Use the raw output format.
+.IP "\fB\-\-verbose\fP"
+Verbose mode.
+.IP "\fB\-V, \-\-version\fP"
+Display version information and exit.
+
+.SH RESOURCE OPTIONS
+.IP "\fB\-c, \-\-core\fP[=\fIlimits\fR]"
+Maximum size of a core file.
+.IP "\fB\-d, \-\-data\fP[=\fIlimits\fR]"
+Maximum data size.
+.IP "\fB\-e, \-\-nice\fP[=\fIlimits\fR]"
+Maximum nice priority allowed to raise.
+.IP "\fB\-f, \-\-fsize\fP[=\fIlimits\fR]"
+Maximum file size.
+.IP "\fB\-i, \-\-sigpending\fP[=\fIlimits\fR]"
+Maximum number of pending signals.
+.IP "\fB\-l, \-\-memlock\fP[=\fIlimits\fR]"
+Maximum locked-in-memory address space.
+.IP "\fB\-m, \-\-rss\fP[=\fIlimits\fR]"
+Maximum Resident Set Size (RSS).
+.IP "\fB\-n, \-\-nofile\fP[=\fIlimits\fR]"
+Maximum number of open files.
+.IP "\fB\-q, \-\-msgqueue\fP[=\fIlimits\fR]"
+Maximum number of bytes in POSIX message queues.
+.IP "\fB\-r, \-\-rtprio\fP[=\fIlimits\fR]"
+Maximum real-time priority.
+.IP "\fB\-s, \-\-stack\fP[=\fIlimits\fR]"
+Maximum size of the stack.
+.IP "\fB\-t, \-\-cpu\fP[=\fIlimits\fR]"
+CPU time, in seconds.
+.IP "\fB\-u, \-\-nproc\fP[=\fIlimits\fR]"
+Maximum number of processes.
+.IP "\fB\-v, \-\-as\fP[=\fIlimits\fR]"
+Address space limit.
+.IP "\fB\-x, \-\-locks\fP[=\fIlimits\fR]"
+Maximum number of file locks held.
+.IP "\fB\-y, \-\-rttime\fP[=\fIlimits\fR]"
+Timeout for real-time tasks.
+
+.SH EXAMPLES
+.IP "\fBprlimit \-\-pid 13134\fP"
+Display limit values for all current resources.
+.IP "\fBprlimit \-\-pid 13134 \--rss --nofile=1024:4095\fP"
+Display the limits of the RSS, and set the soft and hard limits for the number
+of open files to 1024 and 4095, respectively.
+.IP "\fBprlimit \-\-pid 13134 --nproc=512:\fP"
+Modify only the soft limit for the number of processes.
+.IP "\fBprlimit \-\-pid $$ --nproc=unlimited\fP"
+Set for the current process both the soft and ceiling values for the number of
+processes to unlimited.
+.IP "\fBprlimit --cpu=10 sort -u hugefile\fP"
+Set both the soft and hard CPU time limit to ten seconds and run 'sort'.
+
+.SH "SEE ALSO"
+.BR ulimit (1),
+.BR prlimit (2)
+
+.SH NOTES
+The prlimit system call has been supported since Linux 2.6.36; this program
+does not work on older kernels.
+
+.SH AUTHORS
+.nf
+Davidlohr Bueso <dave@gnu.org> - In memory of Dennis M. Ritchie.
+.fi
+.SH AVAILABILITY
+The prlimit command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/prlimit.c b/sys-utils/prlimit.c
new file mode 100644
index 0000000..6f80636
--- /dev/null
+++ b/sys-utils/prlimit.c
@@ -0,0 +1,646 @@
+/*
+ * prlimit - get/set process resource limits.
+ *
+ * Copyright (C) 2011 Davidlohr Bueso <dave@gnu.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <assert.h>
+#include <unistd.h>
+#include <sys/resource.h>
+
+#include <libsmartcols.h>
+
+#include "c.h"
+#include "nls.h"
+#include "xalloc.h"
+#include "strutils.h"
+#include "list.h"
+#include "closestream.h"
+
+/* Fallback for system headers that do not define RLIMIT_RTTIME.
+ * NOTE(review): 15 is presumably the kernel's resource number -- confirm. */
+#ifndef RLIMIT_RTTIME
+# define RLIMIT_RTTIME 15
+#endif
+
+/* Identifiers of the supported resources; used as indexes into
+ * prlimit_desc[].  STACK is expected to stay the last entry (main()
+ * asserts MAX_RESOURCES == STACK + 1). */
+enum {
+ AS,
+ CORE,
+ CPU,
+ DATA,
+ FSIZE,
+ LOCKS,
+ MEMLOCK,
+ MSGQUEUE,
+ NICE,
+ NOFILE,
+ NPROC,
+ RSS,
+ RTPRIO,
+ RTTIME,
+ SIGPENDING,
+ STACK
+};
+
+/* basic output flags */
+static int no_headings; /* set by --noheadings; handed to scols_table_enable_noheadings() */
+static int raw; /* set by --raw; handed to scols_table_enable_raw() */
+
+/* Static description of one resource limit. */
+struct prlimit_desc {
+ const char *name; /* resource name, printed in the RESOURCE column */
+ const char *help; /* description, marked for translation with N_() */
+ const char *unit; /* unit name marked with N_(), or NULL if there is none */
+ int resource; /* RLIMIT_* constant passed to prlimit() */
+};
+
+/* Table of all supported resources, indexed by the resource enum above. */
+static struct prlimit_desc prlimit_desc[] =
+{
+ [AS] = { "AS", N_("address space limit"), N_("bytes"), RLIMIT_AS },
+ [CORE] = { "CORE", N_("max core file size"), N_("bytes"), RLIMIT_CORE },
+ [CPU] = { "CPU", N_("CPU time"), N_("seconds"), RLIMIT_CPU },
+ [DATA] = { "DATA", N_("max data size"), N_("bytes"), RLIMIT_DATA },
+ [FSIZE] = { "FSIZE", N_("max file size"), N_("bytes"), RLIMIT_FSIZE },
+ [LOCKS] = { "LOCKS", N_("max number of file locks held"), N_("locks"), RLIMIT_LOCKS },
+ [MEMLOCK] = { "MEMLOCK", N_("max locked-in-memory address space"), N_("bytes"), RLIMIT_MEMLOCK },
+ [MSGQUEUE] = { "MSGQUEUE", N_("max bytes in POSIX mqueues"), N_("bytes"), RLIMIT_MSGQUEUE },
+ [NICE] = { "NICE", N_("max nice prio allowed to raise"), NULL, RLIMIT_NICE },
+ [NOFILE] = { "NOFILE", N_("max number of open files"), N_("files"), RLIMIT_NOFILE },
+ [NPROC] = { "NPROC", N_("max number of processes"), N_("processes"), RLIMIT_NPROC },
+ [RSS] = { "RSS", N_("max resident set size"), N_("bytes"), RLIMIT_RSS },
+ [RTPRIO] = { "RTPRIO", N_("max real-time priority"), NULL, RLIMIT_RTPRIO },
+ [RTTIME] = { "RTTIME", N_("timeout for real-time tasks"), N_("microsecs"), RLIMIT_RTTIME },
+ [SIGPENDING] = { "SIGPENDING", N_("max number of pending signals"), N_("signals"), RLIMIT_SIGPENDING },
+ [STACK] = { "STACK", N_("max stack size"), N_("bytes"), RLIMIT_STACK }
+};
+
+/* Number of entries in prlimit_desc[]. */
+#define MAX_RESOURCES ARRAY_SIZE(prlimit_desc)
+
+/* One requested limit; entries are chained on the list built in main(). */
+struct prlimit {
+ struct list_head lims; /* list linkage */
+
+ struct rlimit rlim; /* values to set, or values read back by prlimit() */
+ struct prlimit_desc *desc; /* static description of the resource */
+ int modify; /* PRLIMIT_{SOFT,HARD} mask */
+};
+
+/* NOTE(review): not referenced in the visible code, and the initializer
+ * does not seem to account for the leading lims member -- confirm before use. */
+#define PRLIMIT_EMPTY_LIMIT {{ 0, 0, }, NULL, 0 }
+
+/* Output column identifiers; used as indexes into infos[]. */
+enum {
+ COL_HELP,
+ COL_RES,
+ COL_SOFT,
+ COL_HARD,
+ COL_UNITS,
+};
+
+/* Description of one output column. */
+struct colinfo {
+ const char *name; /* header */
+ double whint; /* width hint (N < 1 is in percent of termwidth) */
+ int flags; /* SCOLS_FL_* */
+ const char *help; /* column description listed by usage(), marked with N_() */
+};
+
+/* Descriptions of all output columns, indexed by the COL_* identifiers. */
+static struct colinfo infos[] = {
+ [COL_RES] = { "RESOURCE", 0.25, SCOLS_FL_TRUNC, N_("resource name") },
+ [COL_HELP] = { "DESCRIPTION", 0.1, SCOLS_FL_TRUNC, N_("resource description")},
+ [COL_SOFT] = { "SOFT", 0.1, SCOLS_FL_RIGHT, N_("soft limit")},
+ [COL_HARD] = { "HARD", 1, SCOLS_FL_RIGHT, N_("hard limit (ceiling)")},
+ [COL_UNITS] = { "UNITS", 0.1, SCOLS_FL_TRUNC, N_("units")},
+};
+
+/* Columns selected for output (COL_* identifiers) and how many are in use. */
+static int columns[ARRAY_SIZE(infos) * 2];
+static int ncolumns;
+
+
+
+/* Keyword accepted in a limit specification for RLIM_INFINITY. */
+#define INFINITY_STR "unlimited"
+#define INFINITY_STRLEN (sizeof(INFINITY_STR) - 1)
+
+/* Bits of prlimit.modify: which of the two limits the user supplied. */
+#define PRLIMIT_SOFT (1 << 1)
+#define PRLIMIT_HARD (1 << 2)
+
+static pid_t pid; /* calling process (default) */
+static int verbose; /* incremented by each --verbose */
+
+/* Fallback used when HAVE_PRLIMIT is not defined: invoke the prlimit64
+ * system call directly through syscall(2) and return its result. */
+#ifndef HAVE_PRLIMIT
+# include <sys/syscall.h>
+static int prlimit(pid_t p, int resource,
+ const struct rlimit *new_limit,
+ struct rlimit *old_limit)
+{
+ return syscall(SYS_prlimit64, p, resource, new_limit, old_limit);
+}
+#endif
+
+/*
+ * Print the prlimit help text (synopsis, general options, resource options
+ * and the list of output columns) to standard output, then exit with
+ * EXIT_SUCCESS.  Never returns.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ size_t i;
+
+ fputs(USAGE_HEADER, out);
+
+ fprintf(out,
+ _(" %s [options] [-p PID]\n"), program_invocation_short_name);
+ fprintf(out,
+ _(" %s [options] COMMAND\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Show or change the resource limits of a process.\n"), out);
+
+ fputs(_("\nGeneral Options:\n"), out);
+ fputs(_(" -p, --pid <pid> process id\n"
+ " -o, --output <list> define which output columns to use\n"
+ " --noheadings don't print headings\n"
+ " --raw use the raw output format\n"
+ " --verbose verbose output\n"
+ ), out);
+ printf(USAGE_HELP_OPTIONS(24));
+
+ fputs(_("\nResources Options:\n"), out);
+ fputs(_(" -c, --core maximum size of core files created\n"
+ " -d, --data maximum size of a process's data segment\n"
+ " -e, --nice maximum nice priority allowed to raise\n"
+ " -f, --fsize maximum size of files written by the process\n"
+ " -i, --sigpending maximum number of pending signals\n"
+ " -l, --memlock maximum size a process may lock into memory\n"
+ " -m, --rss maximum resident set size\n"
+ " -n, --nofile maximum number of open files\n"
+ " -q, --msgqueue maximum bytes in POSIX message queues\n"
+ " -r, --rtprio maximum real-time scheduling priority\n"
+ " -s, --stack maximum stack size\n"
+ " -t, --cpu maximum amount of CPU time in seconds\n"
+ " -u, --nproc maximum number of user processes\n"
+ " -v, --as size of virtual memory\n"
+ " -x, --locks maximum number of file locks\n"
+ " -y, --rttime CPU time in microseconds a process scheduled\n"
+ " under real-time scheduling\n"), out);
+
+ fputs(USAGE_COLUMNS, out);
+ /* one line per known column: its header name and translated description */
+ for (i = 0; i < ARRAY_SIZE(infos); i++)
+ fprintf(out, " %11s %s\n", infos[i].name, _(infos[i].help));
+
+ printf(USAGE_MAN_TAIL("prlimit(1)"));
+
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * Translate a position in the selected-column list into a COL_* identifier.
+ * Both the position and the stored identifier are checked with assert().
+ */
+static inline int get_column_id(int num)
+{
+	int id;
+
+	assert(num < ncolumns);
+	id = columns[num];
+	assert(id < (int) ARRAY_SIZE(infos));
+
+	return id;
+}
+
+/* Return the infos[] entry describing the column at position @num of the
+ * selected-column list. */
+static inline struct colinfo *get_column_info(unsigned num)
+{
+	struct colinfo *ci = infos + get_column_id(num);
+
+	return ci;
+}
+
+/*
+ * Append one row describing limit @l to @table, producing a cell for every
+ * currently selected output column.
+ *
+ * @table: output table, must not be NULL
+ * @l:     limit to print, must not be NULL
+ *
+ * The program is terminated with EXIT_FAILURE when the row cannot be
+ * allocated or a cell cannot be attached to it.
+ */
+static void add_scols_line(struct libscols_table *table, struct prlimit *l)
+{
+	int i;
+	struct libscols_line *line;
+
+	assert(table);
+	assert(l);
+
+	line = scols_table_new_line(table, NULL);
+	if (!line)
+		err(EXIT_FAILURE, _("failed to allocate output line"));
+
+	for (i = 0; i < ncolumns; i++) {
+		char *str = NULL;
+
+		switch (get_column_id(i)) {
+		case COL_RES:
+			str = xstrdup(l->desc->name);
+			break;
+		case COL_HELP:
+			/*
+			 * The description is only marked with N_() in
+			 * prlimit_desc[]; it has to be translated on use,
+			 * exactly like the unit below.
+			 */
+			str = xstrdup(_(l->desc->help));
+			break;
+		case COL_SOFT:
+			if (l->rlim.rlim_cur == RLIM_INFINITY)
+				str = xstrdup(_("unlimited"));
+			else
+				xasprintf(&str, "%llu", (unsigned long long) l->rlim.rlim_cur);
+			break;
+		case COL_HARD:
+			if (l->rlim.rlim_max == RLIM_INFINITY)
+				str = xstrdup(_("unlimited"));
+			else
+				xasprintf(&str, "%llu", (unsigned long long) l->rlim.rlim_max);
+			break;
+		case COL_UNITS:
+			/* some resources have no unit; leave the cell empty */
+			str = l->desc->unit ? xstrdup(_(l->desc->unit)) : NULL;
+			break;
+		default:
+			break;
+		}
+
+		/* the string is handed over to the line by reference */
+		if (str && scols_line_refer_data(line, i, str))
+			err(EXIT_FAILURE, _("failed to add output data"));
+	}
+}
+
+/*
+ * Look up an output column by name, ignoring case.
+ *
+ * @name:   start of the column name (not necessarily NUL-terminated at @namesz)
+ * @namesz: number of characters of @name that form the column name
+ *
+ * Returns the index of the column in infos[], or -1 after printing a
+ * warning when no column matches.
+ */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	size_t idx;
+
+	assert(name);
+
+	for (idx = 0; idx < ARRAY_SIZE(infos); idx++) {
+		const char *known = infos[idx].name;
+
+		if (strncasecmp(name, known, namesz) != 0)
+			continue;
+		/* accept only a full-length match, not a mere prefix */
+		if (known[namesz] == '\0')
+			return idx;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/* Unlink @lim from the list it is on and free it; a NULL @lim is ignored. */
+static void rem_prlim(struct prlimit *lim)
+{
+	if (lim != NULL) {
+		list_del(&lim->lims);
+		free(lim);
+	}
+}
+
+/*
+ * Print every limit on the list @lims as a table made of the selected
+ * output columns.  Each entry is removed from the list and freed as soon
+ * as its row has been added.
+ *
+ * Always returns 0; allocation failures terminate the program.
+ */
+static int show_limits(struct list_head *lims)
+{
+	struct libscols_table *tb = scols_new_table();
+	struct list_head *cur, *nxt;
+	int n;
+
+	if (tb == NULL)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+
+	scols_table_enable_raw(tb, raw);
+	scols_table_enable_noheadings(tb, no_headings);
+
+	/* create the table header from the selected columns */
+	for (n = 0; n < ncolumns; n++) {
+		struct colinfo *ci = get_column_info(n);
+
+		if (scols_table_new_column(tb, ci->name, ci->whint, ci->flags) == NULL)
+			err(EXIT_FAILURE, _("failed to allocate output column"));
+	}
+
+	/* the safe iterator is required: entries are deleted while walking */
+	list_for_each_safe(cur, nxt, lims) {
+		struct prlimit *entry = list_entry(cur, struct prlimit, lims);
+
+		add_scols_line(tb, entry);
+		rem_prlim(entry);
+	}
+
+	scols_print_table(tb);
+	scols_unref_table(tb);
+
+	return 0;
+}
+
+/*
+ * Fill in the limit the user did not supply.  When only one of the soft
+ * and hard values was given, the other one has to keep its current value,
+ * so it is fetched with prlimit() and copied into @lim.
+ *
+ * Terminates the program when the current limits cannot be read.
+ */
+static void get_unknown_hardsoft(struct prlimit *lim)
+{
+	struct rlimit current;
+
+	if (prlimit(pid, lim->desc->resource, NULL, &current) == -1)
+		err(EXIT_FAILURE, _("failed to get old %s limit"),
+		    lim->desc->name);
+
+	if ((lim->modify & PRLIMIT_SOFT) == 0) {
+		lim->rlim.rlim_cur = current.rlim_cur;
+		return;
+	}
+
+	if ((lim->modify & PRLIMIT_HARD) == 0)
+		lim->rlim.rlim_max = current.rlim_max;
+}
+
+/*
+ * Apply or read every limit on the list @lims with prlimit().
+ *
+ * Entries with a non-zero modify mask are written to the process and then
+ * removed from the list; entries without it are read into their rlim member
+ * and stay on the list.  Any failure terminates the program with
+ * EXIT_FAILURE.
+ */
+static void do_prlimit(struct list_head *lims)
+{
+ struct list_head *p, *pnext;
+
+ /* safe iteration: modified entries are deleted inside the loop */
+ list_for_each_safe(p, pnext, lims) {
+ struct rlimit *new = NULL, *old = NULL;
+ struct prlimit *lim = list_entry(p, struct prlimit, lims);
+
+ if (lim->modify) {
+ /* only one of soft/hard given: fetch the other from the process */
+ if (lim->modify != (PRLIMIT_HARD | PRLIMIT_SOFT))
+ get_unknown_hardsoft(lim);
+
+ if ((lim->rlim.rlim_cur > lim->rlim.rlim_max) &&
+ (lim->rlim.rlim_cur != RLIM_INFINITY ||
+ lim->rlim.rlim_max != RLIM_INFINITY))
+ errx(EXIT_FAILURE, _("the soft limit %s cannot exceed the hard limit"),
+ lim->desc->name);
+ new = &lim->rlim;
+ } else
+ old = &lim->rlim;
+
+ if (verbose && new) {
+ /* a zero pid stands for the calling process */
+ printf(_("New %s limit for pid %d: "), lim->desc->name,
+ pid ? pid : getpid());
+ if (new->rlim_cur == RLIM_INFINITY)
+ printf("<%s", _("unlimited"));
+ else
+ printf("<%ju", (uintmax_t)new->rlim_cur);
+
+ if (new->rlim_max == RLIM_INFINITY)
+ printf(":%s>\n", _("unlimited"));
+ else
+ printf(":%ju>\n", (uintmax_t)new->rlim_max);
+ }
+
+ /* exactly one of new/old is non-NULL: either set or get */
+ if (prlimit(pid, lim->desc->resource, new, old) == -1)
+ err(EXIT_FAILURE, lim->modify ?
+ _("failed to set the %s resource limit") :
+ _("failed to get the %s resource limit"),
+ lim->desc->name);
+
+ if (lim->modify)
+ rem_prlim(lim); /* modify only; don't show */
+ }
+}
+
+/*
+ * Parse a limit specification into a soft and a hard value.
+ *
+ * Accepted forms, where a value is a number or the word "unlimited":
+ *   <value>         both limits set to the same value
+ *   <soft>:         soft limit only
+ *   :<hard>         hard limit only
+ *   <soft>:<hard>   both limits
+ *
+ * @str:   specification; when NULL nothing is parsed and no output is written
+ * @soft:  receives the soft limit; meaningful when PRLIMIT_SOFT is in @found
+ * @hard:  receives the hard limit; meaningful when PRLIMIT_HARD is in @found
+ * @found: receives the PRLIMIT_SOFT / PRLIMIT_HARD mask of the limits given
+ *
+ * Returns 0 on success and -1 when the specification is malformed, which
+ * includes trailing garbage after a value (e.g. "10abc" or "unlimitedx").
+ */
+static int get_range(char *str, rlim_t *soft, rlim_t *hard, int *found)
+{
+	char *end = NULL;
+
+	if (!str)
+		return 0;
+
+	*found = errno = 0;
+	*soft = *hard = RLIM_INFINITY;
+
+	if (!strcmp(str, INFINITY_STR)) {		/* <unlimited> */
+		*found |= PRLIMIT_SOFT | PRLIMIT_HARD;
+		return 0;
+	}
+
+	if (*str == ':') {				/* <:hard> */
+		str++;
+
+		if (strcmp(str, INFINITY_STR) != 0) {
+			*hard = strtoull(str, &end, 10);
+
+			if (errno || !end || *end || end == str)
+				return -1;
+		}
+		*found |= PRLIMIT_HARD;
+		return 0;
+	}
+
+	if (strncmp(str, INFINITY_STR, INFINITY_STRLEN) == 0) {
+		/* <unlimited:> or <unlimited:hard> */
+		end = str + INFINITY_STRLEN;
+	} else {
+		/* <value>, <soft:> or <soft:hard> */
+		*hard = *soft = strtoull(str, &end, 10);
+		if (errno || !end || end == str)
+			return -1;
+	}
+
+	if (*end == '\0') {				/* <value> */
+		*found |= PRLIMIT_SOFT | PRLIMIT_HARD;
+		return 0;
+	}
+
+	/* only a colon may follow the soft value; anything else is garbage */
+	if (*end != ':')
+		return -1;
+
+	if (!*(end + 1)) {				/* <soft:> */
+		*found |= PRLIMIT_SOFT;
+		return 0;
+	}
+
+	/* <soft:hard> */
+	str = end + 1;
+
+	if (!strcmp(str, INFINITY_STR))
+		*hard = RLIM_INFINITY;
+	else {
+		end = NULL;
+		errno = 0;
+		*hard = strtoull(str, &end, 10);
+
+		if (errno || !end || *end || end == str)
+			return -1;
+	}
+	*found |= PRLIMIT_SOFT | PRLIMIT_HARD;
+
+	return 0;
+}
+
+
+/*
+ * Parse the limit specification @ops for resource @id and store the result
+ * in @lim.  Returns the PRLIMIT_SOFT / PRLIMIT_HARD mask reported by
+ * get_range(); a malformed specification terminates the program.
+ */
+static int parse_prlim(struct rlimit *lim, char *ops, size_t id)
+{
+	int mask = 0;
+	rlim_t cur, max;
+
+	if (get_range(ops, &cur, &max, &mask) != 0)
+		errx(EXIT_FAILURE, _("failed to parse %s limit"),
+		     prlimit_desc[id].name);
+
+	lim->rlim_max = max;
+	lim->rlim_cur = cur;
+
+	return mask;
+}
+
+/*
+ * Create a list entry for resource @id and append it to @lims.  When a
+ * specification @ops is given it is parsed and the entry is marked for
+ * modification; otherwise the entry is only queried later.  Returns 0.
+ */
+static int add_prlim(char *ops, struct list_head *lims, size_t id)
+{
+	struct prlimit *entry = xcalloc(1, sizeof(struct prlimit));
+
+	entry->desc = prlimit_desc + id;
+	INIT_LIST_HEAD(&entry->lims);
+
+	if (ops != NULL)
+		entry->modify = parse_prlim(&entry->rlim, ops, id);
+
+	list_add_tail(&entry->lims, lims);
+
+	return 0;
+}
+
+/*
+ * Entry point of prlimit: collect the requested resources from the command
+ * line, set or read them with do_prlimit(), print whatever is left on the
+ * list and finally execute COMMAND when one was given.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE when the --output list is invalid;
+ * other errors terminate the program from the helpers.
+ */
+int main(int argc, char **argv)
+{
+ int opt;
+ struct list_head lims;
+
+ /* values for long-only options, kept above the range of plain chars */
+ enum {
+ VERBOSE_OPTION = CHAR_MAX + 1,
+ RAW_OPTION,
+ NOHEADINGS_OPTION
+ };
+
+ static const struct option longopts[] = {
+ { "pid", required_argument, NULL, 'p' },
+ { "output", required_argument, NULL, 'o' },
+ { "as", optional_argument, NULL, 'v' },
+ { "core", optional_argument, NULL, 'c' },
+ { "cpu", optional_argument, NULL, 't' },
+ { "data", optional_argument, NULL, 'd' },
+ { "fsize", optional_argument, NULL, 'f' },
+ { "locks", optional_argument, NULL, 'x' },
+ { "memlock", optional_argument, NULL, 'l' },
+ { "msgqueue", optional_argument, NULL, 'q' },
+ { "nice", optional_argument, NULL, 'e' },
+ { "nofile", optional_argument, NULL, 'n' },
+ { "nproc", optional_argument, NULL, 'u' },
+ { "rss", optional_argument, NULL, 'm' },
+ { "rtprio", optional_argument, NULL, 'r' },
+ { "rttime", optional_argument, NULL, 'y' },
+ { "sigpending", optional_argument, NULL, 'i' },
+ { "stack", optional_argument, NULL, 's' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ { "noheadings", no_argument, NULL, NOHEADINGS_OPTION },
+ { "raw", no_argument, NULL, RAW_OPTION },
+ { "verbose", no_argument, NULL, VERBOSE_OPTION },
+ { NULL, 0, NULL, 0 }
+ };
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ INIT_LIST_HEAD(&lims);
+
+ /*
+ * Something is very wrong if this doesn't succeed,
+ * assuming STACK is the last resource, of course.
+ */
+ assert(MAX_RESOURCES == STACK + 1);
+
+ /* NOTE(review): the option string lists 'v' twice ("v::" and a plain
+ * "v" before "Vh"); the second looks redundant -- confirm and drop. */
+ while((opt = getopt_long(argc, argv,
+ "+c::d::e::f::i::l::m::n::q::r::s::t::u::v::x::y::p:o:vVh",
+ longopts, NULL)) != -1) {
+ switch(opt) {
+ /* resource options: optarg is NULL when no limits were attached */
+ case 'c':
+ add_prlim(optarg, &lims, CORE);
+ break;
+ case 'd':
+ add_prlim(optarg, &lims, DATA);
+ break;
+ case 'e':
+ add_prlim(optarg, &lims, NICE);
+ break;
+ case 'f':
+ add_prlim(optarg, &lims, FSIZE);
+ break;
+ case 'i':
+ add_prlim(optarg, &lims, SIGPENDING);
+ break;
+ case 'l':
+ add_prlim(optarg, &lims, MEMLOCK);
+ break;
+ case 'm':
+ add_prlim(optarg, &lims, RSS);
+ break;
+ case 'n':
+ add_prlim(optarg, &lims, NOFILE);
+ break;
+ case 'q':
+ add_prlim(optarg, &lims, MSGQUEUE);
+ break;
+ case 'r':
+ add_prlim(optarg, &lims, RTPRIO);
+ break;
+ case 's':
+ add_prlim(optarg, &lims, STACK);
+ break;
+ case 't':
+ add_prlim(optarg, &lims, CPU);
+ break;
+ case 'u':
+ add_prlim(optarg, &lims, NPROC);
+ break;
+ case 'v':
+ add_prlim(optarg, &lims, AS);
+ break;
+ case 'x':
+ add_prlim(optarg, &lims, LOCKS);
+ break;
+ case 'y':
+ add_prlim(optarg, &lims, RTTIME);
+ break;
+
+ case 'p':
+ if (pid)
+ errx(EXIT_FAILURE, _("option --pid may be specified only once"));
+ pid = strtos32_or_err(optarg, _("invalid PID argument"));
+ break;
+ case 'h':
+ usage();
+ case 'o':
+ ncolumns = string_to_idarray(optarg,
+ columns, ARRAY_SIZE(columns),
+ column_name_to_id);
+ if (ncolumns < 0)
+ return EXIT_FAILURE;
+ break;
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+
+ case NOHEADINGS_OPTION:
+ no_headings = 1;
+ break;
+ case VERBOSE_OPTION:
+ verbose++;
+ break;
+ case RAW_OPTION:
+ raw = 1;
+ break;
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+ /* remaining arguments form COMMAND, which cannot be combined with --pid */
+ if (argc > optind && pid)
+ errx(EXIT_FAILURE, _("options --pid and COMMAND are mutually exclusive"));
+ if (!ncolumns) {
+ /* default columns */
+ columns[ncolumns++] = COL_RES;
+ columns[ncolumns++] = COL_HELP;
+ columns[ncolumns++] = COL_SOFT;
+ columns[ncolumns++] = COL_HARD;
+ columns[ncolumns++] = COL_UNITS;
+ }
+
+ scols_init_debug(0);
+
+ if (list_empty(&lims)) {
+ /* default is to print all resources */
+ size_t n;
+
+ for (n = 0; n < MAX_RESOURCES; n++)
+ add_prlim(NULL, &lims, n);
+ }
+
+ do_prlimit(&lims);
+
+ /* do_prlimit() removed the modified entries; print what was only queried */
+ if (!list_empty(&lims))
+ show_limits(&lims);
+
+ if (argc > optind) {
+ /* prlimit [options] COMMAND */
+ execvp(argv[optind], &argv[optind]);
+ errexec(argv[optind]);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/sys-utils/readprofile.8 b/sys-utils/readprofile.8
new file mode 100644
index 0000000..a37aa0a
--- /dev/null
+++ b/sys-utils/readprofile.8
@@ -0,0 +1,153 @@
+.TH READPROFILE "8" "October 2011" "util-linux" "System Administration"
+.SH NAME
+readprofile \- read kernel profiling information
+.SH SYNOPSIS
+.B readprofile
+[options]
+.SH VERSION
+This manpage documents version 2.0 of the program.
+.SH DESCRIPTION
+.LP
+The
+.B readprofile
+command uses the
+.I /proc/profile
+information to print ascii data on standard output. The output is
+organized in three columns: the first is the number of clock ticks,
+the second is the name of the C function in the kernel where those
+many ticks occurred, and the third is the normalized `load' of the
+procedure, calculated as a ratio between the number of ticks and the
+length of the procedure. The output is filled with blanks to ease
+readability.
+.SH OPTIONS
+.TP
+\fB\-a\fR, \fB\-\-all\fR
+Print all symbols in the mapfile. By default the procedures with
+no reported ticks are not printed.
+.TP
+\fB\-b\fR, \fB\-\-histbin\fR
+Print individual histogram-bin counts.
+.TP
+\fB\-i\fR, \fB\-\-info\fR
+Info. This makes
+.B readprofile
+only print the profiling step used by the kernel. The profiling step
+is the resolution of the profiling buffer, and is chosen during
+kernel configuration (through `make config'), or in the kernel's
+command line. If the
+.B \-t
+(terse) switch is used together with
+.B \-i
+only the decimal number is printed.
+.TP
+\fB\-m\fR, \fB\-\-mapfile\fR \fImapfile\fR
+Specify a mapfile, which by default is
+.IR /usr/src/linux/System.map .
+You should specify the map file on cmdline if your current kernel
+isn't the last one you compiled, or if you keep System.map elsewhere.
+If the name of the map file ends with `.gz' it is decompressed on the
+fly.
+.TP
+\fB\-M\fR, \fB\-\-multiplier\fR \fImultiplier\fR
+On some architectures it is possible to alter the frequency at which
+the kernel delivers profiling interrupts to each CPU. This option
+allows you to set the frequency, as a multiplier of the system clock
+frequency, HZ. Linux 2.6.16 dropped multiplier support for most systems.
+This option also resets the profiling buffer, and requires superuser
+privileges.
+.TP
+\fB\-p\fR, \fB\-\-profile\fR \fIpro-file\fR
+Specify a different profiling buffer, which by default is
+.IR /proc/profile .
+Using a different pro-file is useful if you want to `freeze' the
+kernel profiling at some time and read it later. The
+.I /proc/profile
+file can be copied using `cat' or `cp'. There is no more support for
+compressed profile buffers, like in
+.B readprofile-1.1,
+because the program needs to know the size of the buffer in advance.
+.TP
+\fB\-r\fR, \fB\-\-reset\fR
+Reset the profiling buffer. This can only be invoked by root,
+because
+.I /proc/profile
+is readable by everybody but writable only by the superuser.
+However, you can make
+.B readprofile
+set-user-ID 0, in order to reset the buffer without gaining privileges.
+.TP
+\fB\-s\fR, \fB\-\-counters\fR
+Print individual counters within functions.
+.TP
+\fB\-v\fR, \fB\-\-verbose\fR
+Verbose. The output is organized in four columns and filled with
+blanks. The first column is the RAM address of a kernel function,
+the second is the name of the function, the third is the number of
+clock ticks and the last is the normalized load.
+.TP
+\fB\-V\fR, \fB\-\-version\fR
+Display version information and exit.
+.TP
+\fB\-h\fR, \fB\-\-help\fR
+Display help text and exit.
+.SH EXAMPLES
+Browse the profiling buffer ordering by clock ticks:
+.nf
+ readprofile | sort -nr | less
+
+.fi
+Print the 20 most loaded procedures:
+.nf
+ readprofile | sort -nr -k3 | head -20
+
+.fi
+Print only filesystem profile:
+.nf
+ readprofile | grep _ext2
+
+.fi
+Look at all the kernel information, with ram addresses:
+.nf
+ readprofile -av | less
+
+.fi
+Browse a `frozen' profile buffer for a non current kernel:
+.nf
+ readprofile -p ~/profile.freeze -m /zImage.map.gz
+
+.fi
+Request profiling at 2kHz per CPU, and reset the profiling buffer:
+.nf
+ sudo readprofile -M 20
+.fi
+.SH BUGS
+.LP
+.B readprofile
+only works with a 1.3.x or newer kernel, because
+.I /proc/profile
+changed in the step from 1.2 to 1.3.
+.LP
+This program only works with ELF kernels. The change for a.out
+kernels is trivial, and left as an exercise to the a.out user.
+.LP
+To enable profiling, the kernel must be rebooted, because no
+profiling module is available, and it wouldn't be easy to build. To
+enable profiling, you can specify "profile=2" (or another number) on
+the kernel commandline. The number you specify is the two-exponent
+used as profiling step.
+.LP
+Profiling is disabled when interrupts are inhibited. This means that
+many profiling ticks happen when interrupts are re-enabled. Watch
+out for misleading information.
+.SH FILES
+.nf
+/proc/profile A binary snapshot of the profiling buffer.
+/usr/src/linux/System.map The symbol table for the kernel.
+/usr/src/linux/* The program being profiled :-)
+.fi
+.SH AVAILABILITY
+The readprofile command is part of the util-linux package and is
+available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/readprofile.c b/sys-utils/readprofile.c
new file mode 100644
index 0000000..0350738
--- /dev/null
+++ b/sys-utils/readprofile.c
@@ -0,0 +1,407 @@
+/*
+ * readprofile.c - used to read /proc/profile
+ *
+ * Copyright (C) 1994,1996 Alessandro Rubini (rubini@ipvvis.unipv.it)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/*
+ * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ * 1999-09-01 Stephane Eranian <eranian@cello.hpl.hp.com>
+ * - 64bit clean patch
+ * 3Feb2001 Andrew Morton <andrewm@uow.edu.au>
+ * - -M option to write profile multiplier.
+ * 2001-11-07 Werner Almesberger <wa@almesberger.net>
+ * - byte order auto-detection and -n option
+ * 2001-11-09 Werner Almesberger <wa@almesberger.net>
+ * - skip step size (index 0)
+ * 2002-03-09 John Levon <moz@compsoc.man.ac.uk>
+ * - make maplineno do something
+ * 2002-11-28 Mads Martin Joergensen +
+ * - also try /boot/System.map-`uname -r`
+ * 2003-04-09 Werner Almesberger <wa@almesberger.net>
+ * - fixed off-by eight error and improved heuristics in byte order detection
+ * 2003-08-12 Nikita Danilov <Nikita@Namesys.COM>
+ * - added -s option; example of use:
+ * "readprofile -s -m /boot/System.map-test | grep __d_lookup | sort -n -k3"
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/utsname.h>
+#include <unistd.h>
+
+#include "nls.h"
+#include "xalloc.h"
+#include "closestream.h"
+
+#define S_LEN 128
+
+/*
+ * These are the defaults: main() starts with defaultmap as the map file and
+ * defaultpro as the profiling buffer; the -m and -p options replace them.
+ * The reset/multiplier path always writes to defaultpro.
+ */
+static char defaultmap[]="/boot/System.map";
+static char defaultpro[]="/proc/profile";
+
+/*
+ * Open NAME as a stdio stream in MODE.
+ *
+ * A name ending in ".gz" is not opened directly; it is read through a
+ * "zcat NAME" pipe created with popen().  *FLAG is set to 1 when popen()
+ * was used (the stream must then be closed with pclose()) and to 0 when
+ * fopen() was used.
+ *
+ * Returns the stream, or NULL if popen()/fopen() failed.
+ *
+ * NOTE(review): NAME is pasted unquoted into the command handed to popen(),
+ * so shell metacharacters in a ".gz" file name are interpreted by the shell.
+ */
+static FILE *myopen(char *name, char *mode, int *flag)
+{
+	size_t len = strlen(name);
+
+	/* Only look at the suffix when the name is long enough to have one;
+	 * otherwise name + len - 3 would point before the start of the string. */
+	if (len >= 3 && !strcmp(name + len - 3, ".gz")) {
+		FILE *res;
+		char *cmdline = xmalloc(len + 6);	/* "zcat " + name + NUL */
+
+		sprintf(cmdline, "zcat %s", name);
+		res = popen(cmdline, mode);
+		free(cmdline);
+		*flag = 1;
+		return res;
+	}
+	*flag = 0;
+	return fopen(name, mode);
+}
+
+#ifndef BOOT_SYSTEM_MAP
+#define BOOT_SYSTEM_MAP "/boot/System.map-"
+#endif
+
+/*
+ * Build the per-kernel map file name: BOOT_SYSTEM_MAP followed by the
+ * release string reported by uname().
+ *
+ * Returns a newly allocated string, or the string literal "" when uname()
+ * fails (so the result must not be passed to free() unconditionally).
+ */
+static char *boot_uname_r_str(void)
+{
+	struct utsname uts;
+	size_t prefix_len, release_len;
+	char *path;
+
+	if (uname(&uts) != 0)
+		return "";
+
+	prefix_len = strlen(BOOT_SYSTEM_MAP);
+	release_len = strlen(uts.release);
+
+	path = xmalloc(prefix_len + release_len + 1);
+	memcpy(path, BOOT_SYSTEM_MAP, prefix_len);
+	/* the +1 copies the terminating NUL of the release string as well */
+	memcpy(path + prefix_len, uts.release, release_len + 1);
+	return path;
+}
+
+/*
+ * Print the help text to stdout and exit with EXIT_SUCCESS; never returns.
+ * The two default map file names shown are defaultmap and the string built
+ * by boot_uname_r_str(); that string is not freed because the process exits
+ * right after printing.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _(" %s [options]\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Display kernel profiling information.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ /* the -m entry spans two output lines, one per default map file */
+ fprintf(out,
+ _(" -m, --mapfile <mapfile> (defaults: \"%s\" and\n"), defaultmap);
+ fprintf(out,
+ _(" \"%s\")\n"), boot_uname_r_str());
+ fprintf(out,
+ _(" -p, --profile <pro-file> (default: \"%s\")\n"), defaultpro);
+ fputs(_(" -M, --multiplier <mult> set the profiling multiplier to <mult>\n"), out);
+ fputs(_(" -i, --info print only info about the sampling step\n"), out);
+ fputs(_(" -v, --verbose print verbose data\n"), out);
+ fputs(_(" -a, --all print all symbols, even if count is 0\n"), out);
+ fputs(_(" -b, --histbin print individual histogram-bin counts\n"), out);
+ fputs(_(" -s, --counters print individual counters within functions\n"), out);
+ fputs(_(" -r, --reset reset all the counters (root only)\n"), out);
+ fputs(_(" -n, --no-auto disable byte order auto-detection\n"), out);
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(27));
+ printf(USAGE_MAN_TAIL("readprofile(8)"));
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * readprofile entry point.
+ *
+ *  1. Parse the options.
+ *  2. With -r or -M: write to the default profile file (a full int holding
+ *     the multiplier for -M, a single byte for -r) and exit.
+ *  3. Otherwise read the whole profile file into memory; unless -n is given,
+ *     guess the byte order of the counters and swap them if they look
+ *     reversed.  Word 0 of the buffer is the sampling step (-i prints it
+ *     and exits).
+ *  4. Open the map file (falling back to the per-release name from
+ *     boot_uname_r_str() when the default cannot be opened), find "_stext",
+ *     then walk the following symbols, summing the counters that fall
+ *     between each symbol and the next one, and print the result in the
+ *     format selected by -v, -a, -b and -s.
+ *
+ * Exits with EXIT_SUCCESS, or with EXIT_FAILURE through err()/errx() on any
+ * I/O or format problem.
+ */
+int main(int argc, char **argv)
+{
+	FILE *map;
+	int proFd;
+	char *mapFile, *proFile, *mult = NULL;
+	size_t len = 0, indx = 1;
+	size_t nentries;			/* number of words in buf */
+	unsigned long long add0 = 0;
+	unsigned int step;
+	unsigned int *buf, total, fn_len;
+	unsigned long long fn_add, next_add;	/* current and next address */
+	char fn_name[S_LEN], next_name[S_LEN];	/* current and next name */
+	char mode[8];
+	int c;
+	ssize_t rc;
+	int optAll = 0, optInfo = 0, optReset = 0, optVerbose = 0, optNative = 0;
+	int optBins = 0, optSub = 0;
+	char mapline[S_LEN];
+	int maplineno = 1;
+	int popenMap;		/* flag to tell if popen() has been used */
+	int header_printed;
+
+	static const struct option longopts[] = {
+		{"mapfile", required_argument, NULL, 'm'},
+		{"profile", required_argument, NULL, 'p'},
+		{"multiplier", required_argument, NULL, 'M'},
+		{"info", no_argument, NULL, 'i'},
+		{"verbose", no_argument, NULL, 'v'},
+		{"all", no_argument, NULL, 'a'},
+		{"histbin", no_argument, NULL, 'b'},
+		{"counters", no_argument, NULL, 's'},
+		{"reset", no_argument, NULL, 'r'},
+		{"no-auto", no_argument, NULL, 'n'},
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	proFile = defaultpro;
+	mapFile = defaultmap;
+
+	while ((c = getopt_long(argc, argv, "m:p:M:ivabsrnVh", longopts, NULL)) != -1) {
+		switch (c) {
+		case 'm':
+			mapFile = optarg;
+			break;
+		case 'n':
+			optNative++;
+			break;
+		case 'p':
+			proFile = optarg;
+			break;
+		case 'a':
+			optAll++;
+			break;
+		case 'b':
+			optBins++;
+			break;
+		case 's':
+			optSub++;
+			break;
+		case 'i':
+			optInfo++;
+			break;
+		case 'M':
+			mult = optarg;
+			break;
+		case 'r':
+			optReset++;
+			break;
+		case 'v':
+			optVerbose++;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (optReset || mult) {
+		int multiplier, fd, to_write;
+
+		/* When writing the multiplier, if the length of the
+		 * write is not sizeof(int), the multiplier is not
+		 * changed. */
+		if (mult) {
+			multiplier = strtoul(mult, NULL, 10);
+			to_write = sizeof(int);
+		} else {
+			multiplier = 0;
+			/* sth different from sizeof(int) */
+			to_write = 1;
+		}
+		/* try to become root, just in case */
+		ignore_result( setuid(0) );
+		fd = open(defaultpro, O_WRONLY);
+		if (fd < 0)
+			err(EXIT_FAILURE, "%s", defaultpro);
+		if (write(fd, &multiplier, to_write) != to_write)
+			err(EXIT_FAILURE, _("error writing %s"), defaultpro);
+		close(fd);
+		exit(EXIT_SUCCESS);
+	}
+
+	/* Use an fd for the profiling buffer, to skip stdio overhead */
+	if (((proFd = open(proFile, O_RDONLY)) < 0)
+	    || ((int)(len = lseek(proFd, 0, SEEK_END)) < 0)
+	    || (lseek(proFd, 0, SEEK_SET) < 0))
+		err(EXIT_FAILURE, "%s", proFile);
+
+	/* buf[0] (the sampling step) and buf[nentries - 1] are read
+	 * unconditionally below, so at least one full word is required */
+	if (len < sizeof(*buf))
+		errx(EXIT_FAILURE, _("%s: profile buffer is too short"), proFile);
+	nentries = len / sizeof(*buf);
+
+	buf = xmalloc(len);
+
+	rc = read(proFd, buf, len);
+	if (rc < 0 || (size_t) rc != len)
+		err(EXIT_FAILURE, "%s", proFile);
+	close(proFd);
+
+	if (!optNative) {
+		/* Byte order heuristic: count the counters that have bits set
+		 * in their upper half ("big") and in their lower half
+		 * ("small").  More "big" than "small" counters is taken as a
+		 * sign that the buffer was written in the other byte order. */
+		int big = 0, small = 0;
+		unsigned *p;
+		size_t i;
+
+		/* skip the step size (index 0) */
+		for (p = buf + 1; p < buf + nentries; p++) {
+			if (*p & ~0U << (sizeof(*buf) * 4))
+				big++;
+			if (*p & ((1 << (sizeof(*buf) * 4)) - 1))
+				small++;
+		}
+		if (big > small) {
+			warnx(_("Assuming reversed byte order. "
+				"Use -n to force native byte order."));
+			/* reverse the bytes of every word, step included */
+			for (p = buf; p < buf + nentries; p++)
+				for (i = 0; i < sizeof(*buf) / 2; i++) {
+					unsigned char *b = (unsigned char *)p;
+					unsigned char tmp;
+					tmp = b[i];
+					b[i] = b[sizeof(*buf) - i - 1];
+					b[sizeof(*buf) - i - 1] = tmp;
+				}
+		}
+	}
+
+	step = buf[0];
+	if (optInfo) {
+		printf(_("Sampling_step: %u\n"), step);
+		exit(EXIT_SUCCESS);
+	}
+
+	/* every bin index below is computed by dividing by the step */
+	if (step == 0)
+		errx(EXIT_FAILURE, _("%s: invalid sampling step"), proFile);
+
+	total = 0;
+
+	map = myopen(mapFile, "r", &popenMap);
+	if (map == NULL && mapFile == defaultmap) {
+		mapFile = boot_uname_r_str();
+		map = myopen(mapFile, "r", &popenMap);
+	}
+	if (map == NULL)
+		err(EXIT_FAILURE, "%s", mapFile);
+
+	/* Skip map lines up to the start of kernel text; its address becomes
+	 * the base (add0) that bin 1 of the buffer corresponds to. */
+	while (fgets(mapline, S_LEN, map)) {
+		if (sscanf(mapline, "%llx %7[^\n ] %127[^\n ]", &fn_add, mode, fn_name) != 3)
+			errx(EXIT_FAILURE, _("%s(%i): wrong map line"), mapFile,
+			     maplineno);
+		/* only elf works like this */
+		if (!strcmp(fn_name, "_stext") || !strcmp(fn_name, "__stext")) {
+			add0 = fn_add;
+			break;
+		}
+		maplineno++;
+	}
+
+	if (!add0)
+		errx(EXIT_FAILURE, _("can't find \"_stext\" in %s"), mapFile);
+
+	/*
+	 * Main loop.
+	 */
+	while (fgets(mapline, S_LEN, map)) {
+		unsigned int this = 0;
+		int done = 0;
+
+		if (sscanf(mapline, "%llx %7[^\n ] %127[^\n ]", &next_add, mode, next_name) != 3)
+			errx(EXIT_FAILURE, _("%s(%i): wrong map line"), mapFile,
+			     maplineno);
+		header_printed = 0;
+
+		/* the kernel only profiles up to _etext */
+		if (!strcmp(next_name, "_etext") ||
+		    !strcmp(next_name, "__etext"))
+			done = 1;
+		else {
+			/* ignore any LEADING (before a '[tT]' symbol
+			 * is found) Absolute symbols and __init_end
+			 * because some architectures place it before
+			 * .text section */
+			if ((*mode == 'A' || *mode == '?')
+			    && (total == 0 || !strcmp(next_name, "__init_end")))
+				continue;
+			if (*mode != 'T' && *mode != 't' &&
+			    *mode != 'W' && *mode != 'w')
+				break;	/* only text is profiled */
+		}
+
+		if (indx >= nentries)
+			errx(EXIT_FAILURE,
+			     _("profile address out of range. Wrong map file?"));
+
+		/* sum the bins that belong to the current function, i.e. up
+		 * to the bin holding the address of the next symbol */
+		while (indx < (next_add - add0) / step) {
+			/* a map that does not match the buffer can ask for
+			 * bins beyond what was read from the profile file */
+			if (indx >= nentries)
+				errx(EXIT_FAILURE,
+				     _("profile address out of range. Wrong map file?"));
+			if (optBins && (buf[indx] || optAll)) {
+				if (!header_printed) {
+					printf("%s:\n", fn_name);
+					header_printed = 1;
+				}
+				printf("\t%llx\t%u\n", (indx - 1) * step + add0,
+				       buf[indx]);
+			}
+			this += buf[indx++];
+		}
+		total += this;
+
+		if (optBins) {
+			if (optVerbose || this > 0)
+				printf(" total\t\t\t\t%u\n", this);
+		} else if ((this || optAll) &&
+			   (fn_len = next_add - fn_add) != 0) {
+			if (optVerbose)
+				printf("%016llx %-40s %6u %8.4f\n", fn_add,
+				       fn_name, this, this / (double)fn_len);
+			else
+				printf("%6u %-40s %8.4f\n",
+				       this, fn_name, this / (double)fn_len);
+			if (optSub) {
+				unsigned long long scan;
+
+				/* every bin visited here was already bounds
+				 * checked by the accumulation loop above */
+				for (scan = (fn_add - add0) / step + 1;
+				     scan < (next_add - add0) / step;
+				     scan++) {
+					unsigned long long addr;
+					addr = (scan - 1) * step + add0;
+					printf("\t%#llx\t%s+%#llx\t%u\n",
+					       addr, fn_name, addr - fn_add,
+					       buf[scan]);
+				}
+			}
+		}
+
+		fn_add = next_add;
+		strcpy(fn_name, next_name);
+
+		maplineno++;
+		if (done)
+			break;
+	}
+
+	/* clock ticks, out of kernel text - probably modules */
+	printf("%6u %s\n", buf[nentries - 1], "*unknown*");
+
+	/* trailer */
+	if (optVerbose)
+		printf("%016x %-40s %6u %8.4f\n",
+		       0, "total", total, total / (double)(fn_add - add0));
+	else
+		printf("%6u %-40s %8.4f\n",
+		       total, _("total"), total / (double)(fn_add - add0));
+
+	popenMap ? pclose(map) : fclose(map);
+	exit(EXIT_SUCCESS);
+}
diff --git a/sys-utils/renice.1 b/sys-utils/renice.1
new file mode 100644
index 0000000..6b735fa
--- /dev/null
+++ b/sys-utils/renice.1
@@ -0,0 +1,119 @@
+.\" Copyright (c) 1983, 1991, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)renice.8 8.1 (Berkeley) 6/9/93
+.\"
+.TH RENICE "1" "July 2014" "util-linux" "User Commands"
+.SH NAME
+renice \- alter priority of running processes
+.SH SYNOPSIS
+.B renice
+.RB [ \-n ]
+.I priority
+.RB [ \-g | \-p | \-u ]
+.IR identifier ...
+.SH DESCRIPTION
+.B renice
+alters the scheduling priority of one or more running processes. The
+first argument is the \fIpriority\fR value to be used.
+The other arguments are interpreted as process IDs (by default),
+process group IDs, user IDs, or user names.
+.BR renice 'ing
+a process group causes all processes in the process group to have their
+scheduling priority altered.
+.BR renice 'ing
+a user causes all processes owned by the user to have their scheduling
+priority altered.
+.PP
+.SH OPTIONS
+.TP
+.BR \-n , " \-\-priority " \fIpriority\fR
+Specify the scheduling
+.I priority
+to be used for the process, process group, or user. Use of the option
+.BR \-n " or " \-\-priority
+is optional, but when used it must be the first argument.
+.TP
+.BR \-g , " \-\-pgrp
+Interpret the succeeding arguments as process group IDs.
+.TP
+.BR \-p , " \-\-pid
+Interpret the succeeding arguments as process IDs
+(the default).
+.TP
+.BR \-u , " \-\-user
+Interpret the succeeding arguments as usernames or UIDs.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH EXAMPLES
+The following command would change the priority of the processes with
+PIDs 987 and 32, plus all processes owned by the users daemon and root:
+.TP
+.B " renice" +1 987 -u daemon root -p 32
+.SH NOTES
+Users other than the superuser may only alter the priority of processes they
+own. Furthermore, an unprivileged user can only
+.I increase
+the ``nice value'' (i.e., choose a lower priority)
+and such changes are irreversible unless (since Linux 2.6.12)
+the user has a suitable ``nice'' resource limit (see
+.BR ulimit (1)
+and
+.BR getrlimit (2)).
+
+The superuser may alter the priority of any process and set the priority to any
+value in the range \-20 to 19.
+Useful priorities are: 19 (the affected processes will run only when nothing
+else in the system wants to), 0 (the ``base'' scheduling priority), anything
+negative (to make things go very fast).
+.SH FILES
+.TP
+.I /etc/passwd
+to map user names to user IDs
+.SH SEE ALSO
+.BR nice (1),
+.BR getpriority (2),
+.BR setpriority (2),
+.BR credentials (7),
+.BR sched (7)
+.SH HISTORY
+The
+.B renice
+command appeared in 4.0BSD.
+.SH AVAILABILITY
+The renice command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/renice.c b/sys-utils/renice.c
new file mode 100644
index 0000000..3ae71f9
--- /dev/null
+++ b/sys-utils/renice.c
@@ -0,0 +1,196 @@
+/*
+ * Copyright (c) 1983, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+ /* 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ */
+
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <stdio.h>
+#include <pwd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include "nls.h"
+#include "c.h"
+#include "closestream.h"
+
+/*
+ * Names of the three ID kinds, indexed by the PRIO_* selector that the rest
+ * of the file keeps in "which"; they are inserted into the messages printed
+ * by this tool.  The strings are wrapped in N_() here.
+ */
+static const char *idtype[] = {
+ [PRIO_PROCESS] = N_("process ID"),
+ [PRIO_PGRP] = N_("process group ID"),
+ [PRIO_USER] = N_("user ID"),
+};
+
+/*
+ * Print the help text to stdout and exit with EXIT_SUCCESS; never returns.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ /* %1$s reuses the single program-name argument on all three synopsis lines */
+ fprintf(out,
+ _(" %1$s [-n] <priority> [-p|--pid] <pid>...\n"
+ " %1$s [-n] <priority> -g|--pgrp <pgid>...\n"
+ " %1$s [-n] <priority> -u|--user <user>...\n"),
+ program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Alter the priority of running processes.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -n, --priority <num> specify the nice increment value\n"), out);
+ fputs(_(" -p, --pid <id> interpret argument as process ID (default)\n"), out);
+ fputs(_(" -g, --pgrp <id> interpret argument as process group ID\n"), out);
+ fputs(_(" -u, --user <name>|<id> interpret argument as username or user ID\n"), out);
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(24));
+ printf(USAGE_MAN_TAIL("renice(1)"));
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * Read the current priority of the target selected by WHICH (a PRIO_*
+ * value) and WHO into *PRIO.
+ *
+ * Returns 0 on success.  On failure a warning is printed and the negated
+ * errno value of the failed getpriority() call is returned.
+ */
+static int getprio(const int which, const int who, int *prio)
+{
+	/* -1 is tested together with errno below, so errno has to be
+	 * cleared before the call to tell an error from a real value */
+	errno = 0;
+	*prio = getpriority(which, who);
+	if (*prio == -1 && errno) {
+		/* keep the error code: warn() may overwrite errno */
+		const int saved_errno = errno;
+
+		/* idtype[] entries are only marked with N_(); translate here */
+		warn(_("failed to get priority for %d (%s)"), who, _(idtype[which]));
+		return -saved_errno;
+	}
+	return 0;
+}
+
+/*
+ * Set the priority of the target selected by WHICH (a PRIO_* value) and WHO
+ * to PRIO, then print the priority before and after the change.
+ *
+ * Returns 0 on success and 1 if reading or setting the priority failed (a
+ * warning has been printed in that case).
+ */
+static int donice(const int which, const int who, const int prio)
+{
+	int oldprio, newprio;
+
+	if (getprio(which, who, &oldprio) != 0)
+		return 1;
+	if (setpriority(which, who, prio) < 0) {
+		/* idtype[] entries are only marked with N_(); translate here */
+		warn(_("failed to set priority for %d (%s)"), who, _(idtype[which]));
+		return 1;
+	}
+	/* read the value back instead of assuming PRIO was applied as given */
+	if (getprio(which, who, &newprio) != 0)
+		return 1;
+	printf(_("%d (%s) old priority %d, new priority %d\n"),
+	       who, _(idtype[which]), oldprio, newprio);
+	return 0;
+}
+
+/*
+ * Change the priority (the nice value) of processes
+ * or groups of processes which are already running.
+ *
+ * Command line: [-n|--priority] <priority> followed by any mix of the
+ * selectors -p/--pid, -g/--pgrp, -u/--user and identifiers.  A selector
+ * applies to all identifiers after it until the next selector; identifiers
+ * are process IDs by default.
+ *
+ * Returns EXIT_SUCCESS if every identifier was processed without error,
+ * EXIT_FAILURE otherwise.
+ */
+int main(int argc, char **argv)
+{
+	int which = PRIO_PROCESS;
+	int who = 0, prio, errs = 0;
+	char *endptr = NULL;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	/* skip the program name */
+	argc--;
+	argv++;
+
+	/* --help and --version are recognized only as the sole argument */
+	if (argc == 1) {
+		if (strcmp(*argv, "-h") == 0 ||
+		    strcmp(*argv, "--help") == 0)
+			usage();
+
+		if (strcmp(*argv, "-v") == 0 ||
+		    strcmp(*argv, "-V") == 0 ||
+		    strcmp(*argv, "--version") == 0) {
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		}
+	}
+
+	/* the -n in front of the priority is optional */
+	if (*argv && (strcmp(*argv, "-n") == 0 || strcmp(*argv, "--priority") == 0)) {
+		argc--;
+		argv++;
+	}
+
+	/* a priority and at least one more argument are required */
+	if (argc < 2) {
+		warnx(_("not enough arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	prio = strtol(*argv, &endptr, 10);
+	/* endptr == *argv: no digits at all (e.g. an empty string) */
+	if (endptr == *argv || *endptr) {
+		warnx(_("invalid priority '%s'"), *argv);
+		errtryhelp(EXIT_FAILURE);
+	}
+	argc--;
+	argv++;
+
+	for (; argc > 0; argc--, argv++) {
+		int bad;
+
+		if (strcmp(*argv, "-g") == 0 || strcmp(*argv, "--pgrp") == 0) {
+			which = PRIO_PGRP;
+			continue;
+		}
+		if (strcmp(*argv, "-u") == 0 || strcmp(*argv, "--user") == 0) {
+			which = PRIO_USER;
+			continue;
+		}
+		if (strcmp(*argv, "-p") == 0 || strcmp(*argv, "--pid") == 0) {
+			which = PRIO_PROCESS;
+			continue;
+		}
+		if (which == PRIO_USER) {
+			struct passwd *pwd = getpwnam(*argv);
+
+			if (pwd != NULL) {
+				/* Known user name: endptr still belongs to an
+				 * earlier strtol() call and says nothing about
+				 * this argument, so it must not be examined. */
+				who = pwd->pw_uid;
+				bad = who < 0;
+			} else {
+				who = strtol(*argv, &endptr, 10);
+				bad = who < 0 || endptr == *argv || *endptr;
+			}
+			if (bad) {
+				warnx(_("unknown user %s"), *argv);
+				errs = 1;
+				continue;
+			}
+		} else {
+			who = strtol(*argv, &endptr, 10);
+			bad = who < 0 || endptr == *argv || *endptr;
+			if (bad) {
+				/* TRANSLATORS: The first %s is one of the above
+				 * three ID names. Read: "bad value for %s: %s" */
+				warnx(_("bad %s value: %s"), _(idtype[which]), *argv);
+				errs = 1;
+				continue;
+			}
+		}
+		errs |= donice(which, who, prio);
+	}
+	return errs != 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/sys-utils/rfkill.8 b/sys-utils/rfkill.8
new file mode 100644
index 0000000..9eff913
--- /dev/null
+++ b/sys-utils/rfkill.8
@@ -0,0 +1,120 @@
+.\" -*- nroff -*-
+.TH RFKILL "8" "2017-07-06" "util-linux" "System Administration"
+.SH NAME
+rfkill \- tool for enabling and disabling wireless devices
+.SH SYNOPSIS
+.B rfkill
+.RI [ options ]
+.RI [ command ]
+.RI [ id|type \ ...]
+
+.SH DESCRIPTION
+.B rfkill
+lists, enables and disables wireless devices.
+
+The command "list" output format is deprecated and maintained for backward
+compatibility only. The new output format is the default when no command is
+specified or when the option \fB\-\-output\fR is used.
+
+The default output is subject to change. So whenever possible, you should
+avoid using default outputs in your scripts. Always explicitly define expected
+columns by using the \fB\-\-output\fR option together with a columns list in
+environments where a stable output is required.
+
+
+.SH OPTIONS
+.TP
+\fB\-J\fR, \fB\-\-json\fR
+Use JSON output format.
+.TP
+\fB\-n\fR, \fB\-\-noheadings\fR
+Do not print a header line.
+.TP
+\fB\-o\fR, \fB\-\-output\fR
+Specify which output columns to print. Use \-\-help to get a list of
+available columns.
+.TP
+.B \-\-output\-all
+Output all available columns.
+.TP
+\fB\-r\fR, \fB\-\-raw\fR
+Use the raw output format.
+.TP
+.B \-\-help
+Display help text and exit.
+.TP
+.B \-\-version
+Display version information and exit.
+.SH COMMANDS
+.TP
+.B help
+Display help text and exit.
+.TP
+.B event
+Listen for rfkill events and display them on stdout.
+.TP
+\fBlist \fR[\fIid\fR|\fItype\fR ...]
+List the current state of all available devices. The command output format is deprecated, see the section DESCRIPTION.
+It is a good idea to check with the
+.B list
+command that the
+.IR id " or " type
+scope is appropriate before setting
+.BR block " or " unblock .
+Special
+.I all
+type string will match everything. Use of multiple
+.IR id " or " type
+arguments is supported.
+.TP
+\fBblock \fIid\fR|\fItype\fR [...]
+Disable the corresponding device.
+.TP
+\fBunblock \fIid\fR|\fItype\fR [...]
+Enable the corresponding device. If the device is hard\-blocked, for example
+via a hardware switch, it will remain unavailable though it is now
+soft\-unblocked.
+.SH EXAMPLES
+rfkill --output ID,TYPE
+.br
+rfkill block all
+.br
+rfkill unblock wlan
+.br
+rfkill block bluetooth uwb wimax wwan gps fm nfc
+.SH AUTHORS
+.B rfkill
+was originally written by
+.MT johannes@\:sipsolutions.\:net
+Johannes Berg
+.ME
+and
+.MT marcel@\:holtmann.\:org
+Marcel Holtmann
+.ME .
+The code has been later modified by
+.MT kerolasa@\:iki.\:fi
+Sami Kerola
+.ME
+and
+.MT kzak@\:redhat.\:com
+Karel Zak
+.ME
+for the util-linux project.
+.PP
+This manual page was written by
+.MT linux@\:youmustbejoking.\:demon.\:co.uk
+Darren Salt
+.ME ,
+for the Debian project (and may be used by others).
+.SH "SEE ALSO"
+.BR powertop (8),
+.BR systemd-rfkill (8),
+.UR https://\:git.\:kernel.\:org/\:pub/\:scm/\:linux/\:kernel/\:git/\:torvalds/\:linux.git/\:tree/\:Documentation/\:rfkill.txt
+Linux kernel documentation
+.UE
+.SH AVAILABILITY
+The rfkill command is part of the util\-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util\-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/rfkill.c b/sys-utils/rfkill.c
new file mode 100644
index 0000000..a93e8ba
--- /dev/null
+++ b/sys-utils/rfkill.c
@@ -0,0 +1,751 @@
+/*
+ * /dev/rfkill userspace tool
+ *
+ * Copyright 2009 Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2009 Marcel Holtmann <marcel@holtmann.org>
+ * Copyright 2009 Tim Gardner <tim.gardner@canonical.com>
+ * Copyright 2017 Sami Kerola <kerolasa@iki.fi>
+ * Copyright (C) 2017 Karel Zak <kzak@redhat.com>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <ctype.h>
+#include <getopt.h>
+#include <libsmartcols.h>
+#include <linux/rfkill.h>
+#include <poll.h>
+#include <sys/syslog.h>
+#include <sys/time.h>
+
+#include "c.h"
+#include "closestream.h"
+#include "nls.h"
+#include "optutils.h"
+#include "pathnames.h"
+#include "strutils.h"
+#include "timeutils.h"
+#include "widechar.h"
+#include "xalloc.h"
+
+
+/*
+ * NFC supported by kernel since v3.10 (year 2013); FM and other types are from
+ * year 2009 (2.6.33) or older.
+ *
+ * The replacement values are parenthesized so that the macros expand to a
+ * single operand wherever they are used in an expression.
+ */
+#ifndef RFKILL_TYPE_NFC
+# ifndef RFKILL_TYPE_FM
+#  define RFKILL_TYPE_FM	(RFKILL_TYPE_GPS + 1)
+# endif
+# define RFKILL_TYPE_NFC	(RFKILL_TYPE_FM + 1)
+# undef NUM_RFKILL_TYPES
+# define NUM_RFKILL_TYPES	(RFKILL_TYPE_NFC + 1)
+#endif
+
+/* One entry of the table that maps an rfkill type ID to its names. */
+struct rfkill_type_str {
+ enum rfkill_type type; /* ID */
+ const char *name; /* generic name */
+ const char *desc; /* human readable name */
+};
+
+/*
+ * Known rfkill types. The table is terminated by an entry with NULL name.
+ * Entries without .desc are either aliases of the preceding type or the
+ * special "all" type; lookups by type return the first matching entry.
+ */
+static const struct rfkill_type_str rfkill_type_strings[] = {
+ { .type = RFKILL_TYPE_ALL, .name = "all" },
+ { .type = RFKILL_TYPE_WLAN, .name = "wlan", .desc = "Wireless LAN" },
+ { .type = RFKILL_TYPE_WLAN, .name = "wifi" }, /* alias */
+ { .type = RFKILL_TYPE_BLUETOOTH, .name = "bluetooth", .desc = "Bluetooth" },
+ { .type = RFKILL_TYPE_UWB, .name = "uwb", .desc = "Ultra-Wideband" },
+ { .type = RFKILL_TYPE_UWB, .name = "ultrawideband" }, /* alias */
+ { .type = RFKILL_TYPE_WIMAX, .name = "wimax", .desc = "WiMAX" },
+ { .type = RFKILL_TYPE_WWAN, .name = "wwan", .desc = "Wireless WAN" },
+ { .type = RFKILL_TYPE_GPS, .name = "gps", .desc = "GPS" },
+ { .type = RFKILL_TYPE_FM, .name = "fm", .desc = "FM" },
+ { .type = RFKILL_TYPE_NFC, .name = "nfc", .desc = "NFC" },
+ { .type = NUM_RFKILL_TYPES, .name = NULL }
+};
+
+/*
+ * A parsed command line identifier. The .result member tells which member
+ * of the anonymous union is meaningful: .type for RFKILL_IS_TYPE (and
+ * RFKILL_IS_ALL), .index for RFKILL_IS_INDEX.
+ */
+struct rfkill_id {
+ union {
+ enum rfkill_type type;
+ uint32_t index;
+ };
+ enum {
+ RFKILL_IS_INVALID,
+ RFKILL_IS_TYPE,
+ RFKILL_IS_INDEX,
+ RFKILL_IS_ALL
+ } result;
+};
+
+/* supported actions */
+enum {
+ ACT_LIST,
+ ACT_HELP,
+ ACT_EVENT,
+ ACT_BLOCK,
+ ACT_UNBLOCK,
+
+ /* has no entry in rfkill_actions[], so it cannot be selected by name */
+ ACT_LIST_OLD
+};
+
+/* command names as typed by the user, indexed by the ACT_* value */
+static char *rfkill_actions[] = {
+ [ACT_LIST] = "list",
+ [ACT_HELP] = "help",
+ [ACT_EVENT] = "event",
+ [ACT_BLOCK] = "block",
+ [ACT_UNBLOCK] = "unblock"
+};
+
+/* column IDs */
+enum {
+ COL_DEVICE,
+ COL_ID,
+ COL_TYPE,
+ COL_DESC,
+ COL_SOFT,
+ COL_HARD
+};
+
+/* column names */
+struct colinfo {
+ const char *name; /* header */
+ double whint; /* width hint (N < 1 is in percent of termwidth) */
+ int flags; /* SCOLS_FL_* */
+ const char *help;
+};
+
+/* columns descriptions */
+static const struct colinfo infos[] = {
+ [COL_DEVICE] = {"DEVICE", 0, 0, N_("kernel device name")},
+ [COL_ID] = {"ID", 2, SCOLS_FL_RIGHT, N_("device identifier value")},
+ [COL_TYPE] = {"TYPE", 0, 0, N_("device type name that can be used as identifier")},
+ [COL_DESC] = {"TYPE-DESC", 0, 0, N_("device type description")},
+ [COL_SOFT] = {"SOFT", 0, SCOLS_FL_RIGHT, N_("status of software block")},
+ [COL_HARD] = {"HARD", 0, SCOLS_FL_RIGHT, N_("status of hardware block")}
+};
+
+/*
+ * COL_* identifiers of the columns selected for output, in output order;
+ * ncolumns is the number of used entries.
+ */
+static int columns[ARRAY_SIZE(infos) * 2];
+static size_t ncolumns;
+
+/* Output table and the output format flags set from the command line. */
+struct control {
+ struct libscols_table *tb;
+ unsigned int
+ json:1,
+ no_headings:1,
+ raw:1;
+};
+
+/*
+ * Translate the column name @name (the first @namesz bytes are used, the
+ * comparison is case-insensitive) to its COL_* identifier.
+ *
+ * Returns the identifier, or -1 after printing a warning if the name does
+ * not match any column.
+ */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	size_t idx = 0;
+
+	assert(name);
+
+	while (idx < ARRAY_SIZE(infos)) {
+		const char *colname = infos[idx].name;
+
+		/* the prefix has to match and the column name has to end there */
+		if (strncasecmp(name, colname, namesz) == 0 && colname[namesz] == '\0')
+			return idx;
+		idx++;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/* Return the COL_* identifier stored at position @num of the selected columns. */
+static int get_column_id(size_t num)
+{
+	int id;
+
+	assert(num < ncolumns);
+	id = columns[num];
+	assert(id < (int)ARRAY_SIZE(infos));
+
+	return id;
+}
+
+/* Return the description of the column selected at position @num. */
+static const struct colinfo *get_column_info(int num)
+{
+	const int id = get_column_id(num);
+
+	return infos + id;
+}
+
+/*
+ * Translate a command name to its ACT_* value; the match is exact and
+ * case-sensitive. Returns -EINVAL for an unknown command.
+ */
+static int string_to_action(const char *str)
+{
+	size_t act = 0;
+
+	while (act < ARRAY_SIZE(rfkill_actions)) {
+		if (!strcmp(str, rfkill_actions[act]))
+			return act;
+		act++;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Open the rfkill device read-only; if @nonblock is non-zero the
+ * descriptor is switched to non-blocking mode.
+ *
+ * Returns the file descriptor on success. On failure a warning is printed
+ * and a negative errno value is returned; errno is left set to the reason
+ * of the failure.
+ */
+static int rfkill_ro_open(int nonblock)
+{
+	int fd, errsv;
+
+	fd = open(_PATH_DEV_RFKILL, O_RDONLY);
+	if (fd < 0) {
+		/* warn() may modify errno, remember the reason first */
+		errsv = errno;
+		warn(_("cannot open %s"), _PATH_DEV_RFKILL);
+		errno = errsv;
+		return -errsv;
+	}
+
+	if (nonblock && fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+		/*
+		 * Both warn() and close() may modify errno; a clobbered
+		 * value must not be returned (0 would look like a valid
+		 * descriptor to the caller).
+		 */
+		errsv = errno;
+		warn(_("cannot set non-blocking %s"), _PATH_DEV_RFKILL);
+		close(fd);
+		errno = errsv;
+		return -errsv;
+	}
+
+	return fd;
+}
+
+/*
+ * Read one event from @fd to @event.
+ *
+ * Returns: 0 success, 1 read again, < 0 error (negative errno, a warning
+ * is printed).
+ *
+ * When 1 is returned errno is EAGAIN if no event was available, and 0 if
+ * an event shorter than RFKILL_EVENT_SIZE_V1 was read and dropped.
+ */
+static int rfkill_read_event(int fd, struct rfkill_event *event)
+{
+	ssize_t len = read(fd, event, sizeof(*event));
+
+	if (len < 0) {
+		/* warn() may modify errno, remember the reason first */
+		int errsv = errno;
+
+		if (errsv == EAGAIN)
+			return 1;
+		warn(_("cannot read %s"), _PATH_DEV_RFKILL);
+		errno = errsv;
+		return -errsv;
+	}
+
+	if (len < RFKILL_EVENT_SIZE_V1) {
+		/* len is not negative here; %zu expects size_t */
+		warnx(_("wrong size of rfkill event: %zu < %d"), (size_t) len, RFKILL_EVENT_SIZE_V1);
+		/*
+		 * Callers tell "nothing to read" from "bad event" by
+		 * errno; do not leave a stale EAGAIN behind.
+		 */
+		errno = 0;
+		return 1;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Wait for rfkill events and print each of them, prefixed by a timestamp,
+ * to stdout. The loop has no regular end; the function returns only when
+ * the device cannot be opened, polled or read.
+ *
+ * Returns a negative number (the error code from rfkill_ro_open(), or -1).
+ */
+static int rfkill_event(void)
+{
+	struct rfkill_event event;
+	struct timeval tv;
+	char date_buf[ISO_BUFSIZ];
+	struct pollfd p;
+	int fd, n;
+
+	fd = rfkill_ro_open(0);
+	if (fd < 0)
+		return fd;	/* already a negative error code */
+
+	memset(&p, 0, sizeof(p));
+	p.fd = fd;
+	p.events = POLLIN | POLLHUP;
+
+	/* interrupted by signal only */
+	while (1) {
+		int rc = 1;	/* recover-able error */
+
+		n = poll(&p, 1, -1);
+		if (n < 0) {
+			warn(_("failed to poll %s"), _PATH_DEV_RFKILL);
+			goto failed;
+		}
+
+		if (n)
+			rc = rfkill_read_event(fd, &event);
+		if (rc < 0)
+			goto failed;
+		if (rc)
+			continue;	/* nothing usable has been read */
+
+		gettimeofday(&tv, NULL);
+		strtimeval_iso(&tv, ISO_TIMESTAMP_COMMA, date_buf,
+			       sizeof(date_buf));
+		printf("%s: idx %u type %u op %u soft %u hard %u\n",
+		       date_buf,
+		       event.idx, event.type, event.op, event.soft, event.hard);
+		/* make the event visible immediately, stdout may be a pipe */
+		fflush(stdout);
+	}
+
+failed:
+	close(fd);
+	return -1;
+}
+
+/*
+ * Read the sysfs attribute @attr of the rfkill device with index @idx.
+ *
+ * Returns a pointer to a static buffer that holds the first line of the
+ * attribute without the trailing newline, or an empty string if the
+ * attribute cannot be read. The buffer is overwritten by the next call.
+ */
+static const char *get_sys_attr(uint32_t idx, const char *attr)
+{
+	static char name[128];
+	char path[PATH_MAX];
+	FILE *f;
+	char *p;
+
+	/* never hand out the result of a previous call on failure */
+	name[0] = '\0';
+
+	snprintf(path, sizeof(path), _PATH_SYS_RFKILL "/rfkill%u/%s", idx, attr);
+	f = fopen(path, "r");
+	if (!f)
+		return name;
+
+	if (fgets(name, sizeof(name), f)) {
+		p = strchr(name, '\n');
+		if (p)
+			*p = '\0';
+	} else
+		name[0] = '\0';	/* buffer content is not reliable after failure */
+
+	fclose(f);
+	return name;
+}
+
+/*
+ * Parse the command line identifier @s.
+ *
+ * A string that starts with a lower-case letter is looked up in the table
+ * of type names; a string that starts with a digit is taken as a device
+ * index (the program exits if it is not a valid number) and is accepted
+ * only if the device exists in sysfs.
+ *
+ * Returns the identifier; .result is RFKILL_IS_INVALID if @s has not been
+ * recognized.
+ */
+static struct rfkill_id rfkill_id_to_type(const char *s)
+{
+	const struct rfkill_type_str *p;
+	struct rfkill_id ret;
+	/* <ctype.h> functions need a value representable as unsigned char */
+	const unsigned char first = (unsigned char) *s;
+
+	if (islower(first)) {
+		for (p = rfkill_type_strings; p->name != NULL; p++) {
+			if (!strcmp(s, p->name)) {
+				ret.type = p->type;
+				if (!strcmp(s, "all"))
+					ret.result = RFKILL_IS_ALL;
+				else
+					ret.result = RFKILL_IS_TYPE;
+				return ret;
+			}
+		}
+	} else if (isdigit(first)) {
+		/* assume a numeric character implies an index. */
+		char filename[64];
+
+		ret.index = strtou32_or_err(s, _("invalid identifier"));
+		snprintf(filename, sizeof(filename),
+			 _PATH_SYS_RFKILL "/rfkill%" PRIu32 "/name", ret.index);
+		if (access(filename, F_OK) == 0)
+			ret.result = RFKILL_IS_INDEX;
+		else
+			ret.result = RFKILL_IS_INVALID;
+		return ret;
+	}
+
+	ret.result = RFKILL_IS_INVALID;
+	return ret;
+}
+
+/*
+ * Return the human readable description of @type, taken from the first
+ * table entry of that type. The result is NULL if the entry has no
+ * description or if the type is not in the table.
+ */
+static const char *rfkill_type_to_desc(enum rfkill_type type)
+{
+	const struct rfkill_type_str *p = rfkill_type_strings;
+	const struct rfkill_type_str *end = p + ARRAY_SIZE(rfkill_type_strings);
+
+	for (; p < end; p++)
+		if (p->type == type)
+			return p->desc;
+
+	return NULL;
+}
+
+
+/*
+ * Return 1 if @event is an RFKILL_OP_ADD event that is selected by @id,
+ * otherwise 0. Aborts if @id has not been resolved to a type, an index or
+ * "all".
+ */
+static int event_match(struct rfkill_event *event, struct rfkill_id *id)
+{
+	/* events of any other operation are never listed */
+	if (event->op != RFKILL_OP_ADD)
+		return 0;
+
+	if (id->result == RFKILL_IS_ALL)
+		return 1;
+	if (id->result == RFKILL_IS_TYPE)
+		return event->type == id->type;
+	if (id->result == RFKILL_IS_INDEX)
+		return event->idx == id->index;
+
+	abort();
+}
+
+/*
+ * Append a new line to the table @tb and fill the selected columns with
+ * the data of @event. The program exits if the line or its data cannot be
+ * added.
+ */
+static void fill_table_row(struct libscols_table *tb, struct rfkill_event *event)
+{
+	struct libscols_line *line;
+	size_t n;
+
+	assert(tb);
+
+	line = scols_table_new_line(tb, NULL);
+	if (line == NULL) {
+		errno = ENOMEM;
+		errx(EXIT_FAILURE, _("failed to allocate output line"));
+	}
+
+	for (n = 0; n < ncolumns; n++) {
+		/* allocated here, referred (not copied) by the line below */
+		char *data = NULL;
+
+		switch (get_column_id(n)) {
+		case COL_ID:
+			xasprintf(&data, "%" PRIu32, event->idx);
+			break;
+		case COL_DEVICE:
+			data = xstrdup(get_sys_attr(event->idx, "name"));
+			break;
+		case COL_TYPE:
+			data = xstrdup(get_sys_attr(event->idx, "type"));
+			break;
+		case COL_DESC:
+			data = xstrdup(rfkill_type_to_desc(event->type));
+			break;
+		case COL_SOFT:
+			data = xstrdup(event->soft ? _("blocked") : _("unblocked"));
+			break;
+		case COL_HARD:
+			data = xstrdup(event->hard ? _("blocked") : _("unblocked"));
+			break;
+		default:
+			abort();
+		}
+
+		if (data != NULL && scols_line_refer_data(line, n, data) != 0)
+			errx(EXIT_FAILURE, _("failed to add output data"));
+	}
+}
+
+/*
+ * Print the devices selected by @param (all devices if @param is NULL) in
+ * the old, deprecated output format.
+ *
+ * Returns 0 on success, negative number on error.
+ */
+static int rfkill_list_old(const char *param)
+{
+	struct rfkill_id id = { .result = RFKILL_IS_ALL };
+	struct rfkill_event event;
+	int fd, rc = 0;
+
+	if (param) {
+		id = rfkill_id_to_type(param);
+		if (id.result == RFKILL_IS_INVALID) {
+			warnx(_("invalid identifier: %s"), param);
+			return -EINVAL;
+		}
+	}
+
+	fd = rfkill_ro_open(1);
+	if (fd < 0)
+		return fd;	/* the warning has already been printed */
+
+	while (1) {
+		rc = rfkill_read_event(fd, &event);
+		if (rc < 0)
+			break;
+		if (rc == 1 && errno == EAGAIN) {
+			rc = 0;		/* done */
+			break;
+		}
+		if (rc == 0 && event_match(&event, &id)) {
+			const char *desc = rfkill_type_to_desc(event.type);
+			/*
+			 * get_sys_attr() returns a static buffer, the name
+			 * has to be duplicated before the next call.
+			 */
+			char *name = xstrdup(get_sys_attr(event.idx, "name"));
+			/* fall back to the sysfs type name if there is no description */
+			char *type = xstrdup(desc ? desc :
+					get_sys_attr(event.idx, "type"));
+
+			printf("%u: %s: %s\n", event.idx, name, type);
+			printf("\tSoft blocked: %s\n", event.soft ? "yes" : "no");
+			printf("\tHard blocked: %s\n", event.hard ? "yes" : "no");
+
+			free(name);
+			free(type);
+		}
+	}
+	close(fd);
+	return rc;
+}
+
+/*
+ * Allocate the output table, apply the output format flags from @ctrl and
+ * create the selected columns. The program exits on allocation failure.
+ */
+static void rfkill_list_init(struct control *ctrl)
+{
+	struct libscols_table *table;
+	size_t n;
+
+	scols_init_debug(0);
+
+	table = scols_new_table();
+	ctrl->tb = table;
+	if (table == NULL)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+
+	scols_table_enable_json(table, ctrl->json);
+	scols_table_enable_noheadings(table, ctrl->no_headings);
+	scols_table_enable_raw(table, ctrl->raw);
+
+	for (n = 0; n < ncolumns; n++) {
+		const struct colinfo *info = get_column_info(n);
+		struct libscols_column *column =
+			scols_table_new_column(table, info->name, info->whint, info->flags);
+
+		if (column == NULL)
+			err(EXIT_FAILURE, _("failed to allocate output column"));
+
+		/* the device ID is the only column printed as a JSON number */
+		if (ctrl->json && get_column_id(n) == COL_ID)
+			scols_column_set_json_type(column, SCOLS_JSON_NUMBER);
+	}
+}
+
+/*
+ * Add one table line for each device selected by @param (all devices if
+ * @param is NULL) to the table of @ctrl.
+ *
+ * Returns 0 on success, negative number on error.
+ */
+static int rfkill_list_fill(struct control const *ctrl, const char *param)
+{
+	struct rfkill_id id = { .result = RFKILL_IS_ALL };
+	struct rfkill_event event;
+	int fd, rc = 0;
+
+	if (param) {
+		id = rfkill_id_to_type(param);
+		if (id.result == RFKILL_IS_INVALID) {
+			warnx(_("invalid identifier: %s"), param);
+			return -EINVAL;
+		}
+	}
+
+	fd = rfkill_ro_open(1);
+	if (fd < 0)
+		return fd;	/* the warning has already been printed */
+
+	while (1) {
+		rc = rfkill_read_event(fd, &event);
+		if (rc < 0)
+			break;
+		if (rc == 1 && errno == EAGAIN) {
+			rc = 0;		/* done */
+			break;
+		}
+		if (rc == 0 && event_match(&event, &id))
+			fill_table_row(ctrl->tb, &event);
+	}
+	close(fd);
+	return rc;
+}
+
+/* Print the table of @ctrl and drop the reference to it. */
+static void rfkill_list_output(struct control const *ctrl)
+{
+	struct libscols_table *table = ctrl->tb;
+
+	scols_print_table(table);
+	scols_unref_table(table);
+}
+
+/*
+ * Soft-block (@block is non-zero) or soft-unblock (@block is zero) the
+ * devices selected by the identifier @param. A successful change is
+ * reported to syslog.
+ *
+ * Returns 0 on success, negative number on error (invalid identifier,
+ * the device cannot be opened, written or closed).
+ */
+static int rfkill_block(uint8_t block, const char *param)
+{
+	struct rfkill_id id;
+	/* members that are not listed are initialized to zero */
+	struct rfkill_event event = {
+		.op = RFKILL_OP_CHANGE_ALL,
+		.soft = block
+	};
+	ssize_t len;
+	int fd, rc = 0;
+	char *message = NULL;	/* target description for syslog */
+
+	id = rfkill_id_to_type(param);
+
+	switch (id.result) {
+	case RFKILL_IS_INVALID:
+		warnx(_("invalid identifier: %s"), param);
+		return -1;
+	case RFKILL_IS_TYPE:
+		event.type = id.type;
+		xasprintf(&message, "type %s", param);
+		break;
+	case RFKILL_IS_INDEX:
+		/* a single device is changed, not all devices of a type */
+		event.op = RFKILL_OP_CHANGE;
+		event.idx = id.index;
+		xasprintf(&message, "id %" PRIu32, id.index);
+		break;
+	case RFKILL_IS_ALL:
+		message = xstrdup("all");
+		break;
+	default:
+		abort();
+	}
+
+	fd = open(_PATH_DEV_RFKILL, O_RDWR);
+	if (fd < 0) {
+		/* warn() and free() may modify errno, remember the reason first */
+		int errsv = errno;
+
+		warn(_("cannot open %s"), _PATH_DEV_RFKILL);
+		free(message);
+		return -errsv;
+	}
+
+	len = write(fd, &event, sizeof(event));
+	if (len < 0) {
+		warn(_("write failed: %s"), _PATH_DEV_RFKILL);
+		rc = -1;	/* the failure has to reach the exit status */
+	} else {
+		openlog("rfkill", 0, LOG_USER);
+		syslog(LOG_NOTICE, "%s set for %s", block ? "block" : "unblock", message);
+		closelog();
+	}
+	free(message);
+
+	if (close(fd) != 0 && rc == 0)
+		rc = -1;
+	return rc;
+}
+
+/*
+ * Print the help text (synopsis, options, available output columns and
+ * commands) to stdout and exit with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ size_t i;
+
+ fputs(USAGE_HEADER, stdout);
+ fprintf(stdout, _(" %s [options] command [identifier ...]\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, stdout);
+ fputs(_("Tool for enabling and disabling wireless devices.\n"), stdout);
+
+ fputs(USAGE_OPTIONS, stdout);
+ fputs(_(" -J, --json use JSON output format\n"), stdout);
+ fputs(_(" -n, --noheadings don't print headings\n"), stdout);
+ fputs(_(" -o, --output <list> define which output columns to use\n"), stdout);
+ fputs(_(" --output-all output all columns\n"), stdout);
+ fputs(_(" -r, --raw use the raw output format\n"), stdout);
+
+ fputs(USAGE_SEPARATOR, stdout);
+ printf(USAGE_HELP_OPTIONS(24));
+
+ /* one line per known column: name and translated description */
+ fputs(USAGE_COLUMNS, stdout);
+ for (i = 0; i < ARRAY_SIZE(infos); i++)
+ fprintf(stdout, " %-10s %s\n", infos[i].name, _(infos[i].help));
+
+ fputs(USAGE_COMMANDS, stdout);
+
+ /*
+ * TRANSLATORS: command names should not be translated, explaining
+ * them as additional field after identifier is fine, for example
+ *
+ * list [identifier] (lista [tarkenne])
+ */
+ fputs(_(" help\n"), stdout);
+ fputs(_(" event\n"), stdout);
+ fputs(_(" list [identifier]\n"), stdout);
+ fputs(_(" block identifier\n"), stdout);
+ fputs(_(" unblock identifier\n"), stdout);
+
+ fprintf(stdout, USAGE_MAN_TAIL("rfkill(8)"));
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * Parse the options and the command, then run the command for each
+ * identifier given on the command line. Without a command the devices are
+ * listed in the table format.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if any of the operations failed.
+ */
+int main(int argc, char **argv)
+{
+ struct control ctrl = { 0 };
+ int c, act = ACT_LIST, list_all = 0;
+ char *outarg = NULL;
+ /* value returned by getopt_long() for --output-all (no short option) */
+ enum {
+ OPT_LIST_TYPES = CHAR_MAX + 1
+ };
+ static const struct option longopts[] = {
+ { "json", no_argument, NULL, 'J' },
+ { "noheadings", no_argument, NULL, 'n' },
+ { "output", required_argument, NULL, 'o' },
+ { "output-all", no_argument, NULL, OPT_LIST_TYPES },
+ { "raw", no_argument, NULL, 'r' },
+ { "version", no_argument, NULL, 'V' },
+ { "help", no_argument, NULL, 'h' },
+ { NULL, 0, NULL, 0 }
+ };
+ /* NOTE(review): presumably makes --json and --raw mutually exclusive */
+ static const ul_excl_t excl[] = {
+ {'J', 'r'},
+ {0}
+ };
+ int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+ int ret = 0;
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ while ((c = getopt_long(argc, argv, "Jno:rVh", longopts, NULL)) != -1) {
+ err_exclusive_options(c, longopts, excl, excl_st);
+ switch (c) {
+ case 'J':
+ ctrl.json = 1;
+ break;
+ case 'n':
+ ctrl.no_headings = 1;
+ break;
+ case 'o':
+ outarg = optarg;
+ break;
+ case OPT_LIST_TYPES:
+ list_all = 1;
+ break;
+ case 'r':
+ ctrl.raw = 1;
+ break;
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+ case 'h':
+ usage();
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+ /* skip the options, the first remaining argument is the command */
+ argc -= optind;
+ argv += optind;
+
+ if (argc > 0) {
+ act = string_to_action(*argv);
+ if (act < 0)
+ errtryhelp(EXIT_FAILURE);
+ argv++;
+ argc--;
+
+ /*
+ * For backward compatibility we use old output format if
+ * "list" explicitly specified and --output not defined.
+ */
+ if (!outarg && act == ACT_LIST)
+ act = ACT_LIST_OLD;
+ }
+
+ /* the remaining arguments are identifiers; errors are accumulated in ret */
+ switch (act) {
+ case ACT_LIST_OLD:
+ /* Deprecated in favour of ACT_LIST */
+ if (!argc)
+ ret |= rfkill_list_old(NULL); /* ALL */
+ else while (argc) {
+ ret |= rfkill_list_old(*argv);
+ argc--;
+ argv++;
+ }
+ break;
+
+ case ACT_LIST:
+ /* default columns; TYPE-DESC is added by --output-all only */
+ columns[ncolumns++] = COL_ID;
+ columns[ncolumns++] = COL_TYPE;
+ columns[ncolumns++] = COL_DEVICE;
+ if (list_all)
+ columns[ncolumns++] = COL_DESC;
+ columns[ncolumns++] = COL_SOFT;
+ columns[ncolumns++] = COL_HARD;
+
+ if (outarg
+ && string_add_to_idarray(outarg, columns,
+ ARRAY_SIZE(columns), &ncolumns,
+ column_name_to_id) < 0)
+ return EXIT_FAILURE;
+
+ rfkill_list_init(&ctrl);
+ if (!argc)
+ ret |= rfkill_list_fill(&ctrl, NULL); /* ALL */
+ else while (argc) {
+ ret |= rfkill_list_fill(&ctrl, *argv);
+ argc--;
+ argv++;
+ }
+ rfkill_list_output(&ctrl);
+ break;
+
+ case ACT_EVENT:
+ ret = rfkill_event();
+ break;
+
+ case ACT_HELP:
+ usage();
+ break;
+
+ case ACT_BLOCK:
+ /* nothing is done if no identifier is specified */
+ while (argc) {
+ ret |= rfkill_block(1, *argv);
+ argc--;
+ argv++;
+ }
+ break;
+
+ case ACT_UNBLOCK:
+ /* nothing is done if no identifier is specified */
+ while (argc) {
+ ret |= rfkill_block(0, *argv);
+ argv++;
+ argc--;
+ }
+ break;
+ }
+
+ return ret ? EXIT_FAILURE : EXIT_SUCCESS;
+}
diff --git a/sys-utils/rtcwake.8 b/sys-utils/rtcwake.8
new file mode 100644
index 0000000..4a5f8d7
--- /dev/null
+++ b/sys-utils/rtcwake.8
@@ -0,0 +1,189 @@
+.\" Copyright (c) 2007, SUSE LINUX Products GmbH
+.\" Bernhard Walle <bwalle@suse.de>
+.\"
+.\" This program is free software; you can redistribute it and/or
+.\" modify it under the terms of the GNU General Public License
+.\" as published by the Free Software Foundation; either version 2
+.\" of the License, or (at your option) any later version.
+.\"
+.\" This program is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License
+.\" along with this program; if not, write to the Free Software
+.\" Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+.\" 02110-1301, USA.
+.\"
+.TH RTCWAKE 8 "June 2015" "util-linux" "System Administration"
+.SH NAME
+rtcwake \- enter a system sleep state until specified wakeup time
+.SH SYNOPSIS
+.B rtcwake
+[options]
+.RB [ \-d
+.IR device ]
+.RB [ \-m
+.IR standby_mode ]
+.RB { "\-s \fIseconds\fP" | "\-t \fItime_t\fP" }
+.SH DESCRIPTION
+This program is used to enter a system sleep state and to automatically
+wake from it at a specified time.
+.PP
+This uses cross-platform Linux interfaces to enter a system sleep state, and
+leave it no later than a specified time. It uses any RTC framework driver that
+supports standard driver model wakeup flags.
+.PP
+This is normally used like the old \fBapmsleep\fP utility, to wake from a suspend
+state like ACPI S1 (standby) or S3 (suspend-to-RAM). Most platforms can
+implement those without analogues of BIOS, APM, or ACPI.
+.PP
+On some systems, this can also be used like \fBnvram-wakeup\fP, waking from states
+like ACPI S4 (suspend to disk). Not all systems have persistent media that are
+appropriate for such suspend modes.
+.PP
+Note that alarm functionality depends on hardware; not every RTC is able to set up
+an alarm up to 24 hours in the future.
+.PP
+The suspend setup may be interrupted by active hardware; for example wireless USB
+input devices that continue to send events for some fraction of a second after the
+return key is pressed.
+.B rtcwake
+tries to avoid this problem and it waits for the terminal to settle down before
+entering a system sleep.
+
+.SH OPTIONS
+.TP
+.BR \-A , " \-\-adjfile " \fIfile
+Specify an alternative path to the adjust file.
+.TP
+.BR \-a , " \-\-auto"
+Read the clock mode (whether the hardware clock is set to UTC or local time)
+from the \fIadjtime\fP file, where
+.BR hwclock (8)
+stores that information. This is the default.
+.TP
+.BR \-\-date " \fItimestamp"
+Set the wakeup time to the value of the timestamp. Format of the
+timestamp can be any of the following:
+.TS
+tab(|);
+l2 l.
+YYYYMMDDhhmmss
+YYYY-MM-DD hh:mm:ss
+YYYY-MM-DD hh:mm|(seconds will be set to 00)
+YYYY-MM-DD|(time will be set to 00:00:00)
+hh:mm:ss|(date will be set to today)
+hh:mm|(date will be set to today, seconds to 00)
+tomorrow|(time is set to 00:00:00)
++5min
+.TE
+.TP
+.BR \-d , " \-\-device " \fIdevice
+Use the specified \fIdevice\fP instead of \fBrtc0\fP as realtime clock.
+This option is only relevant if your system has more than one RTC.
+You may specify \fBrtc1\fP, \fBrtc2\fP, ... here.
+.TP
+.BR \-l , " \-\-local"
+Assume that the hardware clock is set to local time, regardless of the
+contents of the \fIadjtime\fP file.
+.TP
+.B \-\-list\-modes
+List available \-\-mode option arguments.
+.TP
+.BR \-m , " \-\-mode " \fImode
+Go into the given standby state. Valid values for \fImode\fP are:
+.RS
+.TP
+.B standby
+ACPI state S1. This state offers minimal, though real, power savings, while
+providing a very low-latency transition back to a working system. This is the
+default mode.
+.TP
+.B freeze
+The processes are frozen, all the devices are suspended and all the processors
+idled. This state is a general state that does not need any platform-specific
+support, but it saves less power than Suspend-to-RAM, because the system is
+still in a running state. (Available since Linux 3.9.)
+.TP
+.B mem
+ACPI state S3 (Suspend-to-RAM). This state offers significant power savings as
+everything in the system is put into a low-power state, except for memory,
+which is placed in self-refresh mode to retain its contents.
+.TP
+.B disk
+ACPI state S4 (Suspend-to-disk). This state offers the greatest power savings,
+and can be used even in the absence of low-level platform support for power
+management. This state operates similarly to Suspend-to-RAM, but includes a
+final step of writing memory contents to disk.
+.TP
+.B off
+ACPI state S5 (Poweroff). This is done by calling '/sbin/shutdown'.
+Not officially supported by ACPI, but it usually works.
+.TP
+.B no
+Don't suspend, only set the RTC wakeup time.
+.TP
+.B on
+Don't suspend, but read the RTC device until an alarm time appears.
+This mode is useful for debugging.
+.TP
+.B disable
+Disable a previously set alarm.
+.TP
+.B show
+Print alarm information in format: "alarm: off|on <time>".
+The time is in ctime() output format, e.g. "alarm: on Tue Nov 16 04:48:45 2010".
+.RE
+.TP
+.BR \-n , " \-\-dry-run"
+This option does everything apart from actually setting up the alarm,
+suspending the system, or waiting for the alarm.
+.TP
+.BR \-s , " \-\-seconds " \fIseconds
+Set the wakeup time to \fIseconds\fP in the future from now.
+.TP
+.BR \-t , " \-\-time " \fItime_t
+Set the wakeup time to the absolute time \fItime_t\fP. \fItime_t\fP
+is the time in seconds since 1970-01-01, 00:00 UTC. Use the
+.BR date (1)
+tool to convert between human-readable time and \fItime_t\fP.
+.TP
+.BR \-u , " \-\-utc"
+Assume that the hardware clock is set to UTC (Universal Time Coordinated),
+regardless of the contents of the \fIadjtime\fP file.
+.TP
+.BR \-v , " \-\-verbose"
+Be verbose.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH NOTES
+Some PC systems can't currently exit sleep states such as \fBmem\fP
+using only the kernel code accessed by this driver.
+They need help from userspace code to make the framebuffer work again.
+.SH FILES
+.I /etc/adjtime
+.SH HISTORY
+The program was posted several times on LKML and other lists
+before appearing in kernel commit message for Linux 2.6 in the GIT
+commit 87ac84f42a7a580d0dd72ae31d6a5eb4bfe04c6d.
+.SH AUTHORS
+The program was written by David Brownell <dbrownell@users.sourceforge.net>
+and improved by Bernhard Walle <bwalle@suse.de>.
+.SH COPYRIGHT
+This is free software. You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+There is NO WARRANTY, to the extent permitted by law.
+.SH "SEE ALSO"
+.BR hwclock (8),
+.BR date (1)
+.SH AVAILABILITY
+The rtcwake command is part of the util-linux package and is available from the
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/rtcwake.8.in b/sys-utils/rtcwake.8.in
new file mode 100644
index 0000000..167f7f9
--- /dev/null
+++ b/sys-utils/rtcwake.8.in
@@ -0,0 +1,189 @@
+.\" Copyright (c) 2007, SUSE LINUX Products GmbH
+.\" Bernhard Walle <bwalle@suse.de>
+.\"
+.\" This program is free software; you can redistribute it and/or
+.\" modify it under the terms of the GNU General Public License
+.\" as published by the Free Software Foundation; either version 2
+.\" of the License, or (at your option) any later version.
+.\"
+.\" This program is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License
+.\" along with this program; if not, write to the Free Software
+.\" Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+.\" 02110-1301, USA.
+.\"
+.TH RTCWAKE 8 "June 2015" "util-linux" "System Administration"
+.SH NAME
+rtcwake \- enter a system sleep state until specified wakeup time
+.SH SYNOPSIS
+.B rtcwake
+[options]
+.RB [ \-d
+.IR device ]
+.RB [ \-m
+.IR standby_mode ]
+.RB { "\-s \fIseconds\fP" | "\-t \fItime_t\fP" }
+.SH DESCRIPTION
+This program is used to enter a system sleep state and to automatically
+wake from it at a specified time.
+.PP
+This uses cross-platform Linux interfaces to enter a system sleep state, and
+leave it no later than a specified time. It uses any RTC framework driver that
+supports standard driver model wakeup flags.
+.PP
+This is normally used like the old \fBapmsleep\fP utility, to wake from a suspend
+state like ACPI S1 (standby) or S3 (suspend-to-RAM). Most platforms can
+implement those without analogues of BIOS, APM, or ACPI.
+.PP
+On some systems, this can also be used like \fBnvram-wakeup\fP, waking from states
+like ACPI S4 (suspend to disk). Not all systems have persistent media that are
+appropriate for such suspend modes.
+.PP
+Note that alarm functionality depends on hardware; not every RTC is able to set up
+an alarm up to 24 hours in the future.
+.PP
+The suspend setup may be interrupted by active hardware; for example wireless USB
+input devices that continue to send events for some fraction of a second after the
+return key is pressed.
+.B rtcwake
+tries to avoid this problem and it waits for the terminal to settle down before
+entering a system sleep.
+
+.SH OPTIONS
+.TP
+.BR \-A , " \-\-adjfile " \fIfile
+Specify an alternative path to the adjust file.
+.TP
+.BR \-a , " \-\-auto"
+Read the clock mode (whether the hardware clock is set to UTC or local time)
+from the \fIadjtime\fP file, where
+.BR hwclock (8)
+stores that information. This is the default.
+.TP
+.BR \-\-date " \fItimestamp"
+Set the wakeup time to the value of the timestamp. Format of the
+timestamp can be any of the following:
+.TS
+tab(|);
+l2 l.
+YYYYMMDDhhmmss
+YYYY-MM-DD hh:mm:ss
+YYYY-MM-DD hh:mm|(seconds will be set to 00)
+YYYY-MM-DD|(time will be set to 00:00:00)
+hh:mm:ss|(date will be set to today)
+hh:mm|(date will be set to today, seconds to 00)
+tomorrow|(time is set to 00:00:00)
++5min
+.TE
+.TP
+.BR \-d , " \-\-device " \fIdevice
+Use the specified \fIdevice\fP instead of \fBrtc0\fP as realtime clock.
+This option is only relevant if your system has more than one RTC.
+You may specify \fBrtc1\fP, \fBrtc2\fP, ... here.
+.TP
+.BR \-l , " \-\-local"
+Assume that the hardware clock is set to local time, regardless of the
+contents of the \fIadjtime\fP file.
+.TP
+.B \-\-list\-modes
+List available \-\-mode option arguments.
+.TP
+.BR \-m , " \-\-mode " \fImode
+Go into the given standby state. Valid values for \fImode\fP are:
+.RS
+.TP
+.B standby
+ACPI state S1. This state offers minimal, though real, power savings, while
+providing a very low-latency transition back to a working system. This is the
+default mode.
+.TP
+.B freeze
+The processes are frozen, all the devices are suspended and all the processors
+idled. This state is a general state that does not need any platform-specific
+support, but it saves less power than Suspend-to-RAM, because the system is
+still in a running state. (Available since Linux 3.9.)
+.TP
+.B mem
+ACPI state S3 (Suspend-to-RAM). This state offers significant power savings as
+everything in the system is put into a low-power state, except for memory,
+which is placed in self-refresh mode to retain its contents.
+.TP
+.B disk
+ACPI state S4 (Suspend-to-disk). This state offers the greatest power savings,
+and can be used even in the absence of low-level platform support for power
+management. This state operates similarly to Suspend-to-RAM, but includes a
+final step of writing memory contents to disk.
+.TP
+.B off
+ACPI state S5 (Poweroff). This is done by calling '/sbin/shutdown'.
+Not officially supported by ACPI, but it usually works.
+.TP
+.B no
+Don't suspend, only set the RTC wakeup time.
+.TP
+.B on
+Don't suspend, but read the RTC device until an alarm time appears.
+This mode is useful for debugging.
+.TP
+.B disable
+Disable a previously set alarm.
+.TP
+.B show
+Print alarm information in format: "alarm: off|on <time>".
+The time is in ctime() output format, e.g. "alarm: on Tue Nov 16 04:48:45 2010".
+.RE
+.TP
+.BR \-n , " \-\-dry-run"
+This option does everything apart from actually setting up the alarm,
+suspending the system, or waiting for the alarm.
+.TP
+.BR \-s , " \-\-seconds " \fIseconds
+Set the wakeup time to \fIseconds\fP in the future from now.
+.TP
+.BR \-t , " \-\-time " \fItime_t
+Set the wakeup time to the absolute time \fItime_t\fP. \fItime_t\fP
+is the time in seconds since 1970-01-01, 00:00 UTC. Use the
+.BR date (1)
+tool to convert between human-readable time and \fItime_t\fP.
+.TP
+.BR \-u , " \-\-utc"
+Assume that the hardware clock is set to UTC (Universal Time Coordinated),
+regardless of the contents of the \fIadjtime\fP file.
+.TP
+.BR \-v , " \-\-verbose"
+Be verbose.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH NOTES
+Some PC systems can't currently exit sleep states such as \fBmem\fP
+using only the kernel code accessed by this driver.
+They need help from userspace code to make the framebuffer work again.
+.SH FILES
+.I @ADJTIME_PATH@
+.SH HISTORY
+The program was posted several times on LKML and other lists
+before appearing in kernel commit message for Linux 2.6 in the GIT
+commit 87ac84f42a7a580d0dd72ae31d6a5eb4bfe04c6d.
+.SH AUTHORS
+The program was written by David Brownell <dbrownell@users.sourceforge.net>
+and improved by Bernhard Walle <bwalle@suse.de>.
+.SH COPYRIGHT
+This is free software. You may redistribute copies of it under the terms
+of the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
+There is NO WARRANTY, to the extent permitted by law.
+.SH "SEE ALSO"
+.BR hwclock (8),
+.BR date (1)
+.SH AVAILABILITY
+The rtcwake command is part of the util-linux package and is available from the
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/rtcwake.c b/sys-utils/rtcwake.c
new file mode 100644
index 0000000..b63c646
--- /dev/null
+++ b/sys-utils/rtcwake.c
@@ -0,0 +1,655 @@
+/*
+ * rtcwake -- enter a system sleep state until specified wakeup time.
+ *
+ * This uses cross-platform Linux interfaces to enter a system sleep state,
+ * and leave it no later than a specified time. It uses any RTC framework
+ * driver that supports standard driver model wakeup flags.
+ *
+ * This is normally used like the old "apmsleep" utility, to wake from a
+ * suspend state like ACPI S1 (standby) or S3 (suspend-to-RAM). Most
+ * platforms can implement those without analogues of BIOS, APM, or ACPI.
+ *
+ * On some systems, this can also be used like "nvram-wakeup", waking
+ * from states like ACPI S4 (suspend to disk). Not all systems have
+ * persistent media that are appropriate for such suspend modes.
+ *
+ * The best way to set the system's RTC is so that it holds the current
+ * time in UTC. Use the "-l" flag to tell this program that the system
+ * RTC uses a local timezone instead (maybe you dual-boot MS-Windows).
+ * That flag should not be needed on systems with adjtime support.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/rtc.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <termios.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "closestream.h"
+#include "env.h"
+#include "nls.h"
+#include "optutils.h"
+#include "pathnames.h"
+#include "strutils.h"
+#include "strv.h"
+#include "timeutils.h"
+#include "xalloc.h"
+
+#ifndef RTC_AF
+# define RTC_AF 0x20 /* Alarm interrupt */
+#endif
+
+#define ADJTIME_ZONE_BUFSIZ 8
+#define SYS_WAKEUP_PATH_TEMPLATE "/sys/class/rtc/%s/device/power/wakeup"
+#define SYS_POWER_STATE_PATH "/sys/power/state"
+#define DEFAULT_RTC_DEVICE "/dev/rtc0"
+
+enum rtc_modes { /* manual page --mode option explains these. */
+ OFF_MODE = 0, /* "off": after the alarm is set, main() executes _PATH_SHUTDOWN */
+ NO_MODE, /* "no": only set the alarm; main() leaves it armed and does not suspend */
+ ON_MODE, /* "on": main() keeps read()ing the RTC until RTC_AF is reported */
+ DISABLE_MODE, /* "disable": no new alarm is set up; the existing alarm gets disabled */
+ SHOW_MODE, /* "show": report the current alarm through print_alarm() */
+
+ SYSFS_MODE /* keep it last: it has no rtcwake_mode_string[] entry; the mode name is written to SYS_POWER_STATE_PATH */
+
+};
+
+static const char *rtcwake_mode_string[] = { /* names of the built-in modes, indexed by enum rtc_modes; get_rtc_mode() returns the matching index */
+ [OFF_MODE] = "off",
+ [NO_MODE] = "no",
+ [ON_MODE] = "on",
+ [DISABLE_MODE] = "disable",
+ [SHOW_MODE] = "show"
+};
+
+enum clock_modes { /* how the hardware clock keeps time */
+ CM_AUTO, /* default: main() asks read_clock_mode() to decide from the adjtime file */
+ CM_UTC, /* RTC holds UTC; get_basetimes() then sets TZ=UTC for this process */
+ CM_LOCAL /* RTC holds local time */
+};
+
+struct rtcwake_control { /* run-time state shared by main() and its helpers */
+ char *mode_str; /* name of the requested mode; written to /sys/power/state by suspend_system() */
+ char **possible_modes; /* modes listed in /sys/power/state, cached by get_sys_power_states() */
+ char *adjfile; /* adjtime file path, read by read_clock_mode() */
+ enum clock_modes clock_mode; /* hwclock timezone */
+ time_t sys_time; /* system time, sampled by get_basetimes() */
+ time_t rtc_time; /* hardware time, sampled by get_basetimes() */
+ unsigned int verbose:1, /* verbose messaging */
+ dryrun:1; /* do not set alarm, suspend system, etc */
+};
+
+/*
+ * Write the rtcwake help screen to standard output and terminate the
+ * process with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *stream = stdout;
+
+	/* synopsis */
+	fputs(USAGE_HEADER, stream);
+	fprintf(stream, _(" %s [options]\n"), program_invocation_short_name);
+
+	/* one-line description */
+	fputs(USAGE_SEPARATOR, stream);
+	fputs(_("Enter a system sleep state until a specified wakeup time.\n"), stream);
+
+	/* option list */
+	fputs(USAGE_OPTIONS, stream);
+	fputs(_(" -a, --auto reads the clock mode from adjust file (default)\n"), stream);
+	fprintf(stream,
+		_(" -A, --adjfile <file> specifies the path to the adjust file\n"
+		  " the default is %s\n"), _PATH_ADJTIME);
+	fputs(_(" --date <timestamp> date time of timestamp to wake\n"), stream);
+	fputs(_(" -d, --device <device> select rtc device (rtc0|rtc1|...)\n"), stream);
+	fputs(_(" -n, --dry-run does everything, but suspend\n"), stream);
+	fputs(_(" -l, --local RTC uses local timezone\n"), stream);
+	fputs(_(" --list-modes list available modes\n"), stream);
+	fputs(_(" -m, --mode <mode> standby|mem|... sleep mode\n"), stream);
+	fputs(_(" -s, --seconds <seconds> seconds to sleep\n"), stream);
+	fputs(_(" -t, --time <time_t> time to wake\n"), stream);
+	fputs(_(" -u, --utc RTC uses UTC\n"), stream);
+	fputs(_(" -v, --verbose verbose messages\n"), stream);
+
+	/* common --help/--version lines and the man page pointer */
+	fputs(USAGE_SEPARATOR, stream);
+	printf(USAGE_HELP_OPTIONS(26));
+	printf(USAGE_MAN_TAIL("rtcwake(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Tell whether the RTC named by devname is allowed to wake the system.
+ *
+ * devname may carry a leading "/dev/", which is dropped before the name is
+ * inserted into SYS_WAKEUP_PATH_TEMPLATE.  Returns 1 only when the first
+ * line of that sysfs attribute is exactly "enabled" followed by a newline;
+ * returns 0 otherwise, including when the attribute cannot be opened (a
+ * warning is printed in that case) or cannot be read.
+ */
+static int is_wakeup_enabled(const char *devname)
+{
+	char path[128], state[128];
+	const char *name = devname;
+	FILE *attr;
+	int enabled = 0;
+
+	if (startswith(devname, "/dev/"))
+		name += 5;	/* length of "/dev/" */
+	snprintf(path, sizeof(path), SYS_WAKEUP_PATH_TEMPLATE, name);
+
+	attr = fopen(path, "r");
+	if (!attr) {
+		warn(_("cannot open %s"), path);
+		return 0;
+	}
+
+	/* wakeup events could be disabled or not supported */
+	if (fgets(state, sizeof(state), attr))
+		enabled = strcmp(state, "enabled\n") == 0;
+	fclose(attr);
+
+	return enabled;
+}
+
+/*
+ * Sample the hardware clock behind fd and the system clock back to back and
+ * store both as time_t values in ctl->rtc_time and ctl->sys_time.
+ *
+ * When ctl->clock_mode is CM_UTC the process timezone is switched to UTC
+ * first, so that mktime() (here) and localtime() (in setup_alarm()) treat
+ * RTC fields as UTC.  With ctl->verbose set, the sampled values are printed.
+ *
+ * Returns 0 on success, or -1 after printing a warning when a clock cannot
+ * be read or the RTC fields cannot be converted.
+ */
+static int get_basetimes(struct rtcwake_control *ctl, int fd)
+{
+	struct tm tm = { 0 };
+	struct rtc_time rtc;
+
+	/* This process works in RTC time, except when working
+	 * with the system clock (which always uses UTC).
+	 */
+	if (ctl->clock_mode == CM_UTC)
+		xsetenv("TZ", "UTC", 1);
+	tzset();
+	/* Read rtc and system clocks "at the same time", or as
+	 * precisely (+/- a second) as we can read them.
+	 */
+	if (ioctl(fd, RTC_RD_TIME, &rtc) < 0) {
+		warn(_("read rtc time failed"));
+		return -1;
+	}
+
+	ctl->sys_time = time(NULL);
+	if (ctl->sys_time == (time_t)-1) {
+		warn(_("read system time failed"));
+		return -1;
+	}
+	/* Convert rtc_time to normal arithmetic-friendly form,
+	 * updating tm.tm_wday as used by asctime().
+	 */
+	tm.tm_sec = rtc.tm_sec;
+	tm.tm_min = rtc.tm_min;
+	tm.tm_hour = rtc.tm_hour;
+	tm.tm_mday = rtc.tm_mday;
+	tm.tm_mon = rtc.tm_mon;
+	tm.tm_year = rtc.tm_year;
+	tm.tm_isdst = -1;  /* assume the system knows better than the RTC */
+
+	ctl->rtc_time = mktime(&tm);
+	if (ctl->rtc_time == (time_t)-1) {
+		warn(_("convert rtc time failed"));
+		return -1;
+	}
+
+	if (ctl->verbose) {
+		struct tm sys_tm = { 0 };
+
+		/* Unless the system uses UTC, either delta or tzone
+		 * reflects a seconds offset from UTC.  The value can
+		 * help sort out problems like bugs in your C library. */
+
+		/* time_t is not guaranteed to be long: cast for %ld, as the
+		 * systime/rtctime lines below already do */
+		printf("\tdelta = %ld\n", (long) (ctl->sys_time - ctl->rtc_time));
+		printf("\ttzone = %ld\n", (long) timezone);
+		/* daylight is only specified as zero/non-zero, tzname[] has
+		 * exactly two entries */
+		printf("\ttzname = %s\n", tzname[daylight ? 1 : 0]);
+		/* gmtime_r() into local storage for both clocks: no NULL
+		 * result is ever handed to asctime() */
+		gmtime_r(&ctl->sys_time, &sys_tm);
+		gmtime_r(&ctl->rtc_time, &tm);
+		printf("\tsystime = %ld, (UTC) %s",
+				(long) ctl->sys_time, asctime(&sys_tm));
+		printf("\trtctime = %ld, (UTC) %s",
+				(long) ctl->rtc_time, asctime(&tm));
+	}
+	return 0;
+}
+
+/*
+ * Program the wake alarm of the RTC behind fd for the time *wakeup.
+ *
+ * *wakeup is broken down with localtime(), so the fields handed to the RTC
+ * follow this process's timezone (see the comment in the body).  With
+ * ctl->dryrun set the ioctl is skipped.
+ *
+ * Returns 0 on success, or -1 after printing a warning when the time cannot
+ * be converted or the ioctl fails.
+ */
+static int setup_alarm(struct rtcwake_control *ctl, int fd, time_t *wakeup)
+{
+	struct tm *tm;
+	struct rtc_wkalrm wake = { 0 };
+
+	/* The wakeup time is in POSIX time (more or less UTC).  Ideally
+	 * RTCs use that same time; but PCs can't do that if they need to
+	 * boot MS-Windows.  Messy...
+	 *
+	 * When clock_mode == CM_UTC this process's timezone is UTC, so
+	 * we'll pass a UTC date to the RTC.
+	 *
+	 * Else clock_mode == CM_LOCAL so the time given to the RTC will
+	 * instead use the local time zone. */
+	tm = localtime(wakeup);
+	if (!tm) {
+		/* localtime() returns NULL when the value cannot be represented */
+		warn(_("convert time failed"));
+		return -1;
+	}
+	wake.time.tm_sec = tm->tm_sec;
+	wake.time.tm_min = tm->tm_min;
+	wake.time.tm_hour = tm->tm_hour;
+	wake.time.tm_mday = tm->tm_mday;
+	wake.time.tm_mon = tm->tm_mon;
+	wake.time.tm_year = tm->tm_year;
+	/* wday, yday, and isdst fields are unused */
+	wake.time.tm_wday = -1;
+	wake.time.tm_yday = -1;
+	wake.time.tm_isdst = -1;
+	wake.enabled = 1;
+
+	if (!ctl->dryrun && ioctl(fd, RTC_WKALM_SET, &wake) < 0) {
+		warn(_("set rtc wake alarm failed"));
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Return the sleep states listed in SYS_POWER_STATE_PATH as a string
+ * vector, or NULL when the file cannot be opened or read.
+ *
+ * The vector is built on the first successful call and kept in
+ * ctl->possible_modes; later calls return that cached vector.
+ */
+static char **get_sys_power_states(struct rtcwake_control *ctl)
+{
+	char states[256] = { 0 };
+	int fd;
+
+	if (ctl->possible_modes)
+		return ctl->possible_modes;
+
+	fd = open(SYS_POWER_STATE_PATH, O_RDONLY);
+	if (fd < 0)
+		return NULL;
+
+	/* one byte is held back so the buffer always stays NUL-terminated */
+	if (read(fd, &states, sizeof(states) - 1) > 0)
+		ctl->possible_modes = strv_split(states, " \n");
+	close(fd);
+
+	/* still NULL here when the read produced nothing */
+	return ctl->possible_modes;
+}
+
+/*
+ * Throw away input that is pending on standard input.
+ *
+ * Up to eight rounds: while poll() reports stdin readable within 10 ms,
+ * wait 250 ms and flush the terminal input queue.  Each round is reported
+ * when ctl->verbose is set.
+ */
+static void wait_stdin(struct rtcwake_control *ctl)
+{
+	struct pollfd in = { .fd = STDIN_FILENO, .events = POLLIN };
+	int attempt;
+
+	for (attempt = 0; attempt < 8; attempt++) {
+		if (poll(&in, 1, 10) != 1)
+			break;
+		if (ctl->verbose)
+			warnx(_("discarding stdin"));
+		xusleep(250000);
+		tcflush(STDIN_FILENO, TCIFLUSH);
+	}
+}
+
+/*
+ * Put the system to sleep by writing ctl->mode_str to SYS_POWER_STATE_PATH.
+ *
+ * The file is opened in every case; with ctl->dryrun set nothing is written.
+ * When stdin is a terminal, pending input is discarded first.  A failure to
+ * open the file only produces a warning, a failure reported by
+ * close_stream() terminates the program.
+ */
+static void suspend_system(struct rtcwake_control *ctl)
+{
+	FILE *state = fopen(SYS_POWER_STATE_PATH, "w");
+
+	if (state == NULL) {
+		warn(_("cannot open %s"), SYS_POWER_STATE_PATH);
+		return;
+	}
+
+	if (!ctl->dryrun && isatty(STDIN_FILENO))
+		wait_stdin(ctl);
+
+	if (!ctl->dryrun) {
+		fprintf(state, "%s\n", ctl->mode_str);
+		fflush(state);
+	}
+
+	/* this executes after wake from suspend */
+	if (close_stream(state) != 0)
+		errx(EXIT_FAILURE, _("write error"));
+}
+
+/*
+ * Take the clock mode from the third line of ctl->adjfile.
+ *
+ * A line starting with "UTC" selects CM_UTC, one starting with "LOCAL"
+ * selects CM_LOCAL; any other content leaves ctl->clock_mode untouched and
+ * is reported when ctl->verbose is set.
+ *
+ * Returns 0 when a third line was read, -1 when the file cannot be opened
+ * or has fewer than three lines.
+ */
+static int read_clock_mode(struct rtcwake_control *ctl)
+{
+	char line[ADJTIME_ZONE_BUFSIZ];
+	FILE *adj = fopen(ctl->adjfile, "r");
+	int rc = -1;
+
+	if (adj == NULL)
+		return -1;
+
+	/* skip two lines, then read the third one */
+	if (skip_fline(adj) == 0 && skip_fline(adj) == 0
+	    && fgets(line, sizeof(line), adj) != NULL) {
+		if (!strncmp(line, "UTC", 3))
+			ctl->clock_mode = CM_UTC;
+		else if (!strncmp(line, "LOCAL", 5))
+			ctl->clock_mode = CM_LOCAL;
+		else if (ctl->verbose)
+			warnx(_("unexpected third line in: %s: %s"), ctl->adjfile, line);
+		rc = 0;
+	}
+
+	fclose(adj);
+	return rc;
+}
+
+/*
+ * Print the state of the wake alarm of the RTC behind fd.
+ *
+ * Prints "alarm: off" when the alarm is not enabled or carries no year;
+ * otherwise converts the alarm fields with mktime(), shifts the result by
+ * the system/RTC difference held in ctl, and prints it through ctime().
+ *
+ * Returns 0 on success, -1 after a warning when the alarm cannot be read or
+ * converted.
+ */
+static int print_alarm(struct rtcwake_control *ctl, int fd)
+{
+	struct rtc_wkalrm wake;
+	struct tm fields = { 0 };
+	time_t when;
+
+	if (ioctl(fd, RTC_WKALM_RD, &wake) < 0) {
+		warn(_("read rtc alarm failed"));
+		return -1;
+	}
+
+	if (!(wake.enabled == 1 && wake.time.tm_year != -1)) {
+		printf(_("alarm: off\n"));
+		return 0;
+	}
+
+	fields.tm_year = wake.time.tm_year;
+	fields.tm_mon = wake.time.tm_mon;
+	fields.tm_mday = wake.time.tm_mday;
+	fields.tm_hour = wake.time.tm_hour;
+	fields.tm_min = wake.time.tm_min;
+	fields.tm_sec = wake.time.tm_sec;
+	fields.tm_isdst = -1;	/* assume the system knows better than the RTC */
+
+	when = mktime(&fields);
+	if (when == (time_t)-1) {
+		warn(_("convert time failed"));
+		return -1;
+	}
+
+	/* 0 if both UTC, or expresses diff if RTC in local time */
+	when += ctl->sys_time - ctl->rtc_time;
+	printf(_("alarm: on %s"), ctime(&when));
+
+	return 0;
+}
+
+/*
+ * Map the mode name s to an enum rtc_modes value.
+ *
+ * Names listed in /sys/power/state are checked first and yield SYSFS_MODE;
+ * otherwise the index of s in rtcwake_mode_string[] is returned.  Unknown
+ * names yield -EINVAL.
+ */
+static int get_rtc_mode(struct rtcwake_control *ctl, const char *s)
+{
+	char **sys_modes = get_sys_power_states(ctl), **cur;
+	size_t idx = 0;
+
+	STRV_FOREACH(cur, sys_modes) {
+		if (!strcmp(s, *cur))
+			return SYSFS_MODE;
+	}
+
+	while (idx < ARRAY_SIZE(rtcwake_mode_string)) {
+		if (strcmp(s, rtcwake_mode_string[idx]) == 0)
+			return idx;
+		idx++;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Open the RTC device read-only (close-on-exec) and return its descriptor.
+ *
+ * A devname that does not start with "/dev" is looked up below /dev/.
+ * The program terminates with an error message when the device cannot be
+ * opened.
+ */
+static int open_dev_rtc(const char *devname)
+{
+	char *path = NULL;
+	int fd;
+
+	if (!startswith(devname, "/dev"))
+		xasprintf(&path, "/dev/%s", devname);
+	else
+		path = xstrdup(devname);
+
+	fd = open(path, O_RDONLY | O_CLOEXEC);
+	if (fd < 0)
+		err(EXIT_FAILURE, _("%s: unable to find device"), path);
+
+	free(path);
+	return fd;
+}
+
+/*
+ * Print every usable mode name on one line: first the states read from
+ * /sys/power/state, then the built-in rtcwake modes.  Terminates the
+ * program when the sysfs file cannot be read.
+ */
+static void list_modes(struct rtcwake_control *ctl)
+{
+	char **sys_modes = get_sys_power_states(ctl), **cur;
+	size_t n;
+
+	if (sys_modes == NULL)
+		errx(EXIT_FAILURE, _("could not read: %s"), SYS_POWER_STATE_PATH);
+
+	STRV_FOREACH(cur, sys_modes) {
+		fputs(*cur, stdout);
+		putchar(' ');
+	}
+
+	for (n = 0; n < ARRAY_SIZE(rtcwake_mode_string); n++) {
+		fputs(rtcwake_mode_string[n], stdout);
+		putchar(' ');
+	}
+	putchar('\n');
+}
+
+/*
+ * rtcwake entry point.
+ *
+ * Parses the command line, settles the clock mode (from the adjtime file
+ * when it is CM_AUTO, falling back to UTC), samples the RTC and system
+ * clocks, programs the wake alarm unless the mode is "disable" or "show",
+ * and then acts on the selected mode.  Unless ctl.dryrun is set (by
+ * --dry-run, or internally for the "no" and "show" modes) the alarm is
+ * disabled again before returning.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE when a late step failed; earlier
+ * failures terminate the program directly.
+ */
+int main(int argc, char **argv)
+{
+	struct rtcwake_control ctl = {
+		.mode_str = "suspend",		/* default mode */
+		.adjfile = _PATH_ADJTIME,
+		.clock_mode = CM_AUTO
+	};
+	char *devname = DEFAULT_RTC_DEVICE;
+	unsigned seconds = 0;
+	int suspend = SYSFS_MODE;
+	int rc = EXIT_SUCCESS;
+	int t;
+	int fd;
+	time_t alarm = 0;
+	enum {
+		OPT_DATE = CHAR_MAX + 1,
+		OPT_LIST
+	};
+	static const struct option long_options[] = {
+		{ "adjfile", required_argument, NULL, 'A' },
+		{ "auto", no_argument, NULL, 'a' },
+		{ "dry-run", no_argument, NULL, 'n' },
+		{ "local", no_argument, NULL, 'l' },
+		{ "utc", no_argument, NULL, 'u' },
+		{ "verbose", no_argument, NULL, 'v' },
+		{ "version", no_argument, NULL, 'V' },
+		{ "help", no_argument, NULL, 'h' },
+		{ "mode", required_argument, NULL, 'm' },
+		{ "device", required_argument, NULL, 'd' },
+		{ "seconds", required_argument, NULL, 's' },
+		{ "time", required_argument, NULL, 't' },
+		{ "date", required_argument, NULL, OPT_DATE },
+		{ "list-modes", no_argument, NULL, OPT_LIST },
+		{ NULL, 0, NULL, 0 }
+	};
+	/* options within one row may not be combined */
+	static const ul_excl_t excl[] = {
+		{ 'a', 'l', 'u' },
+		{ 's', 't', OPT_DATE },
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((t = getopt_long(argc, argv, "A:ahd:lm:ns:t:uVv",
+					long_options, NULL)) != EOF) {
+		err_exclusive_options(t, long_options, excl, excl_st);
+		switch (t) {
+		case 'A':
+			/* for better compatibility with hwclock */
+			ctl.adjfile = optarg;
+			break;
+		case 'a':
+			ctl.clock_mode = CM_AUTO;
+			break;
+		case 'd':
+			devname = optarg;
+			break;
+		case 'l':
+			ctl.clock_mode = CM_LOCAL;
+			break;
+
+		case OPT_LIST:
+			list_modes(&ctl);
+			return EXIT_SUCCESS;
+
+		case 'm':
+			if ((suspend = get_rtc_mode(&ctl, optarg)) < 0)
+				errx(EXIT_FAILURE, _("unrecognized suspend state '%s'"), optarg);
+			ctl.mode_str = optarg;
+			break;
+		case 'n':
+			ctl.dryrun = 1;
+			break;
+		case 's':
+			/* alarm time, seconds-to-sleep (relative) */
+			seconds = strtou32_or_err(optarg, _("invalid seconds argument"));
+			break;
+		case 't':
+			/* alarm time, time_t (absolute, seconds since epoch) */
+			alarm = strtou32_or_err(optarg, _("invalid time argument"));
+			break;
+		case OPT_DATE:
+		{	/* alarm time, see timestamp format from manual */
+			usec_t p;
+			if (parse_timestamp(optarg, &p) < 0)
+				errx(EXIT_FAILURE, _("invalid time value \"%s\""), optarg);
+			alarm = (time_t) (p / 1000000);
+			break;
+		}
+		case 'u':
+			ctl.clock_mode = CM_UTC;
+			break;
+		case 'v':
+			ctl.verbose = 1;
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			exit(EXIT_SUCCESS);
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (ctl.clock_mode == CM_AUTO && read_clock_mode(&ctl) < 0) {
+		printf(_("%s: assuming RTC uses UTC ...\n"), program_invocation_short_name);
+		ctl.clock_mode = CM_UTC;
+	}
+
+	if (ctl.verbose)
+		printf("%s", ctl.clock_mode == CM_UTC ? _("Using UTC time.\n") :
+				_("Using local time.\n"));
+
+	if (!alarm && !seconds && suspend != DISABLE_MODE && suspend != SHOW_MODE)
+		errx(EXIT_FAILURE, _("must provide wake time (see --seconds, --time and --date options)"));
+
+	/* device must exist and (if we'll sleep) be wakeup-enabled */
+	fd = open_dev_rtc(devname);
+
+	if (suspend != ON_MODE && suspend != NO_MODE && !is_wakeup_enabled(devname))
+		errx(EXIT_FAILURE, _("%s not enabled for wakeup events"), devname);
+
+	/* relative or absolute alarm time, normalized to time_t */
+	if (get_basetimes(&ctl, fd) < 0)
+		exit(EXIT_FAILURE);
+
+	/* time_t is not guaranteed to be long: cast to match %ld */
+	if (ctl.verbose)
+		printf(_("alarm %ld, sys_time %ld, rtc_time %ld, seconds %u\n"),
+				(long) alarm, (long) ctl.sys_time,
+				(long) ctl.rtc_time, seconds);
+
+	if (suspend != DISABLE_MODE && suspend != SHOW_MODE) {
+		/* perform alarm setup when the show or disable modes are not set */
+		if (alarm) {
+			if (alarm < ctl.sys_time)
+				errx(EXIT_FAILURE, _("time doesn't go backward to %s"),
+						ctime(&alarm));
+			/* translate from system time to RTC time */
+			alarm -= ctl.sys_time - ctl.rtc_time;
+		} else
+			alarm = ctl.rtc_time + seconds + 1;
+
+		if (setup_alarm(&ctl, fd, &alarm) < 0)
+			exit(EXIT_FAILURE);
+
+		if (suspend == NO_MODE || suspend == ON_MODE)
+			printf(_("%s: wakeup using %s at %s"),
+					program_invocation_short_name, devname,
+					ctime(&alarm));
+		else
+			printf(_("%s: wakeup from \"%s\" using %s at %s"),
+					program_invocation_short_name, ctl.mode_str, devname,
+					ctime(&alarm));
+		fflush(stdout);
+		xusleep(10 * 1000);
+	}
+
+	switch (suspend) {
+	case NO_MODE:
+		if (ctl.verbose)
+			printf(_("suspend mode: no; leaving\n"));
+		ctl.dryrun = 1;	/* to skip disabling alarm at the end */
+		break;
+	case OFF_MODE:
+	{
+		char *arg[5];
+		int i = 0;
+
+		if (ctl.verbose)
+			printf(_("suspend mode: off; executing %s\n"),
+					_PATH_SHUTDOWN);
+		arg[i++] = _PATH_SHUTDOWN;
+		arg[i++] = "-h";
+		arg[i++] = "-P";
+		arg[i++] = "now";
+		arg[i]   = NULL;
+		if (!ctl.dryrun) {
+			/* only returns on failure */
+			execv(arg[0], arg);
+			warn(_("failed to execute %s"), _PATH_SHUTDOWN);
+			rc = EXIT_FAILURE;
+		}
+		break;
+	}
+	case ON_MODE:
+	{
+		/* initialised so the loop condition never tests an
+		 * indeterminate value when read() stores nothing */
+		unsigned long data = 0;
+
+		if (ctl.verbose)
+			printf(_("suspend mode: on; reading rtc\n"));
+		if (!ctl.dryrun) {
+			do {
+				t = read(fd, &data, sizeof data);
+				if (t < 0) {
+					warn(_("rtc read failed"));
+					break;
+				}
+				if (ctl.verbose)
+					printf("... %s: %03lx\n", devname, data);
+			} while (!(data & RTC_AF));
+		}
+		break;
+	}
+	case DISABLE_MODE:
+		/* just break, alarm gets disabled in the end */
+		if (ctl.verbose)
+			printf(_("suspend mode: disable; disabling alarm\n"));
+		break;
+	case SHOW_MODE:
+		if (ctl.verbose)
+			printf(_("suspend mode: show; printing alarm info\n"));
+		if (print_alarm(&ctl, fd))
+			rc = EXIT_FAILURE;
+		ctl.dryrun = 1;	/* don't really disable alarm in the end, just show */
+		break;
+	default:
+		/* SYSFS_MODE: a state name taken from /sys/power/state */
+		if (ctl.verbose)
+			printf(_("suspend mode: %s; suspending system\n"), ctl.mode_str);
+		sync();
+		suspend_system(&ctl);
+	}
+
+	if (!ctl.dryrun) {
+		struct rtc_wkalrm wake;
+
+		/* read-modify-write: only the enabled flag is changed */
+		if (ioctl(fd, RTC_WKALM_RD, &wake) < 0) {
+			warn(_("read rtc alarm failed"));
+			rc = EXIT_FAILURE;
+		} else {
+			wake.enabled = 0;
+			if (ioctl(fd, RTC_WKALM_SET, &wake) < 0) {
+				warn(_("disable rtc alarm interrupt failed"));
+				rc = EXIT_FAILURE;
+			}
+		}
+	}
+
+	close(fd);
+	return rc;
+}
diff --git a/sys-utils/setarch.8 b/sys-utils/setarch.8
new file mode 100644
index 0000000..efa3d50
--- /dev/null
+++ b/sys-utils/setarch.8
@@ -0,0 +1,143 @@
+.TH SETARCH 8 "December 2017" "util-linux" "System Administration"
+.SH NAME
+setarch \- change reported architecture in new program environment and/or set personality flags
+.SH SYNOPSIS
+.B setarch
+.RI [ arch ]
+[options]
+.RI [ program
+.RI [ argument ...]]
+.sp
+.B setarch
+.BR \-\-list | \-h | \-V
+.sp
+.B arch
+[options]
+.RI [ program
+.RI [ argument ...]]
+.SH DESCRIPTION
+.B setarch
+modifies execution domains and process personality flags.
+.PP
+The execution domain currently only affects the output of \fBuname -m\fR.
+For example, on an AMD64 system, running \fBsetarch i386 \fIprogram\fR
+will cause \fIprogram\fR to see i686 instead of x86_64 as the machine type.
+It also allows setting various personality options.
+The default \fIprogram\fR is \fB/bin/sh\fR.
+.PP
+Since version 2.33 the
+.I arch
+command line argument is optional and
+.B setarch
+may be used to change personality flags (ADDR_LIMIT_*, SHORT_INODE, etc) without
+modification of the execution domain.
+.SH OPTIONS
+.TP
+.B \-\-list
+List the architectures that \fBsetarch\fR knows about. Whether \fBsetarch\fR
+can actually set each of these architectures depends on the running kernel.
+.TP
+.B \-\-uname\-2.6
+Causes the \fIprogram\fR to see a kernel version number beginning with 2.6.
+Turns on UNAME26.
+.TP
+.BR \-v , " \-\-verbose"
+Be verbose.
+.TP
+\fB\-3\fR, \fB\-\-3gb\fR
+Specifies
+.I program
+should use a maximum of 3GB of address space. Supported on x86. Turns on
+ADDR_LIMIT_3GB.
+.TP
+\fB\-\-4gb\fR
+This option has no effect. It is retained for backward compatibility only,
+and may be removed in future releases.
+.TP
+\fB\-B\fR, \fB\-\-32bit\fR
+Limit the address space to 32 bits to emulate hardware. Supported on ARM
+and Alpha. Turns on ADDR_LIMIT_32BIT.
+.TP
+\fB\-F\fR, \fB\-\-fdpic\-funcptrs\fR
+Treat user-space function pointers to signal handlers as pointers to address
+descriptors. This option has no effect on architectures that do not support
+FDPIC ELF binaries. In kernel v4.14 support is limited to ARM, Blackfin,
+Fujitsu FR-V, and SuperH CPU architectures.
+.TP
+\fB\-I\fR, \fB\-\-short\-inode\fR
+Obsolete bug emulation flag. Turns on SHORT_INODE.
+.TP
+\fB\-L\fR, \fB\-\-addr\-compat\-layout\fR
+Provide legacy virtual address space layout. Use when the
+.I program
+binary does not have a PT_GNU_STACK ELF header. Turns on
+ADDR_COMPAT_LAYOUT.
+.TP
+\fB\-R\fR, \fB\-\-addr\-no\-randomize\fR
+Disables randomization of the virtual address space. Turns on
+ADDR_NO_RANDOMIZE.
+.TP
+\fB\-S\fR, \fB\-\-whole\-seconds\fR
+Obsolete bug emulation flag. Turns on WHOLE_SECONDS.
+.TP
+\fB\-T\fR, \fB\-\-sticky\-timeouts\fR
+This makes
+.BR select (2),
+.BR pselect (2),
+and
+.BR ppoll (2)
+system calls preserve the timeout value instead of modifying it to reflect
+the amount of time not slept when interrupted by a signal handler. Use when
+.I program
+depends on this behavior. For more details see the timeout description in
+.BR select (2)
+manual page. Turns on STICKY_TIMEOUTS.
+.TP
+\fB\-X\fR, \fB\-\-read\-implies\-exec\fR
+If this is set then
+.BR mmap (2)
+PROT_READ will also add the PROT_EXEC bit - as expected by legacy x86
+binaries. Notice that the ELF loader will automatically set this bit when
+it encounters a legacy binary. Turns on READ_IMPLIES_EXEC.
+.TP
+\fB\-Z\fR, \fB\-\-mmap\-page\-zero\fR
+SVr4 bug emulation that will set
+.BR mmap (2)
+page zero as read-only. Use when
+.I program
+depends on this behavior, and the source code is not available to be fixed.
+Turns on MMAP_PAGE_ZERO.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH EXAMPLES
+setarch --addr-no-randomize mytestprog
+.br
+setarch ppc32 rpmbuild --target=ppc --rebuild foo.src.rpm
+.br
+setarch ppc32 -v -vL3 rpmbuild --target=ppc --rebuild bar.src.rpm
+.br
+setarch ppc32 --32bit rpmbuild --target=ppc --rebuild foo.src.rpm
+.SH AUTHOR
+.MT sopwith@redhat.com
+Elliot Lee
+.ME
+.br
+.MT jnovy@redhat.com
+Jindrich Novy
+.ME
+.br
+.MT kzak@redhat.com
+Karel Zak
+.ME
+.SH "SEE ALSO"
+.BR personality (2),
+.BR select (2)
+.SH AVAILABILITY
+The setarch command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/setarch.c b/sys-utils/setarch.c
new file mode 100644
index 0000000..7c0a63f
--- /dev/null
+++ b/sys-utils/setarch.c
@@ -0,0 +1,446 @@
+/*
+ * Copyright (C) 2003-2007 Red Hat, Inc.
+ *
+ * This file is part of util-linux.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ *
+ * Written by Elliot Lee <sopwith@redhat.com>
+ * New personality options & code added by Jindrich Novy <jnovy@redhat.com>
+ * ADDR_NO_RANDOMIZE flag added by Arjan van de Ven <arjanv@redhat.com>
+ * Help and MIPS support from Mike Frysinger (vapier@gentoo.org)
+ * Better error handling from Dmitry V. Levin (ldv@altlinux.org)
+ *
+ * based on ideas from the ppc32 util by Guy Streeter (2002-01), based on the
+ * sparc32 util by Jakub Jelinek (1998, 1999)
+ */
+
+#include <sys/personality.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <limits.h>
+#include <sys/utsname.h>
+#include "nls.h"
+#include "c.h"
+#include "closestream.h"
+
+#ifndef HAVE_PERSONALITY
+# include <syscall.h>
+# define personality(pers) ((long)syscall(SYS_personality, pers))
+#endif
+
+/*
+ * Set the personality flag _flag in the flag word _opts.  When a variable
+ * named "verbose" in the expanding scope is non-zero, the flag is also
+ * reported by its spelled-out name.
+ */
+#define turn_on(_flag, _opts) \
+	do { \
+		(_opts) |= (_flag); \
+		if (verbose) \
+			printf(_("Switching on %s.\n"), #_flag); \
+	} while(0)
+
+#ifndef UNAME26
+# define UNAME26 0x0020000
+#endif
+#ifndef ADDR_NO_RANDOMIZE
+# define ADDR_NO_RANDOMIZE 0x0040000
+#endif
+#ifndef FDPIC_FUNCPTRS
+# define FDPIC_FUNCPTRS 0x0080000
+#endif
+#ifndef MMAP_PAGE_ZERO
+# define MMAP_PAGE_ZERO 0x0100000
+#endif
+#ifndef ADDR_COMPAT_LAYOUT
+# define ADDR_COMPAT_LAYOUT 0x0200000
+#endif
+#ifndef READ_IMPLIES_EXEC
+# define READ_IMPLIES_EXEC 0x0400000
+#endif
+#ifndef ADDR_LIMIT_32BIT
+# define ADDR_LIMIT_32BIT 0x0800000
+#endif
+#ifndef SHORT_INODE
+# define SHORT_INODE 0x1000000
+#endif
+#ifndef WHOLE_SECONDS
+# define WHOLE_SECONDS 0x2000000
+#endif
+#ifndef STICKY_TIMEOUTS
+# define STICKY_TIMEOUTS 0x4000000
+#endif
+#ifndef ADDR_LIMIT_3GB
+# define ADDR_LIMIT_3GB 0x8000000
+#endif
+
+
+struct arch_domain { /* one row of the table built by init_arch_domains() */
+ int perval; /* PER_* */ /* a negative value marks an unused/terminating row */
+ const char *target_arch; /* name matched against the requested architecture by get_arch_domain() */
+ const char *result_arch; /* machine name verify_arch_domain() expects from uname() afterwards; NULL skips the check */
+};
+
+
+/*
+ * Write the help screen to standard output and terminate the process with
+ * EXIT_SUCCESS.  With archwrapper non-zero (the program was started under
+ * another name than "setarch") the <arch> argument and the --list option
+ * are left out.
+ */
+static void __attribute__((__noreturn__)) usage(int archwrapper)
+{
+	FILE *out = stdout;
+
+	fputs(USAGE_HEADER, out);
+	if (archwrapper)
+		fprintf(out, _(" %s [options] [<program> [<argument>...]]\n"), program_invocation_short_name);
+	else
+		fprintf(out, _(" %s [<arch>] [options] [<program> [<argument>...]]\n"), program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Change the reported architecture and set personality flags.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -B, --32bit turns on ADDR_LIMIT_32BIT\n"), out);
+	fputs(_(" -F, --fdpic-funcptrs makes function pointers point to descriptors\n"), out);
+	fputs(_(" -I, --short-inode turns on SHORT_INODE\n"), out);
+	fputs(_(" -L, --addr-compat-layout changes the way virtual memory is allocated\n"), out);
+	fputs(_(" -R, --addr-no-randomize disables randomization of the virtual address space\n"), out);
+	fputs(_(" -S, --whole-seconds turns on WHOLE_SECONDS\n"), out);
+	fputs(_(" -T, --sticky-timeouts turns on STICKY_TIMEOUTS\n"), out);
+	fputs(_(" -X, --read-implies-exec turns on READ_IMPLIES_EXEC\n"), out);
+	fputs(_(" -Z, --mmap-page-zero turns on MMAP_PAGE_ZERO\n"), out);
+	fputs(_(" -3, --3gb limits the used address space to a maximum of 3 GB\n"), out);
+	fputs(_(" --4gb ignored (for backward compatibility only)\n"), out);
+	fputs(_(" --uname-2.6 turns on UNAME26\n"), out);
+	fputs(_(" -v, --verbose say what options are being switched on\n"), out);
+
+	/* --list is only offered under the "setarch" name */
+	if (!archwrapper)
+		fputs(_(" --list list settable architectures, and exit\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(26));
+	printf(USAGE_MAN_TAIL("setarch(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Returns the initialized list of all available execution domains.
+ *
+ * The list is a static table terminated by a row with a negative perval
+ * and NULL names.  If the machine name reported by uname() is not among the
+ * hardcoded target names, one place-holder row is filled with it.
+ *
+ * The utsname buffer is static on purpose: the filled-in row points into
+ * un.machine, and the table is used by the caller after this function has
+ * returned.
+ */
+static struct arch_domain *init_arch_domains(void)
+{
+	static struct utsname un;
+	size_t i;
+
+	static struct arch_domain transitions[] =
+	{
+		{UNAME26, "uname26", NULL},
+		{PER_LINUX32, "linux32", NULL},
+		{PER_LINUX, "linux64", NULL},
+#if defined(__powerpc__) || defined(__powerpc64__)
+# ifdef __BIG_ENDIAN__
+		{PER_LINUX32, "ppc32", "ppc"},
+		{PER_LINUX32, "ppc", "ppc"},
+		{PER_LINUX, "ppc64", "ppc64"},
+		{PER_LINUX, "ppc64pseries", "ppc64"},
+		{PER_LINUX, "ppc64iseries", "ppc64"},
+# else
+		{PER_LINUX32, "ppc32", "ppcle"},
+		{PER_LINUX32, "ppc", "ppcle"},
+		{PER_LINUX32, "ppc32le", "ppcle"},
+		{PER_LINUX32, "ppcle", "ppcle"},
+		{PER_LINUX, "ppc64le", "ppc64le"},
+# endif
+#endif
+#if defined(__x86_64__) || defined(__i386__) || defined(__ia64__)
+		{PER_LINUX32, "i386", "i386"},
+		{PER_LINUX32, "i486", "i386"},
+		{PER_LINUX32, "i586", "i386"},
+		{PER_LINUX32, "i686", "i386"},
+		{PER_LINUX32, "athlon", "i386"},
+#endif
+#if defined(__x86_64__) || defined(__i386__)
+		{PER_LINUX, "x86_64", "x86_64"},
+#endif
+#if defined(__ia64__) || defined(__i386__)
+		{PER_LINUX, "ia64", "ia64"},
+#endif
+#if defined(__hppa__)
+		{PER_LINUX32, "parisc32", "parisc"},
+		{PER_LINUX32, "parisc", "parisc"},
+		{PER_LINUX, "parisc64", "parisc64"},
+#endif
+#if defined(__s390x__) || defined(__s390__)
+		{PER_LINUX32, "s390", "s390"},
+		{PER_LINUX, "s390x", "s390x"},
+#endif
+#if defined(__sparc64__) || defined(__sparc__)
+		{PER_LINUX32, "sparc", "sparc"},
+		{PER_LINUX32, "sparc32bash", "sparc"},
+		{PER_LINUX32, "sparc32", "sparc"},
+		{PER_LINUX, "sparc64", "sparc64"},
+#endif
+#if defined(__mips64__) || defined(__mips__)
+		{PER_LINUX32, "mips32", "mips"},
+		{PER_LINUX32, "mips", "mips"},
+		{PER_LINUX, "mips64", "mips64"},
+#endif
+#if defined(__alpha__)
+		{PER_LINUX, "alpha", "alpha"},
+		{PER_LINUX, "alphaev5", "alpha"},
+		{PER_LINUX, "alphaev56", "alpha"},
+		{PER_LINUX, "alphaev6", "alpha"},
+		{PER_LINUX, "alphaev67", "alpha"},
+#endif
+		/* place holder, will be filled up at runtime */
+		{-1, NULL, NULL},
+		/* terminator, must stay untouched */
+		{-1, NULL, NULL}
+	};
+
+	/* Without a machine name there is nothing to add. */
+	if (uname(&un) < 0)
+		return transitions;
+
+	/* Add the trivial transition {PER_LINUX, machine, machine} if no
+	 * such target_arch is hardcoded yet. */
+	for (i = 0; transitions[i].perval >= 0; i++)
+		if (!strcmp(un.machine, transitions[i].target_arch))
+			break;
+	if (transitions[i].perval < 0) {
+		unsigned long wrdsz = CHAR_BIT * sizeof(void *);
+		if (wrdsz == 32 || wrdsz == 64) {
+			/* fill up the place holder */
+			transitions[i].perval = wrdsz == 32 ? PER_LINUX32 : PER_LINUX;
+			transitions[i].target_arch = un.machine;
+			transitions[i].result_arch = un.machine;
+		}
+	}
+
+	return transitions;
+}
+
+/*
+ * Print the target architecture name of every row in doms, one per line,
+ * stopping at the first row without a name.
+ */
+static void list_arch_domains(struct arch_domain *doms)
+{
+	struct arch_domain *row = doms;
+
+	while (row->target_arch != NULL) {
+		puts(row->target_arch);
+		row++;
+	}
+}
+
+/*
+ * Look up the row of doms whose target_arch equals pers.  Returns that row,
+ * or NULL when the end of the list (negative perval) is reached first.
+ */
+static struct arch_domain *get_arch_domain(struct arch_domain *doms, const char *pers)
+{
+	struct arch_domain *row;
+
+	for (row = doms; row->perval >= 0; row++)
+		if (strcmp(pers, row->target_arch) == 0)
+			return row;
+
+	return NULL;
+}
+
+/*
+ * Check that uname() now reports the machine name expected for dom and
+ * terminate the program otherwise; wanted is the name used in the message.
+ *
+ * Nothing is checked when dom is NULL or has no result_arch.  When the
+ * expected name is "i386", the names i486, i586, i686 and athlon are
+ * accepted as well.
+ */
+static void verify_arch_domain(struct arch_domain *dom, const char *wanted)
+{
+	struct utsname un;
+	int x86_32bit;
+
+	if (!dom || !dom->result_arch)
+		return;
+
+	uname(&un);
+	if (strcmp(un.machine, dom->result_arch) == 0)
+		return;
+
+	x86_32bit = !strcmp(un.machine, "i486")
+		 || !strcmp(un.machine, "i586")
+		 || !strcmp(un.machine, "i686")
+		 || !strcmp(un.machine, "athlon");
+
+	if (strcmp(dom->result_arch, "i386") == 0 && x86_32bit)
+		return;
+
+	errx(EXIT_FAILURE, _("Kernel cannot set architecture to %s"), wanted);
+}
+
+/*
+ * setarch entry point.
+ *
+ * The architecture comes from the program name when started under another
+ * name than "setarch", otherwise from an optional first argument that does
+ * not start with '-'.  Personality flags come from the options.  After the
+ * personality is set, the requested program is executed, or /bin/sh when no
+ * program is given (/bin/bash for the "sparc32bash" name).
+ *
+ * Only returns for --version and --list; otherwise the process image is
+ * replaced or the program terminates with an error.
+ */
+int main(int argc, char *argv[])
+{
+	const char *arch = NULL;
+	unsigned long options = 0;
+	int verbose = 0;
+	int archwrapper;
+	int c;
+	struct arch_domain *doms, *target = NULL;
+	unsigned long pers_value = 0;
+	char *shell = NULL, *shell_arg = NULL;
+
+	/* Options without equivalent short options */
+	enum {
+		OPT_4GB = CHAR_MAX + 1,
+		OPT_UNAME26,
+		OPT_LIST
+	};
+
+	/* Options --3gb and --4gb are for compatibility with an old
+	 * Debian setarch implementation. */
+	static const struct option longopts[] = {
+		{"help", no_argument, NULL, 'h'},
+		{"version", no_argument, NULL, 'V'},
+		{"verbose", no_argument, NULL, 'v'},
+		{"addr-no-randomize", no_argument, NULL, 'R'},
+		{"fdpic-funcptrs", no_argument, NULL, 'F'},
+		{"mmap-page-zero", no_argument, NULL, 'Z'},
+		{"addr-compat-layout", no_argument, NULL, 'L'},
+		{"read-implies-exec", no_argument, NULL, 'X'},
+		{"32bit", no_argument, NULL, 'B'},
+		{"short-inode", no_argument, NULL, 'I'},
+		{"whole-seconds", no_argument, NULL, 'S'},
+		{"sticky-timeouts", no_argument, NULL, 'T'},
+		{"3gb", no_argument, NULL, '3'},
+		{"4gb", no_argument, NULL, OPT_4GB},
+		{"uname-2.6", no_argument, NULL, OPT_UNAME26},
+		{"list", no_argument, NULL, OPT_LIST},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	if (argc < 1) {
+		warnx(_("Not enough arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+	archwrapper = strcmp(program_invocation_short_name, "setarch") != 0;
+	if (archwrapper) {
+		arch = program_invocation_short_name;	/* symlinks to setarch */
+
+		/* Don't use ifdef sparc here, we get "Unrecognized architecture"
+		 * error message later if necessary */
+		if (strcmp(arch, "sparc32bash") == 0) {
+			shell = "/bin/bash";
+			shell_arg = "";
+			goto set_arch;
+		}
+	} else {
+		if (1 < argc && *argv[1] != '-') {
+			arch = argv[1];
+			argv[1] = argv[0];	/* for getopt_long() to get the program name */
+			argv++;
+			argc--;
+		}
+	}
+
+	while ((c = getopt_long(argc, argv, "+hVv3BFILRSTXZ", longopts, NULL)) != -1) {
+		switch (c) {
+		case 'h':
+			usage(archwrapper);
+			break;
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'v':
+			verbose = 1;
+			break;
+		case 'R':
+			turn_on(ADDR_NO_RANDOMIZE, options);
+			break;
+		case 'F':
+			turn_on(FDPIC_FUNCPTRS, options);
+			break;
+		case 'Z':
+			turn_on(MMAP_PAGE_ZERO, options);
+			break;
+		case 'L':
+			turn_on(ADDR_COMPAT_LAYOUT, options);
+			break;
+		case 'X':
+			turn_on(READ_IMPLIES_EXEC, options);
+			break;
+		case 'B':
+			turn_on(ADDR_LIMIT_32BIT, options);
+			break;
+		case 'I':
+			turn_on(SHORT_INODE, options);
+			break;
+		case 'S':
+			turn_on(WHOLE_SECONDS, options);
+			break;
+		case 'T':
+			turn_on(STICKY_TIMEOUTS, options);
+			break;
+		case '3':
+			turn_on(ADDR_LIMIT_3GB, options);
+			break;
+		case OPT_4GB:	/* just ignore this one */
+			break;
+		case OPT_UNAME26:
+			turn_on(UNAME26, options);
+			break;
+		case OPT_LIST:
+			if (!archwrapper) {
+				list_arch_domains(init_arch_domains());
+				return EXIT_SUCCESS;
+			} else
+				warnx(_("unrecognized option '--list'"));
+			/* fallthrough */
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (!arch && !options)
+		errx(EXIT_FAILURE, _("no architecture argument or personality flags specified"));
+
+	argc -= optind;
+	argv += optind;
+
+set_arch:
+	/* get execution domain (architecture) */
+	if (arch) {
+		doms = init_arch_domains();
+		target = get_arch_domain(doms, arch);
+
+		if (!target)
+			errx(EXIT_FAILURE, _("%s: Unrecognized architecture"), arch);
+		pers_value = target->perval;
+	}
+
+	/* add personality flags */
+	pers_value |= options;
+
+	/* call kernel */
+	if (personality(pers_value) < 0) {
+		/*
+		 * Depending on architecture and kernel version, personality
+		 * syscall is either capable or incapable of returning an error.
+		 * If the return value is not an error, then it's the previous
+		 * personality value, which can be an arbitrary value
+		 * undistinguishable from an error value.
+		 * To make things clear, a second call is needed.
+		 */
+		if (personality(pers_value) < 0) {
+			if (arch)
+				err(EXIT_FAILURE, _("failed to set personality to %s"), arch);
+			/* flags-only invocation: there is no architecture
+			 * name, so report the numeric value instead of
+			 * passing NULL to %s */
+			err(EXIT_FAILURE, _("failed to set personality to 0x%lx"), pers_value);
+		}
+	}
+
+	/* make sure architecture is set as expected */
+	if (arch)
+		verify_arch_domain(target, arch);
+
+	if (!argc) {
+		shell = "/bin/sh";
+		shell_arg = "-sh";
+	}
+	if (verbose) {
+		printf(_("Execute command `%s'.\n"), shell ? shell : argv[0]);
+		/* flush all output streams before exec */
+		fflush(NULL);
+	}
+
+	/* Execute shell */
+	if (shell) {
+		/* the terminating null pointer of a variadic exec call must
+		 * have type char *; a bare NULL may be a plain integer 0 */
+		execl(shell, shell_arg, (char *) NULL);
+		errexec(shell);
+	}
+
+	/* Execute on command line specified command */
+	execvp(argv[0], argv);
+	errexec(argv[0]);
+}
diff --git a/sys-utils/setpriv.1 b/sys-utils/setpriv.1
new file mode 100644
index 0000000..9ff9058
--- /dev/null
+++ b/sys-utils/setpriv.1
@@ -0,0 +1,222 @@
+.TH SETPRIV 1 "July 2014" "util-linux" "User Commands"
+.SH NAME
+setpriv \- run a program with different Linux privilege settings
+.SH SYNOPSIS
+.B setpriv
+[options]
+.I program
+.RI [ arguments ]
+.SH DESCRIPTION
+Sets or queries various Linux privilege settings that are inherited across
+.BR execve (2).
+.PP
+In comparison to
+.BR su (1)
+and
+.BR runuser (1),
+.BR setpriv (1)
+neither uses PAM, nor does it prompt for a password.
+It is a simple, non-set-user-ID wrapper around
+.BR execve (2),
+and can be used to drop privileges in the same way as
+.BR setuidgid (8)
+from
+.BR daemontools ,
+.BR chpst (8)
+from
+.BR runit ,
+or similar tools shipped by other service managers.
+.SH OPTIONS
+.TP
+.B \-\-clear\-groups
+Clear supplementary groups.
+.TP
+.BR \-d , " \-\-dump"
+Dump current privilege state. Can be specified more than once to show extra,
+mostly useless, information. Incompatible with all other options.
+.TP
+.B \-\-groups \fIgroup\fR...
+Set supplementary groups. The argument is a comma-separated list of GIDs or names.
+.TP
+.BR \-\-inh\-caps " (" + | \- ) \fIcap "... or " \-\-ambient\-caps " (" + | \- ) \fIcap "... or " \-\-bounding\-set " (" + | \- ) \fIcap ...
+Set the inheritable capabilities, ambient capabilities or the capability bounding set. See
+.BR capabilities (7).
+The argument is a comma-separated list of
+.BI + cap
+and
+.BI \- cap
+entries, which add or remove an entry respectively. \fIcap\fR can either be a
+human-readable name as seen in
+.BR capabilities (7)
+without the \fIcap_\fR prefix or of the format
+.BI cap_N ,
+where \fIN\fR is the internal capability index used by Linux.
+.B +all
+and
+.B \-all
+can be used to add or remove all caps. The set of capabilities starts out as
+the current inheritable set for
+.BR \-\-inh\-caps ,
+the current ambient set for
+.B \-\-ambient\-caps
+and the current bounding set for
+.BR \-\-bounding\-set .
+If you drop something from the bounding set without also dropping it from the
+inheritable set, you are likely to become confused. Do not do that.
+.TP
+.B \-\-keep\-groups
+Preserve supplementary groups. Only useful in conjunction with
+.BR \-\-rgid ,
+.BR \-\-egid ", or"
+.BR \-\-regid .
+.TP
+.B \-\-init\-groups
+Initialize supplementary groups using
+.BR initgroups "(3)."
+Only useful in conjunction with
+.B \-\-ruid
+or
+.BR \-\-reuid .
+.TP
+.B \-\-list\-caps
+List all known capabilities. This option must be specified alone.
+.TP
+.B \-\-no\-new\-privs
+Set the
+.I no_new_privs
+bit. With this bit set,
+.BR execve (2)
+will not grant new privileges.
+For example, the set-user-ID and set-group-ID bits as well
+as file capabilities will be disabled. (Executing binaries with these bits set
+will still work, but they will not gain privileges. Certain LSMs, especially
+AppArmor, may result in failures to execute certain programs.) This bit is
+inherited by child processes and cannot be unset. See
+.BR prctl (2)
+and
+.I Documentation/\:prctl/\:no_\:new_\:privs.txt
+in the Linux kernel source.
+.sp
+The no_new_privs bit is supported since Linux 3.5.
+.TP
+.BI \-\-rgid " gid\fR, " \-\-egid " gid\fR, " \-\-regid " gid"
+Set the real, effective, or both GIDs. The \fIgid\fR argument can be
+given as textual group name.
+.sp
+For safety, you must specify one of
+.BR \-\-clear\-groups ,
+.BR \-\-groups ,
+.BR \-\-keep\-groups ", or"
+.B \-\-init\-groups
+if you set any primary
+.IR gid .
+.TP
+.BI \-\-ruid " uid\fR, " \-\-euid " uid\fR, " \-\-reuid " uid"
+Set the real, effective, or both UIDs. The \fIuid\fR argument can be
+given as textual login name.
+.sp
+Setting a
+.I uid
+or
+.I gid
+does not change capabilities, although the exec call at the end might change
+capabilities. This means that, if you are root, you probably want to do
+something like:
+.sp
+.B " setpriv \-\-reuid=1000 \-\-regid=1000 \-\-inh\-caps=\-all"
+.TP
+.BR \-\-securebits " (" + | \- ) \fIsecurebit ...
+Set or clear securebits. The argument is a comma-separated list.
+The valid securebits are
+.IR noroot ,
+.IR noroot_locked ,
+.IR no_setuid_fixup ,
+.IR no_setuid_fixup_locked ,
+and
+.IR keep_caps_locked .
+.I keep_caps
+is cleared by
+.BR execve (2)
+and is therefore not allowed.
+.TP
+.BR "\-\-pdeathsig keep" | clear | <signal>
+Keep, clear or set the parent death signal. Some LSMs, most notably SELinux and
+AppArmor, clear the signal when the process' credentials change. Using
+\fB\-\-pdeathsig keep\fR will restore the parent death signal after changing
+credentials to remedy that situation.
+.TP
+.BI \-\-selinux\-label " label"
+Request a particular SELinux transition (using a transition on exec, not
+dyntrans). This will fail and cause
+.BR setpriv (1)
+to abort if SELinux is not in use, and the transition may be ignored or cause
+.BR execve (2)
+to fail at SELinux's whim. (In particular, this is unlikely to work in
+conjunction with
+.IR no_new_privs .)
+This is similar to
+.BR runcon (1).
+.TP
+.BI \-\-apparmor\-profile " profile"
+Request a particular AppArmor profile (using a transition on exec). This will
+fail and cause
+.BR setpriv (1)
+to abort if AppArmor is not in use, and the transition may be ignored or cause
+.BR execve (2)
+to fail at AppArmor's whim.
+.TP
+.BI \-\-reset\-env
+Clears all the environment variables except TERM; initializes the environment variables HOME, SHELL, USER, LOGNAME
+according to the user's passwd entry; sets PATH to \fI/usr/local/bin:/bin:/usr/bin\fR for a regular user and to
+\fI/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin\fR for root.
+.sp
+The environment variable PATH may be different on systems where /bin and /sbin
+are merged into /usr. The environment variable SHELL defaults to \fI/bin/sh\fR if none is given in the user's
+passwd entry.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH NOTES
+If applying any specified option fails,
+.I program
+will not be run and
+.B setpriv
+will return with exit code 127.
+.PP
+Be careful with this tool \-\- it may have unexpected security consequences.
+For example, setting no_new_privs and then execing a program that is
+SELinux\-confined (as this tool would do) may prevent the SELinux
+restrictions from taking effect.
+.SH EXAMPLE
+If you're looking for behaviour similar to
+.BR su (1)/ runuser "(1), or " sudo (8)
+(without the
+.B \-g
+option), try something like:
+.sp
+.B " setpriv \-\-reuid=1000 \-\-regid=1000 \-\-init\-groups"
+.PP
+If you want to mimic daemontools'
+.BR setuidgid (8),
+try:
+.sp
+.B " setpriv \-\-reuid=1000 \-\-regid=1000 \-\-clear\-groups"
+.SH SEE ALSO
+.BR runuser (1),
+.BR su (1),
+.BR prctl (2),
+.BR capabilities (7)
+.SH AUTHOR
+.MT luto@amacapital.net
+Andy Lutomirski
+.ME
+.SH AVAILABILITY
+The
+.B setpriv
+command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/setpriv.c b/sys-utils/setpriv.c
new file mode 100644
index 0000000..828ddc1
--- /dev/null
+++ b/sys-utils/setpriv.c
@@ -0,0 +1,1096 @@
+/*
+ * setpriv(1) - set various kernel privilege bits and run something
+ *
+ * Copyright (C) 2012 Andy Lutomirski <luto@amacapital.net>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <cap-ng.h>
+#include <errno.h>
+#include <getopt.h>
+#include <grp.h>
+#include <linux/securebits.h>
+#include <pwd.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "c.h"
+#include "closestream.h"
+#include "nls.h"
+#include "optutils.h"
+#include "strutils.h"
+#include "xalloc.h"
+#include "pathnames.h"
+#include "signames.h"
+#include "env.h"
+
+#ifndef PR_SET_NO_NEW_PRIVS
+# define PR_SET_NO_NEW_PRIVS 38
+#endif
+#ifndef PR_GET_NO_NEW_PRIVS
+# define PR_GET_NO_NEW_PRIVS 39
+#endif
+
+#ifndef PR_CAP_AMBIENT
+# define PR_CAP_AMBIENT 47
+# define PR_CAP_AMBIENT_IS_SET 1
+# define PR_CAP_AMBIENT_RAISE 2
+# define PR_CAP_AMBIENT_LOWER 3
+#endif
+
+#define SETPRIV_EXIT_PRIVERR 127 /* how we exit when we fail to set privs */
+
+/* The shell to set SHELL env.variable if none is given in the user's passwd entry. */
+#define DEFAULT_SHELL "/bin/sh"
+
+static gid_t get_group(const char *s, const char *err);
+
+/*
+ * Capability sets that setpriv can query or modify. The first four values
+ * are libcap-ng's own capng_type_t constants and are cast straight back to
+ * capng_type_t by has_cap() and cap_update(). CAP_TYPE_AMBIENT is a local
+ * value that those helpers route to prctl(PR_CAP_AMBIENT) instead of
+ * libcap-ng.
+ */
+enum cap_type {
+ CAP_TYPE_EFFECTIVE = CAPNG_EFFECTIVE,
+ CAP_TYPE_PERMITTED = CAPNG_PERMITTED,
+ CAP_TYPE_INHERITABLE = CAPNG_INHERITABLE,
+ CAP_TYPE_BOUNDING = CAPNG_BOUNDING_SET,
+ CAP_TYPE_AMBIENT = (1 << 4)
+};
+
+/*
+ * Note: We are subject to https://bugzilla.redhat.com/show_bug.cgi?id=895105
+ * and we will therefore have problems if new capabilities are added. Once
+ * that bug is fixed, I'll (Andy Lutomirski) submit a corresponding fix to
+ * setpriv. In the mean time, the code here tries to work reasonably well.
+ */
+
+/*
+ * Everything requested on the command line. main() zero-fills one instance,
+ * fills it in while parsing options and then applies it before exec.
+ */
+struct privctx {
+ unsigned int
+ nnp:1, /* no_new_privs */
+ have_ruid:1, /* real uid */
+ have_euid:1, /* effective uid */
+ have_rgid:1, /* real gid */
+ have_egid:1, /* effective gid */
+ have_passwd:1, /* passwd entry */
+ have_groups:1, /* add groups */
+ keep_groups:1, /* keep groups */
+ clear_groups:1, /* remove groups */
+ init_groups:1, /* initialize groups */
+ reset_env:1, /* reset environment */
+ have_securebits:1; /* securebits field below is valid (--securebits given) */
+
+ /* uids and gids */
+ uid_t ruid, euid;
+ gid_t rgid, egid;
+
+ /* real user passwd entry (deep copy, valid when have_passwd is set) */
+ struct passwd passwd;
+
+ /* supplementary groups (array of num_groups entries from --groups) */
+ size_t num_groups;
+ gid_t *groups;
+
+ /* caps (raw option arguments, parsed later by do_caps()) */
+ const char *caps_to_inherit;
+ const char *ambient_caps;
+ const char *bounding_set;
+
+ /* securebits */
+ int securebits;
+ /* parent death signal (<0 clear, 0 nothing, >0 signal) */
+ int pdeathsig;
+
+ /* LSMs */
+ const char *selinux_label;
+ const char *apparmor_profile;
+};
+
+/*
+ * Print the help text to stdout and exit with EXIT_SUCCESS.
+ *
+ * The --rgid placeholder and the --groups description talk about groups and
+ * GIDs: both arguments are resolved by get_group(), i.e. as a group name or
+ * a numeric group id, never as a user.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options] <program> [<argument>...]\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Run a program with different privilege settings.\n"), out);
+
+	fputs(USAGE_OPTIONS, out);
+	fputs(_(" -d, --dump show current state (and do not exec)\n"), out);
+	fputs(_(" --nnp, --no-new-privs disallow granting new privileges\n"), out);
+	fputs(_(" --ambient-caps <caps,...> set ambient capabilities\n"), out);
+	fputs(_(" --inh-caps <caps,...> set inheritable capabilities\n"), out);
+	fputs(_(" --bounding-set <caps> set capability bounding set\n"), out);
+	fputs(_(" --ruid <uid|user> set real uid\n"), out);
+	fputs(_(" --euid <uid|user> set effective uid\n"), out);
+	fputs(_(" --rgid <gid|group> set real gid\n"), out);
+	fputs(_(" --egid <gid|group> set effective gid\n"), out);
+	fputs(_(" --reuid <uid|user> set real and effective uid\n"), out);
+	fputs(_(" --regid <gid|group> set real and effective gid\n"), out);
+	fputs(_(" --clear-groups clear supplementary groups\n"), out);
+	fputs(_(" --keep-groups keep supplementary groups\n"), out);
+	fputs(_(" --init-groups initialize supplementary groups\n"), out);
+	fputs(_(" --groups <group,...> set supplementary groups by GID or name\n"), out);
+	fputs(_(" --securebits <bits> set securebits\n"), out);
+	fputs(_(" --pdeathsig keep|clear|<signame>\n"
+		" set or clear parent death signal\n"), out);
+	fputs(_(" --selinux-label <label> set SELinux label\n"), out);
+	fputs(_(" --apparmor-profile <pr> set AppArmor profile\n"), out);
+	fputs(_(" --reset-env clear all environment and initialize\n"
+		" HOME, SHELL, USER, LOGNAME and PATH\n"), out);
+
+	fputs(USAGE_SEPARATOR, out);
+	printf(USAGE_HELP_OPTIONS(29));
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_(" This tool can be dangerous. Read the manpage, and be careful.\n"), out);
+	printf(USAGE_MAN_TAIL("setpriv(1)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * Return the highest capability number, read from _PATH_PROC_CAPLASTCAP.
+ * The compile-time CAP_LAST_CAP is only used as a guess when the file cannot
+ * be opened or parsed. The result is cached in a function-local static.
+ */
+static int real_cap_last_cap(void)
+{
+	static int cached = -1;	/* -1 means "not determined yet" */
+	FILE *fp;
+	int nread;
+
+	if (cached == -1) {
+		fp = fopen(_PATH_PROC_CAPLASTCAP, "r");
+		if (fp) {
+			nread = fscanf(fp, "%d", &cached);
+			fclose(fp);
+			if (nread != 1)
+				cached = CAP_LAST_CAP;	/* guess */
+		} else
+			cached = CAP_LAST_CAP;	/* guess */
+	}
+
+	return cached;
+}
+
+/*
+ * Test whether capability number i is present in the set selected by which.
+ * Ambient capabilities are queried with prctl(); every other set goes
+ * through libcap-ng. An unknown set produces a warning and -1.
+ */
+static int has_cap(enum cap_type which, unsigned int i)
+{
+	if (which == CAP_TYPE_AMBIENT)
+		return prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET,
+			     (unsigned long) i, 0UL, 0UL);
+
+	if (which == CAP_TYPE_EFFECTIVE || which == CAP_TYPE_BOUNDING ||
+	    which == CAP_TYPE_INHERITABLE || which == CAP_TYPE_PERMITTED)
+		return capng_have_capability((capng_type_t)which, i);
+
+	warnx(_("invalid capability type"));
+	return -1;
+}
+
+/*
+ * Write a comma-separated list of the capabilities contained in the set
+ * which to the stream f.
+ *
+ * Returns the number of capabilities printed, or -1 when the very first
+ * query (capability 0) already fails; dump() reports that as "unsupported"
+ * for the ambient set.
+ */
+static int print_caps(FILE *f, enum cap_type which)
+{
+	int i, n = 0, max = real_cap_last_cap();
+
+	for (i = 0; i <= max; i++) {
+		int ret = has_cap(which, i);
+
+		if (i == 0 && ret < 0)
+			return -1;
+
+		if (ret == 1) {
+			const char *name = capng_capability_to_name(i);
+			if (n)
+				fputc(',', f);
+			if (name)
+				fputs(name, f);
+			else
+				/* cap-ng has very poor handling of
+				 * CAP_LAST_CAP changes. This is the
+				 * best we can do. Write to f like the
+				 * rest of the list, not to stdout. */
+				fprintf(f, "cap_%d", i);
+			n++;
+		}
+	}
+
+	return n;
+}
+
+/*
+ * If bit is set in *bits, print name to stdout (preceded by a comma unless
+ * it is the first item) and clear the bit from *bits. *first is reset to 0
+ * once something has been printed.
+ */
+static void dump_one_secbit(int *first, int *bits, int bit, const char *name)
+{
+	if (!(*bits & bit))
+		return;
+
+	if (!*first)
+		printf(",");
+	*first = 0;
+
+	fputs(name, stdout);
+	*bits &= ~bit;
+}
+
+/*
+ * Print the "Securebits:" line of --dump. Known bits are printed by name,
+ * keep_caps is silently dropped, and whatever remains is shown as one hex
+ * number.
+ */
+static void dump_securebits(void)
+{
+	static const struct {
+		int mask;
+		const char *label;
+	} known[] = {
+		{ SECBIT_NOROOT, "noroot" },
+		{ SECBIT_NOROOT_LOCKED, "noroot_locked" },
+		{ SECBIT_NO_SETUID_FIXUP, "no_setuid_fixup" },
+		{ SECBIT_NO_SETUID_FIXUP_LOCKED, "no_setuid_fixup_locked" },
+		{ SECBIT_KEEP_CAPS_LOCKED, "keep_caps_locked" }
+	};
+	size_t k;
+	int first = 1;
+	int bits = prctl(PR_GET_SECUREBITS, 0, 0, 0, 0);
+
+	if (bits < 0) {
+		warnx(_("getting process secure bits failed"));
+		return;
+	}
+
+	printf(_("Securebits: "));
+
+	/* keep_caps is never reported */
+	bits &= ~SECBIT_KEEP_CAPS;
+
+	for (k = 0; k < ARRAY_SIZE(known); k++)
+		dump_one_secbit(&first, &bits, known[k].mask, known[k].label);
+
+	/* anything left over has no name here */
+	if (bits) {
+		if (!first)
+			printf(",");
+		first = 0;
+		printf("0x%x", (unsigned)bits);
+	}
+
+	if (!first)
+		printf("\n");
+	else
+		printf(_("[none]\n"));
+}
+
+/*
+ * Read _PATH_PROC_ATTR_CURRENT and print its content as "<name>: <content>".
+ * One trailing newline is stripped. Content that fills the whole buffer is
+ * rejected as too long.
+ */
+static void dump_label(const char *name)
+{
+	char label[4097];
+	ssize_t got;
+	int saved_errno;
+	int attr_fd = open(_PATH_PROC_ATTR_CURRENT, O_RDONLY);
+
+	if (attr_fd == -1) {
+		warn(_("cannot open %s"), _PATH_PROC_ATTR_CURRENT);
+		return;
+	}
+
+	got = read(attr_fd, label, sizeof(label));
+	saved_errno = errno;	/* close() may overwrite errno */
+	close(attr_fd);
+
+	if (got < 0) {
+		errno = saved_errno;
+		warn(_("cannot read %s"), name);
+		return;
+	}
+	if ((size_t)got >= sizeof(label) - 1) {
+		warnx(_("%s: too long"), name);
+		return;
+	}
+
+	label[got] = 0;
+	if (got > 0 && label[got - 1] == '\n')
+		label[got - 1] = 0;
+	printf("%s: %s\n", name, label);
+}
+
+/*
+ * Print the "Supplementary groups:" line of --dump as a comma-separated
+ * list of numeric group ids, or "[none]" when the list is empty.
+ */
+static void dump_groups(void)
+{
+	gid_t *list;
+	int idx;
+	int count = getgroups(0, NULL);	/* size query */
+
+	if (count < 0) {
+		warn("getgroups failed");
+		return;
+	}
+
+	list = xmalloc(count * sizeof(gid_t));
+	count = getgroups(count, list);
+	if (count < 0) {
+		free(list);
+		warn("getgroups failed");
+		return;
+	}
+
+	printf(_("Supplementary groups: "));
+	if (count != 0) {
+		for (idx = 0; idx < count; idx++) {
+			if (idx > 0)
+				printf(",");
+			printf("%ld", (long)list[idx]);
+		}
+	} else
+		printf(_("[none]"));
+	printf("\n");
+
+	free(list);
+}
+
+/*
+ * Print the "Parent death signal:" line of --dump: the signal name when
+ * signum_to_signame() knows it, the bare number otherwise, or "[none]" when
+ * no signal is set.
+ */
+static void dump_pdeathsig(void)
+{
+	int sig;
+	const char *signame;
+
+	if (prctl(PR_GET_PDEATHSIG, &sig) != 0) {
+		warn(_("get pdeathsig failed"));
+		return;
+	}
+
+	printf("Parent death signal: ");
+
+	if (!sig) {
+		printf("[none]\n");
+		return;
+	}
+
+	signame = signum_to_signame(sig);
+	if (signame != NULL)
+		printf("%s\n", signame);
+	else
+		printf("%d\n", sig);
+}
+
+/*
+ * Implement --dump: print the current credentials and privilege state to
+ * stdout.
+ *
+ * dumplevel is the number of times -d was given: level 2 adds the
+ * effective and permitted capability sets, level 3 adds the saved uid/gid.
+ * Failures of individual queries are reported as warnings and do not stop
+ * the dump.
+ */
+static void dump(int dumplevel)
+{
+	int x;
+	uid_t ru, eu, su;
+	gid_t rg, eg, sg;
+
+	if (getresuid(&ru, &eu, &su) == 0) {
+		printf(_("uid: %u\n"), ru);
+		printf(_("euid: %u\n"), eu);
+		/* Saved and fs uids always equal euid. */
+		if (3 <= dumplevel)
+			printf(_("suid: %u\n"), su);
+	} else
+		warn(_("getresuid failed"));
+
+	if (getresgid(&rg, &eg, &sg) == 0) {
+		printf("gid: %ld\n", (long)rg);
+		printf("egid: %ld\n", (long)eg);
+		/* Saved and fs gids always equal egid. */
+		if (dumplevel >= 3)
+			printf("sgid: %ld\n", (long)sg);
+	} else
+		warn(_("getresgid failed"));
+
+	dump_groups();
+
+	/* this only reads the flag, so the message says "getting" */
+	x = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+	if (0 <= x)
+		printf("no_new_privs: %d\n", x);
+	else
+		warn("getting no_new_privs failed");
+
+	if (2 <= dumplevel) {
+		printf(_("Effective capabilities: "));
+		if (print_caps(stdout, CAP_TYPE_EFFECTIVE) == 0)
+			printf(_("[none]"));
+		printf("\n");
+
+		printf(_("Permitted capabilities: "));
+		if (print_caps(stdout, CAP_TYPE_PERMITTED) == 0)
+			printf(_("[none]"));
+		printf("\n");
+	}
+
+	printf(_("Inheritable capabilities: "));
+	if (print_caps(stdout, CAP_TYPE_INHERITABLE) == 0)
+		printf(_("[none]"));
+	printf("\n");
+
+	/* print_caps() returns -1 when the first ambient query fails */
+	printf(_("Ambient capabilities: "));
+	x = print_caps(stdout, CAP_TYPE_AMBIENT);
+	if (x == 0)
+		printf(_("[none]"));
+	if (x < 0)
+		printf(_("[unsupported]"));
+	printf("\n");
+
+	printf(_("Capability bounding set: "));
+	if (print_caps(stdout, CAP_TYPE_BOUNDING) == 0)
+		printf(_("[none]"));
+	printf("\n");
+
+	dump_securebits();
+	dump_pdeathsig();
+
+	if (access(_PATH_SYS_SELINUX, F_OK) == 0)
+		dump_label(_("SELinux label"));
+
+	if (access(_PATH_SYS_APPARMOR, F_OK) == 0) {
+		dump_label(_("AppArmor profile"));
+	}
+}
+
+/*
+ * Implement --list-caps: print one capability name per line for every
+ * capability number up to real_cap_last_cap(). Numbers libcap-ng has no
+ * name for are reported with a warning.
+ */
+static void list_known_caps(void)
+{
+	const int last = real_cap_last_cap();
+	int cap;
+
+	for (cap = 0; cap <= last; cap++) {
+		const char *capname = capng_capability_to_name(cap);
+
+		if (!capname) {
+			warnx(_("cap %d: libcap-ng is broken"), cap);
+			continue;
+		}
+		printf("%s\n", capname);
+	}
+}
+
+/*
+ * Parse the comma-separated --groups argument str into opts->groups and
+ * opts->num_groups, and set opts->have_groups. Each item is resolved by
+ * get_group(), which exits on an item it cannot parse.
+ *
+ * The list is tokenized twice: once to count the items and once to convert
+ * them. strsep() leaves its cursor NULL when it runs out of tokens, so the
+ * scratch copy is tracked through a separate pointer and freed through
+ * that; freeing the cursor would be free(NULL) and leak the copy.
+ */
+static void parse_groups(struct privctx *opts, const char *str)
+{
+	char *buf = xstrdup(str);	/* owns the scratch copy */
+	char *cursor = buf;		/* advanced by strsep() */
+	char *c;
+	size_t i = 0;
+
+	opts->have_groups = 1;
+	opts->num_groups = 0;
+	while ((c = strsep(&cursor, ",")))
+		opts->num_groups++;
+
+	/* Start again: the first pass overwrote the commas with NULs */
+	strcpy(buf, str);	/* It's exactly the right length */
+	cursor = buf;
+
+	opts->groups = xcalloc(opts->num_groups, sizeof(gid_t));
+	while ((c = strsep(&cursor, ",")))
+		opts->groups[i++] = get_group(c, _("Invalid supplementary group id"));
+
+	free(buf);
+}
+
+/*
+ * Parse the --pdeathsig argument into opts->pdeathsig:
+ *   "keep"  - store the signal currently configured for this process
+ *   "clear" - store -1
+ *   other   - a signal name understood by signame_to_signum()
+ * Exits on an unknown signal or when the current signal cannot be read.
+ */
+static void parse_pdeathsig(struct privctx *opts, const char *str)
+{
+	if (strcmp(str, "keep") == 0) {
+		if (prctl(PR_GET_PDEATHSIG, &opts->pdeathsig) != 0)
+			errx(SETPRIV_EXIT_PRIVERR,
+			     _("failed to get parent death signal"));
+		return;
+	}
+
+	if (strcmp(str, "clear") == 0) {
+		opts->pdeathsig = -1;
+		return;
+	}
+
+	opts->pdeathsig = signame_to_signum(str);
+	if (opts->pdeathsig < 0)
+		errx(EXIT_FAILURE, _("unknown signal: %s"), str);
+}
+
+/*
+ * Apply the requested real/effective uid. Ids that were not requested keep
+ * their current value; the saved uid is always set to the new effective uid.
+ * Exits with SETPRIV_EXIT_PRIVERR on failure.
+ */
+static void do_setresuid(const struct privctx *opts)
+{
+	uid_t cur_real, cur_eff, cur_saved;
+	uid_t new_real, new_eff;
+
+	if (getresuid(&cur_real, &cur_eff, &cur_saved) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("getresuid failed"));
+
+	new_real = opts->have_ruid ? opts->ruid : cur_real;
+	new_eff = opts->have_euid ? opts->euid : cur_eff;
+
+	/* Also copy effective to saved (for paranoia). */
+	if (setresuid(new_real, new_eff, new_eff) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("setresuid failed"));
+}
+
+/*
+ * Apply the requested real/effective gid. Ids that were not requested keep
+ * their current value; the saved gid is always set to the new effective gid.
+ * Exits with SETPRIV_EXIT_PRIVERR on failure.
+ */
+static void do_setresgid(const struct privctx *opts)
+{
+	gid_t cur_real, cur_eff, cur_saved;
+	gid_t new_real, new_eff;
+
+	if (getresgid(&cur_real, &cur_eff, &cur_saved) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("getresgid failed"));
+
+	new_real = opts->have_rgid ? opts->rgid : cur_real;
+	new_eff = opts->have_egid ? opts->egid : cur_eff;
+
+	/* Also copy effective to saved (for paranoia). */
+	if (setresgid(new_real, new_eff, new_eff) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("setresgid failed"));
+}
+
+/*
+ * Add cap to libcap-ng's effective set, but only when the permitted set
+ * already contains it. Nothing is applied here; the caller runs
+ * capng_apply().
+ */
+static void bump_cap(unsigned int cap)
+{
+	if (!capng_have_capability(CAPNG_PERMITTED, cap))
+		return;
+
+	capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap);
+}
+
+/*
+ * Add or drop one capability in the set selected by type. Ambient
+ * capabilities are raised/lowered directly with prctl(); all other sets are
+ * updated through libcap-ng. Returns the result of the underlying call and
+ * exits on an unknown set.
+ */
+static int cap_update(capng_act_t action,
+		enum cap_type type, unsigned int cap)
+{
+	if (type == CAP_TYPE_AMBIENT) {
+		/* every action other than CAPNG_ADD lowers the capability */
+		int op = (action == CAPNG_ADD) ? PR_CAP_AMBIENT_RAISE
+					       : PR_CAP_AMBIENT_LOWER;
+
+		return prctl(PR_CAP_AMBIENT, op,
+			     (unsigned long) cap, 0UL, 0UL);
+	}
+
+	if (type == CAP_TYPE_EFFECTIVE || type == CAP_TYPE_BOUNDING ||
+	    type == CAP_TYPE_INHERITABLE || type == CAP_TYPE_PERMITTED)
+		return capng_update(action, (capng_type_t) type, cap);
+
+	errx(EXIT_FAILURE, _("unsupported capability type"));
+	return -1;
+}
+
+/*
+ * Apply a comma-separated list of "+cap" / "-cap" items to the capability
+ * set selected by type.
+ *
+ * An item is "all", a name known to libcap-ng, or "cap_<N>" with N no
+ * larger than real_cap_last_cap(). A malformed or unknown item is fatal.
+ * The changes are only recorded through cap_update(); for the libcap-ng
+ * sets the caller still has to run capng_apply().
+ *
+ * strsep() leaves its cursor NULL once the list is exhausted, so the
+ * duplicated string is tokenized through a separate cursor and released
+ * through the original pointer; freeing the cursor would leak the copy.
+ */
+static void do_caps(enum cap_type type, const char *caps)
+{
+	char *my_caps = xstrdup(caps);	/* owns the scratch copy */
+	char *cursor = my_caps;		/* advanced by strsep() */
+	char *c;
+
+	while ((c = strsep(&cursor, ","))) {
+		capng_act_t action;
+		if (*c == '+')
+			action = CAPNG_ADD;
+		else if (*c == '-')
+			action = CAPNG_DROP;
+		else
+			errx(EXIT_FAILURE, _("bad capability string"));
+
+		if (!strcmp(c + 1, "all")) {
+			int i;
+			/* It would be really bad if -all didn't drop all
+			 * caps. It's better to just fail. */
+			if (real_cap_last_cap() > CAP_LAST_CAP)
+				errx(SETPRIV_EXIT_PRIVERR,
+				     _("libcap-ng is too old for \"all\" caps"));
+			for (i = 0; i <= CAP_LAST_CAP; i++)
+				cap_update(action, type, i);
+		} else {
+			int cap = capng_name_to_capability(c + 1);
+			if (0 <= cap)
+				cap_update(action, type, cap);
+			else if (sscanf(c + 1, "cap_%d", &cap) == 1
+			    && 0 <= cap && cap <= real_cap_last_cap())
+				cap_update(action, type, cap);
+			else
+				errx(EXIT_FAILURE,
+				     _("unknown capability \"%s\""), c + 1);
+		}
+	}
+
+	free(my_caps);
+}
+
+/*
+ * Parse the --securebits argument, a comma-separated list of "+bit" and
+ * "-bit" items, into opts->securebits and set opts->have_securebits.
+ *
+ * The starting value is the process' current securebits. Parsing is
+ * refused when the kernel reports a bit this code does not know. "-all"
+ * clears everything, "+all" and any change to keep_caps are rejected.
+ * SECBIT_KEEP_CAPS is always set in the result.
+ *
+ * strsep() leaves its cursor NULL once the list is exhausted, so the
+ * duplicated string is tokenized through a separate cursor and released
+ * through the original pointer; freeing the cursor would leak the copy.
+ */
+static void parse_securebits(struct privctx *opts, const char *arg)
+{
+	char *buf = xstrdup(arg);	/* owns the scratch copy */
+	char *cursor = buf;		/* advanced by strsep() */
+	char *c;
+
+	opts->have_securebits = 1;
+	opts->securebits = prctl(PR_GET_SECUREBITS, 0, 0, 0, 0);
+	if (opts->securebits < 0)
+		err(SETPRIV_EXIT_PRIVERR, _("getting process secure bits failed"));
+
+	if (opts->securebits & ~(int)(SECBIT_NOROOT |
+				      SECBIT_NOROOT_LOCKED |
+				      SECBIT_NO_SETUID_FIXUP |
+				      SECBIT_NO_SETUID_FIXUP_LOCKED |
+				      SECBIT_KEEP_CAPS |
+				      SECBIT_KEEP_CAPS_LOCKED))
+		errx(SETPRIV_EXIT_PRIVERR,
+		     _("unrecognized securebit set -- refusing to adjust"));
+
+	while ((c = strsep(&cursor, ","))) {
+		if (*c != '+' && *c != '-')
+			errx(EXIT_FAILURE, _("bad securebits string"));
+
+		if (!strcmp(c + 1, "all")) {
+			if (*c == '-')
+				opts->securebits = 0;
+			else
+				errx(EXIT_FAILURE,
+				     _("+all securebits is not allowed"));
+		} else {
+			int bit;
+			if (!strcmp(c + 1, "noroot"))
+				bit = SECBIT_NOROOT;
+			else if (!strcmp(c + 1, "noroot_locked"))
+				bit = SECBIT_NOROOT_LOCKED;
+			else if (!strcmp(c + 1, "no_setuid_fixup"))
+				bit = SECBIT_NO_SETUID_FIXUP;
+			else if (!strcmp(c + 1, "no_setuid_fixup_locked"))
+				bit = SECBIT_NO_SETUID_FIXUP_LOCKED;
+			else if (!strcmp(c + 1, "keep_caps"))
+				errx(EXIT_FAILURE,
+				     _("adjusting keep_caps does not make sense"));
+			else if (!strcmp(c + 1, "keep_caps_locked"))
+				bit = SECBIT_KEEP_CAPS_LOCKED; /* sigh */
+			else
+				errx(EXIT_FAILURE, _("unrecognized securebit"));
+
+			if (*c == '+')
+				opts->securebits |= bit;
+			else
+				opts->securebits &= ~bit;
+		}
+	}
+
+	opts->securebits |= SECBIT_KEEP_CAPS; /* We need it, and it's reset on exec */
+
+	free(buf);
+}
+
+/*
+ * Request an SELinux label for the next exec by writing label to
+ * _PATH_PROC_ATTR_EXEC. Exits with SETPRIV_EXIT_PRIVERR when SELinux is not
+ * present or when the file cannot be opened, fully written or closed.
+ */
+static void do_selinux_label(const char *label)
+{
+	int attr_fd;
+	size_t label_len;
+	ssize_t written;
+
+	if (access(_PATH_SYS_SELINUX, F_OK) != 0)
+		errx(SETPRIV_EXIT_PRIVERR, _("SELinux is not running"));
+
+	attr_fd = open(_PATH_PROC_ATTR_EXEC, O_RDWR);
+	if (attr_fd == -1)
+		err(SETPRIV_EXIT_PRIVERR,
+		    _("cannot open %s"), _PATH_PROC_ATTR_EXEC);
+
+	label_len = strlen(label);
+	/* cleared so a short write is not reported with a stale errno */
+	errno = 0;
+	written = write(attr_fd, label, label_len);
+	if (written != (ssize_t) label_len)
+		err(SETPRIV_EXIT_PRIVERR,
+		    _("write failed: %s"), _PATH_PROC_ATTR_EXEC);
+
+	if (close(attr_fd) != 0)
+		err(SETPRIV_EXIT_PRIVERR,
+		    _("close failed: %s"), _PATH_PROC_ATTR_EXEC);
+}
+
+/*
+ * Request an AppArmor profile for the next exec by writing "exec <label>"
+ * to _PATH_PROC_ATTR_EXEC. Exits with SETPRIV_EXIT_PRIVERR when AppArmor is
+ * not present or when the file cannot be opened or written.
+ */
+static void do_apparmor_profile(const char *label)
+{
+	FILE *attr;
+
+	if (access(_PATH_SYS_APPARMOR, F_OK) != 0)
+		errx(SETPRIV_EXIT_PRIVERR, _("AppArmor is not running"));
+
+	attr = fopen(_PATH_PROC_ATTR_EXEC, "r+");
+	if (attr == NULL)
+		err(SETPRIV_EXIT_PRIVERR,
+		    _("cannot open %s"), _PATH_PROC_ATTR_EXEC);
+
+	fprintf(attr, "exec %s", label);
+
+	/* a failed write is only detected here, when the stream is closed */
+	if (close_stream(attr) != 0)
+		err(SETPRIV_EXIT_PRIVERR,
+		    _("write failed: %s"), _PATH_PROC_ATTR_EXEC);
+}
+
+
+/*
+ * Implement --reset-env: drop the whole environment except TERM, then set
+ * SHELL, HOME, USER, LOGNAME and PATH from the passwd entry pw.
+ *
+ * SHELL falls back to DEFAULT_SHELL when the entry has no shell. PATH is
+ * _PATH_DEFPATH_ROOT for uid 0 and _PATH_DEFPATH for everybody else.
+ * pw must not be NULL.
+ */
+static void do_reset_environ(struct passwd *pw)
+{
+	char *term = getenv("TERM");
+
+	/* the string returned by getenv() does not survive clearing the
+	 * environment, keep a private copy */
+	if (term)
+		term = xstrdup(term);
+#ifdef HAVE_CLEARENV
+	clearenv();
+#else
+	environ = NULL;
+#endif
+	if (term) {
+		xsetenv("TERM", term, 1);
+		/* setenv() stores its own copy, ours is no longer needed */
+		free(term);
+	}
+
+	if (pw->pw_shell && *pw->pw_shell)
+		xsetenv("SHELL", pw->pw_shell, 1);
+	else
+		xsetenv("SHELL", DEFAULT_SHELL, 1);
+
+	xsetenv("HOME", pw->pw_dir, 1);
+	xsetenv("USER", pw->pw_name, 1);
+	xsetenv("LOGNAME", pw->pw_name, 1);
+
+	if (pw->pw_uid)
+		xsetenv("PATH", _PATH_DEFPATH, 1);
+	else
+		xsetenv("PATH", _PATH_DEFPATH_ROOT, 1);
+}
+
+/*
+ * Resolve s to a uid. A matching login name wins; otherwise s is parsed as
+ * a number by strtol_or_err(), with err as its error message.
+ */
+static uid_t get_user(const char *s, const char *err)
+{
+	const struct passwd *entry = getpwnam(s);
+
+	if (entry != NULL)
+		return entry->pw_uid;
+
+	/* not a known login name, treat it as a numeric id */
+	return strtol_or_err(s, err);
+}
+
+/*
+ * Resolve s to a gid. A matching group name wins; otherwise s is parsed as
+ * a number by strtol_or_err(), with err as its error message.
+ */
+static gid_t get_group(const char *s, const char *err)
+{
+	const struct group *entry = getgrnam(s);
+
+	if (entry != NULL)
+		return entry->gr_gid;
+
+	/* not a known group name, treat it as a numeric id */
+	return strtol_or_err(s, err);
+}
+
+/*
+ * Resolve s (login name or numeric uid) to a uid, stored in *uid, and
+ * return the matching passwd entry. The return value is NULL when s is a
+ * number that has no passwd entry; *uid is set in that case too. A string
+ * that is neither a login name nor a number is handled by strtol_or_err()
+ * with err as its message.
+ */
+static struct passwd *get_passwd(const char *s, uid_t *uid, const char *err)
+{
+	struct passwd *entry = getpwnam(s);
+
+	if (entry) {
+		*uid = entry->pw_uid;
+		return entry;
+	}
+
+	/* not a login name: parse as a number and look that uid up */
+	*uid = strtol_or_err(s, err);
+	return getpwuid(*uid);
+}
+
+/*
+ * Copy the passwd entry src into dst and duplicate every string member with
+ * xstrdup(), so that dst no longer points into src's strings. Returns dst.
+ */
+static struct passwd *passwd_copy(struct passwd *dst, const struct passwd *src)
+{
+	/* scalar members first, the string pointers are replaced below */
+	*dst = *src;
+
+	dst->pw_name = xstrdup(src->pw_name);
+	dst->pw_passwd = xstrdup(src->pw_passwd);
+	dst->pw_gecos = xstrdup(src->pw_gecos);
+	dst->pw_dir = xstrdup(src->pw_dir);
+	dst->pw_shell = xstrdup(src->pw_shell);
+
+	return dst;
+}
+
+/*
+ * setpriv entry point.
+ *
+ * Parses the options into a struct privctx, handles the stand-alone modes
+ * (--dump, --list-caps), validates option combinations and then applies the
+ * settings in this order: environment reset, no_new_privs, LSM labels,
+ * uids, gids, supplementary groups, securebits, bounding set, inheritable
+ * caps, ambient caps and parent death signal. Finally the program named on
+ * the command line is executed with execvp().
+ *
+ * Differences from the previous revision:
+ *  - a repeated --pdeathsig is reported under its real name instead of the
+ *    non-existent "--keep-pdeathsig";
+ *  - --reset-env without --ruid/--reuid fails with a message when the
+ *    current uid has no passwd entry, instead of handing a NULL pointer to
+ *    do_reset_environ().
+ */
+int main(int argc, char **argv)
+{
+	enum {
+		NNP = CHAR_MAX + 1,
+		RUID,
+		EUID,
+		RGID,
+		EGID,
+		REUID,
+		REGID,
+		CLEAR_GROUPS,
+		KEEP_GROUPS,
+		INIT_GROUPS,
+		GROUPS,
+		INHCAPS,
+		AMBCAPS,
+		LISTCAPS,
+		CAPBSET,
+		SECUREBITS,
+		PDEATHSIG,
+		SELINUX_LABEL,
+		APPARMOR_PROFILE,
+		RESET_ENV
+	};
+
+	static const struct option longopts[] = {
+		{ "dump", no_argument, NULL, 'd' },
+		{ "nnp", no_argument, NULL, NNP },
+		{ "no-new-privs", no_argument, NULL, NNP },
+		{ "inh-caps", required_argument, NULL, INHCAPS },
+		{ "ambient-caps", required_argument, NULL, AMBCAPS },
+		{ "list-caps", no_argument, NULL, LISTCAPS },
+		{ "ruid", required_argument, NULL, RUID },
+		{ "euid", required_argument, NULL, EUID },
+		{ "rgid", required_argument, NULL, RGID },
+		{ "egid", required_argument, NULL, EGID },
+		{ "reuid", required_argument, NULL, REUID },
+		{ "regid", required_argument, NULL, REGID },
+		{ "clear-groups", no_argument, NULL, CLEAR_GROUPS },
+		{ "keep-groups", no_argument, NULL, KEEP_GROUPS },
+		{ "init-groups", no_argument, NULL, INIT_GROUPS },
+		{ "groups", required_argument, NULL, GROUPS },
+		{ "bounding-set", required_argument, NULL, CAPBSET },
+		{ "securebits", required_argument, NULL, SECUREBITS },
+		{ "pdeathsig", required_argument, NULL, PDEATHSIG, },
+		{ "selinux-label", required_argument, NULL, SELINUX_LABEL },
+		{ "apparmor-profile", required_argument, NULL, APPARMOR_PROFILE },
+		{ "help", no_argument, NULL, 'h' },
+		{ "reset-env", no_argument, NULL, RESET_ENV, },
+		{ "version", no_argument, NULL, 'V' },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {
+		/* keep in same order with enum definitions */
+		{CLEAR_GROUPS, KEEP_GROUPS, INIT_GROUPS, GROUPS},
+		{0}
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	int c;
+	struct privctx opts;
+	struct passwd *pw = NULL;
+	int dumplevel = 0;
+	int total_opts = 0;
+	int list_caps = 0;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	memset(&opts, 0, sizeof(opts));
+
+	/* the leading '+' stops option parsing at the program name */
+	while ((c = getopt_long(argc, argv, "+dhV", longopts, NULL)) != -1) {
+		err_exclusive_options(c, longopts, excl, excl_st);
+		total_opts++;
+		switch (c) {
+		case 'd':
+			dumplevel++;
+			break;
+		case NNP:
+			if (opts.nnp)
+				errx(EXIT_FAILURE,
+				     _("duplicate --no-new-privs option"));
+			opts.nnp = 1;
+			break;
+		case RUID:
+			if (opts.have_ruid)
+				errx(EXIT_FAILURE, _("duplicate ruid"));
+			opts.have_ruid = 1;
+			pw = get_passwd(optarg, &opts.ruid, _("failed to parse ruid"));
+			if (pw) {
+				passwd_copy(&opts.passwd, pw);
+				opts.have_passwd = 1;
+			}
+			break;
+		case EUID:
+			if (opts.have_euid)
+				errx(EXIT_FAILURE, _("duplicate euid"));
+			opts.have_euid = 1;
+			opts.euid = get_user(optarg, _("failed to parse euid"));
+			break;
+		case REUID:
+			if (opts.have_ruid || opts.have_euid)
+				errx(EXIT_FAILURE, _("duplicate ruid or euid"));
+			opts.have_ruid = opts.have_euid = 1;
+			pw = get_passwd(optarg, &opts.ruid, _("failed to parse reuid"));
+			opts.euid = opts.ruid;
+			if (pw) {
+				passwd_copy(&opts.passwd, pw);
+				opts.have_passwd = 1;
+			}
+			break;
+		case RGID:
+			if (opts.have_rgid)
+				errx(EXIT_FAILURE, _("duplicate rgid"));
+			opts.have_rgid = 1;
+			opts.rgid = get_group(optarg, _("failed to parse rgid"));
+			break;
+		case EGID:
+			if (opts.have_egid)
+				errx(EXIT_FAILURE, _("duplicate egid"));
+			opts.have_egid = 1;
+			opts.egid = get_group(optarg, _("failed to parse egid"));
+			break;
+		case REGID:
+			if (opts.have_rgid || opts.have_egid)
+				errx(EXIT_FAILURE, _("duplicate rgid or egid"));
+			opts.have_rgid = opts.have_egid = 1;
+			opts.rgid = opts.egid = get_group(optarg, _("failed to parse regid"));
+			break;
+		case CLEAR_GROUPS:
+			if (opts.clear_groups)
+				errx(EXIT_FAILURE,
+				     _("duplicate --clear-groups option"));
+			opts.clear_groups = 1;
+			break;
+		case KEEP_GROUPS:
+			if (opts.keep_groups)
+				errx(EXIT_FAILURE,
+				     _("duplicate --keep-groups option"));
+			opts.keep_groups = 1;
+			break;
+		case INIT_GROUPS:
+			if (opts.init_groups)
+				errx(EXIT_FAILURE,
+				     _("duplicate --init-groups option"));
+			opts.init_groups = 1;
+			break;
+		case GROUPS:
+			if (opts.have_groups)
+				errx(EXIT_FAILURE,
+				     _("duplicate --groups option"));
+			parse_groups(&opts, optarg);
+			break;
+		case PDEATHSIG:
+			if (opts.pdeathsig)
+				errx(EXIT_FAILURE,
+				     _("duplicate --pdeathsig option"));
+			parse_pdeathsig(&opts, optarg);
+			break;
+		case LISTCAPS:
+			list_caps = 1;
+			break;
+		case INHCAPS:
+			if (opts.caps_to_inherit)
+				errx(EXIT_FAILURE,
+				     _("duplicate --inh-caps option"));
+			opts.caps_to_inherit = optarg;
+			break;
+		case AMBCAPS:
+			if (opts.ambient_caps)
+				errx(EXIT_FAILURE,
+				     _("duplicate --ambient-caps option"));
+			opts.ambient_caps = optarg;
+			break;
+		case CAPBSET:
+			if (opts.bounding_set)
+				errx(EXIT_FAILURE,
+				     _("duplicate --bounding-set option"));
+			opts.bounding_set = optarg;
+			break;
+		case SECUREBITS:
+			if (opts.have_securebits)
+				errx(EXIT_FAILURE,
+				     _("duplicate --securebits option"));
+			parse_securebits(&opts, optarg);
+			break;
+		case SELINUX_LABEL:
+			if (opts.selinux_label)
+				errx(EXIT_FAILURE,
+				     _("duplicate --selinux-label option"));
+			opts.selinux_label = optarg;
+			break;
+		case APPARMOR_PROFILE:
+			if (opts.apparmor_profile)
+				errx(EXIT_FAILURE,
+				     _("duplicate --apparmor-profile option"));
+			opts.apparmor_profile = optarg;
+			break;
+		case RESET_ENV:
+			opts.reset_env = 1;
+			break;
+		case 'h':
+			usage();
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	/* every option seen must have been a -d, and no program may follow */
+	if (dumplevel) {
+		if (total_opts != dumplevel || optind < argc)
+			errx(EXIT_FAILURE,
+			     _("--dump is incompatible with all other options"));
+		dump(dumplevel);
+		return EXIT_SUCCESS;
+	}
+
+	if (list_caps) {
+		if (total_opts != 1 || optind < argc)
+			errx(EXIT_FAILURE,
+			     _("--list-caps must be specified alone"));
+		list_known_caps();
+		return EXIT_SUCCESS;
+	}
+
+	if (argc <= optind)
+		errx(EXIT_FAILURE, _("No program specified"));
+
+	if ((opts.have_rgid || opts.have_egid)
+	    && !opts.keep_groups && !opts.clear_groups && !opts.init_groups
+	    && !opts.have_groups)
+		errx(EXIT_FAILURE,
+		     _("--[re]gid requires --keep-groups, --clear-groups, --init-groups, or --groups"));
+
+	if (opts.init_groups && !opts.have_ruid)
+		errx(EXIT_FAILURE,
+		     _("--init-groups requires --ruid or --reuid"));
+
+	if (opts.init_groups && !opts.have_passwd)
+		errx(EXIT_FAILURE,
+		     _("uid %ld not found, --init-groups requires an user that "
+		       "can be found on the system"),
+		     (long) opts.ruid);
+
+	if (opts.reset_env) {
+		if (opts.have_passwd)
+			/* pwd according to --ruid or --reuid */
+			pw = &opts.passwd;
+		else
+			/* pwd for the current user */
+			pw = getpwuid(getuid());
+		/* getpwuid() returns NULL for a uid without passwd entry */
+		if (!pw)
+			errx(EXIT_FAILURE,
+			     _("uid %ld not found, --reset-env requires a user that "
+			       "can be found on the system"),
+			     (long) getuid());
+		do_reset_environ(pw);
+	}
+
+	if (opts.nnp && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1)
+		err(EXIT_FAILURE, _("disallow granting new privileges failed"));
+
+	if (opts.selinux_label)
+		do_selinux_label(opts.selinux_label);
+	if (opts.apparmor_profile)
+		do_apparmor_profile(opts.apparmor_profile);
+
+	if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) == -1)
+		err(EXIT_FAILURE, _("keep process capabilities failed"));
+
+	/* We're going to want CAP_SETPCAP, CAP_SETUID, and CAP_SETGID if
+	 * possible. */
+	bump_cap(CAP_SETPCAP);
+	bump_cap(CAP_SETUID);
+	bump_cap(CAP_SETGID);
+	if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("activate capabilities"));
+
+	if (opts.have_ruid || opts.have_euid) {
+		do_setresuid(&opts);
+		/* KEEPCAPS doesn't work for the effective mask. */
+		if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+			err(SETPRIV_EXIT_PRIVERR, _("reactivate capabilities"));
+	}
+
+	if (opts.have_rgid || opts.have_egid)
+		do_setresgid(&opts);
+
+	if (opts.have_groups) {
+		if (setgroups(opts.num_groups, opts.groups) != 0)
+			err(SETPRIV_EXIT_PRIVERR, _("setgroups failed"));
+	} else if (opts.init_groups) {
+		if (initgroups(opts.passwd.pw_name, opts.passwd.pw_gid) != 0)
+			err(SETPRIV_EXIT_PRIVERR, _("initgroups failed"));
+	} else if (opts.clear_groups) {
+		gid_t x = 0;
+		if (setgroups(0, &x) != 0)
+			err(SETPRIV_EXIT_PRIVERR, _("setgroups failed"));
+	}
+
+	if (opts.have_securebits && prctl(PR_SET_SECUREBITS, opts.securebits, 0, 0, 0) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("set process securebits failed"));
+
+	if (opts.bounding_set) {
+		do_caps(CAP_TYPE_BOUNDING, opts.bounding_set);
+		errno = EPERM;	/* capng doesn't set errno if we're missing CAP_SETPCAP */
+		if (capng_apply(CAPNG_SELECT_BOUNDS) != 0)
+			err(SETPRIV_EXIT_PRIVERR, _("apply bounding set"));
+	}
+
+	if (opts.caps_to_inherit) {
+		do_caps(CAP_TYPE_INHERITABLE, opts.caps_to_inherit);
+		if (capng_apply(CAPNG_SELECT_CAPS) != 0)
+			err(SETPRIV_EXIT_PRIVERR, _("apply capabilities"));
+	}
+
+	if (opts.ambient_caps) {
+		do_caps(CAP_TYPE_AMBIENT, opts.ambient_caps);
+	}
+
+	/* Clear or set parent death signal */
+	if (opts.pdeathsig && prctl(PR_SET_PDEATHSIG, opts.pdeathsig < 0 ? 0 : opts.pdeathsig) != 0)
+		err(SETPRIV_EXIT_PRIVERR, _("set parent death signal failed"));
+
+	execvp(argv[optind], argv + optind);
+	errexec(argv[optind]);
+}
diff --git a/sys-utils/setsid.1 b/sys-utils/setsid.1
new file mode 100644
index 0000000..64f0555
--- /dev/null
+++ b/sys-utils/setsid.1
@@ -0,0 +1,42 @@
+.\" Rick Sladkey <jrs@world.std.com>
+.\" In the public domain.
+.TH SETSID 1 "July 2014" "util-linux" "User Commands"
+.SH NAME
+setsid \- run a program in a new session
+.SH SYNOPSIS
+.B setsid
+[options]
+.I program
+.RI [ arguments ]
+.SH DESCRIPTION
+.B setsid
+runs a program in a new session. The command calls
+.BR fork (2)
+if already a process group leader. Otherwise, it executes a program in the
+current process. This default behavior can be overridden with
+the \fB\-\-fork\fR option.
+.SH OPTIONS
+.TP
+.BR \-c , " \-\-ctty"
+Set the controlling terminal to the current one.
+.TP
+.BR \-f , " \-\-fork"
+Always create a new process.
+.TP
+.BR \-w , " \-\-wait"
+Wait for the execution of the program to end, and return the exit value of
+this program as the return value of
+.BR setsid .
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH "SEE ALSO"
+.BR setsid (2)
+.SH AUTHOR
+Rick Sladkey <jrs@world.std.com>
+.SH AVAILABILITY
+The setsid command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/setsid.c b/sys-utils/setsid.c
new file mode 100644
index 0000000..8b4f83d
--- /dev/null
+++ b/sys-utils/setsid.c
@@ -0,0 +1,123 @@
+/*
+ * setsid.c -- execute a command in a new session
+ * Rick Sladkey <jrs@world.std.com>
+ * In the public domain.
+ *
+ * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ *
+ * 2001-01-18 John Fremlin <vii@penguinpowered.com>
+ * - fork in case we are process group leader
+ *
+ * 2008-08-20 Daniel Kahn Gillmor <dkg@fifthhorseman.net>
+ * - if forked, wait on child process and emit its return code.
+ */
+
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "c.h"
+#include "nls.h"
+#include "closestream.h"
+/*
+ * Print the setsid help text and terminate the process with EXIT_SUCCESS.
+ * Declared noreturn: control never comes back to the caller.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _(
+ " %s [options] <program> [arguments ...]\n"),
+ program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Run a program in a new session.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -c, --ctty set the controlling terminal to the current one\n"), out);
+ fputs(_(" -f, --fork always fork\n"), out);
+ fputs(_(" -w, --wait wait program to exit, and use the same return\n"), out);
+
+ printf(USAGE_HELP_OPTIONS(16)); /* NOTE(review): presumably the shared -h/-V lines; macro is defined outside this file */
+
+ printf(USAGE_MAN_TAIL("setsid(1)"));
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * setsid entry point.
+ *
+ * Parses -c/-f/-w/-V/-h, then:
+ *  - forks when --fork was given or when the caller is already a process
+ *    group leader (getpgrp() == getpid());
+ *  - in the parent, either returns immediately or, with --wait, waits for
+ *    the child and propagates its exit status;
+ *  - in the child (or in the original process when no fork happened),
+ *    calls setsid(), optionally acquires the controlling terminal on stdin
+ *    and finally exec()s the requested program.
+ *
+ * Returns EXIT_SUCCESS / the child's exit status in the parent; the exec
+ * path does not return on success.
+ */
+int main(int argc, char **argv)
+{
+	int ch, forcefork = 0;
+	int ctty = 0;
+	int wait_child = 0;	/* set by -w/--wait */
+	int status = 0;		/* wait(2) status of the forked child */
+	pid_t pid;
+
+	static const struct option longopts[] = {
+		{"ctty", no_argument, NULL, 'c'},
+		{"fork", no_argument, NULL, 'f'},
+		{"wait", no_argument, NULL, 'w'},
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	/* leading '+' in the optstring: stop at the first non-option so the
+	 * program's own options are left untouched */
+	while ((ch = getopt_long(argc, argv, "+Vhcfw", longopts, NULL)) != -1)
+		switch (ch) {
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'c':
+			ctty = 1;
+			break;
+		case 'f':
+			forcefork = 1;
+			break;
+		case 'w':
+			wait_child = 1;
+			break;
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+
+	if (argc - optind < 1) {
+		warnx(_("no command specified"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	if (forcefork || getpgrp() == getpid()) {
+		pid = fork();
+		switch (pid) {
+		case -1:
+			err(EXIT_FAILURE, _("fork"));
+		case 0:
+			/* child */
+			break;
+		default:
+			/* parent */
+			if (!wait_child)
+				return EXIT_SUCCESS;
+			if (wait(&status) != pid)
+				err(EXIT_FAILURE, "wait");
+			if (WIFEXITED(status))
+				return WEXITSTATUS(status);
+			/* wait() succeeded, so errno carries no information
+			 * here: use errx() to avoid appending a stale
+			 * strerror() text. Exit code is kept as before. */
+			errx(status, _("child %d did not exit normally"), pid);
+		}
+	}
+	if (setsid() < 0)
+		/* cannot happen */
+		err(EXIT_FAILURE, _("setsid failed"));
+
+	if (ctty && ioctl(STDIN_FILENO, TIOCSCTTY, 1))
+		err(EXIT_FAILURE, _("failed to set the controlling terminal"));
+	execvp(argv[optind], argv + optind);
+	errexec(argv[optind]);
+}
diff --git a/sys-utils/swapoff.8 b/sys-utils/swapoff.8
new file mode 100644
index 0000000..1a06b7e
--- /dev/null
+++ b/sys-utils/swapoff.8
@@ -0,0 +1 @@
+.so man8/swapon.8
diff --git a/sys-utils/swapoff.c b/sys-utils/swapoff.c
new file mode 100644
index 0000000..0a3807f
--- /dev/null
+++ b/sys-utils/swapoff.c
@@ -0,0 +1,253 @@
+#include <stdio.h>
+#include <errno.h>
+#include <getopt.h>
+
+#ifdef HAVE_SYS_SWAP_H
+# include <sys/swap.h>
+#endif
+
+#include "nls.h"
+#include "c.h"
+#include "xalloc.h"
+#include "closestream.h"
+
+#include "swapprober.h"
+#include "swapon-common.h"
+
+#if !defined(HAVE_SWAPOFF) && defined(SYS_swapoff)
+# include <sys/syscall.h>
+# define swapoff(path) syscall(SYS_swapoff, path)
+#endif
+
+static int verbose;
+static int all;
+
+#define QUIET 1
+#define CANONIC 1
+
+/*
+ * This function works like mnt_resolve_tag(), but it's able to read UUID/LABEL
+ * from regular swap files too (according to entries in /proc/swaps). Note that
+ * mnt_resolve_tag() and mnt_resolve_spec() work with system visible block
+ * devices only.
+ */
+static char *swapoff_resolve_tag(const char *name, const char *value,
+				 struct libmnt_cache *cache)
+{
+	struct libmnt_table *swaps_tb;
+	struct libmnt_iter *it;
+	struct libmnt_fs *entry;
+	char *found;
+
+	/* Common case first: block devices are resolved by libmount (which
+	 * by default goes through the udev /dev/disk/by-* symlinks). */
+	found = mnt_resolve_tag(name, value, cache);
+	if (found)
+		return found;
+
+	/* Fall back to probing regular swap files listed in /proc/swaps. */
+	swaps_tb = get_swaps();
+	if (!swaps_tb)
+		return NULL;
+
+	it = mnt_new_iter(MNT_ITER_BACKWARD);
+	if (!it)
+		err(EXIT_FAILURE, _("failed to initialize libmount iterator"));
+
+	/* Stop as soon as one entry's NAME tag equals the requested value. */
+	while (!found && mnt_table_next_fs(swaps_tb, it, &entry) == 0) {
+		const char *src = mnt_fs_get_source(entry);
+		const char *type = mnt_fs_get_swaptype(entry);
+		const char *tagval = NULL;
+		blkid_probe probe;
+
+		if (!src || !type || strcmp(type, "file") != 0)
+			continue;
+
+		probe = get_swap_prober(src);
+		if (!probe)
+			continue;
+
+		blkid_probe_lookup_value(probe, name, &tagval, NULL);
+		if (tagval && strcmp(tagval, value) == 0)
+			found = xstrdup(src);
+		blkid_free_probe(probe);
+	}
+
+	mnt_free_iter(it);
+	return found;
+}
+
+/*
+ * Disable swapping on one device or file.
+ *
+ * @orig_special: what the user (or a table) named; used in all messages
+ * @quiet:        non-zero suppresses the failure warning, except for ENOMEM
+ * @canonic:      non-zero means @orig_special is already a usable path;
+ *                zero means it still has to be resolved (path or TAG=value)
+ *
+ * Returns 0 on success, -1 on failure. EPERM terminates the program.
+ */
+static int do_swapoff(const char *orig_special, int quiet, int canonic)
+{
+	const char *target = orig_special;
+
+	if (verbose)
+		printf(_("swapoff %s\n"), orig_special);
+
+	if (!canonic) {
+		target = mnt_resolve_spec(orig_special, mntcache);
+
+		if (!target) {
+			char *tag_name, *tag_value;
+
+			/* not resolvable by libmount: try TAG=value against
+			 * swap files as well */
+			if (blkid_parse_tag_string(orig_special, &tag_name, &tag_value) == 0) {
+				target = swapoff_resolve_tag(tag_name, tag_value, mntcache);
+				free(tag_name);
+				free(tag_value);
+			}
+		}
+
+		if (!target)
+			return cannot_find(orig_special);
+	}
+
+	if (swapoff(target) != 0) {
+		if (errno == EPERM)
+			errx(EXIT_FAILURE, _("Not superuser."));
+
+		if (!quiet || errno == ENOMEM)
+			warn(_("%s: swapoff failed"), orig_special);
+
+		return -1;
+	}
+
+	return 0;	/* success */
+}
+
+/*
+ * Resolve a NAME=value tag (e.g. LABEL or UUID) to a path and swap it off.
+ * Returns the do_swapoff() result, or -1 when the tag cannot be resolved.
+ */
+static int swapoff_by(const char *name, const char *value, int quiet)
+{
+	const char *resolved = swapoff_resolve_tag(name, value, mntcache);
+
+	if (!resolved)
+		return cannot_find(value);
+
+	return do_swapoff(resolved, quiet, CANONIC);
+}
+/*
+ * Print the swapoff help text (options plus the accepted <spec> forms)
+ * and terminate the process with EXIT_SUCCESS. Never returns.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _(" %s [options] [<spec>]\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Disable devices and files for paging and swapping.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -a, --all disable all swaps from /proc/swaps\n"
+ " -v, --verbose verbose mode\n"), out);
+
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(24)); /* NOTE(review): presumably the shared -h/-V lines; macro is defined outside this file */
+
+ fputs(_("\nThe <spec> parameter:\n" \
+ " -L <label> LABEL of device to be used\n" \
+ " -U <uuid> UUID of device to be used\n" \
+ " LABEL=<label> LABEL of device to be used\n" \
+ " UUID=<uuid> UUID of device to be used\n" \
+ " <device> name of device to be used\n" \
+ " <file> name of file to be used\n"), out);
+
+ printf(USAGE_MAN_TAIL("swapoff(8)"));
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * Implementation of "swapoff -a": first everything listed in /proc/swaps,
+ * then the swap entries of /etc/fstab. Only failures from the first pass
+ * are reflected in the returned status.
+ */
+static int swapoff_all(void)
+{
+	struct libmnt_table *tb;
+	struct libmnt_fs *fs;
+	struct libmnt_iter *itr;
+	int rc = 0;
+
+	itr = mnt_new_iter(MNT_ITER_BACKWARD);
+	if (!itr)
+		err(EXIT_FAILURE, _("failed to initialize libmount iterator"));
+
+	/*
+	 * Pass 1: unswap what /proc/swaps lists. Failures are not printed
+	 * (QUIET) but are collected in rc; an error may simply mean that
+	 * /proc/swaps is an ordinary file rather than procfs. do_swapoff()
+	 * exits right away on EPERM.
+	 */
+	tb = get_swaps();
+	if (tb) {
+		while (mnt_table_find_next_fs(tb, itr, match_swap, NULL, &fs) == 0) {
+			const char *src = mnt_fs_get_source(fs);
+
+			rc |= do_swapoff(src, QUIET, CANONIC);
+		}
+	}
+
+	/*
+	 * Pass 2: unswap what /etc/fstab mentions. It was probably handled
+	 * by pass 1 already, so errors are ignored; running "swapoff -a"
+	 * twice must not produce error messages.
+	 */
+	tb = get_fstab();
+	mnt_reset_iter(itr, MNT_ITER_FORWARD);
+
+	if (tb) {
+		while (mnt_table_find_next_fs(tb, itr, match_swap, NULL, &fs) == 0) {
+			const char *src = mnt_fs_get_source(fs);
+
+			if (is_active_swap(src))
+				continue;
+			do_swapoff(src, QUIET, !CANONIC);
+		}
+	}
+
+	mnt_free_iter(itr);
+	return rc;
+}
+/*
+ * swapoff entry point: collect -L/-U tags and positional specs, then
+ * disable them in the order labels, UUIDs, positional arguments and
+ * finally (with -a) everything known from /proc/swaps and fstab.
+ *
+ * The helpers return 0 or -1 and their results are OR-ed into status,
+ * so any single failure makes the return value non-zero.
+ */
+int main(int argc, char *argv[])
+{
+ int status = 0, c;
+ size_t i;
+
+ static const struct option long_opts[] = {
+ { "all", no_argument, NULL, 'a' },
+ { "help", no_argument, NULL, 'h' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "version", no_argument, NULL, 'V' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ while ((c = getopt_long(argc, argv, "ahvVL:U:",
+ long_opts, NULL)) != -1) {
+ switch (c) {
+ case 'a': /* all */
+ ++all;
+ break;
+ case 'h': /* help */
+ usage(); /* does not return */
+ break;
+ case 'v': /* be chatty */
+ ++verbose;
+ break;
+ case 'V': /* version */
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+ case 'L':
+ add_label(optarg); /* pointer into argv is stored, not copied */
+ break;
+ case 'U':
+ add_uuid(optarg); /* pointer into argv is stored, not copied */
+ break;
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+ argv += optind; /* argv now points at the first positional spec (or NULL) */
+
+ if (!all && !numof_labels() && !numof_uuids() && *argv == NULL) {
+ warnx(_("bad usage"));
+ errtryhelp(EXIT_FAILURE);
+ }
+
+ mnt_init_debug(0);
+ mntcache = mnt_new_cache(); /* NOTE(review): result is not checked here */
+
+ for (i = 0; i < numof_labels(); i++)
+ status |= swapoff_by("LABEL", get_label(i), !QUIET);
+
+ for (i = 0; i < numof_uuids(); i++)
+ status |= swapoff_by("UUID", get_uuid(i), !QUIET);
+
+ while (*argv != NULL)
+ status |= do_swapoff(*argv++, !QUIET, !CANONIC);
+
+ if (all)
+ status |= swapoff_all();
+
+ free_tables();
+ mnt_unref_cache(mntcache);
+
+ return status;
+}
diff --git a/sys-utils/swapon-common.c b/sys-utils/swapon-common.c
new file mode 100644
index 0000000..dd1593d
--- /dev/null
+++ b/sys-utils/swapon-common.c
@@ -0,0 +1,117 @@
+
+#include "c.h"
+#include "nls.h"
+#include "xalloc.h"
+
+#include "swapon-common.h"
+
+/*
+ * content of /proc/swaps and /etc/fstab
+ */
+static struct libmnt_table *swaps, *fstab;
+
+struct libmnt_cache *mntcache;
+
+/*
+ * Parser error callback installed on the fstab and swaps tables: warn about
+ * the offending line (when the file name is known) and always return 1.
+ */
+static int table_parser_errcb(struct libmnt_table *tb __attribute__((__unused__)),
+			const char *filename, int line)
+{
+	if (!filename)
+		return 1;
+
+	warnx(_("%s: parse error at line %d -- ignored"), filename, line);
+	return 1;
+}
+
+/*
+ * Return the fstab table, creating and parsing it on first use.
+ * Returns NULL when the table cannot be allocated or parsed by this call.
+ */
+struct libmnt_table *get_fstab(void)
+{
+	if (fstab)
+		return fstab;
+
+	fstab = mnt_new_table();
+	if (!fstab)
+		return NULL;
+
+	mnt_table_set_parser_errcb(fstab, table_parser_errcb);
+	mnt_table_set_cache(fstab, mntcache);
+
+	return mnt_table_parse_fstab(fstab, NULL) == 0 ? fstab : NULL;
+}
+
+/*
+ * Return the table of active swap areas, creating and parsing it on first
+ * use. Returns NULL when the table cannot be allocated or parsed by this
+ * call.
+ */
+struct libmnt_table *get_swaps(void)
+{
+	if (swaps)
+		return swaps;
+
+	swaps = mnt_new_table();
+	if (!swaps)
+		return NULL;
+
+	mnt_table_set_cache(swaps, mntcache);
+	mnt_table_set_parser_errcb(swaps, table_parser_errcb);
+
+	return mnt_table_parse_swaps(swaps, NULL) == 0 ? swaps : NULL;
+}
+/*
+ * Drop this module's references to the swaps and fstab tables.
+ * The static pointers themselves are not reset.
+ */
+void free_tables(void)
+{
+ mnt_unref_table(swaps);
+ mnt_unref_table(fstab);
+}
+
+/*
+ * Table-search predicate: 1 when @fs is non-NULL and libmount reports it
+ * as a swap area, 0 otherwise. @data is unused.
+ */
+int match_swap(struct libmnt_fs *fs, void *data __attribute__((unused)))
+{
+	if (!fs)
+		return 0;
+
+	return mnt_fs_is_swaparea(fs) ? 1 : 0;
+}
+
+/*
+ * Return 1 when @filename is found as a source in the swaps table,
+ * 0 when it is not or when the table is unavailable.
+ */
+int is_active_swap(const char *filename)
+{
+	struct libmnt_table *active = get_swaps();
+
+	if (!active)
+		return 0;
+
+	return mnt_table_find_source(active, filename, MNT_ITER_BACKWARD) != NULL;
+}
+
+/*
+ * Warn that @special could not be mapped to a device and return -1 so
+ * callers can use it directly as their failure result.
+ */
+int cannot_find(const char *special)
+{
+ warnx(_("cannot find the device for %s"), special);
+ return -1;
+}
+
+/*
+ * Lists with -L and -U option
+ */
+static const char **llist;
+static size_t llct;
+static const char **ulist;
+static size_t ulct;
+
+
+/*
+ * Append @label to the -L list. Only the pointer is stored; the string is
+ * not copied.
+ */
+void add_label(const char *label)
+{
+	const size_t slot = llct++;
+
+	llist = xrealloc(llist, llct * sizeof(char *));
+	llist[slot] = label;
+}
+
+/* Return the @i-th stored label, or NULL when @i is out of range. */
+const char *get_label(size_t i)
+{
+	if (i >= llct)
+		return NULL;
+
+	return llist[i];
+}
+/* Number of labels stored so far by add_label(). */
+size_t numof_labels(void)
+{
+ return llct;
+}
+
+/*
+ * Append @uuid to the -U list. Only the pointer is stored; the string is
+ * not copied.
+ */
+void add_uuid(const char *uuid)
+{
+	const size_t slot = ulct++;
+
+	ulist = xrealloc(ulist, ulct * sizeof(char *));
+	ulist[slot] = uuid;
+}
+
+/* Return the @i-th stored UUID, or NULL when @i is out of range. */
+const char *get_uuid(size_t i)
+{
+	if (i >= ulct)
+		return NULL;
+
+	return ulist[i];
+}
+/* Number of UUIDs stored so far by add_uuid(). */
+size_t numof_uuids(void)
+{
+ return ulct;
+}
+
diff --git a/sys-utils/swapon-common.h b/sys-utils/swapon-common.h
new file mode 100644
index 0000000..d1b679f
--- /dev/null
+++ b/sys-utils/swapon-common.h
@@ -0,0 +1,25 @@
+#ifndef UTIL_LINUX_SWAPON_COMMON_H
+#define UTIL_LINUX_SWAPON_COMMON_H
+
+#include <libmount.h>
+
+extern struct libmnt_cache *mntcache; /* cache attached to the fstab and swaps tables */
+
+extern struct libmnt_table *get_fstab(void); /* fstab table, parsed on first use */
+extern struct libmnt_table *get_swaps(void); /* active swaps table, parsed on first use */
+extern void free_tables(void); /* unref both tables */
+
+extern int match_swap(struct libmnt_fs *fs, void *data); /* predicate: is fs a swap area? data unused */
+extern int is_active_swap(const char *filename); /* non-zero if filename is a source in the swaps table */
+
+extern int cannot_find(const char *special); /* warn and return -1 */
+
+extern void add_label(const char *label); /* stores the pointer, no copy */
+extern const char *get_label(size_t i); /* NULL when i is out of range */
+extern size_t numof_labels(void);
+
+extern void add_uuid(const char *uuid); /* stores the pointer, no copy */
+extern const char *get_uuid(size_t i); /* NULL when i is out of range */
+extern size_t numof_uuids(void);
+
+#endif /* UTIL_LINUX_SWAPON_COMMON_H */
diff --git a/sys-utils/swapon.8 b/sys-utils/swapon.8
new file mode 100644
index 0000000..510a15f
--- /dev/null
+++ b/sys-utils/swapon.8
@@ -0,0 +1,256 @@
+.\" Copyright (c) 1980, 1991 Regents of the University of California.
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 3. All advertising materials mentioning features or use of this software
+.\" must display the following acknowledgement:
+.\" This product includes software developed by the University of
+.\" California, Berkeley and its contributors.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)swapon.8 6.3 (Berkeley) 3/16/91
+.\"
+.TH SWAPON 8 "October 2014" "util-linux" "System Administration"
+.SH NAME
+swapon, swapoff \- enable/disable devices and files for paging and swapping
+.SH SYNOPSIS
+.B swapon
+[options]
+.RI [ specialfile ...]
+.br
+.B swapoff
+.RB [ \-va ]
+.RI [ specialfile ...]
+.SH DESCRIPTION
+.B swapon
+is used to specify devices on which paging and swapping are to take place.
+
+The device or file used is given by the
+.I specialfile
+parameter. It may be of the form
+.BI \-L " label"
+or
+.BI \-U " uuid"
+to indicate a device by label or uuid.
+
+Calls to
+.B swapon
+normally occur in the system boot scripts making all swap devices available, so
+that the paging and swapping activity is interleaved across several devices and
+files.
+
+.B swapoff
+disables swapping on the specified devices and files.
+When the
+.B \-a
+flag is given, swapping is disabled on all known swap devices and files
+(as found in
+.I /proc/swaps
+or
+.IR /etc/fstab ).
+
+.SH OPTIONS
+.TP
+.BR \-a , " \-\-all"
+All devices marked as ``swap'' in
+.I /etc/fstab
+are made available, except for those with the ``noauto'' option.
+Devices that are already being used as swap are silently skipped.
+.TP
+.BR \-d , " \-\-discard" [ =\fIpolicy\fR]
+Enable swap discards, if the swap backing device supports the discard or
+trim operation. This may improve performance on some Solid State Devices,
+but often it does not. The option allows one to select between two
+available swap discard policies:
+.B \-\-discard=once
+to perform a single-time discard operation for the whole swap area at swapon;
+or
+.B \-\-discard=pages
+to asynchronously discard freed swap pages before they are available for reuse.
+If no policy is selected, the default behavior is to enable both discard types.
+The
+.I /etc/fstab
+mount options
+.BR discard ,
+.BR discard=once ,
+or
+.B discard=pages
+may also be used to enable discard flags.
+.TP
+.BR \-e , " \-\-ifexists"
+Silently skip devices that do not exist.
+The
+.I /etc/fstab
+mount option
+.B nofail
+may also be used to skip a non-existing device.
+
+.TP
+.BR \-f , " \-\-fixpgsz"
+Reinitialize (exec mkswap) the swap space if its page size does not
+match that of the current running kernel.
+.BR mkswap (8)
+initializes the whole device and does not check for bad blocks.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.TP
+.BI \-L " label"
+Use the partition that has the specified
+.IR label .
+(For this, access to
+.I /proc/partitions
+is needed.)
+.TP
+.BR \-o , " \-\-options " \fIopts\fP
+Specify swap options by an fstab-compatible comma-separated string.
+For example:
+.RS
+.RS
+.sp
+.B "swapon -o pri=1,discard=pages,nofail /dev/sda2"
+.sp
+.RE
+The \fIopts\fP string is evaluated last and overrides all other
+command line options.
+.RE
+.TP
+.BR \-p , " \-\-priority " \fIpriority\fP
+Specify the priority of the swap device.
+.I priority
+is a value between \-1 and 32767. Higher numbers indicate
+higher priority. See
+.BR swapon (2)
+for a full description of swap priorities. Add
+.BI pri= value
+to the option field of
+.I /etc/fstab
+for use with
+.BR "swapon -a" .
+When no priority is defined, it defaults to \-1.
+.TP
+.BR \-s , " \-\-summary"
+Display swap usage summary by device. Equivalent to "cat /proc/swaps".
+This output format is DEPRECATED in favour
+of \fB\-\-show\fR that provides better control on output data.
+.TP
+.BR \-\-show [ =\fIcolumn\fR ...]
+Display a definable table of swap areas. See the
+.B \-\-help
+output for a list of available columns.
+.TP
+.B \-\-output\-all
+Output all available columns.
+.TP
+.B \-\-noheadings
+Do not print headings when displaying
+.B \-\-show
+output.
+.TP
+.B \-\-raw
+Display
+.B \-\-show
+output without aligning table columns.
+.TP
+.B \-\-bytes
+Display swap size in bytes in
+.B \-\-show
+output instead of in user-friendly units.
+.TP
+.BI \-U " uuid"
+Use the partition that has the specified
+.IR uuid .
+.TP
+.BR \-v , " \-\-verbose"
+Be verbose.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.SH NOTES
+You should not use \fBswapon\fR on a file with holes.
+This can be seen in the system log as
+.RS
+.sp
+.B "swapon: swapfile has holes."
+.sp
+.RE
+The swap file implementation in the kernel expects to be able to write to the
+file directly, without the assistance of the filesystem. This is a problem on
+preallocated files (e.g.
+.BR fallocate (1))
+on filesystems like \fBXFS\fR or \fBext4\fR, and on copy-on-write
+filesystems like \fBbtrfs\fR.
+.PP
+It is recommended to use
+.BR dd (1)
+and
+.I /dev/zero
+to avoid holes on XFS and ext4.
+.PP
+.B swapon
+may not work correctly when using a swap file with some versions of
+\fBbtrfs\fR. This is due to btrfs being a copy-on-write filesystem: the
+file location may not be static and corruption can result. Btrfs actively
+disallows the use of swap files on its filesystems by refusing to map the file.
+.PP
+One possible workaround is to map the swap
+file to a loopback device. This will allow the filesystem to determine the
+mapping properly but may come with a performance impact.
+.PP
+Swap over \fBNFS\fR may not work.
+.PP
+.B swapon
+automatically detects and rewrites a swap space signature with old software
+suspend data (e.g. S1SUSPEND, S2SUSPEND, ...). The problem is that if we don't
+do it, then we get data corruption the next time an attempt at unsuspending is
+made.
+
+.SH ENVIRONMENT
+.IP LIBMOUNT_DEBUG=all
+enables libmount debug output.
+.IP LIBBLKID_DEBUG=all
+enables libblkid debug output.
+
+.SH SEE ALSO
+.BR swapoff (2),
+.BR swapon (2),
+.BR fstab (5),
+.BR init (8),
+.BR mkswap (8),
+.BR mount (8),
+.BR rc (8)
+.SH FILES
+.br
+.I /dev/sd??
+standard paging devices
+.br
+.I /etc/fstab
+ascii filesystem description table
+.SH HISTORY
+The
+.B swapon
+command appeared in 4.0BSD.
+.SH AVAILABILITY
+The swapon command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/swapon.c b/sys-utils/swapon.c
new file mode 100644
index 0000000..357dcb3
--- /dev/null
+++ b/sys-utils/swapon.c
@@ -0,0 +1,1017 @@
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <getopt.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <ctype.h>
+
+#include <libsmartcols.h>
+
+#include "c.h"
+#include "nls.h"
+#include "bitops.h"
+#include "blkdev.h"
+#include "pathnames.h"
+#include "xalloc.h"
+#include "strutils.h"
+#include "optutils.h"
+#include "closestream.h"
+
+#include "swapheader.h"
+#include "swapprober.h"
+#include "swapon-common.h"
+
+#ifdef HAVE_SYS_SWAP_H
+# include <sys/swap.h>
+#endif
+
+#ifndef SWAP_FLAG_DISCARD
+# define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */
+#endif
+
+#ifndef SWAP_FLAG_DISCARD_ONCE
+# define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */
+#endif
+
+#ifndef SWAP_FLAG_DISCARD_PAGES
+# define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */
+#endif
+
+#define SWAP_FLAGS_DISCARD_VALID (SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
+ SWAP_FLAG_DISCARD_PAGES)
+
+#ifndef SWAP_FLAG_PREFER
+# define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
+#endif
+
+#ifndef SWAP_FLAG_PRIO_MASK
+# define SWAP_FLAG_PRIO_MASK 0x7fff
+#endif
+
+#ifndef SWAP_FLAG_PRIO_SHIFT
+# define SWAP_FLAG_PRIO_SHIFT 0
+#endif
+
+#if !defined(HAVE_SWAPON) && defined(SYS_swapon)
+# include <sys/syscall.h>
+# define swapon(path, flags) syscall(SYS_swapon, path, flags)
+#endif
+
+#define MAX_PAGESIZE (64 * 1024)
+
+#ifndef UUID_STR_LEN
+# define UUID_STR_LEN 37
+#endif
+
+enum { /* signature kinds reported by swap_detect_signature() */
+ SIG_SWAPSPACE = 1, /* buffer starts with SWAP_SIGNATURE */
+ SIG_SWSUSPEND /* buffer starts with one of the *SUSPEND / LINHIB0001 style magics */
+};
+
+/* static description of one --show output column */
+struct colinfo {
+ const char *name; /* header */
+ double whint; /* width hint (N < 1 is in percent of termwidth) */
+ int flags; /* SCOLS_FL_* */
+ const char *help; /* short description, marked for translation with N_() in infos[] */
+};
+
+enum { /* column identifiers; each one is the index of its entry in infos[] */
+ COL_PATH,
+ COL_TYPE,
+ COL_SIZE,
+ COL_USED,
+ COL_PRIO,
+ COL_UUID,
+ COL_LABEL
+};
+static struct colinfo infos[] = { /* indexed by COL_*; looked up by name in column_name_to_id() */
+ [COL_PATH] = { "NAME", 0.20, 0, N_("device file or partition path") },
+ [COL_TYPE] = { "TYPE", 0.20, SCOLS_FL_TRUNC, N_("type of the device")},
+ [COL_SIZE] = { "SIZE", 0.20, SCOLS_FL_RIGHT, N_("size of the swap area")},
+ [COL_USED] = { "USED", 0.20, SCOLS_FL_RIGHT, N_("bytes in use")},
+ [COL_PRIO] = { "PRIO", 0.20, SCOLS_FL_RIGHT, N_("swap priority")},
+ [COL_UUID] = { "UUID", 0.20, 0, N_("swap uuid")},
+ [COL_LABEL] = { "LABEL", 0.20, 0, N_("swap label")},
+};
+
+
+/* swap area properties (one instance is embedded in struct swapon_ctl as the global settings) */
+struct swap_prop {
+ int discard; /* discard policy; NOTE(review): presumably SWAP_FLAG_DISCARD* bits, confirm at the use site */
+ int priority; /* non-prioritized swap by default */
+ int no_fail; /* skip device if not exist */
+};
+
+/* device description; label and uuid are filled in by swap_get_info() and passed to mkswap by swap_reinitialize() */
+struct swap_device {
+ const char *path; /* device or file to be turned on */
+ const char *label; /* swap label */
+ const char *uuid; /* unique identifier */
+ unsigned int pagesize; /* page size of the swap area; used for the signature offset and size computation */
+};
+
+/* control struct: command-line state handed to the helpers of this file */
+struct swapon_ctl {
+ int columns[ARRAY_SIZE(infos) * 2]; /* --show columns */
+ int ncolumns; /* number of columns */
+
+ struct swap_prop props; /* global settings for all devices */
+
+ unsigned int
+ all:1, /* turn on all swap devices */
+ bytes:1, /* display --show in bytes */
+ fix_page_size:1, /* reinitialize page size */
+ no_heading:1, /* toggle --show headers */
+ raw:1, /* toggle --show alignment */
+ show:1, /* display --show information */
+ verbose:1; /* be chatty */
+};
+
+/*
+ * Translate a column name of @namesz bytes (compared case-insensitively,
+ * full-length match required) into its index in infos[].
+ * Warns and returns -1 when no column has that name.
+ */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	size_t idx = 0;
+
+	assert(name);
+
+	while (idx < ARRAY_SIZE(infos)) {
+		const char *candidate = infos[idx].name;
+
+		/* prefix match plus terminator check == exact-length match */
+		if (strncasecmp(name, candidate, namesz) == 0 && candidate[namesz] == '\0')
+			return idx;
+		idx++;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/* Return the COL_* identifier stored at position @num of the --show list. */
+static inline int get_column_id(const struct swapon_ctl *ctl, int num)
+{
+	int id;
+
+	assert(num < ctl->ncolumns);
+	id = ctl->columns[num];
+	assert(id < (int) ARRAY_SIZE(infos));
+
+	return id;
+}
+
+/* Return the infos[] entry describing position @num of the --show list. */
+static inline struct colinfo *get_column_info(const struct swapon_ctl *ctl, unsigned num)
+{
+	const int id = get_column_id(ctl, num);
+
+	return infos + id;
+}
+
+/*
+ * Append one output line describing swap area @fs to @table, filling the
+ * columns selected in @ctl.
+ *
+ * UUID and LABEL need a blkid probe of the backing device/file; the probe
+ * is only created when the source is readable, otherwise those cells stay
+ * empty. Allocation failures terminate the program.
+ */
+static void add_scols_line(const struct swapon_ctl *ctl, struct libscols_table *table, struct libmnt_fs *fs)
+{
+	int i;
+	struct libscols_line *line;
+	blkid_probe pr = NULL;
+	const char *data;
+
+	assert(table);
+	assert(fs);
+
+	line = scols_table_new_line(table, NULL);
+	if (!line)
+		err(EXIT_FAILURE, _("failed to allocate output line"));
+
+	data = mnt_fs_get_source(fs);
+	if (access(data, R_OK) == 0)
+		pr = get_swap_prober(data);
+
+	for (i = 0; i < ctl->ncolumns; i++) {
+		char *str = NULL;
+		off_t size;
+
+		switch (get_column_id(ctl, i)) {
+		case COL_PATH:
+			xasprintf(&str, "%s", mnt_fs_get_source(fs));
+			break;
+		case COL_TYPE:
+			xasprintf(&str, "%s", mnt_fs_get_swaptype(fs));
+			break;
+		case COL_SIZE:
+			size = mnt_fs_get_size(fs);
+			size *= 1024;	/* convert to bytes */
+			if (ctl->bytes)
+				/* %jd takes intmax_t, not off_t */
+				xasprintf(&str, "%jd", (intmax_t) size);
+			else
+				str = size_to_human_string(SIZE_SUFFIX_1LETTER, size);
+			break;
+		case COL_USED:
+			size = mnt_fs_get_usedsize(fs);
+			size *= 1024;	/* convert to bytes */
+			if (ctl->bytes)
+				/* %jd takes intmax_t, not off_t */
+				xasprintf(&str, "%jd", (intmax_t) size);
+			else
+				str = size_to_human_string(SIZE_SUFFIX_1LETTER, size);
+			break;
+		case COL_PRIO:
+			xasprintf(&str, "%d", mnt_fs_get_priority(fs));
+			break;
+		case COL_UUID:
+			/* data is reused as the lookup output from here on */
+			if (pr && !blkid_probe_lookup_value(pr, "UUID", &data, NULL))
+				xasprintf(&str, "%s", data);
+			break;
+		case COL_LABEL:
+			if (pr && !blkid_probe_lookup_value(pr, "LABEL", &data, NULL))
+				xasprintf(&str, "%s", data);
+			break;
+		default:
+			break;
+		}
+
+		/* the line takes over str; cells without data are left unset */
+		if (str && scols_line_refer_data(line, i, str))
+			err(EXIT_FAILURE, _("failed to add output data"));
+	}
+
+	if (pr)
+		blkid_free_probe(pr);
+}
+
+/*
+ * Print the legacy --summary listing (one line per active swap area, in the
+ * layout of /proc/swaps) to stdout.
+ *
+ * Returns -1 when the swaps table is unavailable, 0 otherwise; nothing is
+ * printed, not even the header, when the table is empty.
+ */
+static int display_summary(void)
+{
+	struct libmnt_table *st = get_swaps();
+	struct libmnt_iter *itr;
+	struct libmnt_fs *fs;
+
+	if (!st)
+		return -1;
+
+	if (mnt_table_is_empty(st))
+		return 0;
+
+	itr = mnt_new_iter(MNT_ITER_FORWARD);
+	if (!itr)
+		err(EXIT_FAILURE, _("failed to initialize libmount iterator"));
+
+	printf(_("%s\t\t\t\tType\t\tSize\tUsed\tPriority\n"), _("Filename"));
+
+	while (mnt_table_next_fs(st, itr, &fs) == 0) {
+		/* %jd takes intmax_t; the size getters return off_t */
+		printf("%-39s\t%-8s\t%jd\t%jd\t%d\n",
+			mnt_fs_get_source(fs),
+			mnt_fs_get_swaptype(fs),
+			(intmax_t) mnt_fs_get_size(fs),
+			(intmax_t) mnt_fs_get_usedsize(fs),
+			mnt_fs_get_priority(fs));
+	}
+
+	mnt_free_iter(itr);
+	return 0;
+}
+
+/*
+ * Implementation of --show: build a libsmartcols table with the columns
+ * selected in @ctl, add one line per active swap area and print it.
+ * Returns -1 when the swaps table is unavailable, 0 otherwise.
+ */
+static int show_table(struct swapon_ctl *ctl)
+{
+	struct libmnt_table *active = get_swaps();
+	struct libmnt_iter *it;
+	struct libmnt_fs *entry;
+	struct libscols_table *out;
+	int col = 0;
+
+	if (!active)
+		return -1;
+
+	it = mnt_new_iter(MNT_ITER_FORWARD);
+	if (!it)
+		err(EXIT_FAILURE, _("failed to initialize libmount iterator"));
+
+	scols_init_debug(0);
+
+	out = scols_new_table();
+	if (!out)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+
+	scols_table_enable_raw(out, ctl->raw);
+	scols_table_enable_noheadings(out, ctl->no_heading);
+
+	/* one table column per requested --show column, in request order */
+	while (col < ctl->ncolumns) {
+		struct colinfo *ci = get_column_info(ctl, col);
+
+		if (scols_table_new_column(out, ci->name, ci->whint, ci->flags) == NULL)
+			err(EXIT_FAILURE, _("failed to allocate output column"));
+		col++;
+	}
+
+	while (mnt_table_next_fs(active, it, &entry) == 0)
+		add_scols_line(ctl, out, entry);
+
+	scols_print_table(out);
+	scols_unref_table(out);
+	mnt_free_iter(it);
+	return 0;
+}
+
+/*
+ * Re-create the swap area on dev->path by running mkswap in a child
+ * process, passing on dev->label (-L) and dev->uuid (-U) when they are set.
+ *
+ * Returns 0 when mkswap exited with status 0, -1 on fork/waitpid failure
+ * or when mkswap did not exit successfully.
+ */
+static int swap_reinitialize(struct swap_device *dev)
+{
+ pid_t pid;
+ int status, ret;
+ char const *cmd[7]; /* "mkswap", -L, label, -U, uuid, path, NULL: seven slots at most */
+ int idx=0;
+
+ assert(dev);
+ assert(dev->path);
+
+ warnx(_("%s: reinitializing the swap."), dev->path);
+
+ switch ((pid=fork())) {
+ case -1: /* fork error */
+ warn(_("fork failed"));
+ return -1;
+
+ case 0: /* child */
+ if (geteuid() != getuid()) {
+ /* in case someone uses swapon as setuid binary */
+ if (setgid(getgid()) < 0)
+ exit(EXIT_FAILURE);
+ if (setuid(getuid()) < 0)
+ exit(EXIT_FAILURE);
+ }
+
+ cmd[idx++] = "mkswap";
+ if (dev->label) {
+ cmd[idx++] = "-L";
+ cmd[idx++] = dev->label;
+ }
+ if (dev->uuid) {
+ cmd[idx++] = "-U";
+ cmd[idx++] = dev->uuid;
+ }
+ cmd[idx++] = dev->path;
+ cmd[idx++] = NULL;
+ execvp(cmd[0], (char * const *) cmd); /* mkswap is looked up via PATH */
+ errexec(cmd[0]); /* NOTE(review): presumably reports the exec failure and exits; there is no break before the next case */
+
+ default: /* parent */
+ do {
+ ret = waitpid(pid, &status, 0);
+ } while (ret == -1 && errno == EINTR); /* retry when interrupted by a signal */
+
+ if (ret < 0) {
+ warn(_("waitpid failed"));
+ return -1;
+ }
+
+ /* mkswap returns: 0=success, >0 error */
+ if (WIFEXITED(status) && WEXITSTATUS(status)==0)
+ return 0; /* ok */
+ break;
+ }
+ return -1; /* error */
+}
+
+/*
+ * Overwrite the signature at the end of the first page of dev->path with
+ * the regular swap signature, replacing an unwanted SWSUSPEND one.
+ * Returns 0 on success, -1 on any open/seek/write/close failure.
+ */
+static int swap_rewrite_signature(const struct swap_device *dev)
+{
+	int result = -1;
+	int fd;
+
+	assert(dev);
+	assert(dev->path);
+	assert(dev->pagesize);
+
+	fd = open(dev->path, O_WRONLY);
+	if (fd == -1) {
+		warn(_("cannot open %s"), dev->path);
+		return -1;
+	}
+
+	/* the signature occupies the last SWAP_SIGNATURE_SZ bytes of page 0 */
+	if (lseek(fd, dev->pagesize - SWAP_SIGNATURE_SZ, SEEK_SET) < 0)
+		warn(_("%s: lseek failed"), dev->path);
+	else if (write(fd, (void *) SWAP_SIGNATURE,
+			SWAP_SIGNATURE_SZ) != SWAP_SIGNATURE_SZ)
+		warn(_("%s: write signature failed"), dev->path);
+	else
+		result = 0;
+
+	/* a failing close counts as a failed write, whatever happened before */
+	if (close_fd(fd) != 0) {
+		warn(_("write failed: %s"), dev->path);
+		result = -1;
+	}
+	return result;
+}
+
+/*
+ * Classify the signature found at @buf.
+ *
+ * Stores SIG_SWAPSPACE or SIG_SWSUSPEND in *@sig and returns 1 when a known
+ * signature is recognised; returns 0 and leaves *@sig alone otherwise.
+ */
+static int swap_detect_signature(const char *buf, int *sig)
+{
+	/* magics of suspend images, checked in this order */
+	static const struct {
+		const char *magic;
+		size_t len;
+	} suspend_magics[] = {
+		{ "S1SUSPEND", 9 },
+		{ "S2SUSPEND", 9 },
+		{ "ULSUSPEND", 9 },
+		{ "\xed\xc3\x02\xe9\x98\x56\xe5\x0c", 8 },
+		{ "LINHIB0001", 10 },
+	};
+	size_t k;
+
+	assert(buf);
+	assert(sig);
+
+	if (memcmp(buf, SWAP_SIGNATURE, SWAP_SIGNATURE_SZ) == 0) {
+		*sig = SIG_SWAPSPACE;
+		return 1;
+	}
+
+	for (k = 0; k < sizeof(suspend_magics) / sizeof(suspend_magics[0]); k++) {
+		if (memcmp(buf, suspend_magics[k].magic, suspend_magics[k].len) == 0) {
+			*sig = SIG_SWSUSPEND;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Read the start of the swap area from @fd and locate its signature.
+ *
+ * The signature sits in the last SWAP_SIGNATURE_SZ bytes of the first page,
+ * so every candidate page size from 4 KiB up to MAX_PAGESIZE is tried.
+ *
+ * On success returns a malloc'ed buffer holding the data read (caller
+ * frees) and sets *@sig to the SIG_* kind and *@pagesize to the detected
+ * page size. On read failure or when no signature is found, returns NULL
+ * with *@sig and *@pagesize set to 0.
+ */
+static char *swap_get_header(int fd, int *sig, unsigned int *pagesize)
+{
+	char *buf;
+	ssize_t datasz;
+	unsigned int page;
+
+	assert(sig);
+	assert(pagesize);
+
+	*pagesize = 0;
+	*sig = 0;
+
+	buf = xmalloc(MAX_PAGESIZE);
+
+	datasz = read(fd, buf, MAX_PAGESIZE);
+	if (datasz < 0)
+		goto err;
+
+	for (page = 0x1000; page <= MAX_PAGESIZE; page <<= 1) {
+		/* skip 32k pagesize since this does not seem to
+		 * be supported */
+		if (page == 0x8000)
+			continue;
+		/* The signature ends exactly at offset `page`, so the whole
+		 * page must have been read; otherwise the comparison would
+		 * look at bytes of buf that read() never filled in.
+		 * (The smallest swap area is PAGE_SIZE*10, i.e. 40k, which
+		 * is less than MAX_PAGESIZE.) */
+		if ((size_t) datasz < page)
+			break;
+		if (swap_detect_signature(buf + page - SWAP_SIGNATURE_SZ, sig)) {
+			*pagesize = page;
+			break;
+		}
+	}
+
+	if (*pagesize)
+		return buf;
+err:
+	free(buf);
+	return NULL;
+}
+
+/*
+ * Compute the size of the swap area in bytes as (last_page + 1) * pagesize.
+ *
+ * last_page is taken from the header at @hdr when its version field equals
+ * SWAP_VERSION, either as stored or after a 32-bit byte swap; for any other
+ * version last_page is treated as 0.
+ */
+static unsigned long long swap_get_size(const struct swap_device *dev,
+					const char *hdr)
+{
+	const struct swap_header_v1_2 *header;
+	const unsigned int wanted = SWAP_VERSION;
+	unsigned int lastpg;
+
+	assert(dev);
+	assert(dev->pagesize > 0);
+
+	header = (const struct swap_header_v1_2 *) hdr;
+
+	if (header->version == wanted)
+		lastpg = header->last_page;
+	else if (swab32(header->version) == wanted)
+		lastpg = swab32(header->last_page);
+	else
+		lastpg = 0;
+
+	return ((unsigned long long) lastpg + 1) * dev->pagesize;
+}
+
+/*
+ * Copy the volume label and the UUID from the swap header @hdr to @dev.
+ *
+ * dev->label is set only when the first byte of the volume name is non-zero
+ * and dev->uuid only when the first UUID byte is non-zero. Both strings are
+ * allocated by xstrdup().
+ */
+static void swap_get_info(struct swap_device *dev, const char *hdr)
+{
+	const struct swap_header_v1_2 *header =
+			(const struct swap_header_v1_2 *) hdr;
+
+	assert(dev);
+
+	if (!header)
+		return;
+
+	if (*header->volume_name)
+		dev->label = xstrdup(header->volume_name);
+
+	if (*header->uuid) {
+		const unsigned char *b = header->uuid;
+		char text[UUID_STR_LEN];
+
+		/* 16 raw bytes -> 8-4-4-4-12 lower-case hex groups */
+		snprintf(text, sizeof(text),
+			"%02x%02x%02x%02x-"
+			"%02x%02x-%02x%02x-"
+			"%02x%02x-%02x%02x%02x%02x%02x%02x",
+			b[0], b[1], b[2], b[3],
+			b[4], b[5], b[6], b[7],
+			b[8], b[9], b[10], b[11], b[12], b[13], b[14], b[15]);
+		dev->uuid = xstrdup(text);
+	}
+}
+
+/*
+ * Sanity checks performed on dev->path before it is passed to swapon(2).
+ *
+ * Warns about permissions and ownership, refuses regular files that appear
+ * to have holes, reads the swap header (which also sets dev->pagesize) and,
+ * depending on the detected signature, may re-initialize the area (page size
+ * mismatch together with ctl->fix_page_size) or rewrite a software-suspend
+ * signature.
+ *
+ * Returns 0 when the device may be used, -1 otherwise.
+ */
+static int swapon_checks(const struct swapon_ctl *ctl, struct swap_device *dev)
+{
+ struct stat st;
+ int fd, sig;
+ char *hdr = NULL;
+ unsigned long long devsize = 0;
+ int permMask;
+
+ assert(ctl);
+ assert(dev);
+ assert(dev->path);
+
+ fd = open(dev->path, O_RDONLY);
+ if (fd == -1) {
+ warn(_("cannot open %s"), dev->path);
+ goto err;
+ }
+
+ if (fstat(fd, &st) < 0) {
+ warn(_("stat of %s failed"), dev->path);
+ goto err;
+ }
+
+ /* block devices: only setuid/setgid/sticky and "other" bits are reported;
+ * anything else: group bits are reported as well. Warning only. */
+ permMask = S_ISBLK(st.st_mode) ? 07007 : 07077;
+ if ((st.st_mode & permMask) != 0)
+ warnx(_("%s: insecure permissions %04o, %04o suggested."),
+ dev->path, st.st_mode & 07777,
+ ~permMask & 0666);
+
+ if (S_ISREG(st.st_mode) && st.st_uid != 0)
+ warnx(_("%s: insecure file owner %d, 0 (root) suggested."),
+ dev->path, st.st_uid);
+
+ /* test for holes by LBT */
+ if (S_ISREG(st.st_mode)) {
+ /* fewer allocated 512-byte blocks than the file size implies */
+ if (st.st_blocks * 512 < st.st_size) {
+ warnx(_("%s: skipping - it appears to have holes."),
+ dev->path);
+ goto err;
+ }
+ devsize = st.st_size;
+ }
+
+ if (S_ISBLK(st.st_mode) && blkdev_get_size(fd, &devsize)) {
+ warnx(_("%s: get size failed"), dev->path);
+ goto err;
+ }
+
+ /* hdr is an allocated buffer; it is freed on both exit paths below */
+ hdr = swap_get_header(fd, &sig, &dev->pagesize);
+ if (!hdr) {
+ warnx(_("%s: read swap header failed"), dev->path);
+ goto err;
+ }
+
+ /* NOTE(review): dev->pagesize is handed to swap_get_header() as
+ * "unsigned int *" but is printed with %d here and below. */
+ if (ctl->verbose)
+ warnx(_("%s: found signature [pagesize=%d, signature=%s]"),
+ dev->path,
+ dev->pagesize,
+ sig == SIG_SWAPSPACE ? "swap" :
+ sig == SIG_SWSUSPEND ? "suspend" : "unknown");
+
+ if (sig == SIG_SWAPSPACE && dev->pagesize) {
+ unsigned long long swapsize = swap_get_size(dev, hdr);
+ int syspg = getpagesize();
+
+ if (ctl->verbose)
+ warnx(_("%s: pagesize=%d, swapsize=%llu, devsize=%llu"),
+ dev->path, dev->pagesize, swapsize, devsize);
+
+ /* a header larger than the device is only reported (verbose);
+ * it does not make this function fail */
+ if (swapsize > devsize) {
+ /* NOTE(review): the message says "last_page" but the value
+ * printed is swapsize, i.e. a size in bytes. */
+ if (ctl->verbose)
+ warnx(_("%s: last_page 0x%08llx is larger"
+ " than actual size of swapspace"),
+ dev->path, swapsize);
+
+ } else if (syspg < 0 || (unsigned int) syspg != dev->pagesize) {
+ if (ctl->fix_page_size) {
+ int rc;
+
+ /* remember label/UUID so that they can be passed on to
+ * swap_reinitialize(); the strings are not released in
+ * this function */
+ swap_get_info(dev, hdr);
+
+ warnx(_("%s: swap format pagesize does not match."),
+ dev->path);
+ rc = swap_reinitialize(dev);
+ if (rc < 0)
+ goto err;
+ } else
+ warnx(_("%s: swap format pagesize does not match. "
+ "(Use --fixpgsz to reinitialize it.)"),
+ dev->path);
+ }
+ } else if (sig == SIG_SWSUSPEND) {
+ /* We have to reinitialize swap with old (=useless) software suspend
+ * data. The problem is that if we don't do it, then we get data
+ * corruption the next time an attempt at unsuspending is made.
+ */
+ warnx(_("%s: software suspend data detected. "
+ "Rewriting the swap signature."),
+ dev->path);
+ if (swap_rewrite_signature(dev) < 0)
+ goto err;
+ }
+
+ free(hdr);
+ close(fd);
+ return 0;
+err:
+ /* fd is -1 only when open() itself failed */
+ if (fd != -1)
+ close(fd);
+ free(hdr);
+ return -1;
+}
+
+/*
+ * Enable swapping on one device or file.
+ *
+ * @spec is resolved through mnt_resolve_spec() unless @canonic is set, in
+ * which case it is used as the path directly. The swapon(2) flags are built
+ * from prop->priority and prop->discard.
+ *
+ * Returns the swapon() result, -1 when swapon_checks() rejects the device,
+ * or the cannot_find() result when @spec cannot be resolved.
+ */
+static int do_swapon(const struct swapon_ctl *ctl,
+		     const struct swap_prop *prop,
+		     const char *spec,
+		     int canonic)
+{
+	struct swap_device dev = { .path = NULL };
+	int swflags = 0;
+	int prio;
+	int rc;
+
+	assert(ctl);
+	assert(prop);
+
+	if (canonic)
+		dev.path = spec;
+	else {
+		dev.path = mnt_resolve_spec(spec, mntcache);
+		if (!dev.path)
+			return cannot_find(spec);
+	}
+
+	prio = prop->priority;
+
+	if (swapon_checks(ctl, &dev))
+		return -1;
+
+#ifdef SWAP_FLAG_PREFER
+	if (prio >= 0) {
+		/* clamp to the largest priority the flag word can carry */
+		if (prio > SWAP_FLAG_PRIO_MASK)
+			prio = SWAP_FLAG_PRIO_MASK;
+
+		swflags = SWAP_FLAG_PREFER
+			| ((prio & SWAP_FLAG_PRIO_MASK) << SWAP_FLAG_PRIO_SHIFT);
+	}
+#endif
+	/*
+	 * Discard flags are used only when at least one is set and none of
+	 * them lies outside SWAP_FLAGS_DISCARD_VALID.
+	 */
+	if (prop->discard && !(prop->discard & ~SWAP_FLAGS_DISCARD_VALID)) {
+		int once = (prop->discard & SWAP_FLAG_DISCARD_ONCE) != 0;
+		int pages = (prop->discard & SWAP_FLAG_DISCARD_PAGES) != 0;
+
+		/*
+		 * With both policies requested the plain "enable discards"
+		 * flag is passed; otherwise the flags are passed as given.
+		 */
+		if (once && pages)
+			swflags |= SWAP_FLAG_DISCARD;
+		else
+			swflags |= prop->discard;
+	}
+
+	if (ctl->verbose)
+		printf(_("swapon %s\n"), dev.path);
+
+	rc = swapon(dev.path, swflags);
+	if (rc < 0)
+		warn(_("%s: swapon failed"), dev.path);
+
+	return rc;
+}
+
+/* Resolve LABEL=@label to a device name and enable swapping on it. */
+static int swapon_by_label(struct swapon_ctl *ctl, const char *label)
+{
+	char *devname = mnt_resolve_tag("LABEL", label, mntcache);
+
+	if (!devname)
+		return cannot_find(label);
+
+	return do_swapon(ctl, &ctl->props, devname, TRUE);
+}
+
+/* Resolve UUID=@uuid to a device name and enable swapping on it. */
+static int swapon_by_uuid(struct swapon_ctl *ctl, const char *uuid)
+{
+	char *devname = mnt_resolve_tag("UUID", uuid, mntcache);
+
+	if (!devname)
+		return cannot_find(uuid);
+
+	return do_swapon(ctl, &ctl->props, devname, TRUE);
+}
+
+/*
+ * Parse swap options given by -o <options> or taken from fstab.
+ *
+ * Recognized options:
+ *   nofail          sets props->no_fail
+ *   discard[=once|pages]
+ *                   sets SWAP_FLAG_DISCARD and optionally the policy flag;
+ *                   an unrecognized policy leaves only SWAP_FLAG_DISCARD set
+ *   pri=<number>    sets props->priority; a value that does not start with a
+ *                   number or does not fit into an int is ignored
+ *
+ * Option values returned by mnt_optstr_get_option() point into @options and
+ * are delimited by their size, not by a terminating NUL, hence the explicit
+ * length checks below.
+ *
+ * Always returns 0.
+ */
+static int parse_options(struct swap_prop *props, const char *options)
+{
+	char *arg = NULL;
+	size_t argsz = 0;
+
+	assert(props);
+	assert(options);
+
+	if (mnt_optstr_get_option(options, "nofail", NULL, NULL) == 0)
+		props->no_fail = 1;
+
+	if (mnt_optstr_get_option(options, "discard", &arg, &argsz) == 0) {
+		props->discard |= SWAP_FLAG_DISCARD;
+
+		if (arg) {
+			/* compare the whole value; a bare strncmp() limited to
+			 * argsz would also accept "", "o", "pag", ... */
+
+			/* only single-time discards are wanted */
+			if (argsz == 4 && strncmp(arg, "once", 4) == 0)
+				props->discard |= SWAP_FLAG_DISCARD_ONCE;
+
+			/* do discard for every released swap page */
+			else if (argsz == 5 && strncmp(arg, "pages", 5) == 0)
+				props->discard |= SWAP_FLAG_DISCARD_PAGES;
+		}
+	}
+
+	arg = NULL;
+	if (mnt_optstr_get_option(options, "pri", &arg, NULL) == 0 && arg) {
+		char *end = NULL;
+		long num;
+
+		/* strtol() instead of atoi(): conversion failures and
+		 * overflow are detectable and do not clobber the priority */
+		errno = 0;
+		num = strtol(arg, &end, 10);
+		if (errno == 0 && end && end > arg
+		    && num >= INT_MIN && num <= INT_MAX)
+			props->priority = (int) num;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Enable all swap areas listed in fstab (swapon --all).
+ *
+ * Entries with the "noauto" option, already active areas and -- for entries
+ * with no_fail set -- unresolvable or unreadable devices are skipped. The
+ * results of the individual do_swapon() / cannot_find() calls are OR-ed
+ * together and returned. Exits when fstab cannot be parsed or the iterator
+ * cannot be allocated.
+ */
+static int swapon_all(struct swapon_ctl *ctl)
+{
+	struct libmnt_table *fstab = get_fstab();
+	struct libmnt_iter *iter;
+	struct libmnt_fs *entry;
+	int rc = 0;
+
+	if (!fstab)
+		err(EXIT_FAILURE, _("failed to parse %s"), mnt_get_fstab_path());
+
+	iter = mnt_new_iter(MNT_ITER_FORWARD);
+	if (!iter)
+		err(EXIT_FAILURE, _("failed to initialize libmount iterator"));
+
+	while (mnt_table_find_next_fs(fstab, iter, match_swap, NULL, &entry) == 0) {
+		struct swap_prop prop;		/* per device setting */
+		const char *optstr;
+		const char *devname;
+
+		if (mnt_fs_get_option(entry, "noauto", NULL, NULL) == 0) {
+			if (ctl->verbose)
+				warnx(_("%s: noauto option -- ignored"), mnt_fs_get_source(entry));
+			continue;
+		}
+
+		/* start from the command line defaults, then let fstab override them */
+		prop = ctl->props;
+		optstr = mnt_fs_get_options(entry);
+		if (optstr)
+			parse_options(&prop, optstr);
+
+		/* convert LABEL=, UUID= etc. from fstab to device name */
+		devname = mnt_resolve_spec(mnt_fs_get_source(entry), mntcache);
+
+		if (!devname) {
+			if (!prop.no_fail)
+				rc |= cannot_find(mnt_fs_get_source(entry));
+		} else if (is_active_swap(devname)) {
+			if (ctl->verbose)
+				warnx(_("%s: already active -- ignored"), devname);
+		} else if (prop.no_fail && access(devname, R_OK) != 0) {
+			if (ctl->verbose)
+				warnx(_("%s: inaccessible -- ignored"), devname);
+		} else
+			rc |= do_swapon(ctl, &prop, devname, TRUE);
+	}
+
+	mnt_free_iter(iter);
+	return rc;
+}
+
+
+/*
+ * Print the help text -- synopsis, options, the accepted <spec> forms, the
+ * discard policies and the list of output columns from infos[] -- to stdout
+ * and exit with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ size_t i;
+
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _(" %s [options] [<spec>]\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Enable devices and files for paging and swapping.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -a, --all enable all swaps from /etc/fstab\n"), out);
+ fputs(_(" -d, --discard[=<policy>] enable swap discards, if supported by device\n"), out);
+ fputs(_(" -e, --ifexists silently skip devices that do not exist\n"), out);
+ fputs(_(" -f, --fixpgsz reinitialize the swap space if necessary\n"), out);
+ fputs(_(" -o, --options <list> comma-separated list of swap options\n"), out);
+ fputs(_(" -p, --priority <prio> specify the priority of the swap device\n"), out);
+ fputs(_(" -s, --summary display summary about used swap devices (DEPRECATED)\n"), out);
+ fputs(_(" --show[=<columns>] display summary in definable table\n"), out);
+ fputs(_(" --noheadings don't print table heading (with --show)\n"), out);
+ fputs(_(" --raw use the raw output format (with --show)\n"), out);
+ fputs(_(" --bytes display swap size in bytes in --show output\n"), out);
+ fputs(_(" -v, --verbose verbose mode\n"), out);
+
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(26));
+
+ fputs(_("\nThe <spec> parameter:\n" \
+ " -L <label> synonym for LABEL=<label>\n"
+ " -U <uuid> synonym for UUID=<uuid>\n"
+ " LABEL=<label> specifies device by swap area label\n"
+ " UUID=<uuid> specifies device by swap area UUID\n"
+ " PARTLABEL=<label> specifies device by partition label\n"
+ " PARTUUID=<uuid> specifies device by partition UUID\n"
+ " <device> name of device to be used\n"
+ " <file> name of file to be used\n"), out);
+
+ fputs(_("\nAvailable discard policy types (for --discard):\n"
+ " once : only single-time area discards are issued\n"
+ " pages : freed pages are discarded before they are reused\n"
+ "If no policy is selected, both discard types are enabled (default).\n"), out);
+
+ /* one line per known --show column: name and translated description */
+ fputs(USAGE_COLUMNS, out);
+ for (i = 0; i < ARRAY_SIZE(infos); i++)
+ fprintf(out, " %-5s %s\n", infos[i].name, _(infos[i].help));
+
+ printf(USAGE_MAN_TAIL("swapon(8)"));
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * swapon entry point.
+ *
+ * Parses the command line, then either prints the table of swap areas
+ * (--show, or when nothing to enable was specified) or enables swap areas
+ * in this order: all fstab entries (--all), labels (-L), UUIDs (-U) and
+ * finally the remaining <spec> arguments.
+ *
+ * The exit status is the OR of the results of the individual operations.
+ */
+int main(int argc, char *argv[])
+{
+	int status = 0, c;
+	size_t i;
+	char *options = NULL;
+
+	enum {
+		BYTES_OPTION = CHAR_MAX + 1,
+		NOHEADINGS_OPTION,
+		RAW_OPTION,
+		SHOW_OPTION,
+		OPT_LIST_TYPES
+	};
+
+	static const struct option long_opts[] = {
+		{ "priority", required_argument, NULL, 'p' },
+		{ "discard", optional_argument, NULL, 'd' },
+		{ "ifexists", no_argument, NULL, 'e' },
+		/* must agree with "o:" in the short option string and with
+		 * "--options <list>" in usage(); with optional_argument the
+		 * form "--options <list>" left <list> behind as a <spec> */
+		{ "options", required_argument, NULL, 'o' },
+		{ "summary", no_argument, NULL, 's' },
+		{ "fixpgsz", no_argument, NULL, 'f' },
+		{ "all", no_argument, NULL, 'a' },
+		{ "help", no_argument, NULL, 'h' },
+		{ "verbose", no_argument, NULL, 'v' },
+		{ "version", no_argument, NULL, 'V' },
+		{ "show", optional_argument, NULL, SHOW_OPTION },
+		{ "output-all", no_argument, NULL, OPT_LIST_TYPES },
+		{ "noheadings", no_argument, NULL, NOHEADINGS_OPTION },
+		{ "raw", no_argument, NULL, RAW_OPTION },
+		{ "bytes", no_argument, NULL, BYTES_OPTION },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = {	/* rows and cols in ASCII order */
+		{ 'a','o','s', SHOW_OPTION },
+		{ 'a','o', BYTES_OPTION },
+		{ 'a','o', NOHEADINGS_OPTION },
+		{ 'a','o', RAW_OPTION },
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	struct swapon_ctl ctl;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	memset(&ctl, 0, sizeof(struct swapon_ctl));
+	ctl.props.priority = -1;	/* negative: no priority requested */
+
+	mnt_init_debug(0);
+	mntcache = mnt_new_cache();
+
+	while ((c = getopt_long(argc, argv, "ahd::efo:p:svVL:U:",
+				long_opts, NULL)) != -1) {
+
+		err_exclusive_options(c, long_opts, excl, excl_st);
+
+		switch (c) {
+		case 'a':		/* all */
+			ctl.all = 1;
+			break;
+		case 'h':		/* help */
+			usage();
+			break;
+		case 'o':
+			/* parsed later, see parse_options() below */
+			options = optarg;
+			break;
+		case 'p':		/* priority */
+			ctl.props.priority = strtos16_or_err(optarg,
+					_("failed to parse priority"));
+			break;
+		case 'L':
+			add_label(optarg);
+			break;
+		case 'U':
+			add_uuid(optarg);
+			break;
+		case 'd':
+			ctl.props.discard |= SWAP_FLAG_DISCARD;
+			if (optarg) {
+				/* accept "-d=policy" as well as "-dpolicy" */
+				if (*optarg == '=')
+					optarg++;
+
+				if (strcmp(optarg, "once") == 0)
+					ctl.props.discard |= SWAP_FLAG_DISCARD_ONCE;
+				else if (strcmp(optarg, "pages") == 0)
+					ctl.props.discard |= SWAP_FLAG_DISCARD_PAGES;
+				else
+					errx(EXIT_FAILURE, _("unsupported discard policy: %s"), optarg);
+			}
+			break;
+		case 'e':		/* ifexists */
+			ctl.props.no_fail = 1;
+			break;
+		case 'f':
+			ctl.fix_page_size = 1;
+			break;
+		case 's':		/* status report */
+			status = display_summary();
+			return status;
+		case 'v':		/* be chatty */
+			ctl.verbose = 1;
+			break;
+		case SHOW_OPTION:
+			if (optarg) {
+				ctl.ncolumns = string_to_idarray(optarg,
+							ctl.columns,
+							ARRAY_SIZE(ctl.columns),
+							column_name_to_id);
+				if (ctl.ncolumns < 0)
+					return EXIT_FAILURE;
+			}
+			ctl.show = 1;
+			break;
+		case OPT_LIST_TYPES:
+			/* select every column described in infos[] */
+			for (ctl.ncolumns = 0; (size_t)ctl.ncolumns < ARRAY_SIZE(infos); ctl.ncolumns++)
+				ctl.columns[ctl.ncolumns] = ctl.ncolumns;
+			break;
+		case NOHEADINGS_OPTION:
+			ctl.no_heading = 1;
+			break;
+		case RAW_OPTION:
+			ctl.raw = 1;
+			break;
+		case BYTES_OPTION:
+			ctl.bytes = 1;
+			break;
+		case 'V':		/* version */
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 0:
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+	argv += optind;
+
+	/* --show, or nothing to enable at all: print the table and stop */
+	if (ctl.show || (!ctl.all && !numof_labels() && !numof_uuids() && *argv == NULL)) {
+		if (!ctl.ncolumns) {
+			/* default columns */
+			ctl.columns[ctl.ncolumns++] = COL_PATH;
+			ctl.columns[ctl.ncolumns++] = COL_TYPE;
+			ctl.columns[ctl.ncolumns++] = COL_SIZE;
+			ctl.columns[ctl.ncolumns++] = COL_USED;
+			ctl.columns[ctl.ncolumns++] = COL_PRIO;
+		}
+		status = show_table(&ctl);
+		return status;
+	}
+
+	/* --ifexists is accepted together with --all only */
+	if (ctl.props.no_fail && !ctl.all) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	if (ctl.all)
+		status |= swapon_all(&ctl);
+
+	if (options)
+		parse_options(&ctl.props, options);
+
+	for (i = 0; i < numof_labels(); i++)
+		status |= swapon_by_label(&ctl, get_label(i));
+
+	for (i = 0; i < numof_uuids(); i++)
+		status |= swapon_by_uuid(&ctl, get_uuid(i));
+
+	while (*argv != NULL)
+		status |= do_swapon(&ctl, &ctl.props, *argv++, FALSE);
+
+	free_tables();
+	mnt_unref_cache(mntcache);
+
+	return status;
+}
diff --git a/sys-utils/switch_root.8 b/sys-utils/switch_root.8
new file mode 100644
index 0000000..4e162b3
--- /dev/null
+++ b/sys-utils/switch_root.8
@@ -0,0 +1,61 @@
+.\" Karel Zak <kzak@redhat.com>
+.TH SWITCH_ROOT 8 "June 2009" "util-linux" "System Administration"
+.SH NAME
+switch_root \- switch to another filesystem as the root of the mount tree
+.SH SYNOPSIS
+.B switch_root
+.RB [ \-hV ]
+.LP
+.B switch_root
+.I newroot
+.I init
+.RI [ arg ...]
+.SH DESCRIPTION
+.B switch_root
+moves already mounted /proc, /dev, /sys and /run to
+.I newroot
+and makes
+.I newroot
+the new root filesystem and starts the
+.I init
+process.
+
+.B WARNING: switch_root recursively removes all files and directories on the current root filesystem.
+
+.SH OPTIONS
+.IP "\fB\-h, \-\-help\fP"
+Display help text and exit.
+.IP "\fB\-V, \-\-version\fP"
+Display version information and exit.
+
+.SH RETURN VALUE
+.B switch_root
+returns 0 on success and 1 on failure.
+
+.SH NOTES
+switch_root will fail to function if
+.B newroot
+is not the root of a mount. If you want to switch root into a directory that
+does not meet this requirement then you can first use a bind-mounting trick to
+turn any directory into a mount point:
+.sp
+.nf
+.RS
+mount --bind $DIR $DIR
+.RE
+.fi
+
+.SH "SEE ALSO"
+.BR chroot (2),
+.BR init (8),
+.BR mkinitrd (8),
+.BR mount (8)
+.SH AUTHORS
+.nf
+Peter Jones <pjones@redhat.com>
+Jeremy Katz <katzj@redhat.com>
+Karel Zak <kzak@redhat.com>
+.fi
+.SH AVAILABILITY
+The switch_root command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/switch_root.c b/sys-utils/switch_root.c
new file mode 100644
index 0000000..a85ce24
--- /dev/null
+++ b/sys-utils/switch_root.c
@@ -0,0 +1,263 @@
+/*
+ * switchroot.c - switch to new root directory and start init.
+ *
+ * Copyright 2002-2009 Red Hat, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * Authors:
+ * Peter Jones <pjones@redhat.com>
+ * Jeremy Katz <katzj@redhat.com>
+ */
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <sys/param.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <ctype.h>
+#include <dirent.h>
+#include <getopt.h>
+
+#include "c.h"
+#include "nls.h"
+#include "closestream.h"
+#include "statfs_magic.h"
+
+#ifndef MS_MOVE
+#define MS_MOVE 8192
+#endif
+
+#ifndef MNT_DETACH
+#define MNT_DETACH 0x00000002 /* Just detach from the tree */
+#endif
+
+/*
+ * Remove all files and directories below the directory open at @fd -- don't
+ * cross mountpoints (entries on a device other than the directory's own are
+ * left alone).
+ *
+ * The function takes ownership of @fd: the descriptor is closed before
+ * returning, either by closedir() on the stream created by fdopendir() or,
+ * when fdopendir() fails, by close(). Callers must not close it again.
+ *
+ * Failures on individual entries are reported and skipped. Returns 0 when
+ * the directory was read to its end, -1 when it could not be opened,
+ * stat'ed or read.
+ */
+static int recursiveRemove(int fd)
+{
+	struct stat rb;
+	DIR *dir;
+	int rc = -1;
+	int dfd;
+
+	if (!(dir = fdopendir(fd))) {
+		warn(_("failed to open directory"));
+		goto done;
+	}
+
+	/* fdopendir() precludes us from continuing to use the input fd */
+	dfd = dirfd(dir);
+
+	if (fstat(dfd, &rb)) {
+		warn(_("stat failed"));
+		goto done;
+	}
+
+	while(1) {
+		struct dirent *d;
+		int isdir = 0;
+
+		/* readdir() returns NULL for both end-of-directory and error;
+		 * errno tells them apart */
+		errno = 0;
+		if (!(d = readdir(dir))) {
+			if (errno) {
+				warn(_("failed to read directory"));
+				goto done;
+			}
+			break;	/* end of directory */
+		}
+
+		if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+			continue;
+#ifdef _DIRENT_HAVE_D_TYPE
+		if (d->d_type == DT_DIR || d->d_type == DT_UNKNOWN)
+#endif
+		{
+			struct stat sb;
+
+			if (fstatat(dfd, d->d_name, &sb, AT_SYMLINK_NOFOLLOW)) {
+				warn(_("stat of %s failed"), d->d_name);
+				continue;
+			}
+
+			/* skip if device is not the same */
+			if (sb.st_dev != rb.st_dev)
+				continue;
+
+			/* remove subdirectories */
+			if (S_ISDIR(sb.st_mode)) {
+				int cfd;
+
+				cfd = openat(dfd, d->d_name, O_RDONLY);
+				if (cfd >= 0)
+					recursiveRemove(cfd);	/* it closes cfd too */
+				isdir = 1;
+			}
+		}
+
+		if (unlinkat(dfd, d->d_name, isdir ? AT_REMOVEDIR : 0))
+			warn(_("failed to unlink %s"), d->d_name);
+	}
+
+	rc = 0;	/* success */
+
+done:
+	if (dir)
+		closedir(dir);	/* closes the underlying descriptor as well */
+	else
+		close(fd);
+	return rc;
+}
+
+/*
+ * Make @newroot the root of the mount tree.
+ *
+ * /dev, /proc, /sys and /run are moved below @newroot (or detached when the
+ * target in @newroot is missing or is on another device than @newroot),
+ * then @newroot is moved over "/" and the process chroot()s into it.
+ * Finally the content of the old root is removed, but only when the old
+ * root is on ramfs or tmpfs. The removal runs in a forked child; when
+ * fork() fails it is done by the calling process itself.
+ *
+ * Returns 0 on success, -1 on error.
+ */
+static int switchroot(const char *newroot)
+{
+	/* Don't try to unmount the old "/", there's no way to do it. */
+	const char *umounts[] = { "/dev", "/proc", "/sys", "/run", NULL };
+	int i;
+	int cfd;
+	pid_t pid;
+	struct stat newroot_stat, sb;
+
+	if (stat(newroot, &newroot_stat) != 0) {
+		warn(_("stat of %s failed"), newroot);
+		return -1;
+	}
+
+	for (i = 0; umounts[i] != NULL; i++) {
+		char newmount[PATH_MAX];
+
+		snprintf(newmount, sizeof(newmount), "%s%s", newroot, umounts[i]);
+
+		if ((stat(newmount, &sb) != 0) || (sb.st_dev != newroot_stat.st_dev)) {
+			/* mount point seems to be mounted already or stat failed */
+			umount2(umounts[i], MNT_DETACH);
+			continue;
+		}
+
+		if (mount(umounts[i], newmount, NULL, MS_MOVE, NULL) < 0) {
+			warn(_("failed to mount moving %s to %s"),
+				umounts[i], newmount);
+			warnx(_("forcing unmount of %s"), umounts[i]);
+			umount2(umounts[i], MNT_FORCE);
+		}
+	}
+
+	if (chdir(newroot)) {
+		warn(_("failed to change directory to %s"), newroot);
+		return -1;
+	}
+
+	/* keep a handle on the old root; it is not reachable by path once
+	 * the new root has been moved over "/" */
+	cfd = open("/", O_RDONLY);
+	if (cfd < 0) {
+		warn(_("cannot open %s"), "/");
+		return -1;
+	}
+
+	if (mount(newroot, "/", NULL, MS_MOVE, NULL) < 0) {
+		close(cfd);
+		warn(_("failed to mount moving %s to /"), newroot);
+		return -1;
+	}
+
+	if (chroot(".")) {
+		close(cfd);
+		warn(_("failed to change root"));
+		return -1;
+	}
+
+	pid = fork();
+	if (pid <= 0) {
+		/* child, or the parent itself when fork() failed */
+		struct statfs stfs;
+
+		if (fstatfs(cfd, &stfs) == 0 &&
+		    (F_TYPE_EQUAL(stfs.f_type, STATFS_RAMFS_MAGIC) ||
+		     F_TYPE_EQUAL(stfs.f_type, STATFS_TMPFS_MAGIC)))
+			recursiveRemove(cfd);	/* it closes cfd too */
+		else {
+			warn(_("old root filesystem is not an initramfs"));
+			close(cfd);
+		}
+		if (pid == 0)
+			exit(EXIT_SUCCESS);
+	} else
+		close(cfd);	/* parent: the child works on its own copy */
+
+	return 0;
+}
+
+/* Print the help text to stdout and exit with EXIT_SUCCESS. */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *out = stdout;
+
+	/* synopsis */
+	fputs(USAGE_HEADER, out);
+	fprintf(out, _(" %s [options] <newrootdir> <init> <args to init>\n"),
+		program_invocation_short_name);
+
+	/* one-line description */
+	fputs(USAGE_SEPARATOR, out);
+	fputs(_("Switch to another filesystem as the root of the mount tree.\n"), out);
+
+	/* options and pointer to the man page */
+	fputs(USAGE_OPTIONS, out);
+	printf(USAGE_HELP_OPTIONS(16));
+	printf(USAGE_MAN_TAIL("switch_root(8)"));
+
+	exit(EXIT_SUCCESS);
+}
+
+/*
+ * switch_root entry point: switch_root [-hV] | <newroot> <init> [<arg>...]
+ *
+ * After the switch <init> is executed with <init> itself as argv[0],
+ * followed by the remaining arguments. Does not return on success.
+ */
+int main(int argc, char *argv[])
+{
+	char *newroot, *init, **initargs;
+	int c;
+	static const struct option longopts[] = {
+		{"version", no_argument, NULL, 'V'},
+		{"help", no_argument, NULL, 'h'},
+		{NULL, 0, NULL, 0}
+	};
+
+	atexit(close_stdout);
+
+	/* "+": stop at the first non-option so that the arguments meant for
+	 * init are never interpreted as switch_root options */
+	while ((c = getopt_long(argc, argv, "+Vh", longopts, NULL)) != -1)
+		switch (c) {
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+
+	/* Take the positional arguments from optind rather than from fixed
+	 * indexes, so that an explicit "--" separator is not mistaken for
+	 * <newroot>. */
+	if (argc - optind < 2) {
+		warnx(_("not enough arguments"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	newroot = argv[optind];
+	init = argv[optind + 1];
+	initargs = &argv[optind + 1];
+
+	if (!*newroot || !*init) {
+		warnx(_("bad usage"));
+		errtryhelp(EXIT_FAILURE);
+	}
+
+	if (switchroot(newroot))
+		errx(EXIT_FAILURE, _("failed. Sorry."));
+
+	/* only a warning: execv() below reports the definitive failure */
+	if (access(init, X_OK))
+		warn(_("cannot access %s"), init);
+
+	execv(init, initargs);
+	errexec(init);
+}
+
diff --git a/sys-utils/tunelp.8 b/sys-utils/tunelp.8
new file mode 100644
index 0000000..90db834
--- /dev/null
+++ b/sys-utils/tunelp.8
@@ -0,0 +1,122 @@
+.\" Copyright (C) 1992-1997 Michael K. Johnson <johnsonm@redhat.com>
+.\" Copyright (C) 1998 Andrea Arcangeli <andrea@e-mind.com>
+.\" It may be distributed under the terms of the GNU General Public License,
+.\" version 2, or any higher version. See section COPYING of the GNU General
+.\" Public license for conditions under which this file may be redistributed.
+.\"
+.TH TUNELP 8 "October 2011" "util-linux" "System Administration"
+.SH NAME
+tunelp \- set various parameters for the lp device
+.SH SYNOPSIS
+.B tunelp
+[options]
+.I device
+.SH DESCRIPTION
+\fBtunelp\fP sets several parameters for the /dev/lp\fI?\fP devices, for
+better performance (or for any performance at all, if your printer won't work
+without it...) Without parameters, it tells whether the device is using
+interrupts, and if so, which one. With parameters, it sets the device
+characteristics accordingly.
+.SH OPTIONS
+.TP
+\fB\-i\fR, \fB\-\-irq\fR \fIargument\fR
+specifies the IRQ to use for the parallel port in question. If this is set
+to something non-zero, \-t and \-c have no effect. If your port does not use
+interrupts, this option will make printing stop. The command
+.B tunelp \-i 0
+restores non-interrupt driven (polling) action, and your printer should work
+again. If your parallel port does support interrupts, interrupt-driven
+printing should be somewhat faster and more efficient, and will probably be
+desirable.
+.IP
+NOTE: This option will have no effect with kernel 2.1.131 or later since the
+irq is handled by the parport driver. You can change the parport irq for
+example via
+.IR /proc/parport/*/irq .
+Read
+.I /usr/src/linux/Documentation/parport.txt
+for more details on parport.
+.TP
+\fB\-t\fR, \fB\-\-time\fR \fImilliseconds\fR
+is the amount of time in jiffies that the driver waits if the printer doesn't
+take a character for the number of tries dictated by the \-c parameter. 10
+is the default value. If you want fastest possible printing, and don't care
+about system load, you may set this to 0. If you don't care how fast your
+printer goes, or are printing text on a slow printer with a buffer, then 500
+(5 seconds) should be fine, and will give you very low system load. This
+value generally should be lower for printing graphics than text, by a factor
+of approximately 10, for best performance.
+.TP
+\fB\-c\fR, \fB\-\-chars\fR \fIcharacters\fR
+is the number of times to try to output a character to the printer before
+sleeping for \-t \fITIME\fP. It is the number of times around a loop that
+tries to send a character to the printer. 120 appears to be a good value for
+most printers in polling mode. 1000 is the default, because there are some
+printers that become jerky otherwise, but you \fImust\fP set this to `1' to
+achieve maximal CPU efficiency if you are using interrupts. If you have a
+very fast printer, a value of 10 might make more sense even if in polling
+mode. If you have a \fIreally\fP old printer, you can increase this further.
+.IP
+Setting \-t \fITIME\fP to 0 is equivalent to setting \-c \fICHARS\fP to
+infinity.
+.TP
+\fB\-w\fR, \fB\-\-wait\fR \fImilliseconds\fR
+is the number of usec we wait while playing with the strobe signal. While
+most printers appear to be able to deal with an extremely short strobe, some
+printers demand a longer one. Increasing this from the default 1 may make it
+possible to print with those printers. This may also make it possible to use
+longer cables. It's also possible to decrease this value to 0 if your
+printer is fast enough or your machine is slow enough.
+.TP
+\fB\-a\fR, \fB\-\-abort\fR \fI<on|off>\fR
+This is whether to abort on printer error - the default is not to. If you
+are sitting at your computer, you probably want to be able to see an error
+and fix it, and have the printer go on printing. On the other hand, if you
+aren't, you might rather that your printer spooler find out that the printer
+isn't ready, quit trying, and send you mail about it. The choice is yours.
+.TP
+\fB\-o\fR, \fB\-\-check\-status\fR \fI<on|off>\fR
+This option is much like \-a. It makes any
+.BR open (2)
+of this device check to see that the device is on-line and not reporting any
+out of paper or other errors. This is the correct setting for most versions
+of lpd.
+.TP
+\fB\-C\fR, \fB\-\-careful\fR \fI<on|off>\fR
+This option adds extra ("careful") error checking. When this option is on,
+the printer driver will ensure that the printer is on-line and not reporting
+any out of paper or other errors before sending data. This is particularly
+useful for printers that normally appear to accept data when turned off.
+.IP
+NOTE: This option is obsolete because it's the default in 2.1.131 kernel or
+later.
+.TP
+\fB\-s\fR, \fB\-\-status\fR
+This option returns the current printer status, both as a decimal number from
+0..255, and as a list of active flags. When this option is specified, \-q
+off, turning off the display of the current IRQ, is implied.
+.TP
+\fB\-r\fR, \fB\-\-reset\fR
+This option resets the port. It requires a Linux kernel version of 1.1.80 or
+later.
+.TP
+\fB\-q\fR, \fB\-\-print\-irq\fR \fI<on|off>\fR
+This option sets whether the current IRQ setting is displayed.
+.SH NOTES
+.BR \-o ,
+.BR \-C ,
+and
+.B \-s
+all require a Linux kernel version of 1.1.76 or later.
+.PP
+.B \-C
+requires a Linux version prior to 2.1.131.
+.SH FILES
+.I /dev/lp?
+.br
+.I /proc/parport/*/*
+.SH AVAILABILITY
+The tunelp command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/tunelp.c b/sys-utils/tunelp.c
new file mode 100644
index 0000000..fe261f3
--- /dev/null
+++ b/sys-utils/tunelp.c
@@ -0,0 +1,321 @@
+/*
+ * Copyright (C) 1992-1997 Michael K. Johnson, johnsonm@redhat.com
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License, version 2, or any later version. See file COPYING for
+ * information on distribution conditions.
+ */
+
+/*
+ * This command is deprecated. The utility is in maintenance mode,
+ * meaning we keep it in the source tree for backward compatibility
+ * only. Do not waste time making this command better, unless the
+ * fix is about security or other very critical issue.
+ *
+ * See Documentation/deprecated.txt for more information.
+ */
+
+/*
+ * $Log: tunelp.c,v $
+ * Revision 1.9 1998/06/08 19:37:11 janl
+ * Thus compiles tunelp with 2.1.103 kernels
+ *
+ * Revision 1.8 1997/07/06 00:14:06 aebr
+ * Fixes to silence -Wall.
+ *
+ * Revision 1.7 1997/06/20 16:10:38 janl
+ * tunelp refreshed from authors archive.
+ *
+ * Revision 1.9 1997/06/20 12:56:43 johnsonm
+ * Finished fixing license terms.
+ *
+ * Revision 1.8 1997/06/20 12:34:59 johnsonm
+ * Fixed copyright and license.
+ *
+ * Revision 1.7 1995/03/29 11:16:23 johnsonm
+ * TYPO fixed...
+ *
+ * Revision 1.6 1995/03/29 11:12:15 johnsonm
+ * Added third argument to ioctl needed with new kernels
+ *
+ * Revision 1.5 1995/01/13 10:33:43 johnsonm
+ * Chris's changes for new ioctl numbers and backwards compatibility
+ * and the reset ioctl.
+ *
+ * Revision 1.4 1995/01/03 17:42:14 johnsonm
+ * -s isn't supposed to take an argument; removed : after s in getopt...
+ *
+ * Revision 1.3 1995/01/03 07:36:49 johnsonm
+ * Fixed typo
+ *
+ * Revision 1.2 1995/01/03 07:33:44 johnsonm
+ * revisions for lp driver updates in Linux 1.1.76
+ *
+ * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL>
+ * - added Native Language Support
+ *
+ * 1999-05-07 Merged LPTRUSTIRQ patch by Andrea Arcangeli (1998/11/29), aeb
+ *
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include <linux/lp.h>
+
+#include "nls.h"
+#include "closestream.h"
+#include "strutils.h"
+
+#define EXIT_LP_MALLOC 2
+#define EXIT_LP_BADVAL 3
+#define EXIT_LP_IO_ERR 4
+
+#define XALLOC_EXIT_CODE EXIT_LP_MALLOC
+#include "xalloc.h"
+
+struct command {
+ long op;
+ long val;
+ struct command *next;
+};
+
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _(" %s [options] <device>\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Set various parameters for the line printer.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -i, --irq <num> specify parallel port irq\n"), out);
+ fputs(_(" -t, --time <ms> driver wait time in milliseconds\n"), out);
+ fputs(_(" -c, --chars <num> number of output characters before sleep\n"), out);
+ fputs(_(" -w, --wait <us> strobe wait in micro seconds\n"), out);
+ /* TRANSLATORS: do not translate <on|off> arguments. The
+ argument reader does not recognize locale, unless `on' is
+ exactly that very same string. */
+ fputs(_(" -a, --abort <on|off> abort on error\n"), out);
+ fputs(_(" -o, --check-status <on|off> check printer status before printing\n"), out);
+ fputs(_(" -C, --careful <on|off> extra checking to status check\n"), out);
+ fputs(_(" -s, --status query printer status\n"), out);
+ fputs(_(" -r, --reset reset the port\n"), out);
+ fputs(_(" -q, --print-irq <on|off> display current irq setting\n"), out);
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(30));
+ printf(USAGE_MAN_TAIL("tunelp(8)"));
+
+ exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char **argv)
+{
+ int c, fd, irq, status, show_irq, offset = 0, retval;
+ char *filename;
+ struct stat statbuf;
+ struct command *cmds, *cmdst;
+ static const struct option longopts[] = {
+ {"irq", required_argument, NULL, 'i'},
+ {"time", required_argument, NULL, 't'},
+ {"chars", required_argument, NULL, 'c'},
+ {"wait", required_argument, NULL, 'w'},
+ {"abort", required_argument, NULL, 'a'},
+ {"check-status", required_argument, NULL, 'o'},
+ {"careful", required_argument, NULL, 'C'},
+ {"status", no_argument, NULL, 's'},
+ {"trust-irq", required_argument, NULL, 'T'},
+ {"reset", no_argument, NULL, 'r'},
+ {"print-irq", required_argument, NULL, 'q'},
+ {"version", no_argument, NULL, 'V'},
+ {"help", no_argument, NULL, 'h'},
+ {NULL, 0, NULL, 0}
+ };
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ strutils_set_exitcode(EXIT_LP_BADVAL);
+
+ if (argc < 2) {
+ warnx(_("not enough arguments"));
+ errtryhelp(EXIT_FAILURE);
+ }
+
+ cmdst = cmds = xmalloc(sizeof(struct command));
+ cmds->next = NULL;
+
+ show_irq = 1;
+ while ((c = getopt_long(argc, argv, "t:c:w:a:i:ho:C:sq:rT:vV", longopts, NULL)) != -1) {
+ switch (c) {
+ case 'h':
+ usage();
+ break;
+ case 'i':
+ cmds->op = LPSETIRQ;
+ cmds->val = strtol_or_err(optarg, _("argument error"));
+ cmds->next = xmalloc(sizeof(struct command));
+ cmds = cmds->next;
+ cmds->next = NULL;
+ break;
+ case 't':
+ cmds->op = LPTIME;
+ cmds->val = strtol_or_err(optarg, _("argument error"));
+ cmds->next = xmalloc(sizeof(struct command));
+ cmds = cmds->next;
+ cmds->next = NULL;
+ break;
+ case 'c':
+ cmds->op = LPCHAR;
+ cmds->val = strtol_or_err(optarg, _("argument error"));
+ cmds->next = xmalloc(sizeof(struct command));
+ cmds = cmds->next;
+ cmds->next = NULL;
+ break;
+ case 'w':
+ cmds->op = LPWAIT;
+ cmds->val = strtol_or_err(optarg, _("argument error"));
+ cmds->next = xmalloc(sizeof(struct command));
+ cmds = cmds->next;
+ cmds->next = NULL;
+ break;
+ case 'a':
+ cmds->op = LPABORT;
+ cmds->val = parse_switch(optarg, _("argument error"), "on", "off", NULL);
+ cmds->next = xmalloc(sizeof(struct command));
+ cmds = cmds->next;
+ cmds->next = NULL;
+ break;
+ case 'q':
+ show_irq = parse_switch(optarg, _("argument error"), "on", "off", NULL);
+ break;
+ case 'o':
+ cmds->op = LPABORTOPEN;
+ cmds->val = parse_switch(optarg, _("argument error"), "on", "off", NULL);
+ cmds->next = xmalloc(sizeof(struct command));
+ cmds = cmds->next;
+ cmds->next = NULL;
+ break;
+ case 'C':
+ cmds->op = LPCAREFUL;
+ cmds->val = parse_switch(optarg, _("argument error"), "on", "off", NULL);
+ cmds->next = xmalloc(sizeof(struct command));
+ cmds = cmds->next;
+ cmds->next = NULL;
+ break;
+ case 's':
+ show_irq = 0;
+ cmds->op = LPGETSTATUS;
+ cmds->val = 0;
+ cmds->next = xmalloc(sizeof(struct command));
+ cmds = cmds->next;
+ cmds->next = NULL;
+ break;
+ case 'r':
+ cmds->op = LPRESET;
+ cmds->val = 0;
+ cmds->next = xmalloc(sizeof(struct command));
+ cmds = cmds->next;
+ cmds->next = NULL;
+ break;
+ case 'v':
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+ if (optind != argc - 1) {
+ warnx(_("no device specified"));
+ errtryhelp(EXIT_FAILURE);
+ }
+
+ filename = xstrdup(argv[optind]);
+ fd = open(filename, O_WRONLY | O_NONBLOCK, 0);
+ /* Need to open O_NONBLOCK in case ABORTOPEN is already set
+ * and printer is off or off-line or in an error condition.
+ * Otherwise we would abort...
+ */
+ if (fd < 0)
+ err(EXIT_FAILURE, "%s", filename);
+
+ if (fstat(fd, &statbuf))
+ err(EXIT_FAILURE, "%s: stat() failed", filename);
+
+ if (!S_ISCHR(statbuf.st_mode)) {
+ warnx(_("%s not an lp device"), filename);
+ errtryhelp(EXIT_FAILURE);
+ }
+ /* Allow for binaries compiled under a new kernel to work on
+ * the old ones The irq argument to ioctl isn't touched by
+ * the old kernels, but we don't want to cause the kernel to
+ * complain if we are using a new kernel
+ */
+ if (LPGETIRQ >= 0x0600 && ioctl(fd, LPGETIRQ, &irq) < 0
+ && errno == EINVAL)
+ /* We don't understand the new ioctls */
+ offset = 0x0600;
+
+ cmds = cmdst;
+ while (cmds->next) {
+ if (cmds->op == LPGETSTATUS) {
+ status = 0xdeadbeef;
+ retval = ioctl(fd, LPGETSTATUS - offset, &status);
+ if (retval < 0)
+ warnx(_("LPGETSTATUS error"));
+ else {
+ if (status == (int)0xdeadbeef)
+ /* a few 1.1.7x kernels will do this */
+ status = retval;
+ printf(_("%s status is %d"), filename, status);
+ if (!(status & LP_PBUSY))
+ printf(_(", busy"));
+ if (!(status & LP_PACK))
+ printf(_(", ready"));
+ if ((status & LP_POUTPA))
+ printf(_(", out of paper"));
+ if ((status & LP_PSELECD))
+ printf(_(", on-line"));
+ if (!(status & LP_PERRORP))
+ printf(_(", error"));
+ printf("\n");
+ }
+ } else
+ if (ioctl(fd, cmds->op - offset, cmds->val) < 0)
+ warn(_("ioctl failed"));
+ cmdst = cmds;
+ cmds = cmds->next;
+ free(cmdst);
+ }
+
+ if (show_irq) {
+ irq = 0xdeadbeef;
+ retval = ioctl(fd, LPGETIRQ - offset, &irq);
+ if (retval == -1)
+ err(EXIT_LP_IO_ERR, _("LPGETIRQ error"));
+ if (irq == (int)0xdeadbeef)
+ /* up to 1.1.77 will do this */
+ irq = retval;
+ if (irq)
+ printf(_("%s using IRQ %d\n"), filename, irq);
+ else
+ printf(_("%s using polling\n"), filename);
+ }
+ free(filename);
+ close(fd);
+
+ return EXIT_SUCCESS;
+}
diff --git a/sys-utils/umount.8 b/sys-utils/umount.8
new file mode 100644
index 0000000..f94d2f4
--- /dev/null
+++ b/sys-utils/umount.8
@@ -0,0 +1,267 @@
+.\" Copyright (c) 1996 Andries Brouwer
+.\" This page is somewhat derived from a page that was
+.\" (c) 1980, 1989, 1991 The Regents of the University of California
+.\" and had been heavily modified by Rik Faith and myself.
+.\"
+.\" This is free documentation; you can redistribute it and/or
+.\" modify it under the terms of the GNU General Public License as
+.\" published by the Free Software Foundation; either version 2 of
+.\" the License, or (at your option) any later version.
+.\"
+.\" The GNU General Public License's references to "object code"
+.\" and "executables" are to be interpreted as the output of any
+.\" document formatting or typesetting system, including
+.\" intermediate and printed output.
+.\"
+.\" This manual is distributed in the hope that it will be useful,
+.\" but WITHOUT ANY WARRANTY; without even the implied warranty of
+.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+.\" GNU General Public License for more details.
+.\"
+.\" You should have received a copy of the GNU General Public License along
+.\" with this program; if not, write to the Free Software Foundation, Inc.,
+.\" 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+.\"
+.TH UMOUNT 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+umount \- unmount file systems
+.SH SYNOPSIS
+.B umount \-a
+.RB [ \-dflnrv ]
+.RB [ \-t
+.IR fstype ]
+.RB [ \-O
+.IR option ...]
+.sp
+.B umount
+.RB [ \-dflnrv ]
+.RI { directory | device }...
+.sp
+.B umount
+.BR \-h | \-V
+
+.SH DESCRIPTION
+The
+.B umount
+command detaches the mentioned file system(s) from the file hierarchy. A
+file system is specified by giving the directory where it has been
+mounted. Giving the special device on which the file system lives may
+also work, but is obsolete, mainly because it will fail in case this
+device was mounted on more than one directory.
+.PP
+Note that a file system cannot be unmounted when it is 'busy' - for
+example, when there are open files on it, or when some process has its
+working directory there, or when a swap file on it is in use. The
+offending process could even be
+.B umount
+itself - it opens libc, and libc in its turn may open for example locale
+files. A lazy unmount avoids this problem, but it may introduce other
+issues. See \fB\-\-lazy\fR description below.
+.SH OPTIONS
+.TP
+.BR \-a , " \-\-all"
+All of the filesystems described in
+.I /proc/self/mountinfo
+(or in deprecated /etc/mtab)
+are unmounted, except the proc, devfs, devpts, sysfs, rpc_pipefs and nfsd
+filesystems. This list of the filesystems may be replaced by \fB\-\-types\fR
+umount option.
+.TP
+.BR \-A , " \-\-all\-targets"
+Unmount all mountpoints in the current namespace for the specified filesystem.
+The filesystem can be specified by one of the mountpoints or the device name (or
+UUID, etc.). When this option is used together with \fB\-\-recursive\fR, then
+all nested mounts within the filesystem are recursively unmounted.
+This option is only supported on systems where /etc/mtab is a symlink
+to /proc/mounts.
+.TP
+.BR \-c , " \-\-no\-canonicalize"
+Do not canonicalize paths. Path canonicalization is based on the
+.BR stat (2)
+and
+.BR readlink (2)
+system calls. These system calls may hang in some cases (for example on NFS if
+server is not available). The option has to be used with canonical path to the
+mount point.
+
+For more details about this option see the
+.BR mount (8)
+man page. Note that \fBumount\fR does not pass this option to the
+.BI /sbin/umount. type
+helpers.
+.TP
+.BR \-d , " \-\-detach\-loop"
+When the unmounted device was a loop device, also free this loop
+device. This option is unnecessary for devices initialized by
+.BR mount (8),
+in this case "autoclear" functionality is enabled by default.
+.TP
+.B \-\-fake
+Causes everything to be done except for the actual system call or umount helper
+execution; this 'fakes' unmounting the filesystem. It can be used to remove
+entries from the deprecated
+.I /etc/mtab
+that were unmounted earlier with the
+.B \-n
+option.
+.TP
+.BR \-f , " \-\-force"
+Force an unmount (in case of an unreachable NFS system).
+
+Note that this option does not guarantee that umount command does not hang.
+It's strongly recommended to use absolute paths without symlinks to avoid
+unwanted readlink and stat system calls on unreachable NFS in umount.
+.TP
+.BR \-i , " \-\-internal\-only"
+Do not call the \fB/sbin/umount.\fIfilesystem\fR helper even if it exists.
+By default such a helper program is called if it exists.
+.TP
+.BR \-l , " \-\-lazy"
+Lazy unmount. Detach the filesystem from the file hierarchy now,
+and clean up all references to this filesystem as soon as it is not busy
+anymore.
+
+A system reboot would be expected in the near future if you're going to use this
+option for a network filesystem or a local filesystem with submounts. The
+recommended use-case for \fBumount -l\fR is to prevent hangs on shutdown due to
+an unreachable network share where a normal umount will hang due to a downed
+server or a network partition. Remounts of the share will not be possible.
+
+.TP
+.BR \-N , " \-\-namespace " \fIns
+Perform umount in namespace specified by \fIns\fR.
+\fIns\fR is either PID of process running in that namespace
+or special file representing that namespace.
+.sp
+.BR umount (8)
+switches to the namespace when it reads /etc/fstab, writes /etc/mtab (or writes to /run/mount) and calls
+.BR umount (2)
+system call, otherwise it runs in the original namespace. It means that the target namespace does not have
+to contain any libraries or other requirements necessary to execute the
+.BR umount (2)
+call.
+.sp
+See \fBnamespaces\fR(7) for more information.
+.TP
+.BR \-n , " \-\-no\-mtab"
+Unmount without writing in
+.IR /etc/mtab .
+.TP
+.BR \-O , " \-\-test\-opts " \fIoption\fR...
+Unmount only the filesystems that have the specified option set in
+.IR /etc/fstab .
+More than one option may be specified in a comma-separated list.
+Each option can be prefixed with
+.B no
+to indicate that no action should be taken for this option.
+.TP
+.BR \-q , " \-\-quiet"
+Suppress "not mounted" error messages.
+.TP
+.BR \-R , " \-\-recursive"
+Recursively unmount each specified directory. Recursion for each directory will
+stop if any unmount operation in the chain fails for any reason. The relationship
+between mountpoints is determined by /proc/self/mountinfo entries. The filesystem
+must be specified by mountpoint path; a recursive unmount by device name (or UUID)
+is unsupported.
+.TP
+.BR \-r , " \-\-read\-only"
+When an unmount fails, try to remount the filesystem read-only.
+.TP
+.BR \-t , " \-\-types " \fItype\fR...
+Indicate that the actions should only be taken on filesystems of the
+specified
+.IR type .
+More than one type may be specified in a comma-separated list. The list
+of filesystem types can be prefixed with
+.B no
+to indicate that no action should be taken for all of the mentioned types.
+Note that
+.B umount
+reads information about mounted filesystems from kernel (/proc/mounts) and
+filesystem names may be different than filesystem names used in the /etc/fstab
+(e.g. "nfs4" vs. "nfs").
+.TP
+.BR \-v , " \-\-verbose"
+Verbose mode.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH "LOOP DEVICE"
+The
+.B umount
+command will automatically detach loop device previously initialized by
+.BR mount (8)
+command independently of /etc/mtab.
+
+In this case the device is initialized with "autoclear" flag (see
+.BR losetup (8)
+output for more details), otherwise it's necessary to use the option \fB\-\-detach\-loop\fR
+or call \fBlosetup -d <device>\fR. The autoclear feature is supported since Linux 2.6.25.
+.SH EXTERNAL HELPERS
+The syntax of external unmount helpers is:
+.PP
+.RS
+.BI umount. suffix
+.RI { directory | device }
+.RB [ \-flnrv ]
+.RB [ \-N
+.IR namespace ]
+.RB [ \-t
+.IR type . subtype ]
+.RE
+.PP
+where \fIsuffix\fR is the filesystem type (or the value from a
+\fBuhelper=\fR or \fBhelper=\fR marker in the mtab file).
+The \fB\-t\fR option can be used for filesystems that
+have subtype support. For example:
+.PP
+.RS
+.B umount.fuse \-t fuse.sshfs
+.RE
+.PP
+A \fBuhelper=\fIsomething\fR marker (unprivileged helper) can appear in
+the \fI/etc/mtab\fR file when ordinary users need to be able to unmount
+a mountpoint that is not defined in \fI/etc/fstab\fR
+(for example for a device that was mounted by \fBudisks\fR(1)).
+.PP
+A \fBhelper=\fItype\fR marker in the mtab file will redirect
+all unmount requests
+to the \fB/sbin/umount.\fItype\fR helper independently of UID.
+.PP
+Note that \fI/etc/mtab\fR is currently deprecated and helper= and other
+userspace mount options are maintained by libmount.
+.SH FILES
+.TP
+.I /etc/mtab
+table of mounted filesystems (deprecated and usually replaced by
+symlink to /proc/mounts)
+.TP
+.I /etc/fstab
+table of known filesystems
+.TP
+.I /proc/self/mountinfo
+table of mounted filesystems generated by kernel.
+.SH ENVIRONMENT
+.IP LIBMOUNT_FSTAB=<path>
+overrides the default location of the fstab file (ignored for suid)
+.IP LIBMOUNT_MTAB=<path>
+overrides the default location of the mtab file (ignored for suid)
+.IP LIBMOUNT_DEBUG=all
+enables libmount debug output
+.SH "SEE ALSO"
+.BR umount (2),
+.BR losetup (8),
+.BR mount (8)
+.SH HISTORY
+A
+.B umount
+command appeared in Version 6 AT&T UNIX.
+.SH AVAILABILITY
+The umount command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/umount.c b/sys-utils/umount.c
new file mode 100644
index 0000000..b021088
--- /dev/null
+++ b/sys-utils/umount.c
@@ -0,0 +1,610 @@
+/*
+ * umount(8) -- unmount a filesystem
+ *
+ * Copyright (C) 2011 Red Hat, Inc. All rights reserved.
+ * Written by Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/types.h>
+
+#include <libmount.h>
+
+#include "nls.h"
+#include "c.h"
+#include "env.h"
+#include "closestream.h"
+#include "pathnames.h"
+#include "canonicalize.h"
+
+#define XALLOC_EXIT_CODE MNT_EX_SYSERR
+#include "xalloc.h"
+
+#define OPTUTILS_EXIT_CODE MNT_EX_USAGE
+#include "optutils.h"
+
+static int quiet;
+
+static int table_parser_errcb(struct libmnt_table *tb __attribute__((__unused__)),
+ const char *filename, int line)
+{
+ if (filename)
+ warnx(_("%s: parse error at line %d -- ignored"), filename, line);
+ return 1;
+}
+
+
+static void __attribute__((__noreturn__)) print_version(void)
+{
+ const char *ver = NULL;
+ const char **features = NULL, **p;
+
+ mnt_get_library_version(&ver);
+ mnt_get_library_features(&features);
+
+ printf(_("%s from %s (libmount %s"),
+ program_invocation_short_name,
+ PACKAGE_STRING,
+ ver);
+ p = features;
+ while (p && *p) {
+ fputs(p == features ? ": " : ", ", stdout);
+ fputs(*p++, stdout);
+ }
+ fputs(")\n", stdout);
+ exit(MNT_EX_SUCCESS);
+}
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _(
+ " %1$s [-hV]\n"
+ " %1$s -a [options]\n"
+ " %1$s [options] <source> | <directory>\n"),
+ program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Unmount filesystems.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -a, --all unmount all filesystems\n"), out);
+ fputs(_(" -A, --all-targets unmount all mountpoints for the given device in the\n"
+ " current namespace\n"), out);
+ fputs(_(" -c, --no-canonicalize don't canonicalize paths\n"), out);
+ fputs(_(" -d, --detach-loop if mounted loop device, also free this loop device\n"), out);
+ fputs(_(" --fake dry run; skip the umount(2) syscall\n"), out);
+ fputs(_(" -f, --force force unmount (in case of an unreachable NFS system)\n"), out);
+ fputs(_(" -i, --internal-only don't call the umount.<type> helpers\n"), out);
+ fputs(_(" -n, --no-mtab don't write to /etc/mtab\n"), out);
+ fputs(_(" -l, --lazy detach the filesystem now, clean up things later\n"), out);
+ fputs(_(" -O, --test-opts <list> limit the set of filesystems (use with -a)\n"), out);
+ fputs(_(" -R, --recursive recursively unmount a target with all its children\n"), out);
+ fputs(_(" -r, --read-only in case unmounting fails, try to remount read-only\n"), out);
+ fputs(_(" -t, --types <list> limit the set of filesystem types\n"), out);
+ fputs(_(" -v, --verbose say what is being done\n"), out);
+ fputs(_(" -q, --quiet suppress 'not mounted' error messages\n"), out);
+ fputs(_(" -N, --namespace <ns> perform umount in another namespace\n"), out);
+
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(25));
+ printf(USAGE_MAN_TAIL("umount(8)"));
+
+ exit(MNT_EX_SUCCESS);
+}
+
+static void __attribute__((__noreturn__)) exit_non_root(const char *option)
+{
+ const uid_t ruid = getuid();
+ const uid_t euid = geteuid();
+
+ if (ruid == 0 && euid != 0) {
+ /* user is root, but setuid to non-root */
+ if (option)
+ errx(MNT_EX_USAGE,
+ _("only root can use \"--%s\" option "
+ "(effective UID is %u)"),
+ option, euid);
+ errx(MNT_EX_USAGE, _("only root can do that "
+ "(effective UID is %u)"), euid);
+ }
+ if (option)
+ errx(MNT_EX_USAGE, _("only root can use \"--%s\" option"), option);
+ errx(MNT_EX_USAGE, _("only root can do that"));
+}
+
+static void success_message(struct libmnt_context *cxt)
+{
+ const char *tgt, *src;
+
+ if (mnt_context_helper_executed(cxt)
+ || mnt_context_get_status(cxt) != 1)
+ return;
+
+ tgt = mnt_context_get_target(cxt);
+ if (!tgt)
+ return;
+
+ src = mnt_context_get_source(cxt);
+ if (src)
+ warnx(_("%s (%s) unmounted"), tgt, src);
+ else
+ warnx(_("%s unmounted"), tgt);
+}
+
+static int mk_exit_code(struct libmnt_context *cxt, int rc)
+{
+ char buf[BUFSIZ] = { 0 };
+
+ rc = mnt_context_get_excode(cxt, rc, buf, sizeof(buf));
+
+ /* suppress "not mounted" error message */
+ if (quiet &&
+ rc == MNT_EX_FAIL &&
+ mnt_context_syscall_called(cxt) &&
+ mnt_context_get_syscall_errno(cxt) == EINVAL)
+ return rc;
+
+ /* print errors/warnings */
+ if (*buf) {
+ const char *spec = mnt_context_get_target(cxt);
+ if (!spec)
+ spec = mnt_context_get_source(cxt);
+ if (!spec)
+ spec = "???";
+ warnx("%s: %s.", spec, buf);
+ }
+ return rc;
+}
+
+/* Try every umount mnt_context_next_umount() offers; returns OR of exit codes. */
+static int umount_all(struct libmnt_context *cxt)
+{
+	struct libmnt_iter *itr;
+	struct libmnt_fs *fs;
+	int mntrc, ignored, rc = 0;	/* rc accumulates the per-filesystem exit codes */
+
+	itr = mnt_new_iter(MNT_ITER_BACKWARD);
+	if (!itr) {
+		warn(_("failed to initialize libmount iterator"));
+		return MNT_EX_SYSERR;
+	}
+
+	while (mnt_context_next_umount(cxt, itr, &fs, &mntrc, &ignored) == 0) {
+		const char *tgt = mnt_fs_get_target(fs);
+
+		if (ignored) {
+			if (mnt_context_is_verbose(cxt))
+				printf(_("%-25s: ignored\n"), tgt);
+		} else {
+			int xrc = mk_exit_code(cxt, mntrc);
+
+			/* verbose report, translatable like the "ignored" line above */
+			if (xrc == MNT_EX_SUCCESS && mnt_context_is_verbose(cxt))
+				printf(_("%-25s: successfully unmounted\n"), tgt);
+			rc |= xrc;
+		}
+	}
+
+	mnt_free_iter(itr);
+	return rc;
+}
+
+static int umount_one(struct libmnt_context *cxt, const char *spec)
+{
+ int rc;
+
+ if (!spec)
+ return MNT_EX_SOFTWARE;
+
+ if (mnt_context_set_target(cxt, spec))
+ err(MNT_EX_SYSERR, _("failed to set umount target"));
+
+ rc = mnt_context_umount(cxt);
+ rc = mk_exit_code(cxt, rc);
+
+ if (rc == MNT_EX_SUCCESS && mnt_context_is_verbose(cxt))
+ success_message(cxt);
+
+ mnt_reset_context(cxt);
+ return rc;
+}
+
+static struct libmnt_table *new_mountinfo(struct libmnt_context *cxt)
+{
+ struct libmnt_table *tb;
+ struct libmnt_ns *ns_old = mnt_context_switch_target_ns(cxt);
+
+ if (!ns_old)
+ err(MNT_EX_SYSERR, _("failed to switch namespace"));
+
+ tb = mnt_new_table();
+ if (!tb)
+ err(MNT_EX_SYSERR, _("libmount table allocation failed"));
+
+ mnt_table_set_parser_errcb(tb, table_parser_errcb);
+ mnt_table_set_cache(tb, mnt_context_get_cache(cxt));
+
+ if (mnt_table_parse_file(tb, _PATH_PROC_MOUNTINFO)) {
+ warn(_("failed to parse %s"), _PATH_PROC_MOUNTINFO);
+ mnt_unref_table(tb);
+ tb = NULL;
+ }
+
+ if (!mnt_context_switch_ns(cxt, ns_old))
+ err(MNT_EX_SYSERR, _("failed to switch namespace"));
+
+ return tb;
+}
+
+/*
+ * like umount_one() but does not return an error if @spec is not mounted
+ */
+static int umount_one_if_mounted(struct libmnt_context *cxt, const char *spec)
+{
+ int rc;
+ struct libmnt_fs *fs;
+
+ rc = mnt_context_find_umount_fs(cxt, spec, &fs);
+ if (rc == 1) {
+ rc = MNT_EX_SUCCESS; /* already unmounted */
+ mnt_reset_context(cxt);
+ } else if (rc < 0) {
+ rc = mk_exit_code(cxt, rc); /* error */
+ mnt_reset_context(cxt);
+ } else
+ rc = umount_one(cxt, mnt_fs_get_target(fs));
+
+ return rc;
+}
+
+static int umount_do_recurse(struct libmnt_context *cxt,
+ struct libmnt_table *tb, struct libmnt_fs *fs)
+{
+ struct libmnt_fs *child;
+ struct libmnt_iter *itr = mnt_new_iter(MNT_ITER_BACKWARD);
+ int rc;
+
+ if (!itr)
+ err(MNT_EX_SYSERR, _("libmount iterator allocation failed"));
+
+ /* umount all children */
+ for (;;) {
+ rc = mnt_table_next_child_fs(tb, itr, fs, &child);
+ if (rc < 0) {
+ warnx(_("failed to get child fs of %s"),
+ mnt_fs_get_target(fs));
+ rc = MNT_EX_SOFTWARE;
+ goto done;
+ } else if (rc == 1)
+ break; /* no more children */
+
+ rc = umount_do_recurse(cxt, tb, child);
+ if (rc != MNT_EX_SUCCESS)
+ goto done;
+ }
+
+ rc = umount_one_if_mounted(cxt, mnt_fs_get_target(fs));
+done:
+ mnt_free_iter(itr);
+ return rc;
+}
+
+static int umount_recursive(struct libmnt_context *cxt, const char *spec)
+{
+ struct libmnt_table *tb;
+ struct libmnt_fs *fs;
+ int rc;
+
+ tb = new_mountinfo(cxt);
+ if (!tb)
+ return MNT_EX_SOFTWARE;
+
+ /* it's always real mountpoint, don't assume that the target maybe a device */
+ mnt_context_disable_swapmatch(cxt, 1);
+
+ fs = mnt_table_find_target(tb, spec, MNT_ITER_BACKWARD);
+ if (fs)
+ rc = umount_do_recurse(cxt, tb, fs);
+ else {
+ rc = MNT_EX_USAGE;
+ if (!quiet)
+ warnx(access(spec, F_OK) == 0 ?
+ _("%s: not mounted") :
+ _("%s: not found"), spec);
+ }
+
+ mnt_unref_table(tb);
+ return rc;
+}
+
+static int umount_alltargets(struct libmnt_context *cxt, const char *spec, int rec)
+{
+ struct libmnt_fs *fs;
+ struct libmnt_table *tb;
+ struct libmnt_iter *itr = NULL;
+ dev_t devno = 0;
+ int rc;
+
+ /* Convert @spec to device name, Use the same logic like regular
+ * "umount <spec>".
+ */
+ rc = mnt_context_find_umount_fs(cxt, spec, &fs);
+ if (rc == 1) {
+ rc = MNT_EX_USAGE;
+ if (!quiet)
+ warnx(access(spec, F_OK) == 0 ?
+ _("%s: not mounted") :
+ _("%s: not found"), spec);
+ return rc;
+ }
+ if (rc < 0)
+ return mk_exit_code(cxt, rc); /* error */
+
+ if (!mnt_fs_get_srcpath(fs) || !mnt_fs_get_devno(fs))
+ errx(MNT_EX_USAGE, _("%s: failed to determine source "
+ "(--all-targets is unsupported on systems with "
+ "regular mtab file)."), spec);
+
+ itr = mnt_new_iter(MNT_ITER_BACKWARD);
+ if (!itr)
+ err(MNT_EX_SYSERR, _("libmount iterator allocation failed"));
+
+ /* get on @cxt independent mountinfo */
+ tb = new_mountinfo(cxt);
+ if (!tb) {
+ rc = MNT_EX_SOFTWARE;
+ goto done;
+ }
+
+ /* Note that @fs is from mount context and the context will be reset
+ * after each umount() call */
+ devno = mnt_fs_get_devno(fs);
+ fs = NULL;
+
+ mnt_reset_context(cxt);
+
+ while (mnt_table_next_fs(tb, itr, &fs) == 0) {
+ if (mnt_fs_get_devno(fs) != devno)
+ continue;
+ mnt_context_disable_swapmatch(cxt, 1);
+ if (rec)
+ rc = umount_do_recurse(cxt, tb, fs);
+ else
+ rc = umount_one_if_mounted(cxt, mnt_fs_get_target(fs));
+
+ if (rc != MNT_EX_SUCCESS)
+ break;
+ }
+
+done:
+ mnt_free_iter(itr);
+ mnt_unref_table(tb);
+
+ return rc;
+}
+
+/*
+ * Check path -- non-root user should not be able to resolve path which is
+ * unreadable for him.
+ */
+static char *sanitize_path(const char *path)
+{
+ char *p;
+
+ if (!path)
+ return NULL;
+
+ p = canonicalize_path_restricted(path);
+ if (!p)
+ err(MNT_EX_USAGE, "%s", path);
+
+ return p;
+}
+
+/* Parse @str as a decimal PID; returns 0 unless it is a number in 1..INT_MAX. */
+static pid_t parse_pid(const char *str)
+{
+	char *end;
+	long ret;	/* wider than pid_t so oversized input is seen, not truncated */
+
+	errno = 0;
+	ret = strtol(str, &end, 10);
+	if (ret <= 0 || ret > INT_MAX || errno || end == str || *end)
+		return 0;
+	return (pid_t) ret;
+}
+
+int main(int argc, char **argv)
+{
+ int c, rc = 0, all = 0, recursive = 0, alltargets = 0;
+ struct libmnt_context *cxt;
+ char *types = NULL;
+
+ enum {
+ UMOUNT_OPT_FAKE = CHAR_MAX + 1,
+ };
+
+ static const struct option longopts[] = {
+ { "all", no_argument, NULL, 'a' },
+ { "all-targets", no_argument, NULL, 'A' },
+ { "detach-loop", no_argument, NULL, 'd' },
+ { "fake", no_argument, NULL, UMOUNT_OPT_FAKE },
+ { "force", no_argument, NULL, 'f' },
+ { "help", no_argument, NULL, 'h' },
+ { "internal-only", no_argument, NULL, 'i' },
+ { "lazy", no_argument, NULL, 'l' },
+ { "no-canonicalize", no_argument, NULL, 'c' },
+ { "no-mtab", no_argument, NULL, 'n' },
+ { "quiet", no_argument, NULL, 'q' },
+ { "read-only", no_argument, NULL, 'r' },
+ { "recursive", no_argument, NULL, 'R' },
+ { "test-opts", required_argument, NULL, 'O' },
+ { "types", required_argument, NULL, 't' },
+ { "verbose", no_argument, NULL, 'v' },
+ { "version", no_argument, NULL, 'V' },
+ { "namespace", required_argument, NULL, 'N' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ static const ul_excl_t excl[] = { /* rows and cols in ASCII order */
+ { 'A','a' }, /* all-targets,all */
+ { 'R','a' }, /* recursive,all */
+ { 'O','R','t'}, /* options,recursive,types */
+ { 'R','r' }, /* recursive,read-only */
+ { 0 }
+ };
+ int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+ sanitize_env();
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ mnt_init_debug(0);
+ cxt = mnt_new_context();
+ if (!cxt)
+ err(MNT_EX_SYSERR, _("libmount context allocation failed"));
+
+ mnt_context_set_tables_errcb(cxt, table_parser_errcb);
+
+ while ((c = getopt_long(argc, argv, "aAcdfhilnqRrO:t:vVN:",
+ longopts, NULL)) != -1) {
+
+
+ /* only few options are allowed for non-root users */
+ if (mnt_context_is_restricted(cxt) && !strchr("hdilqVv", c))
+ exit_non_root(option_to_longopt(c, longopts));
+
+ err_exclusive_options(c, longopts, excl, excl_st);
+
+ switch(c) {
+ case 'a':
+ all = 1;
+ break;
+ case 'A':
+ alltargets = 1;
+ break;
+ case 'c':
+ mnt_context_disable_canonicalize(cxt, TRUE);
+ break;
+ case 'd':
+ mnt_context_enable_loopdel(cxt, TRUE);
+ break;
+ case UMOUNT_OPT_FAKE:
+ mnt_context_enable_fake(cxt, TRUE);
+ break;
+ case 'f':
+ mnt_context_enable_force(cxt, TRUE);
+ break;
+ case 'h':
+ usage();
+ break;
+ case 'i':
+ mnt_context_disable_helpers(cxt, TRUE);
+ break;
+ case 'l':
+ mnt_context_enable_lazy(cxt, TRUE);
+ break;
+ case 'n':
+ mnt_context_disable_mtab(cxt, TRUE);
+ break;
+ case 'q':
+ quiet = 1;
+ break;
+ case 'r':
+ mnt_context_enable_rdonly_umount(cxt, TRUE);
+ break;
+ case 'R':
+ recursive = TRUE;
+ break;
+ case 'O':
+ if (mnt_context_set_options_pattern(cxt, optarg))
+ err(MNT_EX_SYSERR, _("failed to set options pattern"));
+ break;
+ case 't':
+ types = optarg;
+ break;
+ case 'v':
+ mnt_context_enable_verbose(cxt, TRUE);
+ break;
+ case 'V':
+ print_version();
+ break;
+ case 'N':
+ {
+ char path[PATH_MAX];
+ pid_t pid = parse_pid(optarg);
+
+ if (pid)
+ snprintf(path, sizeof(path), "/proc/%i/ns/mnt", pid);
+
+ if (mnt_context_set_target_ns(cxt, pid ? path : optarg))
+ err(MNT_EX_SYSERR, _("failed to set target namespace to %s"), pid ? path : optarg);
+ break;
+ }
+ default:
+ errtryhelp(MNT_EX_USAGE);
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (all) {
+ if (!types)
+ types = "noproc,nodevfs,nodevpts,nosysfs,norpc_pipefs,nonfsd,noselinuxfs";
+
+ mnt_context_set_fstype_pattern(cxt, types);
+ rc = umount_all(cxt);
+
+ } else if (argc < 1) {
+ warnx(_("bad usage"));
+ errtryhelp(MNT_EX_USAGE);
+
+ } else if (alltargets) {
+ while (argc--)
+ rc += umount_alltargets(cxt, *argv++, recursive);
+ } else if (recursive) {
+ while (argc--)
+ rc += umount_recursive(cxt, *argv++);
+ } else {
+ while (argc--) {
+ char *path = *argv;
+
+ if (mnt_context_is_restricted(cxt)
+ && !mnt_tag_is_valid(path))
+ path = sanitize_path(path);
+
+ rc += umount_one(cxt, path);
+
+ if (path != *argv)
+ free(path);
+ argv++;
+ }
+ }
+
+ mnt_free_context(cxt);
+ return (rc < 256) ? rc : 255;
+}
+
diff --git a/sys-utils/unshare.1 b/sys-utils/unshare.1
new file mode 100644
index 0000000..746c411
--- /dev/null
+++ b/sys-utils/unshare.1
@@ -0,0 +1,266 @@
+.TH UNSHARE 1 "February 2016" "util-linux" "User Commands"
+.SH NAME
+unshare \- run program with some namespaces unshared from parent
+.SH SYNOPSIS
+.B unshare
+[options]
+.RI [ program
+.RI [ arguments ]]
+.SH DESCRIPTION
+Unshares the indicated namespaces from the parent process and then executes
+the specified \fIprogram\fR. If \fIprogram\fR is not given, then ``${SHELL}'' is
+run (default: /bin/sh).
+.PP
+The namespaces can optionally be made persistent by bind mounting
+/proc/\fIpid\fR/ns/\fItype\fR files to a filesystem path and entered with
+.BR \%nsenter (1)
+even after the \fIprogram\fR terminates (except PID namespaces where
+permanently running init process is required).
+Once a persistent \%namespace is no longer needed, it can be unpersisted with
+.BR umount (8).
+See the \fBEXAMPLES\fR section for more details.
+.PP
+The namespaces to be unshared are indicated via options. Unshareable namespaces are:
+.TP
+.B mount namespace
+Mounting and unmounting filesystems will not affect the rest of the system,
+except for filesystems which are explicitly marked as
+shared (with \fBmount --make-shared\fP; see \fI/proc/self/mountinfo\fP or
+\fBfindmnt -o+PROPAGATION\fP for the \fBshared\fP flags).
+For further details, see
+.BR mount_namespaces (7)
+and the discussion of the
+.B CLONE_NEWNS
+flag in
+.BR clone (2).
+.sp
+.B unshare
+since util-linux version 2.27 automatically sets propagation to \fBprivate\fP
+in a new mount namespace to make sure that the new namespace is really
+unshared. It's possible to disable this feature with option
+\fB\-\-propagation unchanged\fP.
+Note that \fBprivate\fP is the kernel default.
+.TP
+.B UTS namespace
+Setting hostname or domainname will not affect the rest of the system.
+For further details, see
+.BR namespaces (7)
+and the discussion of the
+.B CLONE_NEWUTS
+flag in
+.BR clone (2).
+.TP
+.B IPC namespace
+The process will have an independent namespace for POSIX message queues
+as well as System V \%message queues,
+semaphore sets and shared memory segments.
+For further details, see
+.BR namespaces (7)
+and the discussion of the
+.B CLONE_NEWIPC
+flag in
+.BR clone (2).
+.TP
+.B network namespace
+The process will have independent IPv4 and IPv6 stacks, IP routing tables,
+firewall rules, the \fI/proc/net\fP and \fI/sys/class/net\fP directory trees,
+sockets, etc.
+For further details, see
+.BR namespaces (7)
+and the discussion of the
+.B CLONE_NEWNET
+flag in
+.BR clone (2).
+.TP
+.B PID namespace
+Children will have a distinct set of PID-to-process mappings from their parent.
+For further details, see
+.BR pid_namespaces (7)
+and
+the discussion of the
+.B CLONE_NEWPID
+flag in
+.BR clone (2).
+.TP
+.B cgroup namespace
+The process will have a virtualized view of \fI/proc\:/self\:/cgroup\fP, and new
+cgroup mounts will be rooted at the namespace cgroup root.
+For further details, see
+.BR cgroup_namespaces (7)
+and the discussion of the
+.B CLONE_NEWCGROUP
+flag in
+.BR clone (2).
+.TP
+.B user namespace
+The process will have a distinct set of UIDs, GIDs and capabilities.
+For further details, see
+.BR user_namespaces (7)
+and the discussion of the
+.B CLONE_NEWUSER
+flag in
+.BR clone (2).
+.SH OPTIONS
+.TP
+.BR \-i , " \-\-ipc" [ =\fIfile ]
+Unshare the IPC namespace. If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.
+.TP
+.BR \-m , " \-\-mount" [ =\fIfile ]
+Unshare the mount namespace. If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.
+Note that \fIfile\fP has to be located on a filesystem with the propagation
+flag set to \fBprivate\fP. Use the command \fBfindmnt -o+PROPAGATION\fP
+when not sure about the current setting. See also the examples below.
+.TP
+.BR \-n , " \-\-net" [ =\fIfile ]
+Unshare the network namespace. If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.
+.TP
+.BR \-p , " \-\-pid" [ =\fIfile ]
+Unshare the PID namespace. If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount. See also the \fB--fork\fP and
+\fB--mount-proc\fP options.
+.TP
+.BR \-u , " \-\-uts" [ =\fIfile ]
+Unshare the UTS namespace. If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.
+.TP
+.BR \-U , " \-\-user" [ =\fIfile ]
+Unshare the user namespace. If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.
+.TP
+.BR \-C , " \-\-cgroup" [ =\fIfile ]
+Unshare the cgroup namespace. If \fIfile\fP is specified, then a persistent
+namespace is created by a bind mount.
+.TP
+.BR \-f , " \-\-fork"
+Fork the specified \fIprogram\fR as a child process of \fBunshare\fR rather than
+running it directly. This is useful when creating a new PID namespace.
+.TP
+.BR \-\-kill\-child [ =\fIsigname ]
+When \fBunshare\fR terminates, have \fIsigname\fP be sent to the forked child process.
+Combined with \fB--pid\fR this allows for an easy and reliable killing of the entire
+process tree below \fBunshare\fR.
+If not given, \fIsigname\fP defaults to \fBSIGKILL\fR.
+This option implies \fB--fork\fR.
+.TP
+.BR \-\-mount\-proc [ =\fImountpoint ]
+Just before running the program, mount the proc filesystem at \fImountpoint\fP
+(default is /proc). This is useful when creating a new PID namespace. It also
+implies creating a new mount namespace since the /proc mount would otherwise
+mess up existing programs on the system. The new proc filesystem is explicitly
+mounted as private (with MS_PRIVATE|MS_REC).
+.TP
+.BR \-r , " \-\-map\-root\-user"
+Run the program only after the current effective user and group IDs have been mapped to
+the superuser UID and GID in the newly created user namespace. This makes it possible to
+conveniently gain capabilities needed to manage various aspects of the newly created
+namespaces (such as configuring interfaces in the network namespace or mounting filesystems in
+the mount namespace) even when run unprivileged. As a mere convenience feature, it does not support
+more sophisticated use cases, such as mapping multiple ranges of UIDs and GIDs.
+This option implies \fB--setgroups=deny\fR.
+.TP
+.BR "\-\-propagation private" | shared | slave | unchanged
+Recursively set the mount propagation flag in the new mount namespace. The default
+is to set the propagation to \fIprivate\fP. It is possible to disable this feature
+with the argument \fBunchanged\fR. The option is silently ignored when the mount
+namespace (\fB\-\-mount\fP) is not requested.
+.TP
+.BR "\-\-setgroups allow" | deny
+Allow or deny the
+.BR setgroups (2)
+system call in a user namespace.
+.sp
+To be able to call
+.BR setgroups (2),
+the calling process must at least have CAP_SETGID.
+But since Linux 3.19 a further restriction applies:
+the kernel gives permission to call
+.BR \%setgroups (2)
+only after the GID map (\fB/proc/\fIpid\fB/gid_map\fR) has been set.
+The GID map is writable by root when
+.BR \%setgroups (2)
+is enabled (i.e. \fBallow\fR, the default), and
+the GID map becomes writable by unprivileged processes when
+.BR \%setgroups (2)
+is permanently disabled (with \fBdeny\fR).
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH NOTES
+Mounting the proc and sysfs filesystems as root in a user namespace has to be
+restricted so that a less privileged user cannot get more access to sensitive
+files that a more privileged user made unavailable. In short, the rule for proc
+and sysfs is as close to a bind mount as possible.
+.SH EXAMPLES
+.TP
+.B # unshare --fork --pid --mount-proc readlink /proc/self
+.TQ
+1
+.br
+Establish a PID namespace, ensure we're PID 1 in it against a newly mounted
+procfs instance.
+.TP
+.B $ unshare --map-root-user --user sh -c whoami
+.TQ
+root
+.br
+Establish a user namespace as an unprivileged user with a root user within it.
+.TP
+.B # touch /root/uts-ns
+.TQ
+.B # unshare --uts=/root/uts-ns hostname FOO
+.TQ
+.B # nsenter --uts=/root/uts-ns hostname
+.TQ
+FOO
+.TQ
+.B # umount /root/uts-ns
+.br
+Establish a persistent UTS namespace, and modify the hostname. The namespace
+is then entered with \fBnsenter\fR. The namespace is destroyed by unmounting
+the bind reference.
+.TP
+.B # mount --bind /root/namespaces /root/namespaces
+.TQ
+.B # mount --make-private /root/namespaces
+.TQ
+.B # touch /root/namespaces/mnt
+.TQ
+.B # unshare --mount=/root/namespaces/mnt
+.br
+Establish a persistent mount namespace referenced by the bind mount
+/root/namespaces/mnt. This example shows a portable solution, because it
+makes sure that the bind mount is created on a private filesystem.
+.TP
+.B # unshare -pf --kill-child -- bash -c "(sleep 999 &) && sleep 1000" &
+.TQ
+.B # pid=$!
+.TQ
+.B # kill $pid
+.br
+Reliable killing of subprocesses of the \fIprogram\fR.
+When \fBunshare\fR gets killed, everything below it gets killed as well.
+Without \fB\-\-kill\-child\fR, the children of \fIprogram\fR would have been
+orphaned and re-parented to PID 1.
+
+.SH SEE ALSO
+.BR clone (2),
+.BR unshare (2),
+.BR namespaces (7),
+.BR mount (8)
+.SH AUTHORS
+.UR dottedmag@dottedmag.net
+Mikhail Gusarov
+.UE
+.br
+.UR kzak@redhat.com
+Karel Zak
+.UE
+.SH AVAILABILITY
+The unshare command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/unshare.c b/sys-utils/unshare.c
new file mode 100644
index 0000000..661665a
--- /dev/null
+++ b/sys-utils/unshare.c
@@ -0,0 +1,484 @@
+/*
+ * unshare(1) - command-line interface for unshare(2)
+ *
+ * Copyright (C) 2009 Mikhail Gusarov <dottedmag@dottedmag.net>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <errno.h>
+#include <getopt.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/prctl.h>
+
+/* we only need some defines missing in sys/mount.h, no libmount linkage */
+#include <libmount.h>
+
+#include "nls.h"
+#include "c.h"
+#include "closestream.h"
+#include "namespace.h"
+#include "exec_shell.h"
+#include "xalloc.h"
+#include "pathnames.h"
+#include "all-io.h"
+#include "signames.h"
+
+/* synchronize parent and child by pipe */
+#define PIPE_SYNC_BYTE 0x06
+
+/* 'private' is kernel default */
+#define UNSHARE_PROPAGATION_DEFAULT (MS_REC | MS_PRIVATE)
+
+/*
+ * /proc namespace files and mountpoints for binds.
+ *
+ * One entry per supported namespace type; the array is terminated by an
+ * entry whose .name is NULL. .target is not initialized here (so it starts
+ * as NULL) and is filled in by set_ns_target() when the user supplies a
+ * path for that namespace type.
+ */
+static struct namespace_file {
+ int type; /* CLONE_NEW* */
+ const char *name; /* ns/<type> */
+ const char *target; /* user specified target for bind mount */
+} namespace_files[] = {
+ { .type = CLONE_NEWUSER, .name = "ns/user" },
+ { .type = CLONE_NEWCGROUP,.name = "ns/cgroup" },
+ { .type = CLONE_NEWIPC, .name = "ns/ipc" },
+ { .type = CLONE_NEWUTS, .name = "ns/uts" },
+ { .type = CLONE_NEWNET, .name = "ns/net" },
+ { .type = CLONE_NEWPID, .name = "ns/pid" },
+ { .type = CLONE_NEWNS, .name = "ns/mnt" },
+ { .name = NULL }
+};
+
+static int npersists; /* number of persistent namespaces */
+
+
+/*
+ * --setgroups actions. The non-negative values are used as indexes into
+ * setgroups_strings[]; SETGROUPS_NONE has no string and is the initial
+ * value of the setting in main().
+ */
+enum {
+ SETGROUPS_NONE = -1,
+ SETGROUPS_DENY = 0,
+ SETGROUPS_ALLOW = 1,
+};
+
+/* keyword for each SETGROUPS_* action; setgroups_control() writes it to _PATH_PROC_SETGROUPS */
+static const char *setgroups_strings[] =
+{
+ [SETGROUPS_DENY] = "deny",
+ [SETGROUPS_ALLOW] = "allow"
+};
+
+/*
+ * Translate a --setgroups argument into the matching index of
+ * setgroups_strings[] (SETGROUPS_DENY or SETGROUPS_ALLOW).
+ *
+ * Does not return for an unknown keyword; errx() terminates the program.
+ */
+static int setgroups_str2id(const char *str)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(setgroups_strings)) {
+		if (!strcmp(str, setgroups_strings[idx]))
+			return idx;
+		idx++;
+	}
+
+	errx(EXIT_FAILURE, _("unsupported --setgroups argument '%s'"), str);
+}
+
+/*
+ * Write the keyword for @action ("deny" or "allow") to _PATH_PROC_SETGROUPS.
+ *
+ * An @action outside of setgroups_strings[] (e.g. SETGROUPS_NONE) is a no-op,
+ * and so is a missing file (ENOENT). Any other open or write error is fatal.
+ */
+static void setgroups_control(int action)
+{
+	const char *path = _PATH_PROC_SETGROUPS;
+	const char *word;
+	int fd;
+
+	if (action < 0)
+		return;
+	if ((size_t) action >= ARRAY_SIZE(setgroups_strings))
+		return;
+
+	word = setgroups_strings[action];
+	fd = open(path, O_WRONLY);
+
+	/* the file does not exist: nothing to control */
+	if (fd < 0 && errno == ENOENT)
+		return;
+	if (fd < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), path);
+
+	if (write_all(fd, word, strlen(word)) != 0)
+		err(EXIT_FAILURE, _("write failed %s"), path);
+
+	close(fd);
+}
+
+/*
+ * Write a one-entry ID mapping, formatted as "<from> <to> 1", to @file.
+ * main() calls this with _PATH_PROC_UIDMAP and _PATH_PROC_GIDMAP.
+ *
+ * Failure to open or to write @file is fatal.
+ */
+static void map_id(const char *file, uint32_t from, uint32_t to)
+{
+	char *line;
+	int fd = open(file, O_WRONLY);
+
+	if (fd < 0)
+		err(EXIT_FAILURE, _("cannot open %s"), file);
+
+	xasprintf(&line, "%u %u 1", from, to);
+
+	if (write_all(fd, line, strlen(line)) != 0)
+		err(EXIT_FAILURE, _("write failed %s"), file);
+
+	free(line);
+	close(fd);
+}
+
+/*
+ * Convert a --propagation keyword to mount(2) flags.
+ *
+ * Returns MS_REC combined with MS_SLAVE, MS_PRIVATE or MS_SHARED, or 0 for
+ * "unchanged". Does not return for an unknown keyword; errx() terminates
+ * the program.
+ */
+static unsigned long parse_propagation(const char *str)
+{
+	static const struct prop_opts {
+		const char *name;
+		unsigned long flag;
+	} opts[] = {
+		{ "slave", MS_REC | MS_SLAVE },
+		{ "private", MS_REC | MS_PRIVATE },
+		{ "shared", MS_REC | MS_SHARED },
+		{ "unchanged", 0 }
+	};
+	const struct prop_opts *opt;
+
+	for (opt = opts; opt < opts + ARRAY_SIZE(opts); opt++)
+		if (!strcmp(opt->name, str))
+			return opt->flag;
+
+	errx(EXIT_FAILURE, _("unsupported propagation mode: %s"), str);
+}
+
+/*
+ * Apply the propagation @flags to "/" with mount(2). A zero @flags value
+ * (the "unchanged" mode) does nothing; a mount(2) failure is fatal.
+ */
+static void set_propagation(unsigned long flags)
+{
+	if (flags != 0 && mount("none", "/", NULL, flags, NULL) != 0)
+		err(EXIT_FAILURE, _("cannot change root filesystem propagation"));
+}
+
+
+/*
+ * Remember @path as the bind-mount target for the namespace of @type
+ * (CLONE_NEW*) and count it in npersists. @path is stored by reference,
+ * not copied.
+ *
+ * Returns 0 on success, -EINVAL when @type is not in namespace_files[].
+ */
+static int set_ns_target(int type, const char *path)
+{
+	struct namespace_file *ns = namespace_files;
+
+	/* stop at the matching entry or at the NULL-name terminator */
+	while (ns->name && ns->type != type)
+		ns++;
+
+	if (!ns->name)
+		return -EINVAL;
+
+	ns->target = path;
+	npersists++;
+	return 0;
+}
+
+/*
+ * Bind mount /proc/<pid>/ns/<type> on every target registered by
+ * set_ns_target(). Entries without a target are skipped.
+ *
+ * Always returns 0; a failed mount(2) terminates the program.
+ */
+static int bind_ns_files(pid_t pid)
+{
+	struct namespace_file *ns = namespace_files;
+	char src[PATH_MAX];
+
+	while (ns->name) {
+		if (ns->target) {
+			snprintf(src, sizeof(src), "/proc/%u/%s", (unsigned) pid, ns->name);
+
+			if (mount(src, ns->target, NULL, MS_BIND, NULL) != 0)
+				err(EXIT_FAILURE, _("mount %s on %s failed"), src, ns->target);
+		}
+		ns++;
+	}
+
+	return 0;
+}
+
+/*
+ * Return the inode number of /proc/<pid>/ns/mnt. A stat(2) failure is fatal.
+ */
+static ino_t get_mnt_ino(pid_t pid)
+{
+	char nsfile[PATH_MAX];
+	struct stat sb;
+
+	snprintf(nsfile, sizeof(nsfile), "/proc/%u/ns/mnt", (unsigned) pid);
+
+	if (stat(nsfile, &sb) == 0)
+		return sb.st_ino;
+
+	err(EXIT_FAILURE, _("cannot stat %s"), nsfile);
+}
+
+/*
+ * Fork a helper process that bind mounts the namespace files of the calling
+ * process after the caller has changed its mount namespace.
+ *
+ * The helper is forked before the caller runs unshare(2). It blocks on the
+ * pipe until the caller writes PIPE_SYNC_BYTE, then checks that the inode of
+ * the caller's /proc/<pid>/ns/mnt differs from the one sampled before the
+ * fork and calls bind_ns_files() for the caller's PID. The helper exits with
+ * EXIT_FAILURE when the inode did not change.
+ *
+ * @child: receives the result of fork() (the helper's PID in the parent)
+ * @fds:   receives the pipe; in the parent fds[0] is closed and set to -1,
+ *         fds[1] stays open as the write end used to wake up the helper
+ */
+static void bind_ns_files_from_child(pid_t *child, int fds[2])
+{
+	char ch = 0;
+	pid_t ppid = getpid();
+	ino_t ino = get_mnt_ino(ppid);
+
+	if (pipe(fds) < 0)
+		err(EXIT_FAILURE, _("pipe failed"));
+
+	*child = fork();
+
+	switch (*child) {
+	case -1:
+		err(EXIT_FAILURE, _("fork failed"));
+
+	case 0:	/* child */
+		close(fds[1]);
+		fds[1] = -1;
+
+		/*
+		 * Wait for parent. Either a failed/short read or an
+		 * unexpected byte is an error; the former check also keeps
+		 * us from looking at ch when nothing has been read.
+		 */
+		if (read_all(fds[0], &ch, 1) != 1 || ch != PIPE_SYNC_BYTE)
+			err(EXIT_FAILURE, _("failed to read pipe"));
+		if (get_mnt_ino(ppid) == ino)
+			exit(EXIT_FAILURE);
+		bind_ns_files(ppid);
+		exit(EXIT_SUCCESS);
+		break;
+
+	default: /* parent */
+		close(fds[0]);
+		fds[0] = -1;
+		break;
+	}
+}
+
+/*
+ * Print the unshare(1) help text to stdout and exit with EXIT_SUCCESS.
+ * Never returns.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+
+ fputs(USAGE_HEADER, out);
+ fprintf(out, _(" %s [options] [<program> [<argument>...]]\n"),
+ program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Run a program with some namespaces unshared from the parent.\n"), out);
+
+ /* namespace selection options */
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -m, --mount[=<file>] unshare mounts namespace\n"), out);
+ fputs(_(" -u, --uts[=<file>] unshare UTS namespace (hostname etc)\n"), out);
+ fputs(_(" -i, --ipc[=<file>] unshare System V IPC namespace\n"), out);
+ fputs(_(" -n, --net[=<file>] unshare network namespace\n"), out);
+ fputs(_(" -p, --pid[=<file>] unshare pid namespace\n"), out);
+ fputs(_(" -U, --user[=<file>] unshare user namespace\n"), out);
+ fputs(_(" -C, --cgroup[=<file>] unshare cgroup namespace\n"), out);
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_(" -f, --fork fork before launching <program>\n"), out);
+ fputs(_(" -r, --map-root-user map current user to root (implies --user)\n"), out);
+ fputs(USAGE_SEPARATOR, out);
+ /* options that have a long form only */
+ fputs(_(" --kill-child[=<signame>] when dying, kill the forked child (implies --fork)\n"
+ " defaults to SIGKILL\n"), out);
+ fputs(_(" --mount-proc[=<dir>] mount proc filesystem first (implies --mount)\n"), out);
+ fputs(_(" --propagation slave|shared|private|unchanged\n"
+ " modify mount propagation in mount namespace\n"), out);
+ fputs(_(" --setgroups allow|deny control the setgroups syscall in user namespaces\n"), out);
+
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(27));
+ printf(USAGE_MAN_TAIL("unshare(1)"));
+
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * unshare(1) entry point.
+ *
+ * Parses the options, calls unshare(2) with the collected CLONE_NEW* flags,
+ * bind mounts the namespace files the user asked to persist, optionally
+ * forks, sets up the UID/GID maps, mount propagation and the proc mount,
+ * and finally executes the given program or a shell.
+ */
+int main(int argc, char *argv[])
+{
+ /* identifiers for options that have a long form only */
+ enum {
+ OPT_MOUNTPROC = CHAR_MAX + 1,
+ OPT_PROPAGATION,
+ OPT_SETGROUPS,
+ OPT_KILLCHILD
+ };
+ static const struct option longopts[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "version", no_argument, NULL, 'V' },
+
+ { "mount", optional_argument, NULL, 'm' },
+ { "uts", optional_argument, NULL, 'u' },
+ { "ipc", optional_argument, NULL, 'i' },
+ { "net", optional_argument, NULL, 'n' },
+ { "pid", optional_argument, NULL, 'p' },
+ { "user", optional_argument, NULL, 'U' },
+ { "cgroup", optional_argument, NULL, 'C' },
+
+ { "fork", no_argument, NULL, 'f' },
+ { "kill-child", optional_argument, NULL, OPT_KILLCHILD },
+ { "mount-proc", optional_argument, NULL, OPT_MOUNTPROC },
+ { "map-root-user", no_argument, NULL, 'r' },
+ { "propagation", required_argument, NULL, OPT_PROPAGATION },
+ { "setgroups", required_argument, NULL, OPT_SETGROUPS },
+ { NULL, 0, NULL, 0 }
+ };
+
+ int setgrpcmd = SETGROUPS_NONE;
+ int unshare_flags = 0;
+ int c, forkit = 0, maproot = 0;
+ int kill_child_signo = 0; /* 0 means --kill-child was not used */
+ const char *procmnt = NULL;
+ pid_t pid = 0;
+ int fds[2];
+ int status;
+ unsigned long propagation = UNSHARE_PROPAGATION_DEFAULT;
+ /* sampled before unshare(); used as the mapping targets for --map-root-user */
+ uid_t real_euid = geteuid();
+ gid_t real_egid = getegid();
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ /* each namespace option sets its CLONE_NEW* flag and, when an argument
+ * is given, registers it as the bind-mount target */
+ while ((c = getopt_long(argc, argv, "+fhVmuinpCUr", longopts, NULL)) != -1) {
+ switch (c) {
+ case 'f':
+ forkit = 1;
+ break;
+ case 'h':
+ usage(); /* does not return */
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+ case 'm':
+ unshare_flags |= CLONE_NEWNS;
+ if (optarg)
+ set_ns_target(CLONE_NEWNS, optarg);
+ break;
+ case 'u':
+ unshare_flags |= CLONE_NEWUTS;
+ if (optarg)
+ set_ns_target(CLONE_NEWUTS, optarg);
+ break;
+ case 'i':
+ unshare_flags |= CLONE_NEWIPC;
+ if (optarg)
+ set_ns_target(CLONE_NEWIPC, optarg);
+ break;
+ case 'n':
+ unshare_flags |= CLONE_NEWNET;
+ if (optarg)
+ set_ns_target(CLONE_NEWNET, optarg);
+ break;
+ case 'p':
+ unshare_flags |= CLONE_NEWPID;
+ if (optarg)
+ set_ns_target(CLONE_NEWPID, optarg);
+ break;
+ case 'U':
+ unshare_flags |= CLONE_NEWUSER;
+ if (optarg)
+ set_ns_target(CLONE_NEWUSER, optarg);
+ break;
+ case 'C':
+ unshare_flags |= CLONE_NEWCGROUP;
+ if (optarg)
+ set_ns_target(CLONE_NEWCGROUP, optarg);
+ break;
+ case OPT_MOUNTPROC:
+ unshare_flags |= CLONE_NEWNS;
+ procmnt = optarg ? optarg : "/proc";
+ break;
+ case 'r':
+ unshare_flags |= CLONE_NEWUSER;
+ maproot = 1;
+ break;
+ case OPT_SETGROUPS:
+ setgrpcmd = setgroups_str2id(optarg);
+ break;
+ case OPT_PROPAGATION:
+ propagation = parse_propagation(optarg);
+ break;
+ case OPT_KILLCHILD:
+ forkit = 1;
+ if (optarg) {
+ if ((kill_child_signo = signame_to_signum(optarg)) < 0)
+ errx(EXIT_FAILURE, _("unknown signal: %s"),
+ optarg);
+ } else {
+ kill_child_signo = SIGKILL;
+ }
+ break;
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+ /* when the mount namespace is going to change, the bind mounts are done
+ * by a helper that is forked now, i.e. before unshare() */
+ if (npersists && (unshare_flags & CLONE_NEWNS))
+ bind_ns_files_from_child(&pid, fds);
+
+ if (-1 == unshare(unshare_flags))
+ err(EXIT_FAILURE, _("unshare failed"));
+
+ if (npersists) {
+ if (pid && (unshare_flags & CLONE_NEWNS)) {
+ int rc;
+ char ch = PIPE_SYNC_BYTE;
+
+ /* signal child we are ready */
+ write_all(fds[1], &ch, 1);
+ close(fds[1]);
+ fds[1] = -1;
+
+ /* wait for bind_ns_files_from_child() */
+ do {
+ rc = waitpid(pid, &status, 0);
+ if (rc < 0) {
+ if (errno == EINTR)
+ continue;
+ err(EXIT_FAILURE, _("waitpid failed"));
+ }
+ /* propagate a failure of the helper as our exit status */
+ if (WIFEXITED(status) &&
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ return WEXITSTATUS(status);
+ } while (rc < 0);
+ } else
+ /* simple way, just bind */
+ bind_ns_files(getpid());
+ }
+
+ if (forkit) {
+ pid = fork();
+
+ switch(pid) {
+ case -1:
+ err(EXIT_FAILURE, _("fork failed"));
+ case 0: /* child */
+ break;
+ default: /* parent */
+ /* the parent only waits and mirrors the child's exit status or signal */
+ if (waitpid(pid, &status, 0) == -1)
+ err(EXIT_FAILURE, _("waitpid failed"));
+ if (WIFEXITED(status))
+ return WEXITSTATUS(status);
+ else if (WIFSIGNALED(status))
+ kill(getpid(), WTERMSIG(status));
+ err(EXIT_FAILURE, _("child exit failed"));
+ }
+ }
+
+ /* --kill-child: ask the kernel to send the signal when our parent dies */
+ if (kill_child_signo != 0 && prctl(PR_SET_PDEATHSIG, kill_child_signo) < 0)
+ err(EXIT_FAILURE, "prctl failed");
+
+ if (maproot) {
+ if (setgrpcmd == SETGROUPS_ALLOW)
+ errx(EXIT_FAILURE, _("options --setgroups=allow and "
+ "--map-root-user are mutually exclusive"));
+
+ /* since Linux 3.19 unprivileged writing of /proc/self/gid_map
+ * has been disabled unless /proc/self/setgroups is written
+ * first to permanently disable the ability to call setgroups
+ * in that user namespace. */
+ setgroups_control(SETGROUPS_DENY);
+ map_id(_PATH_PROC_UIDMAP, 0, real_euid);
+ map_id(_PATH_PROC_GIDMAP, 0, real_egid);
+
+ } else if (setgrpcmd != SETGROUPS_NONE)
+ setgroups_control(setgrpcmd);
+
+ if ((unshare_flags & CLONE_NEWNS) && propagation)
+ set_propagation(propagation);
+
+ /* --mount-proc: make the mountpoint private first, then mount a new proc on it */
+ if (procmnt &&
+ (mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL) != 0 ||
+ mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0))
+ err(EXIT_FAILURE, _("mount %s failed"), procmnt);
+
+ /* run the requested program, or a shell when no program was given */
+ if (optind < argc) {
+ execvp(argv[optind], argv + optind);
+ errexec(argv[optind]);
+ }
+ exec_shell();
+}
diff --git a/sys-utils/wdctl.8 b/sys-utils/wdctl.8
new file mode 100644
index 0000000..7edf808
--- /dev/null
+++ b/sys-utils/wdctl.8
@@ -0,0 +1,70 @@
+.\" wdctl.8 --
+.\" Copyright (C) 2012 Karel Zak <kzak@redhat.com>
+.\" May be distributed under the GNU General Public License
+.TH WDCTL "8" "July 2014" "util-linux" "System Administration"
+.SH NAME
+wdctl \- show hardware watchdog status
+.SH SYNOPSIS
+.B wdctl
+[options]
+.RI [ device ...]
+.SH DESCRIPTION
+Show hardware watchdog status. The default device is
+.IR /dev/watchdog .
+If more than one device is specified then the output is separated by
+one blank line.
+.PP
+Note that the number of supported watchdog features is hardware specific.
+.SH OPTIONS
+.TP
+.BR \-f , " \-\-flags " \fIlist
+Print only the specified flags.
+.TP
+.BR \-F , " \-\-noflags"
+Do not print information about flags.
+.TP
+.BR \-I , " \-\-noident"
+Do not print watchdog identity information.
+.TP
+.BR \-n , " \-\-noheadings"
+Do not print a header line for flags table.
+.IP "\fB\-o\fR, \fB\-\-output \fIlist\fP"
+Define the output columns to use in table of watchdog flags. If no
+output arrangement is specified, then a default set is used. Use
+.B \-\-help
+to get list of all supported columns.
+.TP
+.BR \-O , " \-\-oneline"
+Print all wanted information on one line in key="value" output format.
+.TP
+.BR \-r , " \-\-raw"
+Use the raw output format.
+.TP
+.BR \-s , " \-\-settimeout " \fIseconds
+Set the watchdog timeout in seconds.
+.TP
+.BR \-T , " \-\-notimeouts"
+Do not print watchdog timeouts.
+.IP "\fB\-x\fR, \fB\-\-flags\-only\fP"
+Same as \fB\-I \-T\fP.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+.SH AUTHORS
+.MT kzak@\:redhat\:.com
+Karel Zak
+.ME
+.br
+.MT lennart@\:poettering\:.net
+Lennart Poettering
+.ME
+.SH AVAILABILITY
+The
+.B wdctl
+command is part of the util-linux package and is available from
+.UR https://\:www.kernel.org\:/pub\:/linux\:/utils\:/util-linux/
+Linux Kernel Archive
+.UE .
diff --git a/sys-utils/wdctl.c b/sys-utils/wdctl.c
new file mode 100644
index 0000000..642db85
--- /dev/null
+++ b/sys-utils/wdctl.c
@@ -0,0 +1,618 @@
+/*
+ * wdctl(8) - show hardware watchdog status
+ *
+ * Copyright (C) 2012 Lennart Poettering
+ * Copyright (C) 2012 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#include <sys/ioctl.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <signal.h>
+#include <assert.h>
+#include <linux/watchdog.h>
+
+#include <libsmartcols.h>
+
+#include "nls.h"
+#include "c.h"
+#include "xalloc.h"
+#include "closestream.h"
+#include "optutils.h"
+#include "pathnames.h"
+#include "strutils.h"
+#include "carefulputc.h"
+
+/*
+ * since 2.6.18
+ */
+#ifndef WDIOC_SETPRETIMEOUT
+# define WDIOC_SETPRETIMEOUT _IOWR(WATCHDOG_IOCTL_BASE, 8, int)
+# define WDIOC_GETPRETIMEOUT _IOR(WATCHDOG_IOCTL_BASE, 9, int)
+# define WDIOC_GETTIMELEFT _IOR(WATCHDOG_IOCTL_BASE, 10, int)
+# define WDIOF_POWEROVER 0x0040 /* Power over voltage */
+# define WDIOF_SETTIMEOUT 0x0080 /* Set timeout (in seconds) */
+# define WDIOF_MAGICCLOSE 0x0100 /* Supports magic close char */
+# define WDIOF_PRETIMEOUT 0x0200 /* Pretimeout (in seconds), get/set */
+# define WDIOF_KEEPALIVEPING 0x8000 /* Keep alive ping reply */
+#endif
+
+/*
+ * since 3.5
+ */
+#ifndef WDIOF_ALARMONLY
+# define WDIOF_ALARMONLY 0x0400 /* Watchdog triggers a management or
+ other external alarm not a reboot */
+#endif
+
+/* basic output flags */
+static int no_headings;
+static int raw;
+
+/* one watchdog flag: the bit, its symbolic name and a description */
+struct wdflag {
+ uint32_t flag; /* WDIOF_* bit */
+ const char *name; /* matched case-insensitively by name2bit() */
+ const char *description; /* marked for translation with N_() */
+};
+
+/* flags known to wdctl; show_flags() warns about option bits not listed here */
+static const struct wdflag wdflags[] = {
+ { WDIOF_CARDRESET, "CARDRESET", N_("Card previously reset the CPU") },
+ { WDIOF_EXTERN1, "EXTERN1", N_("External relay 1") },
+ { WDIOF_EXTERN2, "EXTERN2", N_("External relay 2") },
+ { WDIOF_FANFAULT, "FANFAULT", N_("Fan failed") },
+ { WDIOF_KEEPALIVEPING, "KEEPALIVEPING", N_("Keep alive ping reply") },
+ { WDIOF_MAGICCLOSE, "MAGICCLOSE", N_("Supports magic close char") },
+ { WDIOF_OVERHEAT, "OVERHEAT", N_("Reset due to CPU overheat") },
+ { WDIOF_POWEROVER, "POWEROVER", N_("Power over voltage") },
+ { WDIOF_POWERUNDER, "POWERUNDER", N_("Power bad/power fault") },
+ { WDIOF_PRETIMEOUT, "PRETIMEOUT", N_("Pretimeout (in seconds)") },
+ { WDIOF_SETTIMEOUT, "SETTIMEOUT", N_("Set timeout (in seconds)") },
+ { WDIOF_ALARMONLY, "ALARMONLY", N_("Not trigger reboot") }
+};
+
+
+/* column names */
+struct colinfo {
+ const char *name; /* header */
+ double whint; /* width hint (N < 1 is in percent of termwidth) */
+ int flags; /* SCOLS_FL_* */
+ const char *help; /* one-line description printed by usage() */
+};
+
+/* column IDs; used as indexes into infos[] */
+enum { COL_FLAG, COL_DESC, COL_STATUS, COL_BSTATUS, COL_DEVICE };
+
+/* columns descriptions */
+static struct colinfo infos[] = {
+ [COL_FLAG] = { "FLAG", 14, 0, N_("flag name") },
+ [COL_DESC] = { "DESCRIPTION", 0.1, SCOLS_FL_TRUNC, N_("flag description") },
+ [COL_STATUS] = { "STATUS", 1, SCOLS_FL_RIGHT, N_("flag status") },
+ [COL_BSTATUS] = { "BOOT-STATUS", 1, SCOLS_FL_RIGHT, N_("flag boot status") },
+ [COL_DEVICE] = { "DEVICE", 0.1, 0, N_("watchdog device name") }
+
+};
+
+/* IDs (COL_*) of the output columns; the first ncolumns entries are in use */
+static int columns[ARRAY_SIZE(infos) * 2];
+static int ncolumns;
+
+/* state of one watchdog device */
+struct wdinfo {
+ char *device; /* device path, passed to open() */
+
+ /* NOTE(review): presumably seconds, filled in by read_watchdog() -- confirm there */
+ int timeout;
+ int timeleft;
+ int pretimeout;
+
+ uint32_t status; /* WDIOF_* bits shown in the STATUS column */
+ uint32_t bstatus; /* WDIOF_* bits shown in the BOOT-STATUS column */
+
+ struct watchdog_info ident; /* ident.options selects the flags listed by show_flags() */
+
+ /* NOTE(review): presumably set when the matching value above is valid -- confirm in read_watchdog() */
+ unsigned int has_timeout : 1,
+ has_timeleft : 1,
+ has_pretimeout : 1;
+};
+
+/*
+ * Convert a flag name to its flag bit.
+ *
+ * Only the first @namesz bytes of @name are compared (case-insensitively)
+ * and the table name has to be exactly @namesz characters long.
+ *
+ * Returns the WDIOF_* bit, or -1 (after a warning) for an unknown name.
+ */
+static long name2bit(const char *name, size_t namesz)
+{
+	const struct wdflag *fl;
+
+	for (fl = wdflags; fl < wdflags + ARRAY_SIZE(wdflags); fl++) {
+		if (strncasecmp(name, fl->name, namesz) != 0)
+			continue;
+		/* reject a table name that merely starts with @name */
+		if (fl->name[namesz] == '\0')
+			return fl->flag;
+	}
+
+	warnx(_("unknown flag: %s"), name);
+	return -1;
+}
+
+/*
+ * Convert a column name to its COL_* id (index into infos[]).
+ *
+ * Only the first @namesz bytes of @name are compared (case-insensitively)
+ * and the column name has to be exactly @namesz characters long.
+ *
+ * Returns the id, or -1 (after a warning) for an unknown name.
+ */
+static int column2id(const char *name, size_t namesz)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(infos)) {
+		const char *known = infos[idx].name;
+
+		if (strncasecmp(name, known, namesz) == 0 && known[namesz] == '\0')
+			return idx;
+		idx++;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/*
+ * Return the COL_* id stored at position @num of columns[]. Both the
+ * position and the stored id are checked with assert().
+ */
+static int get_column_id(int num)
+{
+	int id;
+
+	assert(num < ncolumns);
+	id = columns[num];
+	assert(id < (int) ARRAY_SIZE(infos));
+
+	return id;
+}
+
+/* Return the infos[] entry that describes the output column at position @num. */
+static struct colinfo *get_column_info(unsigned num)
+{
+	int id = get_column_id(num);
+
+	return infos + id;
+}
+
+/*
+ * Print the wdctl(8) help text, including the list of available output
+ * columns, to stdout and exit with EXIT_SUCCESS. Never returns.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+ FILE *out = stdout;
+ size_t i;
+
+ fputs(USAGE_HEADER, out);
+ fprintf(out,
+ _(" %s [options] [<device> ...]\n"), program_invocation_short_name);
+
+ fputs(USAGE_SEPARATOR, out);
+ fputs(_("Show the status of the hardware watchdog.\n"), out);
+
+ fputs(USAGE_OPTIONS, out);
+ fputs(_(" -f, --flags <list> print selected flags only\n"
+ " -F, --noflags don't print information about flags\n"
+ " -I, --noident don't print watchdog identity information\n"
+ " -n, --noheadings don't print headings for flags table\n"
+ " -O, --oneline print all information on one line\n"
+ " -o, --output <list> output columns of the flags\n"
+ " -r, --raw use raw output format for flags table\n"
+ " -T, --notimeouts don't print watchdog timeouts\n"
+ " -s, --settimeout <sec> set watchdog timeout\n"
+ " -x, --flags-only print only flags table (same as -I -T)\n"), out);
+
+ fputs(USAGE_SEPARATOR, out);
+ printf(USAGE_HELP_OPTIONS(24));
+ fputs(USAGE_SEPARATOR, out);
+
+ fprintf(out, _("The default device is %s.\n"), _PATH_WATCHDOG_DEV);
+
+ /* one line per entry of infos[]: column name and its translated help */
+ fputs(USAGE_COLUMNS, out);
+ for (i = 0; i < ARRAY_SIZE(infos); i++)
+ fprintf(out, " %13s %s\n", infos[i].name, _(infos[i].help));
+
+ printf(USAGE_MAN_TAIL("wdctl(8)"));
+
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * Append one line describing flag @fl of watchdog @wd to @table, filling a
+ * cell for every selected output column.
+ *
+ * Failures are reported with warn(); after a failed cell the remaining
+ * cells of the line are left unset.
+ */
+static void add_flag_line(struct libscols_table *table, struct wdinfo *wd, const struct wdflag *fl)
+{
+	struct libscols_line *ln = scols_table_new_line(table, NULL);
+	int col;
+
+	if (ln == NULL) {
+		warn(_("failed to allocate output line"));
+		return;
+	}
+
+	for (col = 0; col < ncolumns; col++) {
+		const char *cell;
+		int id = get_column_id(col);
+
+		if (id == COL_FLAG)
+			cell = fl->name;
+		else if (id == COL_DESC)
+			cell = fl->description;
+		else if (id == COL_STATUS)
+			cell = (wd->status & fl->flag) ? "1" : "0";
+		else if (id == COL_BSTATUS)
+			cell = (wd->bstatus & fl->flag) ? "1" : "0";
+		else if (id == COL_DEVICE)
+			cell = wd->device;
+		else
+			cell = NULL;
+
+		/* nothing to store for this column */
+		if (cell == NULL)
+			continue;
+
+		if (scols_line_set_data(ln, col, cell) != 0) {
+			warn(_("failed to add output data"));
+			return;
+		}
+	}
+}
+
+/*
+ * Print the table of flags of watchdog @wd.
+ *
+ * A line is added for every known flag that is set in wd->ident.options;
+ * when @wanted is non-zero only the flags selected in @wanted are listed.
+ * Option bits that are not in wdflags[] are reported with a warning.
+ *
+ * Returns 0 on success, -1 when the table or a column cannot be allocated.
+ */
+static int show_flags(struct wdinfo *wd, uint32_t wanted)
+{
+	struct libscols_table *table;
+	uint32_t remaining;
+	size_t n;
+	int rc = -1;
+
+	scols_init_debug(0);
+
+	/* create output table */
+	table = scols_new_table();
+	if (table == NULL) {
+		warn(_("failed to allocate output table"));
+		return -1;
+	}
+	scols_table_enable_raw(table, raw);
+	scols_table_enable_noheadings(table, no_headings);
+
+	/* define columns */
+	for (n = 0; n < (size_t) ncolumns; n++) {
+		struct colinfo *ci = get_column_info(n);
+
+		if (scols_table_new_column(table, ci->name, ci->whint, ci->flags) == NULL) {
+			warnx(_("failed to allocate output column"));
+			goto out;
+		}
+	}
+
+	/* one line for each supported flag (option); every handled bit is
+	 * cleared so that only unknown bits are left at the end */
+	remaining = wd->ident.options;
+
+	for (n = 0; n < ARRAY_SIZE(wdflags); n++) {
+		const struct wdflag *fl = &wdflags[n];
+		int selected = !wanted || (wanted & fl->flag);
+
+		if (selected && (remaining & fl->flag))
+			add_flag_line(table, wd, fl);
+
+		remaining &= ~fl->flag;
+	}
+
+	if (remaining)
+		warnx(_("%s: unknown flags 0x%x\n"), wd->device, remaining);
+
+	scols_print_table(table);
+	rc = 0;
+out:
+	scols_unref_table(table);
+	return rc;
+}
+/*
+ * Set the timeout of the watchdog wd->device to @timeout seconds.
+ *
+ * All signals are blocked while the device is open; the previous signal
+ * mask is restored on every return path. The magic close character 'V' is
+ * written before the device is closed again.
+ *
+ * Returns 0 on success, -1 when the device cannot be opened, or the errno
+ * value of a failed WDIOC_SETTIMEOUT ioctl. The confirmation message is
+ * printed only when the ioctl succeeded.
+ *
+ * Warning: successfully opened watchdog has to be properly closed with magic
+ * close character otherwise the machine will be rebooted!
+ *
+ * Don't use err() or exit() here!
+ */
+static int set_watchdog(struct wdinfo *wd, int timeout)
+{
+	int fd;
+	sigset_t sigs, oldsigs;
+	int rc = 0;
+
+	assert(wd->device);
+
+	sigemptyset(&oldsigs);
+	sigfillset(&sigs);
+	sigprocmask(SIG_BLOCK, &sigs, &oldsigs);
+
+	fd = open(wd->device, O_WRONLY|O_CLOEXEC);
+
+	if (fd < 0) {
+		/* warnx() may modify errno, keep it for warn() */
+		int open_errno = errno;
+
+		if (open_errno == EBUSY)
+			warnx(_("%s: watchdog already in use, terminating."),
+				wd->device);
+		errno = open_errno;
+		warn(_("cannot open %s"), wd->device);
+
+		/* don't leave all signals blocked for the rest of the run */
+		sigprocmask(SIG_SETMASK, &oldsigs, NULL);
+		return -1;
+	}
+
+	for (;;) {
+		/* We just opened this to query the state, not to arm
+		 * it hence use the magic close character */
+		static const char v = 'V';
+
+		if (write(fd, &v, 1) >= 0)
+			break;
+		if (errno != EINTR) {
+			warn(_("%s: failed to disarm watchdog"), wd->device);
+			break;
+		}
+		/* Let's try hard, since if we don't get this right
+		 * the machine might end up rebooting. */
+	}
+
+	if (ioctl(fd, WDIOC_SETTIMEOUT, &timeout) != 0) {
+		rc = errno;
+		warn(_("cannot set timeout for %s"), wd->device);
+	}
+
+	if (close(fd))
+		warn(_("write failed"));
+	sigprocmask(SIG_SETMASK, &oldsigs, NULL);
+
+	/* report success only if the timeout has really been set */
+	if (rc == 0)
+		printf(P_("Timeout has been set to %d second.\n",
+			  "Timeout has been set to %d seconds.\n", timeout), timeout);
+
+	return rc;
+}
+
+/*
+ * Read identity, status, boot status and the timeout values of the watchdog
+ * wd->device into @wd. Each has_* member is set only when the corresponding
+ * ioctl succeeded.
+ *
+ * All signals are blocked while the device is open; the previous signal
+ * mask is restored on every return path.
+ *
+ * Returns 0 when the device could be opened (even if WDIOC_GETSUPPORT
+ * failed, which is only reported by a warning), -1 otherwise.
+ *
+ * Warning: successfully opened watchdog has to be properly closed with magic
+ * close character otherwise the machine will be rebooted!
+ *
+ * Don't use err() or exit() here!
+ */
+static int read_watchdog(struct wdinfo *wd)
+{
+	int fd;
+	sigset_t sigs, oldsigs;
+
+	assert(wd->device);
+
+	sigemptyset(&oldsigs);
+	sigfillset(&sigs);
+	sigprocmask(SIG_BLOCK, &sigs, &oldsigs);
+
+	fd = open(wd->device, O_WRONLY|O_CLOEXEC);
+
+	if (fd < 0) {
+		/* warnx() may modify errno, keep it for warn() */
+		int open_errno = errno;
+
+		if (open_errno == EBUSY)
+			warnx(_("%s: watchdog already in use, terminating."),
+				wd->device);
+		errno = open_errno;
+		warn(_("cannot open %s"), wd->device);
+
+		/* don't leave all signals blocked for the rest of the run */
+		sigprocmask(SIG_SETMASK, &oldsigs, NULL);
+		return -1;
+	}
+
+	if (ioctl(fd, WDIOC_GETSUPPORT, &wd->ident) < 0)
+		warn(_("%s: failed to get information about watchdog"), wd->device);
+	else {
+		ioctl(fd, WDIOC_GETSTATUS, &wd->status);
+		ioctl(fd, WDIOC_GETBOOTSTATUS, &wd->bstatus);
+
+		if (ioctl(fd, WDIOC_GETTIMEOUT, &wd->timeout) >= 0)
+			wd->has_timeout = 1;
+		if (ioctl(fd, WDIOC_GETPRETIMEOUT, &wd->pretimeout) >= 0)
+			wd->has_pretimeout = 1;
+		if (ioctl(fd, WDIOC_GETTIMELEFT, &wd->timeleft) >= 0)
+			wd->has_timeleft = 1;
+	}
+
+	for (;;) {
+		/* We just opened this to query the state, not to arm
+		 * it hence use the magic close character */
+		static const char v = 'V';
+
+		if (write(fd, &v, 1) >= 0)
+			break;
+		if (errno != EINTR) {
+			warn(_("%s: failed to disarm watchdog"), wd->device);
+			break;
+		}
+		/* Let's try hard, since if we don't get this right
+		 * the machine might end up rebooting. */
+	}
+
+	if (close(fd))
+		warn(_("write failed"));
+	sigprocmask(SIG_SETMASK, &oldsigs, NULL);
+
+	return 0;
+}
+
+/*
+ * Print everything known about @wd on a single line as KEY="value" pairs,
+ * prefixed by the device path. The @noident, @notimeouts and @noflags
+ * switches suppress the respective group; @wanted (if non-zero) restricts
+ * the printed flags to the given mask.
+ */
+static void print_oneline(struct wdinfo *wd, uint32_t wanted,
+		int noident, int notimeouts, int noflags)
+{
+	size_t idx;
+
+	printf("%s:", wd->device);
+
+	if (!noident) {
+		printf(" VERSION=\"%x\"", wd->ident.firmware_version);
+
+		printf(" IDENTITY=");
+		fputs_quoted((char *) wd->ident.identity, stdout);
+	}
+
+	if (!notimeouts) {
+		if (wd->has_timeout)
+			printf(" TIMEOUT=\"%i\"", wd->timeout);
+		if (wd->has_pretimeout)
+			printf(" PRETIMEOUT=\"%i\"", wd->pretimeout);
+		if (wd->has_timeleft)
+			printf(" TIMELEFT=\"%i\"", wd->timeleft);
+	}
+
+	for (idx = 0; !noflags && idx < ARRAY_SIZE(wdflags); idx++) {
+		const struct wdflag *fl = &wdflags[idx];
+
+		/* filtered out by the user */
+		if (wanted && !(wanted & fl->flag))
+			continue;
+		/* not supported by this device */
+		if (!(wd->ident.options & fl->flag))
+			continue;
+
+		printf(" %s=\"%s\"", fl->name,
+			wd->status & fl->flag ? "1" : "0");
+		printf(" %s_BOOT=\"%s\"", fl->name,
+			wd->bstatus & fl->flag ? "1" : "0");
+	}
+
+	fputc('\n', stdout);
+}
+
+/* Print one "<label> <n> second(s)" line with the proper plural form. */
+static void print_timeout_line(const char *label, int secs)
+{
+	printf(P_("%-14s %2i second\n", "%-14s %2i seconds\n", secs),
+		label, secs);
+}
+
+/*
+ * Print the timeout, pre-timeout and time-left values of @wd, each on its
+ * own line; values the device did not provide are skipped.
+ */
+static void show_timeouts(struct wdinfo *wd)
+{
+	if (wd->has_timeout)
+		print_timeout_line(_("Timeout:"), wd->timeout);
+	if (wd->has_pretimeout)
+		print_timeout_line(_("Pre-timeout:"), wd->pretimeout);
+	if (wd->has_timeleft)
+		print_timeout_line(_("Timeleft:"), wd->timeleft);
+}
+
+/*
+ * wdctl entry point.
+ *
+ * Parses the command line and then, for every device named on it (or for
+ * the default device when none is given), optionally sets the timeout,
+ * reads the watchdog state and prints it either on one line (--oneline) or
+ * as identity + timeouts + flags table.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if any device could not be set up
+ * or read.
+ */
+int main(int argc, char *argv[])
+{
+	struct wdinfo wd;
+	int c, res = EXIT_SUCCESS, count = 0;
+	char noflags = 0, noident = 0, notimeouts = 0, oneline = 0;
+	uint32_t wanted = 0;
+	int timeout = 0;
+
+	static const struct option long_opts[] = {
+		{ "flags", required_argument, NULL, 'f' },
+		{ "flags-only", no_argument, NULL, 'x' },
+		{ "help", no_argument, NULL, 'h' },
+		{ "noflags", no_argument, NULL, 'F' },
+		{ "noheadings", no_argument, NULL, 'n' },
+		{ "noident", no_argument, NULL, 'I' },
+		{ "notimeouts", no_argument, NULL, 'T' },
+		{ "settimeout", required_argument, NULL, 's' },
+		{ "output", required_argument, NULL, 'o' },
+		{ "oneline", no_argument, NULL, 'O' },
+		{ "raw", no_argument, NULL, 'r' },
+		{ "version", no_argument, NULL, 'V' },
+		{ NULL, 0, NULL, 0 }
+	};
+
+	static const ul_excl_t excl[] = { /* rows and cols in ASCII order */
+		{ 'F','f' }, /* noflags,flags*/
+		{ 0 }
+	};
+	int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+	setlocale(LC_ALL, "");
+	bindtextdomain(PACKAGE, LOCALEDIR);
+	textdomain(PACKAGE);
+	atexit(close_stdout);
+
+	while ((c = getopt_long(argc, argv,
+				"d:f:hFnITo:s:OrVx", long_opts, NULL)) != -1) {
+
+		err_exclusive_options(c, long_opts, excl, excl_st);
+
+		switch(c) {
+		case 'o':
+			ncolumns = string_to_idarray(optarg,
+						     columns, ARRAY_SIZE(columns),
+						     column2id);
+			if (ncolumns < 0)
+				return EXIT_FAILURE;
+			break;
+		case 's':
+			timeout = strtos32_or_err(optarg, _("invalid timeout argument"));
+			break;
+		case 'f':
+		{
+			/* string_to_bitmask() stores an unsigned long; handing
+			 * it the address of the 32-bit 'wanted' would write
+			 * past the variable where long is 64 bits wide. Go
+			 * through a correctly sized temporary instead. */
+			unsigned long mask = wanted;
+
+			if (string_to_bitmask(optarg, &mask, name2bit) != 0)
+				return EXIT_FAILURE;
+			wanted = (uint32_t) mask;
+			break;
+		}
+		case 'V':
+			printf(UTIL_LINUX_VERSION);
+			return EXIT_SUCCESS;
+		case 'h':
+			usage();
+		case 'F':
+			noflags = 1;
+			break;
+		case 'I':
+			noident = 1;
+			break;
+		case 'T':
+			notimeouts = 1;
+			break;
+		case 'n':
+			no_headings = 1;
+			break;
+		case 'r':
+			raw = 1;
+			break;
+		case 'O':
+			oneline = 1;
+			break;
+		case 'x':
+			noident = 1;
+			notimeouts = 1;
+			break;
+		default:
+			errtryhelp(EXIT_FAILURE);
+		}
+	}
+
+	if (!ncolumns) {
+		/* default columns */
+		columns[ncolumns++] = COL_FLAG;
+		columns[ncolumns++] = COL_DESC;
+		columns[ncolumns++] = COL_STATUS;
+		columns[ncolumns++] = COL_BSTATUS;
+	}
+
+	/* at least one pass: without arguments the default device is used */
+	do {
+		int rc;
+
+		memset(&wd, 0, sizeof(wd));
+
+		if (optind == argc)
+			wd.device = _PATH_WATCHDOG_DEV;
+		else
+			wd.device = argv[optind++];
+
+		/* separate the output of consecutive devices */
+		if (count)
+			fputc('\n', stdout);
+		count++;
+
+		if (timeout) {
+			rc = set_watchdog(&wd, timeout);
+			if (rc) {
+				res = EXIT_FAILURE;
+			}
+		}
+
+		rc = read_watchdog(&wd);
+		if (rc) {
+			res = EXIT_FAILURE;
+			continue;
+		}
+
+		if (oneline) {
+			print_oneline(&wd, wanted, noident, notimeouts, noflags);
+			continue;
+		}
+
+		/* pretty output */
+		if (!noident) {
+			printf("%-15s%s\n", _("Device:"), wd.device);
+			printf("%-15s%s [%s %x]\n",
+					_("Identity:"),
+					wd.ident.identity,
+					_("version"),
+					wd.ident.firmware_version);
+		}
+		if (!notimeouts)
+			show_timeouts(&wd);
+		if (!noflags)
+			show_flags(&wd, wanted);
+	} while (optind < argc);
+
+	return res;
+}
diff --git a/sys-utils/zramctl.8 b/sys-utils/zramctl.8
new file mode 100644
index 0000000..c6ecdc3
--- /dev/null
+++ b/sys-utils/zramctl.8
@@ -0,0 +1,131 @@
+.TH ZRAMCTL 8 "July 2014" "util-linux" "System Administration"
+.SH NAME
+zramctl \- set up and control zram devices
+.SH SYNOPSIS
+.ad l
+Get info:
+.sp
+.in +5
+.BR zramctl " [options]"
+.sp
+.in -5
+Reset zram:
+.sp
+.in +5
+.B "zramctl \-r"
+.IR zramdev ...
+.sp
+.in -5
+Print name of first unused zram device:
+.sp
+.in +5
+.B "zramctl \-f"
+.sp
+.in -5
+Set up a zram device:
+.sp
+.in +5
+.B zramctl
+.RB [ \-f " | "\fIzramdev\fP ]
+.RB [ \-s
+.IR size ]
+.RB [ \-t
+.IR number ]
+.RB [ \-a
+.IR algorithm ]
+.sp
+.in -5
+.ad b
+.SH DESCRIPTION
+.B zramctl
+is used to quickly set up zram device parameters, to reset zram devices, and to
+query the status of used zram devices.
+.PP
+If no option is given, all non-zero size zram devices are shown.
+.PP
+Note that the \fIzramdev\fP node specified on the command line must already exist. The command
+.B zramctl
+creates new /dev/zram<N> nodes only when the \fB\-\-find\fR option is specified. It's possible
+(and common) that after system boot the /dev/zram<N> nodes have not been created yet.
+.SH OPTIONS
+.TP
+.BR \-a , " \-\-algorithm lzo" | lz4 | lz4hc | deflate | 842
+Set the compression algorithm to be used for compressing data in the zram device.
+.TP
+.BR \-f , " \-\-find"
+Find the first unused zram device. If a \fB\-\-size\fR argument is present, then
+initialize the device.
+.TP
+.BR \-n , " \-\-noheadings"
+Do not print a header line in status output.
+.TP
+.BR \-o , " \-\-output " \fIlist
+Define the status output columns to be used. If no output arrangement is
+specified, then a default set is used.
+Use \fB\-\-help\fP to get a list of all supported columns.
+.TP
+.B \-\-output\-all
+Output all available columns.
+.TP
+.B \-\-raw
+Use the raw format for status output.
+.TP
+.BR \-r , " \-\-reset"
+Reset the options of the specified zram device(s). Zram device settings
+can be changed only after a reset.
+.TP
+.BR \-s , " \-\-size " \fIsize
+Create a zram device of the specified \fIsize\fR.
+Zram devices are aligned to memory pages; when the requested \fIsize\fR is
+not a multiple of the page size, it will be rounded up to the next multiple.
+When not otherwise specified, the unit of the \fIsize\fR parameter is bytes.
+.IP
+The \fIsize\fR argument may be followed by the multiplicative suffixes KiB (=1024),
+MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB"
+is optional, e.g., "K" has the same meaning as "KiB") or the suffixes
+KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB.
+.TP
+.BR \-t , " \-\-streams " \fInumber
+Set the maximum number of compression streams that can be used for the device.
+The default is one stream.
+.TP
+.BR \-V , " \-\-version"
+Display version information and exit.
+.TP
+.BR \-h , " \-\-help"
+Display help text and exit.
+
+.SH RETURN VALUE
+.B zramctl
+returns 0 on success, nonzero on failure.
+
+.SH FILES
+.TP
+.I /dev/zram[0..N]
+zram block devices
+
+.SH EXAMPLE
+The following commands set up a zram device with a size of one gigabyte
+and use it as swap device.
+.nf
+.IP
+# zramctl --find --size 1024M
+/dev/zram0
+# mkswap /dev/zram0
+# swapon /dev/zram0
+ ...
+# swapoff /dev/zram0
+# zramctl --reset /dev/zram0
+.fi
+.SH SEE ALSO
+.UR http://git.\:kernel.\:org\:/cgit\:/linux\:/kernel\:/git\:/torvalds\:/linux.git\:/tree\:/Documentation\:/blockdev\:/zram.txt
+Linux kernel documentation
+.UE .
+.SH AUTHORS
+.nf
+Timofey Titovets <nefelim4ag@gmail.com>
+Karel Zak <kzak@redhat.com>
+.fi
+.SH AVAILABILITY
+The zramctl command is part of the util-linux package and is available from
+https://www.kernel.org/pub/linux/utils/util-linux/.
diff --git a/sys-utils/zramctl.c b/sys-utils/zramctl.c
new file mode 100644
index 0000000..69267c8
--- /dev/null
+++ b/sys-utils/zramctl.c
@@ -0,0 +1,765 @@
+/*
+ * zramctl - control compressed block devices in RAM
+ *
+ * Copyright (c) 2014 Timofey Titovets <Nefelim4ag@gmail.com>
+ * Copyright (C) 2014 Karel Zak <kzak@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <getopt.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <dirent.h>
+
+#include <libsmartcols.h>
+
+#include "c.h"
+#include "nls.h"
+#include "closestream.h"
+#include "strutils.h"
+#include "xalloc.h"
+#include "sysfs.h"
+#include "optutils.h"
+#include "ismounted.h"
+#include "strv.h"
+#include "path.h"
+#include "pathnames.h"
+
+/* Uncomment the following define to get debug messages on stderr. */
+/*#define CONFIG_ZRAM_DEBUG*/
+
+/*
+ * DBG(x): when CONFIG_ZRAM_DEBUG is defined, runs statement x wrapped by a
+ * "zram: " prefix and a trailing newline on stderr; otherwise expands to
+ * nothing, so x is not evaluated at all.
+ */
+#ifdef CONFIG_ZRAM_DEBUG
+# define DBG(x) do { fputs("zram: ", stderr); x; fputc('\n', stderr); } while(0)
+#else
+# define DBG(x)
+#endif
+
+/* status output columns */
+struct colinfo {
+ const char *name; /* column header, also matched against --output names */
+ double whint; /* width hint handed to scols_table_new_column() */
+ int flags; /* SCOLS_FL_* flags handed to scols_table_new_column() */
+ const char *help; /* description printed in the --help column list */
+};
+
+/* column IDs; each value is the index of the column's entry in infos[] */
+enum {
+ COL_NAME = 0,
+ COL_DISKSIZE,
+ COL_ORIG_SIZE,
+ COL_COMP_SIZE,
+ COL_ALGORITHM,
+ COL_STREAMS,
+ COL_ZEROPAGES,
+ COL_MEMTOTAL,
+ COL_MEMLIMIT,
+ COL_MEMUSED,
+ COL_MIGRATED,
+ COL_MOUNTPOINT
+};
+
+/* column descriptions: { header, width hint, scols flags, help text }, indexed by COL_* */
+static const struct colinfo infos[] = {
+ [COL_NAME] = { "NAME", 0.25, 0, N_("zram device name") },
+ [COL_DISKSIZE] = { "DISKSIZE", 5, SCOLS_FL_RIGHT, N_("limit on the uncompressed amount of data") },
+ [COL_ORIG_SIZE] = { "DATA", 5, SCOLS_FL_RIGHT, N_("uncompressed size of stored data") },
+ [COL_COMP_SIZE] = { "COMPR", 5, SCOLS_FL_RIGHT, N_("compressed size of stored data") },
+ [COL_ALGORITHM] = { "ALGORITHM", 3, 0, N_("the selected compression algorithm") },
+ [COL_STREAMS] = { "STREAMS", 3, SCOLS_FL_RIGHT, N_("number of concurrent compress operations") },
+ [COL_ZEROPAGES] = { "ZERO-PAGES", 3, SCOLS_FL_RIGHT, N_("empty pages with no allocated memory") },
+ [COL_MEMTOTAL] = { "TOTAL", 5, SCOLS_FL_RIGHT, N_("all memory including allocator fragmentation and metadata overhead") },
+ [COL_MEMLIMIT] = { "MEM-LIMIT", 5, SCOLS_FL_RIGHT, N_("memory limit used to store compressed data") },
+ [COL_MEMUSED] = { "MEM-USED", 5, SCOLS_FL_RIGHT, N_("memory zram have been consumed to store compressed data") },
+ [COL_MIGRATED] = { "MIGRATED", 5, SCOLS_FL_RIGHT, N_("number of objects migrated by compaction") },
+ [COL_MOUNTPOINT]= { "MOUNTPOINT",0.10, SCOLS_FL_TRUNC, N_("where the device is mounted") },
+};
+
+/* selected output layout: columns[0..ncolumns-1] hold COL_* ids in print order */
+static int columns[ARRAY_SIZE(infos) * 2] = {-1};
+static int ncolumns;
+
+/* indexes of the individual fields within the split mm_stat sysfs attribute */
+enum {
+ MM_ORIG_DATA_SIZE = 0,
+ MM_COMPR_DATA_SIZE,
+ MM_MEM_USED_TOTAL,
+ MM_MEM_LIMIT,
+ MM_MEM_USED_MAX,
+ MM_ZERO_PAGES,
+ MM_NUM_MIGRATED
+};
+
+/* per-field sysfs attribute names, used by get_mm_stat() when mm_stat is not available */
+static const char *mm_stat_names[] = {
+ [MM_ORIG_DATA_SIZE] = "orig_data_size",
+ [MM_COMPR_DATA_SIZE] = "compr_data_size",
+ [MM_MEM_USED_TOTAL] = "mem_used_total",
+ [MM_MEM_LIMIT] = "mem_limit",
+ [MM_MEM_USED_MAX] = "mem_used_max",
+ [MM_ZERO_PAGES] = "zero_pages",
+ [MM_NUM_MIGRATED] = "num_migrated"
+};
+
+/* one zram device together with its lazily initialized sysfs state */
+struct zram {
+ char devname[32]; /* device name, e.g. /dev/zram<N> */
+ struct path_cxt *sysfs; /* device specific sysfs directory */
+ char **mm_stat; /* fields of the mm_stat attribute (string vector) or NULL */
+
+ unsigned int mm_stat_probed : 1, /* reading mm_stat has already been tried */
+ control_probed : 1, /* has_control is valid */
+ has_control : 1; /* has /sys/class/zram-control/ */
+};
+
+/* output switches set from the command line (--raw, --noheadings, --bytes) */
+static unsigned int raw, no_headings, inbytes;
+/* path context of the zram-control directory, created on demand by zram_get_control() */
+static struct path_cxt *__control;
+
+/* Translate position @num of the output layout into its COL_* id. */
+static int get_column_id(int num)
+{
+	int id;
+
+	assert(num < ncolumns);
+	id = columns[num];
+	assert(id < (int) ARRAY_SIZE(infos));
+
+	return id;
+}
+
+/* Return the infos[] entry describing the column at layout position @num. */
+static const struct colinfo *get_column_info(int num)
+{
+	const int id = get_column_id(num);
+
+	return infos + id;
+}
+
+/*
+ * Look up a column by name (case-insensitive); @name does not have to be
+ * zero-terminated, only its first @namesz bytes are compared.
+ *
+ * Returns the COL_* id, or -1 (after a warning) for an unknown name.
+ */
+static int column_name_to_id(const char *name, size_t namesz)
+{
+	size_t idx = 0;
+
+	while (idx < ARRAY_SIZE(infos)) {
+		const char *candidate = infos[idx].name;
+
+		/* equal prefix and the column name ends right after it */
+		if (strncasecmp(name, candidate, namesz) == 0
+		    && candidate[namesz] == '\0')
+			return idx;
+		idx++;
+	}
+
+	warnx(_("unknown column: %s"), name);
+	return -1;
+}
+
+/* Drop the cached mm_stat fields of @z so they get probed again; NULL is accepted. */
+static void zram_reset_stat(struct zram *z)
+{
+	if (z == NULL)
+		return;
+
+	strv_free(z->mm_stat);
+	z->mm_stat_probed = 0;
+	z->mm_stat = NULL;
+}
+
+/*
+ * Point @z to another device: to @devname (truncated to fit) if given,
+ * otherwise to /dev/zram<n>. The sysfs handle and the mm_stat cache of the
+ * previous device are released.
+ */
+static void zram_set_devname(struct zram *z, const char *devname, size_t n)
+{
+	assert(z);
+
+	if (devname != NULL) {
+		strncpy(z->devname, devname, sizeof(z->devname));
+		z->devname[sizeof(z->devname) - 1] = '\0';
+	} else {
+		snprintf(z->devname, sizeof(z->devname), "/dev/zram%zu", n);
+	}
+
+	DBG(fprintf(stderr, "set devname: %s", z->devname));
+
+	/* everything cached belongs to the old device */
+	ul_unref_path(z->sysfs);
+	z->sysfs = NULL;
+	zram_reset_stat(z);
+}
+
+/* Return <N> of a device named /dev/zram<N>, or -EINVAL for any other name. */
+static int zram_get_devnum(struct zram *z)
+{
+	int devnum;
+
+	assert(z);
+
+	return sscanf(z->devname, "/dev/zram%d", &devnum) == 1 ? devnum : -EINVAL;
+}
+
+/* Allocate a zeroed zram handle; @devname (optional) becomes its device name. */
+static struct zram *new_zram(const char *devname)
+{
+	struct zram *z;
+
+	z = xcalloc(1, sizeof(*z));
+	DBG(fprintf(stderr, "new: %p", z));
+
+	if (devname != NULL)
+		zram_set_devname(z, devname, 0);
+
+	return z;
+}
+
+/* Release @z together with its sysfs handle and mm_stat cache; NULL is accepted. */
+static void free_zram(struct zram *z)
+{
+	if (z != NULL) {
+		DBG(fprintf(stderr, "free: %p", z));
+		ul_unref_path(z->sysfs);
+		zram_reset_stat(z);
+		free(z);
+	}
+}
+
+/*
+ * Return the sysfs path context of @z, creating and caching it on first use.
+ * Returns NULL if the device name cannot be translated to a device number
+ * or the context cannot be allocated.
+ */
+static struct path_cxt *zram_get_sysfs(struct zram *z)
+{
+	dev_t devno;
+
+	assert(z);
+
+	/* already initialized */
+	if (z->sysfs)
+		return z->sysfs;
+
+	devno = sysfs_devname_to_devno(z->devname);
+	if (!devno)
+		return NULL;
+
+	z->sysfs = ul_new_sysfs_path(devno, NULL, NULL);
+
+	/* canonicalize the device name according to /sys */
+	if (z->sysfs && *z->devname != '/')
+		sysfs_blkdev_get_path(z->sysfs, z->devname, sizeof(z->devname));
+
+	return z->sysfs;
+}
+
+/*
+ * Return 1 if a sysfs context can be obtained for @z (errno is 0 then),
+ * otherwise 0 with errno set to ENODEV.
+ */
+static inline int zram_exist(struct zram *z)
+{
+	assert(z);
+
+	errno = 0;
+	if (zram_get_sysfs(z) != NULL) {
+		DBG(fprintf(stderr, "%s exists", z->devname));
+		return 1;
+	}
+
+	errno = ENODEV;
+	return 0;
+}
+
+/*
+ * Write the number @num to the sysfs attribute @attr of @z.
+ * Returns -EINVAL without sysfs context, otherwise the result of
+ * ul_path_write_u64().
+ */
+static int zram_set_u64parm(struct zram *z, const char *attr, uint64_t num)
+{
+	struct path_cxt *pc = zram_get_sysfs(z);
+
+	if (pc == NULL)
+		return -EINVAL;
+
+	DBG(fprintf(stderr, "%s writing %ju to %s", z->devname, num, attr));
+
+	return ul_path_write_u64(pc, num, attr);
+}
+
+/*
+ * Write the string @str to the sysfs attribute @attr of @z.
+ * Returns -EINVAL without sysfs context, otherwise the result of
+ * ul_path_write_string().
+ */
+static int zram_set_strparm(struct zram *z, const char *attr, const char *str)
+{
+	struct path_cxt *pc = zram_get_sysfs(z);
+
+	if (pc == NULL)
+		return -EINVAL;
+
+	DBG(fprintf(stderr, "%s writing %s to %s", z->devname, str, attr));
+
+	return ul_path_write_string(pc, str, attr);
+}
+
+
+/*
+ * Return 1 if the "disksize" attribute of @z can be read and is non-zero,
+ * otherwise 0.
+ */
+static int zram_used(struct zram *z)
+{
+	uint64_t disksize;
+	struct path_cxt *pc = zram_get_sysfs(z);
+
+	if (pc == NULL
+	    || ul_path_read_u64(pc, &disksize, "disksize") != 0
+	    || disksize == 0) {
+		DBG(fprintf(stderr, "%s unused", z->devname));
+		return 0;
+	}
+
+	DBG(fprintf(stderr, "%s used", z->devname));
+	return 1;
+}
+
+/*
+ * Return non-zero if the zram-control directory is accessible. The check
+ * is done once per @z and the answer is cached in it.
+ */
+static int zram_has_control(struct zram *z)
+{
+	if (z->control_probed)
+		return z->has_control;
+
+	z->has_control = access(_PATH_SYS_CLASS "/zram-control/", F_OK) == 0;
+	z->control_probed = 1;
+	DBG(fprintf(stderr, "zram-control: %s", z->has_control ? "yes" : "no"));
+
+	return z->has_control;
+}
+
+/*
+ * Return the shared path context of the zram-control directory; it is
+ * created by the first call and kept in __control.
+ */
+static struct path_cxt *zram_get_control(void)
+{
+	if (__control == NULL)
+		__control = ul_new_path(_PATH_SYS_CLASS "/zram-control");
+
+	return __control;
+}
+
+/*
+ * Ask the kernel for a new zram device by reading "hot_add" from the
+ * zram-control directory and make @z refer to the device number read.
+ *
+ * Returns 0 on success, -ENOSYS when zram-control is not available, and a
+ * negative number when the attribute cannot be read or contains a negative
+ * value.
+ */
+static int zram_control_add(struct zram *z)
+{
+	int n = 0, rc;
+	struct path_cxt *ctl;
+
+	if (!zram_has_control(z) || !(ctl = zram_get_control()))
+		return -ENOSYS;
+
+	/* on a failed read 'n' holds no device number, so it must never be
+	 * used as the return value (0 would be mistaken for success) */
+	rc = ul_path_read_s32(ctl, &n, "hot_add");
+	if (rc != 0)
+		return rc < 0 ? rc : -EINVAL;
+	if (n < 0)
+		return n;
+
+	DBG(fprintf(stderr, "hot-add: %d", n));
+	zram_set_devname(z, NULL, n);
+	return 0;
+}
+
+/*
+ * Write the number of device @z to "hot_remove" in the zram-control
+ * directory.
+ *
+ * Returns -ENOSYS when zram-control is not available, a negative number
+ * when the device number cannot be parsed from the device name, otherwise
+ * the result of the write.
+ */
+static int zram_control_remove(struct zram *z)
+{
+	struct path_cxt *ctl;
+	int devnum;
+
+	if (!zram_has_control(z))
+		return -ENOSYS;
+
+	ctl = zram_get_control();
+	if (ctl == NULL)
+		return -ENOSYS;
+
+	devnum = zram_get_devnum(z);
+	if (devnum < 0)
+		return devnum;
+
+	DBG(fprintf(stderr, "hot-remove: %d", devnum));
+	return ul_path_write_u64(ctl, devnum, "hot_remove");
+}
+
+/*
+ * Walk zram0, zram1, ... and return a handle for the first device that is
+ * not in use. A device that does not exist yet is requested through
+ * zram_control_add(); if that fails the search ends.
+ *
+ * Returns a newly allocated handle (release with free_zram()) or NULL.
+ */
+static struct zram *find_free_zram(void)
+{
+	struct zram *z = new_zram(NULL);
+	size_t n;
+
+	for (n = 0; ; n++) {
+		DBG(fprintf(stderr, "find free: checking zram%zu", n));
+		zram_set_devname(z, NULL, n);
+
+		if (!zram_exist(z) && zram_control_add(z) != 0)
+			break;
+		if (!zram_used(z))
+			return z;
+	}
+
+	free_zram(z);
+	return NULL;
+}
+
+/*
+ * Return statistic @idx (one of MM_*) of device @z as a newly allocated
+ * string: the value as read when @bytes is non-zero, otherwise converted by
+ * size_to_human_string(). Returns NULL if the value is not available.
+ *
+ * The combined "mm_stat" attribute is read and split once per device; if it
+ * is missing or has fewer fields than expected, the per-statistic attribute
+ * named by mm_stat_names[] is read instead.
+ */
+static char *get_mm_stat(struct zram *z, size_t idx, int bytes)
+{
+	struct path_cxt *pc;
+	uint64_t value;
+
+	assert(idx < ARRAY_SIZE(mm_stat_names));
+	assert(z);
+
+	pc = zram_get_sysfs(z);
+	if (pc == NULL)
+		return NULL;
+
+	/* Linux >= 4.1 uses /sys/block/zram<id>/mm_stat */
+	if (!z->mm_stat && !z->mm_stat_probed) {
+		char *line = NULL;
+
+		if (ul_path_read_string(pc, &line, "mm_stat") > 0 && line) {
+			z->mm_stat = strv_split(line, " ");
+
+			/* make sure kernel provides mm_stat as expected */
+			if (strv_length(z->mm_stat) < ARRAY_SIZE(mm_stat_names)) {
+				strv_free(z->mm_stat);
+				z->mm_stat = NULL;
+			}
+		}
+		z->mm_stat_probed = 1;
+		free(line);
+	}
+
+	if (z->mm_stat) {
+		const char *field = z->mm_stat[idx];
+
+		if (bytes)
+			return xstrdup(field);
+
+		value = strtou64_or_err(field, _("Failed to parse mm_stat"));
+		return size_to_human_string(SIZE_SUFFIX_1LETTER, value);
+	}
+
+	/* Linux < 4.1 uses /sys/block/zram<id>/<attrname> */
+	if (bytes) {
+		char *text = NULL;
+
+		ul_path_read_string(pc, &text, mm_stat_names[idx]);
+		return text;
+	}
+
+	if (ul_path_read_u64(pc, &value, mm_stat_names[idx]) == 0)
+		return size_to_human_string(SIZE_SUFFIX_1LETTER, value);
+
+	return NULL;
+}
+
+/*
+ * Add one row describing device @z to table @tb, with one cell per selected
+ * output column. Cells whose value cannot be obtained stay empty. Nothing
+ * is added when @z has no sysfs context; allocation failures terminate the
+ * program.
+ */
+static void fill_table_row(struct libscols_table *tb, struct zram *z)
+{
+	struct libscols_line *row;
+	struct path_cxt *pc;
+	size_t col;
+
+	assert(tb);
+	assert(z);
+
+	DBG(fprintf(stderr, "%s: filling status table", z->devname));
+
+	pc = zram_get_sysfs(z);
+	if (pc == NULL)
+		return;
+
+	row = scols_table_new_line(tb, NULL);
+	if (row == NULL)
+		err(EXIT_FAILURE, _("failed to allocate output line"));
+
+	for (col = 0; col < (size_t) ncolumns; col++) {
+		char *cell = NULL;
+		uint64_t value;
+
+		switch (get_column_id(col)) {
+		case COL_NAME:
+			cell = xstrdup(z->devname);
+			break;
+		case COL_DISKSIZE:
+			if (inbytes)
+				ul_path_read_string(pc, &cell, "disksize");
+			else if (ul_path_read_u64(pc, &value, "disksize") == 0)
+				cell = size_to_human_string(SIZE_SUFFIX_1LETTER, value);
+			break;
+		case COL_ALGORITHM:
+		{
+			char *list = NULL;
+
+			ul_path_read_string(pc, &list, "comp_algorithm");
+			if (list != NULL) {
+				/* use the text enclosed by the last '[' and ']' */
+				char *lb = strrchr(list, '[');
+				char *rb = strrchr(list, ']');
+
+				if (lb != NULL && rb != NULL && rb - lb > 1)
+					cell = xstrndup(lb + 1, rb - lb - 1);
+				free(list);
+			}
+			break;
+		}
+		case COL_MOUNTPOINT:
+		{
+			char mnt[PATH_MAX] = { '\0' };
+			int mntflags;
+
+			check_mount_point(z->devname, &mntflags, mnt, sizeof(mnt));
+			if (*mnt)
+				cell = xstrdup(mnt);
+			break;
+		}
+		case COL_STREAMS:
+			ul_path_read_string(pc, &cell, "max_comp_streams");
+			break;
+		case COL_ZEROPAGES:
+			cell = get_mm_stat(z, MM_ZERO_PAGES, 1);
+			break;
+		case COL_ORIG_SIZE:
+			cell = get_mm_stat(z, MM_ORIG_DATA_SIZE, inbytes);
+			break;
+		case COL_COMP_SIZE:
+			cell = get_mm_stat(z, MM_COMPR_DATA_SIZE, inbytes);
+			break;
+		case COL_MEMTOTAL:
+			cell = get_mm_stat(z, MM_MEM_USED_TOTAL, inbytes);
+			break;
+		case COL_MEMLIMIT:
+			cell = get_mm_stat(z, MM_MEM_LIMIT, inbytes);
+			break;
+		case COL_MEMUSED:
+			cell = get_mm_stat(z, MM_MEM_USED_MAX, inbytes);
+			break;
+		case COL_MIGRATED:
+			cell = get_mm_stat(z, MM_NUM_MIGRATED, inbytes);
+			break;
+		default:
+			break;
+		}
+
+		if (cell == NULL)
+			continue;
+		if (scols_line_refer_data(row, col, cell) != 0)
+			err(EXIT_FAILURE, _("failed to add output data"));
+	}
+}
+
+/*
+ * Print the status table. With @z given only that device is listed;
+ * with NULL every zram<N> entry found in the device directory that exists
+ * in sysfs and is in use gets a row. Failures to set up the table or to
+ * open the device directory terminate the program.
+ */
+static void status(struct zram *z)
+{
+	struct libscols_table *tb;
+	size_t col;
+
+	scols_init_debug(0);
+
+	tb = scols_new_table();
+	if (tb == NULL)
+		err(EXIT_FAILURE, _("failed to allocate output table"));
+
+	scols_table_enable_raw(tb, raw);
+	scols_table_enable_noheadings(tb, no_headings);
+
+	for (col = 0; col < (size_t) ncolumns; col++) {
+		const struct colinfo *ci = get_column_info(col);
+
+		if (scols_table_new_column(tb, ci->name, ci->whint, ci->flags) == NULL)
+			err(EXIT_FAILURE, _("failed to initialize output column"));
+	}
+
+	if (z != NULL) {
+		/* just one device specified */
+		fill_table_row(tb, z);
+	} else {
+		/* list all used devices */
+		struct dirent *ent;
+		DIR *devdir;
+
+		z = new_zram(NULL);
+		devdir = opendir(_PATH_DEV);
+		if (devdir == NULL)
+			err(EXIT_FAILURE, _("cannot open %s"), _PATH_DEV);
+
+		while ((ent = readdir(devdir)) != NULL) {
+			int devnum;
+
+			if (sscanf(ent->d_name, "zram%d", &devnum) != 1)
+				continue;
+			zram_set_devname(z, NULL, devnum);
+			if (zram_exist(z) && zram_used(z))
+				fill_table_row(tb, z);
+		}
+		closedir(devdir);
+		free_zram(z);
+	}
+
+	scols_print_table(tb);
+	scols_unref_table(tb);
+}
+
+/*
+ * Print the help text (synopsis, options and the list of available output
+ * columns) to stdout and exit with EXIT_SUCCESS.
+ */
+static void __attribute__((__noreturn__)) usage(void)
+{
+	FILE *fp = stdout;
+	size_t idx = 0;
+
+	fputs(USAGE_HEADER, fp);
+	fprintf(fp, _( " %1$s [options] <device>\n"
+			" %1$s -r <device> [...]\n"
+			" %1$s [options] -f | <device> -s <size>\n"),
+		program_invocation_short_name);
+
+	fputs(USAGE_SEPARATOR, fp);
+	fputs(_("Set up and control zram devices.\n"), fp);
+
+	fputs(USAGE_OPTIONS, fp);
+	fputs(_(" -a, --algorithm lzo|lz4|lz4hc|deflate|842 compression algorithm to use\n"), fp);
+	fputs(_(" -b, --bytes print sizes in bytes rather than in human readable format\n"), fp);
+	fputs(_(" -f, --find find a free device\n"), fp);
+	fputs(_(" -n, --noheadings don't print headings\n"), fp);
+	fputs(_(" -o, --output <list> columns to use for status output\n"), fp);
+	fputs(_(" --output-all output all columns\n"), fp);
+	fputs(_(" --raw use raw status output format\n"), fp);
+	fputs(_(" -r, --reset reset all specified devices\n"), fp);
+	fputs(_(" -s, --size <size> device size\n"), fp);
+	fputs(_(" -t, --streams <number> number of compression streams\n"), fp);
+
+	fputs(USAGE_SEPARATOR, fp);
+	printf(USAGE_HELP_OPTIONS(27));
+
+	/* one line per known output column */
+	fputs(USAGE_COLUMNS, fp);
+	while (idx < ARRAY_SIZE(infos)) {
+		fprintf(fp, " %11s %s\n", infos[idx].name, _(infos[idx].help));
+		idx++;
+	}
+
+	printf(USAGE_MAN_TAIL("zramctl(8)"));
+	exit(EXIT_SUCCESS);
+}
+
+/* actions: what main() does after option parsing; A_NONE means "not chosen
+ * by an option" and is resolved to A_FINDONLY or A_STATUS */
+enum {
+ A_NONE = 0,
+ A_STATUS,
+ A_CREATE,
+ A_FINDONLY,
+ A_RESET
+};
+
+/*
+ * zramctl entry point: parse the options, derive the action (status, reset,
+ * find a free device, or create/set up a device) and execute it.
+ *
+ * Returns EXIT_SUCCESS, or EXIT_FAILURE if a device could not be reset;
+ * most other errors terminate the program through err()/errx().
+ */
+int main(int argc, char **argv)
+{
+ uintmax_t size = 0, nstreams = 0;
+ char *algorithm = NULL;
+ int rc = 0, c, find = 0, act = A_NONE;
+ struct zram *zram = NULL;
+
+ /* codes of options that have a long form only */
+ enum {
+ OPT_RAW = CHAR_MAX + 1,
+ OPT_LIST_TYPES
+ };
+
+ static const struct option longopts[] = {
+ { "algorithm", required_argument, NULL, 'a' },
+ { "bytes", no_argument, NULL, 'b' },
+ { "find", no_argument, NULL, 'f' },
+ { "help", no_argument, NULL, 'h' },
+ { "output", required_argument, NULL, 'o' },
+ { "output-all",no_argument, NULL, OPT_LIST_TYPES },
+ { "noheadings",no_argument, NULL, 'n' },
+ { "reset", no_argument, NULL, 'r' },
+ { "raw", no_argument, NULL, OPT_RAW },
+ { "size", required_argument, NULL, 's' },
+ { "streams", required_argument, NULL, 't' },
+ { "version", no_argument, NULL, 'V' },
+ { NULL, 0, NULL, 0 }
+ };
+
+ /* groups of mutually exclusive options, checked by err_exclusive_options() */
+ static const ul_excl_t excl[] = {
+ { 'f', 'o', 'r' },
+ { 'o', 'r', 's' },
+ { 0 }
+ };
+ int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT;
+
+ setlocale(LC_ALL, "");
+ bindtextdomain(PACKAGE, LOCALEDIR);
+ textdomain(PACKAGE);
+ atexit(close_stdout);
+
+ while ((c = getopt_long(argc, argv, "a:bfho:nrs:t:V", longopts, NULL)) != -1) {
+
+ err_exclusive_options(c, longopts, excl, excl_st);
+
+ switch (c) {
+ case 'a':
+ algorithm = optarg;
+ break;
+ case 'b':
+ inbytes = 1;
+ break;
+ case 'f':
+ find = 1;
+ break;
+ case 'o':
+ ncolumns = string_to_idarray(optarg,
+ columns, ARRAY_SIZE(columns),
+ column_name_to_id);
+ if (ncolumns < 0)
+ return EXIT_FAILURE;
+ break;
+ case OPT_LIST_TYPES:
+ /* select every known column, in infos[] order */
+ for (ncolumns = 0; (size_t)ncolumns < ARRAY_SIZE(infos); ncolumns++)
+ columns[ncolumns] = ncolumns;
+ break;
+ case 's':
+ size = strtosize_or_err(optarg, _("failed to parse size"));
+ act = A_CREATE;
+ break;
+ case 't':
+ nstreams = strtou64_or_err(optarg, _("failed to parse streams"));
+ break;
+ case 'r':
+ act = A_RESET;
+ break;
+ case OPT_RAW:
+ raw = 1;
+ break;
+ case 'n':
+ no_headings = 1;
+ break;
+ case 'V':
+ printf(UTIL_LINUX_VERSION);
+ return EXIT_SUCCESS;
+ case 'h':
+ usage();
+ default:
+ errtryhelp(EXIT_FAILURE);
+ }
+ }
+
+ if (find && optind < argc)
+ errx(EXIT_FAILURE, _("option --find is mutually exclusive "
+ "with <device>"));
+ /* neither --size nor --reset given */
+ if (act == A_NONE)
+ act = find ? A_FINDONLY : A_STATUS;
+
+ if (act != A_RESET && optind + 1 < argc)
+ errx(EXIT_FAILURE, _("only one <device> at a time is allowed"));
+
+ if ((act == A_STATUS || act == A_FINDONLY) && (algorithm || nstreams))
+ errx(EXIT_FAILURE, _("options --algorithm and --streams "
+ "must be combined with --size"));
+
+ ul_path_init_debug();
+ ul_sysfs_init_debug();
+
+ switch (act) {
+ case A_STATUS:
+ if (!ncolumns) { /* default columns */
+ columns[ncolumns++] = COL_NAME;
+ columns[ncolumns++] = COL_ALGORITHM;
+ columns[ncolumns++] = COL_DISKSIZE;
+ columns[ncolumns++] = COL_ORIG_SIZE;
+ columns[ncolumns++] = COL_COMP_SIZE;
+ columns[ncolumns++] = COL_MEMTOTAL;
+ columns[ncolumns++] = COL_STREAMS;
+ columns[ncolumns++] = COL_MOUNTPOINT;
+ }
+ if (optind < argc) {
+ zram = new_zram(argv[optind++]);
+ if (!zram_exist(zram))
+ err(EXIT_FAILURE, "%s", zram->devname);
+ }
+ /* zram is NULL here when no device was named: list all */
+ status(zram);
+ free_zram(zram);
+ break;
+ case A_RESET:
+ if (optind == argc)
+ errx(EXIT_FAILURE, _("no device specified"));
+ /* a failed reset is reported, but the remaining devices are still processed */
+ while (optind < argc) {
+ zram = new_zram(argv[optind]);
+ if (!zram_exist(zram)
+ || zram_set_u64parm(zram, "reset", 1)) {
+ warn(_("%s: failed to reset"), zram->devname);
+ rc = 1;
+ }
+ /* the result of the hot-remove request is not checked */
+ zram_control_remove(zram);
+ free_zram(zram);
+ optind++;
+ }
+ break;
+ case A_FINDONLY:
+ zram = find_free_zram();
+ if (!zram)
+ errx(EXIT_FAILURE, _("no free zram device found"));
+ printf("%s\n", zram->devname);
+ free_zram(zram);
+ break;
+ case A_CREATE:
+ if (find) {
+ zram = find_free_zram();
+ if (!zram)
+ errx(EXIT_FAILURE, _("no free zram device found"));
+ } else if (optind == argc)
+ errx(EXIT_FAILURE, _("no device specified"));
+ else {
+ zram = new_zram(argv[optind]);
+ if (!zram_exist(zram))
+ err(EXIT_FAILURE, "%s", zram->devname);
+ }
+
+ /* the attributes are written in this order: reset, streams, algorithm, disksize */
+ if (zram_set_u64parm(zram, "reset", 1))
+ err(EXIT_FAILURE, _("%s: failed to reset"), zram->devname);
+
+ if (nstreams &&
+ zram_set_u64parm(zram, "max_comp_streams", nstreams))
+ err(EXIT_FAILURE, _("%s: failed to set number of streams"), zram->devname);
+
+ if (algorithm &&
+ zram_set_strparm(zram, "comp_algorithm", algorithm))
+ err(EXIT_FAILURE, _("%s: failed to set algorithm"), zram->devname);
+
+ if (zram_set_u64parm(zram, "disksize", size))
+ err(EXIT_FAILURE, _("%s: failed to set disksize (%ju bytes)"),
+ zram->devname, size);
+ /* with --find the caller does not know the device name yet */
+ if (find)
+ printf("%s\n", zram->devname);
+ free_zram(zram);
+ break;
+ }
+
+ ul_unref_path(__control);
+ return rc ? EXIT_FAILURE : EXIT_SUCCESS;
+}