diff options
Diffstat (limited to 'sys-utils')
157 files changed, 55933 insertions, 0 deletions
diff --git a/sys-utils/Makemodule.am b/sys-utils/Makemodule.am new file mode 100644 index 0000000..e553634 --- /dev/null +++ b/sys-utils/Makemodule.am @@ -0,0 +1,568 @@ +if BUILD_LSMEM +usrbin_exec_PROGRAMS += lsmem +MANPAGES += sys-utils/lsmem.1 +dist_noinst_DATA += sys-utils/lsmem.1.adoc +lsmem_SOURCES = sys-utils/lsmem.c +lsmem_LDADD = $(LDADD) libcommon.la libsmartcols.la +lsmem_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) +endif + +if BUILD_CHMEM +usrbin_exec_PROGRAMS += chmem +MANPAGES += sys-utils/chmem.8 +dist_noinst_DATA += sys-utils/chmem.8.adoc +chmem_SOURCES = sys-utils/chmem.c +chmem_LDADD = $(LDADD) libcommon.la +endif + +if BUILD_FLOCK +usrbin_exec_PROGRAMS += flock +MANPAGES += sys-utils/flock.1 +dist_noinst_DATA += sys-utils/flock.1.adoc +flock_SOURCES = sys-utils/flock.c lib/monotonic.c lib/timer.c +flock_LDADD = $(LDADD) libcommon.la $(REALTIME_LIBS) +endif + +if BUILD_CHOOM +usrbin_exec_PROGRAMS += choom +MANPAGES += sys-utils/choom.1 +dist_noinst_DATA += sys-utils/choom.1.adoc +choom_SOURCES = sys-utils/choom.c +choom_LDADD = $(LDADD) libcommon.la +endif + +if BUILD_IPCMK +usrbin_exec_PROGRAMS += ipcmk +MANPAGES += sys-utils/ipcmk.1 +dist_noinst_DATA += sys-utils/ipcmk.1.adoc +ipcmk_SOURCES = sys-utils/ipcmk.c +ipcmk_LDADD = $(LDADD) libcommon.la +endif + +if BUILD_IPCRM +usrbin_exec_PROGRAMS += ipcrm +MANPAGES += sys-utils/ipcrm.1 +dist_noinst_DATA += sys-utils/ipcrm.1.adoc +ipcrm_SOURCES = sys-utils/ipcrm.c +ipcrm_LDADD = $(LDADD) libcommon.la +endif + +if BUILD_IPCS +usrbin_exec_PROGRAMS += ipcs +MANPAGES += sys-utils/ipcs.1 +dist_noinst_DATA += sys-utils/ipcs.1.adoc +ipcs_SOURCES = sys-utils/ipcs.c \ + sys-utils/ipcutils.c \ + sys-utils/ipcutils.h +ipcs_LDADD = $(LDADD) libcommon.la +endif + +if BUILD_IRQTOP +usrbin_exec_PROGRAMS += irqtop +MANPAGES += sys-utils/irqtop.1 +dist_noinst_DATA += sys-utils/irqtop.1.adoc +irqtop_SOURCES = sys-utils/irqtop.c \ + sys-utils/irq-common.c \ + sys-utils/irq-common.h \ + lib/monotonic.c 
+irqtop_LDADD = $(LDADD) libcommon.la $(REALTIME_LIBS) libsmartcols.la +irqtop_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) +if HAVE_SLANG +irqtop_LDADD += -lslang +else +irqtop_CFLAGS += $(NCURSES_CFLAGS) +irqtop_LDADD += $(NCURSES_LIBS) +endif +endif + +if BUILD_LSIRQ +usrbin_exec_PROGRAMS += lsirq +MANPAGES += sys-utils/lsirq.1 +dist_noinst_DATA += sys-utils/lsirq.1.adoc +lsirq_SOURCES = sys-utils/lsirq.c \ + sys-utils/irq-common.c \ + sys-utils/irq-common.h +lsirq_LDADD = $(LDADD) libcommon.la libsmartcols.la +lsirq_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) +endif + +if BUILD_LSIPC +usrbin_exec_PROGRAMS += lsipc +MANPAGES += sys-utils/lsipc.1 +dist_noinst_DATA += sys-utils/lsipc.1.adoc +lsipc_SOURCES = sys-utils/lsipc.c \ + sys-utils/ipcutils.c \ + sys-utils/ipcutils.h +lsipc_LDADD = $(LDADD) libcommon.la libsmartcols.la +lsipc_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) +endif + +if BUILD_RENICE +usrbin_exec_PROGRAMS += renice +MANPAGES += sys-utils/renice.1 +dist_noinst_DATA += sys-utils/renice.1.adoc +renice_SOURCES = sys-utils/renice.c +endif + +if BUILD_RFKILL +usrsbin_exec_PROGRAMS += rfkill +MANPAGES += sys-utils/rfkill.8 +dist_noinst_DATA += sys-utils/rfkill.8.adoc +rfkill_SOURCES = sys-utils/rfkill.c +rfkill_LDADD = $(LDADD) libcommon.la libsmartcols.la +rfkill_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) +endif + +if BUILD_SETSID +usrbin_exec_PROGRAMS += setsid +MANPAGES += sys-utils/setsid.1 +dist_noinst_DATA += sys-utils/setsid.1.adoc +setsid_SOURCES = sys-utils/setsid.c +endif + +if BUILD_READPROFILE +usrsbin_exec_PROGRAMS += readprofile +MANPAGES += sys-utils/readprofile.8 +dist_noinst_DATA += sys-utils/readprofile.8.adoc +readprofile_SOURCES = sys-utils/readprofile.c +readprofile_LDADD = $(LDADD) libcommon.la +endif + +if BUILD_TUNELP +usrsbin_exec_PROGRAMS += tunelp +MANPAGES += sys-utils/tunelp.8 +dist_noinst_DATA += sys-utils/tunelp.8.adoc +tunelp_SOURCES = sys-utils/tunelp.c +tunelp_LDADD = $(LDADD) libcommon.la +endif 
+ +if BUILD_FSTRIM +sbin_PROGRAMS += fstrim +MANPAGES += sys-utils/fstrim.8 +dist_noinst_DATA += sys-utils/fstrim.8.adoc +fstrim_SOURCES = sys-utils/fstrim.c +fstrim_LDADD = $(LDADD) libcommon.la libmount.la +fstrim_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir) +if HAVE_SYSTEMD +systemdsystemunit_DATA += \ + sys-utils/fstrim.service \ + sys-utils/fstrim.timer +endif +endif # BUILD_FSTRIM + +PATHFILES += sys-utils/fstrim.service +EXTRA_DIST += sys-utils/fstrim.timer + +if BUILD_DMESG +bin_PROGRAMS += dmesg +MANPAGES += sys-utils/dmesg.1 +dist_noinst_DATA += sys-utils/dmesg.1.adoc +dmesg_SOURCES = sys-utils/dmesg.c lib/monotonic.c +dmesg_LDADD = $(LDADD) libcommon.la libtcolors.la $(REALTIME_LIBS) +dmesg_CFLAGS = $(AM_CFLAGS) +check_PROGRAMS += test_dmesg +test_dmesg_SOURCES = $(dmesg_SOURCES) +test_dmesg_LDADD = $(dmesg_LDADD) +test_dmesg_CFLAGS = -DTEST_DMESG $(dmesg_CFLAGS) +endif + +if BUILD_CTRLALTDEL +sbin_PROGRAMS += ctrlaltdel +MANPAGES += sys-utils/ctrlaltdel.8 +dist_noinst_DATA += sys-utils/ctrlaltdel.8.adoc +ctrlaltdel_SOURCES = sys-utils/ctrlaltdel.c +ctrlaltdel_LDADD = $(LDADD) libcommon.la +endif + +if BUILD_FSFREEZE +sbin_PROGRAMS += fsfreeze +MANPAGES += sys-utils/fsfreeze.8 +dist_noinst_DATA += sys-utils/fsfreeze.8.adoc +fsfreeze_SOURCES = sys-utils/fsfreeze.c +endif + +if BUILD_BLKDISCARD +sbin_PROGRAMS += blkdiscard +MANPAGES += sys-utils/blkdiscard.8 +dist_noinst_DATA += sys-utils/blkdiscard.8.adoc +blkdiscard_SOURCES = sys-utils/blkdiscard.c lib/monotonic.c +blkdiscard_LDADD = $(LDADD) libcommon.la $(REALTIME_LIBS) +blkdiscard_CFLAGS = $(AM_CFLAGS) +if BUILD_LIBBLKID +blkdiscard_LDADD += libblkid.la +blkdiscard_CFLAGS += -I$(ul_libblkid_incdir) +endif +endif + +if BUILD_BLKZONE +sbin_PROGRAMS += blkzone +MANPAGES += sys-utils/blkzone.8 +dist_noinst_DATA += sys-utils/blkzone.8.adoc +blkzone_SOURCES = sys-utils/blkzone.c +blkzone_LDADD = $(LDADD) libcommon.la +endif + +if BUILD_LDATTACH +usrsbin_exec_PROGRAMS += ldattach +MANPAGES += 
sys-utils/ldattach.8 +dist_noinst_DATA += sys-utils/ldattach.8.adoc +ldattach_SOURCES = sys-utils/ldattach.c +ldattach_LDADD = $(LDADD) libcommon.la +endif + +if BUILD_RTCWAKE +usrsbin_exec_PROGRAMS += rtcwake +MANPAGES += sys-utils/rtcwake.8 +dist_noinst_DATA += sys-utils/rtcwake.8.adoc +rtcwake_SOURCES = sys-utils/rtcwake.c +rtcwake_LDADD = $(LDADD) libcommon.la +endif + +if BUILD_SETARCH +usrbin_exec_PROGRAMS += setarch +MANPAGES += sys-utils/setarch.8 +dist_noinst_DATA += sys-utils/setarch.8.adoc +setarch_SOURCES = sys-utils/setarch.c + +SETARCH_LINKS = uname26 linux32 linux64 + +if ARCH_S390 +SETARCH_LINKS += s390 s390x +endif +if ARCH_I86 +SETARCH_LINKS += i386 +endif +if ARCH_86_64 +SETARCH_LINKS += i386 x86_64 +endif +if ARCH_PPC +SETARCH_LINKS += ppc ppc64 ppc32 +endif +if ARCH_SPARC +SETARCH_LINKS += sparc sparc64 sparc32 sparc32bash +endif +if ARCH_MIPS +SETARCH_LINKS += mips mips64 mips32 +endif +if ARCH_IA64 +SETARCH_LINKS += i386 ia64 +endif +if ARCH_HPPA +SETARCH_LINKS += parisc parisc64 parisc32 +endif + +if ENABLE_ASCIIDOC +SETARCH_MAN_LINKS = $(addprefix sys-utils/,$(SETARCH_LINKS:=.8)) +endif + +man_MANS += $(SETARCH_MAN_LINKS) +CLEANFILES += $(SETARCH_MAN_LINKS) + +$(SETARCH_MAN_LINKS): + $(AM_V_at) $(MKDIR_P) sys-utils + $(AM_V_GEN)echo ".so man8/setarch.8" > $@ + +install-exec-hook-setarch: + for I in $(SETARCH_LINKS); do \ + cd $(DESTDIR)$(usrbin_execdir) && ln -sf setarch $$I ; \ + done + +uninstall-hook-setarch: + for I in $(SETARCH_LINKS); do \ + rm -f $(DESTDIR)$(usrbin_execdir)/$$I ; \ + done + +INSTALL_EXEC_HOOKS += install-exec-hook-setarch +UNINSTALL_HOOKS += uninstall-hook-setarch + +endif # BUILD_SETARCH + + +if BUILD_EJECT +usrbin_exec_PROGRAMS += eject +MANPAGES += sys-utils/eject.1 +dist_noinst_DATA += sys-utils/eject.1.adoc +eject_SOURCES = sys-utils/eject.c lib/monotonic.c +eject_LDADD = $(LDADD) libmount.la libcommon.la $(REALTIME_LIBS) +eject_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir) +endif + + +if BUILD_LOSETUP 
+sbin_PROGRAMS += losetup +MANPAGES += sys-utils/losetup.8 +dist_noinst_DATA += sys-utils/losetup.8.adoc +losetup_SOURCES = sys-utils/losetup.c +losetup_LDADD = $(LDADD) libcommon.la libsmartcols.la +losetup_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) + +if HAVE_STATIC_LOSETUP +bin_PROGRAMS += losetup.static +losetup_static_SOURCES = $(losetup_SOURCES) +losetup_static_LDFLAGS = -all-static +losetup_static_LDADD = $(losetup_LDADD) +losetup_static_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) +endif +endif # BUILD_LOSETUP + + +if BUILD_ZRAMCTL +sbin_PROGRAMS += zramctl +MANPAGES += sys-utils/zramctl.8 +dist_noinst_DATA += sys-utils/zramctl.8.adoc +zramctl_SOURCES = sys-utils/zramctl.c \ + lib/ismounted.c +zramctl_LDADD = $(LDADD) libcommon.la libsmartcols.la +zramctl_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) +endif + + +if BUILD_PRLIMIT +usrbin_exec_PROGRAMS += prlimit +MANPAGES += sys-utils/prlimit.1 +dist_noinst_DATA += sys-utils/prlimit.1.adoc +prlimit_SOURCES = sys-utils/prlimit.c +prlimit_LDADD = $(LDADD) libcommon.la libsmartcols.la +prlimit_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) +endif + + +if BUILD_LSNS +usrbin_exec_PROGRAMS += lsns +MANPAGES += sys-utils/lsns.8 +dist_noinst_DATA += sys-utils/lsns.8.adoc +lsns_SOURCES = sys-utils/lsns.c +lsns_LDADD = $(LDADD) libcommon.la libsmartcols.la libmount.la +lsns_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) -I$(ul_libmount_incdir) +endif + + +if BUILD_MOUNT +bin_PROGRAMS += mount umount +MANPAGES += \ + sys-utils/mount.8 \ + sys-utils/fstab.5 \ + sys-utils/umount.8 +dist_noinst_DATA += \ + sys-utils/mount.8.adoc \ + sys-utils/fstab.5.adoc \ + sys-utils/umount.8.adoc +mount_SOURCES = sys-utils/mount.c +mount_LDADD = $(LDADD) libcommon.la libmount.la $(SELINUX_LIBS) +mount_CFLAGS = $(SUID_CFLAGS) $(AM_CFLAGS) -I$(ul_libmount_incdir) +mount_LDFLAGS = $(SUID_LDFLAGS) $(AM_LDFLAGS) + +umount_SOURCES = sys-utils/umount.c +umount_LDADD = $(LDADD) libcommon.la libmount.la +umount_CFLAGS = 
$(AM_CFLAGS) $(SUID_CFLAGS) -I$(ul_libmount_incdir) +umount_LDFLAGS = $(SUID_LDFLAGS) $(AM_LDFLAGS) + +if HAVE_STATIC_MOUNT +bin_PROGRAMS += mount.static +mount_static_SOURCES = $(mount_SOURCES) +mount_static_CFLAGS = $(mount_CFLAGS) +mount_static_LDFLAGS = $(mount_LDFLAGS) -all-static +mount_static_LDADD = $(mount_LDADD) $(SELINUX_LIBS_STATIC) +endif + +if HAVE_STATIC_UMOUNT +bin_PROGRAMS += umount.static +umount_static_SOURCES = $(umount_SOURCES) +umount_static_CFLAGS = $(umount_CFLAGS) +umount_static_LDFLAGS = $(umount_LDFLAGS) -all-static +umount_static_LDADD = $(umount_LDADD) +endif + +install-exec-hook-mount: +if MAKEINSTALL_DO_CHOWN + chown root:root $(DESTDIR)$(bindir)/mount +endif +if MAKEINSTALL_DO_SETUID + chmod 4755 $(DESTDIR)$(bindir)/mount +endif +if MAKEINSTALL_DO_CHOWN + chown root:root $(DESTDIR)$(bindir)/umount +endif +if MAKEINSTALL_DO_SETUID + chmod 4755 $(DESTDIR)$(bindir)/umount +endif + +INSTALL_EXEC_HOOKS += install-exec-hook-mount +endif # BUILD_MOUNT + + +if BUILD_SWAPON +sbin_PROGRAMS += swapon swapoff +MANPAGES += sys-utils/swapon.8 +dist_noinst_DATA += sys-utils/swapon.8.adoc +MANLINKS += sys-utils/swapoff.8 + +swapon_SOURCES = \ + sys-utils/swapon.c \ + sys-utils/swapon-common.c \ + sys-utils/swapon-common.h \ + lib/swapprober.c \ + include/swapprober.h +swapon_CFLAGS = $(AM_CFLAGS) \ + -I$(ul_libblkid_incdir) \ + -I$(ul_libmount_incdir) \ + -I$(ul_libsmartcols_incdir) +swapon_LDADD = $(LDADD) \ + libblkid.la \ + libcommon.la \ + libmount.la \ + libsmartcols.la + +swapoff_SOURCES = \ + sys-utils/swapoff.c \ + sys-utils/swapon-common.c \ + sys-utils/swapon-common.h \ + lib/swapprober.c \ + include/swapprober.h +swapoff_CFLAGS = $(AM_CFLAGS) \ + -I$(ul_libblkid_incdir) \ + -I$(ul_libmount_incdir) +swapoff_LDADD = $(LDADD) \ + libmount.la \ + libblkid.la \ + libcommon.la +endif + +if BUILD_LSCPU +usrbin_exec_PROGRAMS += lscpu +MANPAGES += sys-utils/lscpu.1 +dist_noinst_DATA += sys-utils/lscpu.1.adoc +lscpu_SOURCES = sys-utils/lscpu.c \ + 
sys-utils/lscpu-cputype.c \ + sys-utils/lscpu-cpu.c \ + sys-utils/lscpu-topology.c \ + sys-utils/lscpu-virt.c \ + sys-utils/lscpu-arm.c \ + sys-utils/lscpu-dmi.c \ + sys-utils/lscpu.h +lscpu_LDADD = $(LDADD) libcommon.la libsmartcols.la $(RTAS_LIBS) +lscpu_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) +endif + +if BUILD_CHCPU +sbin_PROGRAMS += chcpu +MANPAGES += sys-utils/chcpu.8 +dist_noinst_DATA += sys-utils/chcpu.8.adoc +chcpu_SOURCES = sys-utils/chcpu.c +chcpu_LDADD = $(LDADD) libcommon.la +endif + +if BUILD_WDCTL +bin_PROGRAMS += wdctl +MANPAGES += sys-utils/wdctl.8 +dist_noinst_DATA += sys-utils/wdctl.8.adoc +wdctl_SOURCES = sys-utils/wdctl.c +wdctl_LDADD = $(LDADD) libcommon.la libsmartcols.la +wdctl_CFLAGS = $(AM_CFLAGS) -I$(ul_libsmartcols_incdir) +endif + +if BUILD_MOUNTPOINT +bin_PROGRAMS += mountpoint +MANPAGES += sys-utils/mountpoint.1 +dist_noinst_DATA += sys-utils/mountpoint.1.adoc +mountpoint_LDADD = $(LDADD) libmount.la +mountpoint_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir) +mountpoint_SOURCES = sys-utils/mountpoint.c +endif + +if BUILD_FALLOCATE +usrbin_exec_PROGRAMS += fallocate +MANPAGES += sys-utils/fallocate.1 +dist_noinst_DATA += sys-utils/fallocate.1.adoc +fallocate_SOURCES = sys-utils/fallocate.c +fallocate_LDADD = $(LDADD) libcommon.la +endif + +if BUILD_PIVOT_ROOT +sbin_PROGRAMS += pivot_root +MANPAGES += sys-utils/pivot_root.8 +dist_noinst_DATA += sys-utils/pivot_root.8.adoc +pivot_root_SOURCES = sys-utils/pivot_root.c +endif + +if BUILD_SWITCH_ROOT +sbin_PROGRAMS += switch_root +MANPAGES += sys-utils/switch_root.8 +dist_noinst_DATA += sys-utils/switch_root.8.adoc +switch_root_SOURCES = sys-utils/switch_root.c +endif + +if BUILD_UNSHARE +usrbin_exec_PROGRAMS += unshare +MANPAGES += sys-utils/unshare.1 +dist_noinst_DATA += sys-utils/unshare.1.adoc +unshare_SOURCES = sys-utils/unshare.c \ + lib/caputils.c \ + lib/exec_shell.c +unshare_LDADD = $(LDADD) libcommon.la +unshare_CFLAGS = $(AM_CFLAGS) -I$(ul_libmount_incdir) + +if 
HAVE_STATIC_UNSHARE +usrbin_exec_PROGRAMS += unshare.static +unshare_static_SOURCES = $(unshare_SOURCES) +unshare_static_LDFLAGS = -all-static +unshare_static_LDADD = $(unshare_LDADD) +unshare_static_CFLAGS = $(unshare_CFLAGS) +endif +endif + +if BUILD_NSENTER +usrbin_exec_PROGRAMS += nsenter +MANPAGES += sys-utils/nsenter.1 +dist_noinst_DATA += sys-utils/nsenter.1.adoc +nsenter_SOURCES = sys-utils/nsenter.c lib/exec_shell.c +nsenter_LDADD = $(LDADD) libcommon.la $(SELINUX_LIBS) + +if HAVE_STATIC_NSENTER +usrbin_exec_PROGRAMS += nsenter.static +nsenter_static_SOURCES = $(nsenter_SOURCES) +nsenter_static_LDFLAGS = -all-static +nsenter_static_LDADD = $(nsenter_LDADD) +endif +endif + +if BUILD_HWCLOCK +sbin_PROGRAMS += hwclock +MANPAGES += \ + sys-utils/hwclock.8 \ + sys-utils/adjtime_config.5 +dist_noinst_DATA += \ + sys-utils/hwclock.8.adoc \ + sys-utils/adjtime_config.5.adoc +hwclock_SOURCES = \ + sys-utils/hwclock.c \ + sys-utils/hwclock.h +if USE_HWCLOCK_GPLv3_DATETIME +hwclock_SOURCES += \ + sys-utils/hwclock-parse-date.y +endif +hwclock_LDADD = $(LDADD) libcommon.la -lm +hwclock_CPPFLAGS = $(AM_CPPFLAGS) -I$(top_srcdir)/sys-utils +if USE_HWCLOCK_CMOS +hwclock_SOURCES += \ + sys-utils/hwclock-cmos.c +endif +if LINUX +hwclock_SOURCES += \ + sys-utils/hwclock-rtc.c \ + lib/monotonic.c +hwclock_LDADD += $(REALTIME_LIBS) +endif +if HAVE_AUDIT +hwclock_LDADD += -laudit +endif +endif # BUILD_HWCLOCK + +if BUILD_SETPRIV +usrbin_exec_PROGRAMS += setpriv +MANPAGES += sys-utils/setpriv.1 +dist_noinst_DATA += sys-utils/setpriv.1.adoc +setpriv_SOURCES = sys-utils/setpriv.c \ + lib/caputils.c +setpriv_LDADD = $(LDADD) -lcap-ng libcommon.la +endif diff --git a/sys-utils/adjtime_config.5 b/sys-utils/adjtime_config.5 new file mode 100644 index 0000000..a794876 --- /dev/null +++ b/sys-utils/adjtime_config.5 @@ -0,0 +1,88 @@ +'\" t +.\" Title: adjtime_config +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: File 
formats +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "ADJTIME_CONFIG" "5" "2022-05-11" "util\-linux 2.38.1" "File formats" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +adjtime_config \- information about hardware clock setting and drift factor +.SH "SYNOPSIS" +.sp +\fI/etc/adjtime\fP +.SH "DESCRIPTION" +.sp +The file \fI/etc/adjtime\fP contains descriptive information about the hardware clock mode setting and clock drift factor. The file is read and written by \fBhwclock\fP(8); and read by programs like rtcwake to get RTC time mode. +.sp +The file is usually located in \fI/etc\fP, but tools like \fBhwclock\fP(8) or \fBrtcwake\fP(8) can use an alternative location via command line options if write access to \fI/etc\fP is unwanted. The default clock mode is "UTC" if the file is missing. +.sp +The Hardware Clock is usually not very accurate. However, much of its inaccuracy is completely predictable \- it gains or loses the same amount of time every day. This is called systematic drift. The \fBhwclock\fP(8) utility maintains the file \fI/etc/adjtime\fP, which keeps some historical information. For more details see "\fBThe Adjust Function\fP" and "\fBThe Adjtime File\fP" sections of the \fBhwclock\fP(8) man page. +.sp +The \fIadjtime\fP file is formatted in ASCII. 
+.SS "First line" +.sp +Three numbers, separated by blanks: +.sp +\fBdrift factor\fP +.RS 4 +the systematic drift rate in seconds per day (floating point decimal) +.RE +.sp +\fBlast adjust time\fP +.RS 4 +the resulting number of seconds since 1969 UTC of most recent adjustment or calibration (decimal integer) +.RE +.sp +\fBadjustment status\fP +.RS 4 +zero (for compatibility with \fBclock\fP(8)) as a floating point decimal +.RE +.SS "Second line" +.sp +\fBlast calibration time\fP +.RS 4 +The resulting number of seconds since 1969 UTC of most recent calibration. Zero if there has been no calibration yet or it is known that any previous calibration is moot (for example, because the Hardware Clock has been found, since that calibration, not to contain a valid time). This is a decimal integer. +.RE +.SS "Third line" +.sp +\fBclock mode\fP +.RS 4 +Supported values are \fBUTC\fP or \fBLOCAL\fP. Tells whether the Hardware Clock is set to Coordinated Universal Time or local time. You can always override this value with options on the \fBhwclock\fP(8) command line. +.RE +.SH "FILES" +.sp +\fI/etc/adjtime\fP +.SH "SEE ALSO" +.sp +\fBhwclock\fP(8), +\fBrtcwake\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +\fBadjtime_config\fP is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/adjtime_config.5.adoc b/sys-utils/adjtime_config.5.adoc new file mode 100644 index 0000000..ee7d8ec --- /dev/null +++ b/sys-utils/adjtime_config.5.adoc @@ -0,0 +1,65 @@ +//po4a: entry man manual += adjtime_config(5) +:doctype: manpage +:man manual: File formats +:man source: util-linux {release-version} +:page-layout: base +:configfile: adjtime_config + +== NAME + +adjtime_config - information about hardware clock setting and drift factor + +== SYNOPSIS + +_/etc/adjtime_ + +== DESCRIPTION + +The file _/etc/adjtime_ contains descriptive information about the hardware clock mode setting and clock drift factor. The file is read and written by *hwclock*(8); and read by programs like rtcwake to get RTC time mode. + +The file is usually located in _/etc_, but tools like *hwclock*(8) or *rtcwake*(8) can use an alternative location via command line options if write access to _/etc_ is unwanted. The default clock mode is "UTC" if the file is missing. + +The Hardware Clock is usually not very accurate. However, much of its inaccuracy is completely predictable - it gains or loses the same amount of time every day. This is called systematic drift. The *hwclock*(8) utility maintains the file _/etc/adjtime_, which keeps some historical information. For more details see "*The Adjust Function*" and "*The Adjtime File*" sections of the *hwclock*(8) man page. + +The _adjtime_ file is formatted in ASCII. + +=== First line + +Three numbers, separated by blanks: + +*drift factor*:: +the systematic drift rate in seconds per day (floating point decimal) + +*last adjust time*:: +the resulting number of seconds since 1969 UTC of most recent adjustment or calibration (decimal integer) + +*adjustment status*:: +zero (for compatibility with *clock*(8)) as a floating point decimal + +=== Second line + +*last calibration time*:: +The resulting number of seconds since 1969 UTC of most recent calibration. 
Zero if there has been no calibration yet or it is known that any previous calibration is moot (for example, because the Hardware Clock has been found, since that calibration, not to contain a valid time). This is a decimal integer. + +=== Third line + +*clock mode*:: +Supported values are *UTC* or *LOCAL*. Tells whether the Hardware Clock is set to Coordinated Universal Time or local time. You can always override this value with options on the *hwclock*(8) command line. + +== FILES + +_/etc/adjtime_ + +== SEE ALSO + +*hwclock*(8), +*rtcwake*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer-config.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/blkdiscard.8 b/sys-utils/blkdiscard.8 new file mode 100644 index 0000000..fe167a8 --- /dev/null +++ b/sys-utils/blkdiscard.8 @@ -0,0 +1,105 @@ +'\" t +.\" Title: blkdiscard +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "BLKDISCARD" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +blkdiscard \- discard sectors on a device +.SH "SYNOPSIS" +.sp +\fBblkdiscard\fP [options] [\fB\-o\fP \fIoffset\fP] [\fB\-l\fP \fIlength\fP] \fIdevice\fP +.SH "DESCRIPTION" +.sp +\fBblkdiscard\fP is used to discard device sectors. This is useful for solid\-state drives (SSDs) and thinly\-provisioned storage. Unlike \fBfstrim\fP(8), this command is used directly on the block device. +.sp +By default, \fBblkdiscard\fP will discard all blocks on the device. Options may be used to modify this behavior based on range or size, as explained below. 
+.sp +The \fIdevice\fP argument is the pathname of the block device. +.sp +\fBWARNING: All data in the discarded region on the device will be lost!\fP +.SH "OPTIONS" +.sp +The \fIoffset\fP and \fIlength\fP arguments may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB. +.sp +\fB\-f\fP, \fB\-\-force\fP +.RS 4 +Disable all checking. Since v2.36 the block device is opened in exclusive mode (\fBO_EXCL\fP) by default to avoid collision with a mounted filesystem or another kernel subsystem. The \fB\-\-force\fP option disables the exclusive access mode. +.RE +.sp +\fB\-o\fP, \fB\-\-offset\fP \fIoffset\fP +.RS 4 +Byte offset into the device from which to start discarding. The provided value must be aligned to the device sector size. The default value is zero. +.RE +.sp +\fB\-l\fP, \fB\-\-length\fP \fIlength\fP +.RS 4 +The number of bytes to discard (counting from the starting point). The provided value must be aligned to the device sector size. If the specified value extends past the end of the device, \fBblkdiscard\fP will stop at the device size boundary. The default value extends to the end of the device. +.RE +.sp +\fB\-p\fP, \fB\-\-step\fP \fIlength\fP +.RS 4 +The number of bytes to discard within one iteration. The default is to discard everything with a single ioctl call. +.RE +.sp +\fB\-s\fP, \fB\-\-secure\fP +.RS 4 +Perform a secure discard. A secure discard is the same as a regular discard except that all copies of the discarded blocks that were possibly created by garbage collection must also be erased. This requires support from the device. +.RE +.sp +\fB\-z\fP, \fB\-\-zeroout\fP +.RS 4 +Zero\-fill rather than discard. +.RE +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Display the aligned values of \fIoffset\fP and \fIlength\fP. 
If the \fB\-\-step\fP option is specified, it prints the discard progress every second. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "AUTHORS" +.sp +.MTO "lczerner\(atredhat.com" "Lukas Czerner" "" +.SH "SEE ALSO" +.sp +\fBfstrim\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBblkdiscard\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/blkdiscard.8.adoc b/sys-utils/blkdiscard.8.adoc new file mode 100644 index 0000000..8368743 --- /dev/null +++ b/sys-utils/blkdiscard.8.adoc @@ -0,0 +1,68 @@ +//po4a: entry man manual += blkdiscard(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: blkdiscard + +== NAME + +blkdiscard - discard sectors on a device + +== SYNOPSIS + +*blkdiscard* [options] [*-o* _offset_] [*-l* _length_] _device_ + +== DESCRIPTION + +*blkdiscard* is used to discard device sectors. This is useful for solid-state drives (SSDs) and thinly-provisioned storage. Unlike *fstrim*(8), this command is used directly on the block device. + +By default, *blkdiscard* will discard all blocks on the device. Options may be used to modify this behavior based on range or size, as explained below. + +The _device_ argument is the pathname of the block device. + +*WARNING: All data in the discarded region on the device will be lost!* + +== OPTIONS + +The _offset_ and _length_ arguments may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB. + +*-f*, *--force*:: +Disable all checking. Since v2.36 the block device is opened in exclusive mode (*O_EXCL*) by default to avoid collision with a mounted filesystem or another kernel subsystem. The *--force* option disables the exclusive access mode. + +*-o*, *--offset* _offset_:: +Byte offset into the device from which to start discarding. The provided value must be aligned to the device sector size. The default value is zero. + +*-l*, *--length* _length_:: +The number of bytes to discard (counting from the starting point). The provided value must be aligned to the device sector size. 
If the specified value extends past the end of the device, *blkdiscard* will stop at the device size boundary. The default value extends to the end of the device. + +*-p*, *--step* _length_:: +The number of bytes to discard within one iteration. The default is to discard all by one ioctl call. + +*-s*, *--secure*:: +Perform a secure discard. A secure discard is the same as a regular discard except that all copies of the discarded blocks that were possibly created by garbage collection must also be erased. This requires support from the device. + +*-z*, *--zeroout*:: +Zero-fill rather than discard. + +*-v*, *--verbose*:: +Display the aligned values of _offset_ and _length_. If the *--step* option is specified, it prints the discard progress every second. + +include::man-common/help-version.adoc[] + +== AUTHORS + +mailto:lczerner@redhat.com[Lukas Czerner] + +== SEE ALSO + +*fstrim*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/blkdiscard.c b/sys-utils/blkdiscard.c new file mode 100644 index 0000000..38240e8 --- /dev/null +++ b/sys-utils/blkdiscard.c @@ -0,0 +1,327 @@ +/* + * blkdiscard.c -- discard the part (or whole) of the block device. + * + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Written by Lukas Czerner <lczerner@redhat.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * This program uses BLKDISCARD ioctl to discard part or the whole block + * device if the device supports it. You can specify range (start and + * length) to be discarded, or simply discard the whole device. + */ + + +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <fcntl.h> +#include <limits.h> +#include <getopt.h> +#include <time.h> + +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <linux/fs.h> + +#ifdef HAVE_LIBBLKID +# include <blkid.h> +#endif + +#include "nls.h" +#include "strutils.h" +#include "c.h" +#include "closestream.h" +#include "monotonic.h" + +#ifndef BLKDISCARD +# define BLKDISCARD _IO(0x12,119) +#endif + +#ifndef BLKSECDISCARD +# define BLKSECDISCARD _IO(0x12,125) +#endif + +#ifndef BLKZEROOUT +# define BLKZEROOUT _IO(0x12,127) +#endif + +enum { + ACT_DISCARD = 0, /* default */ + ACT_ZEROOUT, + ACT_SECURE +}; + +static void print_stats(int act, char *path, uint64_t stats[]) +{ + switch (act) { + case ACT_ZEROOUT: + printf(_("%s: Zero-filled %" PRIu64 " bytes from the offset %" PRIu64"\n"), \ + path, stats[1], stats[0]); + break; + case ACT_SECURE: + case ACT_DISCARD: + printf(_("%s: Discarded %" PRIu64 " bytes from the offset %" PRIu64"\n"), \ + path, stats[1], stats[0]); + break; + } +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, + _(" %s [options] <device>\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Discard the content of sectors on a device.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -f, --force disable all checking\n"), out); + fputs(_(" -o, --offset <num> offset in bytes to discard from\n"), out); + fputs(_(" -l, --length <num> length of bytes to discard from the 
offset\n"), out); + fputs(_(" -p, --step <num> size of the discard iterations within the offset\n"), out); + fputs(_(" -s, --secure perform secure discard\n"), out); + fputs(_(" -z, --zeroout zero-fill rather than discard\n"), out); + fputs(_(" -v, --verbose print aligned length and offset\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(21)); + + fputs(USAGE_ARGUMENTS, out); + printf(USAGE_ARG_SIZE(_("<num>"))); + + printf(USAGE_MAN_TAIL("blkdiscard(8)")); + exit(EXIT_SUCCESS); +} + +#ifdef HAVE_LIBBLKID +/* + * Check existing signature on the open fd + * Returns 0 signature found + * 1 no signature + * <0 error + */ +static int probe_device(int fd, char *path) +{ + const char *type; + blkid_probe pr = NULL; + int ret = -1; + + pr = blkid_new_probe(); + if (!pr || blkid_probe_set_device(pr, fd, 0, 0)) + return ret; + + blkid_probe_enable_superblocks(pr, TRUE); + blkid_probe_enable_partitions(pr, TRUE); + + ret = blkid_do_fullprobe(pr); + if (ret) + goto out; + + if (!blkid_probe_lookup_value(pr, "TYPE", &type, NULL)) { + warnx("%s contains existing file system (%s).",path ,type); + } else if (!blkid_probe_lookup_value(pr, "PTTYPE", &type, NULL)) { + warnx("%s contains existing partition (%s).",path ,type); + } else { + warnx("%s contains existing signature.", path); + } + +out: + blkid_free_probe(pr); + return ret; +} +#endif /* HAVE_LIBBLKID */ + +int main(int argc, char **argv) +{ + char *path; + int c, fd, verbose = 0, secsize, force = 0; + uint64_t end, blksize, step, range[2], stats[2]; + struct stat sb; + struct timeval now = { 0 }, last = { 0 }; + int act = ACT_DISCARD; + + static const struct option longopts[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { "offset", required_argument, NULL, 'o' }, + { "force", no_argument, NULL, 'f' }, + { "length", required_argument, NULL, 'l' }, + { "step", required_argument, NULL, 'p' }, + { "secure", no_argument, NULL, 's' }, + { "verbose", no_argument, 
NULL, 'v' }, + { "zeroout", no_argument, NULL, 'z' }, + { NULL, 0, NULL, 0 } + }; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + range[0] = 0; + range[1] = ULLONG_MAX; + step = 0; + + while ((c = getopt_long(argc, argv, "hfVsvo:l:p:z", longopts, NULL)) != -1) { + switch(c) { + case 'f': + force = 1; + break; + case 'l': + range[1] = strtosize_or_err(optarg, + _("failed to parse length")); + break; + case 'o': + range[0] = strtosize_or_err(optarg, + _("failed to parse offset")); + break; + case 'p': + step = strtosize_or_err(optarg, + _("failed to parse step")); + break; + case 's': + act = ACT_SECURE; + break; + case 'v': + verbose = 1; + break; + case 'z': + act = ACT_ZEROOUT; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (optind == argc) + errx(EXIT_FAILURE, _("no device specified")); + + path = argv[optind++]; + + if (optind != argc) { + warnx(_("unexpected number of arguments")); + errtryhelp(EXIT_FAILURE); + } + + fd = open(path, O_RDWR | (force ? 
0 : O_EXCL)); + if (fd < 0) + err(EXIT_FAILURE, _("cannot open %s"), path); + + if (fstat(fd, &sb) == -1) + err(EXIT_FAILURE, _("stat of %s failed"), path); + if (!S_ISBLK(sb.st_mode)) + errx(EXIT_FAILURE, _("%s: not a block device"), path); + + if (ioctl(fd, BLKGETSIZE64, &blksize)) + err(EXIT_FAILURE, _("%s: BLKGETSIZE64 ioctl failed"), path); + if (ioctl(fd, BLKSSZGET, &secsize)) + err(EXIT_FAILURE, _("%s: BLKSSZGET ioctl failed"), path); + + /* check offset alignment to the sector size */ + if (range[0] % secsize) + errx(EXIT_FAILURE, _("%s: offset %" PRIu64 " is not aligned " + "to sector size %i"), path, range[0], secsize); + + /* is the range end behind the end of the device ?*/ + if (range[0] > blksize) + errx(EXIT_FAILURE, _("%s: offset is greater than device size"), path); + end = range[0] + range[1]; + if (end < range[0] || end > blksize) + end = blksize; + + range[1] = (step > 0) ? step : end - range[0]; + + /* check length alignment to the sector size */ + if (range[1] % secsize) + errx(EXIT_FAILURE, _("%s: length %" PRIu64 " is not aligned " + "to sector size %i"), path, range[1], secsize); +#ifdef HAVE_LIBBLKID + if (force) + warnx(_("Operation forced, data will be lost!")); + else { + /* Check for existing signatures on the device */ + switch(probe_device(fd, path)) { + case 0: /* signature detected */ + /* + * Only require force in interactive mode to avoid + * breaking existing scripts + */ + if (isatty(STDIN_FILENO)) { + errx(EXIT_FAILURE, + _("This is destructive operation, data will " \ + "be lost! 
Use the -f option to override.")); + } + break; + case 1: /* no signature */ + break; + default: /* error */ + err(EXIT_FAILURE, _("failed to probe the device")); + break; + } + } +#endif /* HAVE_LIBBLKID */ + + stats[0] = range[0], stats[1] = 0; + gettime_monotonic(&last); + + for (/* nothing */; range[0] < end; range[0] += range[1]) { + if (range[0] + range[1] > end) + range[1] = end - range[0]; + + switch (act) { + case ACT_ZEROOUT: + if (ioctl(fd, BLKZEROOUT, &range)) + err(EXIT_FAILURE, _("%s: BLKZEROOUT ioctl failed"), path); + break; + case ACT_SECURE: + if (ioctl(fd, BLKSECDISCARD, &range)) + err(EXIT_FAILURE, _("%s: BLKSECDISCARD ioctl failed"), path); + break; + case ACT_DISCARD: + if (ioctl(fd, BLKDISCARD, &range)) + err(EXIT_FAILURE, _("%s: BLKDISCARD ioctl failed"), path); + break; + } + + stats[1] += range[1]; + + /* reporting progress at most once per second */ + if (verbose && step) { + gettime_monotonic(&now); + if (now.tv_sec > last.tv_sec && + (now.tv_usec >= last.tv_usec || now.tv_sec > last.tv_sec + 1)) { + print_stats(act, path, stats); + stats[0] += stats[1], stats[1] = 0; + last = now; + } + } + } + + if (verbose && stats[1]) + print_stats(act, path, stats); + + close(fd); + return EXIT_SUCCESS; +} diff --git a/sys-utils/blkzone.8 b/sys-utils/blkzone.8 new file mode 100644 index 0000000..45e8c3e --- /dev/null +++ b/sys-utils/blkzone.8 @@ -0,0 +1,250 @@ +'\" t +.\" Title: blkzone +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "BLKZONE" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. 
LINKSTYLE blue R < > +.\} +.SH "NAME" +blkzone \- run zone command on a device +.SH "SYNOPSIS" +.sp +\fBblkzone\fP \fIcommand\fP [options] \fIdevice\fP +.SH "DESCRIPTION" +.sp +\fBblkzone\fP is used to run zone commands on devices that support the Zoned Block Commands (ZBC) or Zoned\-device ATA Commands (ZAC). The zones to operate on can be specified using the offset, count and length options. +.sp +The \fIdevice\fP argument is the pathname of the block device. +.SH "COMMANDS" +.SS "report" +.sp +The command \fBblkzone report\fP is used to report device zone information. +.sp +By default, the command will report all zones from the start of the block device. Options may be used to modify this behavior, changing the starting zone or the size of the report, as explained below. +.sp +Report output: +.TS +allbox tab(:); +lt lt. +T{ +.sp +start +T}:T{ +.sp +Zone start sector +T} +T{ +.sp +len +T}:T{ +.sp +Zone length in number of sectors +T} +T{ +.sp +cap +T}:T{ +.sp +Zone capacity in number of sectors +T} +T{ +.sp +wptr +T}:T{ +.sp +Zone write pointer position +T} +T{ +.sp +reset +T}:T{ +.sp +Reset write pointer recommended +T} +T{ +.sp +non\-seq +T}:T{ +.sp +Non\-sequential write resources active +T} +T{ +.sp +cond +T}:T{ +.sp +Zone condition +T} +T{ +.sp +type +T}:T{ +.sp +Zone type +T} +.TE +.sp +.sp +Zone conditions: +.TS +allbox tab(:); +lt lt. +T{ +.sp +cl +T}:T{ +.sp +Closed +T} +T{ +.sp +nw +T}:T{ +.sp +Not write pointer +T} +T{ +.sp +em +T}:T{ +.sp +Empty +T} +T{ +.sp +fu +T}:T{ +.sp +Full +T} +T{ +.sp +oe +T}:T{ +.sp +Explicitly opened +T} +T{ +.sp +oi +T}:T{ +.sp +Implicitly opened +T} +T{ +.sp +of +T}:T{ +.sp +Offline +T} +T{ +.sp +ro +T}:T{ +.sp +Read only +T} +T{ +.sp +x? +T}:T{ +.sp +Reserved conditions (should not be reported) +T} +.TE +.sp +.SS "capacity" +.sp +The command \fBblkzone capacity\fP is used to report device capacity information. +.sp +By default, the command will report the sum, in number of sectors, of all zone capacities on the device.
Options may be used to modify this behavior, changing the starting zone or the size of the report, as explained below. +.SS "reset" +.sp +The command \fBblkzone reset\fP is used to reset one or more zones. Unlike \fBsg_reset_wp\fP(8), this command operates from the block layer and can reset a range of zones. +.SS "open" +.sp +The command \fBblkzone open\fP is used to explicitly open one or more zones. Unlike \fBsg_zone\fP(8), open action, this command operates from the block layer and can open a range of zones. +.SS "close" +.sp +The command \fBblkzone close\fP is used to close one or more zones. Unlike \fBsg_zone\fP(8), close action, this command operates from the block layer and can close a range of zones. +.SS "finish" +.sp +The command \fBblkzone finish\fP is used to finish (transition to full condition) one or more zones. Unlike \fBsg_zone\fP(8), finish action, this command operates from the block layer and can finish a range of zones. +.sp +By default, the \fBreset\fP, \fBopen\fP, \fBclose\fP and \fBfinish\fP commands will operate from the zone at device sector 0 and operate on all zones. Options may be used to modify this behavior as explained below. +.SH "OPTIONS" +.sp +The \fIoffset\fP and \fIlength\fP option arguments may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB. Additionally, the 0x prefix can be used to specify \fIoffset\fP and \fIlength\fP in hex. +.sp +\fB\-o\fP, \fB\-\-offset\fP \fIsector\fP +.RS 4 +The starting zone specified as a sector offset. The provided offset in sector units (512 bytes) should match the start of a zone. The default value is zero. +.RE +.sp +\fB\-l\fP, \fB\-\-length\fP \fIsectors\fP +.RS 4 +The maximum number of sectors the command should operate on. 
The default value is the number of sectors remaining after \fIoffset\fP. This option cannot be used together with the option \fB\-\-count\fP. +.RE +.sp +\fB\-c\fP, \fB\-\-count\fP \fIcount\fP +.RS 4 +The maximum number of zones the command should operate on. The default value is the number of zones starting from \fIoffset\fP. This option cannot be used together with the option \fB\-\-length\fP. +.RE +.sp +\fB\-f\fP, \fB\-\-force\fP +.RS 4 +Enforce commands to change zone status on block devices used by the system. +.RE +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Display the number of zones returned in the report or the range of sectors reset. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "AUTHORS" +.sp +.MTO "shaun\(attancheff.com" "Shaun Tancheff" "," +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +\fBsg_rep_zones\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBblkzone\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/blkzone.8.adoc b/sys-utils/blkzone.8.adoc new file mode 100644 index 0000000..8456b5e --- /dev/null +++ b/sys-utils/blkzone.8.adoc @@ -0,0 +1,118 @@ +//po4a: entry man manual += blkzone(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: blkzone + +== NAME + +blkzone - run zone command on a device + +== SYNOPSIS + +*blkzone* _command_ [options] _device_ + +== DESCRIPTION + +*blkzone* is used to run zone commands on devices that support the Zoned Block Commands (ZBC) or Zoned-device ATA Commands (ZAC). The zones to operate on can be specified using the offset, count and length options. + +The _device_ argument is the pathname of the block device. + +== COMMANDS + +=== report + +The command *blkzone report* is used to report device zone information. + +By default, the command will report all zones from the start of the block device. Options may be used to modify this behavior, changing the starting zone or the size of the report, as explained below. + +Report output: +[cols=",",] +|=== +|start |Zone start sector +|len |Zone length in number of sectors +|cap |Zone capacity in number of sectors +|wptr |Zone write pointer position +|reset |Reset write pointer recommended +|non-seq |Non-sequential write resources active +|cond |Zone condition +|type |Zone type +|=== + +Zone conditions: +[cols=",",] +|=== +|cl |Closed +|nw |Not write pointer +|em |Empty +|fu |Full +|oe |Explicitly opened +|oi |Implicitly opened +|of |Offline +|ro |Read only +|x? |Reserved conditions (should not be reported) +|=== + +=== capacity + +The command *blkzone capacity* is used to report device capacity information. + +By default, the command will report the sum, in number of sectors, of all zone capacities on the device. Options may be used to modify this behavior, changing the starting zone or the size of the report, as explained below.
+ +=== reset + +The command *blkzone reset* is used to reset one or more zones. Unlike *sg_reset_wp*(8), this command operates from the block layer and can reset a range of zones. + +=== open + +The command *blkzone open* is used to explicitly open one or more zones. Unlike *sg_zone*(8), open action, this command operates from the block layer and can open a range of zones. + +=== close + +The command *blkzone close* is used to close one or more zones. Unlike *sg_zone*(8), close action, this command operates from the block layer and can close a range of zones. + +=== finish + +The command *blkzone finish* is used to finish (transition to full condition) one or more zones. Unlike *sg_zone*(8), finish action, this command operates from the block layer and can finish a range of zones. + +By default, the *reset*, *open*, *close* and *finish* commands will operate from the zone at device sector 0 and operate on all zones. Options may be used to modify this behavior as explained below. + +== OPTIONS + +The _offset_ and _length_ option arguments may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB. Additionally, the 0x prefix can be used to specify _offset_ and _length_ in hex. + +*-o*, *--offset* _sector_:: +The starting zone specified as a sector offset. The provided offset in sector units (512 bytes) should match the start of a zone. The default value is zero. + +*-l*, *--length* _sectors_:: +The maximum number of sectors the command should operate on. The default value is the number of sectors remaining after _offset_. This option cannot be used together with the option *--count*. + +*-c*, *--count* _count_:: +The maximum number of zones the command should operate on. The default value is the number of zones starting from _offset_. 
This option cannot be used together with the option *--length*. + +*-f*, *--force*:: +Enforce commands to change zone status on block devices used by the system. + +*-v*, *--verbose*:: +Display the number of zones returned in the report or the range of sectors reset. + +include::man-common/help-version.adoc[] + +== AUTHORS + +mailto:shaun@tancheff.com[Shaun Tancheff], +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +*sg_rep_zones*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/blkzone.c b/sys-utils/blkzone.c new file mode 100644 index 0000000..04e2660 --- /dev/null +++ b/sys-utils/blkzone.c @@ -0,0 +1,506 @@ +/* + * blkzone.c -- the block device zone commands + * + * Copyright (C) 2015,2016 Seagate Technology PLC + * Written by Shaun Tancheff <shaun.tancheff@seagate.com> + * + * Copyright (C) 2017 Karel Zak <kzak@redhat.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <fcntl.h> +#include <limits.h> +#include <getopt.h> +#include <time.h> + +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <linux/fs.h> +#include <linux/blkzoned.h> + +#include "nls.h" +#include "strutils.h" +#include "xalloc.h" +#include "c.h" +#include "closestream.h" +#include "blkdev.h" +#include "sysfs.h" +#include "optutils.h" + +/* + * These ioctls are defined in linux/blkzoned.h starting with kernel 5.5. + */ +#ifndef BLKOPENZONE +#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range) +#endif +#ifndef BLKCLOSEZONE +#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range) +#endif +#ifndef BLKFINISHZONE +#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range) +#endif + +struct blkzone_control; + +static int blkzone_report(struct blkzone_control *ctl); +static int blkzone_action(struct blkzone_control *ctl); + +struct blkzone_command { + const char *name; + int (*handler)(struct blkzone_control *); + unsigned long ioctl_cmd; + const char *ioctl_name; + const char *help; +}; + +struct blkzone_control { + const char *devname; + const struct blkzone_command *command; + + uint64_t total_sectors; + int secsize; + + uint64_t offset; + uint64_t length; + uint32_t count; + + unsigned int force : 1; + unsigned int verbose : 1; +}; + +static const struct blkzone_command commands[] = { + { + .name = "report", + .handler = blkzone_report, + .help = N_("Report zone information about the given device") + },{ + .name = "capacity", + .handler = blkzone_report, + .help = N_("Report sum of zone capacities for the given device") + },{ + .name = "reset", + .handler = blkzone_action, + .ioctl_cmd = BLKRESETZONE, + .ioctl_name = "BLKRESETZONE", + .help = N_("Reset a range of zones.") + },{ + .name = "open", + .handler = blkzone_action, + .ioctl_cmd = BLKOPENZONE, + .ioctl_name = "BLKOPENZONE", + .help 
= N_("Open a range of zones.") + },{ + .name = "close", + .handler = blkzone_action, + .ioctl_cmd = BLKCLOSEZONE, + .ioctl_name = "BLKCLOSEZONE", + .help = N_("Close a range of zones.") + },{ + .name = "finish", + .handler = blkzone_action, + .ioctl_cmd = BLKFINISHZONE, + .ioctl_name = "BLKFINISHZONE", + .help = N_("Set a range of zones to Full.") + } +}; + +static const struct blkzone_command *name_to_command(const char *name) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(commands); i++) { + if (strcmp(commands[i].name, name) == 0) + return &commands[i]; + } + + return NULL; +} + +static int init_device(struct blkzone_control *ctl, int mode) +{ + struct stat sb; + int fd; + + fd = open(ctl->devname, mode); + if (fd < 0) + err(EXIT_FAILURE, _("cannot open %s"), ctl->devname); + + if (fstat(fd, &sb) == -1) + err(EXIT_FAILURE, _("stat of %s failed"), ctl->devname); + if (!S_ISBLK(sb.st_mode)) + errx(EXIT_FAILURE, _("%s: not a block device"), ctl->devname); + + if (blkdev_get_sectors(fd, (unsigned long long *) &ctl->total_sectors)) + err(EXIT_FAILURE, _("%s: blkdev_get_sectors ioctl failed"), ctl->devname); + + if (blkdev_get_sector_size(fd, &ctl->secsize)) + err(EXIT_FAILURE, _("%s: BLKSSZGET ioctl failed"), ctl->devname); + + return fd; +} + +/* + * Get the device zone size (as indicated by the queue/chunk_sectors sysfs + * attribute of the whole disk). Returns 0 if the size cannot be determined. + */ +static unsigned long blkdev_chunk_sectors(const char *dname) +{ + struct path_cxt *pc = NULL; + dev_t devno = sysfs_devname_to_devno(dname); + dev_t disk; + uint64_t sz = 0; + int rc; + + /* + * Mapping /dev/sdXn -> /sys/block/sdX to read the chunk_sectors entry. + * This method masks off the partition specified by the minor device + * component.
+ */ + pc = ul_new_sysfs_path(devno, NULL, NULL); + if (!pc) + return 0; + + rc = sysfs_blkdev_get_wholedisk(pc, NULL, 0, &disk); + if (rc != 0) + goto done; + + /* if @pc is not a whole-disk device, switch to the disk */ + if (devno != disk) { + rc = sysfs_blkdev_init_path(pc, disk, NULL); + if (rc != 0) + goto done; + } + + rc = ul_path_read_u64(pc, &sz, "queue/chunk_sectors"); +done: + ul_unref_path(pc); + return rc == 0 ? sz : 0; +} + +#if HAVE_DECL_BLK_ZONE_REP_CAPACITY +#define has_zone_capacity(zi) ((zi)->flags & BLK_ZONE_REP_CAPACITY) +#define zone_capacity(z) (z)->capacity +#else +#define has_zone_capacity(zi) (false) +#define zone_capacity(z) (z)->len +#endif + +/* + * blkzone report + */ +#define DEF_REPORT_LEN (1U << 12) /* 4k zones per report (256k kzalloc) */ + +static const char *type_text[] = { + "RESERVED", + "CONVENTIONAL", + "SEQ_WRITE_REQUIRED", + "SEQ_WRITE_PREFERRED", +}; + +static const char *condition_str[] = { + "nw", /* Not write pointer */ + "em", /* Empty */ + "oi", /* Implicitly opened */ + "oe", /* Explicitly opened */ + "cl", /* Closed */ + "x5", "x6", "x7", "x8", "x9", "xA", "xB", "xC", /* xN: reserved */ + "ro", /* Read only */ + "fu", /* Full */ + "of" /* Offline */ +}; + +static int blkzone_report(struct blkzone_control *ctl) +{ + bool only_capacity_sum = !strcmp(ctl->command->name, "capacity"); + uint64_t capacity_sum = 0; + struct blk_zone_report *zi; + unsigned long zonesize; + uint32_t i, nr_zones; + int fd; + + fd = init_device(ctl, O_RDONLY); + + if (ctl->offset >= ctl->total_sectors) + errx(EXIT_FAILURE, + _("%s: offset is greater than or equal to device size"), ctl->devname); + + zonesize = blkdev_chunk_sectors(ctl->devname); + if (!zonesize) + errx(EXIT_FAILURE, _("%s: unable to determine zone size"), ctl->devname); + + if (ctl->count) + nr_zones = ctl->count; + else if (ctl->length) + nr_zones = (ctl->length + zonesize - 1) / zonesize; + else + nr_zones = 1 + (ctl->total_sectors - ctl->offset) / zonesize; + + zi =
xmalloc(sizeof(struct blk_zone_report) + + (DEF_REPORT_LEN * sizeof(struct blk_zone))); + + while (nr_zones && ctl->offset < ctl->total_sectors) { + + zi->nr_zones = min(nr_zones, DEF_REPORT_LEN); + zi->sector = ctl->offset; + + if (ioctl(fd, BLKREPORTZONE, zi) == -1) + err(EXIT_FAILURE, _("%s: BLKREPORTZONE ioctl failed"), ctl->devname); + + if (ctl->verbose) + printf(_("Found %d zones from 0x%"PRIx64"\n"), + zi->nr_zones, ctl->offset); + + if (!zi->nr_zones) + break; + + for (i = 0; i < zi->nr_zones; i++) { +/* + * blk_zone_report hasn't been packed since https://github.com/torvalds/linux/commit/b3e7e7d2d668de0102264302a4d10dd9d4438a42 + * was merged. See https://github.com/util-linux/util-linux/issues/1083 + */ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Waddress-of-packed-member" + const struct blk_zone *entry = &zi->zones[i]; +#pragma GCC diagnostic pop + unsigned int type = entry->type; + uint64_t start = entry->start; + uint64_t wp = entry->wp; + uint8_t cond = entry->cond; + uint64_t len = entry->len; + uint64_t cap; + /* + * type_text[] has fewer entries than the 8-bit type field + * has values; print anything outside the table as its + * first entry (RESERVED) instead of reading past the end. + */ + const char *type_name = + type_text[type < ARRAY_SIZE(type_text) ? type : 0]; + + if (!len) { + nr_zones = 0; + break; + } + + if (has_zone_capacity(zi)) + cap = zone_capacity(entry); + else + cap = entry->len; + + if (only_capacity_sum) { + capacity_sum += cap; + } else if (has_zone_capacity(zi)) { + printf(_(" start: 0x%09"PRIx64", len 0x%06"PRIx64 + ", cap 0x%06"PRIx64", wptr 0x%06"PRIx64 + " reset:%u non-seq:%u, zcond:%2u(%s) [type: %u(%s)]\n"), + start, len, cap, (type == 0x1) ? 0 : wp - start, + entry->reset, entry->non_seq, + cond, condition_str[cond & (ARRAY_SIZE(condition_str) - 1)], + type, type_name); + } else { + printf(_(" start: 0x%09"PRIx64", len 0x%06"PRIx64 + ", wptr 0x%06"PRIx64 + " reset:%u non-seq:%u, zcond:%2u(%s) [type: %u(%s)]\n"), + start, len, (type == 0x1) ?
0 : wp - start, + entry->reset, entry->non_seq, + cond, condition_str[cond & (ARRAY_SIZE(condition_str) - 1)], + type, type_name); + } + + nr_zones--; + ctl->offset = start + len; + } + + } + + if (only_capacity_sum) + printf(_("0x%09"PRIx64"\n"), capacity_sum); + + free(zi); + close(fd); + + return 0; +} + +/* + * blkzone reset, open, close, and finish. + * + * Runs the ioctl of the selected command on the sector range that starts at + * ctl->offset. The range length is taken from ctl->count or ctl->length and + * defaults to the whole device; it is clamped to the end of the device. + */ +static int blkzone_action(struct blkzone_control *ctl) +{ + struct blk_zone_range za = { .sector = 0 }; + unsigned long zonesize; + uint64_t zlen; + int fd; + + zonesize = blkdev_chunk_sectors(ctl->devname); + if (!zonesize) + errx(EXIT_FAILURE, _("%s: unable to determine zone size"), ctl->devname); + + fd = init_device(ctl, O_WRONLY | (ctl->force ? 0 : O_EXCL)); + + if (ctl->offset & (zonesize - 1)) + errx(EXIT_FAILURE, _("%s: offset %" PRIu64 " is not aligned " + "to zone size %lu"), + ctl->devname, ctl->offset, zonesize); + + if (ctl->offset > ctl->total_sectors) + errx(EXIT_FAILURE, _("%s: offset is greater than device size"), ctl->devname); + + /* widen before multiplying: unsigned long may be only 32 bits */ + if (ctl->count) + zlen = (uint64_t) ctl->count * zonesize; + else if (ctl->length) + zlen = ctl->length; + else + zlen = ctl->total_sectors; + /* offset <= total_sectors was checked above, so this cannot wrap */ + if (zlen > ctl->total_sectors - ctl->offset) + zlen = ctl->total_sectors - ctl->offset; + + if (ctl->length && + (zlen & (zonesize - 1)) && + ctl->offset + zlen != ctl->total_sectors) + errx(EXIT_FAILURE, _("%s: number of sectors %" PRIu64 " is not aligned " + "to zone size %lu"), + ctl->devname, ctl->length, zonesize); + + za.sector = ctl->offset; + za.nr_sectors = zlen; + + if (ioctl(fd, ctl->command->ioctl_cmd, &za) == -1) + err(EXIT_FAILURE, _("%s: %s ioctl failed"), + ctl->devname, ctl->command->ioctl_name); + else if (ctl->verbose) + printf(_("%s: successful %s of zones in range from %" PRIu64 ", to %" PRIu64 "\n"), + ctl->devname, + ctl->command->name, + ctl->offset, + ctl->offset + zlen); + close(fd); + return 0; +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + size_t i; + +
fputs(USAGE_HEADER, out); + fprintf(out, _(" %s <command> [options] <device>\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Run zone command on the given block device.\n"), out); + + fputs(USAGE_COMMANDS, out); + for (i = 0; i < ARRAY_SIZE(commands); i++) + fprintf(out, " %-11s %s\n", commands[i].name, _(commands[i].help)); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -o, --offset <sector> start sector of zone to act (in 512-byte sectors)\n"), out); + fputs(_(" -l, --length <sectors> maximum sectors to act (in 512-byte sectors)\n"), out); + fputs(_(" -c, --count <number> maximum number of zones\n"), out); + fputs(_(" -f, --force enforce on block devices used by the system\n"), out); + fputs(_(" -v, --verbose display more details\n"), out); + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(24)); + + fputs(USAGE_ARGUMENTS, out); + printf(USAGE_ARG_SIZE(_("<sector> and <sectors>"))); + + printf(USAGE_MAN_TAIL("blkzone(8)")); + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + int c; + struct blkzone_control ctl = { + .devname = NULL + }; + + static const struct option longopts[] = { + { "help", no_argument, NULL, 'h' }, + { "count", required_argument, NULL, 'c' }, /* max #of zones to operate on */ + { "length", required_argument, NULL, 'l' }, /* max of sectors to operate on */ + { "offset", required_argument, NULL, 'o' }, /* starting LBA */ + { "force", no_argument, NULL, 'f' }, + { "verbose", no_argument, NULL, 'v' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'c', 'l' }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + if (argc >= 2 && *argv[1] != '-') { + ctl.command = name_to_command(argv[1]); + if (!ctl.command) + errx(EXIT_FAILURE, _("%s is not valid command name"), 
argv[1]); + argv++; + argc--; + } + + while ((c = getopt_long(argc, argv, "hc:l:o:fvV", longopts, NULL)) != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch (c) { + case 'c': + ctl.count = strtou32_or_err(optarg, + _("failed to parse number of zones")); + break; + case 'l': + ctl.length = strtosize_or_err(optarg, + _("failed to parse number of sectors")); + break; + case 'o': + ctl.offset = strtosize_or_err(optarg, + _("failed to parse zone offset")); + break; + case 'f': + ctl.force = 1; + break; + case 'v': + ctl.verbose = 1; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (!ctl.command) + errx(EXIT_FAILURE, _("no command specified")); + + if (optind == argc) + errx(EXIT_FAILURE, _("no device specified")); + ctl.devname = argv[optind++]; + + if (optind != argc) + errx(EXIT_FAILURE,_("unexpected number of arguments")); + + if (ctl.command->handler(&ctl) < 0) + return EXIT_FAILURE; + + return EXIT_SUCCESS; + +} diff --git a/sys-utils/chcpu.8 b/sys-utils/chcpu.8 new file mode 100644 index 0000000..0a2c1dc --- /dev/null +++ b/sys-utils/chcpu.8 @@ -0,0 +1,129 @@ +'\" t +.\" Title: chcpu +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "CHCPU" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +chcpu \- configure CPUs +.SH "SYNOPSIS" +.sp +\fBchcpu\fP \fB\-c\fP|\fB\-d\fP|\fB\-e\fP|\fB\-g\fP \fIcpu\-list\fP +.sp +\fBchcpu\fP \fB\-p\fP \fImode\fP +.sp +\fBchcpu\fP \fB\-r\fP|\fB\-h\fP|\fB\-V\fP +.SH "DESCRIPTION" +.sp +\fBchcpu\fP can modify the state of CPUs. 
It can enable or disable CPUs, scan for new CPUs, change the CPU dispatching \fImode\fP of the underlying hypervisor, and request CPUs from the hypervisor (configure) or return CPUs to the hypervisor (deconfigure). +.sp +Some options have a \fIcpu\-list\fP argument. Use this argument to specify a comma\-separated list of CPUs. The list can contain individual CPU addresses or ranges of addresses. For example, \fB0,5,7,9\-11\fP makes the command applicable to the CPUs with the addresses 0, 5, 7, 9, 10, and 11. +.SH "OPTIONS" +.sp +\fB\-c\fP, \fB\-\-configure\fP \fIcpu\-list\fP +.RS 4 +Configure the specified CPUs. Configuring a CPU means that the hypervisor takes a CPU from the CPU pool and assigns it to the virtual hardware on which your kernel runs. +.RE +.sp +\fB\-d\fP, \fB\-\-disable\fP \fIcpu\-list\fP +.RS 4 +Disable the specified CPUs. Disabling a CPU means that the kernel sets it offline. +.RE +.sp +\fB\-e\fP, \fB\-\-enable\fP \fIcpu\-list\fP +.RS 4 +Enable the specified CPUs. Enabling a CPU means that the kernel sets it online. A CPU must be configured, see \fB\-c\fP, before it can be enabled. +.RE +.sp +\fB\-g\fP, \fB\-\-deconfigure\fP \fIcpu\-list\fP +.RS 4 +Deconfigure the specified CPUs. Deconfiguring a CPU means that the hypervisor removes the CPU from the virtual hardware on which the Linux instance runs and returns it to the CPU pool. A CPU must be offline, see \fB\-d\fP, before it can be deconfigured. +.RE +.sp +\fB\-p\fP, \fB\-\-dispatch\fP \fImode\fP +.RS 4 +Set the CPU dispatching \fImode\fP (polarization). This option has an effect only if your hardware architecture and hypervisor support CPU polarization. Available \fImodes\fP are: +.sp +\fBhorizontal\fP +.RS 4 +The workload is spread across all available CPUs. +.RE +.sp +\fBvertical\fP +.RS 4 +The workload is concentrated on few CPUs. +.RE +.RE +.sp +\fB\-r\fP, \fB\-\-rescan\fP +.RS 4 +Trigger a rescan of CPUs. After a rescan, the Linux kernel recognizes the new CPUs. 
Use this option on systems that do not automatically detect newly attached CPUs. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "EXIT STATUS" +.sp +\fBchcpu\fP has the following exit status values: +.sp +\fB0\fP +.RS 4 +success +.RE +.sp +\fB1\fP +.RS 4 +failure +.RE +.sp +\fB64\fP +.RS 4 +partial success +.RE +.SH "AUTHORS" +.sp +.MTO "heiko.carstens\(atde.ibm.com" "Heiko Carstens" "" +.SH "COPYRIGHT" +.sp +Copyright IBM Corp. 2011 +.SH "SEE ALSO" +.sp +\fBlscpu\fP(1) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBchcpu\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/chcpu.8.adoc b/sys-utils/chcpu.8.adoc new file mode 100644 index 0000000..c5797df --- /dev/null +++ b/sys-utils/chcpu.8.adoc @@ -0,0 +1,86 @@ +//po4a: entry man manual += chcpu(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: chcpu + +== NAME + +chcpu - configure CPUs + +== SYNOPSIS + +*chcpu* *-c*|*-d*|*-e*|*-g* _cpu-list_ + +*chcpu* *-p* _mode_ + +*chcpu* *-r*|*-h*|*-V* + +== DESCRIPTION + +*chcpu* can modify the state of CPUs. It can enable or disable CPUs, scan for new CPUs, change the CPU dispatching _mode_ of the underlying hypervisor, and request CPUs from the hypervisor (configure) or return CPUs to the hypervisor (deconfigure). + +Some options have a _cpu-list_ argument. Use this argument to specify a comma-separated list of CPUs. The list can contain individual CPU addresses or ranges of addresses. For example, *0,5,7,9-11* makes the command applicable to the CPUs with the addresses 0, 5, 7, 9, 10, and 11. + +== OPTIONS + +*-c*, *--configure* _cpu-list_:: +Configure the specified CPUs. Configuring a CPU means that the hypervisor takes a CPU from the CPU pool and assigns it to the virtual hardware on which your kernel runs. + +*-d*, *--disable* _cpu-list_:: +Disable the specified CPUs. Disabling a CPU means that the kernel sets it offline. + +*-e*, *--enable* _cpu-list_:: +Enable the specified CPUs. Enabling a CPU means that the kernel sets it online. A CPU must be configured, see *-c*, before it can be enabled. + +*-g*, *--deconfigure* _cpu-list_:: +Deconfigure the specified CPUs. Deconfiguring a CPU means that the hypervisor removes the CPU from the virtual hardware on which the Linux instance runs and returns it to the CPU pool. A CPU must be offline, see *-d*, before it can be deconfigured. + +*-p*, *--dispatch* _mode_:: +Set the CPU dispatching _mode_ (polarization). 
This option has an effect only if your hardware architecture and hypervisor support CPU polarization. Available _modes_ are: + +*horizontal*;; +The workload is spread across all available CPUs. + +*vertical*;; +The workload is concentrated on few CPUs. + +*-r*, *--rescan*:: +Trigger a rescan of CPUs. After a rescan, the Linux kernel recognizes the new CPUs. Use this option on systems that do not automatically detect newly attached CPUs. + +include::man-common/help-version.adoc[] + +== EXIT STATUS + +*chcpu* has the following exit status values: + +*0*:: +success + +*1*:: +failure + +*64*:: +partial success + +== AUTHORS + +mailto:heiko.carstens@de.ibm.com[Heiko Carstens] + +== COPYRIGHT + +Copyright IBM Corp. 2011 + +== SEE ALSO + +*lscpu*(1) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/chcpu.c b/sys-utils/chcpu.c new file mode 100644 index 0000000..527bce5 --- /dev/null +++ b/sys-utils/chcpu.c @@ -0,0 +1,391 @@ +/* + * chcpu - CPU configuration tool + * + * Copyright IBM Corp. 2011 + * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <ctype.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/utsname.h> +#include <unistd.h> +#include <stdarg.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include "cpuset.h" +#include "nls.h" +#include "xalloc.h" +#include "c.h" +#include "strutils.h" +#include "bitops.h" +#include "path.h" +#include "closestream.h" +#include "optutils.h" + +#define EXCL_ERROR "--{configure,deconfigure,disable,dispatch,enable}" + +/* partial success, otherwise we return regular EXIT_{SUCCESS,FAILURE} */ +#define CHCPU_EXIT_SOMEOK 64 + +#define _PATH_SYS_CPU "/sys/devices/system/cpu" + +static cpu_set_t *onlinecpus; +static int maxcpus; + +#define is_cpu_online(cpu) (CPU_ISSET_S((cpu), CPU_ALLOC_SIZE(maxcpus), onlinecpus)) +#define num_online_cpus() (CPU_COUNT_S(CPU_ALLOC_SIZE(maxcpus), onlinecpus)) + +enum { + CMD_CPU_ENABLE = 0, + CMD_CPU_DISABLE, + CMD_CPU_CONFIGURE, + CMD_CPU_DECONFIGURE, + CMD_CPU_RESCAN, + CMD_CPU_DISPATCH_HORIZONTAL, + CMD_CPU_DISPATCH_VERTICAL, +}; + +/* returns: 0 = success + * < 0 = failure + * > 0 = partial success + */ +static int cpu_enable(struct path_cxt *sys, cpu_set_t *cpu_set, size_t setsize, int enable) +{ + int cpu; + int online, rc; + int configured = -1; + int fails = 0; + + for (cpu = 0; cpu < maxcpus; cpu++) { + if (!CPU_ISSET_S(cpu, setsize, cpu_set)) + continue; + if (ul_path_accessf(sys, F_OK, "cpu%d", cpu) != 0) { + warnx(_("CPU %u does not exist"), cpu); + fails++; + continue; + } + if (ul_path_accessf(sys, F_OK, "cpu%d/online", cpu) != 0) { + warnx(_("CPU %u is not hot pluggable"), cpu); + fails++; + continue; + } + if (ul_path_readf_s32(sys, &online, "cpu%d/online", cpu) == 0 + && online == 1 + && enable == 1) { + printf(_("CPU %u is already enabled\n"), cpu); + continue; + } + if (online == 0 && enable == 0) { + printf(_("CPU %u is already disabled\n"), cpu); + continue; + } + if (ul_path_accessf(sys, 
F_OK, "cpu%d/configure", cpu) == 0) + ul_path_readf_s32(sys, &configured, "cpu%d/configure", cpu); + if (enable) { + rc = ul_path_writef_string(sys, "1", "cpu%d/online", cpu); + if (rc != 0 && configured == 0) { + warn(_("CPU %u enable failed (CPU is deconfigured)"), cpu); + fails++; + } else if (rc != 0) { + warn(_("CPU %u enable failed"), cpu); + fails++; + } else + printf(_("CPU %u enabled\n"), cpu); + } else { + if (onlinecpus && num_online_cpus() == 1) { + warnx(_("CPU %u disable failed (last enabled CPU)"), cpu); + fails++; + continue; + } + rc = ul_path_writef_string(sys, "0", "cpu%d/online", cpu); + if (rc != 0) { + warn(_("CPU %u disable failed"), cpu); + fails++; + } else { + printf(_("CPU %u disabled\n"), cpu); + if (onlinecpus) + CPU_CLR_S(cpu, setsize, onlinecpus); + } + } + } + + return fails == 0 ? 0 : fails == maxcpus ? -1 : 1; +} + +static int cpu_rescan(struct path_cxt *sys) +{ + if (ul_path_access(sys, F_OK, "rescan") != 0) + errx(EXIT_FAILURE, _("This system does not support rescanning of CPUs")); + + if (ul_path_write_string(sys, "1", "rescan") != 0) + err(EXIT_FAILURE, _("Failed to trigger rescan of CPUs")); + + printf(_("Triggered rescan of CPUs\n")); + return 0; +} + +static int cpu_set_dispatch(struct path_cxt *sys, int mode) +{ + if (ul_path_access(sys, F_OK, "dispatching") != 0) + errx(EXIT_FAILURE, _("This system does not support setting " + "the dispatching mode of CPUs")); + if (mode == 0) { + if (ul_path_write_string(sys, "0", "dispatching") != 0) + err(EXIT_FAILURE, _("Failed to set horizontal dispatch mode")); + + printf(_("Successfully set horizontal dispatching mode\n")); + } else { + if (ul_path_write_string(sys, "1", "dispatching") != 0) + err(EXIT_FAILURE, _("Failed to set vertical dispatch mode")); + + printf(_("Successfully set vertical dispatching mode\n")); + } + return 0; +} + +/* returns: 0 = success + * < 0 = failure + * > 0 = partial success + */ +static int cpu_configure(struct path_cxt *sys, cpu_set_t *cpu_set, size_t 
setsize, int configure) +{ + int cpu; + int rc, current; + int fails = 0; + + for (cpu = 0; cpu < maxcpus; cpu++) { + /* only act on CPUs selected in cpu_set */ + if (!CPU_ISSET_S(cpu, setsize, cpu_set)) + continue; + if (ul_path_accessf(sys, F_OK, "cpu%d", cpu) != 0) { + warnx(_("CPU %u does not exist"), cpu); + fails++; + continue; + } + if (ul_path_accessf(sys, F_OK, "cpu%d/configure", cpu) != 0) { + warnx(_("CPU %u is not configurable"), cpu); + fails++; + continue; + } + /* start from "state unknown" so the checks below never compare an uninitialized or stale value if the read stores nothing */ + current = -1; + ul_path_readf_s32(sys, &current, "cpu%d/configure", cpu); + if (current == 1 && configure == 1) { + printf(_("CPU %u is already configured\n"), cpu); + continue; + } + if (current == 0 && configure == 0) { + printf(_("CPU %u is already deconfigured\n"), cpu); + continue; + } + /* refuse to deconfigure a CPU that is still marked online (checked only when onlinecpus is set) */ + if (current == 1 && configure == 0 && onlinecpus && + is_cpu_online(cpu)) { + warnx(_("CPU %u deconfigure failed (CPU is enabled)"), cpu); + fails++; + continue; + } + if (configure) { + rc = ul_path_writef_string(sys, "1", "cpu%d/configure", cpu); + if (rc != 0) { + warn(_("CPU %u configure failed"), cpu); + fails++; + } else + printf(_("CPU %u configured\n"), cpu); + } else { + rc = ul_path_writef_string(sys, "0", "cpu%d/configure", cpu); + if (rc != 0) { + warn(_("CPU %u deconfigure failed"), cpu); + fails++; + } else + printf(_("CPU %u deconfigured\n"), cpu); + } + } + + return fails == 0 ? 0 : fails == maxcpus ?
-1 : 1; +} + +static void cpu_parse(char *cpu_string, cpu_set_t *cpu_set, size_t setsize) +{ + int rc; + + rc = cpulist_parse(cpu_string, cpu_set, setsize, 1); + if (rc == 0) + return; + if (rc == 2) + errx(EXIT_FAILURE, _("invalid CPU number in CPU list: %s"), cpu_string); + errx(EXIT_FAILURE, _("failed to parse CPU list: %s"), cpu_string); +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fprintf(out, _( + "\nUsage:\n" + " %s [options]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Configure CPUs in a multi-processor system.\n"), out); + + fputs(USAGE_OPTIONS, stdout); + fputs(_( + " -e, --enable <cpu-list> enable cpus\n" + " -d, --disable <cpu-list> disable cpus\n" + " -c, --configure <cpu-list> configure cpus\n" + " -g, --deconfigure <cpu-list> deconfigure cpus\n" + " -p, --dispatch <mode> set dispatching mode\n" + " -r, --rescan trigger rescan of cpus\n" + ), stdout); + printf(USAGE_HELP_OPTIONS(31)); + + printf(USAGE_MAN_TAIL("chcpu(8)")); + exit(EXIT_SUCCESS); +} + +int main(int argc, char *argv[]) +{ + struct path_cxt *sys = NULL; /* _PATH_SYS_CPU handler */ + cpu_set_t *cpu_set = NULL; + size_t setsize; + int cmd = -1; + int c, rc; + + static const struct option longopts[] = { + { "configure", required_argument, NULL, 'c' }, + { "deconfigure",required_argument, NULL, 'g' }, + { "disable", required_argument, NULL, 'd' }, + { "dispatch", required_argument, NULL, 'p' }, + { "enable", required_argument, NULL, 'e' }, + { "help", no_argument, NULL, 'h' }, + { "rescan", no_argument, NULL, 'r' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'c','d','e','g','p' }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + ul_path_init_debug(); + sys = 
ul_new_path(_PATH_SYS_CPU); + if (!sys) + err(EXIT_FAILURE, _("failed to initialize sysfs handler")); + + maxcpus = get_max_number_of_cpus(); + if (maxcpus < 1) + errx(EXIT_FAILURE, _("cannot determine NR_CPUS; aborting")); + + if (ul_path_access(sys, F_OK, "online") == 0) + ul_path_readf_cpulist(sys, &cpu_set, maxcpus, "online"); + else + cpu_set = CPU_ALLOC(maxcpus); + if (!cpu_set) + err(EXIT_FAILURE, _("cpuset_alloc failed")); + + setsize = CPU_ALLOC_SIZE(maxcpus); + + while ((c = getopt_long(argc, argv, "c:d:e:g:hp:rV", longopts, NULL)) != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch (c) { + case 'c': + cmd = CMD_CPU_CONFIGURE; + cpu_parse(argv[optind - 1], cpu_set, setsize); + break; + case 'd': + cmd = CMD_CPU_DISABLE; + cpu_parse(argv[optind - 1], cpu_set, setsize); + break; + case 'e': + cmd = CMD_CPU_ENABLE; + cpu_parse(argv[optind - 1], cpu_set, setsize); + break; + case 'g': + cmd = CMD_CPU_DECONFIGURE; + cpu_parse(argv[optind - 1], cpu_set, setsize); + break; + case 'p': + if (strcmp("horizontal", argv[optind - 1]) == 0) + cmd = CMD_CPU_DISPATCH_HORIZONTAL; + else if (strcmp("vertical", argv[optind - 1]) == 0) + cmd = CMD_CPU_DISPATCH_VERTICAL; + else + errx(EXIT_FAILURE, _("unsupported argument: %s"), + argv[optind -1 ]); + break; + case 'r': + cmd = CMD_CPU_RESCAN; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if ((argc == 1) || (argc != optind)) { + warnx(_("bad usage")); + errtryhelp(EXIT_FAILURE); + } + + switch (cmd) { + case CMD_CPU_ENABLE: + rc = cpu_enable(sys, cpu_set, maxcpus, 1); + break; + case CMD_CPU_DISABLE: + rc = cpu_enable(sys, cpu_set, maxcpus, 0); + break; + case CMD_CPU_CONFIGURE: + rc = cpu_configure(sys, cpu_set, maxcpus, 1); + break; + case CMD_CPU_DECONFIGURE: + rc = cpu_configure(sys, cpu_set, maxcpus, 0); + break; + case CMD_CPU_RESCAN: + rc = cpu_rescan(sys); + break; + case CMD_CPU_DISPATCH_HORIZONTAL: + rc = 
cpu_set_dispatch(sys, 0); + break; + case CMD_CPU_DISPATCH_VERTICAL: + rc = cpu_set_dispatch(sys, 1); + break; + default: + rc = -EINVAL; + break; + } + + CPU_FREE(cpu_set); + ul_unref_path(sys); + + return rc == 0 ? EXIT_SUCCESS : + rc < 0 ? EXIT_FAILURE : CHCPU_EXIT_SOMEOK; +} diff --git a/sys-utils/chmem.8 b/sys-utils/chmem.8 new file mode 100644 index 0000000..d152567 --- /dev/null +++ b/sys-utils/chmem.8 @@ -0,0 +1,173 @@ +'\" t +.\" Title: chmem +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "CHMEM" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +chmem \- configure memory +.SH "SYNOPSIS" +.sp +\fBchmem\fP [\fB\-h\fP] [\fB\-V\fP] [\fB\-v\fP] [\fB\-e\fP|\fB\-d\fP] [\fISIZE\fP|\fIRANGE\fP \fB\-b\fP \fIBLOCKRANGE\fP] [\fB\-z\fP \fIZONE\fP] +.SH "DESCRIPTION" +.sp +The chmem command sets a particular size or range of memory online or offline. +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Specify \fISIZE\fP as <size>[m|M|g|G]. With m or M, <size> specifies the memory size in MiB (1024 x 1024 bytes). With g or G, <size> specifies the memory size in GiB (1024 x 1024 x 1024 bytes). The default unit is MiB. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Specify \fIRANGE\fP in the form 0x<start>\-0x<end> as shown in the output of the \fBlsmem\fP(1) command. <start> is the hexadecimal address of the first byte and <end> is the hexadecimal address of the last byte in the memory range. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +.
IP \(bu 2.3 +.\} +Specify \fIBLOCKRANGE\fP in the form <first>\-<last> or <block> as shown in the output of the \fBlsmem\fP(1) command. <first> is the number of the first memory block and <last> is the number of the last memory block in the memory range. Alternatively a single block can be specified. \fIBLOCKRANGE\fP requires the \fB\-\-blocks\fP option. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Specify \fIZONE\fP as the name of a memory zone, as shown in the output of the \fBlsmem \-o +ZONES\fP command. The output shows one or more valid memory zones for each memory range. If multiple zones are shown, then the memory range currently belongs to the first zone. By default, \fBchmem\fP will set memory online to the zone Movable, if this is among the valid zones. This default can be changed by specifying the \fB\-\-zone\fP option with another valid zone. For memory ballooning, it is recommended to select the zone Movable for memory online and offline, if possible. Memory in this zone is much more likely to be able to be offlined again, but it cannot be used for arbitrary kernel allocations, only for migratable pages (e.g., anonymous and page cache pages). Use the \fB\-\-help\fP option to see all available zones. +.RE +.sp +\fISIZE\fP and \fIRANGE\fP must be aligned to the Linux memory block size, as shown in the output of the \fBlsmem\fP(1) command. +.sp +Setting memory online can fail for various reasons. On virtualized systems it can fail if the hypervisor does not have enough memory left, for example because memory was overcommitted. Setting memory offline can fail if Linux cannot free the memory. If only part of the requested memory can be set online or offline, a message tells you how much memory was set online or offline instead of the requested amount. +.sp +When setting memory online \fBchmem\fP starts with the lowest memory block numbers. 
When setting memory offline \fBchmem\fP starts with the highest memory block numbers. +.SH "OPTIONS" +.sp +\fB\-b\fP, \fB\-\-blocks\fP +.RS 4 +Use a \fIBLOCKRANGE\fP parameter instead of \fIRANGE\fP or \fISIZE\fP for the \fB\-\-enable\fP and \fB\-\-disable\fP options. +.RE +.sp +\fB\-d\fP, \fB\-\-disable\fP +.RS 4 +Set the specified \fIRANGE\fP, \fISIZE\fP, or \fIBLOCKRANGE\fP of memory offline. +.RE +.sp +\fB\-e\fP, \fB\-\-enable\fP +.RS 4 +Set the specified \fIRANGE\fP, \fISIZE\fP, or \fIBLOCKRANGE\fP of memory online. +.RE +.sp +\fB\-z\fP, \fB\-\-zone\fP +.RS 4 +Select the memory \fIZONE\fP where to set the specified \fIRANGE\fP, \fISIZE\fP, or \fIBLOCKRANGE\fP of memory online or offline. By default, memory will be set online to the zone Movable, if possible. +.RE +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Verbose mode. Causes \fBchmem\fP to print debugging messages about its progress. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "EXIT STATUS" +.sp +\fBchmem\fP has the following exit status values: +.sp +\fB0\fP +.RS 4 +success +.RE +.sp +\fB1\fP +.RS 4 +failure +.RE +.sp +\fB64\fP +.RS 4 +partial success +.RE +.SH "EXAMPLE" +.sp +\fBchmem \-\-enable 1024\fP +.RS 4 +This command requests 1024 MiB of memory to be set online. +.RE +.sp +\fBchmem \-e 2g\fP +.RS 4 +This command requests 2 GiB of memory to be set online. +.RE +.sp +\fBchmem \-\-disable 0x00000000e4000000\-0x00000000f3ffffff\fP +.RS 4 +This command requests the memory range starting with 0x00000000e4000000 and ending with 0x00000000f3ffffff to be set offline. +.RE +.sp +\fBchmem \-b \-d 10\fP +.RS 4 +This command requests the memory block number 10 to be set offline. +.RE +.SH "SEE ALSO" +.sp +\fBlsmem\fP(1) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "."
+.SH "AVAILABILITY" +.sp +The \fBchmem\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/chmem.8.adoc b/sys-utils/chmem.8.adoc new file mode 100644 index 0000000..de628aa --- /dev/null +++ b/sys-utils/chmem.8.adoc @@ -0,0 +1,91 @@ +//po4a: entry man manual += chmem(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: chmem + +== NAME + +chmem - configure memory + +== SYNOPSIS + +*chmem* [*-h*] [*-V*] [*-v*] [*-e*|*-d*] [_SIZE_|_RANGE_ *-b* _BLOCKRANGE_] [*-z* _ZONE_] + +== DESCRIPTION + +The chmem command sets a particular size or range of memory online or offline. + +* Specify _SIZE_ as <size>[m|M|g|G]. With m or M, <size> specifies the memory size in MiB (1024 x 1024 bytes). With g or G, <size> specifies the memory size in GiB (1024 x 1024 x 1024 bytes). The default unit is MiB. + +* Specify _RANGE_ in the form 0x<start>-0x<end> as shown in the output of the *lsmem*(1) command. <start> is the hexadecimal address of the first byte and <end> is the hexadecimal address of the last byte in the memory range. + +* Specify _BLOCKRANGE_ in the form <first>-<last> or <block> as shown in the output of the *lsmem*(1) command. <first> is the number of the first memory block and <last> is the number of the last memory block in the memory range. Alternatively a single block can be specified. _BLOCKRANGE_ requires the *--blocks* option. + +* Specify _ZONE_ as the name of a memory zone, as shown in the output of the *lsmem -o +ZONES* command. The output shows one or more valid memory zones for each memory range. If multiple zones are shown, then the memory range currently belongs to the first zone. By default, *chmem* will set memory online to the zone Movable, if this is among the valid zones. This default can be changed by specifying the *--zone* option with another valid zone. For memory ballooning, it is recommended to select the zone Movable for memory online and offline, if possible.
Memory in this zone is much more likely to be able to be offlined again, but it cannot be used for arbitrary kernel allocations, only for migratable pages (e.g., anonymous and page cache pages). Use the *--help* option to see all available zones. + +_SIZE_ and _RANGE_ must be aligned to the Linux memory block size, as shown in the output of the *lsmem*(1) command. + +Setting memory online can fail for various reasons. On virtualized systems it can fail if the hypervisor does not have enough memory left, for example because memory was overcommitted. Setting memory offline can fail if Linux cannot free the memory. If only part of the requested memory can be set online or offline, a message tells you how much memory was set online or offline instead of the requested amount. + +When setting memory online *chmem* starts with the lowest memory block numbers. When setting memory offline *chmem* starts with the highest memory block numbers. + +== OPTIONS + +*-b*, *--blocks*:: +Use a _BLOCKRANGE_ parameter instead of _RANGE_ or _SIZE_ for the *--enable* and *--disable* options. + +*-d*, *--disable*:: +Set the specified _RANGE_, _SIZE_, or _BLOCKRANGE_ of memory offline. + +*-e*, *--enable*:: +Set the specified _RANGE_, _SIZE_, or _BLOCKRANGE_ of memory online. + +*-z*, *--zone*:: +Select the memory _ZONE_ where to set the specified _RANGE_, _SIZE_, or _BLOCKRANGE_ of memory online or offline. By default, memory will be set online to the zone Movable, if possible. + +*-v*, *--verbose*:: +Verbose mode. Causes *chmem* to print debugging messages about its progress. + +include::man-common/help-version.adoc[] + +== EXIT STATUS + +*chmem* has the following exit status values: + +*0*:: +success + +*1*:: +failure + +*64*:: +partial success + +== EXAMPLE + +*chmem --enable 1024*:: +This command requests 1024 MiB of memory to be set online. + +*chmem -e 2g*:: +This command requests 2 GiB of memory to be set online.
+ +*chmem --disable 0x00000000e4000000-0x00000000f3ffffff*:: +This command requests the memory range starting with 0x00000000e4000000 and ending with 0x00000000f3ffffff to be set offline. + +*chmem -b -d 10*:: +This command requests the memory block number 10 to be set offline. + +== SEE ALSO + +*lsmem*(1) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/chmem.c b/sys-utils/chmem.c new file mode 100644 index 0000000..09d0af6 --- /dev/null +++ b/sys-utils/chmem.c @@ -0,0 +1,459 @@ +/* + * chmem - Memory configuration tool + * + * Copyright IBM Corp. 2016 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <getopt.h> +#include <assert.h> +#include <dirent.h> + +#include "c.h" +#include "nls.h" +#include "path.h" +#include "strutils.h" +#include "strv.h" +#include "optutils.h" +#include "closestream.h" +#include "xalloc.h" + +/* partial success, otherwise we return regular EXIT_{SUCCESS,FAILURE} */ +#define CHMEM_EXIT_SOMEOK 64 + +#define _PATH_SYS_MEMORY "/sys/devices/system/memory" + +struct chmem_desc { + struct path_cxt *sysmem; /* _PATH_SYS_MEMORY handler */ + struct dirent **dirs; + int ndirs; + uint64_t block_size; + uint64_t start; + uint64_t end; + uint64_t size; + unsigned int use_blocks : 1; + unsigned int is_size : 1; + unsigned int verbose : 1; + unsigned int have_zones : 1; +}; + +enum { + CMD_MEMORY_ENABLE = 0, + CMD_MEMORY_DISABLE, + CMD_NONE +}; + +enum zone_id { + ZONE_DMA = 0, + ZONE_DMA32, + ZONE_NORMAL, + ZONE_HIGHMEM, + ZONE_MOVABLE, + ZONE_DEVICE, +}; + +static char *zone_names[] = { + [ZONE_DMA] = "DMA", + [ZONE_DMA32] = "DMA32", + [ZONE_NORMAL] = "Normal", + [ZONE_HIGHMEM] = "Highmem", + [ZONE_MOVABLE] = "Movable", + [ZONE_DEVICE] = "Device", +}; + +/* + * name must be null-terminated + */ +static int zone_name_to_id(const char *name) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(zone_names); i++) { + if (!strcasecmp(name, zone_names[i])) + return i; + } + return -1; +} + +static void idxtostr(struct chmem_desc *desc, uint64_t idx, char *buf, size_t bufsz) +{ + uint64_t start, end; + + start = idx * desc->block_size; + end = start + desc->block_size - 1; + snprintf(buf, bufsz, + _("Memory Block %"PRIu64" (0x%016"PRIx64"-0x%016"PRIx64")"), + idx, start, end); +} + +static int chmem_size(struct chmem_desc *desc, int enable, int zone_id) +{ + char *name, *onoff, line[BUFSIZ], str[BUFSIZ]; + uint64_t size, index; + const char *zn; + int i, rc; + + size = desc->size; + onoff = enable ? "online" : "offline"; + i = enable ? 
0 : desc->ndirs - 1; + + if (enable && zone_id >= 0) { + if (zone_id == ZONE_MOVABLE) + onoff = "online_movable"; + else + onoff = "online_kernel"; + } + + for (; i >= 0 && i < desc->ndirs && size; i += enable ? 1 : -1) { + name = desc->dirs[i]->d_name; + index = strtou64_or_err(name + 6, _("Failed to parse index")); + + if (ul_path_readf_buffer(desc->sysmem, line, sizeof(line), "%s/state", name) > 0 + && strncmp(onoff, line, 6) == 0) + continue; + + if (desc->have_zones) { + ul_path_readf_buffer(desc->sysmem, line, sizeof(line), "%s/valid_zones", name); + if (zone_id >= 0) { + zn = zone_names[zone_id]; + if (enable && !strcasestr(line, zn)) + continue; + if (!enable && strncasecmp(line, zn, strlen(zn)) != 0) + continue; + } else if (enable) { + /* By default, use zone Movable for online, if valid */ + if (strcasestr(line, zone_names[ZONE_MOVABLE])) + onoff = "online_movable"; + else + onoff = "online"; + } + } + + idxtostr(desc, index, str, sizeof(str)); + rc = ul_path_writef_string(desc->sysmem, onoff, "%s/state", name); + if (rc != 0 && desc->verbose) { + if (enable) + fprintf(stdout, _("%s enable failed\n"), str); + else + fprintf(stdout, _("%s disable failed\n"), str); + } else if (rc == 0 && desc->verbose) { + if (enable) + fprintf(stdout, _("%s enabled\n"), str); + else + fprintf(stdout, _("%s disabled\n"), str); + } + if (rc == 0) + size--; + } + if (size) { + uint64_t bytes; + char *sizestr; + + bytes = (desc->size - size) * desc->block_size; + sizestr = size_to_human_string(SIZE_SUFFIX_1LETTER, bytes); + if (enable) + warnx(_("Could only enable %s of memory"), sizestr); + else + warnx(_("Could only disable %s of memory"), sizestr); + free(sizestr); + } + return size == 0 ? 0 : size == desc->size ? -1 : 1; +} + +static int chmem_range(struct chmem_desc *desc, int enable, int zone_id) +{ + char *name, *onoff, line[BUFSIZ], str[BUFSIZ]; + uint64_t index, todo; + const char *zn; + int i, rc; + + todo = desc->end - desc->start + 1; + onoff = enable ? 
"online" : "offline"; + + if (enable && zone_id >= 0) { + if (zone_id == ZONE_MOVABLE) + onoff = "online_movable"; + else + onoff = "online_kernel"; + } + + for (i = 0; i < desc->ndirs; i++) { + name = desc->dirs[i]->d_name; + index = strtou64_or_err(name + 6, _("Failed to parse index")); + if (index < desc->start) + continue; + if (index > desc->end) + break; + idxtostr(desc, index, str, sizeof(str)); + if (ul_path_readf_buffer(desc->sysmem, line, sizeof(line), "%s/state", name) > 0 + && strncmp(onoff, line, 6) == 0) { + if (desc->verbose && enable) + fprintf(stdout, _("%s already enabled\n"), str); + else if (desc->verbose && !enable) + fprintf(stdout, _("%s already disabled\n"), str); + todo--; + continue; + } + + if (desc->have_zones) { + ul_path_readf_buffer(desc->sysmem, line, sizeof(line), "%s/valid_zones", name); + if (zone_id >= 0) { + zn = zone_names[zone_id]; + if (enable && !strcasestr(line, zn)) { + warnx(_("%s enable failed: Zone mismatch"), str); + continue; + } + if (!enable && strncasecmp(line, zn, strlen(zn)) != 0) { + warnx(_("%s disable failed: Zone mismatch"), str); + continue; + } + } else if (enable) { + /* By default, use zone Movable for online, if valid */ + if (strcasestr(line, zone_names[ZONE_MOVABLE])) + onoff = "online_movable"; + else + onoff = "online"; + } + } + + rc = ul_path_writef_string(desc->sysmem, onoff, "%s/state", name); + if (rc != 0) { + if (enable) + warn(_("%s enable failed"), str); + else + warn(_("%s disable failed"), str); + } else if (desc->verbose) { + if (enable) + fprintf(stdout, _("%s enabled\n"), str); + else + fprintf(stdout, _("%s disabled\n"), str); + } + if (rc == 0) + todo--; + } + return todo == 0 ? 0 : todo == desc->end - desc->start + 1 ? 
-1 : 1; +} + +static int filter(const struct dirent *de) +{ + if (strncmp("memory", de->d_name, 6) != 0) + return 0; + return isdigit_string(de->d_name + 6); +} + +static void read_info(struct chmem_desc *desc) +{ + char line[128]; + + desc->ndirs = scandir(_PATH_SYS_MEMORY, &desc->dirs, filter, versionsort); + if (desc->ndirs <= 0) + goto fail; + ul_path_read_buffer(desc->sysmem, line, sizeof(line), "block_size_bytes"); + + errno = 0; + desc->block_size = strtoumax(line, NULL, 16); + if (errno) + goto fail; + return; +fail: + err(EXIT_FAILURE, _("Failed to read %s"), _PATH_SYS_MEMORY); +} + +static void parse_single_param(struct chmem_desc *desc, char *str) +{ + if (desc->use_blocks) { + desc->start = strtou64_or_err(str, _("Failed to parse block number")); + desc->end = desc->start; + return; + } + desc->is_size = 1; + desc->size = strtosize_or_err(str, _("Failed to parse size")); + if (isdigit(str[strlen(str) - 1])) + desc->size *= 1024*1024; + if (desc->size % desc->block_size) { + errx(EXIT_FAILURE, _("Size must be aligned to memory block size (%s)"), + size_to_human_string(SIZE_SUFFIX_1LETTER, desc->block_size)); + } + desc->size /= desc->block_size; +} + +static void parse_range_param(struct chmem_desc *desc, char *start, char *end) +{ + if (desc->use_blocks) { + desc->start = strtou64_or_err(start, _("Failed to parse start")); + desc->end = strtou64_or_err(end, _("Failed to parse end")); + return; + } + if (strlen(start) < 2 || start[1] != 'x') + errx(EXIT_FAILURE, _("Invalid start address format: %s"), start); + if (strlen(end) < 2 || end[1] != 'x') + errx(EXIT_FAILURE, _("Invalid end address format: %s"), end); + desc->start = strtox64_or_err(start, _("Failed to parse start address")); + desc->end = strtox64_or_err(end, _("Failed to parse end address")); + if (desc->start % desc->block_size || (desc->end + 1) % desc->block_size) { + errx(EXIT_FAILURE, + _("Start address and (end address + 1) must be aligned to " + "memory block size (%s)"), + 
size_to_human_string(SIZE_SUFFIX_1LETTER, desc->block_size)); + } + desc->start /= desc->block_size; + desc->end /= desc->block_size; +} + +/* Split PARAM on "-" and parse it as a single value or as a start-end range; exits on invalid input. */ +static void parse_parameter(struct chmem_desc *desc, char *param) +{ + char **split; + + split = strv_split(param, "-"); + /* an empty split result has no split[0] to parse, so reject it together with over-long ones */ + if (strv_length(split) < 1 || strv_length(split) > 2) + errx(EXIT_FAILURE, _("Invalid parameter: %s"), param); + if (strv_length(split) == 1) + parse_single_param(desc, split[0]); + else + parse_range_param(desc, split[0], split[1]); + strv_free(split); + if (desc->start > desc->end) + errx(EXIT_FAILURE, _("Invalid range: %s"), param); +} + +/* Print the help text, including the supported zone names, to stdout and exit with EXIT_SUCCESS. */ +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + size_t i; + + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options] [SIZE|RANGE|BLOCKRANGE]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Set a particular size or range of memory online or offline.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -e, --enable enable memory\n"), out); + fputs(_(" -d, --disable disable memory\n"), out); + fputs(_(" -b, --blocks use memory blocks\n"), out); + fputs(_(" -z, --zone <name> select memory zone (see below)\n"), out); + fputs(_(" -v, --verbose verbose output\n"), out); + printf(USAGE_HELP_OPTIONS(20)); + + fputs(_("\nSupported zones:\n"), out); + for (i = 0; i < ARRAY_SIZE(zone_names); i++) + fprintf(out, " %s\n", zone_names[i]); + + printf(USAGE_MAN_TAIL("chmem(8)")); + + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + struct chmem_desc _desc = { 0 }, *desc = &_desc; + int cmd = CMD_NONE, zone_id = -1; + char *zone = NULL; + int c, rc; + + /* "blocks" matches the --blocks spelling used by usage(); getopt_long() still accepts the former --block as an abbreviation */ + static const struct option longopts[] = { + {"blocks", no_argument, NULL, 'b'}, + {"disable", no_argument, NULL, 'd'}, + {"enable", no_argument, NULL, 'e'}, + {"help", no_argument, NULL, 'h'}, + {"verbose", no_argument, NULL, 'v'}, + {"version", no_argument, NULL, 'V'}, + {"zone", required_argument, NULL, 'z'}, + {NULL, 0, NULL, 0} + }; + + static const ul_excl_t excl[] = { /* rows
and cols in ASCII order */ + { 'd','e' }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + ul_path_init_debug(); + desc->sysmem = ul_new_path(_PATH_SYS_MEMORY); + if (!desc->sysmem) + err(EXIT_FAILURE, _("failed to initialize %s handler"), _PATH_SYS_MEMORY); + + read_info(desc); + + while ((c = getopt_long(argc, argv, "bdehvVz:", longopts, NULL)) != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch (c) { + case 'd': + cmd = CMD_MEMORY_DISABLE; + break; + case 'e': + cmd = CMD_MEMORY_ENABLE; + break; + case 'b': + desc->use_blocks = 1; + break; + case 'v': + desc->verbose = 1; + break; + case 'z': + zone = xstrdup(optarg); + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if ((argc == 1) || (argc != optind + 1) || (cmd == CMD_NONE)) { + warnx(_("bad usage")); + errtryhelp(EXIT_FAILURE); + } + + parse_parameter(desc, argv[optind]); + + + /* The valid_zones sysfs attribute was introduced with kernel 3.18 */ + if (ul_path_access(desc->sysmem, F_OK, "memory0/valid_zones") == 0) + desc->have_zones = 1; + else if (zone) + warnx(_("zone ignored, no valid_zones sysfs attribute present")); + + if (zone && desc->have_zones) { + zone_id = zone_name_to_id(zone); + if (zone_id == -1) { + warnx(_("unknown memory zone: %s"), zone); + errtryhelp(EXIT_FAILURE); + } + } + + if (desc->is_size) + rc = chmem_size(desc, cmd == CMD_MEMORY_ENABLE ? 1 : 0, zone_id); + else + rc = chmem_range(desc, cmd == CMD_MEMORY_ENABLE ? 1 : 0, zone_id); + + ul_unref_path(desc->sysmem); + + return rc == 0 ? EXIT_SUCCESS : + rc < 0 ? 
EXIT_FAILURE : CHMEM_EXIT_SOMEOK; +} diff --git a/sys-utils/choom.1 b/sys-utils/choom.1 new file mode 100644 index 0000000..e55b915 --- /dev/null +++ b/sys-utils/choom.1 @@ -0,0 +1,88 @@ +'\" t +.\" Title: choom +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "CHOOM" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +choom \- display and adjust OOM\-killer score. +.sp +\fBchoom\fP \fB\-p\fP \fIPID\fP +.sp +\fBchoom\fP \fB\-p\fP \fIPID\fP \fB\-n\fP \fInumber\fP +.sp +\fBchoom\fP \fB\-n\fP \fInumber\fP [\-\-] \fIcommand\fP [\fIargument\fP ...] +.SH "DESCRIPTION" +.sp +The \fBchoom\fP command displays and adjusts Out\-Of\-Memory killer score setting. +.SH "OPTIONS" +.sp +\fB\-p\fP, \fB\-\-pid\fP \fIpid\fP +.RS 4 +Specifies process ID. +.RE +.sp +\fB\-n\fP, \fB\-\-adjust\fP \fIvalue\fP +.RS 4 +Specify the adjust score value. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "NOTES" +.sp +Linux kernel uses the badness heuristic to select which process gets killed in out of memory conditions. +.sp +The badness heuristic assigns a value to each candidate task ranging from 0 (never kill) to 1000 (always kill) to determine which process is targeted. The units are roughly a proportion along that range of allowed memory the process may allocate from based on an estimation of its current memory and swap use. For example, if a task is using all allowed memory, its badness score will be 1000. If it is using half of its allowed memory, its score will be 500. 
+.sp +There is an additional factor included in the badness score: the current memory and swap usage is discounted by 3% for root processes. +.sp +The amount of "allowed" memory depends on the context in which the oom killer was called. If it is due to the memory assigned to the allocating task\(cqs cpuset being exhausted, the allowed memory represents the set of mems assigned to that cpuset. If it is due to a mempolicy\(cqs node(s) being exhausted, the allowed memory represents the set of mempolicy nodes. If it is due to a memory limit (or swap limit) being reached, the allowed memory is that configured limit. Finally, if it is due to the entire system being out of memory, the allowed memory represents all allocatable resources. +.sp +The adjust score value is added to the badness score before it is used to determine which task to kill. Acceptable values range from \-1000 to +1000. This allows userspace to polarize the preference for oom killing either by always preferring a certain task or completely disabling it. The lowest possible value, \-1000, is equivalent to disabling oom killing entirely for that task since it will always report a badness score of 0. +.sp +Setting an adjust score value of +500, for example, is roughly equivalent to allowing the remainder of tasks sharing the same system, cpuset, mempolicy, or memory controller resources to use at least 50% more memory. A value of \-500, on the other hand, would be roughly equivalent to discounting 50% of the task\(cqs allowed memory from being considered as scoring against the task. +.SH "AUTHORS" +.sp +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +\fBproc\fP(5) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." 
+.SH "AVAILABILITY" +.sp +The \fBchoom\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/choom.1.adoc b/sys-utils/choom.1.adoc new file mode 100644 index 0000000..51d43e7 --- /dev/null +++ b/sys-utils/choom.1.adoc @@ -0,0 +1,61 @@ +//po4a: entry man manual += choom(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: choom + +== NAME + +choom - display and adjust OOM-killer score + +== SYNOPSIS + +*choom* *-p* _PID_ + +*choom* *-p* _PID_ *-n* _number_ + +*choom* *-n* _number_ [--] _command_ [_argument_ ...] + +== DESCRIPTION + +The *choom* command displays and adjusts the Out-Of-Memory killer score setting. + +== OPTIONS + +*-p*, *--pid* _pid_:: +Specifies the process ID. + +*-n*, *--adjust* _value_:: +Specify the adjust score value. + +include::man-common/help-version.adoc[] + +== NOTES + +The Linux kernel uses the badness heuristic to select which process gets killed in out-of-memory conditions. + +The badness heuristic assigns a value to each candidate task ranging from 0 (never kill) to 1000 (always kill) to determine which process is targeted. The units are roughly a proportion along that range of allowed memory the process may allocate from based on an estimation of its current memory and swap use. For example, if a task is using all allowed memory, its badness score will be 1000. If it is using half of its allowed memory, its score will be 500. + +There is an additional factor included in the badness score: the current memory and swap usage is discounted by 3% for root processes. + +The amount of "allowed" memory depends on the context in which the oom killer was called. If it is due to the memory assigned to the allocating task's cpuset being exhausted, the allowed memory represents the set of mems assigned to that cpuset. If it is due to a mempolicy's node(s) being exhausted, the allowed memory represents the set of mempolicy nodes. If it is due to a memory limit (or swap limit) being reached, the allowed memory is that configured limit.
Finally, if it is due to the entire system being out of memory, the allowed memory represents all allocatable resources. + +The adjust score value is added to the badness score before it is used to determine which task to kill. Acceptable values range from -1000 to +1000. This allows userspace to polarize the preference for oom killing either by always preferring a certain task or completely disabling it. The lowest possible value, -1000, is equivalent to disabling oom killing entirely for that task since it will always report a badness score of 0. + +Setting an adjust score value of +500, for example, is roughly equivalent to allowing the remainder of tasks sharing the same system, cpuset, mempolicy, or memory controller resources to use at least 50% more memory. A value of -500, on the other hand, would be roughly equivalent to discounting 50% of the task's allowed memory from being considered as scoring against the task. + +== AUTHORS + +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +*proc*(5) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/choom.c b/sys-utils/choom.c new file mode 100644 index 0000000..b3d3e4d --- /dev/null +++ b/sys-utils/choom.c @@ -0,0 +1,159 @@ +/* + * choom - Change OOM score setting + * + * Copyright (C) 2018 Karel Zak <kzak@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <getopt.h> +#include <errno.h> + +#include "nls.h" +#include "c.h" +#include "path.h" +#include "strutils.h" +#include "closestream.h" + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, + _(" %1$s [options] -p pid\n" + " %1$s [options] -n number -p pid\n" + " %1$s [options] -n number [--] command [args...]]\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Display and adjust OOM-killer score.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -n, --adjust <num> specify the adjust score value\n"), out); + fputs(_(" -p, --pid <num> process ID\n"), out); + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(24)); + printf(USAGE_MAN_TAIL("choom(1)")); + exit(EXIT_SUCCESS); +} + +static int get_score(struct path_cxt *pc) +{ + int ret; + + if (ul_path_read_s32(pc, &ret, "oom_score") != 0) + err(EXIT_FAILURE, _("failed to read OOM score value")); + + return ret; +} + +static int get_score_adj(struct path_cxt *pc) +{ + int ret; + + if (ul_path_read_s32(pc, &ret, "oom_score_adj") != 0) + err(EXIT_FAILURE, _("failed to read OOM score adjust value")); + + return ret; +} + +static int set_score_adj(struct path_cxt *pc, int adj) +{ + return ul_path_write_s64(pc, adj, "oom_score_adj"); +} + +int main(int argc, char **argv) +{ + pid_t pid = 0; + int c, adj = 0, has_adj = 0; + struct path_cxt *pc = NULL; + + static const struct option longopts[] = { + { "adjust", required_argument, NULL, 'n' }, + { "pid", required_argument, NULL, 'p' }, + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + 
setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, "hn:p:V", longopts, NULL)) != -1) { + switch (c) { + case 'p': + pid = strtos32_or_err(optarg, _("invalid PID argument")); + break; + case 'n': + adj = strtos32_or_err(optarg, _("invalid adjust argument")); + has_adj = 1; + break; + + case 'V': + print_version(EXIT_SUCCESS); + case 'h': + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (optind < argc && pid) { + warnx(_("invalid argument: %s"), argv[optind]); + errtryhelp(EXIT_FAILURE); + } + if (!pid && argc - optind < 1) { + warnx(_("no PID or COMMAND specified")); + errtryhelp(EXIT_FAILURE); + } + if (optind < argc && !has_adj) { + warnx(_("no OOM score adjust value specified")); + errtryhelp(EXIT_FAILURE); + } + + pc = ul_new_path("/proc/%d", (int) (pid ? pid : getpid())); + + /* Show */ + if (!has_adj) { + printf(_("pid %d's current OOM score: %d\n"), pid, get_score(pc)); + printf(_("pid %d's current OOM score adjust value: %d\n"), pid, get_score_adj(pc)); + + /* Change */ + } else if (pid) { + int old = get_score_adj(pc); + + if (set_score_adj(pc, adj)) + err(EXIT_FAILURE, _("failed to set score adjust value")); + + printf(_("pid %d's OOM score adjust value changed from %d to %d\n"), pid, old, adj); + + /* Start new process */ + } else { + if (set_score_adj(pc, adj)) + err(EXIT_FAILURE, _("failed to set score adjust value")); + ul_unref_path(pc); + argv += optind; + execvp(argv[0], argv); + errexec(argv[0]); + } + + ul_unref_path(pc); + return EXIT_SUCCESS; +} diff --git a/sys-utils/ctrlaltdel.8 b/sys-utils/ctrlaltdel.8 new file mode 100644 index 0000000..e7ffa38 --- /dev/null +++ b/sys-utils/ctrlaltdel.8 @@ -0,0 +1,80 @@ +'\" t +.\" Title: ctrlaltdel +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH 
"CTRLALTDEL" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ctrlaltdel \- set the function of the Ctrl\-Alt\-Del combination +.SH "SYNOPSIS" +.sp +\fBctrlaltdel\fP \fBhard\fP|\fBsoft\fP +.SH "DESCRIPTION" +.sp +Based on examination of the \fIlinux/kernel/reboot.c\fP code, it is clear that there are two supported functions that the <Ctrl\-Alt\-Del> sequence can perform. +.sp +\fBhard\fP +.RS 4 +Immediately reboot the computer without calling \fBsync\fP(2) and without any other preparation. This is the default. +.RE +.sp +\fBsoft\fP +.RS 4 +Make the kernel send the \fBSIGINT\fP (interrupt) signal to the \fBinit\fP process (this is always the process with PID 1). If this option is used, the \fBinit\fP(8) program must support this feature. Since there are now several \fBinit\fP(8) programs in the Linux community, please consult the documentation for the version that you are currently using. +.RE +.sp +When the command is run without any argument, it will display the current setting. +.sp +The function of \fBctrlaltdel\fP is usually set in the \fI/etc/rc.local\fP file. +.SH "OPTIONS" +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "FILES" +.sp +\fI/etc/rc.local\fP +.SH "AUTHORS" +.sp +.MTO "poe\(atdaimi.aau.dk" "Peter Orbaek" "" +.SH "SEE ALSO" +.sp +\fBinit\fP(8), +\fBsystemd\fP(1) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBctrlaltdel\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/ctrlaltdel.8.adoc b/sys-utils/ctrlaltdel.8.adoc new file mode 100644 index 0000000..5784240 --- /dev/null +++ b/sys-utils/ctrlaltdel.8.adoc @@ -0,0 +1,58 @@ +//po4a: entry man manual +//// +Copyright 1992, 1993 Rickard E. Faith (faith@cs.unc.edu) +May be distributed under the GNU General Public License +//// += ctrlaltdel(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: ctrlaltdel + +== NAME + +ctrlaltdel - set the function of the Ctrl-Alt-Del combination + +== SYNOPSIS + +*ctrlaltdel* *hard*|*soft* + +== DESCRIPTION + +Based on examination of the _linux/kernel/reboot.c_ code, it is clear that there are two supported functions that the <Ctrl-Alt-Del> sequence can perform. + +*hard*:: +Immediately reboot the computer without calling *sync*(2) and without any other preparation. This is the default. + +*soft*:: +Make the kernel send the *SIGINT* (interrupt) signal to the *init* process (this is always the process with PID 1). If this option is used, the *init*(8) program must support this feature. Since there are now several *init*(8) programs in the Linux community, please consult the documentation for the version that you are currently using. + +When the command is run without any argument, it will display the current setting. + +The function of *ctrlaltdel* is usually set in the _/etc/rc.local_ file. 
+ +== OPTIONS + +include::man-common/help-version.adoc[] + +== FILES + +_/etc/rc.local_ + +== AUTHORS + +mailto:poe@daimi.aau.dk[Peter Orbaek] + +== SEE ALSO + +*init*(8), +*systemd*(1) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/ctrlaltdel.c b/sys-utils/ctrlaltdel.c new file mode 100644 index 0000000..303d2dc --- /dev/null +++ b/sys-utils/ctrlaltdel.c @@ -0,0 +1,113 @@ +/* + * ctrlaltdel.c - Set the function of the Ctrl-Alt-Del combination + * Created 4-Jul-92 by Peter Orbaek <poe@daimi.aau.dk> + * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL> + * - added Native Language Support + */ + +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <sys/reboot.h> +#include "nls.h" +#include "c.h" +#include "closestream.h" +#include "pathnames.h" +#include "path.h" + +#define LINUX_REBOOT_CMD_CAD_ON 0x89ABCDEF +#define LINUX_REBOOT_CMD_CAD_OFF 0x00000000 + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s hard|soft\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fprintf(out, _("Set the function of the Ctrl-Alt-Del combination.\n")); + + fputs(USAGE_OPTIONS, out); + printf(USAGE_HELP_OPTIONS(16)); + printf(USAGE_MAN_TAIL("ctrlaltdel(8)")); + exit(EXIT_SUCCESS); +} + +static int get_cad(void) +{ + uint64_t val; + + if (ul_path_read_u64(NULL, &val, _PATH_PROC_CTRL_ALT_DEL) != 0) + err(EXIT_FAILURE, _("cannot read %s"), _PATH_PROC_CTRL_ALT_DEL); + + switch (val) { + case 0: + fputs("soft\n", stdout); + break; + case 1: + fputs("hard\n", stdout); + break; + default: + printf("%s hard\n", _("implicit")); + warnx(_("unexpected value in %s: %ju"), _PATH_PROC_CTRL_ALT_DEL, val); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +static int set_cad(const char 
*arg) +{ + unsigned int cmd; + + if (geteuid()) { + warnx(_("You must be root to set the Ctrl-Alt-Del behavior")); + return EXIT_FAILURE; + } + if (!strcmp("hard", arg)) + cmd = LINUX_REBOOT_CMD_CAD_ON; + else if (!strcmp("soft", arg)) + cmd = LINUX_REBOOT_CMD_CAD_OFF; + else { + warnx(_("unknown argument: %s"), arg); + return EXIT_FAILURE; + } + if (reboot(cmd) < 0) { + warn("reboot"); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} + +int main(int argc, char **argv) +{ + int ch, ret; + static const struct option longopts[] = { + {"version", no_argument, NULL, 'V'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((ch = getopt_long(argc, argv, "Vh", longopts, NULL)) != -1) + switch (ch) { + case 'V': + print_version(EXIT_SUCCESS); + case 'h': + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + + if (argc < 2) + ret = get_cad(); + else + ret = set_cad(argv[1]); + return ret; +} diff --git a/sys-utils/dmesg.1 b/sys-utils/dmesg.1 new file mode 100644 index 0000000..a7f62d6 --- /dev/null +++ b/sys-utils/dmesg.1 @@ -0,0 +1,312 @@ +'\" t +.\" Title: dmesg +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "DMESG" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. 
LINKSTYLE blue R < > +.\} +.SH "NAME" +dmesg \- print or control the kernel ring buffer +.SH "SYNOPSIS" +.sp +\fBdmesg\fP [options] +.sp +\fBdmesg\fP \fB\-\-clear\fP +.sp +\fBdmesg\fP \fB\-\-read\-clear\fP [options] +.sp +\fBdmesg\fP \fB\-\-console\-level\fP \fIlevel\fP +.sp +\fBdmesg\fP \fB\-\-console\-on\fP +.sp +\fBdmesg\fP \fB\-\-console\-off\fP +.SH "DESCRIPTION" +.sp +\fBdmesg\fP is used to examine or control the kernel ring buffer. +.sp +The default action is to display all messages from the kernel ring buffer. +.SH "OPTIONS" +.sp +The \fB\-\-clear\fP, \fB\-\-read\-clear\fP, \fB\-\-console\-on\fP, \fB\-\-console\-off\fP, and \fB\-\-console\-level\fP options are mutually exclusive. +.sp +\fB\-C\fP, \fB\-\-clear\fP +.RS 4 +Clear the ring buffer. +.RE +.sp +\fB\-c\fP, \fB\-\-read\-clear\fP +.RS 4 +Clear the ring buffer after first printing its contents. +.RE +.sp +\fB\-D\fP, \fB\-\-console\-off\fP +.RS 4 +Disable the printing of messages to the console. +.RE +.sp +\fB\-d\fP, \fB\-\-show\-delta\fP +.RS 4 +Display the timestamp and the time delta spent between messages. If used together with \fB\-\-notime\fP then only the time delta without the timestamp is printed. +.RE +.sp +\fB\-E\fP, \fB\-\-console\-on\fP +.RS 4 +Enable printing messages to the console. +.RE +.sp +\fB\-e\fP, \fB\-\-reltime\fP +.RS 4 +Display the local time and the delta in human\-readable format. Be aware that conversion to the local time could be inaccurate (see \fB\-T\fP for more details). +.RE +.sp +\fB\-F\fP, \fB\-\-file\fP \fIfile\fP +.RS 4 +Read the syslog messages from the given \fIfile\fP. Note that \fB\-F\fP does not support messages in kmsg format. The old syslog format is supported only. +.RE +.sp +\fB\-f\fP, \fB\-\-facility\fP \fIlist\fP +.RS 4 +Restrict output to the given (comma\-separated) \fIlist\fP of facilities. For example: +.sp +\fBdmesg \-\-facility=daemon\fP +.sp +will print messages from system daemons only. For all supported facilities see the \fB\-\-help\fP output. 
+.RE +.sp +\fB\-H\fP, \fB\-\-human\fP +.RS 4 +Enable human\-readable output. See also \fB\-\-color\fP, \fB\-\-reltime\fP and \fB\-\-nopager\fP. +.RE +.sp +\fB\-J\fP, \fB\-\-json\fP +.RS 4 +Use JSON output format. The time output format is in "sec.usec" format only, log priority level is not decoded by default (use \fB\-\-decode\fP to split into facility and priority), the other options to control the output format or time format are silently ignored. +.RE +.sp +\fB\-k\fP, \fB\-\-kernel\fP +.RS 4 +Print kernel messages. +.RE +.sp +\fB\-L\fP, \fB\-\-color\fP[=\fIwhen\fP] +.RS 4 +Colorize the output. The optional argument \fIwhen\fP can be \fBauto\fP, \fBnever\fP or \fBalways\fP. If the \fIwhen\fP argument is omitted, it defaults to \fBauto\fP. The colors can be disabled; for the current built\-in default see the \fB\-\-help\fP output. See also the \fBCOLORS\fP section below. +.RE +.sp +\fB\-l\fP, \fB\-\-level\fP \fIlist\fP +.RS 4 +Restrict output to the given (comma\-separated) \fIlist\fP of levels. For example: +.sp +\fBdmesg \-\-level=err,warn\fP +.sp +will print error and warning messages only. For all supported levels see the \fB\-\-help\fP output. +.RE +.sp +\fB\-n\fP, \fB\-\-console\-level\fP \fIlevel\fP +.RS 4 +Set the \fIlevel\fP at which printing of messages is done to the console. The \fIlevel\fP is a level number or abbreviation of the level name. For all supported levels see the \fB\-\-help\fP output. +.sp +For example, \fB\-n 1\fP or \fB\-n emerg\fP prevents all messages, except emergency (panic) messages, from appearing on the console. All levels of messages are still written to \fI/proc/kmsg\fP, so \fBsyslogd\fP(8) can still be used to control exactly where kernel messages appear. When the \fB\-n\fP option is used, \fBdmesg\fP will \fInot\fP print or clear the kernel ring buffer. +.RE +.sp +\fB\-\-noescape\fP +.RS 4 +The unprintable and potentially unsafe characters (e.g., broken multi\-byte sequences, terminal controlling chars, etc.) 
are escaped in the format \(rsx<hex> for security reasons by default. This option disables this feature completely. It\(cqs useful, for example, for debugging purposes together with \fB\-\-raw\fP. Be careful and don\(cqt use it by default. +.RE +.sp +\fB\-P\fP, \fB\-\-nopager\fP +.RS 4 +Do not pipe output into a pager. A pager is enabled by default for \fB\-\-human\fP output. +.RE +.sp +\fB\-p\fP, \fB\-\-force\-prefix\fP +.RS 4 +Add facility, level or timestamp information to each line of a multi\-line message. +.RE +.sp +\fB\-r\fP, \fB\-\-raw\fP +.RS 4 +Print the raw message buffer, i.e., do not strip the log\-level prefixes, but all unprintable characters are still escaped (see also \fB\-\-noescape\fP). +.sp +Note that the real raw format depends on the method \fBdmesg\fP uses to read kernel messages. The \fI/dev/kmsg\fP device uses a different format than \fBsyslog\fP(2). For backward compatibility, \fBdmesg\fP always returns data in the \fBsyslog\fP(2) format. It is possible to read the real raw data from \fI/dev/kmsg\fP by, for example, the command \(aqdd if=/dev/kmsg iflag=nonblock\(aq. +.RE +.sp +\fB\-S\fP, \fB\-\-syslog\fP +.RS 4 +Force \fBdmesg\fP to use the \fBsyslog\fP(2) kernel interface to read kernel messages. The default is to use \fI/dev/kmsg\fP rather than \fBsyslog\fP(2) since kernel 3.5.0. +.RE +.sp +\fB\-s\fP, \fB\-\-buffer\-size\fP \fIsize\fP +.RS 4 +Use a buffer of \fIsize\fP to query the kernel ring buffer. This is 16392 by default. (The default kernel syslog buffer size was 4096 at first, 8192 since 1.3.54, 16384 since 2.1.113.) If you have set the kernel buffer to be larger than the default, then this option can be used to view the entire buffer. +.RE +.sp +\fB\-T\fP, \fB\-\-ctime\fP +.RS 4 +Print human\-readable timestamps. +.sp +\fBBe aware that the timestamp could be inaccurate!\fP The \fBtime\fP source used for the logs is \fBnot updated after\fP system \fBSUSPEND\fP/\fBRESUME\fP.
Timestamps are adjusted according to the current delta between the boottime and monotonic clocks; this works only for messages printed after the last resume. +.RE +.sp +\fB\-\-since\fP \fItime\fP +.RS 4 +Display records since the specified time. The time can be specified in absolute form as well as in relative notation (e.g. \(aq1 hour ago\(aq). Be aware that the timestamp could be inaccurate; see \fB\-\-ctime\fP for more details. +.RE +.sp +\fB\-\-until\fP \fItime\fP +.RS 4 +Display records until the specified time. The time can be specified in absolute form as well as in relative notation (e.g. \(aq1 hour ago\(aq). Be aware that the timestamp could be inaccurate; see \fB\-\-ctime\fP for more details. +.RE +.sp +\fB\-t\fP, \fB\-\-notime\fP +.RS 4 +Do not print kernel\(cqs timestamps. +.RE +.sp +\fB\-\-time\-format\fP \fIformat\fP +.RS 4 +Print timestamps using the given \fIformat\fP, which can be \fBctime\fP, \fBreltime\fP, \fBdelta\fP or \fBiso\fP. The first three formats are aliases of the time\-format\-specific options. The \fBiso\fP format is a \fBdmesg\fP implementation of the ISO\-8601 timestamp format. The purpose of this format is to make comparing timestamps between two systems, and any other parsing, easy. The definition of the \fBiso\fP timestamp is: YYYY\-MM\-DD<T>HH:MM:SS,<microseconds><\-+><timezone offset from UTC>. +.sp +The \fBiso\fP format has the same issue as \fBctime\fP: the time may be inaccurate when a system is suspended and resumed. +.RE +.sp +\fB\-u\fP, \fB\-\-userspace\fP +.RS 4 +Print userspace messages. +.RE +.sp +\fB\-w\fP, \fB\-\-follow\fP +.RS 4 +Wait for new messages. This feature is supported only on systems with a readable \fI/dev/kmsg\fP (since kernel 3.5.0). +.RE +.sp +\fB\-W\fP, \fB\-\-follow\-new\fP +.RS 4 +Wait and print only new messages. +.RE +.sp +\fB\-x\fP, \fB\-\-decode\fP +.RS 4 +Decode facility and level (priority) numbers to human\-readable prefixes.
+.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "COLORS" +.sp +The output colorization is implemented by \fBterminal\-colors.d\fP(5) functionality. +Implicit coloring can be disabled by an empty file +.RS 3 +.ll -.6i +.sp +\fI/etc/terminal\-colors.d/dmesg.disable\fP +.br +.RE +.ll +.sp +for the \fBdmesg\fP command or for all tools by +.RS 3 +.ll -.6i +.sp +\fI/etc/terminal\-colors.d/disable\fP +.br +.RE +.ll +.sp +The user\-specific \fI$XDG_CONFIG_HOME/terminal\-colors.d\fP +or \fI$HOME/.config/terminal\-colors.d\fP overrides the global setting. +.sp +Note that the output colorization may be enabled by default, and in this case +\fIterminal\-colors.d\fP directories do not have to exist yet. +.sp +The logical color names supported by \fBdmesg\fP are: +.sp +\fBsubsys\fP +.RS 4 +The message sub\-system prefix (e.g., "ACPI:"). +.RE +.sp +\fBtime\fP +.RS 4 +The message timestamp. +.RE +.sp +\fBtimebreak\fP +.RS 4 +The message timestamp in short ctime format in \fB\-\-reltime\fP or \fB\-\-human\fP output. +.RE +.sp +\fBalert\fP +.RS 4 +The text of the message with the alert log priority. +.RE +.sp +\fBcrit\fP +.RS 4 +The text of the message with the critical log priority. +.RE +.sp +\fBerr\fP +.RS 4 +The text of the message with the error log priority. +.RE +.sp +\fBwarn\fP +.RS 4 +The text of the message with the warning log priority. +.RE +.sp +\fBsegfault\fP +.RS 4 +The text of the message that informs about a segmentation fault. +.RE +.SH "EXIT STATUS" +.sp +\fBdmesg\fP can fail with a permission denied error. This is usually caused by the \fBdmesg_restrict\fP kernel setting; please see \fBsyslog\fP(2) for more details. +.SH "AUTHORS" +.sp +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.sp +\fBdmesg\fP was originally written by \c +.MTO "tytso\(atathena.mit.edu" "Theodore Ts\(cqo" "."
+.SH "SEE ALSO" +.sp +\fBterminal\-colors.d\fP(5), +\fBsyslogd\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBdmesg\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/dmesg.1.adoc b/sys-utils/dmesg.1.adoc new file mode 100644 index 0000000..9072f22 --- /dev/null +++ b/sys-utils/dmesg.1.adoc @@ -0,0 +1,194 @@ +//po4a: entry man manual +//// +Copyright 1993 Rickard E. Faith (faith@cs.unc.edu) +May be distributed under the GNU General Public License +//// += dmesg(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: dmesg + +== NAME + +dmesg - print or control the kernel ring buffer + +== SYNOPSIS + +*dmesg* [options] + +*dmesg* *--clear* + +*dmesg* *--read-clear* [options] + +*dmesg* *--console-level* _level_ + +*dmesg* *--console-on* + +*dmesg* *--console-off* + +== DESCRIPTION + +*dmesg* is used to examine or control the kernel ring buffer. + +The default action is to display all messages from the kernel ring buffer. + +== OPTIONS + +The *--clear*, *--read-clear*, *--console-on*, *--console-off*, and *--console-level* options are mutually exclusive. + +*-C*, *--clear*:: +Clear the ring buffer. + +*-c*, *--read-clear*:: +Clear the ring buffer after first printing its contents. + +*-D*, *--console-off*:: +Disable the printing of messages to the console. + +*-d*, *--show-delta*:: +Display the timestamp and the time delta spent between messages. If used together with *--notime* then only the time delta without the timestamp is printed. + +*-E*, *--console-on*:: +Enable printing messages to the console. + +*-e*, *--reltime*:: +Display the local time and the delta in human-readable format. Be aware that conversion to the local time could be inaccurate (see *-T* for more details). + +*-F*, *--file* _file_:: +Read the syslog messages from the given _file_. Note that *-F* does not support messages in kmsg format. The old syslog format is supported only. + +*-f*, *--facility* _list_:: +Restrict output to the given (comma-separated) _list_ of facilities. 
For example: ++ +*dmesg --facility=daemon* ++ +will print messages from system daemons only. For all supported facilities see the *--help* output. + +*-H*, *--human*:: +Enable human-readable output. See also *--color*, *--reltime* and *--nopager*. + +*-J*, *--json*:: +Use JSON output format. The time output format is in "sec.usec" format only; the log priority level is not decoded by default (use *--decode* to split it into facility and priority); the other options to control the output format or time format are silently ignored. + +*-k*, *--kernel*:: +Print kernel messages. + +*-L*, *--color*[=_when_]:: +Colorize the output. The optional argument _when_ can be *auto*, *never* or *always*. If the _when_ argument is omitted, it defaults to *auto*. The colors can be disabled; for the current built-in default see the *--help* output. See also the *COLORS* section below. + +*-l*, *--level* _list_:: +Restrict output to the given (comma-separated) _list_ of levels. For example: ++ +*dmesg --level=err,warn* ++ +will print error and warning messages only. For all supported levels see the *--help* output. + +*-n*, *--console-level* _level_:: +Set the _level_ at which printing of messages is done to the console. The _level_ is a level number or abbreviation of the level name. For all supported levels see the *--help* output. ++ +For example, *-n 1* or *-n emerg* prevents all messages, except emergency (panic) messages, from appearing on the console. All levels of messages are still written to _/proc/kmsg_, so *syslogd*(8) can still be used to control exactly where kernel messages appear. When the *-n* option is used, *dmesg* will _not_ print or clear the kernel ring buffer. + +*--noescape*:: +The unprintable and potentially unsafe characters (e.g., broken multi-byte sequences, terminal controlling chars, etc.) are escaped in format \x<hex> for security reasons by default. This option disables this feature entirely. It is useful, for example, for debugging purposes together with *--raw*.
Be careful and don't use it by default. + +*-P*, *--nopager*:: +Do not pipe output into a pager. A pager is enabled by default for *--human* output. + +*-p*, *--force-prefix*:: +Add facility, level or timestamp information to each line of a multi-line message. + +*-r*, *--raw*:: +Print the raw message buffer, i.e., do not strip the log-level prefixes, but all unprintable characters are still escaped (see also *--noescape*). ++ +Note that the real raw format depends on the method *dmesg* uses to read kernel messages. The _/dev/kmsg_ device uses a different format than *syslog*(2). For backward compatibility, *dmesg* always returns data in the *syslog*(2) format. It is possible to read the real raw data from _/dev/kmsg_ by, for example, the command 'dd if=/dev/kmsg iflag=nonblock'. + +*-S*, *--syslog*:: +Force *dmesg* to use the *syslog*(2) kernel interface to read kernel messages. The default is to use _/dev/kmsg_ rather than *syslog*(2) since kernel 3.5.0. + +*-s*, *--buffer-size* _size_:: +Use a buffer of _size_ to query the kernel ring buffer. This is 16392 by default. (The default kernel syslog buffer size was 4096 at first, 8192 since 1.3.54, 16384 since 2.1.113.) If you have set the kernel buffer to be larger than the default, then this option can be used to view the entire buffer. + +*-T*, *--ctime*:: +Print human-readable timestamps. ++ +*Be aware that the timestamp could be inaccurate!* The *time* source used for the logs is *not updated after* system *SUSPEND*/*RESUME*. Timestamps are adjusted according to the current delta between the boottime and monotonic clocks; this works only for messages printed after the last resume. + +*--since* _time_:: +Display records since the specified time. The time can be specified in absolute form as well as in relative notation (e.g. '1 hour ago'). Be aware that the timestamp could be inaccurate; see *--ctime* for more details. + +*--until* _time_:: +Display records until the specified time.
The time can be specified in absolute form as well as in relative notation (e.g. '1 hour ago'). Be aware that the timestamp could be inaccurate; see *--ctime* for more details. + +*-t*, *--notime*:: +Do not print kernel's timestamps. + +*--time-format* _format_:: +Print timestamps using the given _format_, which can be *ctime*, *reltime*, *delta* or *iso*. The first three formats are aliases of the time-format-specific options. The *iso* format is a *dmesg* implementation of the ISO-8601 timestamp format. The purpose of this format is to make comparing timestamps between two systems, and any other parsing, easy. The definition of the *iso* timestamp is: YYYY-MM-DD<T>HH:MM:SS,<microseconds><-+><timezone offset from UTC>. ++ +The *iso* format has the same issue as *ctime*: the time may be inaccurate when a system is suspended and resumed. + +*-u*, *--userspace*:: +Print userspace messages. + +*-w*, *--follow*:: +Wait for new messages. This feature is supported only on systems with a readable _/dev/kmsg_ (since kernel 3.5.0). + +*-W*, *--follow-new*:: +Wait and print only new messages. + +*-x*, *--decode*:: +Decode facility and level (priority) numbers to human-readable prefixes. + +include::man-common/help-version.adoc[] + +include::man-common/colors.adoc[] +The logical color names supported by *dmesg* are: + +*subsys*:: +The message sub-system prefix (e.g., "ACPI:"). + +*time*:: +The message timestamp. + +*timebreak*:: +The message timestamp in short ctime format in *--reltime* or *--human* output. + +*alert*:: +The text of the message with the alert log priority. + +*crit*:: +The text of the message with the critical log priority. + +*err*:: +The text of the message with the error log priority. + +*warn*:: +The text of the message with the warning log priority. + +*segfault*:: +The text of the message that informs about a segmentation fault. + +== EXIT STATUS + +*dmesg* can fail, reporting a permission denied error.
This is usually caused by *dmesg_restrict* kernel setting, please see *syslog*(2) for more details. + +== AUTHORS + +mailto:kzak@redhat.com[Karel Zak] + +*dmesg* was originally written by mailto:tytso@athena.mit.edu[Theodore Ts'o]. + +== SEE ALSO + +*terminal-colors.d*(5), +*syslogd*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/dmesg.c b/sys-utils/dmesg.c new file mode 100644 index 0000000..0179362 --- /dev/null +++ b/sys-utils/dmesg.c @@ -0,0 +1,1683 @@ +/* + * dmesg.c -- Print out the contents of the kernel ring buffer + * + * Copyright (C) 1993 Theodore Ts'o <tytso@athena.mit.edu> + * Copyright (C) 2011 Karel Zak <kzak@redhat.com> + * + * This program comes with ABSOLUTELY NO WARRANTY. + */ +#include <stdio.h> +#include <getopt.h> +#include <stdlib.h> +#include <sys/klog.h> +#include <sys/syslog.h> +#include <sys/time.h> +#include <sys/sysinfo.h> +#include <ctype.h> +#include <time.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <fcntl.h> + +#include "c.h" +#include "colors.h" +#include "nls.h" +#include "strutils.h" +#include "xalloc.h" +#include "widechar.h" +#include "all-io.h" +#include "bitops.h" +#include "closestream.h" +#include "optutils.h" +#include "timeutils.h" +#include "monotonic.h" +#include "mangle.h" +#include "pager.h" +#include "jsonwrt.h" + +/* Close the log. Currently a NOP. */ +#define SYSLOG_ACTION_CLOSE 0 +/* Open the log. Currently a NOP. */ +#define SYSLOG_ACTION_OPEN 1 +/* Read from the log. */ +#define SYSLOG_ACTION_READ 2 +/* Read all messages remaining in the ring buffer. (allowed for non-root) */ +#define SYSLOG_ACTION_READ_ALL 3 +/* Read and clear all messages remaining in the ring buffer */ +#define SYSLOG_ACTION_READ_CLEAR 4 +/* Clear ring buffer. 
*/ +#define SYSLOG_ACTION_CLEAR 5 +/* Disable printk's to console */ +#define SYSLOG_ACTION_CONSOLE_OFF 6 +/* Enable printk's to console */ +#define SYSLOG_ACTION_CONSOLE_ON 7 +/* Set level of messages printed to console */ +#define SYSLOG_ACTION_CONSOLE_LEVEL 8 +/* Return number of unread characters in the log buffer */ +#define SYSLOG_ACTION_SIZE_UNREAD 9 +/* Return size of the log buffer */ +#define SYSLOG_ACTION_SIZE_BUFFER 10 + +/* + * Color scheme + */ +struct dmesg_color { + const char *scheme; /* name used in termina-colors.d/dmesg.scheme */ + const char *dflt; /* default color ESC sequence */ +}; + +enum { + DMESG_COLOR_SUBSYS, + DMESG_COLOR_TIME, + DMESG_COLOR_TIMEBREAK, + DMESG_COLOR_ALERT, + DMESG_COLOR_CRIT, + DMESG_COLOR_ERR, + DMESG_COLOR_WARN, + DMESG_COLOR_SEGFAULT +}; + +static const struct dmesg_color colors[] = +{ + [DMESG_COLOR_SUBSYS] = { "subsys", UL_COLOR_BROWN }, + [DMESG_COLOR_TIME] = { "time", UL_COLOR_GREEN }, + [DMESG_COLOR_TIMEBREAK] = { "timebreak",UL_COLOR_GREEN UL_COLOR_BOLD }, + [DMESG_COLOR_ALERT] = { "alert", UL_COLOR_REVERSE UL_COLOR_RED }, + [DMESG_COLOR_CRIT] = { "crit", UL_COLOR_BOLD UL_COLOR_RED }, + [DMESG_COLOR_ERR] = { "err", UL_COLOR_RED }, + [DMESG_COLOR_WARN] = { "warn", UL_COLOR_BOLD }, + [DMESG_COLOR_SEGFAULT] = { "segfault", UL_COLOR_HALFBRIGHT UL_COLOR_RED } +}; + +#define dmesg_enable_color(_id) \ + color_scheme_enable(colors[_id].scheme, colors[_id].dflt); + +/* + * Priority and facility names + */ +struct dmesg_name { + const char *name; + const char *help; +}; + +/* + * Priority names -- based on sys/syslog.h + */ +static const struct dmesg_name level_names[] = +{ + [LOG_EMERG] = { "emerg", N_("system is unusable") }, + [LOG_ALERT] = { "alert", N_("action must be taken immediately") }, + [LOG_CRIT] = { "crit", N_("critical conditions") }, + [LOG_ERR] = { "err", N_("error conditions") }, + [LOG_WARNING] = { "warn", N_("warning conditions") }, + [LOG_NOTICE] = { "notice",N_("normal but significant condition") }, 
+ [LOG_INFO] = { "info", N_("informational") }, + [LOG_DEBUG] = { "debug", N_("debug-level messages") } +}; + +/* + * sys/syslog.h uses (f << 3) for all facility codes. + * We want to use the codes as array indexes, so shift back... + * + * Note that libc LOG_FAC() macro returns the base codes, not the + * shifted code :-) + */ +#define FAC_BASE(f) ((f) >> 3) + +static const struct dmesg_name facility_names[] = +{ + [FAC_BASE(LOG_KERN)] = { "kern", N_("kernel messages") }, + [FAC_BASE(LOG_USER)] = { "user", N_("random user-level messages") }, + [FAC_BASE(LOG_MAIL)] = { "mail", N_("mail system") }, + [FAC_BASE(LOG_DAEMON)] = { "daemon", N_("system daemons") }, + [FAC_BASE(LOG_AUTH)] = { "auth", N_("security/authorization messages") }, + [FAC_BASE(LOG_SYSLOG)] = { "syslog", N_("messages generated internally by syslogd") }, + [FAC_BASE(LOG_LPR)] = { "lpr", N_("line printer subsystem") }, + [FAC_BASE(LOG_NEWS)] = { "news", N_("network news subsystem") }, + [FAC_BASE(LOG_UUCP)] = { "uucp", N_("UUCP subsystem") }, + [FAC_BASE(LOG_CRON)] = { "cron", N_("clock daemon") }, + [FAC_BASE(LOG_AUTHPRIV)] = { "authpriv", N_("security/authorization messages (private)") }, + [FAC_BASE(LOG_FTP)] = { "ftp", N_("FTP daemon") }, +}; + +/* supported methods to read message buffer + */ +enum { + DMESG_METHOD_KMSG, /* read messages from /dev/kmsg (default) */ + DMESG_METHOD_SYSLOG, /* klogctl() buffer */ + DMESG_METHOD_MMAP /* mmap file with records (see --file) */ +}; + +enum { + DMESG_TIMEFTM_NONE = 0, + DMESG_TIMEFTM_CTIME, /* [ctime] */ + DMESG_TIMEFTM_CTIME_DELTA, /* [ctime <delta>] */ + DMESG_TIMEFTM_DELTA, /* [<delta>] */ + DMESG_TIMEFTM_RELTIME, /* [relative] */ + DMESG_TIMEFTM_TIME, /* [time] */ + DMESG_TIMEFTM_TIME_DELTA, /* [time <delta>] */ + DMESG_TIMEFTM_ISO8601 /* 2013-06-13T22:11:00,123456+0100 */ +}; +#define is_timefmt(c, f) ((c)->time_fmt == (DMESG_TIMEFTM_ ##f)) + +struct dmesg_control { + /* bit arrays -- see include/bitops.h */ + char levels[ARRAY_SIZE(level_names) / 
NBBY + 1]; + char facilities[ARRAY_SIZE(facility_names) / NBBY + 1]; + + struct timeval lasttime; /* last printed timestamp */ + struct tm lasttm; /* last localtime */ + struct timeval boot_time; /* system boot time */ + time_t suspended_time; /* time spent in suspended state */ + + int action; /* SYSLOG_ACTION_* */ + int method; /* DMESG_METHOD_* */ + + size_t bufsize; /* size of syslog buffer */ + + int kmsg; /* /dev/kmsg file descriptor */ + ssize_t kmsg_first_read;/* initial read() return code */ + char kmsg_buf[BUFSIZ];/* buffer to read kmsg data */ + + time_t since; /* filter records by time */ + time_t until; /* filter records by time */ + + /* + * For the --file option we mmap whole file. The unnecessary (already + * printed) pages are always unmapped. The result is that we have in + * memory only the currently used page(s). + */ + char *filename; + char *mmap_buff; + size_t pagesize; + unsigned int time_fmt; /* time format */ + + struct ul_jsonwrt jfmt; /* -J formatting */ + + unsigned int follow:1, /* wait for new messages */ + end:1, /* seek to the of buffer */ + raw:1, /* raw mode */ + noesc:1, /* no escape */ + fltr_lev:1, /* filter out by levels[] */ + fltr_fac:1, /* filter out by facilities[] */ + decode:1, /* use "facility: level: " prefix */ + pager:1, /* pipe output into a pager */ + color:1, /* colorize messages */ + json:1, /* JSON output */ + force_prefix:1; /* force timestamp and decode prefix + on each line */ + int indent; /* due to timestamps if newline */ +}; + +struct dmesg_record { + const char *mesg; + size_t mesg_size; + + int level; + int facility; + struct timeval tv; + + const char *next; /* buffer with next unparsed record */ + size_t next_size; /* size of the next buffer */ +}; + +#define INIT_DMESG_RECORD(_r) do { \ + (_r)->mesg = NULL; \ + (_r)->mesg_size = 0; \ + (_r)->facility = -1; \ + (_r)->level = -1; \ + (_r)->tv.tv_sec = 0; \ + (_r)->tv.tv_usec = 0; \ + } while (0) + +static int read_kmsg(struct dmesg_control *ctl); + 
+static int set_level_color(int log_level, const char *mesg, size_t mesgsz) +{ + int id = -1; + + switch (log_level) { + case LOG_ALERT: + id = DMESG_COLOR_ALERT; + break; + case LOG_CRIT: + id = DMESG_COLOR_CRIT; + break; + case LOG_ERR: + id = DMESG_COLOR_ERR; + break; + case LOG_WARNING: + id = DMESG_COLOR_WARN; + break; + default: + break; + } + + /* well, sometimes the messages contains important keywords, but in + * non-warning/error messages + */ + if (id < 0 && memmem(mesg, mesgsz, "segfault at", 11)) + id = DMESG_COLOR_SEGFAULT; + + if (id >= 0) + dmesg_enable_color(id); + + return id >= 0 ? 0 : -1; +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + size_t i; + + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Display or control the kernel ring buffer.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -C, --clear clear the kernel ring buffer\n"), out); + fputs(_(" -c, --read-clear read and clear all messages\n"), out); + fputs(_(" -D, --console-off disable printing messages to console\n"), out); + fputs(_(" -E, --console-on enable printing messages to console\n"), out); + fputs(_(" -F, --file <file> use the file instead of the kernel log buffer\n"), out); + fputs(_(" -f, --facility <list> restrict output to defined facilities\n"), out); + fputs(_(" -H, --human human readable output\n"), out); + fputs(_(" -J, --json use JSON output format\n"), out); + fputs(_(" -k, --kernel display kernel messages\n"), out); + fprintf(out, + _(" -L, --color[=<when>] colorize messages (%s, %s or %s)\n"), "auto", "always", "never"); + fprintf(out, + " %s\n", USAGE_COLORS_DEFAULT); + fputs(_(" -l, --level <list> restrict output to defined levels\n"), out); + fputs(_(" -n, --console-level <level> set level of messages printed to console\n"), out); + fputs(_(" -P, --nopager do not pipe output into a pager\n"), out); + fputs(_(" -p, --force-prefix 
force timestamp output on each line of multi-line messages\n"), out); + fputs(_(" -r, --raw print the raw message buffer\n"), out); + fputs(_(" --noescape don't escape unprintable character\n"), out); + fputs(_(" -S, --syslog force to use syslog(2) rather than /dev/kmsg\n"), out); + fputs(_(" -s, --buffer-size <size> buffer size to query the kernel ring buffer\n"), out); + fputs(_(" -u, --userspace display userspace messages\n"), out); + fputs(_(" -w, --follow wait for new messages\n"), out); + fputs(_(" -W, --follow-new wait and print only new messages\n"), out); + fputs(_(" -x, --decode decode facility and level to readable string\n"), out); + fputs(_(" -d, --show-delta show time delta between printed messages\n"), out); + fputs(_(" -e, --reltime show local time and time delta in readable format\n"), out); + fputs(_(" -T, --ctime show human-readable timestamp (may be inaccurate!)\n"), out); + fputs(_(" -t, --notime don't show any timestamp with messages\n"), out); + fputs(_(" --time-format <format> show timestamp using the given format:\n" + " [delta|reltime|ctime|notime|iso]\n" + "Suspending/resume will make ctime and iso timestamps inaccurate.\n"), out); + fputs(_(" --since <time> display the lines since the specified time\n"), out); + fputs(_(" --until <time> display the lines until the specified time\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(29)); + fputs(_("\nSupported log facilities:\n"), out); + for (i = 0; i < ARRAY_SIZE(level_names); i++) + fprintf(out, " %7s - %s\n", + facility_names[i].name, + _(facility_names[i].help)); + + fputs(_("\nSupported log levels (priorities):\n"), out); + for (i = 0; i < ARRAY_SIZE(level_names); i++) + fprintf(out, " %7s - %s\n", + level_names[i].name, + _(level_names[i].help)); + + printf(USAGE_MAN_TAIL("dmesg(1)")); + exit(EXIT_SUCCESS); +} + +/* + * LEVEL ::= <number> | <name> + * <number> ::= @len is set: number in range <0..N>, where N < ARRAY_SIZE(level_names) + * ::= @len not set: number 
in range <1..N>, where N <= ARRAY_SIZE(level_names) + * <name> ::= case-insensitive text + * + * Note that @len argument is not set when parsing "-n <level>" command line + * option. The console_level is interpreted as "log level less than the value". + * + * For example "dmesg -n 8" or "dmesg -n debug" enables debug console log + * level by klogctl(SYSLOG_ACTION_CONSOLE_LEVEL, NULL, 8). The @str argument + * has to be parsed to number in range <1..8>. + */ +static int parse_level(const char *str, size_t len) +{ + int offset = 0; + + if (!str) + return -1; + if (!len) { + len = strlen(str); + offset = 1; + } + errno = 0; + + if (isdigit(*str)) { + char *end = NULL; + long x = strtol(str, &end, 10) - offset; + + if (!errno && end && end > str && (size_t) (end - str) == len && + x >= 0 && (size_t) x < ARRAY_SIZE(level_names)) + return x + offset; + } else { + size_t i; + + for (i = 0; i < ARRAY_SIZE(level_names); i++) { + const char *n = level_names[i].name; + + if (strncasecmp(str, n, len) == 0 && *(n + len) == '\0') + return i + offset; + } + } + + if (errno) + err(EXIT_FAILURE, _("failed to parse level '%s'"), str); + + errx(EXIT_FAILURE, _("unknown level '%s'"), str); + return -1; +} + +/* + * FACILITY ::= <number> | <name> + * <number> ::= number in range <0..N>, where N < ARRAY_SIZE(facility_names) + * <name> ::= case-insensitive text + */ +static int parse_facility(const char *str, size_t len) +{ + if (!str) + return -1; + if (!len) + len = strlen(str); + errno = 0; + + if (isdigit(*str)) { + char *end = NULL; + long x = strtol(str, &end, 10); + + if (!errno && end && end > str && (size_t) (end - str) == len && + x >= 0 && (size_t) x < ARRAY_SIZE(facility_names)) + return x; + } else { + size_t i; + + for (i = 0; i < ARRAY_SIZE(facility_names); i++) { + const char *n = facility_names[i].name; + + if (strncasecmp(str, n, len) == 0 && *(n + len) == '\0') + return i; + } + } + + if (errno) + err(EXIT_FAILURE, _("failed to parse facility '%s'"), str); + + 
errx(EXIT_FAILURE, _("unknown facility '%s'"), str); + return -1; +} + +/* + * Parses numerical prefix used for all messages in kernel ring buffer. + * + * Priorities/facilities are encoded into a single 32-bit quantity, where the + * bottom 3 bits are the priority (0-7) and the top 28 bits are the facility + * (0-big number). + * + * Note that the number has to end with '>' or ',' char. + */ +static const char *parse_faclev(const char *str, int *fac, int *lev) +{ + long num; + char *end = NULL; + + if (!str) + return str; + + errno = 0; + num = strtol(str, &end, 10); + + if (!errno && end && end > str) { + *fac = LOG_FAC(num); + *lev = LOG_PRI(num); + + if (*lev < 0 || (size_t) *lev > ARRAY_SIZE(level_names)) + *lev = -1; + if (*fac < 0 || (size_t) *fac > ARRAY_SIZE(facility_names)) + *fac = -1; + return end + 1; /* skip '<' or ',' */ + } + + return str; +} + +/* + * Parses timestamp from syslog message prefix, expected format: + * + * seconds.microseconds] + * + * the ']' is the timestamp field terminator. + */ +static const char *parse_syslog_timestamp(const char *str0, struct timeval *tv) +{ + const char *str = str0; + char *end = NULL; + + if (!str0) + return str0; + + errno = 0; + tv->tv_sec = strtol(str, &end, 10); + + if (!errno && end && *end == '.' 
&& *(end + 1)) { + str = end + 1; + end = NULL; + tv->tv_usec = strtol(str, &end, 10); + } + if (errno || !end || end == str || *end != ']') + return str0; + + return end + 1; /* skip ']' */ +} + +/* + * Parses timestamp from /dev/kmsg, expected formats: + * + * microseconds, + * microseconds; + * + * the ',' is fields separators and ';' items terminator (for the last item) + */ +static const char *parse_kmsg_timestamp(const char *str0, struct timeval *tv) +{ + const char *str = str0; + char *end = NULL; + uint64_t usec; + + if (!str0) + return str0; + + errno = 0; + usec = strtoumax(str, &end, 10); + + if (!errno && end && (*end == ';' || *end == ',')) { + tv->tv_usec = usec % 1000000; + tv->tv_sec = usec / 1000000; + } else + return str0; + + return end + 1; /* skip separator */ +} + + +static double time_diff(struct timeval *a, struct timeval *b) +{ + return (a->tv_sec - b->tv_sec) + (a->tv_usec - b->tv_usec) / 1E6; +} + +static int get_syslog_buffer_size(void) +{ + int n = klogctl(SYSLOG_ACTION_SIZE_BUFFER, NULL, 0); + + return n > 0 ? 
n : 0; +} + +/* + * Reads messages from regular file by mmap + */ +static ssize_t mmap_file_buffer(struct dmesg_control *ctl, char **buf) +{ + struct stat st; + int fd; + + if (!ctl->filename) + return -1; + + fd = open(ctl->filename, O_RDONLY); + if (fd < 0) + err(EXIT_FAILURE, _("cannot open %s"), ctl->filename); + if (fstat(fd, &st)) + err(EXIT_FAILURE, _("stat of %s failed"), ctl->filename); + + *buf = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (*buf == MAP_FAILED) + err(EXIT_FAILURE, _("cannot mmap: %s"), ctl->filename); + ctl->mmap_buff = *buf; + ctl->pagesize = getpagesize(); + close(fd); + + return st.st_size; +} + +/* + * Reads messages from kernel ring buffer by klogctl() + */ +static ssize_t read_syslog_buffer(struct dmesg_control *ctl, char **buf) +{ + size_t sz; + int rc = -1; + + if (ctl->bufsize) { + sz = ctl->bufsize + 8; + *buf = xmalloc(sz * sizeof(char)); + rc = klogctl(SYSLOG_ACTION_READ_ALL, *buf, sz); + } else { + sz = 16392; + while (1) { + *buf = xmalloc(sz * sizeof(char)); + rc = klogctl(SYSLOG_ACTION_READ_ALL, *buf, sz); + if (rc < 0) + break; + if ((size_t) rc != sz || sz > (1 << 28)) + break; + free(*buf); + *buf = NULL; + sz *= 4; + } + } + + return rc; +} + +/* + * Top level function to read messages + */ +static ssize_t read_buffer(struct dmesg_control *ctl, char **buf) +{ + ssize_t n = -1; + + switch (ctl->method) { + case DMESG_METHOD_MMAP: + n = mmap_file_buffer(ctl, buf); + break; + case DMESG_METHOD_SYSLOG: + if (!ctl->bufsize) + ctl->bufsize = get_syslog_buffer_size(); + + n = read_syslog_buffer(ctl, buf); + break; + case DMESG_METHOD_KMSG: + /* + * Since kernel 3.5.0 + */ + n = read_kmsg(ctl); + break; + default: + abort(); /* impossible method -> drop core */ + } + + return n; +} + +static int fwrite_hex(const char *buf, size_t size, FILE *out) +{ + size_t i; + + for (i = 0; i < size; i++) { + int rc = fprintf(out, "\\x%02hhx", buf[i]); + if (rc < 0) + return rc; + } + return 0; +} + +/* + * Prints to 'out' and 
non-printable chars are replaced with \x<hex> sequences. + */ +static void safe_fwrite(struct dmesg_control *ctl, const char *buf, size_t size, int indent, FILE *out) +{ + size_t i; +#ifdef HAVE_WIDECHAR + mbstate_t s; + wchar_t wc; + memset(&s, 0, sizeof (s)); +#endif + for (i = 0; i < size; i++) { + const char *p = buf + i; + int rc, hex = 0; + size_t len = 1; + + if (!ctl->noesc) { + if (*p == '\0') { + hex = 1; + goto doprint; + } +#ifdef HAVE_WIDECHAR + len = mbrtowc(&wc, p, size - i, &s); + + if (len == 0) /* L'\0' */ + return; + + if (len == (size_t)-1 || len == (size_t)-2) { /* invalid sequence */ + memset(&s, 0, sizeof (s)); + len = hex = 1; + i += len - 1; + } else if (len > 1) { + if (!iswprint(wc) && !iswspace(wc)) /* non-printable multibyte */ + hex = 1; + i += len - 1; + } else +#endif + { + len = 1; + if (!isprint((unsigned char) *p) && + !isspace((unsigned char) *p)) /* non-printable */ + hex = 1; + } + } + +doprint: + if (hex) + rc = fwrite_hex(p, len, out); + else if (*p == '\n' && *(p + 1) && indent) { + rc = fwrite(p, 1, len, out) != len; + if (fprintf(out, "%*s", indent, "") != indent) + rc |= 1; + } else + rc = fwrite(p, 1, len, out) != len; + + if (rc != 0) { + if (errno != EPIPE) + err(EXIT_FAILURE, _("write failed")); + exit(EXIT_SUCCESS); + } + } +} + +static const char *skip_item(const char *begin, const char *end, const char *sep) +{ + while (begin < end) { + int c = *begin++; + + if (c == '\0' || strchr(sep, c)) + break; + } + + return begin; +} + +/* + * Parses one record from syslog(2) buffer + */ +static int get_next_syslog_record(struct dmesg_control *ctl, + struct dmesg_record *rec) +{ + size_t i; + const char *begin = NULL; + + if (ctl->method != DMESG_METHOD_MMAP && + ctl->method != DMESG_METHOD_SYSLOG) + return -1; + + if (!rec->next || !rec->next_size) + return 1; + + INIT_DMESG_RECORD(rec); + + /* + * Unmap already printed file data from memory + */ + if (ctl->mmap_buff && (size_t) (rec->next - ctl->mmap_buff) > ctl->pagesize) 
{ + void *x = ctl->mmap_buff; + + ctl->mmap_buff += ctl->pagesize; + munmap(x, ctl->pagesize); + } + + for (i = 0; i < rec->next_size; i++) { + const char *p = rec->next + i; + const char *end = NULL; + + if (!begin) + begin = p; + if (i + 1 == rec->next_size) { + end = p + 1; + i++; + } else if (*p == '\n' && *(p + 1) == '<') + end = p; + + if (begin && !*begin) + begin = NULL; /* zero(s) at the end of the buffer? */ + if (!begin || !end) + continue; + if (end <= begin) + continue; /* error or empty line? */ + + if (*begin == '<') { + if (ctl->fltr_lev || ctl->fltr_fac || ctl->decode || ctl->color || ctl->json) + begin = parse_faclev(begin + 1, &rec->facility, + &rec->level); + else + begin = skip_item(begin, end, ">"); + } + + if (*begin == '[' && (*(begin + 1) == ' ' || + isdigit(*(begin + 1)))) { + + if (!is_timefmt(ctl, NONE)) + begin = parse_syslog_timestamp(begin + 1, &rec->tv); + else + begin = skip_item(begin, end, "]"); + + if (begin < end && *begin == ' ') + begin++; + } + + rec->mesg = begin; + rec->mesg_size = end - begin; + + /* Don't count \n from the last message to the message size */ + if (*end != '\n' && *(end - 1) == '\n') + rec->mesg_size--; + + rec->next_size -= end - rec->next; + rec->next = rec->next_size > 0 ? 
end + 1 : NULL; + if (rec->next_size > 0) + rec->next_size--; + + return 0; + } + + return 1; +} + +static time_t record_time(struct dmesg_control *ctl, struct dmesg_record *rec) +{ + return ctl->boot_time.tv_sec + ctl->suspended_time + rec->tv.tv_sec; +} + +static int accept_record(struct dmesg_control *ctl, struct dmesg_record *rec) +{ + if (ctl->fltr_lev && (rec->facility < 0 || + !isset(ctl->levels, rec->level))) + return 0; + + if (ctl->fltr_fac && (rec->facility < 0 || + !isset(ctl->facilities, rec->facility))) + return 0; + + if (ctl->since && ctl->since >= record_time(ctl, rec)) + return 0; + + if (ctl->until && ctl->until <= record_time(ctl, rec)) + return 0; + + return 1; +} + +static void raw_print(struct dmesg_control *ctl, const char *buf, size_t size) +{ + int lastc = '\n'; + + if (!ctl->mmap_buff) { + /* + * Print whole ring buffer + */ + safe_fwrite(ctl, buf, size, 0, stdout); + lastc = buf[size - 1]; + } else { + /* + * Print file in small chunks to save memory + */ + while (size) { + size_t sz = size > ctl->pagesize ? ctl->pagesize : size; + char *x = ctl->mmap_buff; + + safe_fwrite(ctl, x, sz, 0, stdout); + lastc = x[sz - 1]; + size -= sz; + ctl->mmap_buff += sz; + munmap(x, sz); + } + } + + if (lastc != '\n') + putchar('\n'); +} + +static struct tm *record_localtime(struct dmesg_control *ctl, + struct dmesg_record *rec, + struct tm *tm) +{ + time_t t = record_time(ctl, rec); + return localtime_r(&t, tm); +} + +static char *record_ctime(struct dmesg_control *ctl, + struct dmesg_record *rec, + char *buf, size_t bufsiz) +{ + struct tm tm; + + record_localtime(ctl, rec, &tm); + + /* TRANSLATORS: dmesg uses strftime() fo generate date-time string + where %a is abbreviated name of the day, %b is abbreviated month + name and %e is day of the month as a decimal number. 
Please, set + proper month/day order here */ + if (strftime(buf, bufsiz, _("%a %b %e %H:%M:%S %Y"), &tm) == 0) + *buf = '\0'; + return buf; +} + +static char *short_ctime(struct tm *tm, char *buf, size_t bufsiz) +{ + /* TRANSLATORS: dmesg uses strftime() fo generate date-time string + where: %b is abbreviated month and %e is day of the month as a + decimal number. Please, set proper month/day order here. */ + if (strftime(buf, bufsiz, _("%b%e %H:%M"), tm) == 0) + *buf = '\0'; + return buf; +} + +static char *iso_8601_time(struct dmesg_control *ctl, struct dmesg_record *rec, + char *buf, size_t bufsz) +{ + struct timeval tv = { + .tv_sec = ctl->boot_time.tv_sec + ctl->suspended_time + rec->tv.tv_sec, + .tv_usec = rec->tv.tv_usec + }; + + if (strtimeval_iso(&tv, ISO_TIMESTAMP_COMMA_T, buf, bufsz) != 0) + return NULL; + + return buf; +} + +static double record_count_delta(struct dmesg_control *ctl, + struct dmesg_record *rec) +{ + double delta = 0; + + if (timerisset(&ctl->lasttime)) + delta = time_diff(&rec->tv, &ctl->lasttime); + + ctl->lasttime = rec->tv; + return delta; +} + +static const char *get_subsys_delimiter(const char *mesg, size_t mesg_size) +{ + const char *p = mesg; + size_t sz = mesg_size; + + while (sz > 0) { + const char *d = strnchr(p, sz, ':'); + if (!d) + return NULL; + sz -= d - p + 1; + if (sz) { + if (sz >= 2 && isblank(*(d + 1))) + return d + 2; + p = d + 1; + } + } + return NULL; +} + +#define is_facpri_valid(_r) \ + (((_r)->level > -1) && ((_r)->level < (int) ARRAY_SIZE(level_names)) && \ + ((_r)->facility > -1) && \ + ((_r)->facility < (int) ARRAY_SIZE(facility_names))) + + +static void print_record(struct dmesg_control *ctl, + struct dmesg_record *rec) +{ + char buf[128]; + char fpbuf[32] = "\0"; + char tsbuf[64] = "\0"; + size_t mesg_size = rec->mesg_size; + int timebreak = 0; + char *mesg_copy = NULL; + const char *line = NULL; + + if (!accept_record(ctl, rec)) + return; + + if (!rec->mesg_size) { + if (!ctl->json) + putchar('\n'); + 
return; + } + + if (ctl->json) { + if (!ul_jsonwrt_is_ready(&ctl->jfmt)) { + ul_jsonwrt_init(&ctl->jfmt, stdout, 0); + ul_jsonwrt_root_open(&ctl->jfmt); + ul_jsonwrt_array_open(&ctl->jfmt, "dmesg"); + } + ul_jsonwrt_object_open(&ctl->jfmt, NULL); + } + + /* + * Compose syslog(2) compatible raw output -- used for /dev/kmsg for + * backward compatibility with syslog(2) buffers only + */ + if (ctl->raw) { + ctl->indent = snprintf(tsbuf, sizeof(tsbuf), + "<%d>[%5ld.%06ld] ", + LOG_MAKEPRI(rec->facility, rec->level), + (long) rec->tv.tv_sec, + (long) rec->tv.tv_usec); + goto full_output; + } + + /* Store decode information (facility & priority level) in a buffer */ + if (!ctl->json && ctl->decode && is_facpri_valid(rec)) + snprintf(fpbuf, sizeof(fpbuf), "%-6s:%-6s: ", + facility_names[rec->facility].name, + level_names[rec->level].name); + + /* Store the timestamp in a buffer */ + switch (ctl->time_fmt) { + double delta; + struct tm cur; + case DMESG_TIMEFTM_NONE: + ctl->indent = 0; + break; + case DMESG_TIMEFTM_CTIME: + ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%s] ", + record_ctime(ctl, rec, buf, sizeof(buf))); + break; + case DMESG_TIMEFTM_CTIME_DELTA: + ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%s <%12.06f>] ", + record_ctime(ctl, rec, buf, sizeof(buf)), + record_count_delta(ctl, rec)); + break; + case DMESG_TIMEFTM_DELTA: + ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[<%12.06f>] ", + record_count_delta(ctl, rec)); + break; + case DMESG_TIMEFTM_RELTIME: + record_localtime(ctl, rec, &cur); + delta = record_count_delta(ctl, rec); + if (cur.tm_min != ctl->lasttm.tm_min || + cur.tm_hour != ctl->lasttm.tm_hour || + cur.tm_yday != ctl->lasttm.tm_yday) { + timebreak = 1; + ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%s] ", + short_ctime(&cur, buf, + sizeof(buf))); + } else { + if (delta < 10) + ctl->indent = snprintf(tsbuf, sizeof(tsbuf), + "[ %+8.06f] ", delta); + else + ctl->indent = snprintf(tsbuf, sizeof(tsbuf), + "[ %+9.06f] ", delta); + } + ctl->lasttm 
= cur; + break; + case DMESG_TIMEFTM_TIME: + ctl->indent = snprintf(tsbuf, sizeof(tsbuf), + ctl->json ? "%5ld.%06ld" : "[%5ld.%06ld] ", + (long)rec->tv.tv_sec, + (long)rec->tv.tv_usec); + break; + case DMESG_TIMEFTM_TIME_DELTA: + ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "[%5ld.%06ld <%12.06f>] ", + (long)rec->tv.tv_sec, + (long)rec->tv.tv_usec, + record_count_delta(ctl, rec)); + break; + case DMESG_TIMEFTM_ISO8601: + ctl->indent = snprintf(tsbuf, sizeof(tsbuf), "%s ", + iso_8601_time(ctl, rec, buf, + sizeof(buf))); + break; + default: + abort(); + } + + ctl->indent += strlen(fpbuf); + +full_output: + /* Output the decode information */ + if (*fpbuf) { + fputs(fpbuf, stdout); + } else if (ctl->json && is_facpri_valid(rec)) { + if (ctl->decode) { + ul_jsonwrt_value_s(&ctl->jfmt, "fac", facility_names[rec->facility].name); + ul_jsonwrt_value_s(&ctl->jfmt, "pri", level_names[rec->level].name); + } else + ul_jsonwrt_value_u64(&ctl->jfmt, "pri", LOG_MAKEPRI(rec->facility, rec->level)); + } + + /* Output the timestamp buffer */ + if (*tsbuf) { + /* Colorize the timestamp */ + if (ctl->color) + dmesg_enable_color(timebreak ? DMESG_COLOR_TIMEBREAK : + DMESG_COLOR_TIME); + if (ctl->time_fmt != DMESG_TIMEFTM_RELTIME) { + if (ctl->json) + ul_jsonwrt_value_raw(&ctl->jfmt, "time", tsbuf); + else + fputs(tsbuf, stdout); + } else { + /* + * For relative timestamping, the first line's + * timestamp is the offset and all other lines will + * report an offset of 0.000000. + */ + fputs(!line ? tsbuf : "[ +0.000000] ", stdout); + } + if (ctl->color) + color_disable(); + } + + /* + * A kernel message may contain several lines of output, separated + * by '\n'. If the timestamp and decode outputs are forced then each + * line of the message must be displayed with that information. 
+ */ + if (ctl->force_prefix) { + if (!line) { + mesg_copy = xstrdup(rec->mesg); + line = strtok(mesg_copy, "\n"); + if (!line) + goto done; /* only when something is wrong */ + mesg_size = strlen(line); + } + } else { + line = rec->mesg; + mesg_size = rec->mesg_size; + } + + /* Colorize kernel message output */ + if (ctl->color) { + /* Subsystem prefix */ + const char *subsys = get_subsys_delimiter(line, mesg_size); + int has_color = 0; + + if (subsys) { + dmesg_enable_color(DMESG_COLOR_SUBSYS); + safe_fwrite(ctl, line, subsys - line, ctl->indent, stdout); + color_disable(); + + mesg_size -= subsys - line; + line = subsys; + } + /* Error, alert .. etc. colors */ + has_color = set_level_color(rec->level, line, mesg_size) == 0; + safe_fwrite(ctl, line, mesg_size, ctl->indent, stdout); + if (has_color) + color_disable(); + } else { + if (ctl->json) + ul_jsonwrt_value_s(&ctl->jfmt, "msg", line); + else + safe_fwrite(ctl, line, mesg_size, ctl->indent, stdout); + } + + /* Get the next line */ + if (ctl->force_prefix) { + line = strtok(NULL, "\n"); + if (line && *line) { + putchar('\n'); + mesg_size = strlen(line); + goto full_output; + } + } + +done: + free(mesg_copy); + if (ctl->json) + ul_jsonwrt_object_close(&ctl->jfmt); + else + putchar('\n'); +} + +/* + * Prints the 'buf' kernel ring buffer; the messages are filtered out according + * to 'levels' and 'facilities' bitarrays. 
+ */ +static void print_buffer(struct dmesg_control *ctl, + const char *buf, size_t size) +{ + struct dmesg_record rec = { .next = buf, .next_size = size }; + + if (ctl->raw) { + raw_print(ctl, buf, size); + return; + } + + while (get_next_syslog_record(ctl, &rec) == 0) + print_record(ctl, &rec); +} + +static ssize_t read_kmsg_one(struct dmesg_control *ctl) +{ + ssize_t size; + + /* kmsg returns EPIPE if record was modified while reading */ + do { + size = read(ctl->kmsg, ctl->kmsg_buf, + sizeof(ctl->kmsg_buf) - 1); + } while (size < 0 && errno == EPIPE); + + return size; +} + +static int init_kmsg(struct dmesg_control *ctl) +{ + int mode = O_RDONLY; + + if (!ctl->follow) + mode |= O_NONBLOCK; + else + setlinebuf(stdout); + + ctl->kmsg = open("/dev/kmsg", mode); + if (ctl->kmsg < 0) + return -1; + + /* + * Seek after the last record available at the time + * the last SYSLOG_ACTION_CLEAR was issued. + * + * ... otherwise SYSLOG_ACTION_CLEAR will have no effect for kmsg. + */ + lseek(ctl->kmsg, 0, ctl->end ? SEEK_END : SEEK_DATA); + + /* + * Old kernels (<3.5) can successfully open /dev/kmsg for read-only, + * but read() returns -EINVAL :-((( + * + * Let's try to read the first record. The record is later processed in + * read_kmsg(). + */ + ctl->kmsg_first_read = read_kmsg_one(ctl); + if (ctl->kmsg_first_read < 0) { + close(ctl->kmsg); + ctl->kmsg = -1; + return -1; + } + + return 0; +} + +/* + * /dev/kmsg record format: + * + * faclev,seqnum,timestamp[optional, ...];message\n + * TAGNAME=value + * ... 
+ * + * - fields are separated by ',' + * - last field is terminated by ';' + * + */ +#define LAST_KMSG_FIELD(s) (!s || !*s || *(s - 1) == ';') + +static int parse_kmsg_record(struct dmesg_control *ctl, + struct dmesg_record *rec, + char *buf, + size_t sz) +{ + const char *p = buf, *end; + + if (sz == 0 || !buf || !*buf) + return -1; + + end = buf + (sz - 1); + INIT_DMESG_RECORD(rec); + + while (p < end && isspace(*p)) + p++; + + /* A) priority and facility */ + if (ctl->fltr_lev || ctl->fltr_fac || ctl->decode || + ctl->raw || ctl->color || ctl->json) + p = parse_faclev(p, &rec->facility, &rec->level); + else + p = skip_item(p, end, ","); + if (LAST_KMSG_FIELD(p)) + goto mesg; + + /* B) sequence number */ + p = skip_item(p, end, ",;"); + if (LAST_KMSG_FIELD(p)) + goto mesg; + + /* C) timestamp */ + if (is_timefmt(ctl, NONE)) + p = skip_item(p, end, ",;"); + else + p = parse_kmsg_timestamp(p, &rec->tv); + if (LAST_KMSG_FIELD(p)) + goto mesg; + + /* D) optional fields (ignore) */ + p = skip_item(p, end, ";"); + +mesg: + /* E) message text */ + rec->mesg = p; + p = skip_item(p, end, "\n"); + if (!p) + return -1; + + /* The message text is terminated by \n, but it's possible that the + * message contains another stuff behind this linebreak; in this case + * the previous skip_item() returns pointer to the stuff behind \n. + * Let's normalize all these situations and make sure we always point to + * the \n. + * + * Note that the next unhexmangle_to_buffer() will replace \n by \0. + */ + if (*p && *p != '\n') + p--; + + /* + * Kernel escapes non-printable characters, unfortunately kernel + * definition of "non-printable" is too strict. On UTF8 console we can + * print many chars, so let's decode from kernel. 
+ */ + rec->mesg_size = unhexmangle_to_buffer(rec->mesg, + (char *) rec->mesg, p - rec->mesg + 1); + + rec->mesg_size--; /* don't count \0 */ + + /* F) message tags (ignore) */ + + return 0; +} + +/* + * Note that each read() call for /dev/kmsg returns always one record. It means + * that we don't have to read whole message buffer before the records parsing. + * + * So this function does not compose one huge buffer (like read_syslog_buffer()) + * and print_buffer() is unnecessary. All is done in this function. + * + * Returns 0 on success, -1 on error. + */ +static int read_kmsg(struct dmesg_control *ctl) +{ + struct dmesg_record rec; + ssize_t sz; + + if (ctl->method != DMESG_METHOD_KMSG || ctl->kmsg < 0) + return -1; + + /* + * The very first read() call is done in kmsg_init() where we test + * /dev/kmsg usability. The return code from the initial read() is + * stored in ctl->kmsg_first_read; + */ + sz = ctl->kmsg_first_read; + + while (sz > 0) { + *(ctl->kmsg_buf + sz) = '\0'; /* for debug messages */ + + if (parse_kmsg_record(ctl, &rec, + ctl->kmsg_buf, (size_t) sz) == 0) + print_record(ctl, &rec); + + sz = read_kmsg_one(ctl); + } + + return 0; +} + +static int which_time_format(const char *s) +{ + if (!strcmp(s, "notime")) + return DMESG_TIMEFTM_NONE; + if (!strcmp(s, "ctime")) + return DMESG_TIMEFTM_CTIME; + if (!strcmp(s, "delta")) + return DMESG_TIMEFTM_DELTA; + if (!strcmp(s, "reltime")) + return DMESG_TIMEFTM_RELTIME; + if (!strcmp(s, "iso")) + return DMESG_TIMEFTM_ISO8601; + errx(EXIT_FAILURE, _("unknown time format: %s"), s); +} + +#ifdef TEST_DMESG +static inline int dmesg_get_boot_time(struct timeval *tv) +{ + char *str = getenv("DMESG_TEST_BOOTIME"); + uintmax_t sec, usec; + + if (str && sscanf(str, "%ju.%ju", &sec, &usec) == 2) { + tv->tv_sec = sec; + tv->tv_usec = usec; + return tv->tv_sec >= 0 && tv->tv_usec >= 0 ? 
0 : -EINVAL; + } + + return get_boot_time(tv); +} + +static inline time_t dmesg_get_suspended_time(void) +{ + if (getenv("DMESG_TEST_BOOTIME")) + return 0; + return get_suspended_time(); +} +#else +# define dmesg_get_boot_time get_boot_time +# define dmesg_get_suspended_time get_suspended_time +#endif + +int main(int argc, char *argv[]) +{ + char *buf = NULL; + int c, nopager = 0; + int console_level = 0; + int klog_rc = 0; + int delta = 0; + ssize_t n; + static struct dmesg_control ctl = { + .filename = NULL, + .action = SYSLOG_ACTION_READ_ALL, + .method = DMESG_METHOD_KMSG, + .kmsg = -1, + .time_fmt = DMESG_TIMEFTM_TIME, + .indent = 0, + }; + int colormode = UL_COLORMODE_UNDEF; + enum { + OPT_TIME_FORMAT = CHAR_MAX + 1, + OPT_NOESC, + OPT_SINCE, + OPT_UNTIL + }; + + static const struct option longopts[] = { + { "buffer-size", required_argument, NULL, 's' }, + { "clear", no_argument, NULL, 'C' }, + { "color", optional_argument, NULL, 'L' }, + { "console-level", required_argument, NULL, 'n' }, + { "console-off", no_argument, NULL, 'D' }, + { "console-on", no_argument, NULL, 'E' }, + { "decode", no_argument, NULL, 'x' }, + { "file", required_argument, NULL, 'F' }, + { "facility", required_argument, NULL, 'f' }, + { "follow", no_argument, NULL, 'w' }, + { "follow-new", no_argument, NULL, 'W' }, + { "human", no_argument, NULL, 'H' }, + { "help", no_argument, NULL, 'h' }, + { "json", no_argument, NULL, 'J' }, + { "kernel", no_argument, NULL, 'k' }, + { "level", required_argument, NULL, 'l' }, + { "since", required_argument, NULL, OPT_SINCE }, + { "syslog", no_argument, NULL, 'S' }, + { "raw", no_argument, NULL, 'r' }, + { "read-clear", no_argument, NULL, 'c' }, + { "reltime", no_argument, NULL, 'e' }, + { "show-delta", no_argument, NULL, 'd' }, + { "ctime", no_argument, NULL, 'T' }, + { "noescape", no_argument, NULL, OPT_NOESC }, + { "notime", no_argument, NULL, 't' }, + { "nopager", no_argument, NULL, 'P' }, + { "until", required_argument, NULL, OPT_UNTIL }, + { 
"userspace", no_argument, NULL, 'u' }, + { "version", no_argument, NULL, 'V' }, + { "time-format", required_argument, NULL, OPT_TIME_FORMAT }, + { "force-prefix", no_argument, NULL, 'p' }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'C','D','E','c','n','r' }, /* clear,off,on,read-clear,level,raw*/ + { 'H','r' }, /* human, raw */ + { 'L','r' }, /* color, raw */ + { 'S','w' }, /* syslog,follow */ + { 'T','r' }, /* ctime, raw */ + { 'd','r' }, /* delta, raw */ + { 'e','r' }, /* reltime, raw */ + { 'r','x' }, /* raw, decode */ + { 'r','t' }, /* notime, raw */ + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, "CcDdEeF:f:HhJkL::l:n:iPprSs:TtuVWwx", + longopts, NULL)) != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch (c) { + case 'C': + ctl.action = SYSLOG_ACTION_CLEAR; + break; + case 'c': + ctl.action = SYSLOG_ACTION_READ_CLEAR; + break; + case 'D': + ctl.action = SYSLOG_ACTION_CONSOLE_OFF; + break; + case 'd': + delta = 1; + break; + case 'E': + ctl.action = SYSLOG_ACTION_CONSOLE_ON; + break; + case 'e': + ctl.time_fmt = DMESG_TIMEFTM_RELTIME; + break; + case 'F': + ctl.filename = optarg; + ctl.method = DMESG_METHOD_MMAP; + break; + case 'f': + ctl.fltr_fac = 1; + if (string_to_bitarray(optarg, + ctl.facilities, parse_facility) < 0) + return EXIT_FAILURE; + break; + case 'H': + ctl.time_fmt = DMESG_TIMEFTM_RELTIME; + colormode = UL_COLORMODE_AUTO; + ctl.pager = 1; + break; + case 'J': + ctl.json = 1; + break; + case 'k': + ctl.fltr_fac = 1; + setbit(ctl.facilities, FAC_BASE(LOG_KERN)); + break; + case 'L': + colormode = UL_COLORMODE_AUTO; + if (optarg) + colormode = colormode_or_err(optarg, + _("unsupported color mode")); + break; + case 'l': + ctl.fltr_lev= 1; + if (string_to_bitarray(optarg, + ctl.levels, 
parse_level) < 0) + return EXIT_FAILURE; + break; + case 'n': + ctl.action = SYSLOG_ACTION_CONSOLE_LEVEL; + console_level = parse_level(optarg, 0); + break; + case 'P': + nopager = 1; + break; + case 'p': + ctl.force_prefix = 1; + break; + case 'r': + ctl.raw = 1; + break; + case 'S': + ctl.method = DMESG_METHOD_SYSLOG; + break; + case 's': + ctl.bufsize = strtou32_or_err(optarg, + _("invalid buffer size argument")); + if (ctl.bufsize < 4096) + ctl.bufsize = 4096; + break; + case 'T': + ctl.time_fmt = DMESG_TIMEFTM_CTIME; + break; + case 't': + ctl.time_fmt = DMESG_TIMEFTM_NONE; + break; + case 'u': + ctl.fltr_fac = 1; + for (n = 1; (size_t) n < ARRAY_SIZE(facility_names); n++) + setbit(ctl.facilities, n); + break; + case 'w': + ctl.follow = 1; + break; + case 'W': + ctl.follow = 1; + ctl.end = 1; + break; + case 'x': + ctl.decode = 1; + break; + case OPT_TIME_FORMAT: + ctl.time_fmt = which_time_format(optarg); + break; + case OPT_NOESC: + ctl.noesc = 1; + break; + case OPT_SINCE: + { + usec_t p; + if (parse_timestamp(optarg, &p) < 0) + errx(EXIT_FAILURE, _("invalid time value \"%s\""), optarg); + ctl.since = (time_t) (p / 1000000); + break; + } + case OPT_UNTIL: + { + usec_t p; + if (parse_timestamp(optarg, &p) < 0) + errx(EXIT_FAILURE, _("invalid time value \"%s\""), optarg); + ctl.until = (time_t) (p / 1000000); + break; + } + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (argc != optind) { + warnx(_("bad usage")); + errtryhelp(EXIT_FAILURE); + } + + if (ctl.json) { + ctl.time_fmt = DMESG_TIMEFTM_TIME; + delta = 0; + ctl.force_prefix = 0; + ctl.raw = 0; + ctl.noesc = 1; + nopager = 1; + } + + if ((is_timefmt(&ctl, RELTIME) || + is_timefmt(&ctl, CTIME) || + is_timefmt(&ctl, ISO8601)) || + ctl.since || + ctl.until) { + if (dmesg_get_boot_time(&ctl.boot_time) != 0) + ctl.time_fmt = DMESG_TIMEFTM_NONE; + else + ctl.suspended_time = dmesg_get_suspended_time(); + } + + if (delta) + switch 
(ctl.time_fmt) { + case DMESG_TIMEFTM_CTIME: + ctl.time_fmt = DMESG_TIMEFTM_CTIME_DELTA; + break; + case DMESG_TIMEFTM_TIME: + ctl.time_fmt = DMESG_TIMEFTM_TIME_DELTA; + break; + case DMESG_TIMEFTM_ISO8601: + warnx(_("--show-delta is ignored when used together with iso8601 time format")); + break; + default: + ctl.time_fmt = DMESG_TIMEFTM_DELTA; + } + + + if (!ctl.json) + ctl.color = colors_init(colormode, "dmesg") ? 1 : 0; + if (ctl.follow) + nopager = 1; + ctl.pager = nopager ? 0 : ctl.pager; + if (ctl.pager) + pager_redirect(); + + switch (ctl.action) { + case SYSLOG_ACTION_READ_ALL: + case SYSLOG_ACTION_READ_CLEAR: + if (ctl.method == DMESG_METHOD_KMSG && init_kmsg(&ctl) != 0) + ctl.method = DMESG_METHOD_SYSLOG; + + if (ctl.raw + && ctl.method != DMESG_METHOD_KMSG + && (ctl.fltr_lev || ctl.fltr_fac)) + errx(EXIT_FAILURE, _("--raw can be used together with --level or " + "--facility only when reading messages from /dev/kmsg")); + + /* only kmsg supports multi-line messages */ + if (ctl.force_prefix && ctl.method != DMESG_METHOD_KMSG) + ctl.force_prefix = 0; + if (ctl.pager) + pager_redirect(); + n = read_buffer(&ctl, &buf); + if (n > 0) + print_buffer(&ctl, buf, n); + if (!ctl.mmap_buff) + free(buf); + if (ctl.kmsg >= 0) + close(ctl.kmsg); + if (ctl.json && ul_jsonwrt_is_ready(&ctl.jfmt)) { + ul_jsonwrt_array_close(&ctl.jfmt); + ul_jsonwrt_root_close(&ctl.jfmt); + } + if (n < 0) + err(EXIT_FAILURE, _("read kernel buffer failed")); + else if (ctl.action == SYSLOG_ACTION_READ_CLEAR) + ; /* fallthrough */ + else + break; + case SYSLOG_ACTION_CLEAR: + if (klogctl(SYSLOG_ACTION_CLEAR, NULL, 0) < 0) + err(EXIT_FAILURE, _("clear kernel buffer failed")); + break; + case SYSLOG_ACTION_CONSOLE_OFF: + case SYSLOG_ACTION_CONSOLE_ON: + klog_rc = klogctl(ctl.action, NULL, 0); + break; + case SYSLOG_ACTION_CONSOLE_LEVEL: + klog_rc = klogctl(ctl.action, NULL, console_level); + break; + default: + errx(EXIT_FAILURE, _("unsupported command")); + break; + } + + + if (klog_rc) + 
err(EXIT_FAILURE, _("klogctl failed")); + + return EXIT_SUCCESS; +} diff --git a/sys-utils/eject.1 b/sys-utils/eject.1 new file mode 100644 index 0000000..3cc3f7d --- /dev/null +++ b/sys-utils/eject.1 @@ -0,0 +1,185 @@ +'\" t +.\" Title: eject +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "EJECT" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +eject \- eject removable media +.sp +\fBeject\fP [options] \fIdevice\fP|\fImountpoint\fP +.SH "DESCRIPTION" +.sp +\fBeject\fP allows removable media (typically a CD\-ROM, floppy disk, tape, JAZ, ZIP or USB disk) to be ejected under software control. The command can also control some multi\-disc CD\-ROM changers, the auto\-eject feature supported by some devices, and close the disc tray of some CD\-ROM drives. +.sp +The device corresponding to \fIdevice\fP or \fImountpoint\fP is ejected. If no name is specified, the default name \fB/dev/cdrom\fP is used. The device may be addressed by device name (e.g., \(aqsda\(aq), device path (e.g., \(aq/dev/sda\(aq), UUID=\fIuuid\fP or LABEL=\fIlabel\fP tags. +.sp +There are four different methods of ejecting, depending on whether the device is a CD\-ROM, SCSI device, removable floppy, or tape. By default \fBeject\fP tries all four methods in order until it succeeds. +.sp +If a device partition is specified, the whole\-disk device is used. +.sp +If the device or a device partition is currently mounted, it is unmounted before ejecting. The eject is processed on exclusive open block device file descriptor if \fB\-\-no\-unmount\fP or \fB\-\-force\fP are not specified. 
+.SH "OPTIONS" +.sp +\fB\-a\fP, \fB\-\-auto on\fP|\fBoff\fP +.RS 4 +This option controls the auto\-eject mode, supported by some devices. When enabled, the drive automatically ejects when the device is closed. +.RE +.sp +\fB\-c\fP, \fB\-\-changerslot\fP \fIslot\fP +.RS 4 +With this option a CD slot can be selected from an ATAPI/IDE CD\-ROM changer. The CD\-ROM drive cannot be in use (mounted data CD or playing a music CD) for a change request to work. Please also note that the first slot of the changer is referred to as 0, not 1. +.RE +.sp +\fB\-d\fP, \fB\-\-default\fP +.RS 4 +List the default device name. +.RE +.sp +\fB\-F\fP, \fB\-\-force\fP +.RS 4 +Force eject, don\(cqt check device type, don\(cqt open device with exclusive lock. A reported success may be a false positive on non hot\-pluggable devices. +.RE +.sp +\fB\-f\fP, \fB\-\-floppy\fP +.RS 4 +This option specifies that the drive should be ejected using a removable floppy disk eject command. +.RE +.sp +\fB\-i\fP, \fB\-\-manualeject on\fP|\fBoff\fP +.RS 4 +This option controls locking of the hardware eject button. When enabled, the drive will not be ejected when the button is pressed. This is useful when you are carrying a laptop in a bag or case and don\(cqt want it to eject if the button is inadvertently pressed. +.RE +.sp +\fB\-M\fP, \fB\-\-no\-partitions\-unmount\fP +.RS 4 +The option tells \fBeject\fP to not try to unmount other partitions on partitioned devices. If another partition is still mounted, the program will not attempt to eject the media. It will attempt to unmount only the device or mountpoint given on the command line. +.RE +.sp +\fB\-m\fP, \fB\-\-no\-unmount\fP +.RS 4 +The option tells \fBeject\fP to not try to unmount at all. If this option is not specified then \fBeject\fP opens the device with \fBO_EXCL\fP flag to be sure that the device is not used (since v2.35). +.RE +.sp +\fB\-n\fP, \fB\-\-noop\fP +.RS 4 +With this option the selected device is displayed but no action is performed. 
+.RE +.sp +\fB\-p\fP, \fB\-\-proc\fP +.RS 4 +This option allows you to use \fI/proc/mounts\fP instead of \fI/etc/mtab\fP. It also passes the \fB\-n\fP option to \fBumount\fP(8). +.RE +.sp +\fB\-q\fP, \fB\-\-tape\fP +.RS 4 +This option specifies that the drive should be ejected using a tape drive offline command. +.RE +.sp +\fB\-r\fP, \fB\-\-cdrom\fP +.RS 4 +This option specifies that the drive should be ejected using a CDROM eject command. +.RE +.sp +\fB\-s\fP, \fB\-\-scsi\fP +.RS 4 +This option specifies that the drive should be ejected using SCSI commands. +.RE +.sp +\fB\-T\fP, \fB\-\-traytoggle\fP +.RS 4 +With this option the drive is given a CD\-ROM tray close command if it\(cqs opened, and a CD\-ROM tray eject command if it\(cqs closed. Not all devices support this command, because it uses the above CD\-ROM tray close command. +.RE +.sp +\fB\-t\fP, \fB\-\-trayclose\fP +.RS 4 +With this option the drive is given a CD\-ROM tray close command. Not all devices support this command. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Run in verbose mode; more information is displayed about what the command is doing. +.RE +.sp +\fB\-X\fP, \fB\-\-listspeed\fP +.RS 4 +With this option the CD\-ROM drive will be probed to detect the available speeds. The output is a list of speeds which can be used as an argument of the \fB\-x\fP option. This only works with Linux 2.6.13 or higher, on previous versions solely the maximum speed will be reported. Also note that some drives may not correctly report the speed and therefore this option does not work with them. +.RE +.sp +\fB\-x\fP, \fB\-\-cdspeed\fP \fIspeed\fP +.RS 4 +With this option the drive is given a CD\-ROM select speed command. The \fIspeed\fP argument is a number indicating the desired speed (e.g., 8 for 8X speed), or 0 for maximum data rate. 
Not all devices support this command and you can only specify speeds that the drive is capable of. Every time the media is changed this option is cleared. This option can be used alone, or with the \fB\-t\fP and \fB\-c\fP options. +.RE +.SH "EXIT STATUS" +.sp +Returns 0 if operation was successful, 1 if operation failed or command syntax was not valid. +.SH "NOTES" +.sp +\fBeject\fP only works with devices that support one or more of the four methods of ejecting. This includes most CD\-ROM drives (IDE, SCSI, and proprietary), some SCSI tape drives, JAZ drives, ZIP drives (parallel port, SCSI, and IDE versions), and LS120 removable floppies. Users have also reported success with floppy drives on Sun SPARC and Apple Macintosh systems. If \fBeject\fP does not work, it is most likely a limitation of the kernel driver for the device and not the \fBeject\fP program itself. +.sp +The \fB\-r\fP, \fB\-s\fP, \fB\-f\fP, and \fB\-q\fP options allow controlling which methods are used to eject. More than one method can be specified. If none of these options are specified, it tries all four (this works fine in most cases). +.sp +\fBeject\fP may not always be able to determine if the device is mounted (e.g., if it has several names). If the device name is a symbolic link, \fBeject\fP will follow the link and use the device that it points to. +.sp +If \fBeject\fP determines that the device can have multiple partitions, it will attempt to unmount all mounted partitions of the device before ejecting (see also \fB\-\-no\-partitions\-unmount\fP). If an unmount fails, the program will not attempt to eject the media. +.sp +You can eject an audio CD. Some CD\-ROM drives will refuse to open the tray if the drive is empty. Some devices do not support the tray close command. +.sp +If the auto\-eject feature is enabled, then the drive will always be ejected after running this command. Not all Linux kernel CD\-ROM drivers support the auto\-eject mode. 
There is no way to find out the state of the auto\-eject mode. +.sp +You need appropriate privileges to access the device files. Running as root is required to eject some devices (e.g., SCSI devices). +.SH "AUTHORS" +.sp +.MTO "tranter\(atpobox.com" "Jeff Tranter" "" +\- original author, +.MTO "kzak\(atredhat.com" "Karel Zak" "" +and +.MTO "mluscon\(atredhat.com" "Michal Luscon" "" +\- util\-linux version. +.SH "SEE ALSO" +.sp +\fBfindmnt\fP(8), +\fBlsblk\fP(8), +\fBmount\fP(8), +\fBumount\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBeject\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/eject.1.adoc b/sys-utils/eject.1.adoc new file mode 100644 index 0000000..dc6a417 --- /dev/null +++ b/sys-utils/eject.1.adoc @@ -0,0 +1,130 @@ +//po4a: entry man manual +//// +Copyright (C) 1994-2005 Jeff Tranter (tranter@pobox.com) +Copyright (C) 2012 Karel Zak <kzak@redhat.com>. + +It may be distributed under the GNU Public License, version 2, or +any higher version. See section COPYING of the GNU Public license +for conditions under which this file may be redistributed. +//// += eject(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: eject + +== NAME + +eject - eject removable media + +*eject* [options] _device_|_mountpoint_ + +== DESCRIPTION + +*eject* allows removable media (typically a CD-ROM, floppy disk, tape, JAZ, ZIP or USB disk) to be ejected under software control. The command can also control some multi-disc CD-ROM changers, the auto-eject feature supported by some devices, and close the disc tray of some CD-ROM drives. + +The device corresponding to _device_ or _mountpoint_ is ejected. If no name is specified, the default name */dev/cdrom* is used. The device may be addressed by device name (e.g., 'sda'), device path (e.g., '/dev/sda'), UUID=__uuid__ or LABEL=__label__ tags. + +There are four different methods of ejecting, depending on whether the device is a CD-ROM, SCSI device, removable floppy, or tape. By default *eject* tries all four methods in order until it succeeds. + +If a device partition is specified, the whole-disk device is used. + +If the device or a device partition is currently mounted, it is unmounted before ejecting. The eject is processed on exclusive open block device file descriptor if *--no-unmount* or *--force* are not specified. + +== OPTIONS + +*-a*, **--auto on**|*off*:: +This option controls the auto-eject mode, supported by some devices. 
When enabled, the drive automatically ejects when the device is closed. + +*-c*, *--changerslot* _slot_:: +With this option a CD slot can be selected from an ATAPI/IDE CD-ROM changer. The CD-ROM drive cannot be in use (mounted data CD or playing a music CD) for a change request to work. Please also note that the first slot of the changer is referred to as 0, not 1. + +*-d*, *--default*:: +List the default device name. + +*-F*, *--force*:: +Force eject, don't check device type, don't open device with exclusive lock. A reported success may be a false positive on non hot-pluggable devices. + +*-f*, *--floppy*:: +This option specifies that the drive should be ejected using a removable floppy disk eject command. + +*-i*, **--manualeject on**|*off*:: +This option controls locking of the hardware eject button. When enabled, the drive will not be ejected when the button is pressed. This is useful when you are carrying a laptop in a bag or case and don't want it to eject if the button is inadvertently pressed. + +*-M*, *--no-partitions-unmount*:: +The option tells *eject* to not try to unmount other partitions on partitioned devices. If another partition is still mounted, the program will not attempt to eject the media. It will attempt to unmount only the device or mountpoint given on the command line. + +*-m*, *--no-unmount*:: +The option tells *eject* to not try to unmount at all. If this option is not specified then *eject* opens the device with *O_EXCL* flag to be sure that the device is not used (since v2.35). + +*-n*, *--noop*:: +With this option the selected device is displayed but no action is performed. + +*-p*, *--proc*:: +This option allows you to use _/proc/mounts_ instead of _/etc/mtab_. It also passes the *-n* option to *umount*(8). + +*-q*, *--tape*:: +This option specifies that the drive should be ejected using a tape drive offline command. + +*-r*, *--cdrom*:: +This option specifies that the drive should be ejected using a CDROM eject command. 
+ +*-s*, *--scsi*:: +This option specifies that the drive should be ejected using SCSI commands. + +*-T*, *--traytoggle*:: +With this option the drive is given a CD-ROM tray close command if it's opened, and a CD-ROM tray eject command if it's closed. Not all devices support this command, because it uses the above CD-ROM tray close command. + +*-t*, *--trayclose*:: +With this option the drive is given a CD-ROM tray close command. Not all devices support this command. + +include::man-common/help-version.adoc[] + +*-v*, *--verbose*:: +Run in verbose mode; more information is displayed about what the command is doing. + +*-X*, *--listspeed*:: +With this option the CD-ROM drive will be probed to detect the available speeds. The output is a list of speeds which can be used as an argument of the *-x* option. This only works with Linux 2.6.13 or higher, on previous versions solely the maximum speed will be reported. Also note that some drives may not correctly report the speed and therefore this option does not work with them. + +*-x*, *--cdspeed* _speed_:: +With this option the drive is given a CD-ROM select speed command. The _speed_ argument is a number indicating the desired speed (e.g., 8 for 8X speed), or 0 for maximum data rate. Not all devices support this command and you can only specify speeds that the drive is capable of. Every time the media is changed this option is cleared. This option can be used alone, or with the *-t* and *-c* options. + +== EXIT STATUS + +Returns 0 if operation was successful, 1 if operation failed or command syntax was not valid. + +== NOTES + +*eject* only works with devices that support one or more of the four methods of ejecting. This includes most CD-ROM drives (IDE, SCSI, and proprietary), some SCSI tape drives, JAZ drives, ZIP drives (parallel port, SCSI, and IDE versions), and LS120 removable floppies. Users have also reported success with floppy drives on Sun SPARC and Apple Macintosh systems. 
If *eject* does not work, it is most likely a limitation of the kernel driver for the device and not the *eject* program itself. + +The *-r*, *-s*, *-f*, and *-q* options allow controlling which methods are used to eject. More than one method can be specified. If none of these options are specified, it tries all four (this works fine in most cases). + +*eject* may not always be able to determine if the device is mounted (e.g., if it has several names). If the device name is a symbolic link, *eject* will follow the link and use the device that it points to. + +If *eject* determines that the device can have multiple partitions, it will attempt to unmount all mounted partitions of the device before ejecting (see also *--no-partitions-unmount*). If an unmount fails, the program will not attempt to eject the media. + +You can eject an audio CD. Some CD-ROM drives will refuse to open the tray if the drive is empty. Some devices do not support the tray close command. + +If the auto-eject feature is enabled, then the drive will always be ejected after running this command. Not all Linux kernel CD-ROM drivers support the auto-eject mode. There is no way to find out the state of the auto-eject mode. + +You need appropriate privileges to access the device files. Running as root is required to eject some devices (e.g., SCSI devices). + +== AUTHORS + +mailto:tranter@pobox.com[Jeff Tranter] - original author, mailto:kzak@redhat.com[Karel Zak] and mailto:mluscon@redhat.com[Michal Luscon] - util-linux version. 
+ +== SEE ALSO + +*findmnt*(8), +*lsblk*(8), +*mount*(8), +*umount*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/eject.c b/sys-utils/eject.c new file mode 100644 index 0000000..5cd0bea --- /dev/null +++ b/sys-utils/eject.c @@ -0,0 +1,1056 @@ +/* + * Copyright (C) 1994-2005 Jeff Tranter (tranter@pobox.com) + * Copyright (C) 2012 Karel Zak <kzak@redhat.com> + * Copyright (C) Michal Luscon <mluscon@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <fcntl.h> +#include <limits.h> +#include <err.h> +#include <stdarg.h> + +#include <getopt.h> +#include <errno.h> +#include <regex.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <sys/wait.h> +#include <sys/mtio.h> +#include <linux/cdrom.h> +#include <linux/fd.h> +#include <sys/mount.h> +#include <scsi/scsi.h> +#include <scsi/sg.h> +#include <scsi/scsi_ioctl.h> +#include <sys/time.h> + +#include <libmount.h> + +#include "c.h" +#include "closestream.h" +#include "nls.h" +#include "strutils.h" +#include "xalloc.h" +#include "pathnames.h" +#include "sysfs.h" +#include "monotonic.h" + +/* + * sg_io_hdr_t driver_status -- see kernel include/scsi/scsi.h + */ +#ifndef DRIVER_SENSE +# define DRIVER_SENSE 0x08 +#endif + + +#define EJECT_DEFAULT_DEVICE "/dev/cdrom" + + +/* Used by the toggle_tray() function. If ejecting the tray takes this + * time or less, the tray was probably already ejected, so we close it + * again. 
+ */ +#define TRAY_WAS_ALREADY_OPEN_USECS 200000 /* about 0.2 seconds */ + +struct eject_control { + struct libmnt_table *mtab; + char *device; /* device or mount point to be ejected */ + int fd; /* file descriptor for device */ + unsigned int /* command flags and arguments */ + a_option:1, + c_option:1, + d_option:1, + F_option:1, + f_option:1, + i_option:1, + M_option:1, + m_option:1, + n_option:1, + p_option:1, + q_option:1, + r_option:1, + s_option:1, + T_option:1, + t_option:1, + v_option:1, + X_option:1, + x_option:1, + a_arg:1, + i_arg:1; + + unsigned int force_exclusive; /* use O_EXCL */ + + long int c_arg; /* changer slot number */ + long int x_arg; /* cd speed */ +}; + +static void vinfo(const char *fmt, va_list va) +{ + fprintf(stdout, "%s: ", program_invocation_short_name); + vprintf(fmt, va); + fputc('\n', stdout); +} + +static inline void __attribute__ ((__format__ (__printf__, 2, 3))) + verbose(const struct eject_control *ctl, const char *fmt, ...) +{ + va_list va; + + if (!ctl->v_option) + return; + + va_start(va, fmt); + vinfo(fmt, va); + va_end(va); +} + +static inline __attribute__ ((__format__ (__printf__, 1, 2))) + void info(const char *fmt, ...) 
+{ + va_list va; + va_start(va, fmt); + vinfo(fmt, va); + va_end(va); +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, + _(" %s [options] [<device>|<mountpoint>]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Eject removable media.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -a, --auto <on|off> turn auto-eject feature on or off\n" + " -c, --changerslot <slot> switch discs on a CD-ROM changer\n" + " -d, --default display default device\n" + " -f, --floppy eject floppy\n" + " -F, --force don't care about device type\n" + " -i, --manualeject <on|off> toggle manual eject protection on/off\n" + " -m, --no-unmount do not unmount device even if it is mounted\n" + " -M, --no-partitions-unmount do not unmount another partitions\n" + " -n, --noop don't eject, just show device found\n" + " -p, --proc use /proc/mounts instead of /etc/mtab\n" + " -q, --tape eject tape\n" + " -r, --cdrom eject CD-ROM\n" + " -s, --scsi eject SCSI device\n" + " -t, --trayclose close tray\n" + " -T, --traytoggle toggle tray\n" + " -v, --verbose enable verbose output\n" + " -x, --cdspeed <speed> set CD-ROM max speed\n" + " -X, --listspeed list CD-ROM available speeds\n"), + out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(29)); + + fputs(_("\nBy default tries -r, -s, -f, and -q in order until success.\n"), out); + printf(USAGE_MAN_TAIL("eject(1)")); + + exit(EXIT_SUCCESS); +} + + +/* Handle command line options. 
*/ +static void parse_args(struct eject_control *ctl, int argc, char **argv) +{ + static const struct option long_opts[] = + { + {"auto", required_argument, NULL, 'a'}, + {"cdrom", no_argument, NULL, 'r'}, + {"cdspeed", required_argument, NULL, 'x'}, + {"changerslot", required_argument, NULL, 'c'}, + {"default", no_argument, NULL, 'd'}, + {"floppy", no_argument, NULL, 'f'}, + {"force", no_argument, NULL, 'F'}, + {"help", no_argument, NULL, 'h'}, + {"listspeed", no_argument, NULL, 'X'}, + {"manualeject", required_argument, NULL, 'i'}, + {"noop", no_argument, NULL, 'n'}, + {"no-unmount", no_argument, NULL, 'm'}, + {"no-partitions-unmount", no_argument, NULL, 'M' }, + {"proc", no_argument, NULL, 'p'}, + {"scsi", no_argument, NULL, 's'}, + {"tape", no_argument, NULL, 'q'}, + {"trayclose", no_argument, NULL, 't'}, + {"traytoggle", no_argument, NULL, 'T'}, + {"verbose", no_argument, NULL, 'v'}, + {"version", no_argument, NULL, 'V'}, + {NULL, 0, NULL, 0} + }; + int c; + + while ((c = getopt_long(argc, argv, + "a:c:i:x:dfFhnqrstTXvVpmM", long_opts, NULL)) != -1) { + switch (c) { + case 'a': + ctl->a_option = 1; + ctl->a_arg = parse_switch(optarg, _("argument error"), + "on", "off", "1", "0", NULL); + break; + case 'c': + ctl->c_option = 1; + ctl->c_arg = strtoul_or_err(optarg, _("invalid argument to --changerslot/-c option")); + break; + case 'x': + ctl->x_option = 1; + ctl->x_arg = strtoul_or_err(optarg, _("invalid argument to --cdspeed/-x option")); + break; + case 'd': + ctl->d_option = 1; + break; + case 'f': + ctl->f_option = 1; + break; + case 'F': + ctl->F_option = 1; + break; + case 'i': + ctl->i_option = 1; + ctl->i_arg = parse_switch(optarg, _("argument error"), + "on", "off", "1", "0", NULL); + break; + case 'm': + ctl->m_option = 1; + break; + case 'M': + ctl->M_option = 1; + break; + case 'n': + ctl->n_option = 1; + break; + case 'p': + ctl->p_option = 1; + break; + case 'q': + ctl->q_option = 1; + break; + case 'r': + ctl->r_option = 1; + break; + case 's': + 
ctl->s_option = 1; + break; + case 't': + ctl->t_option = 1; + break; + case 'T': + ctl->T_option = 1; + break; + case 'X': + ctl->X_option = 1; + break; + case 'v': + ctl->v_option = 1; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + break; + } + } + + /* check for a single additional argument */ + if ((argc - optind) > 1) + errx(EXIT_FAILURE, _("too many arguments")); + + if ((argc - optind) == 1) + ctl->device = xstrdup(argv[optind]); +} + +/* + * Given name, such as foo, see if any of the following exist: + * + * foo (if foo starts with '.' or '/') + * /dev/foo + * + * If found, return the full path. If not found, return 0. + * Returns pointer to dynamically allocated string. + */ +static char *find_device(const char *name) +{ + if (!name) + return NULL; + + if ((*name == '.' || *name == '/') && access(name, F_OK) == 0) + return xstrdup(name); + + char buf[PATH_MAX]; + + snprintf(buf, sizeof(buf), "/dev/%s", name); + if (access(buf, F_OK) == 0) + return xstrdup(buf); + + return NULL; +} + +/* Set or clear auto-eject mode. */ +static void auto_eject(const struct eject_control *ctl) +{ + int status = -1; + +#if defined(CDROM_SET_OPTIONS) && defined(CDROM_CLEAR_OPTIONS) + if (ctl->a_arg) + status = ioctl(ctl->fd, CDROM_SET_OPTIONS, CDO_AUTO_EJECT); + else + status = ioctl(ctl->fd, CDROM_CLEAR_OPTIONS, CDO_AUTO_EJECT); +#else + errno = ENOSYS; +#endif + if (status < 0) + err(EXIT_FAILURE,_("CD-ROM auto-eject command failed")); +} + +/* + * Stops CDROM from opening on manual eject button press. + * This can be useful when you carry your laptop + * in your bag while it's on and no CD inserted in it's drive. 
+ * Implemented as found in Documentation/userspace-api/ioctl/cdrom.rst + */ +static void manual_eject(const struct eject_control *ctl) +{ + if (ioctl(ctl->fd, CDROM_LOCKDOOR, ctl->i_arg) < 0) { + switch (errno) { + case EDRIVE_CANT_DO_THIS: + errx(EXIT_FAILURE, _("CD-ROM door lock is not supported")); + case EBUSY: + errx(EXIT_FAILURE, _("other users have the drive open and not CAP_SYS_ADMIN")); + default: + err(EXIT_FAILURE, _("CD-ROM lock door command failed")); + } + } + + if (ctl->i_arg) + info(_("CD-Drive may NOT be ejected with device button")); + else + info(_("CD-Drive may be ejected with device button")); +} + +/* + * Changer select. CDROM_SELECT_DISC is preferred, older kernels used + * CDROMLOADFROMSLOT. + */ +static void changer_select(const struct eject_control *ctl) +{ +#ifdef CDROM_SELECT_DISC + if (ioctl(ctl->fd, CDROM_SELECT_DISC, ctl->c_arg) < 0) + err(EXIT_FAILURE, _("CD-ROM select disc command failed")); + +#elif defined CDROMLOADFROMSLOT + if (ioctl(ctl->fd, CDROMLOADFROMSLOT, ctl->c_arg) != 0) + err(EXIT_FAILURE, _("CD-ROM load from slot command failed")); +#else + warnx(_("IDE/ATAPI CD-ROM changer not supported by this kernel\n") ); +#endif +} + +/* + * Close tray. Not supported by older kernels. + */ +static void close_tray(int fd) +{ + int status; + +#if defined(CDROMCLOSETRAY) || defined(CDIOCCLOSE) +#if defined(CDROMCLOSETRAY) + status = ioctl(fd, CDROMCLOSETRAY); +#elif defined(CDIOCCLOSE) + status = ioctl(fd, CDIOCCLOSE); +#endif + if (status != 0) + err(EXIT_FAILURE, _("CD-ROM tray close command failed")); +#else + warnx(_("CD-ROM tray close command not supported by this kernel\n")); +#endif +} + +/* + * Eject using CDROMEJECT ioctl. 
+ */ +static int eject_cdrom(int fd) +{ +#if defined(CDROMEJECT) + int ret = ioctl(fd, CDROM_LOCKDOOR, 0); + if (ret < 0) + return 0; + return ioctl(fd, CDROMEJECT) >= 0; +#elif defined(CDIOCEJECT) + return ioctl(fd, CDIOCEJECT) >= 0; +#else + warnx(_("CD-ROM eject unsupported")); + errno = ENOSYS; + return 0; +#endif +} + +/* + * Toggle tray. + * + * Written by Benjamin Schwenk <benjaminschwenk@yahoo.de> and + * Sybren Stuvel <sybren@thirdtower.com> + * + * Not supported by older kernels because it might use + * CloseTray(). + * + */ +static void toggle_tray(int fd) +{ +#ifdef CDROM_DRIVE_STATUS + /* First ask the CDROM for info, otherwise fall back to manual. */ + switch (ioctl(fd, CDROM_DRIVE_STATUS)) { + case CDS_TRAY_OPEN: + close_tray(fd); + return; + + case CDS_NO_DISC: + case CDS_DISC_OK: + if (!eject_cdrom(fd)) + err(EXIT_FAILURE, _("CD-ROM eject command failed")); + return; + case CDS_NO_INFO: + warnx(_("no CD-ROM information available")); + return; + case CDS_DRIVE_NOT_READY: + warnx(_("CD-ROM drive is not ready")); + return; + default: + err(EXIT_FAILURE, _("CD-ROM status command failed")); + } +#else + struct timeval time_start, time_stop; + int time_elapsed; + + /* Try to open the CDROM tray and measure the time therefore + * needed. In my experience the function needs less than 0.05 + * seconds if the tray was already open, and at least 1.5 seconds + * if it was closed. */ + gettime_monotonic(&time_start); + + /* Send the CDROMEJECT command to the device. */ + if (!eject_cdrom(fd)) + err(EXIT_FAILURE, _("CD-ROM eject command failed")); + + /* Get the second timestamp, to measure the time needed to open + * the tray. */ + gettime_monotonic(&time_stop); + + time_elapsed = (time_stop.tv_sec * 1000000 + time_stop.tv_usec) - + (time_start.tv_sec * 1000000 + time_start.tv_usec); + + /* If the tray "opened" too fast, we can be nearly sure, that it + * was already open. In this case, close it now. Else the tray was + * closed before. 
This would mean that we are done. */ + if (time_elapsed < TRAY_WAS_ALREADY_OPEN_USECS) + close_tray(fd); +#endif +} + +/* + * Select Speed of CD-ROM drive. + * Thanks to Roland Krivanek (krivanek@fmph.uniba.sk) + * http://dmpc.dbp.fmph.uniba.sk/~krivanek/cdrom_speed/ + */ +static void select_speed(const struct eject_control *ctl) +{ +#ifdef CDROM_SELECT_SPEED + if (ioctl(ctl->fd, CDROM_SELECT_SPEED, ctl->x_arg) != 0) + err(EXIT_FAILURE, _("CD-ROM select speed command failed")); +#else + warnx(_("CD-ROM select speed command not supported by this kernel")); +#endif +} + +/* + * Read Speed of CD-ROM drive. From Linux 2.6.13, the current speed + * is correctly reported + */ +static int read_speed(const char *devname) +{ + int drive_number = -1; + char *name; + FILE *f; + + f = fopen(_PATH_PROC_CDROMINFO, "r"); + if (!f) + err(EXIT_FAILURE, _("cannot open %s"), _PATH_PROC_CDROMINFO); + + name = strrchr(devname, '/') + 1; + + while (name && !feof(f)) { + char line[512]; + char *str; + + if (!fgets(line, sizeof(line), f)) + break; + + /* find drive number in line "drive name" */ + if (drive_number == -1) { + if (strncmp(line, "drive name:", 11) == 0) { + str = strtok(&line[11], "\t "); + drive_number = 0; + while (str && strncmp(name, str, strlen(name)) != 0) { + drive_number++; + str = strtok(NULL, "\t "); + if (!str) + errx(EXIT_FAILURE, + _("%s: failed to finding CD-ROM name"), + _PATH_PROC_CDROMINFO); + } + } + /* find line "drive speed" and read the correct speed */ + } else { + if (strncmp(line, "drive speed:", 12) == 0) { + int n; + + fclose(f); + + str = line + 12; + normalize_whitespace((unsigned char *) str); + + if (ul_strtos32(str, &n, 10) == 0) + return n; + + errx(EXIT_FAILURE, _("%s: failed to read speed"), + _PATH_PROC_CDROMINFO); + } + } + } + + errx(EXIT_FAILURE, _("failed to read speed")); +} + +/* + * List Speed of CD-ROM drive. 
+ */ +static void list_speeds(struct eject_control *ctl) +{ + int max_speed, curr_speed = 0; + + select_speed(ctl); + max_speed = read_speed(ctl->device); + + while (curr_speed < max_speed) { + ctl->x_arg = curr_speed + 1; + select_speed(ctl); + curr_speed = read_speed(ctl->device); + if (ctl->x_arg < curr_speed) + printf("%d ", curr_speed); + else + curr_speed = ctl->x_arg + 1; + } + + printf("\n"); +} + +/* + * Eject using SCSI SG_IO commands. Return 1 if successful, 0 otherwise. + */ +static int eject_scsi(const struct eject_control *ctl) +{ + int status, k; + sg_io_hdr_t io_hdr; + unsigned char allowRmBlk[6] = {ALLOW_MEDIUM_REMOVAL, 0, 0, 0, 0, 0}; + unsigned char startStop1Blk[6] = {START_STOP, 0, 0, 0, 1, 0}; + unsigned char startStop2Blk[6] = {START_STOP, 0, 0, 0, 2, 0}; + unsigned char inqBuff[2]; + unsigned char sense_buffer[32]; + + if ((ioctl(ctl->fd, SG_GET_VERSION_NUM, &k) < 0) || (k < 30000)) { + verbose(ctl, _("not an sg device, or old sg driver")); + return 0; + } + + memset(&io_hdr, 0, sizeof(sg_io_hdr_t)); + io_hdr.interface_id = 'S'; + io_hdr.cmd_len = 6; + io_hdr.mx_sb_len = sizeof(sense_buffer); + io_hdr.dxfer_direction = SG_DXFER_NONE; + io_hdr.dxfer_len = 0; + io_hdr.dxferp = inqBuff; + io_hdr.sbp = sense_buffer; + io_hdr.timeout = 10000; + + io_hdr.cmdp = allowRmBlk; + status = ioctl(ctl->fd, SG_IO, (void *)&io_hdr); + if (status < 0 || io_hdr.host_status || io_hdr.driver_status) + return 0; + + io_hdr.cmdp = startStop1Blk; + status = ioctl(ctl->fd, SG_IO, (void *)&io_hdr); + if (status < 0 || io_hdr.host_status) + return 0; + + /* Ignore errors when there is not medium -- in this case driver sense + * buffer sets MEDIUM NOT PRESENT (3a) bit. 
For more details see: + * http://www.tldp.org/HOWTO/archived/SCSI-Programming-HOWTO/SCSI-Programming-HOWTO-22.html#sec-sensecodes + * -- kzak Jun 2013 + */ + if (io_hdr.driver_status != 0 && + !(io_hdr.driver_status == DRIVER_SENSE && io_hdr.sbp && + io_hdr.sbp[12] == 0x3a)) + return 0; + + io_hdr.cmdp = startStop2Blk; + status = ioctl(ctl->fd, SG_IO, (void *)&io_hdr); + if (status < 0 || io_hdr.host_status || io_hdr.driver_status) + return 0; + + /* force kernel to reread partition table when new disc inserted */ + ioctl(ctl->fd, BLKRRPART); + return 1; +} + +/* + * Eject using FDEJECT ioctl. Return 1 if successful, 0 otherwise. + */ +static int eject_floppy(int fd) +{ + return ioctl(fd, FDEJECT) >= 0; +} + + +/* + * Rewind and eject using tape ioctl. Return 1 if successful, 0 otherwise. + */ +static int eject_tape(int fd) +{ + struct mtop op = { .mt_op = MTOFFL, .mt_count = 0 }; + + return ioctl(fd, MTIOCTOP, &op) >= 0; +} + + +/* umount a device. */ +static void umount_one(const struct eject_control *ctl, const char *name) +{ + int status; + + if (!name) + return; + + verbose(ctl, _("%s: unmounting"), name); + + switch (fork()) { + case 0: /* child */ + if (drop_permissions() != 0) + err(EXIT_FAILURE, _("drop permissions failed")); + if (ctl->p_option) + execl("/bin/umount", "/bin/umount", name, "-n", (char *)NULL); + else + execl("/bin/umount", "/bin/umount", name, (char *)NULL); + + errexec("/bin/umount"); + + case -1: + warn( _("unable to fork")); + break; + + default: /* parent */ + wait(&status); + if (WIFEXITED(status) == 0) + errx(EXIT_FAILURE, + _("unmount of `%s' did not exit normally"), name); + + if (WEXITSTATUS(status) != 0) + errx(EXIT_FAILURE, _("unmount of `%s' failed\n"), name); + break; + } +} + +/* Open a device file. */ +static void open_device(struct eject_control *ctl) +{ + int extra = ctl->F_option == 0 && /* never use O_EXCL on --force */ + ctl->force_exclusive ? 
O_EXCL : 0; + + ctl->fd = open(ctl->device, O_RDWR | O_NONBLOCK | extra); + if (ctl->fd < 0) + ctl->fd = open(ctl->device, O_RDONLY | O_NONBLOCK | extra); + if (ctl->fd == -1) + err(EXIT_FAILURE, _("cannot open %s"), ctl->device); +} + +/* + * See if device has been mounted by looking in mount table. If so, set + * device name and mount point name, and return 1, otherwise return 0. + */ +static int device_get_mountpoint(struct eject_control *ctl, char **devname, char **mnt) +{ + struct libmnt_fs *fs; + int rc; + + *mnt = NULL; + + if (!ctl->mtab) { + struct libmnt_cache *cache; + + ctl->mtab = mnt_new_table(); + if (!ctl->mtab) + err(EXIT_FAILURE, _("failed to initialize libmount table")); + + cache = mnt_new_cache(); + mnt_table_set_cache(ctl->mtab, cache); + mnt_unref_cache(cache); + + if (ctl->p_option) + rc = mnt_table_parse_file(ctl->mtab, _PATH_PROC_MOUNTINFO); + else + rc = mnt_table_parse_mtab(ctl->mtab, NULL); + if (rc) + err(EXIT_FAILURE, _("failed to parse mount table")); + } + + fs = mnt_table_find_source(ctl->mtab, *devname, MNT_ITER_BACKWARD); + if (!fs) { + /* maybe 'devname' is mountpoint rather than a real device */ + fs = mnt_table_find_target(ctl->mtab, *devname, MNT_ITER_BACKWARD); + if (fs) { + free(*devname); + *devname = xstrdup(mnt_fs_get_source(fs)); + } + } + + if (fs) + *mnt = xstrdup(mnt_fs_get_target(fs)); + return *mnt ? 0 : -1; +} + +static char *get_disk_devname(const char *device) +{ + struct stat st; + dev_t diskno = 0; + char diskname[128]; + + if (stat(device, &st) != 0) + return NULL; + + /* get whole-disk devno */ + if (sysfs_devno_to_wholedisk(st.st_rdev, diskname, + sizeof(diskname), &diskno) != 0) + return NULL; + + return st.st_rdev == diskno ? NULL : find_device(diskname); +} + +/* umount all partitions if -M not specified, otherwise returns + * number of the mounted partitions only. 
+ */ +static int umount_partitions(struct eject_control *ctl) +{ + struct path_cxt *pc = NULL; + dev_t devno; + DIR *dir = NULL; + struct dirent *d; + int count = 0; + + devno = sysfs_devname_to_devno(ctl->device); + if (devno) + pc = ul_new_sysfs_path(devno, NULL, NULL); + if (!pc) + return 0; + + /* open /sys/block/<wholedisk> */ + if (!(dir = ul_path_opendir(pc, NULL))) + goto done; + + /* scan for partition subdirs */ + while ((d = readdir(dir))) { + if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) + continue; + + if (sysfs_blkdev_is_partition_dirent(dir, d, ctl->device)) { + char *mnt = NULL; + char *dev = find_device(d->d_name); + + if (dev && device_get_mountpoint(ctl, &dev, &mnt) == 0) { + verbose(ctl, _("%s: mounted on %s"), dev, mnt); + if (!ctl->M_option) + umount_one(ctl, mnt); + count++; + } + free(dev); + free(mnt); + } + } + +done: + if (dir) + closedir(dir); + ul_unref_path(pc); + + return count; +} + +static int is_hotpluggable(const struct eject_control *ctl) +{ + struct path_cxt *pc = NULL; + dev_t devno; + int rc = 0; + + devno = sysfs_devname_to_devno(ctl->device); + if (devno) + pc = ul_new_sysfs_path(devno, NULL, NULL); + if (!pc) + return 0; + + rc = sysfs_blkdev_is_hotpluggable(pc); + ul_unref_path(pc); + return rc; +} + + +/* handle -x option */ +static void set_device_speed(struct eject_control *ctl) +{ + if (!ctl->x_option) + return; + + if (ctl->x_arg == 0) + verbose(ctl, _("setting CD-ROM speed to auto")); + else + verbose(ctl, _("setting CD-ROM speed to %ldX"), ctl->x_arg); + + open_device(ctl); + select_speed(ctl); + exit(EXIT_SUCCESS); +} + + +/* main program */ +int main(int argc, char **argv) +{ + char *disk = NULL; + char *mountpoint = NULL; + int worked = 0; /* set to 1 when successfully ejected */ + struct eject_control ctl = { .fd = -1 }; + + setlocale(LC_ALL,""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + /* parse the command line arguments */ + parse_args(&ctl, argc, 
argv); + + /* handle -d option */ + if (ctl.d_option) { + info(_("default device: `%s'"), EJECT_DEFAULT_DEVICE); + return EXIT_SUCCESS; + } + + if (!ctl.device) { + ctl.device = mnt_resolve_path(EJECT_DEFAULT_DEVICE, NULL); + verbose(&ctl, _("using default device `%s'"), ctl.device); + } else { + char *p; + + if (ctl.device[strlen(ctl.device) - 1] == '/') + ctl.device[strlen(ctl.device) - 1] = '\0'; + + /* figure out full device or mount point name */ + p = find_device(ctl.device); + if (p) + free(ctl.device); + else + p = ctl.device; + + ctl.device = mnt_resolve_spec(p, NULL); + free(p); + } + + if (!ctl.device) + errx(EXIT_FAILURE, _("unable to find device")); + + verbose(&ctl, _("device name is `%s'"), ctl.device); + + device_get_mountpoint(&ctl, &ctl.device, &mountpoint); + if (mountpoint) + verbose(&ctl, _("%s: mounted on %s"), ctl.device, mountpoint); + else + verbose(&ctl, _("%s: not mounted"), ctl.device); + + disk = get_disk_devname(ctl.device); + if (disk) { + verbose(&ctl, _("%s: disc device: %s (disk device will be used for eject)"), ctl.device, disk); + free(ctl.device); + ctl.device = disk; + disk = NULL; + } else { + struct stat st; + + if (stat(ctl.device, &st) != 0 || !S_ISBLK(st.st_mode)) + errx(EXIT_FAILURE, _("%s: not found mountpoint or device " + "with the given name"), ctl.device); + + verbose(&ctl, _("%s: is whole-disk device"), ctl.device); + } + + if (ctl.F_option == 0 && is_hotpluggable(&ctl) == 0) + errx(EXIT_FAILURE, _("%s: is not hot-pluggable device"), ctl.device); + + /* handle -n option */ + if (ctl.n_option) { + info(_("device is `%s'"), ctl.device); + verbose(&ctl, _("exiting due to -n/--noop option")); + goto done; + } + + /* handle -i option */ + if (ctl.i_option) { + open_device(&ctl); + manual_eject(&ctl); + goto done; + } + + /* handle -a option */ + if (ctl.a_option) { + if (ctl.a_arg) + verbose(&ctl, _("%s: enabling auto-eject mode"), ctl.device); + else + verbose(&ctl, _("%s: disabling auto-eject mode"), ctl.device); + 
open_device(&ctl); + auto_eject(&ctl); + goto done; + } + + /* handle -t option */ + if (ctl.t_option) { + verbose(&ctl, _("%s: closing tray"), ctl.device); + open_device(&ctl); + close_tray(ctl.fd); + set_device_speed(&ctl); + goto done; + } + + /* handle -T option */ + if (ctl.T_option) { + verbose(&ctl, _("%s: toggling tray"), ctl.device); + open_device(&ctl); + toggle_tray(ctl.fd); + set_device_speed(&ctl); + goto done; + } + + /* handle -X option */ + if (ctl.X_option) { + verbose(&ctl, _("%s: listing CD-ROM speed"), ctl.device); + open_device(&ctl); + list_speeds(&ctl); + goto done; + } + + /* handle -x option only */ + if (!ctl.c_option) + set_device_speed(&ctl); + + + /* + * Unmount all partitions if -m is not specified; or umount given + * mountpoint if -M is specified, otherwise print error of another + * partition is mounted. + */ + if (!ctl.m_option) { + int ct = umount_partitions(&ctl); /* umount all, or count mounted on -M */ + + if (ct == 0 && mountpoint) + umount_one(&ctl, mountpoint); /* probably whole-device */ + + if (ctl.M_option) { + if (ct == 1 && mountpoint) + umount_one(&ctl, mountpoint); + else if (ct) + errx(EXIT_FAILURE, _("error: %s: device in use"), ctl.device); + } + /* Now, we assume the device is no more used, use O_EXCL to be + * resistant against our bugs and possible races (someone else + * remounted the device). 
+ */ + ctl.force_exclusive = 1; + } + + /* handle -c option */ + if (ctl.c_option) { + verbose(&ctl, _("%s: selecting CD-ROM disc #%ld"), ctl.device, ctl.c_arg); + open_device(&ctl); + changer_select(&ctl); + set_device_speed(&ctl); + goto done; + } + + /* if user did not specify type of eject, try all four methods */ + if (ctl.r_option + ctl.s_option + ctl.f_option + ctl.q_option == 0) + ctl.r_option = ctl.s_option = ctl.f_option = ctl.q_option = 1; + + /* open device */ + open_device(&ctl); + + /* try various methods of ejecting until it works */ + if (ctl.r_option) { + verbose(&ctl, _("%s: trying to eject using CD-ROM eject command"), ctl.device); + worked = eject_cdrom(ctl.fd); + verbose(&ctl, worked ? _("CD-ROM eject command succeeded") : + _("CD-ROM eject command failed")); + } + + if (ctl.s_option && !worked) { + verbose(&ctl, _("%s: trying to eject using SCSI commands"), ctl.device); + worked = eject_scsi(&ctl); + verbose(&ctl, worked ? _("SCSI eject succeeded") : + _("SCSI eject failed")); + } + + if (ctl.f_option && !worked) { + verbose(&ctl, _("%s: trying to eject using floppy eject command"), ctl.device); + worked = eject_floppy(ctl.fd); + verbose(&ctl, worked ? _("floppy eject command succeeded") : + _("floppy eject command failed")); + } + + if (ctl.q_option && !worked) { + verbose(&ctl, _("%s: trying to eject using tape offline command"), ctl.device); + worked = eject_tape(ctl.fd); + verbose(&ctl, worked ? 
_("tape offline command succeeded") : + _("tape offline command failed")); + } + + if (!worked) + errx(EXIT_FAILURE, _("unable to eject")); + +done: + /* cleanup */ + if (ctl.fd >= 0) + close(ctl.fd); + + free(ctl.device); + free(mountpoint); + + mnt_unref_table(ctl.mtab); + + return EXIT_SUCCESS; +} diff --git a/sys-utils/fallocate.1 b/sys-utils/fallocate.1 new file mode 100644 index 0000000..92bd5ec --- /dev/null +++ b/sys-utils/fallocate.1 @@ -0,0 +1,141 @@ +'\" t +.\" Title: fallocate +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "FALLOCATE" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +fallocate \- preallocate or deallocate space to a file +.SH "SYNOPSIS" +.sp +\fBfallocate\fP [\fB\-c\fP|\fB\-p\fP|\fB\-z\fP] [\fB\-o\fP \fIoffset\fP] \fB\-l\fP \fIlength\fP [\fB\-n\fP] \fIfilename\fP +.sp +\fBfallocate\fP \fB\-d\fP [\fB\-o\fP \fIoffset\fP] [\fB\-l\fP \fIlength\fP] \fIfilename\fP +.sp +\fBfallocate\fP \fB\-x\fP [\fB\-o\fP \fIoffset\fP] \fB\-l\fP \fIlength filename\fP +.SH "DESCRIPTION" +.sp +\fBfallocate\fP is used to manipulate the allocated disk space for a file, either to deallocate or preallocate it. For filesystems which support the \fBfallocate\fP(2) system call, preallocation is done quickly by allocating blocks and marking them as uninitialized, requiring no IO to the data blocks. This is much faster than creating a file by filling it with zeroes. +.sp +The exit status returned by \fBfallocate\fP is 0 on success and 1 on failure. 
+.SH "OPTIONS" +.sp +The \fIlength\fP and \fIoffset\fP arguments may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB, and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB, and YB. +.sp +The options \fB\-\-collapse\-range\fP, \fB\-\-dig\-holes\fP, \fB\-\-punch\-hole\fP, and \fB\-\-zero\-range\fP are mutually exclusive. +.sp +\fB\-c\fP, \fB\-\-collapse\-range\fP +.RS 4 +Removes a byte range from a file, without leaving a hole. The byte range to be collapsed starts at \fIoffset\fP and continues for \fIlength\fP bytes. At the completion of the operation, the contents of the file starting at the location \fIoffset\fP+\fIlength\fP will be appended at the location \fIoffset\fP, and the file will be \fIlength\fP bytes smaller. The option \fB\-\-keep\-size\fP may not be specified for the collapse\-range operation. +.sp +Available since Linux 3.15 for ext4 (only for extent\-based files) and XFS. +.sp +A filesystem may place limitations on the granularity of the operation, in order to ensure efficient implementation. Typically, \fIoffset\fP and \fIlength\fP must be a multiple of the filesystem logical block size, which varies according to the filesystem type and configuration. If a filesystem has such a requirement, the operation will fail with the error \fBEINVAL\fP if this requirement is violated. +.RE +.sp +\fB\-d\fP, \fB\-\-dig\-holes\fP +.RS 4 +Detect and dig holes. This makes the file sparse in\-place, without using extra disk space. The minimum size of the hole depends on filesystem I/O block size (usually 4096 bytes). Also, when using this option, \fB\-\-keep\-size\fP is implied. If no range is specified by \fB\-\-offset\fP and \fB\-\-length\fP, then the entire file is analyzed for holes. 
+.sp +You can think of this option as doing a "\fBcp \-\-sparse\fP" and then renaming the destination file to the original, without the need for extra disk space. +.sp +See \fB\-\-punch\-hole\fP for a list of supported filesystems. +.RE +.sp +\fB\-i\fP, \fB\-\-insert\-range\fP +.RS 4 +Insert a hole of \fIlength\fP bytes from \fIoffset\fP, shifting existing data. +.RE +.sp +\fB\-l\fP, \fB\-\-length\fP \fIlength\fP +.RS 4 +Specifies the length of the range, in bytes. +.RE +.sp +\fB\-n\fP, \fB\-\-keep\-size\fP +.RS 4 +Do not modify the apparent length of the file. This may effectively allocate blocks past EOF, which can be removed with a truncate. +.RE +.sp +\fB\-o\fP, \fB\-\-offset\fP \fIoffset\fP +.RS 4 +Specifies the beginning offset of the range, in bytes. +.RE +.sp +\fB\-p\fP, \fB\-\-punch\-hole\fP +.RS 4 +Deallocates space (i.e., creates a hole) in the byte range starting at \fIoffset\fP and continuing for \fIlength\fP bytes. Within the specified range, partial filesystem blocks are zeroed, and whole filesystem blocks are removed from the file. After a successful call, subsequent reads from this range will return zeroes. This option may not be specified at the same time as the \fB\-\-zero\-range\fP option. Also, when using this option, \fB\-\-keep\-size\fP is implied. +.sp +Supported for XFS (since Linux 2.6.38), ext4 (since Linux 3.0), Btrfs (since Linux 3.7), tmpfs (since Linux 3.5) and gfs2 (since Linux 4.16). +.RE +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Enable verbose mode. +.RE +.sp +\fB\-x\fP, \fB\-\-posix\fP +.RS 4 +Enable POSIX operation mode. In that mode the allocation operation always completes, but it may take longer when fast allocation is not supported by the underlying filesystem. +.RE +.sp +\fB\-z\fP, \fB\-\-zero\-range\fP +.RS 4 +Zeroes space in the byte range starting at \fIoffset\fP and continuing for \fIlength\fP bytes. Within the specified range, blocks are preallocated for the regions that span the holes in the file.
After a successful call, subsequent reads from this range will return zeroes. +.sp +Zeroing is done within the filesystem, preferably by converting the range into unwritten extents. This approach means that the specified range will not be physically zeroed out on the device (except for partial blocks at either end of the range), and I/O is (otherwise) required only to update metadata. +.sp +Option \fB\-\-keep\-size\fP can be specified to prevent file length modification. +.sp +Available since Linux 3.14 for ext4 (only for extent\-based files) and XFS. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "AUTHORS" +.sp +.MTO "sandeen\(atredhat.com" "Eric Sandeen" "," +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +\fBtruncate\fP(1), +\fBfallocate\fP(2), +\fBposix_fallocate\fP(3) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBfallocate\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/fallocate.1.adoc b/sys-utils/fallocate.1.adoc new file mode 100644 index 0000000..edcca8e --- /dev/null +++ b/sys-utils/fallocate.1.adoc @@ -0,0 +1,98 @@ +//po4a: entry man manual += fallocate(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: fallocate + +== NAME + +fallocate - preallocate or deallocate space to a file + +== SYNOPSIS + +*fallocate* [*-c*|*-p*|*-z*] [*-o* _offset_] *-l* _length_ [*-n*] _filename_ + +*fallocate* *-d* [*-o* _offset_] [*-l* _length_] _filename_ + +*fallocate* *-x* [*-o* _offset_] *-l* _length filename_ + +== DESCRIPTION + +*fallocate* is used to manipulate the allocated disk space for a file, either to deallocate or preallocate it. For filesystems which support the *fallocate*(2) system call, preallocation is done quickly by allocating blocks and marking them as uninitialized, requiring no IO to the data blocks. This is much faster than creating a file by filling it with zeroes. + +The exit status returned by *fallocate* is 0 on success and 1 on failure. + +== OPTIONS + +The _length_ and _offset_ arguments may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB, and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB, and YB. + +The options *--collapse-range*, *--dig-holes*, *--punch-hole*, and *--zero-range* are mutually exclusive. + +*-c*, *--collapse-range*:: +Removes a byte range from a file, without leaving a hole. The byte range to be collapsed starts at _offset_ and continues for _length_ bytes. At the completion of the operation, the contents of the file starting at the location __offset__+_length_ will be appended at the location _offset_, and the file will be _length_ bytes smaller. 
The option *--keep-size* may not be specified for the collapse-range operation. ++ +Available since Linux 3.15 for ext4 (only for extent-based files) and XFS. ++ +A filesystem may place limitations on the granularity of the operation, in order to ensure efficient implementation. Typically, _offset_ and _length_ must be a multiple of the filesystem logical block size, which varies according to the filesystem type and configuration. If a filesystem has such a requirement, the operation will fail with the error *EINVAL* if this requirement is violated. + +*-d*, *--dig-holes*:: +Detect and dig holes. This makes the file sparse in-place, without using extra disk space. The minimum size of the hole depends on filesystem I/O block size (usually 4096 bytes). Also, when using this option, *--keep-size* is implied. If no range is specified by *--offset* and *--length*, then the entire file is analyzed for holes. ++ +You can think of this option as doing a "*cp --sparse*" and then renaming the destination file to the original, without the need for extra disk space. ++ +See *--punch-hole* for a list of supported filesystems. + +*-i*, *--insert-range*:: +Insert a hole of _length_ bytes from _offset_, shifting existing data. + +*-l*, *--length* _length_:: +Specifies the length of the range, in bytes. + +*-n*, *--keep-size*:: +Do not modify the apparent length of the file. This may effectively allocate blocks past EOF, which can be removed with a truncate. + +*-o*, *--offset* _offset_:: +Specifies the beginning offset of the range, in bytes. + +*-p*, *--punch-hole*:: +Deallocates space (i.e., creates a hole) in the byte range starting at _offset_ and continuing for _length_ bytes. Within the specified range, partial filesystem blocks are zeroed, and whole filesystem blocks are removed from the file. After a successful call, subsequent reads from this range will return zeroes. This option may not be specified at the same time as the *--zero-range* option. 
Also, when using this option, *--keep-size* is implied. ++ +Supported for XFS (since Linux 2.6.38), ext4 (since Linux 3.0), Btrfs (since Linux 3.7), tmpfs (since Linux 3.5) and gfs2 (since Linux 4.16). + +*-v*, *--verbose*:: +Enable verbose mode. + +*-x*, *--posix*:: +Enable POSIX operation mode. In that mode the allocation operation always completes, but it may take longer when fast allocation is not supported by the underlying filesystem. + +*-z*, *--zero-range*:: +Zeroes space in the byte range starting at _offset_ and continuing for _length_ bytes. Within the specified range, blocks are preallocated for the regions that span the holes in the file. After a successful call, subsequent reads from this range will return zeroes. ++ +Zeroing is done within the filesystem, preferably by converting the range into unwritten extents. This approach means that the specified range will not be physically zeroed out on the device (except for partial blocks at either end of the range), and I/O is (otherwise) required only to update metadata. ++ +Option *--keep-size* can be specified to prevent file length modification. ++ +Available since Linux 3.14 for ext4 (only for extent-based files) and XFS. + +include::man-common/help-version.adoc[] + +== AUTHORS + +mailto:sandeen@redhat.com[Eric Sandeen], +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +*truncate*(1), +*fallocate*(2), +*posix_fallocate*(3) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/fallocate.c b/sys-utils/fallocate.c new file mode 100644 index 0000000..307fbd8 --- /dev/null +++ b/sys-utils/fallocate.c @@ -0,0 +1,443 @@ +/* + * fallocate - utility to use the fallocate system call + * + * Copyright (C) 2008-2009 Red Hat, Inc. All rights reserved. 
+ * Written by Eric Sandeen <sandeen@redhat.com> + * Karel Zak <kzak@redhat.com> + * + * cvtnum routine taken from xfsprogs, + * Copyright (c) 2003-2005 Silicon Graphics, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <getopt.h> +#include <limits.h> +#include <string.h> + +#ifndef HAVE_FALLOCATE +# include <sys/syscall.h> +#endif + +#if defined(HAVE_LINUX_FALLOC_H) && \ + (!defined(FALLOC_FL_KEEP_SIZE) || !defined(FALLOC_FL_PUNCH_HOLE) || \ + !defined(FALLOC_FL_COLLAPSE_RANGE) || !defined(FALLOC_FL_ZERO_RANGE) || \ + !defined(FALLOC_FL_INSERT_RANGE)) +# include <linux/falloc.h> /* non-libc fallback for FALLOC_FL_* flags */ +#endif + + +#ifndef FALLOC_FL_KEEP_SIZE +# define FALLOC_FL_KEEP_SIZE 0x1 +#endif + +#ifndef FALLOC_FL_PUNCH_HOLE +# define FALLOC_FL_PUNCH_HOLE 0x2 +#endif + +#ifndef FALLOC_FL_COLLAPSE_RANGE +# define FALLOC_FL_COLLAPSE_RANGE 0x8 +#endif + +#ifndef FALLOC_FL_ZERO_RANGE +# define FALLOC_FL_ZERO_RANGE 0x10 +#endif + +#ifndef FALLOC_FL_INSERT_RANGE +# define FALLOC_FL_INSERT_RANGE 0x20 +#endif + +#include "nls.h" +#include "strutils.h" +#include "c.h" +#include "closestream.h" +#include "xalloc.h" +#include "optutils.h" + +static int 
verbose; /* incremented once for every -v/--verbose on the command line */ +static char *filename; /* path taken from the command line; used in messages */ + /* Print the help text to stdout and exit with EXIT_SUCCESS; does not return. */ +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, + _(" %s [options] <filename>\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Preallocate space to, or deallocate space from a file.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -c, --collapse-range remove a range from the file\n"), out); + fputs(_(" -d, --dig-holes detect zeroes and replace with holes\n"), out); + fputs(_(" -i, --insert-range insert a hole at range, shifting existing data\n"), out); + fputs(_(" -l, --length <num> length for range operations, in bytes\n"), out); + fputs(_(" -n, --keep-size maintain the apparent size of the file\n"), out); + fputs(_(" -o, --offset <num> offset for range operations, in bytes\n"), out); + fputs(_(" -p, --punch-hole replace a range with a hole (implies -n)\n"), out); + fputs(_(" -z, --zero-range zero and ensure allocation of a range\n"), out); +#ifdef HAVE_POSIX_FALLOCATE + fputs(_(" -x, --posix use posix_fallocate(3) instead of fallocate(2)\n"), out); +#endif + fputs(_(" -v, --verbose verbose mode\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(22)); + + fputs(USAGE_ARGUMENTS, out); + printf(USAGE_ARG_SIZE(_("<num>"))); + + printf(USAGE_MAN_TAIL("fallocate(1)")); + + exit(EXIT_SUCCESS); +} + /* Parse a size string with strtosize(); return the parsed value, or -1 when strtosize() returns non-zero. The uintmax_t result is converted to loff_t without a range check. */ +static loff_t cvtnum(char *s) +{ + uintmax_t x; + + if (strtosize(s, &x)) + return -1LL; + + return x; +} + /* Run fallocate(2) -- the libc wrapper when HAVE_FALLOCATE is defined, the raw syscall otherwise -- and terminate with an error message when it fails. */ +static void xfallocate(int fd, int mode, off_t offset, off_t length) +{ + int error; + +#ifdef HAVE_FALLOCATE + error = fallocate(fd, mode, offset, length); +#else + error = syscall(SYS_fallocate, fd, mode, offset, length); +#endif + /* + * EOPNOTSUPP: The FALLOC_FL_KEEP_SIZE is unsupported + * ENOSYS: The filesystem does not support sys_fallocate + */ + if (error < 0) { + if ((mode & FALLOC_FL_KEEP_SIZE) && errno == EOPNOTSUPP) + errx(EXIT_FAILURE, _("fallocate failed: keep size mode is unsupported")); 
+ err(EXIT_FAILURE, _("fallocate failed")); + } +} + +#ifdef HAVE_POSIX_FALLOCATE +static void xposix_fallocate(int fd, off_t offset, off_t length) +{ + int error = posix_fallocate(fd, offset, length); + if (error < 0) { + err(EXIT_FAILURE, _("fallocate failed")); + } +} +#endif + +/* The real buffer size has to be bufsize + sizeof(uintptr_t) */ +static int is_nul(void *buf, size_t bufsize) +{ + typedef uintptr_t word; + void const *vp; + char const *cbuf = buf, *cp; + word const *wp = buf; + + /* set sentinel */ + memset((char *) buf + bufsize, '\1', sizeof(word)); + + /* Find first nonzero *word*, or the word with the sentinel. */ + while (*wp++ == 0) + continue; + + /* Find the first nonzero *byte*, or the sentinel. */ + vp = wp - 1; + cp = vp; + + while (*cp++ == 0) + continue; + + return cbuf + bufsize < cp; +} + +static void dig_holes(int fd, off_t file_off, off_t len) +{ + off_t file_end = len ? file_off + len : 0; + off_t hole_start = 0, hole_sz = 0; + uintmax_t ct = 0; + size_t bufsz; + char *buf; + struct stat st; +#if defined(POSIX_FADV_SEQUENTIAL) && defined(HAVE_POSIX_FADVISE) + off_t cache_start = file_off; + /* + * We don't want to call POSIX_FADV_DONTNEED to discard cached + * data in PAGE_SIZE steps. IMHO it's overkill (too many syscalls). + * + * Let's assume that 1MiB (on system with 4K page size) is just + * a good compromise. 
+ * -- kzak Feb-2014 + */ + const size_t cachesz = getpagesize() * 256; +#endif + + if (fstat(fd, &st) != 0) + err(EXIT_FAILURE, _("stat of %s failed"), filename); + + bufsz = st.st_blksize; + + if (lseek(fd, file_off, SEEK_SET) < 0) + err(EXIT_FAILURE, _("seek on %s failed"), filename); + + /* buffer + extra space for is_nul() sentinel */ + buf = xmalloc(bufsz + sizeof(uintptr_t)); + while (file_end == 0 || file_off < file_end) { + /* + * Detect data area (skip holes) + */ + off_t end, off; + int clamped = 0; /* "end" was cut down to the requested range */ + + off = lseek(fd, file_off, SEEK_DATA); + if ((off == -1 && errno == ENXIO) || + (file_end && off >= file_end)) + break; + + end = lseek(fd, off, SEEK_HOLE); + if (file_end && end > file_end) { + end = file_end; + clamped = 1; + } + + if (off < 0 || end < 0) + break; + +#if defined(POSIX_FADV_SEQUENTIAL) && defined(HAVE_POSIX_FADVISE) + /* the third argument is a length, not an end offset */ + (void) posix_fadvise(fd, off, end - off, POSIX_FADV_SEQUENTIAL); +#endif + /* + * Dig holes in the area + */ + while (off < end) { + ssize_t rsz = pread(fd, buf, bufsz, off); + if (rsz < 0 && errno) + err(EXIT_FAILURE, _("%s: read failed"), filename); + if (end && rsz > 0 && off > end - rsz) + rsz = end - off; + if (rsz <= 0) + break; + + if (is_nul(buf, rsz)) { + if (!hole_sz) /* new hole detected */ + hole_start = off; + hole_sz += rsz; + } else if (hole_sz) { + xfallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, + hole_start, hole_sz); + ct += hole_sz; + hole_sz = hole_start = 0; + } + +#if defined(POSIX_FADV_DONTNEED) && defined(HAVE_POSIX_FADVISE) + /* discard cached data */ + if (off - cache_start > (off_t) cachesz) { + size_t clen = off - cache_start; + + clen = (clen / cachesz) * cachesz; + (void) posix_fadvise(fd, cache_start, clen, POSIX_FADV_DONTNEED); + cache_start = cache_start + clen; + } +#endif + off += rsz; + } + if (hole_sz) { + off_t alloc_sz = hole_sz; + /* + * Round up to the block boundary only when the data area + * really ends here. If "end" was cut down to the requested + * range, the bytes behind it are data outside that range + * and must not be punched. + */ + if (off >= end && !clamped) + alloc_sz += st.st_blksize; /* meet block boundary */ + xfallocate(fd, FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE, + hole_start, alloc_sz); + ct += hole_sz; + /* + * This hole is finished; reset the state so the next data + * area does not re-punch it or extend it from a stale start. + */ + hole_sz = hole_start = 0; + } + file_off = off; 
+ } + + free(buf); + + if (verbose) { + char *str = size_to_human_string(SIZE_SUFFIX_3LETTER | SIZE_SUFFIX_SPACE, ct); + fprintf(stdout, _("%s: %s (%ju bytes) converted to sparse holes.\n"), + filename, str, ct); + free(str); + } +} + /* Parse the command line, open the file and run the selected operation: dig_holes() for -d, otherwise posix_fallocate(3) or fallocate(2). */ +int main(int argc, char **argv) +{ + int c; + int fd; + int mode = 0; + int dig = 0; + int posix = 0; + loff_t length = -2LL; /* -2: --length not given; cvtnum() returns -1 on a parse error */ + loff_t offset = 0; + + static const struct option longopts[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + { "keep-size", no_argument, NULL, 'n' }, + { "punch-hole", no_argument, NULL, 'p' }, + { "collapse-range", no_argument, NULL, 'c' }, + { "dig-holes", no_argument, NULL, 'd' }, + { "insert-range", no_argument, NULL, 'i' }, + { "zero-range", no_argument, NULL, 'z' }, + { "offset", required_argument, NULL, 'o' }, + { "length", required_argument, NULL, 'l' }, + { "posix", no_argument, NULL, 'x' }, + { "verbose", no_argument, NULL, 'v' }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'c', 'd', 'p', 'z' }, + { 'c', 'n' }, + { 'x', 'c', 'd', 'i', 'n', 'p', 'z'}, /* NOTE(review): this row is not in ASCII order although the comment above requires it -- confirm err_exclusive_options() copes with that */ + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, "hvVncpdizxl:o:", longopts, NULL)) + != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch(c) { + case 'c': + mode |= FALLOC_FL_COLLAPSE_RANGE; + break; + case 'd': + dig = 1; + break; + case 'i': + mode |= FALLOC_FL_INSERT_RANGE; + break; + case 'l': + length = cvtnum(optarg); + break; + case 'n': + mode |= FALLOC_FL_KEEP_SIZE; + break; + case 'o': + offset = cvtnum(optarg); + break; + case 'p': + mode |= FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; + break; + case 'z': + mode |= FALLOC_FL_ZERO_RANGE; + break; + case 'x': +#ifdef HAVE_POSIX_FALLOCATE + posix = 1; + break; +#else + errx(EXIT_FAILURE, _("posix_fallocate 
support is not compiled")); +#endif + case 'v': + verbose++; + break; + + case 'h': + usage(); /* declared __noreturn__, so there is no fall-through */ + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (optind == argc) + errx(EXIT_FAILURE, _("no filename specified")); + + filename = argv[optind++]; + + if (optind != argc) + errx(EXIT_FAILURE, _("unexpected number of arguments")); + + if (dig) { + /* for --dig-holes the default is to analyze the whole file */ + if (length == -2LL) + length = 0; + if (length < 0) + errx(EXIT_FAILURE, _("invalid length value specified")); + } else { + /* it's safer to require the range specification (--length --offset) */ + if (length == -2LL) + errx(EXIT_FAILURE, _("no length argument specified")); + if (length <= 0) + errx(EXIT_FAILURE, _("invalid length value specified")); + } + if (offset < 0) + errx(EXIT_FAILURE, _("invalid offset value specified")); + + /* O_CREAT makes sense only for the default fallocate(2) behavior + * when no mode is specified and new space is allocated */ + fd = open(filename, O_RDWR | (!dig && !mode ? 
O_CREAT : 0), + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH); + if (fd < 0) + err(EXIT_FAILURE, _("cannot open %s"), filename); + + if (dig) + dig_holes(fd, offset, length); + else { +#ifdef HAVE_POSIX_FALLOCATE + if (posix) + xposix_fallocate(fd, offset, length); + else +#endif + xfallocate(fd, mode, offset, length); /* default path: fallocate(2) with the collected mode flags */ + + if (verbose) { /* report what was done; the first matching mode flag picks the message */ + char *str = size_to_human_string(SIZE_SUFFIX_3LETTER | SIZE_SUFFIX_SPACE, length); /* NOTE(review): length is loff_t but is printed with %ju below -- confirm the types agree on all targets */ + + if (mode & FALLOC_FL_PUNCH_HOLE) + fprintf(stdout, _("%s: %s (%ju bytes) hole created.\n"), + filename, str, length); + else if (mode & FALLOC_FL_COLLAPSE_RANGE) + fprintf(stdout, _("%s: %s (%ju bytes) removed.\n"), + filename, str, length); + else if (mode & FALLOC_FL_INSERT_RANGE) + fprintf(stdout, _("%s: %s (%ju bytes) inserted.\n"), + filename, str, length); + else if (mode & FALLOC_FL_ZERO_RANGE) + fprintf(stdout, _("%s: %s (%ju bytes) zeroed.\n"), + filename, str, length); + else + fprintf(stdout, _("%s: %s (%ju bytes) allocated.\n"), + filename, str, length); + free(str); + } + } + + if (close_fd(fd) != 0) + err(EXIT_FAILURE, _("write failed: %s"), filename); + + return EXIT_SUCCESS; +} diff --git a/sys-utils/flock.1 b/sys-utils/flock.1 new file mode 100644 index 0000000..043414d --- /dev/null +++ b/sys-utils/flock.1 @@ -0,0 +1,166 @@ +'\" t +.\" Title: flock +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "FLOCK" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. 
LINKSTYLE blue R < > +.\} +.SH "NAME" +flock \- manage locks from shell scripts +.SH "SYNOPSIS" +.sp +\fBflock\fP [options] \fIfile\fP|\fIdirectory\fP \fIcommand\fP [\fIarguments\fP] +.sp +\fBflock\fP [options] \fIfile\fP|\fIdirectory\fP \fB\-c\fP \fIcommand\fP +.sp +\fBflock\fP [options] \fInumber\fP +.SH "DESCRIPTION" +.sp +This utility manages \fBflock\fP(2) locks from within shell scripts or from the command line. +.sp +The first and second of the above forms wrap the lock around the execution of a \fIcommand\fP, in a manner similar to \fBsu\fP(1) or \fBnewgrp\fP(1). They lock a specified \fIfile\fP or \fIdirectory\fP, which is created (assuming appropriate permissions) if it does not already exist. By default, if the lock cannot be immediately acquired, \fBflock\fP waits until the lock is available. +.sp +The third form uses an open file by its file descriptor \fInumber\fP. See the examples below for how that can be used. +.SH "OPTIONS" +.sp +\fB\-c\fP, \fB\-\-command\fP \fIcommand\fP +.RS 4 +Pass a single \fIcommand\fP, without arguments, to the shell with \fB\-c\fP. +.RE +.sp +\fB\-E\fP, \fB\-\-conflict\-exit\-code\fP \fInumber\fP +.RS 4 +The exit status used when the \fB\-n\fP option is in use, and the conflicting lock exists, or the \fB\-w\fP option is in use, and the timeout is reached. The default value is \fB1\fP. The \fInumber\fP has to be in the range of 0 to 255. +.RE +.sp +\fB\-F\fP, \fB\-\-no\-fork\fP +.RS 4 +Do not fork before executing \fIcommand\fP. Upon execution the flock process is replaced by \fIcommand\fP which continues to hold the lock. This option is incompatible with \fB\-\-close\fP as there would otherwise be nothing left to hold the lock. +.RE +.sp +\fB\-e\fP, \fB\-x\fP, \fB\-\-exclusive\fP +.RS 4 +Obtain an exclusive lock, sometimes called a write lock. This is the default. +.RE +.sp +\fB\-n\fP, \fB\-\-nb\fP, \fB\-\-nonblock\fP +.RS 4 +Fail rather than wait if the lock cannot be immediately acquired. 
See the \fB\-E\fP option for the exit status used. +.RE +.sp +\fB\-o\fP, \fB\-\-close\fP +.RS 4 +Close the file descriptor on which the lock is held before executing \fIcommand\fP. This is useful if \fIcommand\fP spawns a child process which should not be holding the lock. +.RE +.sp +\fB\-s\fP, \fB\-\-shared\fP +.RS 4 +Obtain a shared lock, sometimes called a read lock. +.RE +.sp +\fB\-u\fP, \fB\-\-unlock\fP +.RS 4 +Drop a lock. This is usually not required, since a lock is automatically dropped when the file is closed. However, it may be required in special cases, for example if the enclosed command group may have forked a background process which should not be holding the lock. +.RE +.sp +\fB\-w\fP, \fB\-\-wait\fP, \fB\-\-timeout\fP \fIseconds\fP +.RS 4 +Fail if the lock cannot be acquired within \fIseconds\fP. Decimal fractional values are allowed. See the \fB\-E\fP option for the exit status used. The zero number of \fIseconds\fP is interpreted as \fB\-\-nonblock\fP. +.RE +.sp +\fB\-\-verbose\fP +.RS 4 +Report how long it took to acquire the lock, or why the lock could not be obtained. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "EXIT STATUS" +.sp +The command uses <sysexits.h> exit status values for everything, except when using either of the options \fB\-n\fP or \fB\-w\fP which report a failure to acquire the lock with an exit status given by the \fB\-E\fP option, or 1 by default. The exit status given by \fB\-E\fP has to be in the range of 0 to 255. +.sp +When using the \fIcommand\fP variant, and executing the child worked, then the exit status is that of the child command. +.SH "NOTES" +.sp +\fBflock\fP does not detect deadlock. See \fBflock\fP(2) for details. +.sp +Some file systems (e. g. NFS and CIFS) have a limited implementation of \fBflock\fP(2) and flock may always fail. For details see \fBflock\fP(2), \fBnfs\fP(5) and \fBmount.cifs\fP(8). 
Depending on mount options, flock can always fail there. +.SH "EXAMPLES" +.sp +Note that "shell> " in examples is a command line prompt. +.sp +shell1> flock /tmp \-c cat; shell2> flock \-w .007 /tmp \-c echo; /bin/echo $? +.RS 4 +Set exclusive lock to directory \fI/tmp\fP and the second command will fail. +.RE +.sp +shell1> flock \-s /tmp \-c cat; shell2> flock \-s \-w .007 /tmp \-c echo; /bin/echo $? +.RS 4 +Set shared lock to directory \fI/tmp\fP and the second command will not fail. Notice that attempting to get exclusive lock with second command would fail. +.RE +.sp +shell> flock \-x local\-lock\-file echo \(aqa b c\(aq +.RS 4 +Grab the exclusive lock "local\-lock\-file" before running echo with \(aqa b c\(aq. +.RE +.sp +(; flock \-n 9 || exit 1; # ... commands executed under lock ...; ) 9>/var/lock/mylockfile +.RS 4 +The form is convenient inside shell scripts. The mode used to open the file doesn\(cqt matter to \fBflock\fP; using \fI>\fP or \fI>>\fP allows the lockfile to be created if it does not already exist, however, write permission is required. Using \fI<\fP requires that the file already exists but only read permission is required. +.RE +.sp +[ "${FLOCKER}" != "$0" ] && exec env FLOCKER="$0" flock \-en "$0" "$0" "$@" || : +.RS 4 +This is useful boilerplate code for shell scripts. Put it at the top of the shell script you want to lock and it\(cqll automatically lock itself on the first run. If the environment variable \fB$FLOCKER\fP is not set to the shell script that is being run, then execute \fBflock\fP and grab an exclusive non\-blocking lock (using the script itself as the lock file) before re\-execing itself with the right arguments. It also sets the \fBFLOCKER\fP environment variable to the right value so it doesn\(cqt run again. +.RE +.sp +shell> exec 4<>/var/lock/mylockfile; shell> flock \-n 4 +.RS 4 +This form is convenient for locking a file without spawning a subprocess. 
The shell opens the lock file for reading and writing as file descriptor 4, then \fBflock\fP is used to lock the descriptor. +.RE +.SH "AUTHORS" +.sp +.MTO "hpa\(atzytor.com" "H. Peter Anvin" "" +.SH "COPYRIGHT" +.sp +Copyright © 2003\-2006 H. Peter Anvin. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +.SH "SEE ALSO" +.sp +\fBflock\fP(2) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBflock\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/flock.1.adoc b/sys-utils/flock.1.adoc new file mode 100644 index 0000000..d5b4f10 --- /dev/null +++ b/sys-utils/flock.1.adoc @@ -0,0 +1,141 @@ +//po4a: entry man manual +//// +Copyright 2003-2006 H. Peter Anvin - All Rights Reserved + +Permission is hereby granted, free of charge, to any person +obtaining a copy of this software and associated documentation +files (the "Software"), to deal in the Software without +restriction, including without limitation the rights to use, +copy, modify, merge, publish, distribute, sublicense, and/or +sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice shall +be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +OTHER DEALINGS IN THE SOFTWARE. +//// += flock(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: flock +:copyright: © + +== NAME + +flock - manage locks from shell scripts + +== SYNOPSIS + +*flock* [options] _file_|_directory_ _command_ [_arguments_] + +*flock* [options] _file_|_directory_ *-c* _command_ + +*flock* [options] _number_ + +== DESCRIPTION + +This utility manages *flock*(2) locks from within shell scripts or from the command line. + +The first and second of the above forms wrap the lock around the execution of a _command_, in a manner similar to *su*(1) or *newgrp*(1). 
They lock a specified _file_ or _directory_, which is created (assuming appropriate permissions) if it does not already exist. By default, if the lock cannot be immediately acquired, *flock* waits until the lock is available. + +The third form uses an open file by its file descriptor _number_. See the examples below for how that can be used. + +== OPTIONS + +*-c*, *--command* _command_:: +Pass a single _command_, without arguments, to the shell with *-c*. + +*-E*, *--conflict-exit-code* _number_:: +The exit status used when the *-n* option is in use, and the conflicting lock exists, or the *-w* option is in use, and the timeout is reached. The default value is *1*. The _number_ has to be in the range of 0 to 255. + +*-F*, *--no-fork*:: +Do not fork before executing _command_. Upon execution the flock process is replaced by _command_ which continues to hold the lock. This option is incompatible with *--close* as there would otherwise be nothing left to hold the lock. + +*-e*, *-x*, *--exclusive*:: +Obtain an exclusive lock, sometimes called a write lock. This is the default. + +*-n*, *--nb*, *--nonblock*:: +Fail rather than wait if the lock cannot be immediately acquired. See the *-E* option for the exit status used. + +*-o*, *--close*:: +Close the file descriptor on which the lock is held before executing _command_. This is useful if _command_ spawns a child process which should not be holding the lock. + +*-s*, *--shared*:: +Obtain a shared lock, sometimes called a read lock. + +*-u*, *--unlock*:: +Drop a lock. This is usually not required, since a lock is automatically dropped when the file is closed. However, it may be required in special cases, for example if the enclosed command group may have forked a background process which should not be holding the lock. + +*-w*, *--wait*, *--timeout* _seconds_:: +Fail if the lock cannot be acquired within _seconds_. Decimal fractional values are allowed. See the *-E* option for the exit status used. 
The zero number of _seconds_ is interpreted as *--nonblock*. + +*--verbose*:: +Report how long it took to acquire the lock, or why the lock could not be obtained. + +include::man-common/help-version.adoc[] + +== EXIT STATUS + +The command uses <sysexits.h> exit status values for everything, except when using either of the options *-n* or *-w* which report a failure to acquire the lock with an exit status given by the *-E* option, or 1 by default. The exit status given by *-E* has to be in the range of 0 to 255. + +When using the _command_ variant, and executing the child worked, then the exit status is that of the child command. + +== NOTES + +*flock* does not detect deadlock. See *flock*(2) for details. + +Some file systems (e. g. NFS and CIFS) have a limited implementation of *flock*(2) and flock may always fail. For details see *flock*(2), *nfs*(5) and *mount.cifs*(8). Depending on mount options, flock can always fail there. + +== EXAMPLES + +Note that "shell> " in examples is a command line prompt. + +shell1> flock /tmp -c cat; shell2> flock -w .007 /tmp -c echo; /bin/echo $?:: +Set exclusive lock to directory _/tmp_ and the second command will fail. + +shell1> flock -s /tmp -c cat; shell2> flock -s -w .007 /tmp -c echo; /bin/echo $?:: +Set shared lock to directory _/tmp_ and the second command will not fail. Notice that attempting to get exclusive lock with second command would fail. + +shell> flock -x local-lock-file echo 'a b c':: +Grab the exclusive lock "local-lock-file" before running echo with 'a b c'. + +(; flock -n 9 || exit 1; # ... commands executed under lock ...; ) 9>/var/lock/mylockfile:: +The form is convenient inside shell scripts. The mode used to open the file doesn't matter to *flock*; using _>_ or _>>_ allows the lockfile to be created if it does not already exist, however, write permission is required. Using _<_ requires that the file already exists but only read permission is required. 
+ +[ "$\{FLOCKER}" != "$0" ] && exec env FLOCKER="$0" flock -en "$0" "$0" "$@" || : :: +This is useful boilerplate code for shell scripts. Put it at the top of the shell script you want to lock and it'll automatically lock itself on the first run. If the environment variable *$FLOCKER* is not set to the shell script that is being run, then execute *flock* and grab an exclusive non-blocking lock (using the script itself as the lock file) before re-execing itself with the right arguments. It also sets the *FLOCKER* environment variable to the right value so it doesn't run again. + +shell> exec 4<>/var/lock/mylockfile; shell> flock -n 4:: +This form is convenient for locking a file without spawning a subprocess. The shell opens the lock file for reading and writing as file descriptor 4, then *flock* is used to lock the descriptor. + +== AUTHORS + +mailto:hpa@zytor.com[H. Peter Anvin] + +== COPYRIGHT + +//TRANSLATORS: Keep {copyright} untranslated. +Copyright {copyright} 2003-2006 H. Peter Anvin. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +== SEE ALSO + +*flock*(2) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/flock.c b/sys-utils/flock.c new file mode 100644 index 0000000..6e93bab --- /dev/null +++ b/sys-utils/flock.c @@ -0,0 +1,385 @@ +/* Copyright 2003-2005 H. 
Peter Anvin - All Rights Reserved + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall + * be included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <paths.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +#include "c.h" +#include "nls.h" +#include "strutils.h" +#include "closestream.h" +#include "monotonic.h" +#include "timer.h" + +static void __attribute__((__noreturn__)) usage(void) +{ + fputs(USAGE_HEADER, stdout); + printf( + _(" %1$s [options] <file>|<directory> <command> [<argument>...]\n" + " %1$s [options] <file>|<directory> -c <command>\n" + " %1$s [options] <file descriptor number>\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, stdout); + fputs(_("Manage file locks from shell scripts.\n"), stdout); + + fputs(USAGE_OPTIONS, stdout); + fputs(_( " -s, --shared get a shared lock\n"), stdout); + fputs(_( " -x, --exclusive get an exclusive lock (default)\n"), stdout); + fputs(_( " -u, --unlock remove a lock\n"), stdout); + fputs(_( " -n, --nonblock fail rather than wait\n"), stdout); + fputs(_( " -w, --timeout <secs> wait for a limited amount of time\n"), stdout); + fputs(_( " -E, --conflict-exit-code <number> exit code after conflict or timeout\n"), stdout); + fputs(_( " -o, --close close file descriptor before running command\n"), stdout); + fputs(_( " -c, --command <command> run a single command string through the shell\n"), stdout); + fputs(_( " -F, --no-fork execute command without forking\n"), stdout); + fputs(_( " --verbose increase verbosity\n"), stdout); + fputs(USAGE_SEPARATOR, stdout); + printf(USAGE_HELP_OPTIONS(26)); + printf(USAGE_MAN_TAIL("flock(1)")); + exit(EXIT_SUCCESS); +} + +static sig_atomic_t timeout_expired = 0; + +static void timeout_handler(int sig __attribute__((__unused__)), + siginfo_t *info, + void *context __attribute__((__unused__))) +{ +#ifdef HAVE_TIMER_CREATE + 
if (info->si_code == SI_TIMER) +#endif + timeout_expired = 1; +} + +static int open_file(const char *filename, int *flags) +{ + + int fd; + int fl = *flags == 0 ? O_RDONLY : *flags; + + errno = 0; + fl |= O_NOCTTY | O_CREAT; + fd = open(filename, fl, 0666); + + /* Linux doesn't like O_CREAT on a directory, even though it + * should be a no-op; POSIX doesn't allow O_RDWR or O_WRONLY + */ + if (fd < 0 && errno == EISDIR) { + fl = O_RDONLY | O_NOCTTY; + fd = open(filename, fl); + } + if (fd < 0) { + warn(_("cannot open lock file %s"), filename); + if (errno == ENOMEM || errno == EMFILE || errno == ENFILE) + exit(EX_OSERR); + if (errno == EROFS || errno == ENOSPC) + exit(EX_CANTCREAT); + exit(EX_NOINPUT); + } + *flags = fl; + return fd; +} + +static void __attribute__((__noreturn__)) run_program(char **cmd_argv) +{ + execvp(cmd_argv[0], cmd_argv); + + warn(_("failed to execute %s"), cmd_argv[0]); + _exit((errno == ENOMEM) ? EX_OSERR : EX_UNAVAILABLE); +} + +int main(int argc, char *argv[]) +{ + struct ul_timer timer; + struct itimerval timeout; + int have_timeout = 0; + int type = LOCK_EX; + int block = 0; + int open_flags = 0; + int fd = -1; + int opt, ix; + int do_close = 0; + int no_fork = 0; + int status; + int verbose = 0; + struct timeval time_start, time_done; + /* + * The default exit code for lock conflict or timeout + * is specified in man flock.1 + */ + int conflict_exit_code = 1; + char **cmd_argv = NULL, *sh_c_argv[4]; + const char *filename = NULL; + enum { + OPT_VERBOSE = CHAR_MAX + 1 + }; + static const struct option long_options[] = { + {"shared", no_argument, NULL, 's'}, + {"exclusive", no_argument, NULL, 'x'}, + {"unlock", no_argument, NULL, 'u'}, + {"nonblocking", no_argument, NULL, 'n'}, + {"nb", no_argument, NULL, 'n'}, + {"timeout", required_argument, NULL, 'w'}, + {"wait", required_argument, NULL, 'w'}, + {"conflict-exit-code", required_argument, NULL, 'E'}, + {"close", no_argument, NULL, 'o'}, + {"no-fork", no_argument, NULL, 'F'}, + 
{"verbose", no_argument, NULL, OPT_VERBOSE}, + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {NULL, 0, NULL, 0} + }; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + strutils_set_exitcode(EX_USAGE); + + if (argc < 2) { + warnx(_("not enough arguments")); + errtryhelp(EX_USAGE); + } + + memset(&timeout, 0, sizeof timeout); + + optopt = 0; + while ((opt = + getopt_long(argc, argv, "+sexnoFuw:E:hV?", long_options, + &ix)) != EOF) { + switch (opt) { + case 's': + type = LOCK_SH; + break; + case 'e': + case 'x': + type = LOCK_EX; + break; + case 'u': + type = LOCK_UN; + break; + case 'o': + do_close = 1; + break; + case 'F': + no_fork = 1; + break; + case 'n': + block = LOCK_NB; + break; + case 'w': + have_timeout = 1; + strtotimeval_or_err(optarg, &timeout.it_value, + _("invalid timeout value")); + break; + case 'E': + conflict_exit_code = strtos32_or_err(optarg, + _("invalid exit code")); + if (conflict_exit_code < 0 || conflict_exit_code > 255) + errx(EX_USAGE, _("exit code out of range (expected 0 to 255)")); + break; + case OPT_VERBOSE: + verbose = 1; + break; + + case 'V': + print_version(EX_OK); + case 'h': + usage(); + default: + errtryhelp(EX_USAGE); + } + } + + if (no_fork && do_close) + errx(EX_USAGE, + _("the --no-fork and --close options are incompatible")); + + if (argc > optind + 1) { + /* Run command */ + if (!strcmp(argv[optind + 1], "-c") || + !strcmp(argv[optind + 1], "--command")) { + if (argc != optind + 3) + errx(EX_USAGE, + _("%s requires exactly one command argument"), + argv[optind + 1]); + cmd_argv = sh_c_argv; + cmd_argv[0] = getenv("SHELL"); + if (!cmd_argv[0] || !*cmd_argv[0]) + cmd_argv[0] = _PATH_BSHELL; + cmd_argv[1] = "-c"; + cmd_argv[2] = argv[optind + 2]; + cmd_argv[3] = NULL; + } else { + cmd_argv = &argv[optind + 1]; + } + + filename = argv[optind]; + fd = open_file(filename, &open_flags); + + } else if (optind < argc) { + /* Use provided 
file descriptor */ + fd = strtos32_or_err(argv[optind], _("bad file descriptor")); + } else { + /* Bad options */ + errx(EX_USAGE, _("requires file descriptor, file or directory")); + } + + if (have_timeout) { + if (timeout.it_value.tv_sec == 0 && + timeout.it_value.tv_usec == 0) { + /* -w 0 is equivalent to -n; this has to be + * special-cased because setting an itimer to zero + * means disabled! + */ + have_timeout = 0; + block = LOCK_NB; + } else + if (setup_timer(&timer, &timeout, &timeout_handler)) + err(EX_OSERR, _("cannot set up timer")); + } + + if (verbose) + gettime_monotonic(&time_start); + while (flock(fd, type | block)) { + switch (errno) { + case EWOULDBLOCK: + /* -n option set and failed to lock. */ + if (verbose) + warnx(_("failed to get lock")); + exit(conflict_exit_code); + case EINTR: + /* Signal received */ + if (timeout_expired) { + /* -w option set and failed to lock. */ + if (verbose) + warnx(_("timeout while waiting to get lock")); + exit(conflict_exit_code); + } + /* otherwise try again */ + continue; + case EIO: + case EBADF: /* since Linux 3.4 (commit 55725513) */ + /* Probably NFSv4 where flock() is emulated by fcntl(). + * Let's try to reopen in read-write mode. + */ + if (!(open_flags & O_RDWR) && + type != LOCK_SH && + filename && + access(filename, R_OK | W_OK) == 0) { + + close(fd); + open_flags = O_RDWR; + fd = open_file(filename, &open_flags); + + if (open_flags & O_RDWR) + break; + } + /* fallthrough */ + default: + /* Other errors */ + if (filename) + warn("%s", filename); + else + warn("%d", fd); + exit((errno == ENOLCK + || errno == ENOMEM) ? 
EX_OSERR : EX_DATAERR); + } + } + + if (have_timeout) + cancel_timer(&timer); + if (verbose) { + struct timeval delta; + + gettime_monotonic(&time_done); + timersub(&time_done, &time_start, &delta); + printf(_("%s: getting lock took %"PRId64".%06"PRId64" seconds\n"), + program_invocation_short_name, + (int64_t) delta.tv_sec, + (int64_t) delta.tv_usec); + } + status = EX_OK; + + if (cmd_argv) { + pid_t w, f; + /* Clear any inherited settings */ + signal(SIGCHLD, SIG_DFL); + if (verbose) + printf(_("%s: executing %s\n"), program_invocation_short_name, cmd_argv[0]); + + if (!no_fork) { + f = fork(); + if (f < 0) + err(EX_OSERR, _("fork failed")); + + /* child */ + else if (f == 0) { + if (do_close) + close(fd); + run_program(cmd_argv); + + /* parent */ + } else { + do { + w = waitpid(f, &status, 0); + if (w == -1 && errno != EINTR) + break; + } while (w != f); + + if (w == -1) { + status = EXIT_FAILURE; + warn(_("waitpid failed")); + } else if (WIFEXITED(status)) + status = WEXITSTATUS(status); + else if (WIFSIGNALED(status)) + status = WTERMSIG(status) + 128; + else + /* WTF? */ + status = EX_OSERR; + } + + } else + /* no-fork execution */ + run_program(cmd_argv); + } + + return status; +} diff --git a/sys-utils/fsfreeze.8 b/sys-utils/fsfreeze.8 new file mode 100644 index 0000000..be08dff --- /dev/null +++ b/sys-utils/fsfreeze.8 @@ -0,0 +1,88 @@ +'\" t +.\" Title: fsfreeze +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "FSFREEZE" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. 
LINKSTYLE blue R < > +.\} +.SH "NAME" +fsfreeze \- suspend access to a filesystem (Ext3/4, ReiserFS, JFS, XFS) +.SH "SYNOPSIS" +.sp +\fBfsfreeze\fP \fB\-\-freeze\fP|\fB\-\-unfreeze\fP \fImountpoint\fP +.SH "DESCRIPTION" +.sp +\fBfsfreeze\fP suspends or resumes access to a filesystem. +.sp +\fBfsfreeze\fP halts any new access to the filesystem and creates a stable image on disk. \fBfsfreeze\fP is intended to be used with hardware RAID devices that support the creation of snapshots. +.sp +\fBfsfreeze\fP is unnecessary for \fBdevice\-mapper\fP devices. The device\-mapper (and LVM) automatically freezes a filesystem on the device when a snapshot creation is requested. For more details see the \fBdmsetup\fP(8) man page. +.sp +The \fImountpoint\fP argument is the pathname of the directory where the filesystem is mounted. The filesystem must be mounted to be frozen (see \fBmount\fP(8)). +.sp +Note that access\-time updates are also suspended if the filesystem is mounted with the traditional atime behavior (mount option \fBstrictatime\fP, for more details see \fBmount\fP(8)). +.SH "OPTIONS" +.sp +\fB\-f\fP, \fB\-\-freeze\fP +.RS 4 +This option requests the specified filesystem to be frozen from new modifications. When this is selected, all ongoing transactions in the filesystem are allowed to complete, new \fBwrite\fP(2) system calls are halted, other calls which modify the filesystem are halted, and all dirty data, metadata, and log information are written to disk. Any process attempting to write to the frozen filesystem will block waiting for the filesystem to be unfrozen. +.sp +Note that even after freezing, the on\-disk filesystem can contain information on files that are still in the process of unlinking. These files will not be unlinked until the filesystem is unfrozen or a clean mount of the snapshot is complete. +.RE +.sp +\fB\-u\fP, \fB\-\-unfreeze\fP +.RS 4 +This option is used to un\-freeze the filesystem and allow operations to continue. 
Any filesystem modifications that were blocked by the freeze are unblocked and allowed to complete. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "FILESYSTEM SUPPORT" +.sp +This command will work only if the filesystem has support for freezing. The list of these filesystems includes (2016\-12\-18) \fBbtrfs\fP, \fBext2/3/4\fP, \fBf2fs\fP, \fBjfs\fP, \fBnilfs2\fP, \fBreiserfs\fP, and \fBxfs\fP. The previous list may be incomplete, as more filesystems get support. If in doubt, the easiest way to know if a filesystem has support is to create a small loopback mount and test freezing it. +.SH "NOTES" +.sp +This man page is based on \fBxfs_freeze\fP(8). +.SH "AUTHORS" +.sp +Written by Hajime Taira. +.SH "SEE ALSO" +.sp +\fBmount\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBfsfreeze\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/fsfreeze.8.adoc b/sys-utils/fsfreeze.8.adoc new file mode 100644 index 0000000..e21df5b --- /dev/null +++ b/sys-utils/fsfreeze.8.adoc @@ -0,0 +1,63 @@ +//po4a: entry man manual += fsfreeze(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: fsfreeze + +== NAME + +fsfreeze - suspend access to a filesystem (Ext3/4, ReiserFS, JFS, XFS) + +== SYNOPSIS + +*fsfreeze* *--freeze*|*--unfreeze* _mountpoint_ + +== DESCRIPTION + +*fsfreeze* suspends or resumes access to a filesystem. + +*fsfreeze* halts any new access to the filesystem and creates a stable image on disk. *fsfreeze* is intended to be used with hardware RAID devices that support the creation of snapshots. + +*fsfreeze* is unnecessary for *device-mapper* devices. The device-mapper (and LVM) automatically freezes a filesystem on the device when a snapshot creation is requested. For more details see the *dmsetup*(8) man page. + +The _mountpoint_ argument is the pathname of the directory where the filesystem is mounted. The filesystem must be mounted to be frozen (see *mount*(8)). + +Note that access-time updates are also suspended if the filesystem is mounted with the traditional atime behavior (mount option *strictatime*, for more details see *mount*(8)). + +== OPTIONS + +*-f*, *--freeze*:: +This option requests the specified filesystem to be frozen from new modifications. When this is selected, all ongoing transactions in the filesystem are allowed to complete, new *write*(2) system calls are halted, other calls which modify the filesystem are halted, and all dirty data, metadata, and log information are written to disk. Any process attempting to write to the frozen filesystem will block waiting for the filesystem to be unfrozen. ++ +Note that even after freezing, the on-disk filesystem can contain information on files that are still in the process of unlinking. 
These files will not be unlinked until the filesystem is unfrozen or a clean mount of the snapshot is complete. + +*-u*, *--unfreeze*:: +This option is used to un-freeze the filesystem and allow operations to continue. Any filesystem modifications that were blocked by the freeze are unblocked and allowed to complete. + +include::man-common/help-version.adoc[] + +== FILESYSTEM SUPPORT + +This command will work only if the filesystem has support for freezing. The list of these filesystems includes (2016-12-18) *btrfs*, *ext2/3/4*, *f2fs*, *jfs*, *nilfs2*, *reiserfs*, and *xfs*. The previous list may be incomplete, as more filesystems get support. If in doubt, the easiest way to know if a filesystem has support is to create a small loopback mount and test freezing it. + +== NOTES + +This man page is based on *xfs_freeze*(8). + +== AUTHORS + +Written by Hajime Taira. + +== SEE ALSO + +*mount*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/fsfreeze.c b/sys-utils/fsfreeze.c new file mode 100644 index 0000000..cd2bb47 --- /dev/null +++ b/sys-utils/fsfreeze.c @@ -0,0 +1,150 @@ +/* + * fsfreeze.c -- Filesystem freeze/unfreeze IO for Linux + * + * Copyright (C) 2010 Hajime Taira <htaira@redhat.com> + * Masatake Yamato <yamato@redhat.com> + * + * This program is free software. You can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation: either version 1 or + * (at your option) any later version.
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <fcntl.h> +#include <linux/fs.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <getopt.h> + +#include "c.h" +#include "blkdev.h" +#include "nls.h" +#include "closestream.h" +#include "optutils.h" + +enum fs_operation { + NOOP, + FREEZE, + UNFREEZE +}; + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, + _(" %s [options] <mountpoint>\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Suspend access to a filesystem.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -f, --freeze freeze the filesystem\n"), out); + fputs(_(" -u, --unfreeze unfreeze the filesystem\n"), out); + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(19)); + printf(USAGE_MAN_TAIL("fsfreeze(8)")); + + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + int fd = -1, c; + int action = NOOP, rc = EXIT_FAILURE; + char *path; + struct stat sb; + + static const struct option longopts[] = { + { "help", no_argument, NULL, 'h' }, + { "freeze", no_argument, NULL, 'f' }, + { "unfreeze", no_argument, NULL, 'u' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'f','u' }, /* freeze, unfreeze */ + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, "hfuV", longopts, NULL)) != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch(c) { + case 'f': + action = FREEZE; + break; + case 'u': + action = UNFREEZE; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (action == NOOP) + errx(EXIT_FAILURE, 
_("neither --freeze or --unfreeze specified")); + if (optind == argc) + errx(EXIT_FAILURE, _("no filename specified")); + path = argv[optind++]; + + if (optind != argc) { + warnx(_("unexpected number of arguments")); + errtryhelp(EXIT_FAILURE); + } + + fd = open(path, O_RDONLY); + if (fd < 0) + err(EXIT_FAILURE, _("cannot open %s"), path); + + if (fstat(fd, &sb) == -1) { + warn(_("stat of %s failed"), path); + goto done; + } + + if (!S_ISDIR(sb.st_mode)) { + warnx(_("%s: is not a directory"), path); + goto done; + } + + switch (action) { + case FREEZE: + if (ioctl(fd, FIFREEZE, 0)) { + warn(_("%s: freeze failed"), path); + goto done; + } + break; + case UNFREEZE: + if (ioctl(fd, FITHAW, 0)) { + warn(_("%s: unfreeze failed"), path); + goto done; + } + break; + default: + abort(); + } + + rc = EXIT_SUCCESS; +done: + close(fd); + return rc; +} + diff --git a/sys-utils/fstab.5 b/sys-utils/fstab.5 new file mode 100644 index 0000000..7e1d3b3 --- /dev/null +++ b/sys-utils/fstab.5 @@ -0,0 +1,144 @@ +'\" t +.\" Title: fstab +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: File formats +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "FSTAB" "5" "2022-05-11" "util\-linux 2.38.1" "File formats" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +fstab \- static information about the filesystems +.SH "SYNOPSIS" +.sp +\fI/etc/fstab\fP +.SH "DESCRIPTION" +.sp +The file \fBfstab\fP contains descriptive information about the filesystems the system can mount. \fBfstab\fP is only read by programs, and not written; it is the duty of the system administrator to properly create and maintain this file. 
The order of records in \fBfstab\fP is important because \fBfsck\fP(8), \fBmount\fP(8), and \fBumount\fP(8) sequentially iterate through \fBfstab\fP doing their thing. +.sp +Each filesystem is described on a separate line. Fields on each line are separated by tabs or spaces. Lines starting with \(aq#\(aq are comments. Blank lines are ignored. +.sp +The following is a typical example of an \fBfstab\fP entry: +.sp +.if n .RS 4 +.nf +.fam C +LABEL=t\-home2 /home ext4 defaults,auto_da_alloc 0 2 +.fam +.fi +.if n .RE +.SS "The first field (\fIfs_spec\fP)." +.sp +This field describes the block special device, remote filesystem or filesystem image for loop device to be mounted or swap file or swap partition to be enabled. +.sp +For ordinary mounts, it will hold (a link to) a block special device node (as created by \fBmknod\fP(2)) for the device to be mounted, like \fI/dev/cdrom\fP or \fI/dev/sdb7\fP. For NFS mounts, this field is \fI<host>:<dir>\fP, e.g., \fIknuth.aeb.nl:/\fP. For filesystems with no storage, any string can be used, and will show up in \fBdf\fP(1) output, for example. Typical usage is \fIproc\fP for \fBprocfs\fP; \fImem\fP, \fInone\fP, or \fItmpfs\fP for \fBtmpfs\fP. Other special filesystems, like \fBudev\fP and \fBsysfs\fP, are typically not listed in \fBfstab\fP. +.sp +LABEL=<label> or UUID=<uuid> may be given instead of a device name. This is the recommended method, as device names are often a coincidence of hardware detection order, and can change when other disks are added or removed. For example, \(aqLABEL=Boot\(aq or \(aqUUID=3e6be9de\-8139\-11d1\-9106\-a43f08d823a6\(aq. (Use a filesystem\-specific tool like \fBe2label\fP(8), \fBxfs_admin\fP(8), or \fBfatlabel\fP(8) to set LABELs on filesystems). +.sp +It\(cqs also possible to use \fBPARTUUID=\fP and \fBPARTLABEL=\fP. These partitions identifiers are supported for example for GUID Partition Table (GPT). 
+.sp +See \fBmount\fP(8), \fBblkid\fP(8) or \fBlsblk\fP(8) for more details about device identifiers. +.sp +Note that \fBmount\fP(8) uses UUIDs as strings. The string representation of the UUID should be based on lower case characters. But when specifying the volume ID of FAT or NTFS file systems upper case characters are used (e.g UUID="A40D\-85E7" or UUID="61DB7756DB7779B3"). +.SS "The second field (\fIfs_file\fP)." +.sp +This field describes the mount point (target) for the filesystem. For swap partitions, this field should be specified as `none\(aq. If the name of the mount point contains spaces or tabs these can be escaped as `\(rs040\(aq and \(aq\(rs011\(aq respectively. +.SS "The third field (\fIfs_vfstype\fP)." +.sp +This field describes the type of the filesystem. Linux supports many filesystem types: ext4, xfs, btrfs, f2fs, vfat, ntfs, hfsplus, tmpfs, sysfs, proc, iso9660, udf, squashfs, nfs, cifs, and many more. For more details, see \fBmount\fP(8). +.sp +An entry \fIswap\fP denotes a file or partition to be used for swapping, cf. \fBswapon\fP(8). An entry \fInone\fP is useful for bind or move mounts. +.sp +More than one type may be specified in a comma\-separated list. +.sp +\fBmount\fP(8) and \fBumount\fP(8) support filesystem \fIsubtypes\fP. The subtype is defined by \(aq.subtype\(aq suffix. For example \(aqfuse.sshfs\(aq. It\(cqs recommended to use subtype notation rather than add any prefix to the first fstab field (for example \(aqsshfs#example.com\(aq is deprecated). +.SS "The fourth field (\fIfs_mntops\fP)." +.sp +This field describes the mount options associated with the filesystem. +.sp +It is formatted as a comma\-separated list of options. It contains at least the type of mount (\fBro\fP or \fBrw\fP), plus any additional options appropriate to the filesystem type (including performance\-tuning options). For details, see \fBmount\fP(8) or \fBswapon\fP(8). 
+.sp +Basic filesystem\-independent options are: +.sp +\fBdefaults\fP +.RS 4 +use default options: rw, suid, dev, exec, auto, nouser, and async. +.RE +.sp +\fBnoauto\fP +.RS 4 +do not mount when \fBmount \-a\fP is given (e.g., at boot time) +.RE +.sp +\fBuser\fP +.RS 4 +allow a user to mount +.RE +.sp +\fBowner\fP +.RS 4 +allow device owner to mount +.RE +.sp +\fBcomment\fP +.RS 4 +or \fBx\-<name>\fP for use by fstab\-maintaining programs +.RE +.sp +\fBnofail\fP +.RS 4 +do not report errors for this device if it does not exist. +.RE +.SS "The fifth field (\fIfs_freq\fP)." +.sp +This field is used by \fBdump\fP(8) to determine which filesystems need to be dumped. Defaults to zero (don\(cqt dump) if not present. +.SS "The sixth field (\fIfs_passno\fP)." +.sp +This field is used by \fBfsck\fP(8) to determine the order in which filesystem checks are done at boot time. The root filesystem should be specified with a \fIfs_passno\fP of 1. Other filesystems should have a \fIfs_passno\fP of 2. Filesystems within a drive will be checked sequentially, but filesystems on different drives will be checked at the same time to utilize parallelism available in the hardware. Defaults to zero (don\(cqt check the filesystem) if not present. +.SH "FILES" +.sp +\fI/etc/fstab\fP, +\fI<fstab.h>\fP +.SH "NOTES" +.sp +The proper way to read records from \fBfstab\fP is to use the routines \fBgetmntent\fP(3) or \fBlibmount\fP. +.sp +The keyword \fBignore\fP as a filesystem type (3rd field) is no longer supported by the pure libmount based mount utility (since util\-linux v2.22). +.SH "HISTORY" +.sp +The ancestor of this \fBfstab\fP file format appeared in 4.0BSD. +.SH "SEE ALSO" +.sp +\fBgetmntent\fP(3), +\fBfs\fP(5), +\fBfindmnt\fP(8), +\fBmount\fP(8), +\fBswapon\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." 
+.SH "AVAILABILITY" +.sp +\fBfstab\fP is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/fstab.5.adoc b/sys-utils/fstab.5.adoc new file mode 100644 index 0000000..332d746 --- /dev/null +++ b/sys-utils/fstab.5.adoc @@ -0,0 +1,149 @@ +//po4a: entry man manual +//// +Copyright (c) 1980, 1989, 1991 The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. All advertising materials mentioning features or use of this software + must display the following acknowledgement: +This product includes software developed by the University of +California, Berkeley and its contributors. +4. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. 
+ + @(#)fstab.5 6.5 (Berkeley) 5/10/91 +//// += fstab(5) +:doctype: manpage +:man manual: File formats +:man source: util-linux {release-version} +:page-layout: base +:configfile: fstab + +== NAME + +fstab - static information about the filesystems + +== SYNOPSIS + +_/etc/fstab_ + +== DESCRIPTION + +The file *fstab* contains descriptive information about the filesystems the system can mount. *fstab* is only read by programs, and not written; it is the duty of the system administrator to properly create and maintain this file. The order of records in *fstab* is important because *fsck*(8), *mount*(8), and *umount*(8) sequentially iterate through *fstab* doing their thing. + +Each filesystem is described on a separate line. Fields on each line are separated by tabs or spaces. Lines starting with '#' are comments. Blank lines are ignored. + +The following is a typical example of an *fstab* entry: + +.... +LABEL=t-home2 /home ext4 defaults,auto_da_alloc 0 2 +.... + +=== The first field (_fs_spec_). + +This field describes the block special device, remote filesystem or filesystem image for loop device to be mounted or swap file or swap partition to be enabled. + +For ordinary mounts, it will hold (a link to) a block special device node (as created by *mknod*(2)) for the device to be mounted, like _/dev/cdrom_ or _/dev/sdb7_. For NFS mounts, this field is _<host>:<dir>_, e.g., _knuth.aeb.nl:/_. For filesystems with no storage, any string can be used, and will show up in *df*(1) output, for example. Typical usage is _proc_ for *procfs*; _mem_, _none_, or _tmpfs_ for *tmpfs*. Other special filesystems, like *udev* and *sysfs*, are typically not listed in *fstab*. + +LABEL=<label> or UUID=<uuid> may be given instead of a device name. This is the recommended method, as device names are often a coincidence of hardware detection order, and can change when other disks are added or removed. For example, 'LABEL=Boot' or 'UUID=3e6be9de-8139-11d1-9106-a43f08d823a6'. 
(Use a filesystem-specific tool like *e2label*(8), *xfs_admin*(8), or *fatlabel*(8) to set LABELs on filesystems). + +It's also possible to use *PARTUUID=* and *PARTLABEL=*. These partitions identifiers are supported for example for GUID Partition Table (GPT). + +See *mount*(8), *blkid*(8) or *lsblk*(8) for more details about device identifiers. + +Note that *mount*(8) uses UUIDs as strings. The string representation of the UUID should be based on lower case characters. But when specifying the volume ID of FAT or NTFS file systems upper case characters are used (e.g UUID="A40D-85E7" or UUID="61DB7756DB7779B3"). + +=== The second field (_fs_file_). + +This field describes the mount point (target) for the filesystem. For swap partitions, this field should be specified as `none'. If the name of the mount point contains spaces or tabs these can be escaped as `\040' and '\011' respectively. + +=== The third field (_fs_vfstype_). + +This field describes the type of the filesystem. Linux supports many filesystem types: ext4, xfs, btrfs, f2fs, vfat, ntfs, hfsplus, tmpfs, sysfs, proc, iso9660, udf, squashfs, nfs, cifs, and many more. For more details, see *mount*(8). + +An entry _swap_ denotes a file or partition to be used for swapping, cf. *swapon*(8). An entry _none_ is useful for bind or move mounts. + +More than one type may be specified in a comma-separated list. + +*mount*(8) and *umount*(8) support filesystem _subtypes_. The subtype is defined by '.subtype' suffix. For example 'fuse.sshfs'. It's recommended to use subtype notation rather than add any prefix to the first fstab field (for example 'sshfs#example.com' is deprecated). + +=== The fourth field (_fs_mntops_). + +This field describes the mount options associated with the filesystem. + +It is formatted as a comma-separated list of options. It contains at least the type of mount (*ro* or *rw*), plus any additional options appropriate to the filesystem type (including performance-tuning options). 
For details, see *mount*(8) or *swapon*(8). + +Basic filesystem-independent options are: + +*defaults*:: +use default options: rw, suid, dev, exec, auto, nouser, and async. +*noauto*:: +do not mount when *mount -a* is given (e.g., at boot time) +*user*:: +allow a user to mount +*owner*:: +allow device owner to mount +*comment*:: +or *x-<name>* for use by fstab-maintaining programs +*nofail*:: +do not report errors for this device if it does not exist. + +=== The fifth field (_fs_freq_). + +This field is used by *dump*(8) to determine which filesystems need to be dumped. Defaults to zero (don't dump) if not present. + +=== The sixth field (_fs_passno_). + +This field is used by *fsck*(8) to determine the order in which filesystem checks are done at boot time. The root filesystem should be specified with a _fs_passno_ of 1. Other filesystems should have a _fs_passno_ of 2. Filesystems within a drive will be checked sequentially, but filesystems on different drives will be checked at the same time to utilize parallelism available in the hardware. Defaults to zero (don't check the filesystem) if not present. + +== FILES + +_/etc/fstab_, +_<fstab.h>_ + +== NOTES + +The proper way to read records from *fstab* is to use the routines *getmntent*(3) or *libmount*. + +The keyword *ignore* as a filesystem type (3rd field) is no longer supported by the pure libmount based mount utility (since util-linux v2.22). + +== HISTORY + +The ancestor of this *fstab* file format appeared in 4.0BSD. 
+ +== SEE ALSO + +*getmntent*(3), +*fs*(5), +*findmnt*(8), +*mount*(8), +*swapon*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer-config.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/fstrim.8 b/sys-utils/fstrim.8 new file mode 100644 index 0000000..37d71b4 --- /dev/null +++ b/sys-utils/fstrim.8 @@ -0,0 +1,146 @@ +'\" t +.\" Title: fstrim +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "FSTRIM" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +fstrim \- discard unused blocks on a mounted filesystem +.SH "SYNOPSIS" +.sp +\fBfstrim\fP [\fB\-Aa\fP] [\fB\-o\fP \fIoffset\fP] [\fB\-l\fP \fIlength\fP] [\fB\-m\fP \fIminimum\-size\fP] [\fB\-v\fP \fImountpoint\fP] +.SH "DESCRIPTION" +.sp +\fBfstrim\fP is used on a mounted filesystem to discard (or "trim") blocks which are not in use by the filesystem. This is useful for solid\-state drives (SSDs) and thinly\-provisioned storage. +.sp +By default, \fBfstrim\fP will discard all unused blocks in the filesystem. Options may be used to modify this behavior based on range or size, as explained below. +.sp +The \fImountpoint\fP argument is the pathname of the directory where the filesystem is mounted. +.sp +Running \fBfstrim\fP frequently, or even using \fBmount \-o discard\fP, might negatively affect the lifetime of poor\-quality SSD devices. For most desktop and server systems a sufficient trimming frequency is once a week. 
Note that not all devices support a queued trim, so each trim command incurs a performance penalty on whatever else might be trying to use the disk at the time. +.SH "OPTIONS" +.sp +The \fIoffset\fP, \fIlength\fP, and \fIminimum\-size\fP arguments may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB. +.sp +\fB\-A, \-\-fstab\fP +.RS 4 +Trim all mounted filesystems mentioned in \fI/etc/fstab\fP on devices that support the discard operation. The root filesystem is determined from kernel command line if missing in the file. The other supplied options, like \fB\-\-offset\fP, \fB\-\-length\fP and \fB\-\-minimum\fP, are applied to all these devices. Errors from filesystems that do not support the discard operation, read\-only devices, autofs and read\-only filesystems are silently ignored. Filesystems with "X\-fstrim.notrim" mount option are skipped. +.RE +.sp +\fB\-a, \-\-all\fP +.RS 4 +Trim all mounted filesystems on devices that support the discard operation. The other supplied options, like \fB\-\-offset\fP, \fB\-\-length\fP and \fB\-\-minimum\fP, are applied to all these devices. Errors from filesystems that do not support the discard operation, read\-only devices and read\-only filesystems are silently ignored. +.RE +.sp +\fB\-n, \-\-dry\-run\fP +.RS 4 +This option does everything apart from actually call \fBFITRIM\fP ioctl. +.RE +.sp +\fB\-o, \-\-offset\fP \fIoffset\fP +.RS 4 +Byte offset in the filesystem from which to begin searching for free blocks to discard. The default value is zero, starting at the beginning of the filesystem. +.RE +.sp +\fB\-l, \-\-length\fP \fIlength\fP +.RS 4 +The number of bytes (after the starting point) to search for free blocks to discard. 
If the specified value extends past the end of the filesystem, \fBfstrim\fP will stop at the filesystem size boundary. The default value extends to the end of the filesystem. +.RE +.sp +\fB\-I, \-\-listed\-in\fP \fIlist\fP +.RS 4 +Specifies a colon\-separated list of files in fstab or kernel mountinfo format. All missing or empty files are silently ignored. The evaluation of the \fIlist\fP stops after first non\-empty file. For example: +.sp +\fB\-\-listed\-in /etc/fstab:/proc/self/mountinfo\fP. +.sp +Filesystems with "X\-fstrim.notrim" mount option in fstab are skipped. +.RE +.sp +\fB\-m, \-\-minimum\fP \fIminimum\-size\fP +.RS 4 +Minimum contiguous free range to discard, in bytes. (This value is internally rounded up to a multiple of the filesystem block size.) Free ranges smaller than this will be ignored and \fBfstrim\fP will adjust the minimum if it\(cqs smaller than the device\(cqs minimum, and report that (fstrim_range.minlen) back to userspace. By increasing this value, the \fBfstrim\fP operation will complete more quickly for filesystems with badly fragmented freespace, although not all blocks will be discarded. The default value is zero, discarding every free block. +.RE +.sp +\fB\-v, \-\-verbose\fP +.RS 4 +Verbose execution. With this option \fBfstrim\fP will output the number of bytes passed from the filesystem down the block stack to the device for potential discard. This number is a maximum discard amount from the storage device\(cqs perspective, because \fIFITRIM\fP ioctl called repeated will keep sending the same sectors for discard repeatedly. +.sp +\fBfstrim\fP will report the same potential discard bytes each time, but only sectors which had been written to between the discards would actually be discarded by the storage device. Further, the kernel block layer reserves the right to adjust the discard ranges to fit raid stripe geometry, non\-trim capable devices in a LVM setup, etc. 
These reductions would not be reflected in fstrim_range.len (the \fB\-\-length\fP option). +.RE +.sp +\fB\-\-quiet\-unsupported\fP +.RS 4 +Suppress error messages if trim operation (ioctl) is unsupported. This option is meant to be used in \fBsystemd\fP service file or in \fBcron\fP(8) scripts to hide warnings that are result of known problems, such as NTFS driver reporting \fIBad file descriptor\fP when device is mounted read\-only, or lack of file system support for ioctl \fIFITRIM\fP call. This option also cleans exit status when unsupported filesystem specified on \fBfstrim\fP command line. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "EXIT STATUS" +.sp +0 +.RS 4 +success +.RE +.sp +1 +.RS 4 +failure +.RE +.sp +32 +.RS 4 +all failed +.RE +.sp +64 +.RS 4 +some filesystem discards have succeeded, some failed +.RE +.sp +The command \fBfstrim \-\-all\fP returns 0 (all succeeded), 32 (all failed) or 64 (some failed, some succeeded). +.SH "AUTHORS" +.sp +.MTO "lczerner\(atredhat.com" "Lukas Czerner" "," +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +\fBblkdiscard\fP(8), +\fBmount\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBfstrim\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/fstrim.8.adoc b/sys-utils/fstrim.8.adoc new file mode 100644 index 0000000..7accc42 --- /dev/null +++ b/sys-utils/fstrim.8.adoc @@ -0,0 +1,94 @@ +//po4a: entry man manual += fstrim(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: fstrim + +== NAME + +fstrim - discard unused blocks on a mounted filesystem + +== SYNOPSIS + +*fstrim* [*-Aa*] [*-o* _offset_] [*-l* _length_] [*-m* _minimum-size_] [*-v* _mountpoint_] + +== DESCRIPTION + +*fstrim* is used on a mounted filesystem to discard (or "trim") blocks which are not in use by the filesystem. This is useful for solid-state drives (SSDs) and thinly-provisioned storage. + +By default, *fstrim* will discard all unused blocks in the filesystem. Options may be used to modify this behavior based on range or size, as explained below. + +The _mountpoint_ argument is the pathname of the directory where the filesystem is mounted. + +Running *fstrim* frequently, or even using *mount -o discard*, might negatively affect the lifetime of poor-quality SSD devices. For most desktop and server systems a sufficient trimming frequency is once a week. Note that not all devices support a queued trim, so each trim command incurs a performance penalty on whatever else might be trying to use the disk at the time. + +== OPTIONS + +The _offset_, _length_, and _minimum-size_ arguments may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB. + +*-A, --fstab*:: +Trim all mounted filesystems mentioned in _/etc/fstab_ on devices that support the discard operation. The root filesystem is determined from kernel command line if missing in the file. 
The other supplied options, like *--offset*, *--length* and *--minimum*, are applied to all these devices. Errors from filesystems that do not support the discard operation, read-only devices, autofs and read-only filesystems are silently ignored. Filesystems with the "X-fstrim.notrim" mount option are skipped. + +*-a, --all*:: +Trim all mounted filesystems on devices that support the discard operation. The other supplied options, like *--offset*, *--length* and *--minimum*, are applied to all these devices. Errors from filesystems that do not support the discard operation, read-only devices and read-only filesystems are silently ignored. + +*-n, --dry-run*:: +This option does everything apart from actually calling the *FITRIM* ioctl. + +*-o, --offset* _offset_:: +Byte offset in the filesystem from which to begin searching for free blocks to discard. The default value is zero, starting at the beginning of the filesystem. + +*-l, --length* _length_:: +The number of bytes (after the starting point) to search for free blocks to discard. If the specified value extends past the end of the filesystem, *fstrim* will stop at the filesystem size boundary. The default value extends to the end of the filesystem. + +*-I, --listed-in* _list_:: +Specifies a colon-separated list of files in fstab or kernel mountinfo format. All missing or empty files are silently ignored. The evaluation of the _list_ stops after the first non-empty file. For example: ++ +*--listed-in /etc/fstab:/proc/self/mountinfo*. ++ +Filesystems with the "X-fstrim.notrim" mount option in fstab are skipped. + +*-m, --minimum* _minimum-size_:: +Minimum contiguous free range to discard, in bytes. (This value is internally rounded up to a multiple of the filesystem block size.) Free ranges smaller than this will be ignored and *fstrim* will adjust the minimum if it's smaller than the device's minimum, and report that (fstrim_range.minlen) back to userspace. 
By increasing this value, the *fstrim* operation will complete more quickly for filesystems with badly fragmented freespace, although not all blocks will be discarded. The default value is zero, discarding every free block. + +*-v, --verbose*:: +Verbose execution. With this option *fstrim* will output the number of bytes passed from the filesystem down the block stack to the device for potential discard. This number is a maximum discard amount from the storage device's perspective, because the _FITRIM_ ioctl, when called repeatedly, will keep sending the same sectors for discard. ++ +*fstrim* will report the same potential discard bytes each time, but only sectors which had been written to between the discards would actually be discarded by the storage device. Further, the kernel block layer reserves the right to adjust the discard ranges to fit raid stripe geometry, non-trim capable devices in an LVM setup, etc. These reductions would not be reflected in fstrim_range.len (the *--length* option). + +*--quiet-unsupported*:: +Suppress error messages if the trim operation (ioctl) is unsupported. This option is meant to be used in a *systemd* service file or in *cron*(8) scripts to hide warnings that are the result of known problems, such as the NTFS driver reporting _Bad file descriptor_ when the device is mounted read-only, or lack of file system support for the ioctl _FITRIM_ call. This option also resets the exit status to 0 when an unsupported filesystem is specified on the *fstrim* command line. + +include::man-common/help-version.adoc[] + +== EXIT STATUS + +0:: +success +1:: +failure +32:: +all failed +64:: +some filesystem discards have succeeded, some failed + +The command *fstrim --all* returns 0 (all succeeded), 32 (all failed) or 64 (some failed, some succeeded). 
+ +== AUTHORS +mailto:lczerner@redhat.com[Lukas Czerner], +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +*blkdiscard*(8), +*mount*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/fstrim.c b/sys-utils/fstrim.c new file mode 100644 index 0000000..0def35f --- /dev/null +++ b/sys-utils/fstrim.c @@ -0,0 +1,586 @@ +/* + * fstrim.c -- discard the part (or whole) of mounted filesystem. + * + * Copyright (C) 2010 Red Hat, Inc. All rights reserved. + * Written by Lukas Czerner <lczerner@redhat.com> + * Karel Zak <kzak@redhat.com> + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * This program uses FITRIM ioctl to discard parts or the whole filesystem + * online (mounted). You can specify range (start and length) to be + * discarded, or simply discard whole filesystem. 
+ */ + +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <fcntl.h> +#include <limits.h> +#include <getopt.h> + +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/vfs.h> +#include <linux/fs.h> + +#include "nls.h" +#include "xalloc.h" +#include "strutils.h" +#include "c.h" +#include "closestream.h" +#include "pathnames.h" +#include "sysfs.h" +#include "optutils.h" +#include "statfs_magic.h" + +#include <libmount.h> + + +#ifndef FITRIM +struct fstrim_range { + uint64_t start; + uint64_t len; + uint64_t minlen; +}; +#define FITRIM _IOWR('X', 121, struct fstrim_range) +#endif + +struct fstrim_control { + struct fstrim_range range; + + unsigned int verbose : 1, + quiet_unsupp : 1, + dryrun : 1; +}; + +static int is_directory(const char *path, int silent) +{ + struct stat sb; + + if (stat(path, &sb) == -1) { + if (!silent) + warn(_("stat of %s failed"), path); + return 0; + } + if (!S_ISDIR(sb.st_mode)) { + if (!silent) + warnx(_("%s: not a directory"), path); + return 0; + } + return 1; +} + +/* returns: 0 = success, 1 = unsupported, < 0 = error */ +static int fstrim_filesystem(struct fstrim_control *ctl, const char *path, const char *devname) +{ + int fd = -1, rc; + struct fstrim_range range; + char *rpath = realpath(path, NULL); + + if (!rpath) { + warn(_("cannot get realpath: %s"), path); + rc = -errno; + goto done; + } + /* kernel modifies the range */ + memcpy(&range, &ctl->range, sizeof(range)); + + fd = open(rpath, O_RDONLY); + if (fd < 0) { + warn(_("cannot open %s"), path); + rc = -errno; + goto done; + } + + if (ctl->dryrun) { + if (devname) + printf(_("%s: 0 B (dry run) trimmed on %s\n"), path, devname); + else + printf(_("%s: 0 B (dry run) trimmed\n"), path); + rc = 0; + goto done; + } + + errno = 0; + if (ioctl(fd, FITRIM, &range)) { + switch (errno) { + case EBADF: + case ENOTTY: + case EOPNOTSUPP: + case ENOSYS: + rc = 1; + break; + default: + rc = -errno; + } + if (rc < 0) + 
warn(_("%s: FITRIM ioctl failed"), path); + goto done; + } + + if (ctl->verbose) { + char *str = size_to_human_string( + SIZE_SUFFIX_3LETTER | SIZE_SUFFIX_SPACE, + (uint64_t) range.len); + if (devname) + /* TRANSLATORS: The standard value here is a very large number. */ + printf(_("%s: %s (%" PRIu64 " bytes) trimmed on %s\n"), + path, str, (uint64_t) range.len, devname); + else + /* TRANSLATORS: The standard value here is a very large number. */ + printf(_("%s: %s (%" PRIu64 " bytes) trimmed\n"), + path, str, (uint64_t) range.len); + + free(str); + } + + rc = 0; +done: + if (fd >= 0) + close(fd); + free(rpath); + return rc; +} + +static int has_discard(const char *devname, struct path_cxt **wholedisk) +{ + struct path_cxt *pc = NULL; + uint64_t dg = 0; + dev_t disk = 0, dev; + int rc = -1, rdonly = 0; + + dev = sysfs_devname_to_devno(devname); + if (!dev) + goto fail; + + pc = ul_new_sysfs_path(dev, NULL, NULL); + if (!pc) + goto fail; + + /* + * This is tricky to read the info from sys/, because the queue + * attributes are provided for whole devices (disk) only. We're trying + * to reuse the whole-disk sysfs context to optimize this stuff (as + * system usually have just one disk only). + */ + rc = sysfs_blkdev_get_wholedisk(pc, NULL, 0, &disk); + if (rc != 0 || !disk) + goto fail; + + if (dev != disk) { + /* Partition, try reuse whole-disk context if valid for the + * current device, otherwise create new context for the + * whole-disk. 
+ */ + if (*wholedisk && sysfs_blkdev_get_devno(*wholedisk) != disk) { + ul_unref_path(*wholedisk); + *wholedisk = NULL; + } + if (!*wholedisk) { + *wholedisk = ul_new_sysfs_path(disk, NULL, NULL); + if (!*wholedisk) + goto fail; + } + sysfs_blkdev_set_parent(pc, *wholedisk); + } + + rc = ul_path_read_u64(pc, &dg, "queue/discard_granularity"); + if (!rc) + ul_path_scanf(pc, "ro", "%d", &rdonly); + + ul_unref_path(pc); + return rc == 0 && dg > 0 && rdonly == 0; +fail: + ul_unref_path(pc); + return 1; +} + +static int is_unwanted_fs(struct libmnt_fs *fs, const char *tgt) +{ + struct statfs vfs; + int fd, rc; + + if (mnt_fs_is_pseudofs(fs)) + return 1; + if (mnt_fs_is_netfs(fs)) + return 1; + if (mnt_fs_is_swaparea(fs)) + return 1; + if (mnt_fs_match_fstype(fs, "autofs")) + return 1; + if (mnt_fs_match_options(fs, "ro")) + return 1; + if (mnt_fs_match_options(fs, "+X-fstrim.notrim")) + return 1; + + fd = open(tgt, O_PATH); + if (fd < 0) + return 1; + rc = fstatfs(fd, &vfs) != 0 || vfs.f_type == STATFS_AUTOFS_MAGIC; + close(fd); + if (rc) + return 1; + + /* FITRIM on read-only filesystem can fail, and it can fail */ + if (access(tgt, W_OK) != 0) { + if (errno == EROFS) + return 1; + if (errno == EACCES) + return 1; + } + return 0; +} + +static int uniq_fs_target_cmp( + struct libmnt_table *tb __attribute__((__unused__)), + struct libmnt_fs *a, + struct libmnt_fs *b) +{ + return !mnt_fs_streq_target(a, mnt_fs_get_target(b)); +} + +static int uniq_fs_source_cmp( + struct libmnt_table *tb __attribute__((__unused__)), + struct libmnt_fs *a, + struct libmnt_fs *b) +{ + if (mnt_fs_is_pseudofs(a) || mnt_fs_is_netfs(a) || + mnt_fs_is_pseudofs(b) || mnt_fs_is_netfs(b)) + return 1; + + return !mnt_fs_streq_srcpath(a, mnt_fs_get_srcpath(b)); +} + +/* + * -1 = tab empty + * 0 = all success + * 32 = all failed + * 64 = some failed, some success + */ +static int fstrim_all_from_file(struct fstrim_control *ctl, const char *filename) +{ + struct libmnt_fs *fs; + struct libmnt_iter 
*itr; + struct libmnt_table *tab; + struct libmnt_cache *cache = NULL; + struct path_cxt *wholedisk = NULL; + int cnt = 0, cnt_err = 0; + int fstab = 0; + + tab = mnt_new_table_from_file(filename); + if (!tab) + err(MNT_EX_FAIL, _("failed to parse %s"), filename); + + if (mnt_table_is_empty(tab)) { + mnt_unref_table(tab); + return -1; + } + + if (streq_paths(filename, "/etc/fstab")) + fstab = 1; + + /* de-duplicate by mountpoints */ + mnt_table_uniq_fs(tab, 0, uniq_fs_target_cmp); + + if (fstab) { + char *rootdev = NULL; + + cache = mnt_new_cache(); + if (!cache) + err(MNT_EX_FAIL, _("failed to initialize libmount cache")); + + /* Make sure we trim also root FS on fstab */ + if (mnt_table_find_target(tab, "/", MNT_ITER_FORWARD) == NULL && + mnt_guess_system_root(0, cache, &rootdev) == 0) { + + fs = mnt_new_fs(); + if (!fs) + err(MNT_EX_FAIL, _("failed to allocate FS handler")); + mnt_fs_set_target(fs, "/"); + mnt_fs_set_source(fs, rootdev); + mnt_fs_set_fstype(fs, "auto"); + mnt_table_add_fs(tab, fs); + mnt_unref_fs(fs); + fs = NULL; + } + free(rootdev); + } + + itr = mnt_new_iter(MNT_ITER_BACKWARD); + if (!itr) + err(MNT_EX_FAIL, _("failed to initialize libmount iterator")); + + /* Remove useless entries and canonicalize the table */ + while (mnt_table_next_fs(tab, itr, &fs) == 0) { + const char *src = mnt_fs_get_srcpath(fs), + *tgt = mnt_fs_get_target(fs); + char *path; + int rc = 1; + + if (!tgt || is_unwanted_fs(fs, tgt)) { + mnt_table_remove_fs(tab, fs); + continue; + } + + /* convert LABEL= (etc.) from fstab to paths */ + if (!src && cache) { + const char *spec = mnt_fs_get_source(fs); + + if (!spec) { + mnt_table_remove_fs(tab, fs); + continue; + } + src = mnt_resolve_spec(spec, cache); + mnt_fs_set_source(fs, src); + } + + if (!src || *src != '/') { + mnt_table_remove_fs(tab, fs); + continue; + } + + /* Is it really accessible mountpoint? 
Not all mountpoints are + * accessible (maybe over mounted by another filesystem) */ + path = mnt_get_mountpoint(tgt); + if (path && streq_paths(path, tgt)) + rc = 0; + free(path); + if (rc) { + mnt_table_remove_fs(tab, fs); + continue; /* overlaying mount */ + } + + if (!is_directory(tgt, 1) || + !has_discard(src, &wholedisk)) { + mnt_table_remove_fs(tab, fs); + continue; + } + } + + /* de-duplicate by source */ + mnt_table_uniq_fs(tab, MNT_UNIQ_FORWARD, uniq_fs_source_cmp); + + mnt_reset_iter(itr, MNT_ITER_BACKWARD); + + /* Do FITRIM */ + while (mnt_table_next_fs(tab, itr, &fs) == 0) { + const char *src = mnt_fs_get_srcpath(fs), + *tgt = mnt_fs_get_target(fs); + int rc; + + cnt++; + + /* + * We're able to detect that the device supports discard, but + * things also depend on filesystem or device mapping, for + * example LUKS (by default) does not support FSTRIM. + * + * This is reason why we ignore EOPNOTSUPP and ENOTTY errors + * from discard ioctl. + */ + rc = fstrim_filesystem(ctl, tgt, src); + if (rc < 0) + cnt_err++; + else if (rc == 1 && !ctl->quiet_unsupp) + warnx(_("%s: the discard operation is not supported"), tgt); + } + mnt_free_iter(itr); + + ul_unref_path(wholedisk); + mnt_unref_table(tab); + mnt_unref_cache(cache); + + if (cnt && cnt == cnt_err) + return MNT_EX_FAIL; /* all failed */ + if (cnt && cnt_err) + return MNT_EX_SOMEOK; /* some ok */ + + return MNT_EX_SUCCESS; +} + +/* + * fstrim --all follows "mount -a" return codes: + * + * 0 = all success + * 32 = all failed + * 64 = some failed, some success + */ +static int fstrim_all(struct fstrim_control *ctl, const char *tabs) +{ + char *list = xstrdup(tabs); + char *file; + int rc = MNT_EX_FAIL; + + mnt_init_debug(0); + ul_path_init_debug(); + + for (file = strtok(list, ":"); file; file = strtok(NULL, ":")) { + struct stat st; + + if (stat(file, &st) < 0 || !S_ISREG(st.st_mode)) + continue; + + rc = fstrim_all_from_file(ctl, file); + if (rc >= 0) + break; /* stop after first non-empty file */ + } + 
free(list); + return rc; +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, + _(" %s [options] <mount point>\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Discard unused blocks on a mounted filesystem.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -a, --all trim mounted filesystems\n"), out); + fputs(_(" -A, --fstab trim filesystems from /etc/fstab\n"), out); + fputs(_(" -I, --listed-in <list> trim filesystems listed in specified files\n"), out); + fputs(_(" -o, --offset <num> the offset in bytes to start discarding from\n"), out); + fputs(_(" -l, --length <num> the number of bytes to discard\n"), out); + fputs(_(" -m, --minimum <num> the minimum extent length to discard\n"), out); + fputs(_(" -v, --verbose print number of discarded bytes\n"), out); + fputs(_(" --quiet-unsupported suppress error messages if trim unsupported\n"), out); + fputs(_(" -n, --dry-run does everything, but trim\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(21)); + + fputs(USAGE_ARGUMENTS, out); + printf(USAGE_ARG_SIZE(_("<num>"))); + + printf(USAGE_MAN_TAIL("fstrim(8)")); + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + char *path = NULL; + char *tabs = NULL; + int c, rc, all = 0; + struct fstrim_control ctl = { + .range = { .len = ULLONG_MAX } + }; + enum { + OPT_QUIET_UNSUPP = CHAR_MAX + 1 + }; + + static const struct option longopts[] = { + { "all", no_argument, NULL, 'a' }, + { "fstab", no_argument, NULL, 'A' }, + { "help", no_argument, NULL, 'h' }, + { "listed-in", required_argument, NULL, 'I' }, + { "version", no_argument, NULL, 'V' }, + { "offset", required_argument, NULL, 'o' }, + { "length", required_argument, NULL, 'l' }, + { "minimum", required_argument, NULL, 'm' }, + { "verbose", no_argument, NULL, 'v' }, + { "quiet-unsupported", no_argument, NULL, OPT_QUIET_UNSUPP }, + { "dry-run", no_argument, NULL, 'n' }, + { NULL, 
0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'A','I','a' }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, "AahI:l:m:no:Vv", longopts, NULL)) != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch(c) { + case 'A': + all = 1; + tabs = _PATH_MNTTAB; /* fstab */ + break; + case 'a': + all = 1; + tabs = _PATH_PROC_MOUNTINFO; /* mountinfo */ + break; + case 'I': + all = 1; + tabs = optarg; + break; + case 'n': + ctl.dryrun = 1; + break; + case 'l': + ctl.range.len = strtosize_or_err(optarg, + _("failed to parse length")); + break; + case 'o': + ctl.range.start = strtosize_or_err(optarg, + _("failed to parse offset")); + break; + case 'm': + ctl.range.minlen = strtosize_or_err(optarg, + _("failed to parse minimum extent length")); + break; + case 'v': + ctl.verbose = 1; + break; + case OPT_QUIET_UNSUPP: + ctl.quiet_unsupp = 1; + break; + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (!all) { + if (optind == argc) + errx(EXIT_FAILURE, _("no mountpoint specified")); + path = argv[optind++]; + } + + if (optind != argc) { + warnx(_("unexpected number of arguments")); + errtryhelp(EXIT_FAILURE); + } + + if (all) + return fstrim_all(&ctl, tabs); /* MNT_EX_* codes */ + + if (!is_directory(path, 0)) + return EXIT_FAILURE; + + rc = fstrim_filesystem(&ctl, path, NULL); + if (rc == 1 && ctl.quiet_unsupp) + rc = 0; + if (rc == 1) + warnx(_("%s: the discard operation is not supported"), path); + + return rc == 0 ? 
EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sys-utils/fstrim.service.in b/sys-utils/fstrim.service.in new file mode 100644 index 0000000..11e6e95 --- /dev/null +++ b/sys-utils/fstrim.service.in @@ -0,0 +1,16 @@ +[Unit] +Description=Discard unused blocks on filesystems from /etc/fstab +Documentation=man:fstrim(8) +ConditionVirtualization=!container + +[Service] +Type=oneshot +ExecStart=@sbindir@/fstrim --listed-in /etc/fstab:/proc/self/mountinfo --verbose --quiet-unsupported +PrivateDevices=no +PrivateNetwork=yes +PrivateUsers=no +ProtectKernelTunables=yes +ProtectKernelModules=yes +ProtectControlGroups=yes +MemoryDenyWriteExecute=yes +SystemCallFilter=@default @file-system @basic-io @system-service diff --git a/sys-utils/fstrim.timer b/sys-utils/fstrim.timer new file mode 100644 index 0000000..60ab3b6 --- /dev/null +++ b/sys-utils/fstrim.timer @@ -0,0 +1,14 @@ +[Unit] +Description=Discard unused blocks once a week +Documentation=man:fstrim +ConditionVirtualization=!container +ConditionPathExists=!/etc/initrd-release + +[Timer] +OnCalendar=weekly +AccuracySec=1h +Persistent=true +RandomizedDelaySec=6000 + +[Install] +WantedBy=timers.target diff --git a/sys-utils/hwclock-cmos.c b/sys-utils/hwclock-cmos.c new file mode 100644 index 0000000..56ee624 --- /dev/null +++ b/sys-utils/hwclock-cmos.c @@ -0,0 +1,387 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * + * i386 CMOS starts out with 14 bytes clock data alpha has something + * similar, but with details depending on the machine type. + * + * byte 0: seconds 0-59 + * byte 2: minutes 0-59 + * byte 4: hours 0-23 in 24hr mode, + * 1-12 in 12hr mode, with high bit unset/set + * if am/pm. + * byte 6: weekday 1-7, Sunday=1 + * byte 7: day of the month 1-31 + * byte 8: month 1-12 + * byte 9: year 0-99 + * + * Numbers are stored in BCD/binary if bit 2 of byte 11 is unset/set The + * clock is in 12hr/24hr mode if bit 1 of byte 11 is unset/set The clock is + * undefined (being updated) if bit 7 of byte 10 is set. 
The clock is frozen + * (to be updated) by setting bit 7 of byte 11 Bit 7 of byte 14 indicates + * whether the CMOS clock is reliable: it is 1 if RTC power has been good + * since this bit was last read; it is 0 when the battery is dead and system + * power has been off. + * + * Avoid setting the RTC clock within 2 seconds of the day rollover that + * starts a new month or enters daylight saving time. + * + * The century situation is messy: + * + * Usually byte 50 (0x32) gives the century (in BCD, so 19 or 20 hex), but + * IBM PS/2 has (part of) a checksum there and uses byte 55 (0x37). + * Sometimes byte 127 (0x7f) or Bank 1, byte 0x48 gives the century. The + * original RTC will not access any century byte; some modern versions will. + * If a modern RTC or BIOS increments the century byte it may go from 0x19 + * to 0x20, but in some buggy cases 0x1a is produced. + */ +/* + * A struct tm has int fields + * tm_sec 0-59, 60 or 61 only for leap seconds + * tm_min 0-59 + * tm_hour 0-23 + * tm_mday 1-31 + * tm_mon 0-11 + * tm_year number of years since 1900 + * tm_wday 0-6, 0=Sunday + * tm_yday 0-365 + * tm_isdst >0: yes, 0: no, <0: unknown + */ + +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "c.h" +#include "nls.h" +#include "pathnames.h" + +/* for inb, outb */ +#ifdef HAVE_SYS_IO_H +# include <sys/io.h> +#elif defined(HAVE_ASM_IO_H) +# include <asm/io.h> +#else +# error "no sys/io.h or asm/io.h" +#endif /* HAVE_SYS_IO_H, HAVE_ASM_IO_H */ + +#include "hwclock.h" + +#define BCD_TO_BIN(val) ((val)=((val)&15) + ((val)>>4)*10) +#define BIN_TO_BCD(val) ((val)=(((val)/10)<<4) + (val)%10) + +#define IOPL_NOT_IMPLEMENTED -2 + +/* + * POSIX uses 1900 as epoch for a struct tm, and 1970 for a time_t. + */ +#define TM_EPOCH 1900 + +static unsigned short clock_ctl_addr = 0x70; +static unsigned short clock_data_addr = 0x71; + +/* + * Hmmh, this isn't very atomic. Maybe we should force an error instead? 
+ * + * TODO: optimize the access to CMOS by mlockall(MCL_CURRENT) and SCHED_FIFO + */ +static unsigned long atomic(unsigned long (*op) (unsigned long), + unsigned long arg) +{ + return (*op) (arg); +} + +/* + * We only want to read CMOS data, but unfortunately writing to bit 7 + * disables (1) or enables (0) NMI; since this bit is read-only we have + * to guess the old status. Various docs suggest that one should disable + * NMI while reading/writing CMOS data, and enable it again afterwards. + * This would yield the sequence + * + * outb (reg | 0x80, 0x70); + * val = inb(0x71); + * outb (0x0d, 0x70); // 0x0d: random read-only location + * + * Other docs state that "any write to 0x70 should be followed by an + * action to 0x71 or the RTC will be left in an unknown state". Most + * docs say that it doesn't matter at all what one does. + * + * bit 0x80: disable NMI while reading - should we? Let us follow the + * kernel and not disable. Called only with 0 <= reg < 128 + */ + +static inline unsigned long cmos_read(unsigned long reg) +{ + outb(reg, clock_ctl_addr); + return inb(clock_data_addr); +} + +static inline unsigned long cmos_write(unsigned long reg, unsigned long val) +{ + outb(reg, clock_ctl_addr); + outb(val, clock_data_addr); + return 0; +} + +static unsigned long cmos_set_time(unsigned long arg) +{ + unsigned char save_control, save_freq_select, pmbit = 0; + struct tm tm = *(struct tm *)arg; + +/* + * CMOS byte 10 (clock status register A) has 3 bitfields: + * bit 7: 1 if data invalid, update in progress (read-only bit) + * (this is raised 244 us before the actual update starts) + * 6-4 select base frequency + * 010: 32768 Hz time base (default) + * 111: reset + * all other combinations are manufacturer-dependent + * (e.g.: DS1287: 010 = start oscillator, anything else = stop) + * 3-0 rate selection bits for interrupt + * 0000 none (may stop RTC) + * 0001, 0010 give same frequency as 1000, 1001 + * 0011 122 microseconds (minimum, 8192 Hz) + * ....
 each increase by 1 halves the frequency, doubles the period + * 1111 500 milliseconds (maximum, 2 Hz) + * 0110 976.562 microseconds (default 1024 Hz) + */ + save_control = cmos_read(11); /* set bit 7 of byte 11: tell the clock it's being set */ + cmos_write(11, (save_control | 0x80)); + save_freq_select = cmos_read(10); /* bits 6-4 = 111: stop and reset prescaler */ + cmos_write(10, (save_freq_select | 0x70)); + + tm.tm_year %= 100; + tm.tm_mon += 1; + tm.tm_wday += 1; + + if (!(save_control & 0x02)) { /* 12hr mode; the default is 24hr mode. NOTE(review): hour 0 is written as 12 with the PM bit set and hour 12 as 12 without it - confirm against the RTC's AM/PM convention */ + if (tm.tm_hour == 0) + tm.tm_hour = 24; + if (tm.tm_hour > 12) { + tm.tm_hour -= 12; + pmbit = 0x80; + } + } + + if (!(save_control & 0x04)) { /* BCD mode - the default */ + BIN_TO_BCD(tm.tm_sec); + BIN_TO_BCD(tm.tm_min); + BIN_TO_BCD(tm.tm_hour); + BIN_TO_BCD(tm.tm_wday); + BIN_TO_BCD(tm.tm_mday); + BIN_TO_BCD(tm.tm_mon); + BIN_TO_BCD(tm.tm_year); + } + + cmos_write(0, tm.tm_sec); + cmos_write(2, tm.tm_min); + cmos_write(4, tm.tm_hour | pmbit); + cmos_write(6, tm.tm_wday); + cmos_write(7, tm.tm_mday); + cmos_write(8, tm.tm_mon); + cmos_write(9, tm.tm_year); + + /* + * The kernel sources, linux/arch/i386/kernel/time.c, have the + * following comment: + * + * The following flags have to be released exactly in this order, + * otherwise the DS12887 (popular MC146818A clone with integrated + * battery and quartz) will not reset the oscillator and will not + * update precisely 500 ms later. You won't find this mentioned in + * the Dallas Semiconductor data sheets, but who believes data + * sheets anyway ...
 -- Markus Kuhn + */ + cmos_write(11, save_control); + cmos_write(10, save_freq_select); + return 0; +} + +static int hclock_read(unsigned long reg) +{ + return atomic(cmos_read, reg); +} + +static void hclock_set_time(const struct tm *tm) +{ + atomic(cmos_set_time, (unsigned long)(tm)); +} + +static inline int cmos_clock_busy(void) +{ + return + /* poll bit 7 (UIP) of Control Register A */ + (hclock_read(10) & 0x80); +} + +static int synchronize_to_clock_tick_cmos(const struct hwclock_control *ctl + __attribute__((__unused__))) +{ + int i; + + /* + * Wait for rise. Should be within a second, but in case something + * weird happens, we have a limit on this loop to reduce the impact + * of this failure: give up and return 1 after 10000000 polls. + */ + for (i = 0; !cmos_clock_busy(); i++) + if (i >= 10000000) + return 1; + + /* Wait for fall. Should be within 2.228 ms; return 1 after 1000000 polls. */ + for (i = 0; cmos_clock_busy(); i++) + if (i >= 1000000) + return 1; + return 0; +} + +/* + * Read the hardware clock and return the current time via <tm> argument. + * Assume we have an ISA machine and read the clock directly with CPU I/O + * instructions. + * + * This function is not totally reliable. It takes a finite and + * unpredictable amount of time to execute the code below. During that time, + * the clock may change and we may even read an invalid value in the middle + * of an update. We do a few checks to minimize this possibility, but only + * the kernel can actually read the clock properly, since it can execute + * code in a short and predictable amount of time (by turning of + * interrupts). + * + * In practice, the chance of this function returning the wrong time is + * extremely remote.
+ */ +static int read_hardware_clock_cmos(const struct hwclock_control *ctl + __attribute__((__unused__)), struct tm *tm) +{ + unsigned char status = 0, pmbit = 0; + + while (1) { + /* + * Bit 7 of Byte 10 of the Hardware Clock value is the + * Update In Progress (UIP) bit, which is on during, and for + * 244 us before, each update of the Hardware Clock. It updates + * the counters individually, so reading them during an + * update would produce garbage. The update takes 2 ms, so we + * could be spinning here that long waiting for this bit to + * turn off. + * + * Furthermore, it is pathologically possible for us to be + * in this code so long that even if the UIP bit is not on + * at first, the clock has changed while we were running. We + * check for that too, and if it happens, we start over. + */ + if (!cmos_clock_busy()) { + /* No clock update in progress, go ahead and read */ + tm->tm_sec = hclock_read(0); + tm->tm_min = hclock_read(2); + tm->tm_hour = hclock_read(4); + tm->tm_wday = hclock_read(6); + tm->tm_mday = hclock_read(7); + tm->tm_mon = hclock_read(8); + tm->tm_year = hclock_read(9); + status = hclock_read(11); + /* + * Unless the clock changed while we were reading, + * consider this a good clock read. + */ + if (tm->tm_sec == hclock_read(0)) + break; + } + /* + * Yes, in theory we could have been running for 60 seconds + * and the above test wouldn't work! + */ + } + + pmbit = (tm->tm_hour & 0x80); /* PM flag (12hr mode), set in BCD and binary modes alike */ + tm->tm_hour &= 0x7f; /* strip it before any BCD conversion; never set in 24hr mode */ + if (!(status & 0x04)) { /* BCD mode - the default */ + BCD_TO_BIN(tm->tm_sec); + BCD_TO_BIN(tm->tm_min); + BCD_TO_BIN(tm->tm_hour); + BCD_TO_BIN(tm->tm_wday); + BCD_TO_BIN(tm->tm_mday); + BCD_TO_BIN(tm->tm_mon); + BCD_TO_BIN(tm->tm_year); + } + + /* + * We don't use the century byte of the Hardware Clock since we + * don't know its address (usually 50 or 55).
 Here, we follow the + * advice of the X/Open Base Working Group: "if century is not + * specified, then values in the range [69-99] refer to years in the + * twentieth century (1969 to 1999 inclusive), and values in the + * range [00-68] refer to years in the twenty-first century (2000 to + * 2068 inclusive)." + */ + tm->tm_wday -= 1; + tm->tm_mon -= 1; + if (tm->tm_year < 69) + tm->tm_year += 100; + if (pmbit) { /* NOTE(review): maps 12 PM to hour 0, mirroring cmos_set_time(); confirm against the RTC's AM/PM convention */ + tm->tm_hour += 12; + if (tm->tm_hour == 24) + tm->tm_hour = 0; + } + + tm->tm_isdst = -1; /* don't know whether it's daylight */ + return 0; +} + +static int set_hardware_clock_cmos(const struct hwclock_control *ctl + __attribute__((__unused__)), + const struct tm *new_broken_time) +{ + hclock_set_time(new_broken_time); + return 0; +} + +# if defined(HAVE_IOPL) +static int i386_iopl(const int level) +{ + return iopl(level); +} +# else +static int i386_iopl(const int level __attribute__ ((__unused__))) +{ + extern int ioperm(unsigned long from, unsigned long num, int turn_on); + return ioperm(clock_ctl_addr, 2, 1); +} +# endif + +static int get_permissions_cmos(void) +{ + int rc; + + rc = i386_iopl(3); + if (rc == IOPL_NOT_IMPLEMENTED) { + warnx(_("ISA port access is not implemented")); + } else if (rc != 0) { + warn(_("iopl() port access failed")); + } + return rc; +} + +static const char *get_device_path(void) +{ + return NULL; +} + +static struct clock_ops cmos_interface = { + N_("Using direct ISA access to the clock"), + get_permissions_cmos, + read_hardware_clock_cmos, + set_hardware_clock_cmos, + synchronize_to_clock_tick_cmos, + get_device_path, +}; + +/* + * Return the direct ISA access clock interface; no probing is done, never NULL. + */ +struct clock_ops *probe_for_cmos_clock(void) +{ + return &cmos_interface; +} diff --git a/sys-utils/hwclock-parse-date.c b/sys-utils/hwclock-parse-date.c new file mode 100644 index 0000000..8e540bd --- /dev/null +++ b/sys-utils/hwclock-parse-date.c @@ -0,0 +1,3292 @@ +/* A Bison parser, made by GNU Bison 3.8.2.
*/ + +/* Bison implementation for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation, + Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ + +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. 
+ There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output, and Bison version. */ +#define YYBISON 30802 + +/* Bison version string. */ +#define YYBISON_VERSION "3.8.2" + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. */ +#define YYPURE 1 + +/* Push parsers. */ +#define YYPUSH 0 + +/* Pull parsers. */ +#define YYPULL 1 + + + + +/* First part of user prologue. */ +#line 1 "sys-utils/hwclock-parse-date.y" + +/** + * SPDX-License-Identifier: GPL-3.0-or-later + * + * Parse a string into an internal timestamp. + * + * This file is based on gnulib parse-datetime.y-dd7a871 with + * the other gnulib dependencies removed for use in util-linux. + * + * Copyright (C) 1999-2000, 2002-2017 Free Software Foundation, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Originally written by Steven M. Bellovin <smb@research.att.com> while + * at the University of North Carolina at Chapel Hill. Later tweaked by + * a couple of people on Usenet. Completely overhauled by Rich $alz + * <rsalz@bbn.com> and Jim Berets <jberets@bbn.com> in August, 1990. + * + * Modified by Paul Eggert <eggert@twinsun.com> in August 1999 to do + * the right thing about local DST. 
Also modified by Paul Eggert + * <eggert@cs.ucla.edu> in February 2004 to support + * nanosecond-resolution timestamps, and in October 2004 to support + * TZ strings in dates. + */ + +/** + * FIXME: Check for arithmetic overflow in all cases, not just + * some of them. + */ + +#include <sys/time.h> +#include <time.h> + +#include "c.h" +#include "timeutils.h" +#include "hwclock.h" + +/** + * There's no need to extend the stack, so there's no need to involve + * alloca. + */ +#define YYSTACK_USE_ALLOCA 0 + +/** + * Tell Bison how much stack space is needed. 20 should be plenty for + * this grammar, which is not right recursive. Beware setting it too + * high, since that might cause problems on machines whose + * implementations have lame stack-overflow checking. + */ +#define YYMAXDEPTH 20 +#define YYINITDEPTH YYMAXDEPTH + +/** + * Since the code of parse-datetime.y is not included in the Emacs executable + * itself, there is no need to #define static in this file. Even if + * the code were included in the Emacs executable, it probably + * wouldn't do any harm to #undef it here; this will only cause + * problems if we try to write to a static variable, which I don't + * think this code needs to do. + */ +#ifdef emacs +# undef static +#endif + +#include <inttypes.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +#include <stdarg.h> +#include "cctype.h" +#include "nls.h" + +/** + * Bison's skeleton tests _STDLIB_H, while some stdlib.h headers + * use _STDLIB_H_ as witness. Map the latter to the one bison uses. + * FIXME: this is temporary. Remove when we have a mechanism to ensure + * that the version we're using is fixed, too. + */ +#ifdef _STDLIB_H_ +# undef _STDLIB_H +# define _STDLIB_H 1 +#endif + +/** + * Shift A right by B bits portably, by dividing A by 2**B and + * truncating towards minus infinity. 
A and B should be free of side + * effects, and B should be in the range 0 <= B <= INT_BITS - 2, where + * INT_BITS is the number of useful bits in an int. GNU code can + * assume that INT_BITS is at least 32. + * + * ISO C99 says that A >> B is implementation-defined if A < 0. Some + * implementations (e.g., UNICOS 9.0 on a Cray Y-MP EL) don't shift + * right in the usual way when A < 0, so SHR falls back on division if + * ordinary A >> B doesn't seem to be the usual signed shift. + */ +#define SHR(a, b) \ + (-1 >> 1 == -1 \ + ? (a) >> (b) \ + : (a) / (1 << (b)) - ((a) % (1 << (b)) < 0)) + +#define TM_YEAR_BASE 1900 + +#define HOUR(x) ((x) * 60) + +#define STREQ(a, b) (strcmp (a, b) == 0) + +/** + * Convert a possibly-signed character to an unsigned character. This is + * a bit safer than casting to unsigned char, since it catches some type + * errors that the cast doesn't. + */ +static unsigned char to_uchar (char ch) { return ch; } + +/** + * FIXME: It also assumes that signed integer overflow silently wraps around, + * but this is not true any more with recent versions of GCC 4. + */ + +/** + * An integer value, and the number of digits in its textual + * representation. + */ +typedef struct { + int negative; + intmax_t value; + size_t digits; +} textint; + +/* An entry in the lexical lookup table. */ +typedef struct { + char const *name; + int type; + int value; +} table; + +/* Meridian: am, pm, or 24-hour style. */ +enum { MERam, MERpm, MER24 }; + +enum { BILLION = 1000000000, LOG10_BILLION = 9 }; + +/* Relative year, month, day, hour, minutes, seconds, and nanoseconds. */ +typedef struct { + intmax_t year; + intmax_t month; + intmax_t day; + intmax_t hour; + intmax_t minutes; + time_t seconds; + long ns; +} relative_time; + +#if HAVE_COMPOUND_LITERALS +# define RELATIVE_TIME_0 ((relative_time) { 0, 0, 0, 0, 0, 0, 0 }) +#else +static relative_time const RELATIVE_TIME_0; +#endif + +/* Information passed to and from the parser. 
*/ +typedef struct { + /* The input string remaining to be parsed. */ + const char *input; + + /* N, if this is the Nth Tuesday. */ + intmax_t day_ordinal; + + /* Day of week; Sunday is 0. */ + int day_number; + + /* tm_isdst flag for the local zone. */ + int local_isdst; + + /* Time zone, in minutes east of UTC. */ + int time_zone; + + /* Style used for time. */ + int meridian; + + /* Gregorian year, month, day, hour, minutes, seconds, and ns. */ + textint year; + intmax_t month; + intmax_t day; + intmax_t hour; + intmax_t minutes; + struct timespec seconds; /* includes nanoseconds */ + + /* Relative year, month, day, hour, minutes, seconds, and ns. */ + relative_time rel; + + /* Presence or counts of some nonterminals parsed so far. */ + int timespec_seen; + int rels_seen; + size_t dates_seen; + size_t days_seen; + size_t local_zones_seen; + size_t dsts_seen; + size_t times_seen; + size_t zones_seen; + + /* Table of local time zone abbreviations, null terminated. */ + table local_time_zone_table[3]; +} parser_control; + +union YYSTYPE; +static int yylex (union YYSTYPE *, parser_control *); +static int yyerror (parser_control const *, char const *); +static int time_zone_hhmm (parser_control *, textint, textint); + +/** + * Extract into *PC any date and time info from a string of digits + * of the form e.g., YYYYMMDD, YYMMDD, HHMM, HH (and sometimes YYY, + * YYYY, ...). + */ +static void digits_to_date_time(parser_control *pc, textint text_int) +{ + if (pc->dates_seen && ! pc->year.digits + && ! 
pc->rels_seen && (pc->times_seen || 2 < text_int.digits)) { + pc->year = text_int; + } else { + if (4 < text_int.digits) { + pc->dates_seen++; + pc->day = text_int.value % 100; + pc->month = (text_int.value / 100) % 100; + pc->year.value = text_int.value / 10000; + pc->year.digits = text_int.digits - 4; + } else { + pc->times_seen++; + if (text_int.digits <= 2) { + pc->hour = text_int.value; + pc->minutes = 0; + } + else { + pc->hour = text_int.value / 100; + pc->minutes = text_int.value % 100; + } + pc->seconds.tv_sec = 0; + pc->seconds.tv_nsec = 0; + pc->meridian = MER24; + } + } +} + +/* Increment PC->rel by FACTOR * REL (FACTOR is 1 or -1). */ +static void apply_relative_time(parser_control *pc, relative_time rel, + int factor) +{ + pc->rel.ns += factor * rel.ns; + pc->rel.seconds += factor * rel.seconds; + pc->rel.minutes += factor * rel.minutes; + pc->rel.hour += factor * rel.hour; + pc->rel.day += factor * rel.day; + pc->rel.month += factor * rel.month; + pc->rel.year += factor * rel.year; + pc->rels_seen = 1; +} + +/* Set PC-> hour, minutes, seconds and nanoseconds members from arguments. */ +static void +set_hhmmss(parser_control *pc, intmax_t hour, intmax_t minutes, + time_t sec, long nsec) +{ + pc->hour = hour; + pc->minutes = minutes; + pc->seconds.tv_sec = sec; + pc->seconds.tv_nsec = nsec; +} + + +#line 353 "sys-utils/hwclock-parse-date.c" + +# ifndef YY_CAST +# ifdef __cplusplus +# define YY_CAST(Type, Val) static_cast<Type> (Val) +# define YY_REINTERPRET_CAST(Type, Val) reinterpret_cast<Type> (Val) +# else +# define YY_CAST(Type, Val) ((Type) (Val)) +# define YY_REINTERPRET_CAST(Type, Val) ((Type) (Val)) +# endif +# endif +# ifndef YY_NULLPTR +# if defined __cplusplus +# if 201103L <= __cplusplus +# define YY_NULLPTR nullptr +# else +# define YY_NULLPTR 0 +# endif +# else +# define YY_NULLPTR ((void*)0) +# endif +# endif + + +/* Debug traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif +#if YYDEBUG +extern int yydebug; +#endif + +/* Token kinds. 
*/ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + enum yytokentype + { + YYEMPTY = -2, + YYEOF = 0, /* "end of file" */ + YYerror = 256, /* error */ + YYUNDEF = 257, /* "invalid token" */ + tAGO = 258, /* tAGO */ + tDST = 259, /* tDST */ + tYEAR_UNIT = 260, /* tYEAR_UNIT */ + tMONTH_UNIT = 261, /* tMONTH_UNIT */ + tHOUR_UNIT = 262, /* tHOUR_UNIT */ + tMINUTE_UNIT = 263, /* tMINUTE_UNIT */ + tSEC_UNIT = 264, /* tSEC_UNIT */ + tDAY_UNIT = 265, /* tDAY_UNIT */ + tDAY_SHIFT = 266, /* tDAY_SHIFT */ + tDAY = 267, /* tDAY */ + tDAYZONE = 268, /* tDAYZONE */ + tLOCAL_ZONE = 269, /* tLOCAL_ZONE */ + tMERIDIAN = 270, /* tMERIDIAN */ + tMONTH = 271, /* tMONTH */ + tORDINAL = 272, /* tORDINAL */ + tZONE = 273, /* tZONE */ + tSNUMBER = 274, /* tSNUMBER */ + tUNUMBER = 275, /* tUNUMBER */ + tSDECIMAL_NUMBER = 276, /* tSDECIMAL_NUMBER */ + tUDECIMAL_NUMBER = 277 /* tUDECIMAL_NUMBER */ + }; + typedef enum yytokentype yytoken_kind_t; +#endif +/* Token kinds. */ +#define YYEMPTY -2 +#define YYEOF 0 +#define YYerror 256 +#define YYUNDEF 257 +#define tAGO 258 +#define tDST 259 +#define tYEAR_UNIT 260 +#define tMONTH_UNIT 261 +#define tHOUR_UNIT 262 +#define tMINUTE_UNIT 263 +#define tSEC_UNIT 264 +#define tDAY_UNIT 265 +#define tDAY_SHIFT 266 +#define tDAY 267 +#define tDAYZONE 268 +#define tLOCAL_ZONE 269 +#define tMERIDIAN 270 +#define tMONTH 271 +#define tORDINAL 272 +#define tZONE 273 +#define tSNUMBER 274 +#define tUNUMBER 275 +#define tSDECIMAL_NUMBER 276 +#define tUDECIMAL_NUMBER 277 + +/* Value type. */ +#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED +union YYSTYPE +{ +#line 294 "sys-utils/hwclock-parse-date.y" + + intmax_t intval; + textint textintval; + struct timespec timespec; + relative_time rel; + +#line 454 "sys-utils/hwclock-parse-date.c" + +}; +typedef union YYSTYPE YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 +#endif + + + + +int yyparse (parser_control *pc); + + + +/* Symbol kind. 
*/ +enum yysymbol_kind_t +{ + YYSYMBOL_YYEMPTY = -2, + YYSYMBOL_YYEOF = 0, /* "end of file" */ + YYSYMBOL_YYerror = 1, /* error */ + YYSYMBOL_YYUNDEF = 2, /* "invalid token" */ + YYSYMBOL_tAGO = 3, /* tAGO */ + YYSYMBOL_tDST = 4, /* tDST */ + YYSYMBOL_tYEAR_UNIT = 5, /* tYEAR_UNIT */ + YYSYMBOL_tMONTH_UNIT = 6, /* tMONTH_UNIT */ + YYSYMBOL_tHOUR_UNIT = 7, /* tHOUR_UNIT */ + YYSYMBOL_tMINUTE_UNIT = 8, /* tMINUTE_UNIT */ + YYSYMBOL_tSEC_UNIT = 9, /* tSEC_UNIT */ + YYSYMBOL_tDAY_UNIT = 10, /* tDAY_UNIT */ + YYSYMBOL_tDAY_SHIFT = 11, /* tDAY_SHIFT */ + YYSYMBOL_tDAY = 12, /* tDAY */ + YYSYMBOL_tDAYZONE = 13, /* tDAYZONE */ + YYSYMBOL_tLOCAL_ZONE = 14, /* tLOCAL_ZONE */ + YYSYMBOL_tMERIDIAN = 15, /* tMERIDIAN */ + YYSYMBOL_tMONTH = 16, /* tMONTH */ + YYSYMBOL_tORDINAL = 17, /* tORDINAL */ + YYSYMBOL_tZONE = 18, /* tZONE */ + YYSYMBOL_tSNUMBER = 19, /* tSNUMBER */ + YYSYMBOL_tUNUMBER = 20, /* tUNUMBER */ + YYSYMBOL_tSDECIMAL_NUMBER = 21, /* tSDECIMAL_NUMBER */ + YYSYMBOL_tUDECIMAL_NUMBER = 22, /* tUDECIMAL_NUMBER */ + YYSYMBOL_23_ = 23, /* '@' */ + YYSYMBOL_24_T_ = 24, /* 'T' */ + YYSYMBOL_25_ = 25, /* ':' */ + YYSYMBOL_26_ = 26, /* ',' */ + YYSYMBOL_27_ = 27, /* '/' */ + YYSYMBOL_YYACCEPT = 28, /* $accept */ + YYSYMBOL_spec = 29, /* spec */ + YYSYMBOL_timespec = 30, /* timespec */ + YYSYMBOL_items = 31, /* items */ + YYSYMBOL_item = 32, /* item */ + YYSYMBOL_datetime = 33, /* datetime */ + YYSYMBOL_iso_8601_datetime = 34, /* iso_8601_datetime */ + YYSYMBOL_time = 35, /* time */ + YYSYMBOL_iso_8601_time = 36, /* iso_8601_time */ + YYSYMBOL_o_zone_offset = 37, /* o_zone_offset */ + YYSYMBOL_zone_offset = 38, /* zone_offset */ + YYSYMBOL_local_zone = 39, /* local_zone */ + YYSYMBOL_zone = 40, /* zone */ + YYSYMBOL_day = 41, /* day */ + YYSYMBOL_date = 42, /* date */ + YYSYMBOL_iso_8601_date = 43, /* iso_8601_date */ + YYSYMBOL_rel = 44, /* rel */ + YYSYMBOL_relunit = 45, /* relunit */ + YYSYMBOL_relunit_snumber = 46, /* relunit_snumber */ + YYSYMBOL_dayshift = 47, /* 
dayshift */ + YYSYMBOL_seconds = 48, /* seconds */ + YYSYMBOL_signed_seconds = 49, /* signed_seconds */ + YYSYMBOL_unsigned_seconds = 50, /* unsigned_seconds */ + YYSYMBOL_number = 51, /* number */ + YYSYMBOL_hybrid = 52, /* hybrid */ + YYSYMBOL_o_colon_minutes = 53 /* o_colon_minutes */ +}; +typedef enum yysymbol_kind_t yysymbol_kind_t; + + + + +#ifdef short +# undef short +#endif + +/* On compilers that do not define __PTRDIFF_MAX__ etc., make sure + <limits.h> and (if available) <stdint.h> are included + so that the code can choose integer types of a good width. */ + +#ifndef __PTRDIFF_MAX__ +# include <limits.h> /* INFRINGES ON USER NAME SPACE */ +# if defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__ +# include <stdint.h> /* INFRINGES ON USER NAME SPACE */ +# define YY_STDINT_H +# endif +#endif + +/* Narrow types that promote to a signed type and that can represent a + signed or unsigned integer of at least N bits. In tables they can + save space and decrease cache pressure. Promoting to a signed type + helps avoid bugs in integer arithmetic. */ + +#ifdef __INT_LEAST8_MAX__ +typedef __INT_LEAST8_TYPE__ yytype_int8; +#elif defined YY_STDINT_H +typedef int_least8_t yytype_int8; +#else +typedef signed char yytype_int8; +#endif + +#ifdef __INT_LEAST16_MAX__ +typedef __INT_LEAST16_TYPE__ yytype_int16; +#elif defined YY_STDINT_H +typedef int_least16_t yytype_int16; +#else +typedef short yytype_int16; +#endif + +/* Work around bug in HP-UX 11.23, which defines these macros + incorrectly for preprocessor constants. This workaround can likely + be removed in 2023, as HPE has promised support for HP-UX 11.23 + (aka HP-UX 11i v2) only through the end of 2022; see Table 2 of + <https://h20195.www2.hpe.com/V2/getpdf.aspx/4AA4-7673ENW.pdf>. 
*/ +#ifdef __hpux +# undef UINT_LEAST8_MAX +# undef UINT_LEAST16_MAX +# define UINT_LEAST8_MAX 255 +# define UINT_LEAST16_MAX 65535 +#endif + +#if defined __UINT_LEAST8_MAX__ && __UINT_LEAST8_MAX__ <= __INT_MAX__ +typedef __UINT_LEAST8_TYPE__ yytype_uint8; +#elif (!defined __UINT_LEAST8_MAX__ && defined YY_STDINT_H \ + && UINT_LEAST8_MAX <= INT_MAX) +typedef uint_least8_t yytype_uint8; +#elif !defined __UINT_LEAST8_MAX__ && UCHAR_MAX <= INT_MAX +typedef unsigned char yytype_uint8; +#else +typedef short yytype_uint8; +#endif + +#if defined __UINT_LEAST16_MAX__ && __UINT_LEAST16_MAX__ <= __INT_MAX__ +typedef __UINT_LEAST16_TYPE__ yytype_uint16; +#elif (!defined __UINT_LEAST16_MAX__ && defined YY_STDINT_H \ + && UINT_LEAST16_MAX <= INT_MAX) +typedef uint_least16_t yytype_uint16; +#elif !defined __UINT_LEAST16_MAX__ && USHRT_MAX <= INT_MAX +typedef unsigned short yytype_uint16; +#else +typedef int yytype_uint16; +#endif + +#ifndef YYPTRDIFF_T +# if defined __PTRDIFF_TYPE__ && defined __PTRDIFF_MAX__ +# define YYPTRDIFF_T __PTRDIFF_TYPE__ +# define YYPTRDIFF_MAXIMUM __PTRDIFF_MAX__ +# elif defined PTRDIFF_MAX +# ifndef ptrdiff_t +# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ +# endif +# define YYPTRDIFF_T ptrdiff_t +# define YYPTRDIFF_MAXIMUM PTRDIFF_MAX +# else +# define YYPTRDIFF_T long +# define YYPTRDIFF_MAXIMUM LONG_MAX +# endif +#endif + +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__ +# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned +# endif +#endif + +#define YYSIZE_MAXIMUM \ + YY_CAST (YYPTRDIFF_T, \ + (YYPTRDIFF_MAXIMUM < YY_CAST (YYSIZE_T, -1) \ + ? YYPTRDIFF_MAXIMUM \ + : YY_CAST (YYSIZE_T, -1))) + +#define YYSIZEOF(X) YY_CAST (YYPTRDIFF_T, sizeof (X)) + + +/* Stored state numbers (used for stacks). 
*/ +typedef yytype_int8 yy_state_t; + +/* State numbers in computations. */ +typedef int yy_state_fast_t; + +#ifndef YY_ +# if defined YYENABLE_NLS && YYENABLE_NLS +# if ENABLE_NLS +# include <libintl.h> /* INFRINGES ON USER NAME SPACE */ +# define YY_(Msgid) dgettext ("bison-runtime", Msgid) +# endif +# endif +# ifndef YY_ +# define YY_(Msgid) Msgid +# endif +#endif + + +#ifndef YY_ATTRIBUTE_PURE +# if defined __GNUC__ && 2 < __GNUC__ + (96 <= __GNUC_MINOR__) +# define YY_ATTRIBUTE_PURE __attribute__ ((__pure__)) +# else +# define YY_ATTRIBUTE_PURE +# endif +#endif + +#ifndef YY_ATTRIBUTE_UNUSED +# if defined __GNUC__ && 2 < __GNUC__ + (7 <= __GNUC_MINOR__) +# define YY_ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +# else +# define YY_ATTRIBUTE_UNUSED +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if ! defined lint || defined __GNUC__ +# define YY_USE(E) ((void) (E)) +#else +# define YY_USE(E) /* empty */ +#endif + +/* Suppress an incorrect diagnostic about yylval being uninitialized. */ +#if defined __GNUC__ && ! defined __ICC && 406 <= __GNUC__ * 100 + __GNUC_MINOR__ +# if __GNUC__ * 100 + __GNUC_MINOR__ < 407 +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") +# else +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") \ + _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +# endif +# define YY_IGNORE_MAYBE_UNINITIALIZED_END \ + _Pragma ("GCC diagnostic pop") +#else +# define YY_INITIAL_VALUE(Value) Value +#endif +#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_END +#endif +#ifndef YY_INITIAL_VALUE +# define YY_INITIAL_VALUE(Value) /* Nothing. */ +#endif + +#if defined __cplusplus && defined __GNUC__ && ! 
defined __ICC && 6 <= __GNUC__ +# define YY_IGNORE_USELESS_CAST_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuseless-cast\"") +# define YY_IGNORE_USELESS_CAST_END \ + _Pragma ("GCC diagnostic pop") +#endif +#ifndef YY_IGNORE_USELESS_CAST_BEGIN +# define YY_IGNORE_USELESS_CAST_BEGIN +# define YY_IGNORE_USELESS_CAST_END +#endif + + +#define YY_ASSERT(E) ((void) (0 && (E))) + +#if !defined yyoverflow + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include <alloca.h> /* INFRINGES ON USER NAME SPACE */ +# elif defined _AIX +# define YYSTACK_ALLOC __alloca +# elif defined _MSC_VER +# include <malloc.h> /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS +# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ + /* Use EXIT_SUCCESS as a witness for stdlib.h. */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's 'empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. */ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! defined EXIT_SUCCESS \ + && ! 
((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined EXIT_SUCCESS +void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! defined EXIT_SUCCESS +void free (void *); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# endif +#endif /* !defined yyoverflow */ + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + yy_state_t yyss_alloc; + YYSTYPE yyvs_alloc; +}; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (YYSIZEOF (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (YYSIZEOF (yy_state_t) + YYSIZEOF (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +# define YYCOPY_NEEDED 1 + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. */ +# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ + do \ + { \ + YYPTRDIFF_T yynewbytes; \ + YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ + Stack = &yyptr->Stack_alloc; \ + yynewbytes = yystacksize * YYSIZEOF (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / YYSIZEOF (*yyptr); \ + } \ + while (0) + +#endif + +#if defined YYCOPY_NEEDED && YYCOPY_NEEDED +/* Copy COUNT objects from SRC to DST. The source and destination do + not overlap. 
*/ +# ifndef YYCOPY +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(Dst, Src, Count) \ + __builtin_memcpy (Dst, Src, YY_CAST (YYSIZE_T, (Count)) * sizeof (*(Src))) +# else +# define YYCOPY(Dst, Src, Count) \ + do \ + { \ + YYPTRDIFF_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (Dst)[yyi] = (Src)[yyi]; \ + } \ + while (0) +# endif +# endif +#endif /* !YYCOPY_NEEDED */ + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 12 +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST 112 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 28 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 26 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 91 +/* YYNSTATES -- Number of states. */ +#define YYNSTATES 114 + +/* YYMAXUTOK -- Last valid token kind. */ +#define YYMAXUTOK 277 + + +/* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM + as returned by yylex, with out-of-bounds checking. */ +#define YYTRANSLATE(YYX) \ + (0 <= (YYX) && (YYX) <= YYMAXUTOK \ + ? YY_CAST (yysymbol_kind_t, yytranslate[YYX]) \ + : YYSYMBOL_YYUNDEF) + +/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM + as returned by yylex. 
*/ +static const yytype_int8 yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 26, 2, 2, 27, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 25, 2, + 2, 2, 2, 2, 23, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 24, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22 +}; + +#if YYDEBUG +/* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ +static const yytype_int16 yyrline[] = +{ + 0, 321, 321, 322, 326, 332, 334, 338, 341, 344, + 347, 350, 353, 356, 357, 358, 362, 366, 370, 374, + 378, 382, 386, 390, 394, 400, 402, 406, 431, 435, + 446, 449, 452, 456, 460, 464, 467, 473, 477, 481, + 485, 492, 496, 514, 521, 528, 532, 537, 541, 546, + 550, 559, 561, 563, 568, 570, 572, 574, 576, 578, + 580, 582, 584, 586, 588, 590, 592, 594, 596, 598, + 600, 602, 607, 612, 614, 618, 620, 622, 624, 626, + 628, 633, 637, 637, 640, 641, 646, 647, 652, 657, + 669, 670 +}; +#endif + +/** Accessing symbol of state STATE. */ +#define YY_ACCESSING_SYMBOL(State) YY_CAST (yysymbol_kind_t, yystos[State]) + +#if YYDEBUG || 0 +/* The user-facing name of the symbol whose (internal) number is + YYSYMBOL. No bounds checking. */ +static const char *yysymbol_name (yysymbol_kind_t yysymbol) YY_ATTRIBUTE_UNUSED; + +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. 
+ First, the terminals, then, starting at YYNTOKENS, nonterminals. */ +static const char *const yytname[] = +{ + "\"end of file\"", "error", "\"invalid token\"", "tAGO", "tDST", + "tYEAR_UNIT", "tMONTH_UNIT", "tHOUR_UNIT", "tMINUTE_UNIT", "tSEC_UNIT", + "tDAY_UNIT", "tDAY_SHIFT", "tDAY", "tDAYZONE", "tLOCAL_ZONE", + "tMERIDIAN", "tMONTH", "tORDINAL", "tZONE", "tSNUMBER", "tUNUMBER", + "tSDECIMAL_NUMBER", "tUDECIMAL_NUMBER", "'@'", "'T'", "':'", "','", + "'/'", "$accept", "spec", "timespec", "items", "item", "datetime", + "iso_8601_datetime", "time", "iso_8601_time", "o_zone_offset", + "zone_offset", "local_zone", "zone", "day", "date", "iso_8601_date", + "rel", "relunit", "relunit_snumber", "dayshift", "seconds", + "signed_seconds", "unsigned_seconds", "number", "hybrid", + "o_colon_minutes", YY_NULLPTR +}; + +static const char * +yysymbol_name (yysymbol_kind_t yysymbol) +{ + return yytname[yysymbol]; +} +#endif + +#define YYPACT_NINF (-93) + +#define yypact_value_is_default(Yyn) \ + ((Yyn) == YYPACT_NINF) + +#define YYTABLE_NINF (-1) + +#define yytable_value_is_error(Yyn) \ + 0 + +/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +static const yytype_int8 yypact[] = +{ + 38, 27, 77, -93, 46, -93, -93, -93, -93, -93, + -93, -93, -93, -93, -93, -93, -93, -93, -93, -93, + 62, -93, 82, -3, 66, 3, 74, -4, 83, 84, + 75, -93, -93, -93, -93, -93, -93, -93, -93, -93, + 71, -93, 93, -93, -93, -93, -93, -93, -93, 78, + 72, -93, -93, -93, -93, -93, -93, -93, -93, 25, + -93, -93, -93, -93, -93, -93, -93, -93, -93, -93, + -93, -93, -93, -93, -93, 21, 19, 79, 80, -93, + -93, -93, -93, -93, 81, -93, -93, 85, 86, -93, + -93, -93, -93, -93, -6, 76, 17, -93, -93, -93, + -93, 87, 69, -93, -93, 88, 89, -1, -93, 18, + -93, -93, 69, 91 +}; + +/* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. + Performed when YYTABLE does not specify something else to do. Zero + means the default is an error. 
*/ +static const yytype_int8 yydefact[] = +{ + 5, 0, 0, 2, 3, 85, 87, 84, 86, 4, + 82, 83, 1, 56, 59, 65, 68, 73, 62, 81, + 37, 35, 28, 0, 0, 30, 0, 88, 0, 0, + 31, 6, 7, 16, 8, 21, 9, 10, 12, 11, + 49, 13, 52, 74, 53, 14, 15, 38, 29, 0, + 45, 54, 57, 63, 66, 69, 60, 39, 36, 90, + 32, 75, 76, 78, 79, 80, 77, 55, 58, 64, + 67, 70, 61, 40, 18, 47, 90, 0, 0, 22, + 89, 71, 72, 33, 0, 51, 44, 0, 0, 34, + 43, 48, 50, 27, 25, 41, 0, 17, 46, 91, + 19, 90, 0, 23, 26, 0, 0, 25, 42, 25, + 20, 24, 0, 25 +}; + +/* YYPGOTO[NTERM-NUM]. */ +static const yytype_int8 yypgoto[] = +{ + -93, -93, -93, -93, -93, -93, -93, -93, 20, -68, + -27, -93, -93, -93, -93, -93, -93, -93, 60, -93, + -93, -93, -92, -93, -93, 43 +}; + +/* YYDEFGOTO[NTERM-NUM]. */ +static const yytype_int8 yydefgoto[] = +{ + 0, 2, 3, 4, 31, 32, 33, 34, 35, 103, + 104, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 9, 10, 11, 45, 46, 93 +}; + +/* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule whose + number is the opposite. If YYTABLE_NINF, syntax error. 
*/ +static const yytype_int8 yytable[] = +{ + 79, 67, 68, 69, 70, 71, 72, 58, 73, 100, + 107, 74, 75, 101, 110, 76, 49, 50, 101, 102, + 113, 77, 59, 78, 61, 62, 63, 64, 65, 66, + 61, 62, 63, 64, 65, 66, 101, 101, 92, 111, + 90, 91, 106, 112, 88, 111, 5, 6, 7, 8, + 88, 13, 14, 15, 16, 17, 18, 19, 20, 21, + 22, 1, 23, 24, 25, 26, 27, 28, 29, 79, + 30, 51, 52, 53, 54, 55, 56, 12, 57, 61, + 62, 63, 64, 65, 66, 60, 48, 80, 47, 6, + 83, 8, 81, 82, 26, 84, 85, 86, 87, 94, + 95, 96, 89, 105, 97, 98, 99, 0, 108, 109, + 101, 0, 88 +}; + +static const yytype_int8 yycheck[] = +{ + 27, 5, 6, 7, 8, 9, 10, 4, 12, 15, + 102, 15, 16, 19, 15, 19, 19, 20, 19, 25, + 112, 25, 19, 27, 5, 6, 7, 8, 9, 10, + 5, 6, 7, 8, 9, 10, 19, 19, 19, 107, + 19, 20, 25, 25, 25, 113, 19, 20, 21, 22, + 25, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 23, 16, 17, 18, 19, 20, 21, 22, 96, + 24, 5, 6, 7, 8, 9, 10, 0, 12, 5, + 6, 7, 8, 9, 10, 25, 4, 27, 26, 20, + 30, 22, 9, 9, 19, 24, 3, 19, 26, 20, + 20, 20, 59, 27, 84, 20, 20, -1, 20, 20, + 19, -1, 25 +}; + +/* YYSTOS[STATE-NUM] -- The symbol kind of the accessing symbol of + state STATE-NUM. */ +static const yytype_int8 yystos[] = +{ + 0, 23, 29, 30, 31, 19, 20, 21, 22, 48, + 49, 50, 0, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 16, 17, 18, 19, 20, 21, 22, + 24, 32, 33, 34, 35, 36, 39, 40, 41, 42, + 43, 44, 45, 46, 47, 51, 52, 26, 4, 19, + 20, 5, 6, 7, 8, 9, 10, 12, 4, 19, + 46, 5, 6, 7, 8, 9, 10, 5, 6, 7, + 8, 9, 10, 12, 15, 16, 19, 25, 27, 38, + 46, 9, 9, 46, 24, 3, 19, 26, 25, 53, + 19, 20, 19, 53, 20, 20, 20, 36, 20, 20, + 15, 19, 25, 37, 38, 27, 25, 50, 20, 20, + 15, 37, 25, 50 +}; + +/* YYR1[RULE-NUM] -- Symbol kind of the left-hand side of rule RULE-NUM. 
*/ +static const yytype_int8 yyr1[] = +{ + 0, 28, 29, 29, 30, 31, 31, 32, 32, 32, + 32, 32, 32, 32, 32, 32, 33, 34, 35, 35, + 35, 35, 36, 36, 36, 37, 37, 38, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 41, 41, 41, + 41, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 43, 44, 44, 44, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 46, 46, 46, 46, 46, + 46, 47, 48, 48, 49, 49, 50, 50, 51, 52, + 53, 53 +}; + +/* YYR2[RULE-NUM] -- Number of symbols on the right-hand side of rule RULE-NUM. */ +static const yytype_int8 yyr2[] = +{ + 0, 2, 1, 1, 2, 0, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 3, 2, 4, + 6, 1, 2, 4, 6, 0, 1, 2, 1, 2, + 1, 1, 2, 2, 3, 1, 2, 1, 2, 2, + 2, 3, 5, 3, 3, 2, 4, 2, 3, 1, + 3, 2, 1, 1, 2, 2, 1, 2, 2, 1, + 2, 2, 1, 2, 2, 1, 2, 2, 1, 2, + 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, + 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, + 0, 2 +}; + + +enum { YYENOMEM = -2 }; + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab +#define YYNOMEM goto yyexhaustedlab + + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ + do \ + if (yychar == YYEMPTY) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + YYPOPSTACK (yylen); \ + yystate = *yyssp; \ + goto yybackup; \ + } \ + else \ + { \ + yyerror (pc, YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ + while (0) + +/* Backward compatibility with an undocumented macro. + Use YYerror or YYUNDEF. */ +#define YYERRCODE YYUNDEF + + +/* Enable debugging if requested. 
*/ +#if YYDEBUG + +# ifndef YYFPRINTF +# include <stdio.h> /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (0) + + + + +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Kind, Value, pc); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (0) + + +/*-----------------------------------. +| Print this symbol's value on YYO. | +`-----------------------------------*/ + +static void +yy_symbol_value_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, parser_control *pc) +{ + FILE *yyoutput = yyo; + YY_USE (yyoutput); + YY_USE (pc); + if (!yyvaluep) + return; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + YY_USE (yykind); + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + +/*---------------------------. +| Print this symbol on YYO. | +`---------------------------*/ + +static void +yy_symbol_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, parser_control *pc) +{ + YYFPRINTF (yyo, "%s %s (", + yykind < YYNTOKENS ? "token" : "nterm", yysymbol_name (yykind)); + + yy_symbol_value_print (yyo, yykind, yyvaluep, pc); + YYFPRINTF (yyo, ")"); +} + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). | +`------------------------------------------------------------------*/ + +static void +yy_stack_print (yy_state_t *yybottom, yy_state_t *yytop) +{ + YYFPRINTF (stderr, "Stack now"); + for (; yybottom <= yytop; yybottom++) + { + int yybot = *yybottom; + YYFPRINTF (stderr, " %d", yybot); + } + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (0) + + +/*------------------------------------------------. 
+| Report that the YYRULE is going to be reduced. | +`------------------------------------------------*/ + +static void +yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp, + int yyrule, parser_control *pc) +{ + int yylno = yyrline[yyrule]; + int yynrhs = yyr2[yyrule]; + int yyi; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %d):\n", + yyrule - 1, yylno); + /* The symbols being reduced. */ + for (yyi = 0; yyi < yynrhs; yyi++) + { + YYFPRINTF (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, + YY_ACCESSING_SYMBOL (+yyssp[yyi + 1 - yynrhs]), + &yyvsp[(yyi + 1) - (yynrhs)], pc); + YYFPRINTF (stderr, "\n"); + } +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyssp, yyvsp, Rule, pc); \ +} while (0) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !YYDEBUG */ +# define YYDPRINTF(Args) ((void) 0) +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + + + + + +/*-----------------------------------------------. +| Release the memory associated to this symbol. 
| +`-----------------------------------------------*/ + +static void +yydestruct (const char *yymsg, + yysymbol_kind_t yykind, YYSTYPE *yyvaluep, parser_control *pc) +{ + YY_USE (yyvaluep); + YY_USE (pc); + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yykind, yyvaluep, yylocationp); + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + YY_USE (yykind); + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + + + + + +/*----------. +| yyparse. | +`----------*/ + +int +yyparse (parser_control *pc) +{ +/* Lookahead token kind. */ +int yychar; + + +/* The semantic value of the lookahead symbol. */ +/* Default value used for initialization, for pacifying older GCCs + or non-GCC compilers. */ +YY_INITIAL_VALUE (static YYSTYPE yyval_default;) +YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); + + /* Number of syntax errors so far. */ + int yynerrs = 0; + + yy_state_fast_t yystate = 0; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus = 0; + + /* Refer to the stacks through separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* Their size. */ + YYPTRDIFF_T yystacksize = YYINITDEPTH; + + /* The state stack: array, bottom, top. */ + yy_state_t yyssa[YYINITDEPTH]; + yy_state_t *yyss = yyssa; + yy_state_t *yyssp = yyss; + + /* The semantic value stack: array, bottom, top. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs = yyvsa; + YYSTYPE *yyvsp = yyvs; + + int yyn; + /* The return value of yyparse. */ + int yyresult; + /* Lookahead symbol kind. */ + yysymbol_kind_t yytoken = YYSYMBOL_YYEMPTY; + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + + + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yychar = YYEMPTY; /* Cause a token to be read. 
*/ + + goto yysetstate; + + +/*------------------------------------------------------------. +| yynewstate -- push a new state, which is found in yystate. | +`------------------------------------------------------------*/ +yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + +/*--------------------------------------------------------------------. +| yysetstate -- set current state (the top of the stack) to yystate. | +`--------------------------------------------------------------------*/ +yysetstate: + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + YY_ASSERT (0 <= yystate && yystate < YYNSTATES); + YY_IGNORE_USELESS_CAST_BEGIN + *yyssp = YY_CAST (yy_state_t, yystate); + YY_IGNORE_USELESS_CAST_END + YY_STACK_PRINT (yyss, yyssp); + + if (yyss + yystacksize - 1 <= yyssp) +#if !defined yyoverflow && !defined YYSTACK_RELOCATE + YYNOMEM; +#else + { + /* Get the current used size of the three stacks, in elements. */ + YYPTRDIFF_T yysize = yyssp - yyss + 1; + +# if defined yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + yy_state_t *yyss1 = yyss; + YYSTYPE *yyvs1 = yyvs; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * YYSIZEOF (*yyssp), + &yyvs1, yysize * YYSIZEOF (*yyvsp), + &yystacksize); + yyss = yyss1; + yyvs = yyvs1; + } +# else /* defined YYSTACK_RELOCATE */ + /* Extend the stack our own way. 
*/ + if (YYMAXDEPTH <= yystacksize) + YYNOMEM; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yy_state_t *yyss1 = yyss; + union yyalloc *yyptr = + YY_CAST (union yyalloc *, + YYSTACK_ALLOC (YY_CAST (YYSIZE_T, YYSTACK_BYTES (yystacksize)))); + if (! yyptr) + YYNOMEM; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + YY_IGNORE_USELESS_CAST_BEGIN + YYDPRINTF ((stderr, "Stack size increased to %ld\n", + YY_CAST (long, yystacksize))); + YY_IGNORE_USELESS_CAST_END + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } +#endif /* !defined yyoverflow && !defined YYSTACK_RELOCATE */ + + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either empty, or end-of-input, or a valid lookahead. */ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token\n")); + yychar = yylex (&yylval, pc); + } + + if (yychar <= YYEOF) + { + yychar = YYEOF; + yytoken = YYSYMBOL_YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else if (yychar == YYerror) + { + /* The scanner already issued an error message, process directly + to error recovery. But do not keep the error token as + lookahead, it is too special and may lead us to an endless + loop in error recovery. 
*/ + yychar = YYUNDEF; + yytoken = YYSYMBOL_YYerror; + goto yyerrlab1; + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. */ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yytable_value_is_error (yyn)) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); + yystate = yyn; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + /* Discard the shifted token. */ + yychar = YYEMPTY; + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + '$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. 
*/ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 4: /* timespec: '@' seconds */ +#line 326 "sys-utils/hwclock-parse-date.y" + { + pc->seconds = (yyvsp[0].timespec); + pc->timespec_seen = 1; + } +#line 1579 "sys-utils/hwclock-parse-date.c" + break; + + case 7: /* item: datetime */ +#line 338 "sys-utils/hwclock-parse-date.y" + { + pc->times_seen++; pc->dates_seen++; + } +#line 1587 "sys-utils/hwclock-parse-date.c" + break; + + case 8: /* item: time */ +#line 341 "sys-utils/hwclock-parse-date.y" + { + pc->times_seen++; + } +#line 1595 "sys-utils/hwclock-parse-date.c" + break; + + case 9: /* item: local_zone */ +#line 344 "sys-utils/hwclock-parse-date.y" + { + pc->local_zones_seen++; + } +#line 1603 "sys-utils/hwclock-parse-date.c" + break; + + case 10: /* item: zone */ +#line 347 "sys-utils/hwclock-parse-date.y" + { + pc->zones_seen++; + } +#line 1611 "sys-utils/hwclock-parse-date.c" + break; + + case 11: /* item: date */ +#line 350 "sys-utils/hwclock-parse-date.y" + { + pc->dates_seen++; + } +#line 1619 "sys-utils/hwclock-parse-date.c" + break; + + case 12: /* item: day */ +#line 353 "sys-utils/hwclock-parse-date.y" + { + pc->days_seen++; + } +#line 1627 "sys-utils/hwclock-parse-date.c" + break; + + case 18: /* time: tUNUMBER tMERIDIAN */ +#line 370 "sys-utils/hwclock-parse-date.y" + { + set_hhmmss (pc, (yyvsp[-1].textintval).value, 0, 0, 0); + pc->meridian = (yyvsp[0].intval); + } +#line 1636 "sys-utils/hwclock-parse-date.c" + break; + + case 19: /* time: tUNUMBER ':' tUNUMBER tMERIDIAN */ +#line 374 "sys-utils/hwclock-parse-date.y" + { + set_hhmmss (pc, (yyvsp[-3].textintval).value, (yyvsp[-1].textintval).value, 0, 0); + pc->meridian = (yyvsp[0].intval); + } +#line 1645 "sys-utils/hwclock-parse-date.c" + break; + + case 20: /* time: tUNUMBER ':' tUNUMBER ':' unsigned_seconds tMERIDIAN */ +#line 378 "sys-utils/hwclock-parse-date.y" + { + set_hhmmss (pc, (yyvsp[-5].textintval).value, (yyvsp[-3].textintval).value, 
(yyvsp[-1].timespec).tv_sec, (yyvsp[-1].timespec).tv_nsec); + pc->meridian = (yyvsp[0].intval); + } +#line 1654 "sys-utils/hwclock-parse-date.c" + break; + + case 22: /* iso_8601_time: tUNUMBER zone_offset */ +#line 386 "sys-utils/hwclock-parse-date.y" + { + set_hhmmss (pc, (yyvsp[-1].textintval).value, 0, 0, 0); + pc->meridian = MER24; + } +#line 1663 "sys-utils/hwclock-parse-date.c" + break; + + case 23: /* iso_8601_time: tUNUMBER ':' tUNUMBER o_zone_offset */ +#line 390 "sys-utils/hwclock-parse-date.y" + { + set_hhmmss (pc, (yyvsp[-3].textintval).value, (yyvsp[-1].textintval).value, 0, 0); + pc->meridian = MER24; + } +#line 1672 "sys-utils/hwclock-parse-date.c" + break; + + case 24: /* iso_8601_time: tUNUMBER ':' tUNUMBER ':' unsigned_seconds o_zone_offset */ +#line 394 "sys-utils/hwclock-parse-date.y" + { + set_hhmmss (pc, (yyvsp[-5].textintval).value, (yyvsp[-3].textintval).value, (yyvsp[-1].timespec).tv_sec, (yyvsp[-1].timespec).tv_nsec); + pc->meridian = MER24; + } +#line 1681 "sys-utils/hwclock-parse-date.c" + break; + + case 27: /* zone_offset: tSNUMBER o_colon_minutes */ +#line 406 "sys-utils/hwclock-parse-date.y" + { + pc->zones_seen++; + if (! 
time_zone_hhmm (pc, (yyvsp[-1].textintval), (yyvsp[0].textintval))) YYABORT; + } +#line 1690 "sys-utils/hwclock-parse-date.c" + break; + + case 28: /* local_zone: tLOCAL_ZONE */ +#line 431 "sys-utils/hwclock-parse-date.y" + { + pc->local_isdst = (yyvsp[0].intval); + pc->dsts_seen += (0 < (yyvsp[0].intval)); + } +#line 1699 "sys-utils/hwclock-parse-date.c" + break; + + case 29: /* local_zone: tLOCAL_ZONE tDST */ +#line 435 "sys-utils/hwclock-parse-date.y" + { + pc->local_isdst = 1; + pc->dsts_seen += (0 < (yyvsp[-1].intval)) + 1; + } +#line 1708 "sys-utils/hwclock-parse-date.c" + break; + + case 30: /* zone: tZONE */ +#line 446 "sys-utils/hwclock-parse-date.y" + { + pc->time_zone = (yyvsp[0].intval); + } +#line 1716 "sys-utils/hwclock-parse-date.c" + break; + + case 31: /* zone: 'T' */ +#line 449 "sys-utils/hwclock-parse-date.y" + { + pc->time_zone = HOUR(7); + } +#line 1724 "sys-utils/hwclock-parse-date.c" + break; + + case 32: /* zone: tZONE relunit_snumber */ +#line 452 "sys-utils/hwclock-parse-date.y" + { + pc->time_zone = (yyvsp[-1].intval); + apply_relative_time (pc, (yyvsp[0].rel), 1); + } +#line 1733 "sys-utils/hwclock-parse-date.c" + break; + + case 33: /* zone: 'T' relunit_snumber */ +#line 456 "sys-utils/hwclock-parse-date.y" + { + pc->time_zone = HOUR(7); + apply_relative_time (pc, (yyvsp[0].rel), 1); + } +#line 1742 "sys-utils/hwclock-parse-date.c" + break; + + case 34: /* zone: tZONE tSNUMBER o_colon_minutes */ +#line 460 "sys-utils/hwclock-parse-date.y" + { + if (! 
time_zone_hhmm (pc, (yyvsp[-1].textintval), (yyvsp[0].textintval))) YYABORT; + pc->time_zone += (yyvsp[-2].intval); + } +#line 1751 "sys-utils/hwclock-parse-date.c" + break; + + case 35: /* zone: tDAYZONE */ +#line 464 "sys-utils/hwclock-parse-date.y" + { + pc->time_zone = (yyvsp[0].intval) + 60; + } +#line 1759 "sys-utils/hwclock-parse-date.c" + break; + + case 36: /* zone: tZONE tDST */ +#line 467 "sys-utils/hwclock-parse-date.y" + { + pc->time_zone = (yyvsp[-1].intval) + 60; + } +#line 1767 "sys-utils/hwclock-parse-date.c" + break; + + case 37: /* day: tDAY */ +#line 473 "sys-utils/hwclock-parse-date.y" + { + pc->day_ordinal = 0; + pc->day_number = (yyvsp[0].intval); + } +#line 1776 "sys-utils/hwclock-parse-date.c" + break; + + case 38: /* day: tDAY ',' */ +#line 477 "sys-utils/hwclock-parse-date.y" + { + pc->day_ordinal = 0; + pc->day_number = (yyvsp[-1].intval); + } +#line 1785 "sys-utils/hwclock-parse-date.c" + break; + + case 39: /* day: tORDINAL tDAY */ +#line 481 "sys-utils/hwclock-parse-date.y" + { + pc->day_ordinal = (yyvsp[-1].intval); + pc->day_number = (yyvsp[0].intval); + } +#line 1794 "sys-utils/hwclock-parse-date.c" + break; + + case 40: /* day: tUNUMBER tDAY */ +#line 485 "sys-utils/hwclock-parse-date.y" + { + pc->day_ordinal = (yyvsp[-1].textintval).value; + pc->day_number = (yyvsp[0].intval); + } +#line 1803 "sys-utils/hwclock-parse-date.c" + break; + + case 41: /* date: tUNUMBER '/' tUNUMBER */ +#line 492 "sys-utils/hwclock-parse-date.y" + { + pc->month = (yyvsp[-2].textintval).value; + pc->day = (yyvsp[0].textintval).value; + } +#line 1812 "sys-utils/hwclock-parse-date.c" + break; + + case 42: /* date: tUNUMBER '/' tUNUMBER '/' tUNUMBER */ +#line 496 "sys-utils/hwclock-parse-date.y" + { + /** + * Interpret as YYYY/MM/DD if the first value has 4 or more digits, + * otherwise as MM/DD/YY. + * The goal in recognizing YYYY/MM/DD is solely to support legacy + * machine-generated dates like those in an RCS log listing. 
If + * you want portability, use the ISO 8601 format. + */ + if (4 <= (yyvsp[-4].textintval).digits) { + pc->year = (yyvsp[-4].textintval); + pc->month = (yyvsp[-2].textintval).value; + pc->day = (yyvsp[0].textintval).value; + } else { + pc->month = (yyvsp[-4].textintval).value; + pc->day = (yyvsp[-2].textintval).value; + pc->year = (yyvsp[0].textintval); + } + } +#line 1835 "sys-utils/hwclock-parse-date.c" + break; + + case 43: /* date: tUNUMBER tMONTH tSNUMBER */ +#line 514 "sys-utils/hwclock-parse-date.y" + { + /* e.g. 17-JUN-1992. */ + pc->day = (yyvsp[-2].textintval).value; + pc->month = (yyvsp[-1].intval); + pc->year.value = -(yyvsp[0].textintval).value; + pc->year.digits = (yyvsp[0].textintval).digits; + } +#line 1847 "sys-utils/hwclock-parse-date.c" + break; + + case 44: /* date: tMONTH tSNUMBER tSNUMBER */ +#line 521 "sys-utils/hwclock-parse-date.y" + { + /* e.g. JUN-17-1992. */ + pc->month = (yyvsp[-2].intval); + pc->day = -(yyvsp[-1].textintval).value; + pc->year.value = -(yyvsp[0].textintval).value; + pc->year.digits = (yyvsp[0].textintval).digits; + } +#line 1859 "sys-utils/hwclock-parse-date.c" + break; + + case 45: /* date: tMONTH tUNUMBER */ +#line 528 "sys-utils/hwclock-parse-date.y" + { + pc->month = (yyvsp[-1].intval); + pc->day = (yyvsp[0].textintval).value; + } +#line 1868 "sys-utils/hwclock-parse-date.c" + break; + + case 46: /* date: tMONTH tUNUMBER ',' tUNUMBER */ +#line 532 "sys-utils/hwclock-parse-date.y" + { + pc->month = (yyvsp[-3].intval); + pc->day = (yyvsp[-2].textintval).value; + pc->year = (yyvsp[0].textintval); + } +#line 1878 "sys-utils/hwclock-parse-date.c" + break; + + case 47: /* date: tUNUMBER tMONTH */ +#line 537 "sys-utils/hwclock-parse-date.y" + { + pc->day = (yyvsp[-1].textintval).value; + pc->month = (yyvsp[0].intval); + } +#line 1887 "sys-utils/hwclock-parse-date.c" + break; + + case 48: /* date: tUNUMBER tMONTH tUNUMBER */ +#line 541 "sys-utils/hwclock-parse-date.y" + { + pc->day = (yyvsp[-2].textintval).value; + 
pc->month = (yyvsp[-1].intval); + pc->year = (yyvsp[0].textintval); + } +#line 1897 "sys-utils/hwclock-parse-date.c" + break; + + case 50: /* iso_8601_date: tUNUMBER tSNUMBER tSNUMBER */ +#line 550 "sys-utils/hwclock-parse-date.y" + { + /* ISO 8601 format.YYYY-MM-DD. */ + pc->year = (yyvsp[-2].textintval); + pc->month = -(yyvsp[-1].textintval).value; + pc->day = -(yyvsp[0].textintval).value; + } +#line 1908 "sys-utils/hwclock-parse-date.c" + break; + + case 51: /* rel: relunit tAGO */ +#line 560 "sys-utils/hwclock-parse-date.y" + { apply_relative_time (pc, (yyvsp[-1].rel), (yyvsp[0].intval)); } +#line 1914 "sys-utils/hwclock-parse-date.c" + break; + + case 52: /* rel: relunit */ +#line 562 "sys-utils/hwclock-parse-date.y" + { apply_relative_time (pc, (yyvsp[0].rel), 1); } +#line 1920 "sys-utils/hwclock-parse-date.c" + break; + + case 53: /* rel: dayshift */ +#line 564 "sys-utils/hwclock-parse-date.y" + { apply_relative_time (pc, (yyvsp[0].rel), 1); } +#line 1926 "sys-utils/hwclock-parse-date.c" + break; + + case 54: /* relunit: tORDINAL tYEAR_UNIT */ +#line 569 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).year = (yyvsp[-1].intval); } +#line 1932 "sys-utils/hwclock-parse-date.c" + break; + + case 55: /* relunit: tUNUMBER tYEAR_UNIT */ +#line 571 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).year = (yyvsp[-1].textintval).value; } +#line 1938 "sys-utils/hwclock-parse-date.c" + break; + + case 56: /* relunit: tYEAR_UNIT */ +#line 573 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).year = 1; } +#line 1944 "sys-utils/hwclock-parse-date.c" + break; + + case 57: /* relunit: tORDINAL tMONTH_UNIT */ +#line 575 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).month = (yyvsp[-1].intval); } +#line 1950 "sys-utils/hwclock-parse-date.c" + break; + + case 58: /* relunit: tUNUMBER tMONTH_UNIT */ +#line 577 "sys-utils/hwclock-parse-date.y" + { 
(yyval.rel) = RELATIVE_TIME_0; (yyval.rel).month = (yyvsp[-1].textintval).value; } +#line 1956 "sys-utils/hwclock-parse-date.c" + break; + + case 59: /* relunit: tMONTH_UNIT */ +#line 579 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).month = 1; } +#line 1962 "sys-utils/hwclock-parse-date.c" + break; + + case 60: /* relunit: tORDINAL tDAY_UNIT */ +#line 581 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).day = (yyvsp[-1].intval) * (yyvsp[0].intval); } +#line 1968 "sys-utils/hwclock-parse-date.c" + break; + + case 61: /* relunit: tUNUMBER tDAY_UNIT */ +#line 583 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).day = (yyvsp[-1].textintval).value * (yyvsp[0].intval); } +#line 1974 "sys-utils/hwclock-parse-date.c" + break; + + case 62: /* relunit: tDAY_UNIT */ +#line 585 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).day = (yyvsp[0].intval); } +#line 1980 "sys-utils/hwclock-parse-date.c" + break; + + case 63: /* relunit: tORDINAL tHOUR_UNIT */ +#line 587 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).hour = (yyvsp[-1].intval); } +#line 1986 "sys-utils/hwclock-parse-date.c" + break; + + case 64: /* relunit: tUNUMBER tHOUR_UNIT */ +#line 589 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).hour = (yyvsp[-1].textintval).value; } +#line 1992 "sys-utils/hwclock-parse-date.c" + break; + + case 65: /* relunit: tHOUR_UNIT */ +#line 591 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).hour = 1; } +#line 1998 "sys-utils/hwclock-parse-date.c" + break; + + case 66: /* relunit: tORDINAL tMINUTE_UNIT */ +#line 593 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).minutes = (yyvsp[-1].intval); } +#line 2004 "sys-utils/hwclock-parse-date.c" + break; + + case 67: /* relunit: tUNUMBER tMINUTE_UNIT */ +#line 595 
"sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).minutes = (yyvsp[-1].textintval).value; } +#line 2010 "sys-utils/hwclock-parse-date.c" + break; + + case 68: /* relunit: tMINUTE_UNIT */ +#line 597 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).minutes = 1; } +#line 2016 "sys-utils/hwclock-parse-date.c" + break; + + case 69: /* relunit: tORDINAL tSEC_UNIT */ +#line 599 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).seconds = (yyvsp[-1].intval); } +#line 2022 "sys-utils/hwclock-parse-date.c" + break; + + case 70: /* relunit: tUNUMBER tSEC_UNIT */ +#line 601 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).seconds = (yyvsp[-1].textintval).value; } +#line 2028 "sys-utils/hwclock-parse-date.c" + break; + + case 71: /* relunit: tSDECIMAL_NUMBER tSEC_UNIT */ +#line 602 "sys-utils/hwclock-parse-date.y" + { + (yyval.rel) = RELATIVE_TIME_0; + (yyval.rel).seconds = (yyvsp[-1].timespec).tv_sec; + (yyval.rel).ns = (yyvsp[-1].timespec).tv_nsec; + } +#line 2038 "sys-utils/hwclock-parse-date.c" + break; + + case 72: /* relunit: tUDECIMAL_NUMBER tSEC_UNIT */ +#line 607 "sys-utils/hwclock-parse-date.y" + { + (yyval.rel) = RELATIVE_TIME_0; + (yyval.rel).seconds = (yyvsp[-1].timespec).tv_sec; + (yyval.rel).ns = (yyvsp[-1].timespec).tv_nsec; + } +#line 2048 "sys-utils/hwclock-parse-date.c" + break; + + case 73: /* relunit: tSEC_UNIT */ +#line 613 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).seconds = 1; } +#line 2054 "sys-utils/hwclock-parse-date.c" + break; + + case 75: /* relunit_snumber: tSNUMBER tYEAR_UNIT */ +#line 619 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).year = (yyvsp[-1].textintval).value; } +#line 2060 "sys-utils/hwclock-parse-date.c" + break; + + case 76: /* relunit_snumber: tSNUMBER tMONTH_UNIT */ +#line 621 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = 
RELATIVE_TIME_0; (yyval.rel).month = (yyvsp[-1].textintval).value; } +#line 2066 "sys-utils/hwclock-parse-date.c" + break; + + case 77: /* relunit_snumber: tSNUMBER tDAY_UNIT */ +#line 623 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).day = (yyvsp[-1].textintval).value * (yyvsp[0].intval); } +#line 2072 "sys-utils/hwclock-parse-date.c" + break; + + case 78: /* relunit_snumber: tSNUMBER tHOUR_UNIT */ +#line 625 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).hour = (yyvsp[-1].textintval).value; } +#line 2078 "sys-utils/hwclock-parse-date.c" + break; + + case 79: /* relunit_snumber: tSNUMBER tMINUTE_UNIT */ +#line 627 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).minutes = (yyvsp[-1].textintval).value; } +#line 2084 "sys-utils/hwclock-parse-date.c" + break; + + case 80: /* relunit_snumber: tSNUMBER tSEC_UNIT */ +#line 629 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).seconds = (yyvsp[-1].textintval).value; } +#line 2090 "sys-utils/hwclock-parse-date.c" + break; + + case 81: /* dayshift: tDAY_SHIFT */ +#line 634 "sys-utils/hwclock-parse-date.y" + { (yyval.rel) = RELATIVE_TIME_0; (yyval.rel).day = (yyvsp[0].intval); } +#line 2096 "sys-utils/hwclock-parse-date.c" + break; + + case 85: /* signed_seconds: tSNUMBER */ +#line 642 "sys-utils/hwclock-parse-date.y" + { (yyval.timespec).tv_sec = (yyvsp[0].textintval).value; (yyval.timespec).tv_nsec = 0; } +#line 2102 "sys-utils/hwclock-parse-date.c" + break; + + case 87: /* unsigned_seconds: tUNUMBER */ +#line 648 "sys-utils/hwclock-parse-date.y" + { (yyval.timespec).tv_sec = (yyvsp[0].textintval).value; (yyval.timespec).tv_nsec = 0; } +#line 2108 "sys-utils/hwclock-parse-date.c" + break; + + case 88: /* number: tUNUMBER */ +#line 653 "sys-utils/hwclock-parse-date.y" + { digits_to_date_time (pc, (yyvsp[0].textintval)); } +#line 2114 "sys-utils/hwclock-parse-date.c" + break; + + case 89: /* 
hybrid: tUNUMBER relunit_snumber */ +#line 657 "sys-utils/hwclock-parse-date.y" + { + /** + * Hybrid all-digit and relative offset, so that we accept e.g., + * "YYYYMMDD +N days" as well as "YYYYMMDD N days". + */ + digits_to_date_time (pc, (yyvsp[-1].textintval)); + apply_relative_time (pc, (yyvsp[0].rel), 1); + } +#line 2127 "sys-utils/hwclock-parse-date.c" + break; + + case 90: /* o_colon_minutes: %empty */ +#line 669 "sys-utils/hwclock-parse-date.y" + { (yyval.textintval).value = (yyval.textintval).digits = 0; } +#line 2133 "sys-utils/hwclock-parse-date.c" + break; + + case 91: /* o_colon_minutes: ':' tUNUMBER */ +#line 670 "sys-utils/hwclock-parse-date.y" + { + (yyval.textintval) = (yyvsp[0].textintval); + } +#line 2141 "sys-utils/hwclock-parse-date.c" + break; + + +#line 2145 "sys-utils/hwclock-parse-date.c" + + default: break; + } + /* User semantic actions sometimes alter yychar, and that requires + that yytoken be updated with the new translation. We take the + approach of translating immediately before every use of yytoken. + One alternative is translating here after every semantic action, + but that translation would be missed if the semantic action invokes + YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or + if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an + incorrect destructor might then be invoked immediately. In the + case of YYERROR or YYBACKUP, subsequent parser actions might lead + to an incorrect destructor call or verbose syntax error message + before the lookahead is translated. */ + YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc); + + YYPOPSTACK (yylen); + yylen = 0; + + *++yyvsp = yyval; + + /* Now 'shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. 
*/ + { + const int yylhs = yyr1[yyn] - YYNTOKENS; + const int yyi = yypgoto[yylhs] + *yyssp; + yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp + ? yytable[yyi] + : yydefgoto[yylhs]); + } + + goto yynewstate; + + +/*--------------------------------------. +| yyerrlab -- here on detecting error. | +`--------------------------------------*/ +yyerrlab: + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = yychar == YYEMPTY ? YYSYMBOL_YYEMPTY : YYTRANSLATE (yychar); + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) + { + ++yynerrs; + yyerror (pc, YY_("syntax error")); + } + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + if (yychar <= YYEOF) + { + /* Return failure if at end of input. */ + if (yychar == YYEOF) + YYABORT; + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval, pc); + yychar = YYEMPTY; + } + } + + /* Else will try to reuse lookahead token after shifting the error + token. */ + goto yyerrlab1; + + +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. | +`---------------------------------------------------*/ +yyerrorlab: + /* Pacify compilers when the user code never invokes YYERROR and the + label yyerrorlab therefore never appears in user code. */ + if (0) + YYERROR; + ++yynerrs; + + /* Do not reclaim the symbols of the rule whose action triggered + this YYERROR. */ + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + yystate = *yyssp; + goto yyerrlab1; + + +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. | +`-------------------------------------------------------------*/ +yyerrlab1: + yyerrstatus = 3; /* Each real token shifted decrements this. 
*/ + + /* Pop stack until we find a state that shifts the error token. */ + for (;;) + { + yyn = yypact[yystate]; + if (!yypact_value_is_default (yyn)) + { + yyn += YYSYMBOL_YYerror; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYSYMBOL_YYerror) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + YYABORT; + + + yydestruct ("Error: popping", + YY_ACCESSING_SYMBOL (yystate), yyvsp, pc); + YYPOPSTACK (1); + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp); + } + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp); + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturnlab; + + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturnlab; + + +/*-----------------------------------------------------------. +| yyexhaustedlab -- YYNOMEM (memory exhaustion) comes here. | +`-----------------------------------------------------------*/ +yyexhaustedlab: + yyerror (pc, YY_("memory exhausted")); + yyresult = 2; + goto yyreturnlab; + + +/*----------------------------------------------------------. +| yyreturnlab -- parsing is finished, clean up and return. | +`----------------------------------------------------------*/ +yyreturnlab: + if (yychar != YYEMPTY) + { + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. 
*/ + yytoken = YYTRANSLATE (yychar); + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval, pc); + } + /* Do not reclaim the symbols of the rule whose action triggered + this YYABORT or YYACCEPT. */ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp); + while (yyssp != yyss) + { + yydestruct ("Cleanup: popping", + YY_ACCESSING_SYMBOL (+*yyssp), yyvsp, pc); + YYPOPSTACK (1); + } +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif + + return yyresult; +} + +#line 675 "sys-utils/hwclock-parse-date.y" + + +static table const meridian_table[] = { + { "AM", tMERIDIAN, MERam }, + { "A.M.", tMERIDIAN, MERam }, + { "PM", tMERIDIAN, MERpm }, + { "P.M.", tMERIDIAN, MERpm }, + { NULL, 0, 0 } +}; + +static table const dst_table[] = { + { "DST", tDST, 0 } +}; + +static table const month_and_day_table[] = { + { "JANUARY", tMONTH, 1 }, + { "FEBRUARY", tMONTH, 2 }, + { "MARCH", tMONTH, 3 }, + { "APRIL", tMONTH, 4 }, + { "MAY", tMONTH, 5 }, + { "JUNE", tMONTH, 6 }, + { "JULY", tMONTH, 7 }, + { "AUGUST", tMONTH, 8 }, + { "SEPTEMBER",tMONTH, 9 }, + { "SEPT", tMONTH, 9 }, + { "OCTOBER", tMONTH, 10 }, + { "NOVEMBER", tMONTH, 11 }, + { "DECEMBER", tMONTH, 12 }, + { "SUNDAY", tDAY, 0 }, + { "MONDAY", tDAY, 1 }, + { "TUESDAY", tDAY, 2 }, + { "TUES", tDAY, 2 }, + { "WEDNESDAY",tDAY, 3 }, + { "WEDNES", tDAY, 3 }, + { "THURSDAY", tDAY, 4 }, + { "THUR", tDAY, 4 }, + { "THURS", tDAY, 4 }, + { "FRIDAY", tDAY, 5 }, + { "SATURDAY", tDAY, 6 }, + { NULL, 0, 0 } +}; + +static table const time_units_table[] = { + { "YEAR", tYEAR_UNIT, 1 }, + { "MONTH", tMONTH_UNIT, 1 }, + { "FORTNIGHT",tDAY_UNIT, 14 }, + { "WEEK", tDAY_UNIT, 7 }, + { "DAY", tDAY_UNIT, 1 }, + { "HOUR", tHOUR_UNIT, 1 }, + { "MINUTE", tMINUTE_UNIT, 1 }, + { "MIN", tMINUTE_UNIT, 1 }, + { "SECOND", tSEC_UNIT, 1 }, + { "SEC", tSEC_UNIT, 1 }, + { NULL, 0, 0 } +}; + +/* Assorted relative-time words. 
*/ +static table const relative_time_table[] = { + { "TOMORROW", tDAY_SHIFT, 1 }, + { "YESTERDAY",tDAY_SHIFT, -1 }, + { "TODAY", tDAY_SHIFT, 0 }, + { "NOW", tDAY_SHIFT, 0 }, + { "LAST", tORDINAL, -1 }, + { "THIS", tORDINAL, 0 }, + { "NEXT", tORDINAL, 1 }, + { "FIRST", tORDINAL, 1 }, + /*{ "SECOND", tORDINAL, 2 }, */ + { "THIRD", tORDINAL, 3 }, + { "FOURTH", tORDINAL, 4 }, + { "FIFTH", tORDINAL, 5 }, + { "SIXTH", tORDINAL, 6 }, + { "SEVENTH", tORDINAL, 7 }, + { "EIGHTH", tORDINAL, 8 }, + { "NINTH", tORDINAL, 9 }, + { "TENTH", tORDINAL, 10 }, + { "ELEVENTH", tORDINAL, 11 }, + { "TWELFTH", tORDINAL, 12 }, + { "AGO", tAGO, -1 }, + { "HENCE", tAGO, 1 }, + { NULL, 0, 0 } +}; + +/** + * The universal time zone table. These labels can be used even for + * timestamps that would not otherwise be valid, e.g., GMT timestamps + * in London during summer. + */ +static table const universal_time_zone_table[] = { + { "GMT", tZONE, HOUR ( 0) }, /* Greenwich Mean */ + { "UT", tZONE, HOUR ( 0) }, /* Universal (Coordinated) */ + { "UTC", tZONE, HOUR ( 0) }, + { NULL, 0, 0 } +}; + +/** + * The time zone table. This table is necessarily incomplete, as time + * zone abbreviations are ambiguous; e.g. Australians interpret "EST" + * as Eastern time in Australia, not as US Eastern Standard Time. + * You cannot rely on parse_date to handle arbitrary time zone + * abbreviations; use numeric abbreviations like "-0500" instead. 
+ */ +static table const time_zone_table[] = { + { "WET", tZONE, HOUR ( 0) }, /* Western European */ + { "WEST", tDAYZONE, HOUR ( 0) }, /* Western European Summer */ + { "BST", tDAYZONE, HOUR ( 0) }, /* British Summer */ + { "ART", tZONE, -HOUR ( 3) }, /* Argentina */ + { "BRT", tZONE, -HOUR ( 3) }, /* Brazil */ + { "BRST", tDAYZONE, -HOUR ( 3) }, /* Brazil Summer */ + { "NST", tZONE, -(HOUR ( 3) + 30) }, /* Newfoundland Standard */ + { "NDT", tDAYZONE,-(HOUR ( 3) + 30) }, /* Newfoundland Daylight */ + { "AST", tZONE, -HOUR ( 4) }, /* Atlantic Standard */ + { "ADT", tDAYZONE, -HOUR ( 4) }, /* Atlantic Daylight */ + { "CLT", tZONE, -HOUR ( 4) }, /* Chile */ + { "CLST", tDAYZONE, -HOUR ( 4) }, /* Chile Summer */ + { "EST", tZONE, -HOUR ( 5) }, /* Eastern Standard */ + { "EDT", tDAYZONE, -HOUR ( 5) }, /* Eastern Daylight */ + { "CST", tZONE, -HOUR ( 6) }, /* Central Standard */ + { "CDT", tDAYZONE, -HOUR ( 6) }, /* Central Daylight */ + { "MST", tZONE, -HOUR ( 7) }, /* Mountain Standard */ + { "MDT", tDAYZONE, -HOUR ( 7) }, /* Mountain Daylight */ + { "PST", tZONE, -HOUR ( 8) }, /* Pacific Standard */ + { "PDT", tDAYZONE, -HOUR ( 8) }, /* Pacific Daylight */ + { "AKST", tZONE, -HOUR ( 9) }, /* Alaska Standard */ + { "AKDT", tDAYZONE, -HOUR ( 9) }, /* Alaska Daylight */ + { "HST", tZONE, -HOUR (10) }, /* Hawaii Standard */ + { "HAST", tZONE, -HOUR (10) }, /* Hawaii-Aleutian Standard */ + { "HADT", tDAYZONE, -HOUR (10) }, /* Hawaii-Aleutian Daylight */ + { "SST", tZONE, -HOUR (12) }, /* Samoa Standard */ + { "WAT", tZONE, HOUR ( 1) }, /* West Africa */ + { "CET", tZONE, HOUR ( 1) }, /* Central European */ + { "CEST", tDAYZONE, HOUR ( 1) }, /* Central European Summer */ + { "MET", tZONE, HOUR ( 1) }, /* Middle European */ + { "MEZ", tZONE, HOUR ( 1) }, /* Middle European */ + { "MEST", tDAYZONE, HOUR ( 1) }, /* Middle European Summer */ + { "MESZ", tDAYZONE, HOUR ( 1) }, /* Middle European Summer */ + { "EET", tZONE, HOUR ( 2) }, /* Eastern European */ + { "EEST", 
tDAYZONE, HOUR ( 2) }, /* Eastern European Summer */ + { "CAT", tZONE, HOUR ( 2) }, /* Central Africa */ + { "SAST", tZONE, HOUR ( 2) }, /* South Africa Standard */ + { "EAT", tZONE, HOUR ( 3) }, /* East Africa */ + { "MSK", tZONE, HOUR ( 3) }, /* Moscow */ + { "MSD", tDAYZONE, HOUR ( 3) }, /* Moscow Daylight */ + { "IST", tZONE, (HOUR ( 5) + 30) }, /* India Standard */ + { "SGT", tZONE, HOUR ( 8) }, /* Singapore */ + { "KST", tZONE, HOUR ( 9) }, /* Korea Standard */ + { "JST", tZONE, HOUR ( 9) }, /* Japan Standard */ + { "GST", tZONE, HOUR (10) }, /* Guam Standard */ + { "NZST", tZONE, HOUR (12) }, /* New Zealand Standard */ + { "NZDT", tDAYZONE, HOUR (12) }, /* New Zealand Daylight */ + { NULL, 0, 0 } +}; + +/** + * Military time zone table. + * + * Note 'T' is a special case, as it is used as the separator in ISO + * 8601 date and time of day representation. + */ +static table const military_table[] = { + { "A", tZONE, -HOUR ( 1) }, + { "B", tZONE, -HOUR ( 2) }, + { "C", tZONE, -HOUR ( 3) }, + { "D", tZONE, -HOUR ( 4) }, + { "E", tZONE, -HOUR ( 5) }, + { "F", tZONE, -HOUR ( 6) }, + { "G", tZONE, -HOUR ( 7) }, + { "H", tZONE, -HOUR ( 8) }, + { "I", tZONE, -HOUR ( 9) }, + { "K", tZONE, -HOUR (10) }, + { "L", tZONE, -HOUR (11) }, + { "M", tZONE, -HOUR (12) }, + { "N", tZONE, HOUR ( 1) }, + { "O", tZONE, HOUR ( 2) }, + { "P", tZONE, HOUR ( 3) }, + { "Q", tZONE, HOUR ( 4) }, + { "R", tZONE, HOUR ( 5) }, + { "S", tZONE, HOUR ( 6) }, + { "T", 'T', 0 }, + { "U", tZONE, HOUR ( 8) }, + { "V", tZONE, HOUR ( 9) }, + { "W", tZONE, HOUR (10) }, + { "X", tZONE, HOUR (11) }, + { "Y", tZONE, HOUR (12) }, + { "Z", tZONE, HOUR ( 0) }, + { NULL, 0, 0 } +}; + +/** + * Convert a time offset expressed as HH:MM or HHMM into an integer count of + * minutes. If hh is more than 2 digits then it is of the form HHMM and must be + * delimited; in that case 'mm' is required to be absent. Otherwise, hh and mm + * are used ('mm' contains digits that were prefixed with a colon). 
+ * + * POSIX TZ and ISO 8601 both define the maximum offset as 24:59. POSIX also + * allows seconds, but currently the parser rejects them. Both require minutes + * to be zero padded (2 digits). ISO requires hours to be zero padded, POSIX + * does not, either is accepted; which means an invalid ISO offset could pass. + */ +/* Returns 1 after storing the offset, in minutes, in pc->time_zone; returns 0 (pc untouched) when the digit counts or the 24:59 limit reject the input. */ +static int time_zone_hhmm(parser_control *pc, textint hh, textint mm) +{ + int h, m; + + if (hh.digits > 2 && hh.digits < 5 && mm.digits == 0) { /* 3 or 4 digits: HHMM, no colon part */ + h = hh.value / 100; + m = hh.value % 100; + } else if (hh.digits < 3 && (mm.digits == 0 || mm.digits == 2)) { /* HH alone or HH:MM */ + h = hh.value; + m = hh.negative ? -mm.value : mm.value; /* minutes follow the sign of the hours */ + } else + return 0; + + if (abs(h) > 24 || abs(m) > 59) + return 0; + + pc->time_zone = h * 60 + m; + return 1; +} +/** + * Convert HOURS, qualified by MERIDIAN, to an hour in the range 0-23. + * MER24 (and any unrecognized meridian) accepts 0-23 unchanged. MERam and + * MERpm accept 1-12, with 12 mapping to 0 and 12 respectively. Returns -1 + * when HOURS is outside the accepted range. + */ +static int to_hour(intmax_t hours, int meridian) +{ + switch (meridian) { + default: /* Pacify GCC. */ + case MER24: + return 0 <= hours && hours < 24 ? hours : -1; + case MERam: + return 0 < hours && hours < 12 ? hours : hours == 12 ? 0 : -1; + case MERpm: + return 0 < hours && hours < 12 ? hours + 12 : hours == 12 ? 12 : -1; + } +} +/** + * Return the year denoted by TEXTYEAR. A negative value is returned as + * its absolute value; otherwise a value written with exactly two digits + * is mapped into 1969-2068, and any other value is returned unchanged. + */ +static long int to_year(textint textyear) +{ + intmax_t year = textyear.value; + + if (year < 0) + year = -year; + + /** + * XPG4 suggests that years 00-68 map to 2000-2068, and + * years 69-99 map to 1969-1999. + */ + else if (textyear.digits == 2) + year += year < 69 ? 2000 : 1900; + + return year; +} +/** + * Find the time zone abbreviation NAME by exact, case-sensitive match. + * The universal table is searched first, then pc->local_time_zone_table, + * then time_zone_table. Returns the matching entry, or NULL if none. + */ +static table const * lookup_zone(parser_control const *pc, char const *name) +{ + table const *tp; + + for (tp = universal_time_zone_table; tp->name; tp++) + if (strcmp (name, tp->name) == 0) + return tp; + + /** + * Try local zone abbreviations before those in time_zone_table, as + * the local ones are more likely to be right. + */ + for (tp = pc->local_time_zone_table; tp->name; tp++) + if (strcmp (name, tp->name) == 0) + return tp; + + for (tp = time_zone_table; tp->name; tp++) + if (strcmp (name, tp->name) == 0) + return tp; + + return NULL; +} + +#if ! 
HAVE_TM_GMTOFF +/** + * Yield the difference between *A and *B, + * measured in seconds, ignoring leap seconds. + * The body of this function is taken directly from the GNU C Library; + * see src/strftime.c. + */ +static int tm_diff(struct tm const *a, struct tm const *b) +{ + /** + * Compute intervening leap days correctly even if year is negative. + * Take care to avoid int overflow in leap day calculations. + */ + int a4 = SHR (a->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (a->tm_year & 3); + int b4 = SHR (b->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (b->tm_year & 3); + int a100 = a4 / 25 - (a4 % 25 < 0); + int b100 = b4 / 25 - (b4 % 25 < 0); + int a400 = SHR (a100, 2); + int b400 = SHR (b100, 2); + int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400); + int years = a->tm_year - b->tm_year; + int days = (365 * years + intervening_leap_days + + (a->tm_yday - b->tm_yday)); + return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour)) + + (a->tm_min - b->tm_min)) + + (a->tm_sec - b->tm_sec)); +} +#endif /* ! HAVE_TM_GMTOFF */ +/** + * Look up WORD in the keyword tables and return the matching entry, or + * NULL if it is not recognized. WORD is converted to upper case in place + * and, when the final zone lookup is reached, has its periods removed. + * + * The tables are tried in this order: meridian_table; month_and_day_table + * (a 3-character word, or a 4-character word ending in '.', is compared on + * its first three characters only); lookup_zone(); dst_table; + * time_units_table, then again with one trailing 'S' removed; + * relative_time_table; military_table for one-letter words; and last + * lookup_zone() once more with all periods dropped. + * + * NOTE(review): WORD must be non-empty, since word[wordlen - 1] is read; + * the caller visible in this file, yylex, always stores at least one letter. + */ +static table const * lookup_word(parser_control const *pc, char *word) +{ + char *p; + char *q; + size_t wordlen; + table const *tp; + int period_found; + int abbrev; + + /* Make it uppercase. */ + for (p = word; *p; p++) + *p = c_toupper (to_uchar (*p)); + + for (tp = meridian_table; tp->name; tp++) + if (strcmp (word, tp->name) == 0) + return tp; + + /* See if we have an abbreviation for a month. */ + wordlen = strlen (word); + abbrev = wordlen == 3 || (wordlen == 4 && word[3] == '.'); + + for (tp = month_and_day_table; tp->name; tp++) + if ((abbrev ? strncmp (word, tp->name, 3) : + strcmp (word, tp->name)) == 0) + return tp; + + if ((tp = lookup_zone (pc, word))) + return tp; + + if (strcmp (word, dst_table[0].name) == 0) + return dst_table; + + for (tp = time_units_table; tp->name; tp++) + if (strcmp (word, tp->name) == 0) + return tp; + + /* Strip off any plural and try the units table again. 
*/ + if (word[wordlen - 1] == 'S') { + word[wordlen - 1] = '\0'; + for (tp = time_units_table; tp->name; tp++) + if (strcmp (word, tp->name) == 0) + return tp; + word[wordlen - 1] = 'S'; /* For "this" in relative_time_table. */ + } + + for (tp = relative_time_table; tp->name; tp++) + if (strcmp (word, tp->name) == 0) + return tp; + + /* Military time zones. */ + if (wordlen == 1) + for (tp = military_table; tp->name; tp++) + if (word[0] == tp->name[0]) + return tp; + + /* Drop out any periods and try the time zone table again. */ + for (period_found = 0, p = q = word; (*p = *q); q++) + if (*q == '.') + period_found = 1; + else + p++; + if (period_found && (tp = lookup_zone (pc, word))) + return tp; + + return NULL; +} +/** + * Lexical analyzer for the date grammar. Skips white space and + * parenthesized comments (which may nest), then returns the next token + * read from pc->input, advancing pc->input past it: + * - digits, optionally preceded by '-' or '+': tSNUMBER (signed) or + * tUNUMBER with lvalp->textintval set; when a '.' or ',' and a further + * digit follow, tSDECIMAL_NUMBER or tUDECIMAL_NUMBER with + * lvalp->timespec set instead; + * - a run of letters and periods (only the first 19 characters are kept): + * the type of the entry found by lookup_word, with lvalp->intval set + * to that entry's value; + * - any other character: the character itself. + * Returns '?' for a number that overflows or a word that is not + * recognized, and 0 when the end of the input is reached. + */ +static int yylex (union YYSTYPE *lvalp, parser_control *pc) +{ + unsigned char c; + size_t count; + + for (;;) { + while (c = *pc->input, c_isspace (c)) + pc->input++; + + if (c_isdigit (c) || c == '-' || c == '+') { + char const *p; + int sign; + uintmax_t value; + if (c == '-' || c == '+') { + sign = c == '-' ? -1 : 1; + while (c = *++pc->input, c_isspace (c)) + continue; + if (! c_isdigit (c)) + /* a lone '-' or '+' not followed by a digit: drop it and rescan */ + continue; + } else + sign = 0; + p = pc->input; + for (value = 0; ; value *= 10) { + uintmax_t value1 = value + (c - '0'); + if (value1 < value) + return '?'; + value = value1; + c = *++p; + if (! c_isdigit (c)) + break; + if (UINTMAX_MAX / 10 < value) + return '?'; + } + if ((c == '.' || c == ',') && c_isdigit (p[1])) { + time_t s; + long ns; + int digits; + uintmax_t value1; + + /* Check for overflow when converting value to + * time_t. + */ + if (sign < 0) { + s = - value; + if (0 < s) + return '?'; + value1 = -s; + } else { + s = value; + if (s < 0) + return '?'; + value1 = s; + } + if (value != value1) + return '?'; + + /* Accumulate fraction, to ns precision. 
*/ + p++; + ns = *p++ - '0'; + for (digits = 2; + digits <= LOG10_BILLION; digits++) { + ns *= 10; + if (c_isdigit (*p)) + ns += *p++ - '0'; + } + + /* Skip excess digits, truncating toward + * -Infinity. + */ + if (sign < 0) + for (; c_isdigit (*p); p++) + if (*p != '0') { + ns++; + break; + } + while (c_isdigit (*p)) + p++; + + /* Adjust to the timespec convention, which is + * that tv_nsec is always a positive offset even + * if tv_sec is negative. + */ + if (sign < 0 && ns) { + s--; + if (! (s < 0)) + return '?'; + ns = BILLION - ns; + } + + lvalp->timespec.tv_sec = s; + lvalp->timespec.tv_nsec = ns; + pc->input = p; + return + sign ? tSDECIMAL_NUMBER : tUDECIMAL_NUMBER; + } else { + lvalp->textintval.negative = sign < 0; + if (sign < 0) { + lvalp->textintval.value = - value; + if (0 < lvalp->textintval.value) + return '?'; + } else { + lvalp->textintval.value = value; + if (lvalp->textintval.value < 0) + return '?'; + } + lvalp->textintval.digits = p - pc->input; + pc->input = p; + return sign ? tSNUMBER : tUNUMBER; + } + } + + if (c_isalpha (c)) { + char buff[20]; + char *p = buff; + table const *tp; + + do { + if (p < buff + sizeof buff - 1) + *p++ = c; + c = *++pc->input; + } + while (c_isalpha (c) || c == '.'); + + *p = '\0'; + tp = lookup_word (pc, buff); + if (! tp) { + return '?'; + } + lvalp->intval = tp->value; + return tp->type; + } + + if (c != '(') + return to_uchar (*pc->input++); + + count = 0; + do { + c = *pc->input++; + if (c == '\0') + return c; + if (c == '(') + count++; + else if (c == ')') + count--; + } + while (count != 0); + } +} + +/* Do nothing if the parser reports an error. */ +static int yyerror(parser_control const *pc __attribute__((__unused__)), + char const *s __attribute__((__unused__))) +{ + return 0; +} + +/** + * If *TM0 is the old and *TM1 is the new value of a struct tm after + * passing it to mktime, return 1 if it's OK that mktime returned T. + * It's not OK if *TM0 has out-of-range members. 
+ */ + +static int mktime_ok(struct tm const *tm0, struct tm const *tm1, time_t t) +{ + if (t == (time_t) -1) { + /** + * Guard against falsely reporting an error when parsing a + * timestamp that happens to equal (time_t) -1, on a host that + * supports such a timestamp. + */ + tm1 = localtime (&t); + if (!tm1) + return 0; + } + + return ! ((tm0->tm_sec ^ tm1->tm_sec) + | (tm0->tm_min ^ tm1->tm_min) + | (tm0->tm_hour ^ tm1->tm_hour) + | (tm0->tm_mday ^ tm1->tm_mday) + | (tm0->tm_mon ^ tm1->tm_mon) + | (tm0->tm_year ^ tm1->tm_year)); +} + +/** + * A reasonable upper bound for the size of ordinary TZ strings. + * Use heap allocation if TZ's length exceeds this. + */ +enum { TZBUFSIZE = 100 }; + +/** + * Return a copy of TZ, stored in TZBUF if it fits, and heap-allocated + * otherwise. + */ +static char * get_tz(char tzbuf[TZBUFSIZE]) +{ + char *tz = getenv ("TZ"); + if (tz) { + size_t tzsize = strlen (tz) + 1; + tz = (tzsize <= TZBUFSIZE + ? memcpy (tzbuf, tz, tzsize) + : strdup (tz)); + } + return tz; +} + +/** + * Parse a date/time string, storing the resulting time value into *result. + * The string itself is pointed to by *p. Return 1 if successful. + * *p can be an incomplete or relative time specification; if so, use + * *now as the basis for the returned time. + */ +int parse_date(struct timespec *result, char const *p, + struct timespec const *now) +{ + time_t Start; + intmax_t Start_ns; + struct tm const *tmp; + struct tm tm; + struct tm tm0; + parser_control pc; + struct timespec gettime_buffer; + unsigned char c; + int tz_was_altered = 0; + char *tz0 = NULL; + char tz0buf[TZBUFSIZE]; + int ok = 1; + struct timeval tv; + + if (! now) { + gettimeofday (&tv, NULL); + gettime_buffer.tv_sec = tv.tv_sec; + gettime_buffer.tv_nsec = tv.tv_usec * 1000; + now = &gettime_buffer; + } + + Start = now->tv_sec; + Start_ns = now->tv_nsec; + + tmp = localtime (&now->tv_sec); + if (! 
tmp) + return 0; + + while (c = *p, c_isspace (c)) + p++; + + if (strncmp (p, "TZ=\"", 4) == 0) { + char const *tzbase = p + 4; + size_t tzsize = 1; + char const *s; + + for (s = tzbase; *s; s++, tzsize++) + if (*s == '\\') { + s++; + if (! (*s == '\\' || *s == '"')) + break; + } else if (*s == '"') { + char *z; + char *tz1 = NULL; + char tz1buf[TZBUFSIZE] = { '\0' }; + int large_tz = TZBUFSIZE < tzsize; + int setenv_ok; + + tz0 = get_tz (tz0buf); + if (!tz0) + goto fail; + + if (large_tz) { + z = tz1 = malloc (tzsize); + if (!tz1) + goto fail; + } else + z = tz1 = tz1buf; + + for (s = tzbase; *s != '"'; s++) + *z++ = *(s += *s == '\\'); + *z = '\0'; + setenv_ok = setenv ("TZ", tz1, 1) == 0; + if (large_tz) + free (tz1); + if (!setenv_ok) + goto fail; + tz_was_altered = 1; + + p = s + 1; + while (c = *p, c_isspace (c)) + p++; + + break; + } + } + + /** + * As documented, be careful to treat the empty string just like + * a date string of "0". Without this, an empty string would be + * declared invalid when parsed during a DST transition. + */ + if (*p == '\0') + p = "0"; + + pc.input = p; + pc.year.value = tmp->tm_year; + pc.year.value += TM_YEAR_BASE; + pc.year.digits = 0; + pc.month = tmp->tm_mon + 1; + pc.day = tmp->tm_mday; + pc.hour = tmp->tm_hour; + pc.minutes = tmp->tm_min; + pc.seconds.tv_sec = tmp->tm_sec; + pc.seconds.tv_nsec = Start_ns; + tm.tm_isdst = tmp->tm_isdst; + + pc.meridian = MER24; + pc.rel = RELATIVE_TIME_0; + pc.timespec_seen = 0; + pc.rels_seen = 0; + pc.dates_seen = 0; + pc.days_seen = 0; + pc.times_seen = 0; + pc.local_zones_seen = 0; + pc.dsts_seen = 0; + pc.zones_seen = 0; + +#if HAVE_STRUCT_TM_TM_ZONE + pc.local_time_zone_table[0].name = tmp->tm_zone; + pc.local_time_zone_table[0].type = tLOCAL_ZONE; + pc.local_time_zone_table[0].value = tmp->tm_isdst; + pc.local_time_zone_table[1].name = NULL; + + /** + * Probe the names used in the next three calendar quarters, looking + * for a tm_isdst different from the one we already have. 
+ */ + { + int quarter; + for (quarter = 1; quarter <= 3; quarter++) { + time_t probe = Start + quarter * (90 * 24 * 60 * 60); + struct tm const *probe_tm = localtime (&probe); + if (probe_tm && probe_tm->tm_zone + && probe_tm->tm_isdst + != pc.local_time_zone_table[0].value) { + { + pc.local_time_zone_table[1].name + = probe_tm->tm_zone; + pc.local_time_zone_table[1].type + = tLOCAL_ZONE; + pc.local_time_zone_table[1].value + = probe_tm->tm_isdst; + pc.local_time_zone_table[2].name + = NULL; + } + break; + } + } + } +#else +#if HAVE_TZNAME + { +# if !HAVE_DECL_TZNAME + extern char *tzname[]; +# endif + int i; + for (i = 0; i < 2; i++) { + pc.local_time_zone_table[i].name = tzname[i]; + pc.local_time_zone_table[i].type = tLOCAL_ZONE; + pc.local_time_zone_table[i].value = i; + } + pc.local_time_zone_table[i].name = NULL; + } +#else + pc.local_time_zone_table[0].name = NULL; +#endif +#endif + + if (pc.local_time_zone_table[0].name && pc.local_time_zone_table[1].name + && ! strcmp (pc.local_time_zone_table[0].name, + pc.local_time_zone_table[1].name)) { + /** + * This locale uses the same abbreviation for standard and + * daylight times. So if we see that abbreviation, we don't + * know whether it's daylight time. + */ + pc.local_time_zone_table[0].value = -1; + pc.local_time_zone_table[1].name = NULL; + } + + if (yyparse (&pc) != 0) { + goto fail; + } + + if (pc.timespec_seen) + *result = pc.seconds; + else { + if (1 < (pc.times_seen | pc.dates_seen | pc.days_seen + | pc.dsts_seen + | (pc.local_zones_seen + pc.zones_seen))) { + goto fail; + } + + tm.tm_year = to_year (pc.year) - TM_YEAR_BASE; + tm.tm_mon = pc.month - 1; + tm.tm_mday = pc.day; + if (pc.times_seen || (pc.rels_seen && + ! pc.dates_seen && ! 
pc.days_seen)) { + tm.tm_hour = to_hour (pc.hour, pc.meridian); + if (tm.tm_hour < 0) { + goto fail; + } + tm.tm_min = pc.minutes; + tm.tm_sec = pc.seconds.tv_sec; + } else { + tm.tm_hour = tm.tm_min = tm.tm_sec = 0; + pc.seconds.tv_nsec = 0; + } + + /** + * Let mktime deduce tm_isdst if we have an absolute timestamp. + */ + if (pc.dates_seen | pc.days_seen | pc.times_seen) + tm.tm_isdst = -1; + + /** + * But if the input explicitly specifies local time with or + * without DST, give mktime that information. + */ + if (pc.local_zones_seen) + tm.tm_isdst = pc.local_isdst; + + tm0 = tm; + + Start = mktime (&tm); + + if (! mktime_ok (&tm0, &tm, Start)) { + if (! pc.zones_seen) { + goto fail; + } else { + /** Guard against falsely reporting errors near + * the time_t boundaries when parsing times in + * other time zones. For example, suppose the + * input string "1969-12-31 23:00:00 -0100", the + * current time zone is 8 hours ahead of UTC, + * and the min time_t value is 1970-01-01 + * 00:00:00 UTC. Then the min localtime value + * is 1970-01-01 08:00:00, and mktime will + * therefore fail on 1969-12-31 23:00:00. To + * work around the problem, set the time zone to + * 1 hour behind UTC temporarily by setting + * TZ="XXX1:00" and try mktime again. + */ + + intmax_t time_zone = pc.time_zone; + + intmax_t abs_time_zone = time_zone < 0 + ? - time_zone : time_zone; + + intmax_t abs_time_zone_hour + = abs_time_zone / 60; + + int abs_time_zone_min = abs_time_zone % 60; + + char tz1buf[sizeof "XXX+0:00" + + sizeof pc.time_zone + * CHAR_BIT / 3]; + + if (!tz_was_altered) + tz0 = get_tz (tz0buf); + sprintf (tz1buf, "XXX%s%jd:%02d", + &"-"[time_zone < 0], + abs_time_zone_hour, + abs_time_zone_min); + if (setenv ("TZ", tz1buf, 1) != 0) { + goto fail; + } + tz_was_altered = 1; + tm = tm0; + Start = mktime (&tm); + if (! mktime_ok (&tm0, &tm, Start)) { + goto fail; + } + } + } + + if (pc.days_seen && ! 
pc.dates_seen) { + tm.tm_mday += ((pc.day_number - tm.tm_wday + 7) % 7 + 7 + * (pc.day_ordinal + - (0 < pc.day_ordinal + && tm.tm_wday != pc.day_number))); + tm.tm_isdst = -1; + Start = mktime (&tm); + if (Start == (time_t) -1) { + goto fail; + } + } + /* Add relative date. */ + if (pc.rel.year | pc.rel.month | pc.rel.day) { + int year = tm.tm_year + pc.rel.year; + int month = tm.tm_mon + pc.rel.month; + int day = tm.tm_mday + pc.rel.day; + if (((year < tm.tm_year) ^ (pc.rel.year < 0)) + | ((month < tm.tm_mon) ^ (pc.rel.month < 0)) + | ((day < tm.tm_mday) ^ (pc.rel.day < 0))) { + goto fail; + } + tm.tm_year = year; + tm.tm_mon = month; + tm.tm_mday = day; + tm.tm_hour = tm0.tm_hour; + tm.tm_min = tm0.tm_min; + tm.tm_sec = tm0.tm_sec; + tm.tm_isdst = tm0.tm_isdst; + Start = mktime (&tm); + if (Start == (time_t) -1) { + goto fail; + } + } + + /** + * The only "output" of this if-block is an updated Start value, + * so this block must follow others that clobber Start. + */ + if (pc.zones_seen) { + intmax_t delta = pc.time_zone * 60; + time_t t1; +#ifdef HAVE_TM_GMTOFF + delta -= tm.tm_gmtoff; +#else + time_t t = Start; + struct tm const *gmt = gmtime (&t); + if (! gmt) { + goto fail; + } + delta -= tm_diff (&tm, gmt); +#endif + t1 = Start - delta; + if ((Start < t1) != (delta < 0)) { + goto fail; /* time_t overflow */ + } + Start = t1; + } + + /** + * Add relative hours, minutes, and seconds. On hosts that + * support leap seconds, ignore the possibility of leap seconds; + * e.g., "+ 10 minutes" adds 600 seconds, even if one of them is + * a leap second. Typically this is not what the user wants, + * but it's too hard to do it the other way, because the time + * zone indicator must be applied before relative times, and if + * mktime is applied again the time zone will be lost. 
+ */ + intmax_t sum_ns = pc.seconds.tv_nsec + pc.rel.ns; + intmax_t normalized_ns = (sum_ns % BILLION + BILLION) % BILLION; + time_t t0 = Start; + intmax_t d1 = 60 * 60 * pc.rel.hour; + time_t t1 = t0 + d1; + intmax_t d2 = 60 * pc.rel.minutes; + time_t t2 = t1 + d2; + time_t d3 = pc.rel.seconds; + time_t t3 = t2 + d3; + intmax_t d4 = (sum_ns - normalized_ns) / BILLION; + time_t t4 = t3 + d4; + time_t t5 = t4; + + if ((d1 / (60 * 60) ^ pc.rel.hour) + | (d2 / 60 ^ pc.rel.minutes) + | ((t1 < t0) ^ (d1 < 0)) + | ((t2 < t1) ^ (d2 < 0)) + | ((t3 < t2) ^ (d3 < 0)) + | ((t4 < t3) ^ (d4 < 0)) + | (t5 != t4)) { + goto fail; + } + result->tv_sec = t5; + result->tv_nsec = normalized_ns; + } + + goto done; + + fail: + ok = 0; + done: + if (tz_was_altered) + ok &= (tz0 ? setenv ("TZ", tz0, 1) + : unsetenv ("TZ")) == 0; + if (tz0 != tz0buf) + free (tz0); + return ok; +} diff --git a/sys-utils/hwclock-parse-date.y b/sys-utils/hwclock-parse-date.y new file mode 100644 index 0000000..f1740a2 --- /dev/null +++ b/sys-utils/hwclock-parse-date.y @@ -0,0 +1,1629 @@ +%{ +/** + * SPDX-License-Identifier: GPL-3.0-or-later + * + * Parse a string into an internal timestamp. + * + * This file is based on gnulib parse-datetime.y-dd7a871 with + * the other gnulib dependencies removed for use in util-linux. + * + * Copyright (C) 1999-2000, 2002-2017 Free Software Foundation, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. 
If not, see <http://www.gnu.org/licenses/>. + * + * Originally written by Steven M. Bellovin <smb@research.att.com> while + * at the University of North Carolina at Chapel Hill. Later tweaked by + * a couple of people on Usenet. Completely overhauled by Rich $alz + * <rsalz@bbn.com> and Jim Berets <jberets@bbn.com> in August, 1990. + * + * Modified by Paul Eggert <eggert@twinsun.com> in August 1999 to do + * the right thing about local DST. Also modified by Paul Eggert + * <eggert@cs.ucla.edu> in February 2004 to support + * nanosecond-resolution timestamps, and in October 2004 to support + * TZ strings in dates. + */ + +/** + * FIXME: Check for arithmetic overflow in all cases, not just + * some of them. + */ + +#include <sys/time.h> +#include <time.h> + +#include "c.h" +#include "timeutils.h" +#include "hwclock.h" + +/** + * There's no need to extend the stack, so there's no need to involve + * alloca. + */ +#define YYSTACK_USE_ALLOCA 0 + +/** + * Tell Bison how much stack space is needed. 20 should be plenty for + * this grammar, which is not right recursive. Beware setting it too + * high, since that might cause problems on machines whose + * implementations have lame stack-overflow checking. + */ +#define YYMAXDEPTH 20 +#define YYINITDEPTH YYMAXDEPTH + +/** + * Since the code of parse-datetime.y is not included in the Emacs executable + * itself, there is no need to #define static in this file. Even if + * the code were included in the Emacs executable, it probably + * wouldn't do any harm to #undef it here; this will only cause + * problems if we try to write to a static variable, which I don't + * think this code needs to do. + */ +#ifdef emacs +# undef static +#endif + +#include <inttypes.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +#include <stdarg.h> +#include "cctype.h" +#include "nls.h" + +/** + * Bison's skeleton tests _STDLIB_H, while some stdlib.h headers + * use _STDLIB_H_ as witness. 
Map the latter to the one bison uses. + * FIXME: this is temporary. Remove when we have a mechanism to ensure + * that the version we're using is fixed, too. + */ +#ifdef _STDLIB_H_ +# undef _STDLIB_H +# define _STDLIB_H 1 +#endif + +/** + * Shift A right by B bits portably, by dividing A by 2**B and + * truncating towards minus infinity. A and B should be free of side + * effects, and B should be in the range 0 <= B <= INT_BITS - 2, where + * INT_BITS is the number of useful bits in an int. GNU code can + * assume that INT_BITS is at least 32. + * + * ISO C99 says that A >> B is implementation-defined if A < 0. Some + * implementations (e.g., UNICOS 9.0 on a Cray Y-MP EL) don't shift + * right in the usual way when A < 0, so SHR falls back on division if + * ordinary A >> B doesn't seem to be the usual signed shift. + */ +#define SHR(a, b) \ + (-1 >> 1 == -1 \ + ? (a) >> (b) \ + : (a) / (1 << (b)) - ((a) % (1 << (b)) < 0)) + +#define TM_YEAR_BASE 1900 + +#define HOUR(x) ((x) * 60) + +#define STREQ(a, b) (strcmp (a, b) == 0) + +/** + * Convert a possibly-signed character to an unsigned character. This is + * a bit safer than casting to unsigned char, since it catches some type + * errors that the cast doesn't. + */ +static unsigned char to_uchar (char ch) { return ch; } + +/** + * FIXME: It also assumes that signed integer overflow silently wraps around, + * but this is not true any more with recent versions of GCC 4. + */ + +/** + * An integer value, and the number of digits in its textual + * representation. + */ +typedef struct { + int negative; + intmax_t value; + size_t digits; +} textint; + +/* An entry in the lexical lookup table. */ +typedef struct { + char const *name; + int type; + int value; +} table; + +/* Meridian: am, pm, or 24-hour style. */ +enum { MERam, MERpm, MER24 }; + +enum { BILLION = 1000000000, LOG10_BILLION = 9 }; + +/* Relative year, month, day, hour, minutes, seconds, and nanoseconds. 
*/ +typedef struct { + intmax_t year; + intmax_t month; + intmax_t day; + intmax_t hour; + intmax_t minutes; + time_t seconds; + long ns; +} relative_time; + +#if HAVE_COMPOUND_LITERALS +# define RELATIVE_TIME_0 ((relative_time) { 0, 0, 0, 0, 0, 0, 0 }) +#else +static relative_time const RELATIVE_TIME_0; +#endif + +/* Information passed to and from the parser. */ +typedef struct { + /* The input string remaining to be parsed. */ + const char *input; + + /* N, if this is the Nth Tuesday. */ + intmax_t day_ordinal; + + /* Day of week; Sunday is 0. */ + int day_number; + + /* tm_isdst flag for the local zone. */ + int local_isdst; + + /* Time zone, in minutes east of UTC. */ + int time_zone; + + /* Style used for time. */ + int meridian; + + /* Gregorian year, month, day, hour, minutes, seconds, and ns. */ + textint year; + intmax_t month; + intmax_t day; + intmax_t hour; + intmax_t minutes; + struct timespec seconds; /* includes nanoseconds */ + + /* Relative year, month, day, hour, minutes, seconds, and ns. */ + relative_time rel; + + /* Presence or counts of some nonterminals parsed so far. */ + int timespec_seen; + int rels_seen; + size_t dates_seen; + size_t days_seen; + size_t local_zones_seen; + size_t dsts_seen; + size_t times_seen; + size_t zones_seen; + + /* Table of local time zone abbreviations, null terminated. */ + table local_time_zone_table[3]; +} parser_control; + +union YYSTYPE; +static int yylex (union YYSTYPE *, parser_control *); +static int yyerror (parser_control const *, char const *); +static int time_zone_hhmm (parser_control *, textint, textint); + +/** + * Extract into *PC any date and time info from a string of digits + * of the form e.g., YYYYMMDD, YYMMDD, HHMM, HH (and sometimes YYY, + * YYYY, ...). + */ +static void digits_to_date_time(parser_control *pc, textint text_int) +{ + if (pc->dates_seen && ! pc->year.digits + && ! 
pc->rels_seen && (pc->times_seen || 2 < text_int.digits)) { + pc->year = text_int; + } else { + if (4 < text_int.digits) { + pc->dates_seen++; + pc->day = text_int.value % 100; + pc->month = (text_int.value / 100) % 100; + pc->year.value = text_int.value / 10000; + pc->year.digits = text_int.digits - 4; + } else { + pc->times_seen++; + if (text_int.digits <= 2) { + pc->hour = text_int.value; + pc->minutes = 0; + } + else { + pc->hour = text_int.value / 100; + pc->minutes = text_int.value % 100; + } + pc->seconds.tv_sec = 0; + pc->seconds.tv_nsec = 0; + pc->meridian = MER24; + } + } +} + +/* Increment PC->rel by FACTOR * REL (FACTOR is 1 or -1). */ +static void apply_relative_time(parser_control *pc, relative_time rel, + int factor) +{ + pc->rel.ns += factor * rel.ns; + pc->rel.seconds += factor * rel.seconds; + pc->rel.minutes += factor * rel.minutes; + pc->rel.hour += factor * rel.hour; + pc->rel.day += factor * rel.day; + pc->rel.month += factor * rel.month; + pc->rel.year += factor * rel.year; + pc->rels_seen = 1; +} + +/* Set PC-> hour, minutes, seconds and nanoseconds members from arguments. */ +static void +set_hhmmss(parser_control *pc, intmax_t hour, intmax_t minutes, + time_t sec, long nsec) +{ + pc->hour = hour; + pc->minutes = minutes; + pc->seconds.tv_sec = sec; + pc->seconds.tv_nsec = nsec; +} + +%} + +/** + * We want a reentrant parser, even if the TZ manipulation and the calls to + * localtime and gmtime are not reentrant. + */ +%define api.pure +%parse-param { parser_control *pc } +%lex-param { parser_control *pc } + +/* This grammar has 31 shift/reduce conflicts. 
*/ +%expect 31 + +%union { + intmax_t intval; + textint textintval; + struct timespec timespec; + relative_time rel; +} + +%token <intval> tAGO +%token tDST + +%token tYEAR_UNIT tMONTH_UNIT tHOUR_UNIT tMINUTE_UNIT tSEC_UNIT +%token <intval> tDAY_UNIT tDAY_SHIFT + +%token <intval> tDAY tDAYZONE tLOCAL_ZONE tMERIDIAN +%token <intval> tMONTH tORDINAL tZONE + +%token <textintval> tSNUMBER tUNUMBER +%token <timespec> tSDECIMAL_NUMBER tUDECIMAL_NUMBER + +%type <textintval> o_colon_minutes +%type <timespec> seconds signed_seconds unsigned_seconds + +%type <rel> relunit relunit_snumber dayshift + +%% + +spec: + timespec + | items +; + +timespec: + '@' seconds { + pc->seconds = $2; + pc->timespec_seen = 1; + } +; + +items: + /* empty */ + | items item +; + +item: + datetime { + pc->times_seen++; pc->dates_seen++; + } + | time { + pc->times_seen++; + } + | local_zone { + pc->local_zones_seen++; + } + | zone { + pc->zones_seen++; + } + | date { + pc->dates_seen++; + } + | day { + pc->days_seen++; + } + | rel + | number + | hybrid +; + +datetime: + iso_8601_datetime +; + +iso_8601_datetime: + iso_8601_date 'T' iso_8601_time +; + +time: + tUNUMBER tMERIDIAN { + set_hhmmss (pc, $1.value, 0, 0, 0); + pc->meridian = $2; + } + | tUNUMBER ':' tUNUMBER tMERIDIAN { + set_hhmmss (pc, $1.value, $3.value, 0, 0); + pc->meridian = $4; + } + | tUNUMBER ':' tUNUMBER ':' unsigned_seconds tMERIDIAN { + set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec); + pc->meridian = $6; + } + | iso_8601_time +; + +iso_8601_time: + tUNUMBER zone_offset { + set_hhmmss (pc, $1.value, 0, 0, 0); + pc->meridian = MER24; + } + | tUNUMBER ':' tUNUMBER o_zone_offset { + set_hhmmss (pc, $1.value, $3.value, 0, 0); + pc->meridian = MER24; + } + | tUNUMBER ':' tUNUMBER ':' unsigned_seconds o_zone_offset { + set_hhmmss (pc, $1.value, $3.value, $5.tv_sec, $5.tv_nsec); + pc->meridian = MER24; + } +; + +o_zone_offset: + /* empty */ + | zone_offset +; + +zone_offset: + tSNUMBER o_colon_minutes { + pc->zones_seen++; 
+ if (! time_zone_hhmm (pc, $1, $2)) YYABORT; + } +; + +/** + * Local zone strings only affect DST setting, + * and only take effect if the current TZ setting is relevant. + * + * Example 1: + * 'EEST' is parsed as tLOCAL_ZONE, as it relates to the effective TZ: + * TZ=Europe/Helsinki date -d '2016-12-30 EEST' + * + * Example 2: + * 'EEST' is parsed as 'zone' (TZ=+03:00): + * TZ=Asia/Tokyo ./src/date --debug -d '2011-06-11 EEST' + * + * This is implemented by probing the next three calendar quarters + * of the effective timezone and looking for DST changes - + * if found, the timezone name (EEST) is inserted into + * the lexical lookup table with type tLOCAL_ZONE. + * (Search for 'quarter' comment in 'parse_date'). + */ +local_zone: + tLOCAL_ZONE { + pc->local_isdst = $1; + pc->dsts_seen += (0 < $1); + } + | tLOCAL_ZONE tDST { + pc->local_isdst = 1; + pc->dsts_seen += (0 < $1) + 1; + } +; + +/** + * Note 'T' is a special case, as it is used as the separator in ISO + * 8601 date and time of day representation. + */ +zone: + tZONE { + pc->time_zone = $1; + } + | 'T' { + pc->time_zone = HOUR(7); + } + | tZONE relunit_snumber { + pc->time_zone = $1; + apply_relative_time (pc, $2, 1); + } + | 'T' relunit_snumber { + pc->time_zone = HOUR(7); + apply_relative_time (pc, $2, 1); + } + | tZONE tSNUMBER o_colon_minutes { + if (! 
time_zone_hhmm (pc, $2, $3)) YYABORT; + pc->time_zone += $1; + } + | tDAYZONE { + pc->time_zone = $1 + 60; + } + | tZONE tDST { + pc->time_zone = $1 + 60; + } +; + +day: + tDAY { + pc->day_ordinal = 0; + pc->day_number = $1; + } + | tDAY ',' { + pc->day_ordinal = 0; + pc->day_number = $1; + } + | tORDINAL tDAY { + pc->day_ordinal = $1; + pc->day_number = $2; + } + | tUNUMBER tDAY { + pc->day_ordinal = $1.value; + pc->day_number = $2; + } +; + +date: + tUNUMBER '/' tUNUMBER { + pc->month = $1.value; + pc->day = $3.value; + } + | tUNUMBER '/' tUNUMBER '/' tUNUMBER { + /** + * Interpret as YYYY/MM/DD if the first value has 4 or more digits, + * otherwise as MM/DD/YY. + * The goal in recognizing YYYY/MM/DD is solely to support legacy + * machine-generated dates like those in an RCS log listing. If + * you want portability, use the ISO 8601 format. + */ + if (4 <= $1.digits) { + pc->year = $1; + pc->month = $3.value; + pc->day = $5.value; + } else { + pc->month = $1.value; + pc->day = $3.value; + pc->year = $5; + } + } + | tUNUMBER tMONTH tSNUMBER { + /* e.g. 17-JUN-1992. */ + pc->day = $1.value; + pc->month = $2; + pc->year.value = -$3.value; + pc->year.digits = $3.digits; + } + | tMONTH tSNUMBER tSNUMBER { + /* e.g. JUN-17-1992. */ + pc->month = $1; + pc->day = -$2.value; + pc->year.value = -$3.value; + pc->year.digits = $3.digits; + } + | tMONTH tUNUMBER { + pc->month = $1; + pc->day = $2.value; + } + | tMONTH tUNUMBER ',' tUNUMBER { + pc->month = $1; + pc->day = $2.value; + pc->year = $4; + } + | tUNUMBER tMONTH { + pc->day = $1.value; + pc->month = $2; + } + | tUNUMBER tMONTH tUNUMBER { + pc->day = $1.value; + pc->month = $2; + pc->year = $3; + } + | iso_8601_date +; + +iso_8601_date: + tUNUMBER tSNUMBER tSNUMBER { + /* ISO 8601 format.YYYY-MM-DD. 
*/ + pc->year = $1; + pc->month = -$2.value; + pc->day = -$3.value; + } +; + +rel: + relunit tAGO + { apply_relative_time (pc, $1, $2); } + | relunit + { apply_relative_time (pc, $1, 1); } + | dayshift + { apply_relative_time (pc, $1, 1); } +; + +relunit: + tORDINAL tYEAR_UNIT + { $$ = RELATIVE_TIME_0; $$.year = $1; } + | tUNUMBER tYEAR_UNIT + { $$ = RELATIVE_TIME_0; $$.year = $1.value; } + | tYEAR_UNIT + { $$ = RELATIVE_TIME_0; $$.year = 1; } + | tORDINAL tMONTH_UNIT + { $$ = RELATIVE_TIME_0; $$.month = $1; } + | tUNUMBER tMONTH_UNIT + { $$ = RELATIVE_TIME_0; $$.month = $1.value; } + | tMONTH_UNIT + { $$ = RELATIVE_TIME_0; $$.month = 1; } + | tORDINAL tDAY_UNIT + { $$ = RELATIVE_TIME_0; $$.day = $1 * $2; } + | tUNUMBER tDAY_UNIT + { $$ = RELATIVE_TIME_0; $$.day = $1.value * $2; } + | tDAY_UNIT + { $$ = RELATIVE_TIME_0; $$.day = $1; } + | tORDINAL tHOUR_UNIT + { $$ = RELATIVE_TIME_0; $$.hour = $1; } + | tUNUMBER tHOUR_UNIT + { $$ = RELATIVE_TIME_0; $$.hour = $1.value; } + | tHOUR_UNIT + { $$ = RELATIVE_TIME_0; $$.hour = 1; } + | tORDINAL tMINUTE_UNIT + { $$ = RELATIVE_TIME_0; $$.minutes = $1; } + | tUNUMBER tMINUTE_UNIT + { $$ = RELATIVE_TIME_0; $$.minutes = $1.value; } + | tMINUTE_UNIT + { $$ = RELATIVE_TIME_0; $$.minutes = 1; } + | tORDINAL tSEC_UNIT + { $$ = RELATIVE_TIME_0; $$.seconds = $1; } + | tUNUMBER tSEC_UNIT + { $$ = RELATIVE_TIME_0; $$.seconds = $1.value; } + | tSDECIMAL_NUMBER tSEC_UNIT { + $$ = RELATIVE_TIME_0; + $$.seconds = $1.tv_sec; + $$.ns = $1.tv_nsec; + } + | tUDECIMAL_NUMBER tSEC_UNIT { + $$ = RELATIVE_TIME_0; + $$.seconds = $1.tv_sec; + $$.ns = $1.tv_nsec; + } + | tSEC_UNIT + { $$ = RELATIVE_TIME_0; $$.seconds = 1; } + | relunit_snumber +; + +relunit_snumber: + tSNUMBER tYEAR_UNIT + { $$ = RELATIVE_TIME_0; $$.year = $1.value; } + | tSNUMBER tMONTH_UNIT + { $$ = RELATIVE_TIME_0; $$.month = $1.value; } + | tSNUMBER tDAY_UNIT + { $$ = RELATIVE_TIME_0; $$.day = $1.value * $2; } + | tSNUMBER tHOUR_UNIT + { $$ = RELATIVE_TIME_0; $$.hour = $1.value; 
} + | tSNUMBER tMINUTE_UNIT + { $$ = RELATIVE_TIME_0; $$.minutes = $1.value; } + | tSNUMBER tSEC_UNIT + { $$ = RELATIVE_TIME_0; $$.seconds = $1.value; } +; + +dayshift: + tDAY_SHIFT + { $$ = RELATIVE_TIME_0; $$.day = $1; } +; + +seconds: signed_seconds | unsigned_seconds; + +signed_seconds: + tSDECIMAL_NUMBER + | tSNUMBER + { $$.tv_sec = $1.value; $$.tv_nsec = 0; } +; + +unsigned_seconds: + tUDECIMAL_NUMBER + | tUNUMBER + { $$.tv_sec = $1.value; $$.tv_nsec = 0; } +; + +number: + tUNUMBER + { digits_to_date_time (pc, $1); } +; + +hybrid: + tUNUMBER relunit_snumber { + /** + * Hybrid all-digit and relative offset, so that we accept e.g., + * "YYYYMMDD +N days" as well as "YYYYMMDD N days". + */ + digits_to_date_time (pc, $1); + apply_relative_time (pc, $2, 1); + } +; + +o_colon_minutes: + /* empty */ + { $$.value = $$.digits = 0; } + | ':' tUNUMBER { + $$ = $2; + } +; + +%% + +static table const meridian_table[] = { + { "AM", tMERIDIAN, MERam }, + { "A.M.", tMERIDIAN, MERam }, + { "PM", tMERIDIAN, MERpm }, + { "P.M.", tMERIDIAN, MERpm }, + { NULL, 0, 0 } +}; + +static table const dst_table[] = { + { "DST", tDST, 0 } +}; + +static table const month_and_day_table[] = { + { "JANUARY", tMONTH, 1 }, + { "FEBRUARY", tMONTH, 2 }, + { "MARCH", tMONTH, 3 }, + { "APRIL", tMONTH, 4 }, + { "MAY", tMONTH, 5 }, + { "JUNE", tMONTH, 6 }, + { "JULY", tMONTH, 7 }, + { "AUGUST", tMONTH, 8 }, + { "SEPTEMBER",tMONTH, 9 }, + { "SEPT", tMONTH, 9 }, + { "OCTOBER", tMONTH, 10 }, + { "NOVEMBER", tMONTH, 11 }, + { "DECEMBER", tMONTH, 12 }, + { "SUNDAY", tDAY, 0 }, + { "MONDAY", tDAY, 1 }, + { "TUESDAY", tDAY, 2 }, + { "TUES", tDAY, 2 }, + { "WEDNESDAY",tDAY, 3 }, + { "WEDNES", tDAY, 3 }, + { "THURSDAY", tDAY, 4 }, + { "THUR", tDAY, 4 }, + { "THURS", tDAY, 4 }, + { "FRIDAY", tDAY, 5 }, + { "SATURDAY", tDAY, 6 }, + { NULL, 0, 0 } +}; + +static table const time_units_table[] = { + { "YEAR", tYEAR_UNIT, 1 }, + { "MONTH", tMONTH_UNIT, 1 }, + { "FORTNIGHT",tDAY_UNIT, 14 }, + { "WEEK", tDAY_UNIT, 7 
}, + { "DAY", tDAY_UNIT, 1 }, + { "HOUR", tHOUR_UNIT, 1 }, + { "MINUTE", tMINUTE_UNIT, 1 }, + { "MIN", tMINUTE_UNIT, 1 }, + { "SECOND", tSEC_UNIT, 1 }, + { "SEC", tSEC_UNIT, 1 }, + { NULL, 0, 0 } +}; + +/* Assorted relative-time words. */ +static table const relative_time_table[] = { + { "TOMORROW", tDAY_SHIFT, 1 }, + { "YESTERDAY",tDAY_SHIFT, -1 }, + { "TODAY", tDAY_SHIFT, 0 }, + { "NOW", tDAY_SHIFT, 0 }, + { "LAST", tORDINAL, -1 }, + { "THIS", tORDINAL, 0 }, + { "NEXT", tORDINAL, 1 }, + { "FIRST", tORDINAL, 1 }, + /*{ "SECOND", tORDINAL, 2 }, */ + { "THIRD", tORDINAL, 3 }, + { "FOURTH", tORDINAL, 4 }, + { "FIFTH", tORDINAL, 5 }, + { "SIXTH", tORDINAL, 6 }, + { "SEVENTH", tORDINAL, 7 }, + { "EIGHTH", tORDINAL, 8 }, + { "NINTH", tORDINAL, 9 }, + { "TENTH", tORDINAL, 10 }, + { "ELEVENTH", tORDINAL, 11 }, + { "TWELFTH", tORDINAL, 12 }, + { "AGO", tAGO, -1 }, + { "HENCE", tAGO, 1 }, + { NULL, 0, 0 } +}; + +/** + * The universal time zone table. These labels can be used even for + * timestamps that would not otherwise be valid, e.g., GMT timestamps + * in London during summer. + */ +static table const universal_time_zone_table[] = { + { "GMT", tZONE, HOUR ( 0) }, /* Greenwich Mean */ + { "UT", tZONE, HOUR ( 0) }, /* Universal (Coordinated) */ + { "UTC", tZONE, HOUR ( 0) }, + { NULL, 0, 0 } +}; + +/** + * The time zone table. This table is necessarily incomplete, as time + * zone abbreviations are ambiguous; e.g. Australians interpret "EST" + * as Eastern time in Australia, not as US Eastern Standard Time. + * You cannot rely on parse_date to handle arbitrary time zone + * abbreviations; use numeric abbreviations like "-0500" instead. 
+ */ +static table const time_zone_table[] = { + { "WET", tZONE, HOUR ( 0) }, /* Western European */ + { "WEST", tDAYZONE, HOUR ( 0) }, /* Western European Summer */ + { "BST", tDAYZONE, HOUR ( 0) }, /* British Summer */ + { "ART", tZONE, -HOUR ( 3) }, /* Argentina */ + { "BRT", tZONE, -HOUR ( 3) }, /* Brazil */ + { "BRST", tDAYZONE, -HOUR ( 3) }, /* Brazil Summer */ + { "NST", tZONE, -(HOUR ( 3) + 30) }, /* Newfoundland Standard */ + { "NDT", tDAYZONE,-(HOUR ( 3) + 30) }, /* Newfoundland Daylight */ + { "AST", tZONE, -HOUR ( 4) }, /* Atlantic Standard */ + { "ADT", tDAYZONE, -HOUR ( 4) }, /* Atlantic Daylight */ + { "CLT", tZONE, -HOUR ( 4) }, /* Chile */ + { "CLST", tDAYZONE, -HOUR ( 4) }, /* Chile Summer */ + { "EST", tZONE, -HOUR ( 5) }, /* Eastern Standard */ + { "EDT", tDAYZONE, -HOUR ( 5) }, /* Eastern Daylight */ + { "CST", tZONE, -HOUR ( 6) }, /* Central Standard */ + { "CDT", tDAYZONE, -HOUR ( 6) }, /* Central Daylight */ + { "MST", tZONE, -HOUR ( 7) }, /* Mountain Standard */ + { "MDT", tDAYZONE, -HOUR ( 7) }, /* Mountain Daylight */ + { "PST", tZONE, -HOUR ( 8) }, /* Pacific Standard */ + { "PDT", tDAYZONE, -HOUR ( 8) }, /* Pacific Daylight */ + { "AKST", tZONE, -HOUR ( 9) }, /* Alaska Standard */ + { "AKDT", tDAYZONE, -HOUR ( 9) }, /* Alaska Daylight */ + { "HST", tZONE, -HOUR (10) }, /* Hawaii Standard */ + { "HAST", tZONE, -HOUR (10) }, /* Hawaii-Aleutian Standard */ + { "HADT", tDAYZONE, -HOUR (10) }, /* Hawaii-Aleutian Daylight */ + { "SST", tZONE, -HOUR (12) }, /* Samoa Standard */ + { "WAT", tZONE, HOUR ( 1) }, /* West Africa */ + { "CET", tZONE, HOUR ( 1) }, /* Central European */ + { "CEST", tDAYZONE, HOUR ( 1) }, /* Central European Summer */ + { "MET", tZONE, HOUR ( 1) }, /* Middle European */ + { "MEZ", tZONE, HOUR ( 1) }, /* Middle European */ + { "MEST", tDAYZONE, HOUR ( 1) }, /* Middle European Summer */ + { "MESZ", tDAYZONE, HOUR ( 1) }, /* Middle European Summer */ + { "EET", tZONE, HOUR ( 2) }, /* Eastern European */ + { "EEST", 
tDAYZONE, HOUR ( 2) }, /* Eastern European Summer */ + { "CAT", tZONE, HOUR ( 2) }, /* Central Africa */ + { "SAST", tZONE, HOUR ( 2) }, /* South Africa Standard */ + { "EAT", tZONE, HOUR ( 3) }, /* East Africa */ + { "MSK", tZONE, HOUR ( 3) }, /* Moscow */ + { "MSD", tDAYZONE, HOUR ( 3) }, /* Moscow Daylight */ + { "IST", tZONE, (HOUR ( 5) + 30) }, /* India Standard */ + { "SGT", tZONE, HOUR ( 8) }, /* Singapore */ + { "KST", tZONE, HOUR ( 9) }, /* Korea Standard */ + { "JST", tZONE, HOUR ( 9) }, /* Japan Standard */ + { "GST", tZONE, HOUR (10) }, /* Guam Standard */ + { "NZST", tZONE, HOUR (12) }, /* New Zealand Standard */ + { "NZDT", tDAYZONE, HOUR (12) }, /* New Zealand Daylight */ + { NULL, 0, 0 } +}; + +/** + * Military time zone table. + * + * Note 'T' is a special case, as it is used as the separator in ISO + * 8601 date and time of day representation. + */ +static table const military_table[] = { + { "A", tZONE, -HOUR ( 1) }, + { "B", tZONE, -HOUR ( 2) }, + { "C", tZONE, -HOUR ( 3) }, + { "D", tZONE, -HOUR ( 4) }, + { "E", tZONE, -HOUR ( 5) }, + { "F", tZONE, -HOUR ( 6) }, + { "G", tZONE, -HOUR ( 7) }, + { "H", tZONE, -HOUR ( 8) }, + { "I", tZONE, -HOUR ( 9) }, + { "K", tZONE, -HOUR (10) }, + { "L", tZONE, -HOUR (11) }, + { "M", tZONE, -HOUR (12) }, + { "N", tZONE, HOUR ( 1) }, + { "O", tZONE, HOUR ( 2) }, + { "P", tZONE, HOUR ( 3) }, + { "Q", tZONE, HOUR ( 4) }, + { "R", tZONE, HOUR ( 5) }, + { "S", tZONE, HOUR ( 6) }, + { "T", 'T', 0 }, + { "U", tZONE, HOUR ( 8) }, + { "V", tZONE, HOUR ( 9) }, + { "W", tZONE, HOUR (10) }, + { "X", tZONE, HOUR (11) }, + { "Y", tZONE, HOUR (12) }, + { "Z", tZONE, HOUR ( 0) }, + { NULL, 0, 0 } +}; + +/** + * Convert a time offset expressed as HH:MM or HHMM into an integer count of + * minutes. If hh is more than 2 digits then it is of the form HHMM and must be + * delimited; in that case 'mm' is required to be absent. Otherwise, hh and mm + * are used ('mm' contains digits that were prefixed with a colon). 
+ * + * POSIX TZ and ISO 8601 both define the maximum offset as 24:59. POSIX also + * allows seconds, but currently the parser rejects them. Both require minutes + * to be zero padded (2 digits). ISO requires hours to be zero padded, POSIX + * does not, either is accepted; which means an invalid ISO offset could pass. + */ + +static int time_zone_hhmm(parser_control *pc, textint hh, textint mm) +{ + int h, m; + + if (hh.digits > 2 && hh.digits < 5 && mm.digits == 0) { + h = hh.value / 100; + m = hh.value % 100; + } else if (hh.digits < 3 && (mm.digits == 0 || mm.digits == 2)) { + h = hh.value; + m = hh.negative ? -mm.value : mm.value; + } else + return 0; + + if (abs(h) > 24 || abs(m) > 59) + return 0; + + pc->time_zone = h * 60 + m; + return 1; +} + +static int to_hour(intmax_t hours, int meridian) +{ + switch (meridian) { + default: /* Pacify GCC. */ + case MER24: + return 0 <= hours && hours < 24 ? hours : -1; + case MERam: + return 0 < hours && hours < 12 ? hours : hours == 12 ? 0 : -1; + case MERpm: + return 0 < hours && hours < 12 ? hours + 12 : hours == 12 ? 12 : -1; + } +} + +static long int to_year(textint textyear) +{ + intmax_t year = textyear.value; + + if (year < 0) + year = -year; + + /** + * XPG4 suggests that years 00-68 map to 2000-2068, and + * years 69-99 map to 1969-1999. + */ + else if (textyear.digits == 2) + year += year < 69 ? 2000 : 1900; + + return year; +} + +static table const * lookup_zone(parser_control const *pc, char const *name) +{ + table const *tp; + + for (tp = universal_time_zone_table; tp->name; tp++) + if (strcmp (name, tp->name) == 0) + return tp; + + /** + * Try local zone abbreviations before those in time_zone_table, as + * the local ones are more likely to be right. + */ + for (tp = pc->local_time_zone_table; tp->name; tp++) + if (strcmp (name, tp->name) == 0) + return tp; + + for (tp = time_zone_table; tp->name; tp++) + if (strcmp (name, tp->name) == 0) + return tp; + + return NULL; +} + +#if ! 
HAVE_TM_GMTOFF +/** + * Yield the difference between *A and *B, + * measured in seconds, ignoring leap seconds. + * The body of this function is taken directly from the GNU C Library; + * see src/strftime.c. + */ +static int tm_diff(struct tm const *a, struct tm const *b) +{ + /** + * Compute intervening leap days correctly even if year is negative. + * Take care to avoid int overflow in leap day calculations. + */ + int a4 = SHR (a->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (a->tm_year & 3); + int b4 = SHR (b->tm_year, 2) + SHR (TM_YEAR_BASE, 2) - ! (b->tm_year & 3); + int a100 = a4 / 25 - (a4 % 25 < 0); + int b100 = b4 / 25 - (b4 % 25 < 0); + int a400 = SHR (a100, 2); + int b400 = SHR (b100, 2); + int intervening_leap_days = (a4 - b4) - (a100 - b100) + (a400 - b400); + int years = a->tm_year - b->tm_year; + int days = (365 * years + intervening_leap_days + + (a->tm_yday - b->tm_yday)); + return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour)) + + (a->tm_min - b->tm_min)) + + (a->tm_sec - b->tm_sec)); +} +#endif /* ! HAVE_TM_GMTOFF */ + +static table const * lookup_word(parser_control const *pc, char *word) +{ + char *p; + char *q; + size_t wordlen; + table const *tp; + int period_found; + int abbrev; + + /* Make it uppercase. */ + for (p = word; *p; p++) + *p = c_toupper (to_uchar (*p)); + + for (tp = meridian_table; tp->name; tp++) + if (strcmp (word, tp->name) == 0) + return tp; + + /* See if we have an abbreviation for a month. */ + wordlen = strlen (word); + abbrev = wordlen == 3 || (wordlen == 4 && word[3] == '.'); + + for (tp = month_and_day_table; tp->name; tp++) + if ((abbrev ? strncmp (word, tp->name, 3) : + strcmp (word, tp->name)) == 0) + return tp; + + if ((tp = lookup_zone (pc, word))) + return tp; + + if (strcmp (word, dst_table[0].name) == 0) + return dst_table; + + for (tp = time_units_table; tp->name; tp++) + if (strcmp (word, tp->name) == 0) + return tp; + + /* Strip off any plural and try the units table again. 
*/ + if (word[wordlen - 1] == 'S') { + word[wordlen - 1] = '\0'; + for (tp = time_units_table; tp->name; tp++) + if (strcmp (word, tp->name) == 0) + return tp; + word[wordlen - 1] = 'S'; /* For "this" in relative_time_table. */ + } + + for (tp = relative_time_table; tp->name; tp++) + if (strcmp (word, tp->name) == 0) + return tp; + + /* Military time zones. */ + if (wordlen == 1) + for (tp = military_table; tp->name; tp++) + if (word[0] == tp->name[0]) + return tp; + + /* Drop out any periods and try the time zone table again. */ + for (period_found = 0, p = q = word; (*p = *q); q++) + if (*q == '.') + period_found = 1; + else + p++; + if (period_found && (tp = lookup_zone (pc, word))) + return tp; + + return NULL; +} + +static int yylex (union YYSTYPE *lvalp, parser_control *pc) +{ + unsigned char c; + size_t count; + + for (;;) { + while (c = *pc->input, c_isspace (c)) + pc->input++; + + if (c_isdigit (c) || c == '-' || c == '+') { + char const *p; + int sign; + uintmax_t value; + if (c == '-' || c == '+') { + sign = c == '-' ? -1 : 1; + while (c = *++pc->input, c_isspace (c)) + continue; + if (! c_isdigit (c)) + /* skip the '-' sign */ + continue; + } else + sign = 0; + p = pc->input; + for (value = 0; ; value *= 10) { + uintmax_t value1 = value + (c - '0'); + if (value1 < value) + return '?'; + value = value1; + c = *++p; + if (! c_isdigit (c)) + break; + if (UINTMAX_MAX / 10 < value) + return '?'; + } + if ((c == '.' || c == ',') && c_isdigit (p[1])) { + time_t s; + long ns; + int digits; + uintmax_t value1; + + /* Check for overflow when converting value to + * time_t. + */ + if (sign < 0) { + s = - value; + if (0 < s) + return '?'; + value1 = -s; + } else { + s = value; + if (s < 0) + return '?'; + value1 = s; + } + if (value != value1) + return '?'; + + /* Accumulate fraction, to ns precision. 
*/ + p++; + ns = *p++ - '0'; + for (digits = 2; + digits <= LOG10_BILLION; digits++) { + ns *= 10; + if (c_isdigit (*p)) + ns += *p++ - '0'; + } + + /* Skip excess digits, truncating toward + * -Infinity. + */ + if (sign < 0) + for (; c_isdigit (*p); p++) + if (*p != '0') { + ns++; + break; + } + while (c_isdigit (*p)) + p++; + + /* Adjust to the timespec convention, which is + * that tv_nsec is always a positive offset even + * if tv_sec is negative. + */ + if (sign < 0 && ns) { + s--; + if (! (s < 0)) + return '?'; + ns = BILLION - ns; + } + + lvalp->timespec.tv_sec = s; + lvalp->timespec.tv_nsec = ns; + pc->input = p; + return + sign ? tSDECIMAL_NUMBER : tUDECIMAL_NUMBER; + } else { + lvalp->textintval.negative = sign < 0; + if (sign < 0) { + lvalp->textintval.value = - value; + if (0 < lvalp->textintval.value) + return '?'; + } else { + lvalp->textintval.value = value; + if (lvalp->textintval.value < 0) + return '?'; + } + lvalp->textintval.digits = p - pc->input; + pc->input = p; + return sign ? tSNUMBER : tUNUMBER; + } + } + + if (c_isalpha (c)) { + char buff[20]; + char *p = buff; + table const *tp; + + do { + if (p < buff + sizeof buff - 1) + *p++ = c; + c = *++pc->input; + } + while (c_isalpha (c) || c == '.'); + + *p = '\0'; + tp = lookup_word (pc, buff); + if (! tp) { + return '?'; + } + lvalp->intval = tp->value; + return tp->type; + } + + if (c != '(') + return to_uchar (*pc->input++); + + count = 0; + do { + c = *pc->input++; + if (c == '\0') + return c; + if (c == '(') + count++; + else if (c == ')') + count--; + } + while (count != 0); + } +} + +/* Do nothing if the parser reports an error. */ +static int yyerror(parser_control const *pc __attribute__((__unused__)), + char const *s __attribute__((__unused__))) +{ + return 0; +} + +/** + * If *TM0 is the old and *TM1 is the new value of a struct tm after + * passing it to mktime, return 1 if it's OK that mktime returned T. + * It's not OK if *TM0 has out-of-range members. 
+ */ + +static int mktime_ok(struct tm const *tm0, struct tm const *tm1, time_t t) +{ + if (t == (time_t) -1) { + /** + * Guard against falsely reporting an error when parsing a + * timestamp that happens to equal (time_t) -1, on a host that + * supports such a timestamp. + */ + tm1 = localtime (&t); + if (!tm1) + return 0; + } + + return ! ((tm0->tm_sec ^ tm1->tm_sec) + | (tm0->tm_min ^ tm1->tm_min) + | (tm0->tm_hour ^ tm1->tm_hour) + | (tm0->tm_mday ^ tm1->tm_mday) + | (tm0->tm_mon ^ tm1->tm_mon) + | (tm0->tm_year ^ tm1->tm_year)); +} + +/** + * A reasonable upper bound for the size of ordinary TZ strings. + * Use heap allocation if TZ's length exceeds this. + */ +enum { TZBUFSIZE = 100 }; + +/** + * Return a copy of TZ, stored in TZBUF if it fits, and heap-allocated + * otherwise. + */ +static char * get_tz(char tzbuf[TZBUFSIZE]) +{ + char *tz = getenv ("TZ"); + if (tz) { + size_t tzsize = strlen (tz) + 1; + tz = (tzsize <= TZBUFSIZE + ? memcpy (tzbuf, tz, tzsize) + : strdup (tz)); + } + return tz; +} + +/** + * Parse a date/time string, storing the resulting time value into *result. + * The string itself is pointed to by *p. Return 1 if successful. + * *p can be an incomplete or relative time specification; if so, use + * *now as the basis for the returned time. + */ +int parse_date(struct timespec *result, char const *p, + struct timespec const *now) +{ + time_t Start; + intmax_t Start_ns; + struct tm const *tmp; + struct tm tm; + struct tm tm0; + parser_control pc; + struct timespec gettime_buffer; + unsigned char c; + int tz_was_altered = 0; + char *tz0 = NULL; + char tz0buf[TZBUFSIZE]; + int ok = 1; + struct timeval tv; + + if (! now) { + gettimeofday (&tv, NULL); + gettime_buffer.tv_sec = tv.tv_sec; + gettime_buffer.tv_nsec = tv.tv_usec * 1000; + now = &gettime_buffer; + } + + Start = now->tv_sec; + Start_ns = now->tv_nsec; + + tmp = localtime (&now->tv_sec); + if (! 
tmp) + return 0; + + while (c = *p, c_isspace (c)) + p++; + + if (strncmp (p, "TZ=\"", 4) == 0) { + char const *tzbase = p + 4; + size_t tzsize = 1; + char const *s; + + for (s = tzbase; *s; s++, tzsize++) + if (*s == '\\') { + s++; + if (! (*s == '\\' || *s == '"')) + break; + } else if (*s == '"') { + char *z; + char *tz1 = NULL; + char tz1buf[TZBUFSIZE] = { '\0' }; + int large_tz = TZBUFSIZE < tzsize; + int setenv_ok; + + tz0 = get_tz (tz0buf); + if (!tz0) + goto fail; + + if (large_tz) { + z = tz1 = malloc (tzsize); + if (!tz1) + goto fail; + } else + z = tz1 = tz1buf; + + for (s = tzbase; *s != '"'; s++) + *z++ = *(s += *s == '\\'); + *z = '\0'; + setenv_ok = setenv ("TZ", tz1, 1) == 0; + if (large_tz) + free (tz1); + if (!setenv_ok) + goto fail; + tz_was_altered = 1; + + p = s + 1; + while (c = *p, c_isspace (c)) + p++; + + break; + } + } + + /** + * As documented, be careful to treat the empty string just like + * a date string of "0". Without this, an empty string would be + * declared invalid when parsed during a DST transition. + */ + if (*p == '\0') + p = "0"; + + pc.input = p; + pc.year.value = tmp->tm_year; + pc.year.value += TM_YEAR_BASE; + pc.year.digits = 0; + pc.month = tmp->tm_mon + 1; + pc.day = tmp->tm_mday; + pc.hour = tmp->tm_hour; + pc.minutes = tmp->tm_min; + pc.seconds.tv_sec = tmp->tm_sec; + pc.seconds.tv_nsec = Start_ns; + tm.tm_isdst = tmp->tm_isdst; + + pc.meridian = MER24; + pc.rel = RELATIVE_TIME_0; + pc.timespec_seen = 0; + pc.rels_seen = 0; + pc.dates_seen = 0; + pc.days_seen = 0; + pc.times_seen = 0; + pc.local_zones_seen = 0; + pc.dsts_seen = 0; + pc.zones_seen = 0; + +#if HAVE_STRUCT_TM_TM_ZONE + pc.local_time_zone_table[0].name = tmp->tm_zone; + pc.local_time_zone_table[0].type = tLOCAL_ZONE; + pc.local_time_zone_table[0].value = tmp->tm_isdst; + pc.local_time_zone_table[1].name = NULL; + + /** + * Probe the names used in the next three calendar quarters, looking + * for a tm_isdst different from the one we already have. 
+ */ + { + int quarter; + for (quarter = 1; quarter <= 3; quarter++) { + time_t probe = Start + quarter * (90 * 24 * 60 * 60); + struct tm const *probe_tm = localtime (&probe); + if (probe_tm && probe_tm->tm_zone + && probe_tm->tm_isdst + != pc.local_time_zone_table[0].value) { + { + pc.local_time_zone_table[1].name + = probe_tm->tm_zone; + pc.local_time_zone_table[1].type + = tLOCAL_ZONE; + pc.local_time_zone_table[1].value + = probe_tm->tm_isdst; + pc.local_time_zone_table[2].name + = NULL; + } + break; + } + } + } +#else +#if HAVE_TZNAME + { +# if !HAVE_DECL_TZNAME + extern char *tzname[]; +# endif + int i; + for (i = 0; i < 2; i++) { + pc.local_time_zone_table[i].name = tzname[i]; + pc.local_time_zone_table[i].type = tLOCAL_ZONE; + pc.local_time_zone_table[i].value = i; + } + pc.local_time_zone_table[i].name = NULL; + } +#else + pc.local_time_zone_table[0].name = NULL; +#endif +#endif + + if (pc.local_time_zone_table[0].name && pc.local_time_zone_table[1].name + && ! strcmp (pc.local_time_zone_table[0].name, + pc.local_time_zone_table[1].name)) { + /** + * This locale uses the same abbreviation for standard and + * daylight times. So if we see that abbreviation, we don't + * know whether it's daylight time. + */ + pc.local_time_zone_table[0].value = -1; + pc.local_time_zone_table[1].name = NULL; + } + + if (yyparse (&pc) != 0) { + goto fail; + } + + if (pc.timespec_seen) + *result = pc.seconds; + else { + if (1 < (pc.times_seen | pc.dates_seen | pc.days_seen + | pc.dsts_seen + | (pc.local_zones_seen + pc.zones_seen))) { + goto fail; + } + + tm.tm_year = to_year (pc.year) - TM_YEAR_BASE; + tm.tm_mon = pc.month - 1; + tm.tm_mday = pc.day; + if (pc.times_seen || (pc.rels_seen && + ! pc.dates_seen && ! 
pc.days_seen)) { + tm.tm_hour = to_hour (pc.hour, pc.meridian); + if (tm.tm_hour < 0) { + goto fail; + } + tm.tm_min = pc.minutes; + tm.tm_sec = pc.seconds.tv_sec; + } else { + tm.tm_hour = tm.tm_min = tm.tm_sec = 0; + pc.seconds.tv_nsec = 0; + } + + /** + * Let mktime deduce tm_isdst if we have an absolute timestamp. + */ + if (pc.dates_seen | pc.days_seen | pc.times_seen) + tm.tm_isdst = -1; + + /** + * But if the input explicitly specifies local time with or + * without DST, give mktime that information. + */ + if (pc.local_zones_seen) + tm.tm_isdst = pc.local_isdst; + + tm0 = tm; + + Start = mktime (&tm); + + if (! mktime_ok (&tm0, &tm, Start)) { + if (! pc.zones_seen) { + goto fail; + } else { + /** Guard against falsely reporting errors near + * the time_t boundaries when parsing times in + * other time zones. For example, suppose the + * input string "1969-12-31 23:00:00 -0100", the + * current time zone is 8 hours ahead of UTC, + * and the min time_t value is 1970-01-01 + * 00:00:00 UTC. Then the min localtime value + * is 1970-01-01 08:00:00, and mktime will + * therefore fail on 1969-12-31 23:00:00. To + * work around the problem, set the time zone to + * 1 hour behind UTC temporarily by setting + * TZ="XXX1:00" and try mktime again. + */ + + intmax_t time_zone = pc.time_zone; + + intmax_t abs_time_zone = time_zone < 0 + ? - time_zone : time_zone; + + intmax_t abs_time_zone_hour + = abs_time_zone / 60; + + int abs_time_zone_min = abs_time_zone % 60; + + char tz1buf[sizeof "XXX+0:00" + + sizeof pc.time_zone + * CHAR_BIT / 3]; + + if (!tz_was_altered) + tz0 = get_tz (tz0buf); + sprintf (tz1buf, "XXX%s%jd:%02d", + &"-"[time_zone < 0], + abs_time_zone_hour, + abs_time_zone_min); + if (setenv ("TZ", tz1buf, 1) != 0) { + goto fail; + } + tz_was_altered = 1; + tm = tm0; + Start = mktime (&tm); + if (! mktime_ok (&tm0, &tm, Start)) { + goto fail; + } + } + } + + if (pc.days_seen && ! 
pc.dates_seen) { + tm.tm_mday += ((pc.day_number - tm.tm_wday + 7) % 7 + 7 + * (pc.day_ordinal + - (0 < pc.day_ordinal + && tm.tm_wday != pc.day_number))); + tm.tm_isdst = -1; + Start = mktime (&tm); + if (Start == (time_t) -1) { + goto fail; + } + } + /* Add relative date. */ + if (pc.rel.year | pc.rel.month | pc.rel.day) { + int year = tm.tm_year + pc.rel.year; + int month = tm.tm_mon + pc.rel.month; + int day = tm.tm_mday + pc.rel.day; + if (((year < tm.tm_year) ^ (pc.rel.year < 0)) + | ((month < tm.tm_mon) ^ (pc.rel.month < 0)) + | ((day < tm.tm_mday) ^ (pc.rel.day < 0))) { + goto fail; + } + tm.tm_year = year; + tm.tm_mon = month; + tm.tm_mday = day; + tm.tm_hour = tm0.tm_hour; + tm.tm_min = tm0.tm_min; + tm.tm_sec = tm0.tm_sec; + tm.tm_isdst = tm0.tm_isdst; + Start = mktime (&tm); + if (Start == (time_t) -1) { + goto fail; + } + } + + /** + * The only "output" of this if-block is an updated Start value, + * so this block must follow others that clobber Start. + */ + if (pc.zones_seen) { + intmax_t delta = pc.time_zone * 60; + time_t t1; +#ifdef HAVE_TM_GMTOFF + delta -= tm.tm_gmtoff; +#else + time_t t = Start; + struct tm const *gmt = gmtime (&t); + if (! gmt) { + goto fail; + } + delta -= tm_diff (&tm, gmt); +#endif + t1 = Start - delta; + if ((Start < t1) != (delta < 0)) { + goto fail; /* time_t overflow */ + } + Start = t1; + } + + /** + * Add relative hours, minutes, and seconds. On hosts that + * support leap seconds, ignore the possibility of leap seconds; + * e.g., "+ 10 minutes" adds 600 seconds, even if one of them is + * a leap second. Typically this is not what the user wants, + * but it's too hard to do it the other way, because the time + * zone indicator must be applied before relative times, and if + * mktime is applied again the time zone will be lost. 
+ */ + intmax_t sum_ns = pc.seconds.tv_nsec + pc.rel.ns; + intmax_t normalized_ns = (sum_ns % BILLION + BILLION) % BILLION; + time_t t0 = Start; + intmax_t d1 = 60 * 60 * pc.rel.hour; + time_t t1 = t0 + d1; + intmax_t d2 = 60 * pc.rel.minutes; + time_t t2 = t1 + d2; + time_t d3 = pc.rel.seconds; + time_t t3 = t2 + d3; + intmax_t d4 = (sum_ns - normalized_ns) / BILLION; + time_t t4 = t3 + d4; + time_t t5 = t4; + + if ((d1 / (60 * 60) ^ pc.rel.hour) + | (d2 / 60 ^ pc.rel.minutes) + | ((t1 < t0) ^ (d1 < 0)) + | ((t2 < t1) ^ (d2 < 0)) + | ((t3 < t2) ^ (d3 < 0)) + | ((t4 < t3) ^ (d4 < 0)) + | (t5 != t4)) { + goto fail; + } + result->tv_sec = t5; + result->tv_nsec = normalized_ns; + } + + goto done; + + fail: + ok = 0; + done: + if (tz_was_altered) + ok &= (tz0 ? setenv ("TZ", tz0, 1) + : unsetenv ("TZ")) == 0; + if (tz0 != tz0buf) + free (tz0); + return ok; +} diff --git a/sys-utils/hwclock-rtc.c b/sys-utils/hwclock-rtc.c new file mode 100644 index 0000000..70c3a2f --- /dev/null +++ b/sys-utils/hwclock-rtc.c @@ -0,0 +1,599 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * + * rtc.c - Use /dev/rtc for clock access + */ +#include <asm/ioctl.h> +#include <errno.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/ioctl.h> +#include <sys/select.h> +#include <sys/time.h> +#include <time.h> +#include <unistd.h> + +#include "monotonic.h" +#include "strutils.h" +#include "xalloc.h" +#include "nls.h" + +#include "hwclock.h" + +/* + * Get defines for rtc stuff. + * + * Getting the rtc defines is nontrivial. The obvious way is by including + * <linux/mc146818rtc.h> but that again includes <asm/io.h> which again + * includes ... and on sparc and alpha this gives compilation errors for + * many kernel versions. So, we give the defines ourselves here. Moreover, + * some Sparc person decided to be incompatible, and used a struct rtc_time + * different from that used in mc146818rtc.h. 
+ */ + +/* + * On Sparcs, there is a <asm/rtc.h> that defines different ioctls (that are + * required on my machine). However, this include file does not exist on + * other architectures. + */ +/* One might do: +#ifdef __sparc__ +# include <asm/rtc.h> +#endif + */ +#ifdef __sparc__ +/* The following is roughly equivalent */ +struct sparc_rtc_time +{ + int sec; /* Seconds 0-59 */ + int min; /* Minutes 0-59 */ + int hour; /* Hour 0-23 */ + int dow; /* Day of the week 1-7 */ + int dom; /* Day of the month 1-31 */ + int month; /* Month of year 1-12 */ + int year; /* Year 0-99 */ +}; +#define RTCGET _IOR('p', 20, struct sparc_rtc_time) +#define RTCSET _IOW('p', 21, struct sparc_rtc_time) +#endif + +/* + * struct rtc_time is present since 1.3.99. + * Earlier (since 1.3.89), a struct tm was used. + */ +struct linux_rtc_time { + int tm_sec; + int tm_min; + int tm_hour; + int tm_mday; + int tm_mon; + int tm_year; + int tm_wday; + int tm_yday; + int tm_isdst; +}; + +/* RTC_RD_TIME etc have this definition since 1.99.9 (pre2.0-9) */ +#ifndef RTC_RD_TIME +# define RTC_RD_TIME _IOR('p', 0x09, struct linux_rtc_time) +# define RTC_SET_TIME _IOW('p', 0x0a, struct linux_rtc_time) +# define RTC_UIE_ON _IO('p', 0x03) /* Update int. enable on */ +# define RTC_UIE_OFF _IO('p', 0x04) /* Update int. 
enable off */ +#endif + +/* RTC_EPOCH_READ and RTC_EPOCH_SET are present since 2.0.34 and 2.1.89 */ +#ifndef RTC_EPOCH_READ +# define RTC_EPOCH_READ _IOR('p', 0x0d, unsigned long) /* Read epoch */ +# define RTC_EPOCH_SET _IOW('p', 0x0e, unsigned long) /* Set epoch */ +#endif + +#ifndef RTC_PARAM_GET +struct rtc_param { + uint64_t param; + union { + uint64_t uvalue; + int64_t svalue; + uint64_t ptr; + }; + uint32_t index; + uint32_t __pad; +}; + +# define RTC_PARAM_GET _IOW('p', 0x13, struct rtc_param) +# define RTC_PARAM_SET _IOW('p', 0x14, struct rtc_param) + +# define RTC_PARAM_FEATURES 0 +# define RTC_PARAM_CORRECTION 1 +# define RTC_PARAM_BACKUP_SWITCH_MODE 2 +#endif /* RTC_PARAM_GET */ + +static const struct hwclock_param hwclock_params[] = +{ + { RTC_PARAM_FEATURES, "features", N_("supported features") }, + { RTC_PARAM_CORRECTION, "correction", N_("time correction") }, + { RTC_PARAM_BACKUP_SWITCH_MODE, "bsm", N_("backup switch mode") }, + { } +}; + +const struct hwclock_param *get_hwclock_params(void) +{ + return hwclock_params; +} + +/* + * /dev/rtc is conventionally chardev 10/135 + * ia64 uses /dev/efirtc, chardev 10/136 + * devfs (obsolete) used /dev/misc/... for miscdev + * new RTC framework + udev uses dynamic major and /dev/rtc0.../dev/rtcN + * ... 
so we need an overridable default + */ + +/* default or user defined dev (by hwclock --rtc=<path>) */ +static const char *rtc_dev_name; +static int rtc_dev_fd = -1; + +static void close_rtc(void) +{ + if (rtc_dev_fd != -1) + close(rtc_dev_fd); + rtc_dev_fd = -1; +} + +static int open_rtc(const struct hwclock_control *ctl) +{ + static const char *fls[] = { +#ifdef __ia64__ + "/dev/efirtc", + "/dev/misc/efirtc", +#endif + "/dev/rtc0", + "/dev/rtc", + "/dev/misc/rtc" + }; + size_t i; + + if (rtc_dev_fd != -1) + return rtc_dev_fd; + + /* --rtc option has been given */ + if (ctl->rtc_dev_name) { + rtc_dev_name = ctl->rtc_dev_name; + rtc_dev_fd = open(rtc_dev_name, O_RDONLY); + } else { + for (i = 0; i < ARRAY_SIZE(fls); i++) { + if (ctl->verbose) + printf(_("Trying to open: %s\n"), fls[i]); + rtc_dev_fd = open(fls[i], O_RDONLY); + + if (rtc_dev_fd < 0) { + if (errno == ENOENT || errno == ENODEV) + continue; + if (ctl->verbose) + warn(_("cannot open %s"), fls[i]); + } + rtc_dev_name = fls[i]; + break; + } + if (rtc_dev_fd < 0) + rtc_dev_name = *fls; /* default for error messages */ + } + if (rtc_dev_fd != -1) + atexit(close_rtc); + return rtc_dev_fd; +} + +static int open_rtc_or_exit(const struct hwclock_control *ctl) +{ + int rtc_fd = open_rtc(ctl); + + if (rtc_fd < 0) { + warn(_("cannot open rtc device")); + hwclock_exit(ctl, EXIT_FAILURE); + } + return rtc_fd; +} + +static int do_rtc_read_ioctl(int rtc_fd, struct tm *tm) +{ + int rc = -1; + char *ioctlname; +#ifdef __sparc__ + /* some but not all sparcs use a different ioctl and struct */ + struct sparc_rtc_time stm; +#endif + + ioctlname = "RTC_RD_TIME"; + rc = ioctl(rtc_fd, RTC_RD_TIME, tm); + +#ifdef __sparc__ + if (rc == -1) { /* sparc sbus */ + ioctlname = "RTCGET"; + rc = ioctl(rtc_fd, RTCGET, &stm); + if (rc == 0) { + tm->tm_sec = stm.sec; + tm->tm_min = stm.min; + tm->tm_hour = stm.hour; + tm->tm_mday = stm.dom; + tm->tm_mon = stm.month - 1; + tm->tm_year = stm.year - 1900; + tm->tm_wday = stm.dow - 1; + 
tm->tm_yday = -1; /* day in the year */ + } + } +#endif + + if (rc == -1) { + warn(_("ioctl(%s) to %s to read the time failed"), + ioctlname, rtc_dev_name); + return -1; + } + + tm->tm_isdst = -1; /* don't know whether it's dst */ + return 0; +} + +/* + * Wait for the top of a clock tick by reading /dev/rtc in a busy loop + * until we see it. This function is used for rtc drivers without ioctl + * interrupts. This is typical on an Alpha, where the Hardware Clock + * interrupts are used by the kernel for the system clock, so aren't at + * the user's disposal. + */ +static int busywait_for_rtc_clock_tick(const struct hwclock_control *ctl, + const int rtc_fd) +{ + struct tm start_time; + /* The time when we were called (and started waiting) */ + struct tm nowtime; + int rc; + struct timeval begin = { 0 }, now = { 0 }; + + if (ctl->verbose) { + printf("ioctl(%d, RTC_UIE_ON, 0): %s\n", + rtc_fd, strerror(errno)); + printf(_("Waiting in loop for time from %s to change\n"), + rtc_dev_name); + } + + if (do_rtc_read_ioctl(rtc_fd, &start_time)) + return 1; + + /* + * Wait for change. Should be within a second, but in case + * something weird happens, we have a time limit (1.5s) on this loop + * to reduce the impact of this failure. + */ + gettime_monotonic(&begin); + do { + rc = do_rtc_read_ioctl(rtc_fd, &nowtime); + if (rc || start_time.tm_sec != nowtime.tm_sec) + break; + gettime_monotonic(&now); + if (time_diff(now, begin) > 1.5) { + warnx(_("Timed out waiting for time change.")); + return 1; + } + } while (1); + + if (rc) + return 1; + return 0; +} + +/* + * Same as synchronize_to_clock_tick(), but just for /dev/rtc. 
+ */ +static int synchronize_to_clock_tick_rtc(const struct hwclock_control *ctl) +{ + int rtc_fd; /* File descriptor of /dev/rtc */ + int ret = 1; + + rtc_fd = open_rtc(ctl); + if (rtc_fd == -1) { + warn(_("cannot open rtc device")); + return ret; + } + + /* Turn on update interrupts (one per second) */ + int rc = ioctl(rtc_fd, RTC_UIE_ON, 0); + + if (rc != -1) { + /* + * Just reading rtc_fd fails on broken hardware: no + * update interrupt comes and a bootscript with a + * hwclock call hangs + */ + fd_set rfds; + struct timeval tv; + + /* + * Wait up to ten seconds for the next update + * interrupt + */ + FD_ZERO(&rfds); + FD_SET(rtc_fd, &rfds); + tv.tv_sec = 10; + tv.tv_usec = 0; + rc = select(rtc_fd + 1, &rfds, NULL, NULL, &tv); + if (0 < rc) + ret = 0; + else if (rc == 0) { + warnx(_("select() to %s to wait for clock tick timed out"), + rtc_dev_name); + } else + warn(_("select() to %s to wait for clock tick failed"), + rtc_dev_name); + /* Turn off update interrupts */ + rc = ioctl(rtc_fd, RTC_UIE_OFF, 0); + if (rc == -1) + warn(_("ioctl() to %s to turn off update interrupts failed"), + rtc_dev_name); + } else if (errno == ENOTTY || errno == EINVAL) { + /* rtc ioctl interrupts are unimplemented */ + ret = busywait_for_rtc_clock_tick(ctl, rtc_fd); + } else + warn(_("ioctl(%d, RTC_UIE_ON, 0) to %s failed"), + rtc_fd, rtc_dev_name); + return ret; +} + +static int read_hardware_clock_rtc(const struct hwclock_control *ctl, + struct tm *tm) +{ + int rtc_fd, rc; + + rtc_fd = open_rtc_or_exit(ctl); + + /* Read the RTC time/date, return answer via tm */ + rc = do_rtc_read_ioctl(rtc_fd, tm); + + return rc; +} + +/* + * Set the Hardware Clock to the broken down time <new_broken_time>. Use + * ioctls to "rtc" device /dev/rtc. 
+ */ +static int set_hardware_clock_rtc(const struct hwclock_control *ctl, + const struct tm *new_broken_time) +{ + int rc = -1; + int rtc_fd; + char *ioctlname; + + rtc_fd = open_rtc_or_exit(ctl); + + ioctlname = "RTC_SET_TIME"; + rc = ioctl(rtc_fd, RTC_SET_TIME, new_broken_time); + +#ifdef __sparc__ + if (rc == -1) { /* sparc sbus */ + struct sparc_rtc_time stm; + + stm.sec = new_broken_time->tm_sec; + stm.min = new_broken_time->tm_min; + stm.hour = new_broken_time->tm_hour; + stm.dom = new_broken_time->tm_mday; + stm.month = new_broken_time->tm_mon + 1; + stm.year = new_broken_time->tm_year + 1900; + stm.dow = new_broken_time->tm_wday + 1; + + ioctlname = "RTCSET"; + rc = ioctl(rtc_fd, RTCSET, &stm); + } +#endif + + if (rc == -1) { + warn(_("ioctl(%s) to %s to set the time failed"), + ioctlname, rtc_dev_name); + hwclock_exit(ctl, EXIT_FAILURE); + } + + if (ctl->verbose) + printf(_("ioctl(%s) was successful.\n"), ioctlname); + + return 0; +} + +static int get_permissions_rtc(void) +{ + return 0; +} + +static const char *get_device_path(void) +{ + return rtc_dev_name; +} + +static struct clock_ops rtc_interface = { + N_("Using the rtc interface to the clock."), + get_permissions_rtc, + read_hardware_clock_rtc, + set_hardware_clock_rtc, + synchronize_to_clock_tick_rtc, + get_device_path, +}; + +/* return &rtc if /dev/rtc can be opened, NULL otherwise */ +struct clock_ops *probe_for_rtc_clock(const struct hwclock_control *ctl) +{ + const int rtc_fd = open_rtc(ctl); + + if (rtc_fd < 0) + return NULL; + return &rtc_interface; +} + +#ifdef __alpha__ +/* + * Get the Hardware Clock epoch setting from the kernel. 
+ */ +int get_epoch_rtc(const struct hwclock_control *ctl, unsigned long *epoch_p) +{ + int rtc_fd; + + rtc_fd = open_rtc(ctl); + if (rtc_fd < 0) { + warn(_("cannot open %s"), rtc_dev_name); + return 1; + } + + if (ioctl(rtc_fd, RTC_EPOCH_READ, epoch_p) == -1) { + warn(_("ioctl(%d, RTC_EPOCH_READ, epoch_p) to %s failed"), + rtc_fd, rtc_dev_name); + return 1; + } + + if (ctl->verbose) + printf(_("ioctl(%d, RTC_EPOCH_READ, epoch_p) to %s succeeded.\n"), + rtc_fd, rtc_dev_name); + + return 0; +} + +/* + * Set the Hardware Clock epoch in the kernel. + */ +int set_epoch_rtc(const struct hwclock_control *ctl) +{ + int rtc_fd; + unsigned long epoch; + + errno = 0; + epoch = strtoul(ctl->epoch_option, NULL, 10); + + /* There were no RTC clocks before 1900. */ + if (errno || epoch < 1900 || epoch == ULONG_MAX) { + warnx(_("invalid epoch '%s'."), ctl->epoch_option); + return 1; + } + + rtc_fd = open_rtc(ctl); + if (rtc_fd < 0) { + warn(_("cannot open %s"), rtc_dev_name); + return 1; + } + + if (ioctl(rtc_fd, RTC_EPOCH_SET, epoch) == -1) { + warn(_("ioctl(%d, RTC_EPOCH_SET, %lu) to %s failed"), + rtc_fd, epoch, rtc_dev_name); + return 1; + } + + if (ctl->verbose) + printf(_("ioctl(%d, RTC_EPOCH_SET, %lu) to %s succeeded.\n"), + rtc_fd, epoch, rtc_dev_name); + + return 0; +} +#endif /* __alpha__ */ + + + +static int resolve_rtc_param_alias(const char *alias, uint64_t *value) +{ + const struct hwclock_param *param = &hwclock_params[0]; + + while (param->name) { + if (!strcmp(alias, param->name)) { + *value = param->id; + return 0; + } + param++; + } + + return 1; +} + +/* + * Get the Hardware Clock parameter setting from the kernel. 
+ */ +int get_param_rtc(const struct hwclock_control *ctl, + const char *name, uint64_t *id, uint64_t *value) +{ + int rtc_fd; + struct rtc_param param = { .param = 0 }; + + /* handle name */ + if (resolve_rtc_param_alias(name, ¶m.param) != 0 + && ul_strtou64(name, ¶m.param, 0) != 0) { + warnx(_("could not convert parameter name to number")); + return 1; + } + + /* get parameter */ + rtc_fd = open_rtc(ctl); + if (rtc_fd < 0) { + warn(_("cannot open %s"), rtc_dev_name); + return 1; + } + + if (ioctl(rtc_fd, RTC_PARAM_GET, ¶m) == -1) { + warn(_("ioctl(%d, RTC_PARAM_GET, param) to %s failed"), + rtc_fd, rtc_dev_name); + return 1; + } + + if (id) + *id = param.param; + if (value) + *value = param.uvalue; + + if (ctl->verbose) + printf(_("ioctl(%d, RTC_PARAM_GET, param) to %s succeeded.\n"), + rtc_fd, rtc_dev_name); + + return 0; +} + +/* + * Set the Hardware Clock parameter in the kernel. + */ +int set_param_rtc(const struct hwclock_control *ctl, const char *opt0) +{ + int rtc_fd, rc = 1; + struct rtc_param param = { .param = 0 }; + char *tok, *opt = xstrdup(opt0); + + /* handle name */ + tok = strtok(opt, "="); + if (resolve_rtc_param_alias(tok, ¶m.param) != 0 + && ul_strtou64(tok, ¶m.param, 0) != 0) { + warnx(_("could not convert parameter name to number")); + goto done; + } + + /* handle value */ + tok = strtok(NULL, "="); + if (!tok) { + warnx(_("expected <param>=<value>")); + goto done; + } + if (ul_strtou64(tok, ¶m.uvalue, 0) != 0) { + warnx(_("could not convert parameter value to number")); + goto done; + } + + /* set parameter */ + rtc_fd = open_rtc(ctl); + if (rtc_fd < 0) { + warnx(_("cannot open %s"), rtc_dev_name); + return 1; + } + + if (ioctl(rtc_fd, RTC_PARAM_SET, ¶m) == -1) { + warn(_("ioctl(%d, RTC_PARAM_SET, param) to %s failed"), + rtc_fd, rtc_dev_name); + goto done; + } + + if (ctl->verbose) + printf(_("ioctl(%d, RTC_PARAM_SET, param) to %s succeeded.\n"), + rtc_fd, rtc_dev_name); + + rc = 0; +done: + free(opt); + return rc; +} diff --git 
a/sys-utils/hwclock.8 b/sys-utils/hwclock.8 new file mode 100644 index 0000000..9ea2b53 --- /dev/null +++ b/sys-utils/hwclock.8 @@ -0,0 +1,597 @@ +'\" t +.\" Title: hwclock +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "HWCLOCK" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +hwclock \- time clocks utility +.SH "SYNOPSIS" +.sp +\fBhwclock\fP [\fIfunction\fP] [\fIoption\fP...] +.SH "DESCRIPTION" +.sp +\fBhwclock\fP is an administration tool for the time clocks. It can: display the Hardware Clock time; set the Hardware Clock to a specified time; set the Hardware Clock from the System Clock; set the System Clock from the Hardware Clock; compensate for Hardware Clock drift; correct the System Clock timescale; set the kernel\(cqs timezone, NTP timescale, and epoch (Alpha only); and predict future Hardware Clock values based on its drift rate. +.sp +Since v2.26 important changes were made to the \fB\-\-hctosys\fP function and the \fB\-\-directisa\fP option, and a new option \fB\-\-update\-drift\fP was added. See their respective descriptions below. +.SH "FUNCTIONS" +.sp +The following functions are mutually exclusive, only one can be given at a time. If none is given, the default is \fB\-\-show\fP. +.sp +\fB\-a, \-\-adjust\fP +.RS 4 +Add or subtract time from the Hardware Clock to account for systematic drift since the last time the clock was set or adjusted. See the discussion below, under \fBThe Adjust Function\fP. +.RE +.sp +\fB\-\-getepoch\fP; \fB\-\-setepoch\fP +.RS 4 +These functions are for Alpha machines only, and are only available through the Linux kernel RTC driver. 
+.sp +They are used to read and set the kernel\(cqs Hardware Clock epoch value. Epoch is the number of years into AD to which a zero year value in the Hardware Clock refers. For example, if the machine\(cqs BIOS sets the year counter in the Hardware Clock to contain the number of full years since 1952, then the kernel\(cqs Hardware Clock epoch value must be 1952. +.sp +The \fB\-\-setepoch\fP function requires using the \fB\-\-epoch\fP option to specify the year. For example: +.sp +\fBhwclock \-\-setepoch \-\-epoch=1952\fP +.sp +The RTC driver attempts to guess the correct epoch value, so setting it may not be required. +.sp +This epoch value is used whenever \fBhwclock\fP reads or sets the Hardware Clock on an Alpha machine. For ISA machines the kernel uses the fixed Hardware Clock epoch of 1900. +.RE +.sp +\fB\-\-param\-get=\fP\fIparameter\fP; \fB\-\-param\-set=\fP\fIparameter\fP=\fIvalue\fP +.RS 4 +Read and set the RTC\(cqs parameter. This is useful, for example, to retrieve the RTC\(cqs feature or set the RTC\(cqs Backup Switchover Mode. +.sp +\fIparameter\fP is either a numeric RTC parameter value (see the Kernel\(cqs \fIinclude/uapi/linux/rtc.h\fP) or an alias. See \fB\-\-help\fP for a list of valid aliases. \fIparameter\fP and \fIvalue\fP, if prefixed with 0x, are interpreted as hexadecimal, otherwise decimal values. +.RE +.sp +\fB\-\-predict\fP +.RS 4 +Predict what the Hardware Clock will read in the future based upon the time given by the \fB\-\-date\fP option and the information in \fI/etc/adjtime\fP. This is useful, for example, to account for drift when setting a Hardware Clock wakeup (aka alarm). See \fBrtcwake\fP(8). +.sp +Do not use this function if the Hardware Clock is being modified by anything other than the current operating system\(cqs \fBhwclock\fP command, such as \(aq11 minute mode\(aq or from dual\-booting another OS. 
+.RE +.sp +\fB\-r\fP, \fB\-\-show\fP; \fB\-\-get\fP +.RS 4 +Read the Hardware Clock and print its time to standard output in the \fBISO 8601\fP format. The time shown is always in local time, even if you keep your Hardware Clock in UTC. See the \fB\-\-localtime\fP option. +.sp +Showing the Hardware Clock time is the default when no function is specified. +.sp +The \fB\-\-get\fP function also applies drift correction to the time read, based upon the information in \fI/etc/adjtime\fP. Do not use this function if the Hardware Clock is being modified by anything other than the current operating system\(cqs \fBhwclock\fP command, such as \(aq11 minute mode\(aq or from dual\-booting another OS. +.RE +.sp +\fB\-s\fP, \fB\-\-hctosys\fP +.RS 4 +Set the System Clock from the Hardware Clock. The time read from the Hardware Clock is compensated to account for systematic drift before using it to set the System Clock. See the discussion below, under \fBThe Adjust Function\fP. +.sp +The System Clock must be kept in the UTC timescale for date\-time applications to work correctly in conjunction with the timezone configured for the system. If the Hardware Clock is kept in local time then the time read from it must be shifted to the UTC timescale before using it to set the System Clock. The \fB\-\-hctosys\fP function does this based upon the information in the \fI/etc/adjtime\fP file or the command line arguments \fB\-\-localtime\fP and \fB\-\-utc\fP. Note: no daylight saving adjustment is made. See the discussion below, under \fBLOCAL vs UTC\fP. +.sp +The kernel also keeps a timezone value, the \fB\-\-hctosys\fP function sets it to the timezone configured for the system. The system timezone is configured by the \fBTZ\fP environment variable or the \fI/etc/localtime\fP file, as \fBtzset\fP(3) would interpret them. The obsolete \fItz_dsttime\fP field of the kernel\(cqs timezone value is set to zero. (For details on what this field used to mean, see \fBsettimeofday\fP(2).) 
+.sp +When used in a startup script, making the \fB\-\-hctosys\fP function the first caller of \fBsettimeofday\fP(2) from boot, it will set the NTP \(aq11 minute mode\(aq timescale via the \fIpersistent_clock_is_local\fP kernel variable. If the Hardware Clock\(cqs timescale configuration is changed then a reboot is required to inform the kernel. See the discussion below, under \fBAutomatic Hardware Clock Synchronization by the Kernel\fP. +.sp +This is a good function to use in one of the system startup scripts before the file systems are mounted read/write. +.sp +This function should never be used on a running system. Jumping system time will cause problems, such as corrupted filesystem timestamps. Also, if something has changed the Hardware Clock, like NTP\(cqs \(aq11 minute mode\(aq, then \fB\-\-hctosys\fP will set the time incorrectly by including drift compensation. +.sp +Drift compensation can be inhibited by setting the drift factor in \fI/etc/adjtime\fP to zero. This setting will be persistent as long as the \fB\-\-update\-drift\fP option is not used with \fB\-\-systohc\fP at shutdown (or anywhere else). Another way to inhibit this is by using the \fB\-\-noadjfile\fP option when calling the \fB\-\-hctosys\fP function. A third method is to delete the \fI/etc/adjtime\fP file. \fBHwclock\fP will then default to using the UTC timescale for the Hardware Clock. If the Hardware Clock is ticking local time it will need to be defined in the file. This can be done by calling \fBhwclock \-\-localtime \-\-adjust\fP; when the file is not present this command will not actually adjust the Clock, but it will create the file with local time configured, and a drift factor of zero. +.sp +A condition under which inhibiting \fBhwclock\fP\(aqs drift correction may be desired is when dual\-booting multiple operating systems. 
If while this instance of Linux is stopped, another OS changes the Hardware Clock\(cqs value, then when this instance is started again the drift correction applied will be incorrect. +.sp +For \fBhwclock\fP\(aqs drift correction to work properly it is imperative that nothing changes the Hardware Clock while its Linux instance is not running. +.RE +.sp +\fB\-\-set\fP +.RS 4 +Set the Hardware Clock to the time given by the \fB\-\-date\fP option, and update the timestamps in \fI/etc/adjtime\fP. With the \fB\-\-update\-drift\fP option also (re)calculate the drift factor. Try it without the option if \fB\-\-set\fP fails. See \fB\-\-update\-drift\fP below. +.RE +.sp +\fB\-\-systz\fP +.RS 4 +This is an alternate to the \fB\-\-hctosys\fP function that does not read the Hardware Clock nor set the System Clock; consequently there is not any drift correction. It is intended to be used in a startup script on systems with kernels above version 2.6 where you know the System Clock has been set from the Hardware Clock by the kernel during boot. +.sp +It does the following things that are detailed above in the \fB\-\-hctosys\fP function: +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Corrects the System Clock timescale to UTC as needed. Only instead of accomplishing this by setting the System Clock, \fBhwclock\fP simply informs the kernel and it handles the change. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Sets the kernel\(cqs NTP \(aq11 minute mode\(aq timescale. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Sets the kernel\(cqs timezone. +.RE +.RE +.sp +The first two are only available on the first call of \fBsettimeofday\fP(2) after boot. Consequently this option only makes sense when used in a startup script. If the Hardware Clock\(cqs timescale configuration is changed then a reboot would be required to inform the kernel.
+.sp +\fB\-w\fP, \fB\-\-systohc\fP +.RS 4 +Set the Hardware Clock from the System Clock, and update the timestamps in \fI/etc/adjtime\fP. With the \fB\-\-update\-drift\fP option also (re)calculate the drift factor. Try it without the option if \fB\-\-systohc\fP fails. See \fB\-\-update\-drift\fP below. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "OPTIONS" +.sp +\fB\-\-adjfile=\fP\fIfilename\fP +.RS 4 +Override the default \fI/etc/adjtime\fP file path. +.RE +.sp +\fB\-\-date=\fP\fIdate_string\fP +.RS 4 +This option must be used with the \fB\-\-set\fP or \fB\-\-predict\fP functions, otherwise it is ignored. +.sp +\fBhwclock \-\-set \-\-date=\(aq16:45\(aq\fP +.sp +\fBhwclock \-\-predict \-\-date=\(aq2525\-08\-14 07:11:05\(aq\fP +.sp +The argument must be in local time, even if you keep your Hardware Clock in UTC. See the \fB\-\-localtime\fP option. Therefore, the argument should not include any timezone information. It also should not be a relative time like "+5 minutes", because \fBhwclock\fP\(aqs precision depends upon correlation between the argument\(cqs value and when the enter key is pressed. Fractional seconds are silently dropped. This option is capable of understanding many time and date formats, but the previous parameters should be observed. +.RE +.sp +\fB\-\-delay=\fP\fIseconds\fP +.RS 4 +This option can be used to overwrite the internally used delay when setting the clock time. The default is 0.5 (500ms) for rtc_cmos, for another RTC types the delay is 0. If RTC type is impossible to determine (from sysfs) then it defaults also to 0.5 to be backwardly compatible. +.sp +The 500ms default is based on commonly used MC146818A\-compatible (x86) hardware clock. This Hardware Clock can only be set to any integer time plus one half second. The integer time is required because there is no interface to set or get a fractional second. 
The additional half second delay is because the Hardware Clock updates to the following second precisely 500 ms after setting the new time. Unfortunately, this behavior is hardware specific and in some cases another delay is required. +.RE +.sp +\fB\-D\fP, \fB\-\-debug\fP +.RS 4 +Use \fB\-\-verbose\fP. The \fB\-\-debug\fP option has been deprecated and may be repurposed or removed in a future release. +.RE +.sp +\fB\-\-directisa\fP +.RS 4 +This option is meaningful for ISA compatible machines in the x86 and x86_64 family. For other machines, it has no effect. This option tells \fBhwclock\fP to use explicit I/O instructions to access the Hardware Clock. Without this option, \fBhwclock\fP will use the rtc device file, which it assumes to be driven by the Linux RTC device driver. As of v2.26 it will no longer automatically use directisa when the rtc driver is unavailable; this was causing an unsafe condition that could allow two processes to access the Hardware Clock at the same time. Direct hardware access from userspace should only be used for testing, troubleshooting, and as a last resort when all other methods fail. See the \fB\-\-rtc\fP option. +.RE +.sp +\fB\-\-epoch=\fP\fIyear\fP +.RS 4 +This option is required when using the \fB\-\-setepoch\fP function. The minimum \fIyear\fP value is 1900. The maximum is system dependent (\fBULONG_MAX \- 1\fP). +.RE +.sp +\fB\-f\fP, \fB\-\-rtc=\fP\fIfilename\fP +.RS 4 +Override \fBhwclock\fP\(aqs default rtc device file name. Otherwise it will use the first one found in this order: \fI/dev/rtc0\fP, \fI/dev/rtc\fP, \fI/dev/misc/rtc\fP. For \fBIA\-64:\fP \fI/dev/efirtc\fP \fI/dev/misc/efirtc\fP +.RE +.sp +\fB\-l\fP, \fB\-\-localtime\fP; \fB\-u\fP, \fB\-\-utc\fP +.RS 4 +Indicate which timescale the Hardware Clock is set to. +.sp +The Hardware Clock may be configured to use either the UTC or the local timescale, but nothing in the clock itself says which alternative is being used.
The \fB\-\-localtime\fP or \fB\-\-utc\fP options give this information to the \fBhwclock\fP command. If you specify the wrong one (or specify neither and take a wrong default), both setting and reading the Hardware Clock will be incorrect. +.sp +If you specify neither \fB\-\-utc\fP nor \fB\-\-localtime\fP then the one last given with a set function (\fB\-\-set\fP, \fB\-\-systohc\fP, or \fB\-\-adjust\fP), as recorded in \fI/etc/adjtime\fP, will be used. If the adjtime file doesn\(cqt exist, the default is UTC. +.sp +Note: daylight saving time changes may be inconsistent when the Hardware Clock is kept in local time. See the discussion below, under \fBLOCAL vs UTC\fP. +.RE +.sp +\fB\-\-noadjfile\fP +.RS 4 +Disable the facilities provided by \fI/etc/adjtime\fP. \fBhwclock\fP will not read nor write to that file with this option. Either \fB\-\-utc\fP or \fB\-\-localtime\fP must be specified when using this option. +.RE +.sp +\fB\-\-test\fP +.RS 4 +Do not actually change anything on the system, that is, the Clocks or \fI/etc/adjtime\fP (\fB\-\-verbose\fP is implicit with this option). +.RE +.sp +\fB\-\-update\-drift\fP +.RS 4 +Update the Hardware Clock\(cqs drift factor in \fI/etc/adjtime\fP. It can only be used with \fB\-\-set\fP or \fB\-\-systohc\fP. +.sp +A minimum four hour period between settings is required. This is to avoid invalid calculations. The longer the period, the more precise the resulting drift factor will be. +.sp +This option was added in v2.26, because it is typical for systems to call \fBhwclock \-\-systohc\fP at shutdown; with the old behavior this would automatically (re)calculate the drift factor which caused several problems: +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +When using NTP with an \(aq11 minute mode\(aq kernel the drift factor would be clobbered to near zero. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. 
IP \(bu 2.3 +.\} +It would not allow the use of \(aqcold\(aq drift correction. With most configurations using \(aqcold\(aq drift will yield favorable results. Cold, means when the machine is turned off which can have a significant impact on the drift factor. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +(Re)calculating drift factor on every shutdown delivers suboptimal results. For example, if ephemeral conditions cause the machine to be abnormally hot the drift factor calculation would be out of range. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Significantly increased system shutdown times (as of v2.31 when not using \fB\-\-update\-drift\fP the RTC is not read). +.RE +.RE +.sp +Having \fBhwclock\fP calculate the drift factor is a good starting point, but for optimal results it will likely need to be adjusted by directly editing the \fI/etc/adjtime\fP file. For most configurations once a machine\(cqs optimal drift factor is crafted it should not need to be changed. Therefore, the old behavior to automatically (re)calculate drift was changed and now requires this option to be used. See the discussion below, under \fBThe Adjust Function\fP. +.sp +This option requires reading the Hardware Clock before setting it. If it cannot be read, then this option will cause the set functions to fail. This can happen, for example, if the Hardware Clock is corrupted by a power failure. In that case, the clock must first be set without this option. Despite it not working, the resulting drift correction factor would be invalid anyway. +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Display more details about what \fBhwclock\fP is doing internally. 
+.RE +.SH "NOTES" +.SS "Clocks in a Linux System" +.sp +There are two types of date\-time clocks: +.sp +\fBThe Hardware Clock:\fP This clock is an independent hardware device, with its own power domain (battery, capacitor, etc), that operates when the machine is powered off, or even unplugged. +.sp +On an ISA compatible system, this clock is specified as part of the ISA standard. A control program can read or set this clock only to a whole second, but it can also detect the edges of the 1 second clock ticks, so the clock actually has virtually infinite precision. +.sp +This clock is commonly called the hardware clock, the real time clock, the RTC, the BIOS clock, and the CMOS clock. Hardware Clock, in its capitalized form, was coined for use by \fBhwclock\fP. The Linux kernel also refers to it as the persistent clock. +.sp +Some non\-ISA systems have a few real time clocks with only one of them having its own power domain. A very low power external I2C or SPI clock chip might be used with a backup battery as the hardware clock to initialize a more functional integrated real\-time clock which is used for most other purposes. +.sp +\fBThe System Clock:\fP This clock is part of the Linux kernel and is driven by a timer interrupt. (On an ISA machine, the timer interrupt is part of the ISA standard.) It has meaning only while Linux is running on the machine. The System Time is the number of seconds since 00:00:00 January 1, 1970 UTC (or more succinctly, the number of seconds since 1969 UTC). The System Time is not an integer, though. It has virtually infinite precision. +.sp +The System Time is the time that matters. The Hardware Clock\(cqs basic purpose is to keep time when Linux is not running so that the System Clock can be initialized from it at boot. Note that in DOS, for which ISA was designed, the Hardware Clock is the only real time clock. 
+.sp +It is important that the System Time not have any discontinuities such as would happen if you used the \fBdate\fP(1) program to set it while the system is running. You can, however, do whatever you want to the Hardware Clock while the system is running, and the next time Linux starts up, it will do so with the adjusted time from the Hardware Clock. Note: currently this is not possible on most systems because \fBhwclock \-\-systohc\fP is called at shutdown. +.sp +The Linux kernel\(cqs timezone is set by \fBhwclock\fP. But don\(cqt be misled \(em almost nobody cares what timezone the kernel thinks it is in. Instead, programs that care about the timezone (perhaps because they want to display a local time for you) almost always use a more traditional method of determining the timezone: They use the \fBTZ\fP environment variable or the \fI/etc/localtime\fP file, as explained in the man page for \fBtzset\fP(3). However, some programs and fringe parts of the Linux kernel such as filesystems use the kernel\(cqs timezone value. An example is the vfat filesystem. If the kernel timezone value is wrong, the vfat filesystem will report and set the wrong timestamps on files. Another example is the kernel\(cqs NTP \(aq11 minute mode\(aq. If the kernel\(cqs timezone value and/or the \fIpersistent_clock_is_local\fP variable are wrong, then the Hardware Clock will be set incorrectly by \(aq11 minute mode\(aq. See the discussion below, under \fBAutomatic Hardware Clock Synchronization by the Kernel\fP. +.sp +\fBhwclock\fP sets the kernel\(cqs timezone to the value indicated by \fBTZ\fP or \fI/etc/localtime\fP with the \fB\-\-hctosys\fP or \fB\-\-systz\fP functions. +.sp +The kernel\(cqs timezone value actually consists of two parts: 1) a field tz_minuteswest indicating how many minutes local time (not adjusted for DST) lags behind UTC, and 2) a field tz_dsttime indicating the type of Daylight Savings Time (DST) convention that is in effect in the locality at the present time. 
This second field is not used under Linux and is always zero. See also \fBsettimeofday\fP(2). +.SS "Hardware Clock Access Methods" +.sp +\fBhwclock\fP uses many different ways to get and set Hardware Clock values. The most normal way is to do I/O to the rtc device special file, which is presumed to be driven by the rtc device driver. Also, Linux systems using the rtc framework with udev, are capable of supporting multiple Hardware Clocks. This may bring about the need to override the default rtc device by specifying one with the \fB\-\-rtc\fP option. +.sp +However, this method is not always available as older systems do not have an rtc driver. On these systems, the method of accessing the Hardware Clock depends on the system hardware. +.sp +On an ISA compatible system, \fBhwclock\fP can directly access the "CMOS memory" registers that constitute the clock, by doing I/O to Ports 0x70 and 0x71. It does this with actual I/O instructions and consequently can only do it if running with superuser effective userid. This method may be used by specifying the \fB\-\-directisa\fP option. +.sp +This is a really poor method of accessing the clock, for all the reasons that userspace programs are generally not supposed to do direct I/O and disable interrupts. \fBhwclock\fP provides it for testing, troubleshooting, and because it may be the only method available on ISA systems which do not have a working rtc device driver. +.SS "The Adjust Function" +.sp +The Hardware Clock is usually not very accurate. However, much of its inaccuracy is completely predictable \- it gains or loses the same amount of time every day. This is called systematic drift. \fBhwclock\fP\(aqs \fB\-\-adjust\fP function lets you apply systematic drift corrections to the Hardware Clock. +.sp +It works like this: \fBhwclock\fP keeps a file, \fI/etc/adjtime\fP, that keeps some historical information. This is called the adjtime file. +.sp +Suppose you start with no adjtime file. 
You issue a \fBhwclock \-\-set\fP command to set the Hardware Clock to the true current time. \fBhwclock\fP creates the adjtime file and records in it the current time as the last time the clock was calibrated. Five days later, the clock has gained 10 seconds, so you issue a \fBhwclock \-\-set \-\-update\-drift\fP command to set it back 10 seconds. \fBhwclock\fP updates the adjtime file to show the current time as the last time the clock was calibrated, and records 2 seconds per day as the systematic drift rate. 24 hours go by, and then you issue a \fBhwclock \-\-adjust\fP command. \fBhwclock\fP consults the adjtime file and sees that the clock gains 2 seconds per day when left alone and that it has been left alone for exactly one day. So it subtracts 2 seconds from the Hardware Clock. It then records the current time as the last time the clock was adjusted. Another 24 hours go by and you issue another \fBhwclock \-\-adjust\fP. \fBhwclock\fP does the same thing: subtracts 2 seconds and updates the adjtime file with the current time as the last time the clock was adjusted. +.sp +When you use the \fB\-\-update\-drift\fP option with \fB\-\-set\fP or \fB\-\-systohc\fP, the systematic drift rate is (re)calculated by comparing the fully drift corrected current Hardware Clock time with the new set time, from that it derives the 24 hour drift rate based on the last calibrated timestamp from the adjtime file. This updated drift factor is then saved in \fI/etc/adjtime\fP. +.sp +A small amount of error creeps in when the Hardware Clock is set, so \fB\-\-adjust\fP refrains from making any adjustment that is less than 1 second. Later on, when you request an adjustment again, the accumulated drift will be more than 1 second and \fB\-\-adjust\fP will make the adjustment including any fractional amount. +.sp +\fBhwclock \-\-hctosys\fP also uses the adjtime file data to compensate the value read from the Hardware Clock before using it to set the System Clock. 
It does not share the 1 second limitation of \fB\-\-adjust\fP, and will correct sub\-second drift values immediately. It does not change the Hardware Clock time nor the adjtime file. This may eliminate the need to use \fB\-\-adjust\fP, unless something else on the system needs the Hardware Clock to be compensated. +.SS "The Adjtime File" +.sp +While named for its historical purpose of controlling adjustments only, it actually contains other information used by \fBhwclock\fP from one invocation to the next. +.sp +The format of the adjtime file is, in ASCII: +.sp +Line 1: Three numbers, separated by blanks: 1) the systematic drift rate in seconds per day, floating point decimal; 2) the resulting number of seconds since 1969 UTC of most recent adjustment or calibration, decimal integer; 3) zero (for compatibility with \fBclock\fP(8)) as a floating point decimal. +.sp +Line 2: One number: the resulting number of seconds since 1969 UTC of most recent calibration. Zero if there has been no calibration yet or it is known that any previous calibration is moot (for example, because the Hardware Clock has been found, since that calibration, not to contain a valid time). This is a decimal integer. +.sp +Line 3: "UTC" or "LOCAL". Tells whether the Hardware Clock is set to Coordinated Universal Time or local time. You can always override this value with options on the \fBhwclock\fP command line. +.sp +You can use an adjtime file that was previously used with the \fBclock\fP(8) program with \fBhwclock\fP. +.SS "Automatic Hardware Clock Synchronization by the Kernel" +.sp +You should be aware of another way that the Hardware Clock is kept synchronized in some systems. The Linux kernel has a mode wherein it copies the System Time to the Hardware Clock every 11 minutes. This mode is a compile time option, so not all kernels will have this capability. This is a good mode to use when you are using something sophisticated like NTP to keep your System Clock synchronized. 
(NTP is a way to keep your System Time synchronized either to a time server somewhere on the network or to a radio clock hooked up to your system. See RFC 1305.) +.sp +If the kernel is compiled with the \(aq11 minute mode\(aq option it will be active when the kernel\(cqs clock discipline is in a synchronized state. When in this state, bit 6 (the bit that is set in the mask 0x0040) of the kernel\(cqs \fItime_status\fP variable is unset. This value is output as the \(aqstatus\(aq line of the \fBadjtimex \-\-print\fP or \fBntptime\fP commands. +.sp +It takes an outside influence, like the NTP daemon to put the kernel\(cqs clock discipline into a synchronized state, and therefore turn on \(aq11 minute mode\(aq. It can be turned off by running anything that sets the System Clock the old fashioned way, including \fBhwclock \-\-hctosys\fP. However, if the NTP daemon is still running, it will turn \(aq11 minute mode\(aq back on again the next time it synchronizes the System Clock. +.sp +If your system runs with \(aq11 minute mode\(aq on, it may need to use either \fB\-\-hctosys\fP or \fB\-\-systz\fP in a startup script, especially if the Hardware Clock is configured to use the local timescale. Unless the kernel is informed of what timescale the Hardware Clock is using, it may clobber it with the wrong one. The kernel uses UTC by default. +.sp +The first userspace command to set the System Clock informs the kernel what timescale the Hardware Clock is using. This happens via the \fIpersistent_clock_is_local\fP kernel variable. If \fB\-\-hctosys\fP or \fB\-\-systz\fP is the first, it will set this variable according to the adjtime file or the appropriate command\-line argument. Note that when using this capability and the Hardware Clock timescale configuration is changed, then a reboot is required to notify the kernel. +.sp +\fBhwclock \-\-adjust\fP should not be used with NTP \(aq11 minute mode\(aq. 
+.SS "ISA Hardware Clock Century value" +.sp +There is some sort of standard that defines CMOS memory Byte 50 on an ISA machine as an indicator of what century it is. \fBhwclock\fP does not use or set that byte because there are some machines that don\(cqt define the byte that way, and it really isn\(cqt necessary anyway, since the year\-of\-century does a good job of implying which century it is. +.sp +If you have a bona fide use for a CMOS century byte, contact the \fBhwclock\fP maintainer; an option may be appropriate. +.sp +Note that this section is only relevant when you are using the "direct ISA" method of accessing the Hardware Clock. ACPI provides a standard way to access century values, when they are supported by the hardware. +.SH "DATE\-TIME CONFIGURATION" +.SS "Keeping Time without External Synchronization" +.sp +This discussion is based on the following conditions: +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Nothing is running that alters the date\-time clocks, such as an NTP daemon or a cron job. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +The system timezone is configured for the correct local time. See below, under \fBPOSIX vs \(aqRIGHT\(aq\fP. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Early during startup the following are called, in this order: \fBadjtimex \-\-tick\fP \fIvalue\fP \fB\-\-frequency\fP \fIvalue\fP \fBhwclock \-\-hctosys\fP +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +During shutdown the following is called: \fBhwclock \-\-systohc\fP +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Systems without \fBadjtimex\fP may use \fBntptime\fP. +.RE +.RE +.sp +Whether maintaining precision time with NTP daemon or not, it makes sense to configure the system to keep reasonably good date\-time on its own.
+.sp +The first step in making that happen is having a clear understanding of the big picture. There are two completely separate hardware devices running at their own speed and drifting away from the \(aqcorrect\(aq time at their own rates. The methods and software for drift correction are different for each of them. However, most systems are configured to exchange values between these two clocks at startup and shutdown. Now the individual device\(cqs time keeping errors are transferred back and forth between each other. Attempt to configure drift correction for only one of them, and the other\(cqs drift will be overlaid upon it. +.sp +This problem can be avoided when configuring drift correction for the System Clock by simply not shutting down the machine. This, plus the fact that all of \fBhwclock\fP\(aqs precision (including calculating drift factors) depends upon the System Clock\(cqs rate being correct, means that configuration of the System Clock should be done first. +.sp +The System Clock drift is corrected with the \fBadjtimex\fP(8) command\(cqs \fB\-\-tick\fP and \fB\-\-frequency\fP options. These two work together: tick is the coarse adjustment and frequency is the fine adjustment. (For systems that do not have an \fBadjtimex\fP package, \fBntptime \-f\fP \fIppm\fP may be used instead.) +.sp +Some Linux distributions attempt to automatically calculate the System Clock drift with \fBadjtimex\fP\(aqs compare operation. Trying to correct one drifting clock by using another drifting clock as a reference is akin to a dog trying to catch its own tail. Success may happen eventually, but great effort and frustration will likely precede it. This automation may yield an improvement over no configuration, but expecting optimum results would be in error. A better choice for manual configuration would be \fBadjtimex\fP\(aqs \fB\-\-log\fP options. 
+.sp +It may be more effective to simply track the System Clock drift with \fBsntp\fP, or \fBdate \-Ins\fP and a precision timepiece, and then calculate the correction manually. +.sp +After setting the tick and frequency values, continue to test and refine the adjustments until the System Clock keeps good time. See \fBadjtimex\fP(2) for more information and the example demonstrating manual drift calculations. +.sp +Once the System Clock is ticking smoothly, move on to the Hardware Clock. +.sp +As a rule, cold drift will work best for most use cases. This should be true even for 24/7 machines whose normal downtime consists of a reboot. In that case the drift factor value makes little difference. But on the rare occasion that the machine is shut down for an extended period, then cold drift should yield better results. +.sp +\fBSteps to calculate cold drift:\fP +.sp +1 +.RS 4 +\fBEnsure that NTP daemon will not be launched at startup.\fP +.RE +.sp +2 +.RS 4 +The \fISystem Clock\fP time must be correct at shutdown! +.RE +.sp +3 +.RS 4 +Shut down the system. +.RE +.sp +4 +.RS 4 +Let an extended period pass without changing the Hardware Clock. +.RE +.sp +5 +.RS 4 +Start the system. +.RE +.sp +6 +.RS 4 +Immediately use \fBhwclock\fP to set the correct time, adding the \fB\-\-update\-drift\fP option. +.RE +.sp +Note: if step 6 uses \fB\-\-systohc\fP, then the System Clock must be set correctly (step 6a) just before doing so. +.sp +Having \fBhwclock\fP calculate the drift factor is a good starting point, but for optimal results it will likely need to be adjusted by directly editing the \fI/etc/adjtime\fP file. Continue to test and refine the drift factor until the Hardware Clock is corrected properly at startup. To check this, first make sure that the System Time is correct before shutdown and then use \fBsntp\fP, or \fBdate \-Ins\fP and a precision timepiece, immediately after startup. 
+.SS "LOCAL vs UTC" +.sp +Keeping the Hardware Clock in a local timescale causes inconsistent daylight saving time results: +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +If Linux is running during a daylight saving time change, the time written to the Hardware Clock will be adjusted for the change. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +If Linux is NOT running during a daylight saving time change, the time read from the Hardware Clock will NOT be adjusted for the change. +.RE +.sp +The Hardware Clock on an ISA compatible system keeps only a date and time, it has no concept of timezone nor daylight saving. Therefore, when \fBhwclock\fP is told that it is in local time, it assumes it is in the \(aqcorrect\(aq local time and makes no adjustments to the time read from it. +.sp +Linux handles daylight saving time changes transparently only when the Hardware Clock is kept in the UTC timescale. Doing so is made easy for system administrators as \fBhwclock\fP uses local time for its output and as the argument to the \fB\-\-date\fP option. +.sp +POSIX systems, like Linux, are designed to have the System Clock operate in the UTC timescale. The Hardware Clock\(cqs purpose is to initialize the System Clock, so also keeping it in UTC makes sense. +.sp +Linux does, however, attempt to accommodate the Hardware Clock being in the local timescale. This is primarily for dual\-booting with older versions of MS Windows. From Windows 7 on, the RealTimeIsUniversal registry key is supposed to be working properly so that its Hardware Clock can be kept in UTC. +.SS "POSIX vs \(aqRIGHT\(aq" +.sp +A discussion on date\-time configuration would be incomplete without addressing timezones, this is mostly well covered by \fBtzset\fP(3). One area that seems to have no documentation is the \(aqright\(aq directory of the Time Zone Database, sometimes called tz or zoneinfo. 
+.sp +There are two separate databases in the zoneinfo system, posix and \(aqright\(aq. \(aqRight\(aq (now named zoneinfo\-leaps) includes leap seconds and posix does not. To use the \(aqright\(aq database the System Clock must be set to (UTC + leap seconds), which is equivalent to (TAI \- 10). This allows calculating the exact number of seconds between two dates that cross a leap second epoch. The System Clock is then converted to the correct civil time, including UTC, by using the \(aqright\(aq timezone files which subtract the leap seconds. Note: this configuration is considered experimental and is known to have issues. +.sp +To configure a system to use a particular database all of the files located in its directory must be copied to the root of \fI/usr/share/zoneinfo\fP. Files are never used directly from the posix or \(aqright\(aq subdirectories, e.g., TZ=\(aq\fIright/Europe/Dublin\fP\(aq. This habit was becoming so common that the upstream zoneinfo project restructured the system\(cqs file tree by moving the posix and \(aqright\(aq subdirectories out of the zoneinfo directory and into sibling directories: +.sp +\fI/usr/share/zoneinfo\fP, \fI/usr/share/zoneinfo\-posix\fP, \fI/usr/share/zoneinfo\-leaps\fP +.sp +Unfortunately, some Linux distributions are changing it back to the old tree structure in their packages. So the problem of system administrators reaching into the \(aqright\(aq subdirectory persists. This causes the system timezone to be configured to include leap seconds while the zoneinfo database is still configured to exclude them. Then when an application such as a World Clock needs the South_Pole timezone file; or an email MTA, or \fBhwclock\fP needs the UTC timezone file; they fetch it from the root of \fI/usr/share/zoneinfo\fP , because that is what they are supposed to do. Those files exclude leap seconds, but the System Clock now includes them, causing an incorrect time conversion. 
+.sp +Attempting to mix and match files from these separate databases will not work, because they each require the System Clock to use a different timescale. The zoneinfo database must be configured to use either posix or \(aqright\(aq, as described above, or by assigning a database path to the \fITZDIR\fP environment variable. +.SH "EXIT STATUS" +.sp +One of the following exit values will be returned: +.sp +\fBEXIT_SUCCESS\fP (\(aq0\(aq on POSIX systems) +.RS 4 +Successful program execution. +.RE +.sp +\fBEXIT_FAILURE\fP (\(aq1\(aq on POSIX systems) +.RS 4 +The operation failed or the command syntax was not valid. +.RE +.SH "ENVIRONMENT" +.sp +\fBTZ\fP +.RS 4 +If this variable is set its value takes precedence over the system configured timezone. +.RE +.sp +\fBTZDIR\fP +.RS 4 +If this variable is set its value takes precedence over the system configured timezone database directory path. +.RE +.SH "FILES" +.sp +\fI/etc/adjtime\fP +.RS 4 +The configuration and state file for \fBhwclock\fP. See also \fBadjtime_config\fP(5). +.RE +.sp +\fI/etc/localtime\fP +.RS 4 +The system timezone file. +.RE +.sp +\fI/usr/share/zoneinfo/\fP +.RS 4 +The system timezone database directory. +.RE +.sp +Device files \fBhwclock\fP may try for Hardware Clock access: \fI/dev/rtc0\fP \fI/dev/rtc\fP \fI/dev/misc/rtc\fP \fI/dev/efirtc\fP \fI/dev/misc/efirtc\fP +.SH "SEE ALSO" +.sp +\fBdate\fP(1), +\fBadjtime_config\fP(5), +\fBadjtimex\fP(8), +\fBgettimeofday\fP(2), +\fBsettimeofday\fP(2), +\fBcrontab\fP(1p), +\fBtzset\fP(3) +.SH "AUTHORS" +.sp +Written by \c +.MTO "bryanh\(atgiraffe\-data.com" "Bryan Henderson" "," +September 1996, based on work done on the \fBclock\fP(8) program by Charles Hedrick, Rob Hooft, and Harald Koenig. See the source code for complete history and credits. +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." 
+.SH "AVAILABILITY" +.sp +The \fBhwclock\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/hwclock.8.adoc b/sys-utils/hwclock.8.adoc new file mode 100644 index 0000000..0d32b0a --- /dev/null +++ b/sys-utils/hwclock.8.adoc @@ -0,0 +1,393 @@ +//po4a: entry man manual +//// +hwclock.8 -- man page for util-linux' hwclock + +2015-01-07 J William Piggott + Authored new section: DATE-TIME CONFIGURATION. + Subsections: Keeping Time..., LOCAL vs UTC, POSIX vs 'RIGHT'. +//// += hwclock(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: hwclock +:plus: + + +== NAME + +hwclock - time clocks utility + +== SYNOPSIS + +*hwclock* [_function_] [_option_...] + +== DESCRIPTION + +*hwclock* is an administration tool for the time clocks. It can: display the Hardware Clock time; set the Hardware Clock to a specified time; set the Hardware Clock from the System Clock; set the System Clock from the Hardware Clock; compensate for Hardware Clock drift; correct the System Clock timescale; set the kernel's timezone, NTP timescale, and epoch (Alpha only); and predict future Hardware Clock values based on its drift rate. + +Since v2.26 important changes were made to the *--hctosys* function and the *--directisa* option, and a new option *--update-drift* was added. See their respective descriptions below. + +== FUNCTIONS + +The following functions are mutually exclusive, only one can be given at a time. If none is given, the default is *--show*. + +*-a, --adjust*:: +Add or subtract time from the Hardware Clock to account for systematic drift since the last time the clock was set or adjusted. See the discussion below, under *The Adjust Function*. + +*--getepoch*; *--setepoch*:: +These functions are for Alpha machines only, and are only available through the Linux kernel RTC driver. ++ +They are used to read and set the kernel's Hardware Clock epoch value. Epoch is the number of years into AD to which a zero year value in the Hardware Clock refers. 
For example, if the machine's BIOS sets the year counter in the Hardware Clock to contain the number of full years since 1952, then the kernel's Hardware Clock epoch value must be 1952. ++ +The *--setepoch* function requires using the *--epoch* option to specify the year. For example: ++ +**hwclock --setepoch --epoch=1952** ++ +The RTC driver attempts to guess the correct epoch value, so setting it may not be required. ++ +This epoch value is used whenever *hwclock* reads or sets the Hardware Clock on an Alpha machine. For ISA machines the kernel uses the fixed Hardware Clock epoch of 1900. + +**--param-get=**__parameter__; **--param-set=**__parameter__=__value__:: +Read and set the RTC's parameter. This is useful, for example, to retrieve the RTC's feature or set the RTC's Backup Switchover Mode. ++ +_parameter_ is either a numeric RTC parameter value (see the Kernel's _include/uapi/linux/rtc.h_) or an alias. See *--help* for a list of valid aliases. _parameter_ and _value_, if prefixed with 0x, are interpreted as hexadecimal, otherwise decimal values. + +*--predict*:: +Predict what the Hardware Clock will read in the future based upon the time given by the *--date* option and the information in _{ADJTIME_PATH}_. This is useful, for example, to account for drift when setting a Hardware Clock wakeup (aka alarm). See *rtcwake*(8). ++ +Do not use this function if the Hardware Clock is being modified by anything other than the current operating system's *hwclock* command, such as '11 minute mode' or from dual-booting another OS. + +*-r*, *--show*; *--get*:: +Read the Hardware Clock and print its time to standard output in the *ISO 8601* format. The time shown is always in local time, even if you keep your Hardware Clock in UTC. See the *--localtime* option. ++ +Showing the Hardware Clock time is the default when no function is specified. ++ +The *--get* function also applies drift correction to the time read, based upon the information in _{ADJTIME_PATH}_. 
Do not use this function if the Hardware Clock is being modified by anything other than the current operating system's *hwclock* command, such as '11 minute mode' or from dual-booting another OS. + +*-s*, *--hctosys*:: +Set the System Clock from the Hardware Clock. The time read from the Hardware Clock is compensated to account for systematic drift before using it to set the System Clock. See the discussion below, under *The Adjust Function*. ++ +The System Clock must be kept in the UTC timescale for date-time applications to work correctly in conjunction with the timezone configured for the system. If the Hardware Clock is kept in local time then the time read from it must be shifted to the UTC timescale before using it to set the System Clock. The *--hctosys* function does this based upon the information in the _{ADJTIME_PATH}_ file or the command line arguments *--localtime* and *--utc*. Note: no daylight saving adjustment is made. See the discussion below, under *LOCAL vs UTC*. ++ +The kernel also keeps a timezone value, the *--hctosys* function sets it to the timezone configured for the system. The system timezone is configured by the *TZ* environment variable or the _/etc/localtime_ file, as *tzset*(3) would interpret them. The obsolete _tz_dsttime_ field of the kernel's timezone value is set to zero. (For details on what this field used to mean, see *settimeofday*(2).) ++ +When used in a startup script, making the *--hctosys* function the first caller of *settimeofday*(2) from boot, it will set the NTP '11 minute mode' timescale via the _persistent_clock_is_local_ kernel variable. If the Hardware Clock's timescale configuration is changed then a reboot is required to inform the kernel. See the discussion below, under *Automatic Hardware Clock Synchronization by the Kernel*. ++ +This is a good function to use in one of the system startup scripts before the file systems are mounted read/write. ++ +This function should never be used on a running system. 
Jumping system time will cause problems, such as corrupted filesystem timestamps. Also, if something has changed the Hardware Clock, like NTP's '11 minute mode', then *--hctosys* will set the time incorrectly by including drift compensation. ++ +Drift compensation can be inhibited by setting the drift factor in _{ADJTIME_PATH}_ to zero. This setting will be persistent as long as the *--update-drift* option is not used with *--systohc* at shutdown (or anywhere else). Another way to inhibit this is by using the *--noadjfile* option when calling the *--hctosys* function. A third method is to delete the _{ADJTIME_PATH}_ file. *Hwclock* will then default to using the UTC timescale for the Hardware Clock. If the Hardware Clock is ticking local time it will need to be defined in the file. This can be done by calling *hwclock --localtime --adjust*; when the file is not present this command will not actually adjust the Clock, but it will create the file with local time configured, and a drift factor of zero. ++ +A condition under which inhibiting *hwclock*'s drift correction may be desired is when dual-booting multiple operating systems. If while this instance of Linux is stopped, another OS changes the Hardware Clock's value, then when this instance is started again the drift correction applied will be incorrect. ++ +For *hwclock*'s drift correction to work properly it is imperative that nothing changes the Hardware Clock while its Linux instance is not running. + +*--set*:: +Set the Hardware Clock to the time given by the *--date* option, and update the timestamps in _{ADJTIME_PATH}_. With the *--update-drift* option also (re)calculate the drift factor. Try it without the option if *--set* fails. See *--update-drift* below. + +*--systz*:: +This is an alternate to the *--hctosys* function that does not read the Hardware Clock nor set the System Clock; consequently there is not any drift correction. 
It is intended to be used in a startup script on systems with kernels above version 2.6 where you know the System Clock has been set from the Hardware Clock by the kernel during boot. ++ +It does the following things that are detailed above in the *--hctosys* function: + +* Corrects the System Clock timescale to UTC as needed. Only instead of accomplishing this by setting the System Clock, *hwclock* simply informs the kernel and it handles the change. +* Sets the kernel's NTP '11 minute mode' timescale. +* Sets the kernel's timezone. + +The first two are only available on the first call of *settimeofday*(2) after boot. Consequently this option only makes sense when used in a startup script. If the Hardware Clock's timescale configuration is changed then a reboot would be required to inform the kernel. + +*-w*, *--systohc*:: +Set the Hardware Clock from the System Clock, and update the timestamps in _{ADJTIME_PATH}_. With the *--update-drift* option also (re)calculate the drift factor. Try it without the option if *--systohc* fails. See *--update-drift* below. + +include::man-common/help-version.adoc[] + +== OPTIONS + +**--adjfile=**__filename__:: +Override the default _{ADJTIME_PATH}_ file path. + +**--date=**__date_string__:: +This option must be used with the *--set* or *--predict* functions, otherwise it is ignored. ++ +*hwclock --set --date='16:45'* ++ +*hwclock --predict --date='2525-08-14 07:11:05'* ++ +The argument must be in local time, even if you keep your Hardware Clock in UTC. See the *--localtime* option. Therefore, the argument should not include any timezone information. It also should not be a relative time like "+5 minutes", because *hwclock*'s precision depends upon correlation between the argument's value and when the enter key is pressed. Fractional seconds are silently dropped. This option is capable of understanding many time and date formats, but the previous parameters should be observed. 
+ +**--delay=**__seconds__:: +This option can be used to overwrite the internally used delay when setting the clock time. The default is 0.5 (500ms) for rtc_cmos, for other RTC types the delay is 0. If RTC type is impossible to determine (from sysfs) then it defaults also to 0.5 to be backward compatible. ++ +The 500ms default is based on the commonly used MC146818A-compatible (x86) hardware clock. This Hardware Clock can only be set to any integer time plus one half second. The integer time is required because there is no interface to set or get a fractional second. The additional half second delay is because the Hardware Clock updates to the following second precisely 500 ms after setting the new time. Unfortunately, this behavior is hardware specific and in some cases another delay is required. + +*-D*, *--debug*:: +Use *--verbose*. The *--debug* option has been deprecated and may be repurposed or removed in a future release. + +*--directisa*:: +This option is meaningful for ISA compatible machines in the x86 and x86_64 family. For other machines, it has no effect. This option tells *hwclock* to use explicit I/O instructions to access the Hardware Clock. Without this option, *hwclock* will use the rtc device file, which it assumes to be driven by the Linux RTC device driver. As of v2.26 it will no longer automatically use directisa when the rtc driver is unavailable; this was causing an unsafe condition that could allow two processes to access the Hardware Clock at the same time. Direct hardware access from userspace should only be used for testing, troubleshooting, and as a last resort when all other methods fail. See the *--rtc* option. + +**--epoch=**__year__:: +This option is required when using the *--setepoch* function. The minimum _year_ value is 1900. The maximum is system dependent (*ULONG_MAX - 1*). + +*-f*, **--rtc=**__filename__:: +Override *hwclock*'s default rtc device file name. 
Otherwise it will use the first one found in this order: _/dev/rtc0_, _/dev/rtc_, _/dev/misc/rtc_. For *IA-64:* _/dev/efirtc_ _/dev/misc/efirtc_ + +*-l*, *--localtime*; *-u*, *--utc*:: +Indicate which timescale the Hardware Clock is set to. ++ +The Hardware Clock may be configured to use either the UTC or the local timescale, but nothing in the clock itself says which alternative is being used. The *--localtime* or *--utc* options give this information to the *hwclock* command. If you specify the wrong one (or specify neither and take a wrong default), both setting and reading the Hardware Clock will be incorrect. ++ +If you specify neither *--utc* nor *--localtime* then the one last given with a set function (*--set*, *--systohc*, or *--adjust*), as recorded in _{ADJTIME_PATH}_, will be used. If the adjtime file doesn't exist, the default is UTC. ++ +Note: daylight saving time changes may be inconsistent when the Hardware Clock is kept in local time. See the discussion below, under *LOCAL vs UTC*. + +*--noadjfile*:: +Disable the facilities provided by _{ADJTIME_PATH}_. *hwclock* will not read nor write to that file with this option. Either *--utc* or *--localtime* must be specified when using this option. + +*--test*:: +Do not actually change anything on the system, that is, the Clocks or _{ADJTIME_PATH}_ (*--verbose* is implicit with this option). + +*--update-drift*:: +Update the Hardware Clock's drift factor in _{ADJTIME_PATH}_. It can only be used with *--set* or *--systohc*. ++ +A minimum four hour period between settings is required. This is to avoid invalid calculations. The longer the period, the more precise the resulting drift factor will be. 
++ +This option was added in v2.26, because it is typical for systems to call *hwclock --systohc* at shutdown; with the old behavior this would automatically (re)calculate the drift factor which caused several problems: ++ +* When using NTP with an '11 minute mode' kernel the drift factor would be clobbered to near zero. +* It would not allow the use of 'cold' drift correction. With most configurations using 'cold' drift will yield favorable results. Cold, means when the machine is turned off which can have a significant impact on the drift factor. +* (Re)calculating drift factor on every shutdown delivers suboptimal results. For example, if ephemeral conditions cause the machine to be abnormally hot the drift factor calculation would be out of range. +* Significantly increased system shutdown times (as of v2.31 when not using *--update-drift* the RTC is not read). + +Having *hwclock* calculate the drift factor is a good starting point, but for optimal results it will likely need to be adjusted by directly editing the _{ADJTIME_PATH}_ file. For most configurations once a machine's optimal drift factor is crafted it should not need to be changed. Therefore, the old behavior to automatically (re)calculate drift was changed and now requires this option to be used. See the discussion below, under *The Adjust Function*. + +This option requires reading the Hardware Clock before setting it. If it cannot be read, then this option will cause the set functions to fail. This can happen, for example, if the Hardware Clock is corrupted by a power failure. In that case, the clock must first be set without this option. Despite it not working, the resulting drift correction factor would be invalid anyway. + +*-v*, *--verbose*:: +Display more details about what *hwclock* is doing internally. 
+ +== NOTES + +=== Clocks in a Linux System + +There are two types of date-time clocks: + +*The Hardware Clock:* This clock is an independent hardware device, with its own power domain (battery, capacitor, etc), that operates when the machine is powered off, or even unplugged. + +On an ISA compatible system, this clock is specified as part of the ISA standard. A control program can read or set this clock only to a whole second, but it can also detect the edges of the 1 second clock ticks, so the clock actually has virtually infinite precision. + +This clock is commonly called the hardware clock, the real time clock, the RTC, the BIOS clock, and the CMOS clock. Hardware Clock, in its capitalized form, was coined for use by *hwclock*. The Linux kernel also refers to it as the persistent clock. + +Some non-ISA systems have a few real time clocks with only one of them having its own power domain. A very low power external I2C or SPI clock chip might be used with a backup battery as the hardware clock to initialize a more functional integrated real-time clock which is used for most other purposes. + +*The System Clock:* This clock is part of the Linux kernel and is driven by a timer interrupt. (On an ISA machine, the timer interrupt is part of the ISA standard.) It has meaning only while Linux is running on the machine. The System Time is the number of seconds since 00:00:00 January 1, 1970 UTC (or more succinctly, the number of seconds since 1969 UTC). The System Time is not an integer, though. It has virtually infinite precision. + +The System Time is the time that matters. The Hardware Clock's basic purpose is to keep time when Linux is not running so that the System Clock can be initialized from it at boot. Note that in DOS, for which ISA was designed, the Hardware Clock is the only real time clock. + +It is important that the System Time not have any discontinuities such as would happen if you used the *date*(1) program to set it while the system is running. 
You can, however, do whatever you want to the Hardware Clock while the system is running, and the next time Linux starts up, it will do so with the adjusted time from the Hardware Clock. Note: currently this is not possible on most systems because *hwclock --systohc* is called at shutdown. + +The Linux kernel's timezone is set by *hwclock*. But don't be misled -- almost nobody cares what timezone the kernel thinks it is in. Instead, programs that care about the timezone (perhaps because they want to display a local time for you) almost always use a more traditional method of determining the timezone: They use the *TZ* environment variable or the _/etc/localtime_ file, as explained in the man page for *tzset*(3). However, some programs and fringe parts of the Linux kernel such as filesystems use the kernel's timezone value. An example is the vfat filesystem. If the kernel timezone value is wrong, the vfat filesystem will report and set the wrong timestamps on files. Another example is the kernel's NTP '11 minute mode'. If the kernel's timezone value and/or the _persistent_clock_is_local_ variable are wrong, then the Hardware Clock will be set incorrectly by '11 minute mode'. See the discussion below, under *Automatic Hardware Clock Synchronization by the Kernel*. + +*hwclock* sets the kernel's timezone to the value indicated by *TZ* or _/etc/localtime_ with the *--hctosys* or *--systz* functions. + +The kernel's timezone value actually consists of two parts: 1) a field tz_minuteswest indicating how many minutes local time (not adjusted for DST) lags behind UTC, and 2) a field tz_dsttime indicating the type of Daylight Savings Time (DST) convention that is in effect in the locality at the present time. This second field is not used under Linux and is always zero. See also *settimeofday*(2). + +=== Hardware Clock Access Methods + +*hwclock* uses many different ways to get and set Hardware Clock values. 
The most normal way is to do I/O to the rtc device special file, which is presumed to be driven by the rtc device driver. Also, Linux systems using the rtc framework with udev, are capable of supporting multiple Hardware Clocks. This may bring about the need to override the default rtc device by specifying one with the *--rtc* option. + +However, this method is not always available as older systems do not have an rtc driver. On these systems, the method of accessing the Hardware Clock depends on the system hardware. + +On an ISA compatible system, *hwclock* can directly access the "CMOS memory" registers that constitute the clock, by doing I/O to Ports 0x70 and 0x71. It does this with actual I/O instructions and consequently can only do it if running with superuser effective userid. This method may be used by specifying the *--directisa* option. + +This is a really poor method of accessing the clock, for all the reasons that userspace programs are generally not supposed to do direct I/O and disable interrupts. *hwclock* provides it for testing, troubleshooting, and because it may be the only method available on ISA systems which do not have a working rtc device driver. + +=== The Adjust Function + +The Hardware Clock is usually not very accurate. However, much of its inaccuracy is completely predictable - it gains or loses the same amount of time every day. This is called systematic drift. *hwclock*'s *--adjust* function lets you apply systematic drift corrections to the Hardware Clock. + +It works like this: *hwclock* keeps a file, _{ADJTIME_PATH}_, that keeps some historical information. This is called the adjtime file. + +Suppose you start with no adjtime file. You issue a *hwclock --set* command to set the Hardware Clock to the true current time. *hwclock* creates the adjtime file and records in it the current time as the last time the clock was calibrated. 
Five days later, the clock has gained 10 seconds, so you issue a *hwclock --set --update-drift* command to set it back 10 seconds. *hwclock* updates the adjtime file to show the current time as the last time the clock was calibrated, and records 2 seconds per day as the systematic drift rate. 24 hours go by, and then you issue a *hwclock --adjust* command. *hwclock* consults the adjtime file and sees that the clock gains 2 seconds per day when left alone and that it has been left alone for exactly one day. So it subtracts 2 seconds from the Hardware Clock. It then records the current time as the last time the clock was adjusted. Another 24 hours go by and you issue another *hwclock --adjust*. *hwclock* does the same thing: subtracts 2 seconds and updates the adjtime file with the current time as the last time the clock was adjusted. + +When you use the *--update-drift* option with *--set* or *--systohc*, the systematic drift rate is (re)calculated by comparing the fully drift corrected current Hardware Clock time with the new set time, from that it derives the 24 hour drift rate based on the last calibrated timestamp from the adjtime file. This updated drift factor is then saved in _{ADJTIME_PATH}_. + +A small amount of error creeps in when the Hardware Clock is set, so *--adjust* refrains from making any adjustment that is less than 1 second. Later on, when you request an adjustment again, the accumulated drift will be more than 1 second and *--adjust* will make the adjustment including any fractional amount. + +*hwclock --hctosys* also uses the adjtime file data to compensate the value read from the Hardware Clock before using it to set the System Clock. It does not share the 1 second limitation of *--adjust*, and will correct sub-second drift values immediately. It does not change the Hardware Clock time nor the adjtime file. This may eliminate the need to use *--adjust*, unless something else on the system needs the Hardware Clock to be compensated. 
+ +=== The Adjtime File + +While named for its historical purpose of controlling adjustments only, it actually contains other information used by *hwclock* from one invocation to the next. + +The format of the adjtime file is, in ASCII: + +Line 1: Three numbers, separated by blanks: 1) the systematic drift rate in seconds per day, floating point decimal; 2) the resulting number of seconds since 1969 UTC of most recent adjustment or calibration, decimal integer; 3) zero (for compatibility with *clock*(8)) as a floating point decimal. + +Line 2: One number: the resulting number of seconds since 1969 UTC of most recent calibration. Zero if there has been no calibration yet or it is known that any previous calibration is moot (for example, because the Hardware Clock has been found, since that calibration, not to contain a valid time). This is a decimal integer. + +Line 3: "UTC" or "LOCAL". Tells whether the Hardware Clock is set to Coordinated Universal Time or local time. You can always override this value with options on the *hwclock* command line. + +You can use an adjtime file that was previously used with the *clock*(8) program with *hwclock*. + +=== Automatic Hardware Clock Synchronization by the Kernel + +You should be aware of another way that the Hardware Clock is kept synchronized in some systems. The Linux kernel has a mode wherein it copies the System Time to the Hardware Clock every 11 minutes. This mode is a compile time option, so not all kernels will have this capability. This is a good mode to use when you are using something sophisticated like NTP to keep your System Clock synchronized. (NTP is a way to keep your System Time synchronized either to a time server somewhere on the network or to a radio clock hooked up to your system. See RFC 1305.) + +If the kernel is compiled with the '11 minute mode' option it will be active when the kernel's clock discipline is in a synchronized state. 
When in this state, bit 6 (the bit that is set in the mask 0x0040) of the kernel's _time_status_ variable is unset. This value is output as the 'status' line of the *adjtimex --print* or *ntptime* commands. + +It takes an outside influence, like the NTP daemon to put the kernel's clock discipline into a synchronized state, and therefore turn on '11 minute mode'. It can be turned off by running anything that sets the System Clock the old fashioned way, including *hwclock --hctosys*. However, if the NTP daemon is still running, it will turn '11 minute mode' back on again the next time it synchronizes the System Clock. + +If your system runs with '11 minute mode' on, it may need to use either *--hctosys* or *--systz* in a startup script, especially if the Hardware Clock is configured to use the local timescale. Unless the kernel is informed of what timescale the Hardware Clock is using, it may clobber it with the wrong one. The kernel uses UTC by default. + +The first userspace command to set the System Clock informs the kernel what timescale the Hardware Clock is using. This happens via the _persistent_clock_is_local_ kernel variable. If *--hctosys* or *--systz* is the first, it will set this variable according to the adjtime file or the appropriate command-line argument. Note that when using this capability and the Hardware Clock timescale configuration is changed, then a reboot is required to notify the kernel. + +*hwclock --adjust* should not be used with NTP '11 minute mode'. + +=== ISA Hardware Clock Century value + +There is some sort of standard that defines CMOS memory Byte 50 on an ISA machine as an indicator of what century it is. *hwclock* does not use or set that byte because there are some machines that don't define the byte that way, and it really isn't necessary anyway, since the year-of-century does a good job of implying which century it is. 
+ +If you have a bona fide use for a CMOS century byte, contact the *hwclock* maintainer; an option may be appropriate. + +Note that this section is only relevant when you are using the "direct ISA" method of accessing the Hardware Clock. ACPI provides a standard way to access century values, when they are supported by the hardware. + +== DATE-TIME CONFIGURATION + +=== Keeping Time without External Synchronization + +This discussion is based on the following conditions: + +* Nothing is running that alters the date-time clocks, such as an NTP daemon or a cron job. +* The system timezone is configured for the correct local time. See below, under *POSIX vs 'RIGHT'*. +* Early during startup the following are called, in this order: *adjtimex --tick* _value_ *--frequency* _value_ *hwclock --hctosys* +* During shutdown the following is called: *hwclock --systohc* + +*** Systems without *adjtimex* may use *ntptime*. + +Whether maintaining precision time with NTP daemon or not, it makes sense to configure the system to keep reasonably good date-time on its own. + +The first step in making that happen is having a clear understanding of the big picture. There are two completely separate hardware devices running at their own speed and drifting away from the 'correct' time at their own rates. The methods and software for drift correction are different for each of them. However, most systems are configured to exchange values between these two clocks at startup and shutdown. Now the individual device's time keeping errors are transferred back and forth between each other. Attempt to configure drift correction for only one of them, and the other's drift will be overlaid upon it. + +This problem can be avoided when configuring drift correction for the System Clock by simply not shutting down the machine. 
This, plus the fact that all of *hwclock*'s precision (including calculating drift factors) depends upon the System Clock's rate being correct, means that configuration of the System Clock should be done first. + +The System Clock drift is corrected with the *adjtimex*(8) command's *--tick* and *--frequency* options. These two work together: tick is the coarse adjustment and frequency is the fine adjustment. (For systems that do not have an *adjtimex* package, *ntptime -f* _ppm_ may be used instead.) + +Some Linux distributions attempt to automatically calculate the System Clock drift with *adjtimex*'s compare operation. Trying to correct one drifting clock by using another drifting clock as a reference is akin to a dog trying to catch its own tail. Success may happen eventually, but great effort and frustration will likely precede it. This automation may yield an improvement over no configuration, but expecting optimum results would be in error. A better choice for manual configuration would be *adjtimex*'s *--log* options. + +It may be more effective to simply track the System Clock drift with *sntp*, or *date -Ins* and a precision timepiece, and then calculate the correction manually. + +After setting the tick and frequency values, continue to test and refine the adjustments until the System Clock keeps good time. See *adjtimex*(2) for more information and the example demonstrating manual drift calculations. + +Once the System Clock is ticking smoothly, move on to the Hardware Clock. + +As a rule, cold drift will work best for most use cases. This should be true even for 24/7 machines whose normal downtime consists of a reboot. In that case the drift factor value makes little difference. But on the rare occasion that the machine is shut down for an extended period, then cold drift should yield better results. 
+ +*Steps to calculate cold drift:* + +1:: +*Ensure that NTP daemon will not be launched at startup.* + +2:: +The _System Clock_ time must be correct at shutdown! + +3:: +Shut down the system. + +4:: +Let an extended period pass without changing the Hardware Clock. + +5:: +Start the system. + +6:: +Immediately use *hwclock* to set the correct time, adding the *--update-drift* option. + +Note: if step 6 uses *--systohc*, then the System Clock must be set correctly (step 6a) just before doing so. + +Having *hwclock* calculate the drift factor is a good starting point, but for optimal results it will likely need to be adjusted by directly editing the _{ADJTIME_PATH}_ file. Continue to test and refine the drift factor until the Hardware Clock is corrected properly at startup. To check this, first make sure that the System Time is correct before shutdown and then use *sntp*, or *date -Ins* and a precision timepiece, immediately after startup. + +=== LOCAL vs UTC + +Keeping the Hardware Clock in a local timescale causes inconsistent daylight saving time results: + +* If Linux is running during a daylight saving time change, the time written to the Hardware Clock will be adjusted for the change. +* If Linux is NOT running during a daylight saving time change, the time read from the Hardware Clock will NOT be adjusted for the change. + +The Hardware Clock on an ISA compatible system keeps only a date and time, it has no concept of timezone nor daylight saving. Therefore, when *hwclock* is told that it is in local time, it assumes it is in the 'correct' local time and makes no adjustments to the time read from it. + +Linux handles daylight saving time changes transparently only when the Hardware Clock is kept in the UTC timescale. Doing so is made easy for system administrators as *hwclock* uses local time for its output and as the argument to the *--date* option. + +POSIX systems, like Linux, are designed to have the System Clock operate in the UTC timescale. 
The Hardware Clock's purpose is to initialize the System Clock, so also keeping it in UTC makes sense. + +Linux does, however, attempt to accommodate the Hardware Clock being in the local timescale. This is primarily for dual-booting with older versions of MS Windows. From Windows 7 on, the RealTimeIsUniversal registry key is supposed to be working properly so that its Hardware Clock can be kept in UTC. + +=== POSIX vs 'RIGHT' + +A discussion on date-time configuration would be incomplete without addressing timezones, this is mostly well covered by *tzset*(3). One area that seems to have no documentation is the 'right' directory of the Time Zone Database, sometimes called tz or zoneinfo. + +//TRANSLATORS: Keep {plus} untranslated. +There are two separate databases in the zoneinfo system, posix and 'right'. 'Right' (now named zoneinfo-leaps) includes leap seconds and posix does not. To use the 'right' database the System Clock must be set to (UTC {plus} leap seconds), which is equivalent to (TAI - 10). This allows calculating the exact number of seconds between two dates that cross a leap second epoch. The System Clock is then converted to the correct civil time, including UTC, by using the 'right' timezone files which subtract the leap seconds. Note: this configuration is considered experimental and is known to have issues. + +To configure a system to use a particular database all of the files located in its directory must be copied to the root of _/usr/share/zoneinfo_. Files are never used directly from the posix or 'right' subdirectories, e.g., TZ='_right/Europe/Dublin_'. 
This habit was becoming so common that the upstream zoneinfo project restructured the system's file tree by moving the posix and 'right' subdirectories out of the zoneinfo directory and into sibling directories: + +_/usr/share/zoneinfo_, _/usr/share/zoneinfo-posix_, _/usr/share/zoneinfo-leaps_ + +Unfortunately, some Linux distributions are changing it back to the old tree structure in their packages. So the problem of system administrators reaching into the 'right' subdirectory persists. This causes the system timezone to be configured to include leap seconds while the zoneinfo database is still configured to exclude them. Then when an application such as a World Clock needs the South_Pole timezone file; or an email MTA, or *hwclock* needs the UTC timezone file; they fetch it from the root of _/usr/share/zoneinfo_ , because that is what they are supposed to do. Those files exclude leap seconds, but the System Clock now includes them, causing an incorrect time conversion. + +Attempting to mix and match files from these separate databases will not work, because they each require the System Clock to use a different timescale. The zoneinfo database must be configured to use either posix or 'right', as described above, or by assigning a database path to the _TZDIR_ environment variable. + +== EXIT STATUS + +One of the following exit values will be returned: + +*EXIT_SUCCESS* ('0' on POSIX systems):: +Successful program execution. + +*EXIT_FAILURE* ('1' on POSIX systems):: +The operation failed or the command syntax was not valid. + +== ENVIRONMENT + +*TZ*:: +If this variable is set its value takes precedence over the system configured timezone. + +*TZDIR*:: +If this variable is set its value takes precedence over the system configured timezone database directory path. + +== FILES + +_{ADJTIME_PATH}_:: +The configuration and state file for *hwclock*. See also *adjtime_config*(5). + +_/etc/localtime_:: +The system timezone file. 
+ +_/usr/share/zoneinfo/_:: +The system timezone database directory. + +Device files *hwclock* may try for Hardware Clock access: _/dev/rtc0_ _/dev/rtc_ _/dev/misc/rtc_ _/dev/efirtc_ _/dev/misc/efirtc_ + +== SEE ALSO + +*date*(1), +*adjtime_config*(5), +*adjtimex*(8), +*gettimeofday*(2), +*settimeofday*(2), +*crontab*(1p), +*tzset*(3) + +== AUTHORS + +Written by mailto:bryanh@giraffe-data.com[Bryan Henderson], September 1996, based on work done on the *clock*(8) program by Charles Hedrick, Rob Hooft, and Harald Koenig. See the source code for complete history and credits. + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/hwclock.c b/sys-utils/hwclock.c new file mode 100644 index 0000000..1bd371a --- /dev/null +++ b/sys-utils/hwclock.c @@ -0,0 +1,1692 @@ +/* + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Since 7a3000f7ba548cf7d74ac77cc63fe8de228a669e (v2.30) hwclock is linked + * with parse_date.y from gnulib. This gnulib code is distributed with GPLv3. + * Use --disable-hwclock-gplv3 to exclude this code. + * + * + * clock.c was written by Charles Hedrick, hedrick@cs.rutgers.edu, Apr 1992 + * Modified for clock adjustments - Rob Hooft <hooft@chem.ruu.nl>, Nov 1992 + * Improvements by Harald Koenig <koenig@nova.tat.physik.uni-tuebingen.de> + * and Alan Modra <alan@spri.levels.unisa.edu.au>. + * + * Major rewrite by Bryan Henderson <bryanh@giraffe-data.com>, 96.09.19. + * The new program is called hwclock. New features: + * + * - You can set the hardware clock without also modifying the system + * clock. + * - You can read and set the clock with finer than 1 second precision. + * - When you set the clock, hwclock automatically refigures the drift + * rate, based on how far off the clock was before you set it. 
+ * + * Reshuffled things, added sparc code, and re-added alpha stuff + * by David Mosberger <davidm@azstarnet.com> + * and Jay Estabrook <jestabro@amt.tay1.dec.com> + * and Martin Ostermann <ost@comnets.rwth-aachen.de>, aeb@cwi.nl, 990212. + * + * Fix for Award 2094 bug, Dave Coffin (dcoffin@shore.net) 11/12/98 + * Change of local time handling, Stefan Ring <e9725446@stud3.tuwien.ac.at> + * Change of adjtime handling, James P. Rutledge <ao112@rgfn.epcc.edu>. + * + * + */ +/* + * Explanation of `adjusting' (Rob Hooft): + * + * The problem with my machine is that its CMOS clock is 10 seconds + * per day slow. With this version of clock.c, and my '/etc/rc.local' + * reading '/etc/clock -au' instead of '/etc/clock -u -s', this error + * is automatically corrected at every boot. + * + * To do this job, the program reads and writes the file '/etc/adjtime' + * to determine the correction, and to save its data. In this file are + * three numbers: + * + * 1) the correction in seconds per day. (So if your clock runs 5 + * seconds per day fast, the first number should read -5.0) + * 2) the number of seconds since 1/1/1970 the last time the program + * was used + * 3) the remaining part of a second which was leftover after the last + * adjustment + * + * Installation and use of this program: + * + * a) create a file '/etc/adjtime' containing as the first and only + * line: '0.0 0 0.0' + * b) run 'clock -au' or 'clock -a', depending on whether your cmos is + * in universal or local time. This updates the second number. + * c) set your system time using the 'date' command. + * d) update your cmos time using 'clock -wu' or 'clock -w' + * e) replace the first number in /etc/adjtime by your correction. 
+ * f) put the command 'clock -au' or 'clock -a' in your '/etc/rc.local' + */ + +#include <errno.h> +#include <getopt.h> +#include <limits.h> +#include <math.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/time.h> +#ifdef HAVE_SYS_SYSCALL_H +#include <sys/syscall.h> +#endif +#include <time.h> +#include <unistd.h> +#include <inttypes.h> + +#include "c.h" +#include "closestream.h" +#include "nls.h" +#include "optutils.h" +#include "pathnames.h" +#include "hwclock.h" +#include "timeutils.h" +#include "env.h" +#include "xalloc.h" +#include "path.h" +#include "strutils.h" + +#ifdef HAVE_LIBAUDIT +#include <libaudit.h> +static int hwaudit_fd = -1; +#endif + +UL_DEBUG_DEFINE_MASK(hwclock); +UL_DEBUG_DEFINE_MASKNAMES(hwclock) = UL_DEBUG_EMPTY_MASKNAMES; + +/* The struct that holds our hardware access routines */ +static struct clock_ops *ur; + +/* Maximal clock adjustment in seconds per day. + (adjtime() glibc call has 2145 seconds limit on i386, so it is good enough for us as well, + 43219 is a maximal safe value preventing exact_adjustment overflow.) */ +#define MAX_DRIFT 2145.0 + +struct adjtime { + /* + * This is information we keep in the adjtime file that tells us how + * to do drift corrections. Elements are all straight from the + * adjtime file, so see documentation of that file for details. + * Exception is <dirty>, which is an indication that what's in this + * structure is not what's in the disk file (because it has been + * updated since read from the disk file). + */ + int dirty; + /* line 1 */ + double drift_factor; + time_t last_adj_time; + double not_adjusted; + /* line 2 */ + time_t last_calib_time; + /* + * The most recent time that we set the clock from an external + * authority (as opposed to just doing a drift adjustment) + */ + /* line 3 */ + enum a_local_utc { UTC = 0, LOCAL, UNKNOWN } local_utc; + /* + * To which time zone, local or UTC, we most recently set the + * hardware clock. 
+ */ +}; + +static void hwclock_init_debug(const char *str) +{ + __UL_INIT_DEBUG_FROM_STRING(hwclock, HWCLOCK_DEBUG_, 0, str); + + DBG(INIT, ul_debug("hwclock debug mask: 0x%04x", hwclock_debug_mask)); + DBG(INIT, ul_debug("hwclock version: %s", PACKAGE_STRING)); +} + +/* FOR TESTING ONLY: inject random delays of up to 1000ms */ +static void up_to_1000ms_sleep(void) +{ + int usec = random() % 1000000; + + DBG(RANDOM_SLEEP, ul_debug("sleeping ~%d usec", usec)); + xusleep(usec); +} + +/* + * time_t to timeval conversion. + */ +static struct timeval t2tv(time_t timet) +{ + struct timeval rettimeval; + + rettimeval.tv_sec = timet; + rettimeval.tv_usec = 0; + return rettimeval; +} + +/* + * The difference in seconds between two times in "timeval" format. + */ +double time_diff(struct timeval subtrahend, struct timeval subtractor) +{ + return (subtrahend.tv_sec - subtractor.tv_sec) + + (subtrahend.tv_usec - subtractor.tv_usec) / 1E6; +} + +/* + * The time, in "timeval" format, which is <increment> seconds after the + * time <addend>. Of course, <increment> may be negative. + */ +static struct timeval time_inc(struct timeval addend, double increment) +{ + struct timeval newtime; + + newtime.tv_sec = addend.tv_sec + (time_t)increment; + newtime.tv_usec = addend.tv_usec + (increment - (time_t)increment) * 1E6; + + /* + * Now adjust it so that the microsecond value is between 0 and 1 + * million. 
+ */ + if (newtime.tv_usec < 0) { + newtime.tv_usec += 1E6; + newtime.tv_sec -= 1; + } else if (newtime.tv_usec >= 1E6) { + newtime.tv_usec -= 1E6; + newtime.tv_sec += 1; + } + return newtime; +} + +static int +hw_clock_is_utc(const struct hwclock_control *ctl, + const struct adjtime *adjtime) +{ + int ret; + + if (ctl->utc) + ret = 1; /* --utc explicitly given on command line */ + else if (ctl->local_opt) + ret = 0; /* --localtime explicitly given */ + else + /* get info from adjtime file - default is UTC */ + ret = (adjtime->local_utc != LOCAL); + + if (ctl->verbose) + printf(_("Assuming hardware clock is kept in %s time.\n"), + ret ? _("UTC") : _("local")); + return ret; +} + +/* + * Read the adjustment parameters out of the /etc/adjtime file. + * + * Return them as the adjtime structure <*adjtime_p>. Its defaults are + * initialized in main(). + */ +static int read_adjtime(const struct hwclock_control *ctl, + struct adjtime *adjtime_p) +{ + FILE *adjfile; + char line1[81]; /* String: first line of adjtime file */ + char line2[81]; /* String: second line of adjtime file */ + char line3[81]; /* String: third line of adjtime file */ + int64_t last_adj_time; + int64_t last_calib_time; + + if (access(ctl->adj_file_name, R_OK) != 0) + return EXIT_SUCCESS; + + adjfile = fopen(ctl->adj_file_name, "r"); /* open file for reading */ + if (adjfile == NULL) { + warn(_("cannot open %s"), ctl->adj_file_name); + return EXIT_FAILURE; + } + + if (!fgets(line1, sizeof(line1), adjfile)) + line1[0] = '\0'; /* In case fgets fails */ + if (!fgets(line2, sizeof(line2), adjfile)) + line2[0] = '\0'; /* In case fgets fails */ + if (!fgets(line3, sizeof(line3), adjfile)) + line3[0] = '\0'; /* In case fgets fails */ + + fclose(adjfile); + + if (sscanf(line1, "%lf %"SCNd64" %lf", + &adjtime_p->drift_factor, + &last_adj_time, + &adjtime_p->not_adjusted) != 3) + warnx(_("Warning: unrecognized line in adjtime file: %s"), line1); + + if (sscanf(line2, "%"SCNd64, &last_calib_time) != 1) + 
warnx(_("Warning: unrecognized line in adjtime file: %s"), line2); + + adjtime_p->last_adj_time = (time_t)last_adj_time; + adjtime_p->last_calib_time = (time_t)last_calib_time; + + if (!strcmp(line3, "UTC\n")) { + adjtime_p->local_utc = UTC; + } else if (!strcmp(line3, "LOCAL\n")) { + adjtime_p->local_utc = LOCAL; + } else { + adjtime_p->local_utc = UNKNOWN; + if (line3[0]) { + warnx(_("Warning: unrecognized third line in adjtime file\n" + "(Expected: `UTC' or `LOCAL' or nothing.)")); + } + } + + if (ctl->verbose) { + printf(_("Last drift adjustment done at %"PRId64" seconds after 1969\n"), + (int64_t)adjtime_p->last_adj_time); + printf(_("Last calibration done at %"PRId64" seconds after 1969\n"), + (int64_t)adjtime_p->last_calib_time); + printf(_("Hardware clock is on %s time\n"), + (adjtime_p->local_utc == + LOCAL) ? _("local") : (adjtime_p->local_utc == + UTC) ? _("UTC") : _("unknown")); + } + + return EXIT_SUCCESS; +} + +/* + * Wait until the falling edge of the Hardware Clock's update flag so that + * any time that is read from the clock immediately after we return will be + * exact. + * + * The clock only has 1 second precision, so it gives the exact time only + * once per second, right on the falling edge of the update flag. + * + * We wait (up to one second) either blocked waiting for an rtc device or in + * a CPU spin loop. The former is probably not very accurate. + * + * Return 0 if it worked, nonzero if it didn't. + */ +static int synchronize_to_clock_tick(const struct hwclock_control *ctl) +{ + int rc; + + if (ctl->verbose) + printf(_("Waiting for clock tick...\n")); + + rc = ur->synchronize_to_clock_tick(ctl); + + if (ctl->verbose) { + if (rc) + printf(_("...synchronization failed\n")); + else + printf(_("...got clock tick\n")); + } + + return rc; +} + +/* + * Convert a time in broken down format (hours, minutes, etc.) into standard + * unix time (seconds into epoch). Return it as *systime_p. + * + * The broken down time is argument <tm>. 
This broken down time is either + * in local time zone or UTC, depending on value of logical argument + * "universal". True means it is in UTC. + * + * If the argument contains values that do not constitute a valid time, and + * mktime() recognizes this, return *valid_p == false and *systime_p + * undefined. However, mktime() sometimes goes ahead and computes a + * fictional time "as if" the input values were valid, e.g. if they indicate + * the 31st day of April, mktime() may compute the time of May 1. In such a + * case, we return the same fictional value mktime() does as *systime_p and + * return *valid_p == true. + */ +static int +mktime_tz(const struct hwclock_control *ctl, struct tm tm, + time_t *systime_p) +{ + int valid; + + if (ctl->universal) + *systime_p = timegm(&tm); + else + *systime_p = mktime(&tm); + if (*systime_p == -1) { + /* + * This apparently (not specified in mktime() documentation) + * means the 'tm' structure does not contain valid values + * (however, not containing valid values does _not_ imply + * mktime() returns -1). + */ + valid = 0; + if (ctl->verbose) + printf(_("Invalid values in hardware clock: " + "%4d/%.2d/%.2d %.2d:%.2d:%.2d\n"), + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, + tm.tm_hour, tm.tm_min, tm.tm_sec); + } else { + valid = 1; + if (ctl->verbose) + printf(_("Hw clock time : %4d/%.2d/%.2d %.2d:%.2d:%.2d = " + "%"PRId64" seconds since 1969\n"), tm.tm_year + 1900, + tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, + tm.tm_sec, (int64_t)*systime_p); + } + return valid; +} + +/* + * Read the hardware clock and return the current time via <tm> argument. + * + * Use the method indicated by <method> argument to access the hardware + * clock. 
+ */ +static int +read_hardware_clock(const struct hwclock_control *ctl, + int *valid_p, time_t *systime_p) +{ + struct tm tm; + int err; + + err = ur->read_hardware_clock(ctl, &tm); + if (err) + return err; + + if (ctl->verbose) + printf(_("Time read from Hardware Clock: %4d/%.2d/%.2d %02d:%02d:%02d\n"), + tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, + tm.tm_min, tm.tm_sec); + *valid_p = mktime_tz(ctl, tm, systime_p); + + return 0; +} + +/* + * Set the Hardware Clock to the time <newtime>, in local time zone or UTC, + * according to <universal>. + */ +static void +set_hardware_clock(const struct hwclock_control *ctl, const time_t newtime) +{ + struct tm new_broken_time; + /* + * Time to which we will set Hardware Clock, in broken down format, + * in the time zone of caller's choice + */ + + if (ctl->universal) + gmtime_r(&newtime, &new_broken_time); + else + localtime_r(&newtime, &new_broken_time); + + if (ctl->verbose) + printf(_("Setting Hardware Clock to %.2d:%.2d:%.2d " + "= %"PRId64" seconds since 1969\n"), + new_broken_time.tm_hour, new_broken_time.tm_min, + new_broken_time.tm_sec, (int64_t)newtime); + + if (!ctl->testing) + ur->set_hardware_clock(ctl, &new_broken_time); +} + +static double +get_hardware_delay(const struct hwclock_control *ctl) +{ + const char *devpath, *rtcname; + char name[128 + 1]; + struct path_cxt *pc; + int rc; + + devpath = ur->get_device_path(); + if (!devpath) + goto unknown; + + rtcname = strrchr(devpath, '/'); + if (!rtcname || !*(rtcname + 1)) + goto unknown; + rtcname++; + + pc = ul_new_path("/sys/class/rtc/%s", rtcname); + if (!pc) + goto unknown; + rc = ul_path_scanf(pc, "name", "%128[^\n ]", name); + ul_unref_path(pc); + + if (rc != 1 || !*name) + goto unknown; + + if (ctl->verbose) + printf(_("RTC type: '%s'\n"), name); + + /* MC146818A-compatible (x86) */ + if (strcmp(name, "rtc_cmos") == 0) + return 0.5; + + /* Another HW */ + return 0; +unknown: + /* Let's be backwardly compatible */ + return 0.5; +} + + +/* 
+ * Set the Hardware Clock to the time "sethwtime", in local time zone or + * UTC, according to "universal". + * + * Wait for a fraction of a second so that "sethwtime" is the value of the + * Hardware Clock as of system time "refsystime", which is in the past. For + * example, if "sethwtime" is 14:03:05 and "refsystime" is 12:10:04.5 and + * the current system time is 12:10:06.0: Wait .5 seconds (to make exactly 2 + * seconds since "refsystime") and then set the Hardware Clock to 14:03:07, + * thus getting a precise and retroactive setting of the clock. The .5 delay is + * default on x86, see --delay and get_hardware_delay(). + * + * (Don't be confused by the fact that the system clock and the Hardware + * Clock differ by two hours in the above example. That's just to remind you + * that there are two independent time scales here). + * + * This function ought to be able to accept set times as fractional times. + * Idea for future enhancement. + */ +static void +set_hardware_clock_exact(const struct hwclock_control *ctl, + const time_t sethwtime, + const struct timeval refsystime) +{ + /* + * The Hardware Clock can only be set to any integer time plus one + * half second. The integer time is required because there is no + * interface to set or get a fractional second. The additional half + * second is because the Hardware Clock updates to the following + * second precisely 500 ms (not 1 second!) after you release the + * divider reset (after setting the new time) - see description of + * DV2, DV1, DV0 in Register A in the MC146818A data sheet (and note + * that although that document doesn't say so, real-world code seems + * to expect that the SET bit in Register B functions the same way). + * That means that, e.g., when you set the clock to 1:02:03, it + * effectively really sets it to 1:02:03.5, because it will update to + * 1:02:04 only half a second later. 
Our caller passes the desired + * integer Hardware Clock time in sethwtime, and the corresponding + * system time (which may have a fractional part, and which may or may + * not be the same!) in refsystime. In an ideal situation, we would + * then apply sethwtime to the Hardware Clock at refsystime+500ms, so + * that when the Hardware Clock ticks forward to sethwtime+1s half a + * second later at refsystime+1000ms, everything is in sync. So we + * spin, waiting for gettimeofday() to return a time at or after that + * time (refsystime+500ms) up to a tolerance value, initially 1ms. If + * we miss that time due to being preempted for some other process, + * then we increase the margin a little bit (initially 1ms, doubling + * each time), add 1 second (or more, if needed to get a time that is + * in the future) to both the time for which we are waiting and the + * time that we will apply to the Hardware Clock, and start waiting + * again. + * + * For example, the caller requests that we set the Hardware Clock to + * 1:02:03, with reference time (current system time) = 6:07:08.250. + * We want the Hardware Clock to update to 1:02:04 at 6:07:09.250 on + * the system clock, and the first such update will occur 0.500 + * seconds after we write to the Hardware Clock, so we spin until the + * system clock reads 6:07:08.750. If we get there, great, but let's + * imagine the system is so heavily loaded that our process is + * preempted and by the time we get to run again, the system clock + * reads 6:07:11.990. We now want to wait until the next xx:xx:xx.750 + * time, which is 6:07:12.750 (4.5 seconds after the reference time), + * at which point we will set the Hardware Clock to 1:02:07 (4 seconds + * after the originally requested time). If we do that successfully, + * then at 6:07:13.250 (5 seconds after the reference time), the + * Hardware Clock will update to 1:02:08 (5 seconds after the + * originally requested time), and all is well thereafter. 
+ */ + + time_t newhwtime = sethwtime; + double target_time_tolerance_secs = 0.001; /* initial value */ + double tolerance_incr_secs = 0.001; /* initial value */ + double delay; + struct timeval rtc_set_delay_tv; + + struct timeval targetsystime; + struct timeval nowsystime; + struct timeval prevsystime = refsystime; + double deltavstarget; + + if (ctl->rtc_delay != -1.0) /* --delay specified */ + delay = ctl->rtc_delay; + else + delay = get_hardware_delay(ctl); + + if (ctl->verbose) + printf(_("Using delay: %.6f seconds\n"), delay); + + rtc_set_delay_tv.tv_sec = 0; + rtc_set_delay_tv.tv_usec = delay * 1E6; + + timeradd(&refsystime, &rtc_set_delay_tv, &targetsystime); + + while (1) { + double ticksize; + + ON_DBG(RANDOM_SLEEP, up_to_1000ms_sleep()); + + gettimeofday(&nowsystime, NULL); + deltavstarget = time_diff(nowsystime, targetsystime); + ticksize = time_diff(nowsystime, prevsystime); + prevsystime = nowsystime; + + if (ticksize < 0) { + if (ctl->verbose) + printf(_("time jumped backward %.6f seconds " + "to %"PRId64".%06"PRId64" - retargeting\n"), + ticksize, (int64_t)nowsystime.tv_sec, + (int64_t)nowsystime.tv_usec); + /* The retarget is handled at the end of the loop. */ + } else if (deltavstarget < 0) { + /* deltavstarget < 0 if current time < target time */ + DBG(DELTA_VS_TARGET, + ul_debug("%"PRId64".%06"PRId64" < %"PRId64".%06"PRId64" (%.6f)", + (int64_t)nowsystime.tv_sec, (int64_t)nowsystime.tv_usec, + (int64_t)targetsystime.tv_sec, + (int64_t)targetsystime.tv_usec, deltavstarget)); + continue; /* not there yet - keep spinning */ + } else if (deltavstarget <= target_time_tolerance_secs) { + /* Close enough to the target time; done waiting. */ + break; + } else /* (deltavstarget > target_time_tolerance_secs) */ { + /* + * We missed our window. Increase the tolerance and + * aim for the next opportunity. 
+ */ + if (ctl->verbose) + printf(_("missed it - %"PRId64".%06"PRId64" is too far " + "past %"PRId64".%06"PRId64" (%.6f > %.6f)\n"), + (int64_t)nowsystime.tv_sec, + (int64_t)nowsystime.tv_usec, + (int64_t)targetsystime.tv_sec, + (int64_t)targetsystime.tv_usec, + deltavstarget, + target_time_tolerance_secs); + target_time_tolerance_secs += tolerance_incr_secs; + tolerance_incr_secs *= 2; + } + + /* + * Aim for the same offset (tv_usec) within the second in + * either the current second (if that offset hasn't arrived + * yet), or the next second. + */ + if (nowsystime.tv_usec < targetsystime.tv_usec) + targetsystime.tv_sec = nowsystime.tv_sec; + else + targetsystime.tv_sec = nowsystime.tv_sec + 1; + } + + newhwtime = sethwtime + + ceil(time_diff(nowsystime, refsystime) + - delay /* don't count this */); + if (ctl->verbose) + printf(_("%"PRId64".%06"PRId64" is close enough to %"PRId64".%06"PRId64" (%.6f < %.6f)\n" + "Set RTC to %"PRId64" (%"PRId64" + %d; refsystime = %"PRId64".%06"PRId64")\n"), + (int64_t)nowsystime.tv_sec, (int64_t)nowsystime.tv_usec, + (int64_t)targetsystime.tv_sec, (int64_t)targetsystime.tv_usec, + deltavstarget, target_time_tolerance_secs, + (int64_t)newhwtime, (int64_t)sethwtime, + (int)((int64_t)newhwtime - (int64_t)sethwtime), + (int64_t)refsystime.tv_sec, (int64_t)refsystime.tv_usec); + + set_hardware_clock(ctl, newhwtime); +} + +static int +display_time(struct timeval hwctime) +{ + char buf[ISO_BUFSIZ]; + + if (strtimeval_iso(&hwctime, ISO_TIMESTAMP_DOT, buf, sizeof(buf))) + return EXIT_FAILURE; + + printf("%s\n", buf); + return EXIT_SUCCESS; +} + +/* + * Adjusts System time, sets the kernel's timezone and RTC timescale. + * + * The kernel warp_clock function adjusts the System time according to the + * tz.tz_minuteswest argument and sets PCIL (see below). At boot settimeofday(2) + * has one-shot access to this function as shown in the table below. 
+ * + * +-------------------------------------------------------------------------+ + * | settimeofday(tv, tz) | + * |-------------------------------------------------------------------------| + * | Arguments | System Time | TZ | PCIL | | warp_clock | + * | tv | tz | set | warped | set | set | firsttime | locked | + * |---------|---------|---------------|-----|------|-----------|------------| + * | pointer | NULL | yes | no | no | no | 1 | no | + * | NULL | ptr2utc | no | no | yes | no | 0 | yes | + * | NULL | pointer | no | yes | yes | yes | 0 | yes | + * +-------------------------------------------------------------------------+ + * ptr2utc: tz.tz_minuteswest is zero (UTC). + * PCIL: persistent_clock_is_local, sets the "11 minute mode" timescale. + * firsttime: locks the warp_clock function (initialized to 1 at boot). + * + * +---------------------------------------------------------------------------+ + * | op | RTC scale | settimeofday calls | + * |---------|-----------|-----------------------------------------------------| + * | systz | Local | 1) warps system time*, sets PCIL* and kernel tz | + * | systz | UTC | 1st) locks warp_clock* 2nd) sets kernel tz | + * | hctosys | Local | 1st) sets PCIL* & kernel tz 2nd) sets system time | + * | hctosys | UTC | 1st) locks warp* 2nd) sets tz 3rd) sets system time | + * +---------------------------------------------------------------------------+ + * * only on first call after boot + * + * POSIX 2008 marked TZ in settimeofday() as deprecated. Unfortunately, + * different C libraries react to this deprecation in a different way. Since + * glibc v2.31 settimeofday() will fail if both args are not NULL, Musl-C + * ignores TZ at all, etc. We use __set_time() and __set_timezone() to hide + * these portability issues and to keep code readable. 
+ */ +#define __set_time(_tv) settimeofday(_tv, NULL) + +#ifndef SYS_settimeofday +# ifdef __NR_settimeofday +# define SYS_settimeofday __NR_settimeofday +# elif defined(__NR_settimeofday_time32) +# define SYS_settimeofday __NR_settimeofday_time32 +# endif +#endif + +static inline int __set_timezone(const struct timezone *tz) +{ +#ifdef SYS_settimeofday + errno = 0; + return syscall(SYS_settimeofday, NULL, tz); +#else + return settimeofday(NULL, tz); +#endif +} + +static int +set_system_clock(const struct hwclock_control *ctl, + const struct timeval newtime) +{ + struct tm broken; + int minuteswest; + int rc = 0; + + localtime_r(&newtime.tv_sec, &broken); + minuteswest = -get_gmtoff(&broken) / 60; + + if (ctl->verbose) { + if (ctl->universal) { + puts(_("Calling settimeofday(NULL, 0) " + "to lock the warp_clock function.")); + if (!( ctl->universal && !minuteswest )) + printf(_("Calling settimeofday(NULL, %d) " + "to set the kernel timezone.\n"), + minuteswest); + } else + printf(_("Calling settimeofday(NULL, %d) to warp " + "System time, set PCIL and the kernel tz.\n"), + minuteswest); + + if (ctl->hctosys) + printf(_("Calling settimeofday(%"PRId64".%06"PRId64", NULL) " + "to set the System time.\n"), + (int64_t)newtime.tv_sec, (int64_t)newtime.tv_usec); + } + + if (!ctl->testing) { + const struct timezone tz_utc = { 0 }; + const struct timezone tz = { minuteswest }; + + /* If UTC RTC: lock warp_clock and PCIL */ + if (ctl->universal) + rc = __set_timezone(&tz_utc); + + /* Set kernel tz; if localtime RTC: warp_clock and set PCIL */ + if (!rc && !( ctl->universal && !minuteswest )) + rc = __set_timezone(&tz); + + /* Set the System Clock */ + if ((!rc || errno == ENOSYS) && ctl->hctosys) + rc = __set_time(&newtime); + + if (rc) { + warn(_("settimeofday() failed")); + return EXIT_FAILURE; + } + } + return EXIT_SUCCESS; +} + +/* + * Refresh the last calibrated and last adjusted timestamps in <*adjtime_p> + * to facilitate future drift calculations based on this set 
point.
 *
 * In every case both timestamps are set to <nowtime>, the not-adjusted
 * remainder is cleared and the structure is marked dirty.
 *
 * With the --update-drift option:
 * The drift factor in <*adjtime_p> is updated as well, based on the fact
 * that the Hardware Clock was just calibrated to <nowtime> and before that
 * was set to the <hclocktime> time scale. The update is skipped when there
 * is no previous calibration or when it is less than four hours old.
 */
static void
adjust_drift_factor(const struct hwclock_control *ctl,
		    struct adjtime *adjtime_p,
		    const struct timeval nowtime,
		    const struct timeval hclocktime)
{
	const time_t min_calib_age = 4 * 60 * 60;	/* four hours */

	if (!ctl->update) {
		if (ctl->verbose)
			printf(_("Not adjusting drift factor because the "
				 "--update-drift option was not used.\n"));
	} else if (adjtime_p->last_calib_time == 0) {
		if (ctl->verbose)
			printf(_("Not adjusting drift factor because last "
				 "calibration time is zero,\n"
				 "so history is bad and calibration startover "
				 "is necessary.\n"));
	} else if ((hclocktime.tv_sec - adjtime_p->last_calib_time) < min_calib_age) {
		if (ctl->verbose)
			printf(_("Not adjusting drift factor because it has "
				 "been less than four hours since the last "
				 "calibration.\n"));
	} else {
		/*
		 * Three things happen at different times:
		 *  - at adjustment time the Hardware Clock is drift
		 *    corrected according to the adjtime file and the last
		 *    adjusted timestamp is refreshed;
		 *  - at calibration time the Hardware Clock is set and both
		 *    timestamps in <*adjtime_p> are refreshed;
		 *  - here, with --update-drift, the drift factor itself is
		 *    updated too.
		 *
		 * All arithmetic is done in doubles. (Floats almost suffice,
		 * but 195 days + 1 second equals 195 days in floats.)
		 */
		const double sec_per_day = 24.0 * 60.0 * 60.0;
		const struct timeval calibrated_at =
					t2tv(adjtime_p->last_calib_time);
		/*
		 * hclocktime is fully corrected with the current drift
		 * factor, so what separates it from nowtime is the drift
		 * that the current factor failed to correct.
		 */
		const double missed_drift = time_diff(nowtime, hclocktime);
		const double since_calib = time_diff(nowtime, calibrated_at);
		/*
		 * Correction to apply to the current drift factor.
		 * Simplified: uncorrected_drift / days_since_calibration.
		 */
		const double correction =
				missed_drift / (since_calib / sec_per_day);
		double new_factor = adjtime_p->drift_factor + correction;

		if (fabs(new_factor) > MAX_DRIFT) {
			if (ctl->verbose)
				printf(_("Clock drift factor was calculated as "
					 "%f seconds/day.\n"
					 "It is far too much. Resetting to zero.\n"),
				       new_factor);
			new_factor = 0;
		} else if (ctl->verbose) {
			printf(_("Clock drifted %f seconds in the past "
				 "%f seconds\nin spite of a drift factor of "
				 "%f seconds/day.\n"
				 "Adjusting drift factor by %f seconds/day\n"),
			       missed_drift,
			       since_calib,
			       adjtime_p->drift_factor, correction);
		}

		adjtime_p->drift_factor = new_factor;
	}

	/* the bookkeeping below is done on every path */
	adjtime_p->last_adj_time = nowtime.tv_sec;
	adjtime_p->last_calib_time = nowtime.tv_sec;
	adjtime_p->not_adjusted = 0;
	adjtime_p->dirty = 1;
}

/*
 * Calculate the drift correction currently needed for the
 * Hardware Clock based on the last time it was adjusted,
 * and the current drift factor, as stored in the adjtime file.
 *
 * The total drift adjustment needed is stored at tdrift_p.
+ * + */ +static void +calculate_adjustment(const struct hwclock_control *ctl, + const double factor, + const time_t last_time, + const double not_adjusted, + const time_t systime, struct timeval *tdrift_p) +{ + double exact_adjustment; + + exact_adjustment = + ((double)(systime - last_time)) * factor / (24 * 60 * 60) + + not_adjusted; + tdrift_p->tv_sec = (time_t) floor(exact_adjustment); + tdrift_p->tv_usec = (exact_adjustment - + (double)tdrift_p->tv_sec) * 1E6; + if (ctl->verbose) { + printf(P_("Time since last adjustment is %"PRId64" second\n", + "Time since last adjustment is %"PRId64" seconds\n", + ((int64_t)systime - (int64_t)last_time)), + ((int64_t)systime - (int64_t)last_time)); + printf(_("Calculated Hardware Clock drift is %"PRId64".%06"PRId64" seconds\n"), + (int64_t)tdrift_p->tv_sec, (int64_t)tdrift_p->tv_usec); + } +} + +/* + * Write the contents of the <adjtime> structure to its disk file. + * + * The write is unconditional: this function does not look at + * adjtime->dirty, so callers that want to skip an unchanged file must + * check it themselves. With ctl->testing set the file is left untouched. + * + * Returns EXIT_SUCCESS, or EXIT_FAILURE if the file cannot be opened or + * written. + */ +static int save_adjtime(const struct hwclock_control *ctl, + const struct adjtime *adjtime) +{ + char *content; /* Stuff to write to disk file */ + FILE *fp; + int status = EXIT_SUCCESS; + + xasprintf(&content, "%f %"PRId64" %f\n%"PRId64"\n%s\n", + adjtime->drift_factor, + (int64_t)adjtime->last_adj_time, + adjtime->not_adjusted, + (int64_t)adjtime->last_calib_time, + (adjtime->local_utc == LOCAL) ?
"LOCAL" : "UTC"); + + if (ctl->verbose){ + printf(_("New %s data:\n%s"), + ctl->adj_file_name, content); + } + + if (!ctl->testing) { + fp = fopen(ctl->adj_file_name, "w"); + if (fp == NULL) { + warn(_("cannot open %s"), ctl->adj_file_name); + status = EXIT_FAILURE; + } else { + int rc; + + rc = fputs(content, fp) < 0; + rc += close_stream(fp); + + if (rc) { + warn(_("cannot update %s"), ctl->adj_file_name); + status = EXIT_FAILURE; + } + } + } + + /* + * content was allocated by xasprintf(); free it on every path, and + * only after warn() has reported errno. + */ + free(content); + return status; +} + +/* + * Do the adjustment requested, by 1) setting the Hardware Clock (if + * necessary), and 2) updating the last-adjusted time in the adjtime + * structure. + * + * Do not update anything if the Hardware Clock does not currently present a + * valid time. + * + * <hclocktime> is the drift corrected time read from the Hardware Clock. + * + * <read_time> was the system time when the <hclocktime> was read, which due + * to computational delay could be a short time ago. It is used to define a + * trigger point for setting the Hardware Clock. The fractional part of the + * Hardware clock set time is subtracted from read_time to 'refer back', or + * delay, the trigger point. Fractional parts must be accounted for in this + * way, because the Hardware Clock can only be set to a whole second. + * + * <universal>: the Hardware Clock is kept in UTC. + * + * <testing>: We are running in test mode (no updating of clock).
+ * + */ +static void +do_adjustment(const struct hwclock_control *ctl, struct adjtime *adjtime_p, + const struct timeval hclocktime, + const struct timeval read_time) +{ + if (adjtime_p->last_adj_time == 0) { + if (ctl->verbose) + printf(_("Not setting clock because last adjustment time is zero, " + "so history is bad.\n")); + } else if (fabs(adjtime_p->drift_factor) > MAX_DRIFT) { + if (ctl->verbose) + printf(_("Not setting clock because drift factor %f is far too high.\n"), + adjtime_p->drift_factor); + } else { + set_hardware_clock_exact(ctl, hclocktime.tv_sec, + time_inc(read_time, + -(hclocktime.tv_usec / 1E6))); + adjtime_p->last_adj_time = hclocktime.tv_sec; + adjtime_p->not_adjusted = 0; + adjtime_p->dirty = 1; + } +} + +/* + * Pick the clock access backend and store it in <ur>. + * + * When built with USE_HWCLOCK_CMOS and ctl->directisa is set, the CMOS + * probe is tried first; on Linux the rtc probe is tried if <ur> is still + * NULL. If no backend is found, warnings are printed and the program + * exits through hwclock_exit() with EXIT_FAILURE. + */ +static void determine_clock_access_method(const struct hwclock_control *ctl) +{ + ur = NULL; + +#ifdef USE_HWCLOCK_CMOS + if (ctl->directisa) + ur = probe_for_cmos_clock(); +#endif +#ifdef __linux__ + if (!ur) + ur = probe_for_rtc_clock(ctl); +#endif + if (ur) { + if (ctl->verbose) + puts(ur->interface_name); + + } else { + if (ctl->verbose) + printf(_("No usable clock interface found.\n")); + + warnx(_("Cannot access the Hardware Clock via " + "any known method.")); + + if (!ctl->verbose) + warnx(_("Use the --verbose option to see the " + "details of our search for an access " + "method.")); + hwclock_exit(ctl, EXIT_FAILURE); + } +} + +/* + * Do all the normal work of hwclock - read, set clock, etc. + * + * <set_time> is the target time for --set and --predict; <startup_time> + * is the system time sampled when the program started. + * + * Returns EXIT_SUCCESS or EXIT_FAILURE, or whatever display_time(), + * set_system_clock() or save_adjtime() returns for the selected function. + */ +static int +manipulate_clock(const struct hwclock_control *ctl, const time_t set_time, + const struct timeval startup_time, struct adjtime *adjtime) +{ + /* The time at which we read the Hardware Clock */ + struct timeval read_time = { 0 }; + /* + * The Hardware Clock gives us a valid time, or at + * least something close enough to fool mktime(). + */ + int hclock_valid = 0; + /* + * Tick synchronized time read from the Hardware Clock and + * then drift corrected for all operations except --show.
+ */ + struct timeval hclocktime = { 0 }; + /* + * hclocktime correlated to startup_time. That is, what drift + * corrected Hardware Clock time would have been at start up. + */ + struct timeval startup_hclocktime = { 0 }; + /* Total Hardware Clock drift correction needed. */ + struct timeval tdrift = { 0 }; + + if ((ctl->set || ctl->systohc || ctl->adjust) && + (adjtime->local_utc == UTC) != ctl->universal) { + adjtime->local_utc = ctl->universal ? UTC : LOCAL; + adjtime->dirty = 1; + } + /* + * Negate the drift correction, because we want to 'predict' a + * Hardware Clock time that includes drift. + */ + if (ctl->predict) { + hclocktime = t2tv(set_time); + calculate_adjustment(ctl, adjtime->drift_factor, + adjtime->last_adj_time, + adjtime->not_adjusted, + hclocktime.tv_sec, &tdrift); + hclocktime = time_inc(hclocktime, (double) + -(tdrift.tv_sec + tdrift.tv_usec / 1E6)); + if (ctl->verbose) { + printf(_("Target date: %"PRId64"\n"), (int64_t)set_time); + printf(_("Predicted RTC: %"PRId64"\n"), (int64_t)hclocktime.tv_sec); + } + return display_time(hclocktime); + } + + if (ctl->systz) + return set_system_clock(ctl, startup_time); + + if (ur->get_permissions()) + return EXIT_FAILURE; + + /* + * Read and drift correct RTC time; except for RTC set functions + * without the --update-drift option because: 1) it's not needed; + * 2) it enables setting a corrupted RTC without reading it first; + * 3) it significantly reduces system shutdown time. + */ + if ( ! ((ctl->set || ctl->systohc) && !ctl->update)) { + /* + * Timing critical - do not change the order of, or put + * anything between the following three statements. + * Synchronization failure MUST exit, because all drift + * operations are invalid without it.
+ */ + if (synchronize_to_clock_tick(ctl)) + return EXIT_FAILURE; + read_hardware_clock(ctl, &hclock_valid, &hclocktime.tv_sec); + gettimeofday(&read_time, NULL); + + if (!hclock_valid) { + warnx(_("RTC read returned an invalid value.")); + return EXIT_FAILURE; + } + /* + * Calculate and apply drift correction to the Hardware Clock + * time for everything except --show + */ + calculate_adjustment(ctl, adjtime->drift_factor, + adjtime->last_adj_time, + adjtime->not_adjusted, + hclocktime.tv_sec, &tdrift); + if (!ctl->show) + hclocktime = time_inc(tdrift, hclocktime.tv_sec); + + startup_hclocktime = + time_inc(hclocktime, time_diff(startup_time, read_time)); + } + if (ctl->show || ctl->get) { + return display_time(startup_hclocktime); + } + + if (ctl->set) { + set_hardware_clock_exact(ctl, set_time, startup_time); + if (!ctl->noadjfile) + adjust_drift_factor(ctl, adjtime, t2tv(set_time), + startup_hclocktime); + } else if (ctl->adjust) { + if (tdrift.tv_sec > 0 || tdrift.tv_sec < -1) + do_adjustment(ctl, adjtime, hclocktime, read_time); + else + printf(_("Needed adjustment is less than one second, " + "so not setting clock.\n")); + } else if (ctl->systohc) { + struct timeval nowtime, reftime; + /* + * We can only set_hardware_clock_exact to a + * whole seconds time, so we set it with + * reference to the most recent whole + * seconds time. + */ + gettimeofday(&nowtime, NULL); + reftime.tv_sec = nowtime.tv_sec; + reftime.tv_usec = 0; + set_hardware_clock_exact(ctl, (time_t) reftime.tv_sec, reftime); + if (!ctl->noadjfile) + adjust_drift_factor(ctl, adjtime, nowtime, + hclocktime); + } else if (ctl->hctosys) { + return set_system_clock(ctl, hclocktime); + } + /* Only rewrite the adjtime file when something above marked it dirty. */ + if (!ctl->noadjfile && adjtime->dirty) + return save_adjtime(ctl, adjtime); + return EXIT_SUCCESS; +} + +/** + * Get or set the kernel RTC driver's epoch on Alpha machines. + * ISA machines are hard coded for 1900.
+ */ +#if defined(__linux__) && defined(__alpha__) +static void +manipulate_epoch(const struct hwclock_control *ctl) +{ + if (ctl->getepoch) { + unsigned long epoch; + + if (get_epoch_rtc(ctl, &epoch)) + warnx(_("unable to read the RTC epoch.")); + else + printf(_("The RTC epoch is set to %lu.\n"), epoch); + } else if (ctl->setepoch) { + if (!ctl->epoch_option) + warnx(_("--epoch is required for --setepoch.")); + else if (!ctl->testing) + if (set_epoch_rtc(ctl)) + warnx(_("unable to set the RTC epoch.")); + } +} +#endif /* __linux__ __alpha__ */ + +#ifdef __linux__ +/* + * Get or set an RTC parameter as requested by --param-get / --param-set. + * + * Returns 0 when the parameter was read and printed, or when a set was + * skipped because ctl->testing is on; for a real set the result of + * set_param_rtc() is returned. Returns 1 if the read fails or neither + * option was given. The caller maps non-zero to EXIT_FAILURE. + */ +static int +manipulate_rtc_param(const struct hwclock_control *ctl) +{ + if (ctl->param_get_option) { + uint64_t id = 0, value = 0; + + if (get_param_rtc(ctl, ctl->param_get_option, &id, &value)) { + warnx(_("unable to read the RTC parameter %s"), + ctl->param_get_option); + return 1; + } + + printf(_("The RTC parameter 0x%jx is set to 0x%jx.\n"), + (uintmax_t) id, (uintmax_t) value); + /* success: do not fall through to the failure return below */ + return 0; + + } else if (ctl->param_set_option) { + if (ctl->testing) + return 0; + + return set_param_rtc(ctl, ctl->param_set_option); + } + + return 1; +} +#endif + +static void out_version(void) +{ + printf(UTIL_LINUX_VERSION); +} + +static void __attribute__((__noreturn__)) +usage(void) +{ +#ifdef __linux__ + const struct hwclock_param *param = get_hwclock_params(); +#endif + + fputs(USAGE_HEADER, stdout); + printf(_(" %s [function] [option...]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, stdout); + puts(_("Time clocks utility.")); + + fputs(USAGE_FUNCTIONS, stdout); + puts(_(" -r, --show display the RTC time")); + puts(_(" --get display drift corrected RTC time")); + puts(_(" --set set the RTC according to --date")); + puts(_(" -s, --hctosys set the system time from the RTC")); + puts(_(" -w, --systohc set the RTC from the system time")); + puts(_(" --systz send timescale configurations to the kernel")); + puts(_(" -a, --adjust adjust the RTC to account for systematic drift")); +#if defined(__linux__) &&
defined(__alpha__) + puts(_(" --getepoch display the RTC epoch")); + puts(_(" --setepoch set the RTC epoch according to --epoch")); +#endif +#ifdef __linux__ + puts(_(" --param-get <param> display the RTC parameter")); + puts(_(" --param-set <param>=<value> set the RTC parameter")); +#endif + puts(_(" --predict predict the drifted RTC time according to --date")); + fputs(USAGE_OPTIONS, stdout); + puts(_(" -u, --utc the RTC timescale is UTC")); + puts(_(" -l, --localtime the RTC timescale is Local")); +#ifdef __linux__ + printf(_( + " -f, --rtc <file> use an alternate file to %1$s\n"), _PATH_RTC_DEV); +#endif + printf(_( + " --directisa use the ISA bus instead of %1$s access\n"), _PATH_RTC_DEV); + puts(_(" --date <time> date/time input for --set and --predict")); + puts(_(" --delay <sec> delay used when set new RTC time")); +#if defined(__linux__) && defined(__alpha__) + puts(_(" --epoch <year> epoch input for --setepoch")); +#endif + puts(_(" --update-drift update the RTC drift factor")); + printf(_( + " --noadjfile do not use %1$s\n"), _PATH_ADJTIME); + printf(_( + " --adjfile <file> use an alternate file to %1$s\n"), _PATH_ADJTIME); + puts(_(" --test dry run; implies --verbose")); + puts(_(" -v, --verbose display more details")); + + fputs(USAGE_SEPARATOR, stdout); + printf(USAGE_HELP_OPTIONS(33)); + +#ifdef __linux__ + fputs(USAGE_ARGUMENTS, stdout); + puts(_(" <param> is either a numeric RTC parameter value or one of these aliases:")); + + while (param->name) { + printf(_(" - %1$s: %2$s (0x%3$x)\n"), param->name, param->help, param->id); + param++; + } + + puts(_(" See Kernel's include/uapi/linux/rtc.h for parameters and values.")); + fputs(USAGE_ARG_SEPARATOR, stdout); + puts(_(" <param> and <value> accept hexadecimal values if prefixed with 0x, otherwise decimal.")); +#endif + printf(USAGE_MAN_TAIL("hwclock(8)")); + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + struct hwclock_control ctl = { + .show = 1, /* default op is show */ + .rtc_delay 
= -1.0 /* unspecified */ + }; + struct timeval startup_time; + struct adjtime adjtime = { 0 }; + /* + * The time we started up, in seconds into the epoch, including + * fractions. + */ + time_t set_time = 0; /* Time to which user said to set Hardware Clock */ + int rc, c; + + /* Long only options. */ + enum { + OPT_ADJFILE = CHAR_MAX + 1, + OPT_DATE, + OPT_DELAY, + OPT_DIRECTISA, + OPT_EPOCH, + OPT_GET, + OPT_GETEPOCH, + OPT_NOADJFILE, + OPT_PARAM_GET, + OPT_PARAM_SET, + OPT_PREDICT, + OPT_SET, + OPT_SETEPOCH, + OPT_SYSTZ, + OPT_TEST, + OPT_UPDATE + }; + + static const struct option longopts[] = { + { "adjust", no_argument, NULL, 'a' }, + { "help", no_argument, NULL, 'h' }, + { "localtime", no_argument, NULL, 'l' }, + { "show", no_argument, NULL, 'r' }, + { "hctosys", no_argument, NULL, 's' }, + { "utc", no_argument, NULL, 'u' }, + { "version", no_argument, NULL, 'V' }, + { "systohc", no_argument, NULL, 'w' }, + { "debug", no_argument, NULL, 'D' }, + { "ul-debug", required_argument, NULL, 'd' }, + { "verbose", no_argument, NULL, 'v' }, + { "set", no_argument, NULL, OPT_SET }, +#if defined(__linux__) && defined(__alpha__) + { "getepoch", no_argument, NULL, OPT_GETEPOCH }, + { "setepoch", no_argument, NULL, OPT_SETEPOCH }, + { "epoch", required_argument, NULL, OPT_EPOCH }, +#endif +#ifdef __linux__ + { "param-get", required_argument, NULL, OPT_PARAM_GET }, + { "param-set", required_argument, NULL, OPT_PARAM_SET }, +#endif + { "noadjfile", no_argument, NULL, OPT_NOADJFILE }, + { "directisa", no_argument, NULL, OPT_DIRECTISA }, + { "test", no_argument, NULL, OPT_TEST }, + { "date", required_argument, NULL, OPT_DATE }, + { "delay", required_argument, NULL, OPT_DELAY }, +#ifdef __linux__ + { "rtc", required_argument, NULL, 'f' }, +#endif + { "adjfile", required_argument, NULL, OPT_ADJFILE }, + { "systz", no_argument, NULL, OPT_SYSTZ }, + { "predict", no_argument, NULL, OPT_PREDICT }, + { "get", no_argument, NULL, OPT_GET }, + { "update-drift", no_argument, NULL, 
OPT_UPDATE }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'a','r','s','w', + OPT_GET, OPT_GETEPOCH, OPT_PREDICT, + OPT_SET, OPT_SETEPOCH, OPT_SYSTZ }, + { 'l', 'u' }, + { OPT_ADJFILE, OPT_NOADJFILE }, + { OPT_NOADJFILE, OPT_UPDATE }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + /* Remember what time we were invoked */ + gettimeofday(&startup_time, NULL); + +#ifdef HAVE_LIBAUDIT + hwaudit_fd = audit_open(); + if (hwaudit_fd < 0 && !(errno == EINVAL || errno == EPROTONOSUPPORT || + errno == EAFNOSUPPORT)) { + /* + * You get these error codes only when the kernel doesn't + * have audit compiled in. + */ + warnx(_("Unable to connect to audit system")); + return EXIT_FAILURE; + } +#endif + setlocale(LC_ALL, ""); +#ifdef LC_NUMERIC + /* + * We need LC_CTYPE and LC_TIME and LC_MESSAGES, but must avoid + * LC_NUMERIC since it gives problems when we write to /etc/adjtime. + * - gqueri@mail.dotcom.fr + */ + setlocale(LC_NUMERIC, "C"); +#endif + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, + "hvVDd:alrsuwf:", longopts, NULL)) != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch (c) { + case 'D': + warnx(_("use --verbose, --debug has been deprecated.")); + break; + case 'v': + ctl.verbose = 1; + break; + case 'd': + hwclock_init_debug(optarg); + break; + case 'a': + ctl.adjust = 1; + ctl.show = 0; + ctl.hwaudit_on = 1; + break; + case 'l': + ctl.local_opt = 1; /* --localtime */ + break; + case 'r': + ctl.show = 1; + break; + case 's': + ctl.hctosys = 1; + ctl.show = 0; + ctl.hwaudit_on = 1; + break; + case 'u': + ctl.utc = 1; + break; + case 'w': + ctl.systohc = 1; + ctl.show = 0; + ctl.hwaudit_on = 1; + break; + case OPT_SET: + ctl.set = 1; + ctl.show = 0; + ctl.hwaudit_on = 1; + break; +#if defined(__linux__) && defined(__alpha__) + case OPT_GETEPOCH: + ctl.getepoch = 1; + ctl.show = 0; + 
break; + case OPT_SETEPOCH: + ctl.setepoch = 1; + ctl.show = 0; + ctl.hwaudit_on = 1; + break; + case OPT_EPOCH: + ctl.epoch_option = optarg; /* --epoch */ + break; +#endif +#ifdef __linux__ + case OPT_PARAM_GET: + ctl.param_get_option = optarg; + ctl.show = 0; + break; + case OPT_PARAM_SET: + ctl.param_set_option = optarg; + ctl.show = 0; + ctl.hwaudit_on = 1; + break; +#endif + case OPT_NOADJFILE: + ctl.noadjfile = 1; + break; + case OPT_DIRECTISA: + ctl.directisa = 1; + break; + case OPT_TEST: + ctl.testing = 1; /* --test */ + ctl.verbose = 1; + break; + case OPT_DATE: + ctl.date_opt = optarg; /* --date */ + break; + case OPT_DELAY: + ctl.rtc_delay = strtod_or_err(optarg, "invalid --delay argument"); + break; + case OPT_ADJFILE: + ctl.adj_file_name = optarg; /* --adjfile */ + break; + case OPT_SYSTZ: + ctl.systz = 1; /* --systz */ + ctl.show = 0; + ctl.hwaudit_on = 1; + break; + case OPT_PREDICT: + ctl.predict = 1; /* --predict */ + ctl.show = 0; + break; + case OPT_GET: + ctl.get = 1; /* --get */ + ctl.show = 0; + break; + case OPT_UPDATE: + ctl.update = 1; /* --update-drift */ + break; +#ifdef __linux__ + case 'f': + ctl.rtc_dev_name = optarg; /* --rtc */ + break; +#endif + + case 'V': /* --version */ + print_version(EXIT_SUCCESS); + case 'h': /* --help */ + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (argc -= optind) { + warnx(_("%d too many arguments given"), argc); + errtryhelp(EXIT_FAILURE); + } + + if (!ctl.adj_file_name) + ctl.adj_file_name = _PATH_ADJTIME; + + if (ctl.update && !ctl.set && !ctl.systohc) { + warnx(_("--update-drift requires --set or --systohc")); + exit(EXIT_FAILURE); + } + + if (ctl.noadjfile && !ctl.utc && !ctl.local_opt) { + warnx(_("With --noadjfile, you must specify " + "either --utc or --localtime")); + exit(EXIT_FAILURE); + } + + if (ctl.set || ctl.predict) { + if (!ctl.date_opt) { + warnx(_("--date is required for --set or --predict")); + exit(EXIT_FAILURE); + } +#ifdef USE_HWCLOCK_GPLv3_DATETIME + /* date(1) 
compatible GPLv3 parser */ + struct timespec when = { 0 }; + + if (parse_date(&when, ctl.date_opt, NULL)) + set_time = when.tv_sec; +#else + /* minimalistic GPLv2 based parser */ + usec_t usec; + + if (parse_timestamp(ctl.date_opt, &usec) == 0) + set_time = (time_t) (usec / 1000000); +#endif + else { + warnx(_("invalid date '%s'"), ctl.date_opt); + exit(EXIT_FAILURE); + } + } + +#ifdef __linux__ + if (ctl.param_get_option || ctl.param_set_option) { + if (manipulate_rtc_param(&ctl)) + hwclock_exit(&ctl, EXIT_FAILURE); + + hwclock_exit(&ctl, EXIT_SUCCESS); + } +#endif + +#if defined(__linux__) && defined(__alpha__) + if (ctl.getepoch || ctl.setepoch) { + manipulate_epoch(&ctl); + hwclock_exit(&ctl, EXIT_SUCCESS); + } +#endif + + if (ctl.verbose) { + out_version(); + printf(_("System Time: %"PRId64".%06"PRId64"\n"), + (int64_t)startup_time.tv_sec, (int64_t)startup_time.tv_usec); + } + + if (!ctl.systz && !ctl.predict) + determine_clock_access_method(&ctl); + + if (!ctl.noadjfile && !(ctl.systz && (ctl.utc || ctl.local_opt))) { + if ((rc = read_adjtime(&ctl, &adjtime)) != 0) + hwclock_exit(&ctl, rc); + } else + /* Avoid writing adjtime file if we don't have to. */ + adjtime.dirty = 0; + + ctl.universal = hw_clock_is_utc(&ctl, &adjtime); + rc = manipulate_clock(&ctl, set_time, startup_time, &adjtime); + if (ctl.testing) + puts(_("Test mode: nothing was changed.")); + hwclock_exit(&ctl, rc); + return rc; /* Not reached */ +} + +void +hwclock_exit(const struct hwclock_control *ctl +#ifndef HAVE_LIBAUDIT + __attribute__((__unused__)) +#endif + , int status) +{ +#ifdef HAVE_LIBAUDIT + if (ctl->hwaudit_on && !ctl->testing) { + audit_log_user_message(hwaudit_fd, AUDIT_USYS_CONFIG, + "op=change-system-time", NULL, NULL, NULL, + status == EXIT_SUCCESS ? 1 : 0); + } + close(hwaudit_fd); +#endif + exit(status); +} + +/* + * History of this program: + * + * 98.08.12 BJH Version 2.4 + * + * Don't use century byte from Hardware Clock. Add comments telling why. 
+ * + * 98.06.20 BJH Version 2.3. + * + * Make --hctosys set the kernel timezone from TZ environment variable + * and/or /usr/lib/zoneinfo. From Klaus Ripke (klaus@ripke.com). + * + * 98.03.05 BJH. Version 2.2. + * + * Add --getepoch and --setepoch. + * + * Fix some word length things so it works on Alpha. + * + * Make it work when /dev/rtc doesn't have the interrupt functions. In this + * case, busywait for the top of a second instead of blocking and waiting + * for the update complete interrupt. + * + * Fix a bunch of bugs too numerous to mention. + * + * 97.06.01: BJH. Version 2.1. Read and write the century byte (Byte 50) of + * the ISA Hardware Clock when using direct ISA I/O. Problem discovered by + * job (jei@iclnl.icl.nl). + * + * Use the rtc clock access method in preference to the KDGHWCLK method. + * Problem discovered by Andreas Schwab <schwab@LS5.informatik.uni-dortmund.de>. + * + * November 1996: Version 2.0.1. Modifications by Nicolai Langfeldt + * (janl@math.uio.no) to make it compile on linux 1.2 machines as well as + * more recent versions of the kernel. Introduced the NO_CLOCK access method + * and wrote feature test code to detect absence of rtc headers. + * + *************************************************************************** + * Maintenance notes + * + * To compile this, you must use GNU compiler optimization (-O option) in + * order to make the "extern inline" functions from asm/io.h (inb(), etc.) + * compile. If you don't optimize, which means the compiler will generate no + * inline functions, the references to these functions in this program will + * be compiled as external references. Since you probably won't be linking + * with any functions by these names, you will have unresolved external + * references when you link. 
+ * + * Here's some info on how we must deal with the time that elapses while + * this program runs: There are two major delays as we run: + * + * 1) Waiting up to 1 second for a transition of the Hardware Clock so + * we are synchronized to the Hardware Clock. + * 2) Running the "date" program to interpret the value of our --date + * option. + * + * Reading the /etc/adjtime file is the next biggest source of delay and + * uncertainty. + * + * The user wants to know what time it was at the moment they invoked us, not + * some arbitrary time later. And in setting the clock, they are giving us the + * time at the moment we are invoked, so if we set the clock some time + * later, we have to add some time to that. + * + * So we check the system time as soon as we start up, then run "date" and + * do file I/O if necessary, then wait to synchronize with a Hardware Clock + * edge, then check the system time again to see how much time we spent. We + * immediately read the clock then and (if appropriate) report that time, + * and additionally, the delay we measured. + * + * If we're setting the clock to a time given by the user, we wait some more + * so that the total delay is an integral number of seconds, then set the + * Hardware Clock to the time the user requested plus that integral number + * of seconds. N.B. The Hardware Clock can only be set in integral seconds. + * + * If we're setting the clock to the system clock value, we wait for the + * system clock to reach the top of a second, and then set the Hardware + * Clock to the system clock's value. + * + * Here's an interesting point about setting the Hardware Clock: On my + * machine, when you set it, it sets to that precise time. But one can + * imagine another clock whose update oscillator marches on a steady one + * second period, so updating the clock between any two oscillator ticks is + * the same as updating it right at the earlier tick. 
To avoid any + * complications that might cause, we set the clock as soon as possible + * after an oscillator tick. + * + * About synchronizing to the Hardware Clock when reading the time: The + * precision of the Hardware Clock counters themselves is one second. You + * can't read the counters and find out that is 12:01:02.5. But if you + * consider the location in time of the counter's ticks as part of its + * value, then its precision is as infinite as time is continuous! What I'm + * saying is this: To find out the _exact_ time in the hardware clock, we + * wait until the next clock tick (the next time the second counter changes) + * and measure how long we had to wait. We then read the value of the clock + * counters and subtract the wait time and we know precisely what time it + * was when we set out to query the time. + * + * hwclock uses this method, and considers the Hardware Clock to have + * infinite precision. + */ diff --git a/sys-utils/hwclock.h b/sys-utils/hwclock.h new file mode 100644 index 0000000..951857c --- /dev/null +++ b/sys-utils/hwclock.h @@ -0,0 +1,96 @@ +#ifndef HWCLOCK_CLOCK_H +#define HWCLOCK_CLOCK_H + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <time.h> + +#include "c.h" +#include "debug.h" +#include "nls.h" + +#define HWCLOCK_DEBUG_INIT (1 << 0) +#define HWCLOCK_DEBUG_RANDOM_SLEEP (1 << 1) +#define HWCLOCK_DEBUG_DELTA_VS_TARGET (1 << 2) +#define HWCLOCK_DEBUG_ALL 0xFFFF + +UL_DEBUG_DECLARE_MASK(hwclock); +#define DBG(m, x) __UL_DBG(hwclock, HWCLOCK_DEBUG_, m, x) +#define ON_DBG(m, x) __UL_DBG_CALL(hwclock, HWCLOCK_DEBUG_, m, x) + +struct hwclock_control { + char *date_opt; + char *adj_file_name; + double rtc_delay; /* --delay <seconds> */ +#if defined(__linux__) && defined(__alpha__) + char *epoch_option; +#endif +#ifdef __linux__ + char *rtc_dev_name; +#endif + char *param_get_option; + char *param_set_option; + unsigned int + hwaudit_on:1, + adjust:1, + show:1, + hctosys:1, + utc:1, 
+ systohc:1, +#if defined(__linux__) && defined(__alpha__) + getepoch:1, + setepoch:1, +#endif + noadjfile:1, + local_opt:1, + directisa:1, + testing:1, + systz:1, + predict:1, + get:1, + set:1, + update:1, + universal:1, /* will store hw_clock_is_utc() return value */ + verbose:1; +}; + +struct clock_ops { + char *interface_name; + int (*get_permissions) (void); + int (*read_hardware_clock) (const struct hwclock_control *ctl, struct tm * tm); + int (*set_hardware_clock) (const struct hwclock_control *ctl, const struct tm * tm); + int (*synchronize_to_clock_tick) (const struct hwclock_control *ctl); + const char *(*get_device_path) (void); +}; + +extern struct clock_ops *probe_for_cmos_clock(void); +extern struct clock_ops *probe_for_rtc_clock(const struct hwclock_control *ctl); + +/* hwclock.c */ +extern double time_diff(struct timeval subtrahend, struct timeval subtractor); + +/* rtc.c */ +#if defined(__linux__) && defined(__alpha__) +extern int get_epoch_rtc(const struct hwclock_control *ctl, unsigned long *epoch); +extern int set_epoch_rtc(const struct hwclock_control *ctl); +#endif + +struct hwclock_param { + int id; + const char *name; + const char *help; +}; + +extern const struct hwclock_param *get_hwclock_params(void); +extern int get_param_rtc(const struct hwclock_control *ctl, + const char *name, uint64_t *id, uint64_t *value); +extern int set_param_rtc(const struct hwclock_control *ctl, const char *name); + +extern void __attribute__((__noreturn__)) +hwclock_exit(const struct hwclock_control *ctl, int status); + +extern int parse_date(struct timespec *, char const *, struct timespec const *); + +#endif /* HWCLOCK_CLOCK_H */ diff --git a/sys-utils/ipcmk.1 b/sys-utils/ipcmk.1 new file mode 100644 index 0000000..c434347 --- /dev/null +++ b/sys-utils/ipcmk.1 @@ -0,0 +1,88 @@ +'\" t +.\" Title: ipcmk +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 
+.\" Language: English +.\" +.TH "IPCMK" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ipcmk \- make various IPC resources +.SH "SYNOPSIS" +.sp +\fBipcmk\fP [options] +.SH "DESCRIPTION" +.sp +\fBipcmk\fP allows you to create System V inter\-process communication (IPC) objects: shared memory segments, message queues, and semaphore arrays. +.SH "OPTIONS" +.sp +Resources can be specified with these options: +.sp +\fB\-M\fP, \fB\-\-shmem\fP \fIsize\fP +.RS 4 +Create a shared memory segment of \fIsize\fP bytes. The \fIsize\fP argument may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, etc. (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, etc. +.RE +.sp +\fB\-Q\fP, \fB\-\-queue\fP +.RS 4 +Create a message queue. +.RE +.sp +\fB\-S\fP, \fB\-\-semaphore\fP \fInumber\fP +.RS 4 +Create a semaphore array with \fInumber\fP of elements. +.RE +.sp +Other options are: +.sp +\fB\-p\fP, \fB\-\-mode\fP \fImode\fP +.RS 4 +Access permissions for the resource. Default is 0644. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "AUTHORS" +.sp +.MTO "hayden.james\(atgmail.com" "Hayden A. James" "" +.SH "SEE ALSO" +.sp +\fBipcrm\fP(1), +\fBipcs\fP(1), +\fBsysvipc\fP(7) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBipcmk\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/ipcmk.1.adoc b/sys-utils/ipcmk.1.adoc new file mode 100644 index 0000000..605b928 --- /dev/null +++ b/sys-utils/ipcmk.1.adoc @@ -0,0 +1,61 @@ +//po4a: entry man manual +//// +Copyright 2008 Hayden A. James (hayden.james@gmail.com) +May be distributed under the GNU General Public License +//// += ipcmk(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: ipcmk + +== NAME + +ipcmk - make various IPC resources + +== SYNOPSIS + +*ipcmk* [options] + +== DESCRIPTION + +*ipcmk* allows you to create System V inter-process communication (IPC) objects: shared memory segments, message queues, and semaphore arrays. + +== OPTIONS + +Resources can be specified with these options: + +*-M*, *--shmem* _size_:: +Create a shared memory segment of _size_ bytes. The _size_ argument may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, etc. (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, etc. + +*-Q*, *--queue*:: +Create a message queue. + +*-S*, *--semaphore* _number_:: +Create a semaphore array with _number_ of elements. + +Other options are: + +*-p*, *--mode* _mode_:: +Access permissions for the resource. Default is 0644. + +include::man-common/help-version.adoc[] + +== AUTHORS + +mailto:hayden.james@gmail.com[Hayden A. James] + +== SEE ALSO + +*ipcrm*(1), +*ipcs*(1), +*sysvipc*(7) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/ipcmk.c b/sys-utils/ipcmk.c new file mode 100644 index 0000000..9c1f608 --- /dev/null +++ b/sys-utils/ipcmk.c @@ -0,0 +1,171 @@ +/* + * ipcmk.c - used to create ad-hoc IPC segments + * + * Copyright (C) 2008 Hayden A. 
James (hayden.james@gmail.com) + * Copyright (C) 2008 Karel Zak <kzak@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <errno.h> +#include <getopt.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/ipc.h> +#include <sys/msg.h> +#include <sys/sem.h> +#include <sys/shm.h> +#include <sys/time.h> + +#include "c.h" +#include "nls.h" +#include "randutils.h" +#include "strutils.h" +#include "closestream.h" + +/* + * Create a System V shared memory segment of <size> bytes with the given + * permission bits, under a key filled in by ul_random_get_bytes(). + * IPC_CREAT is passed without IPC_EXCL. Returns the shmget() result. + */ +static int create_shm(size_t size, int permission) +{ + key_t key; + + ul_random_get_bytes(&key, sizeof(key)); + return shmget(key, size, permission | IPC_CREAT); +} + +/* + * Create a System V message queue with the given permission bits, under a + * key filled in by ul_random_get_bytes(). Returns the msgget() result. + */ +static int create_msg(int permission) +{ + key_t key; + + ul_random_get_bytes(&key, sizeof(key)); + return msgget(key, permission | IPC_CREAT); +} + +/* + * Create a System V semaphore array of <nsems> elements with the given + * permission bits, under a key filled in by ul_random_get_bytes(). + * Returns the semget() result. + */ +static int create_sem(int nsems, int permission) +{ + key_t key; + + ul_random_get_bytes(&key, sizeof(key)); + return semget(key, nsems, permission | IPC_CREAT); +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Create various IPC resources.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -M, --shmem <size> create shared memory segment of size
<size>\n"), out); + fputs(_(" -S, --semaphore <number> create semaphore array with <number> elements\n"), out); + fputs(_(" -Q, --queue create message queue\n"), out); + fputs(_(" -p, --mode <mode> permission for the resource (default is 0644)\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(26)); + + fputs(USAGE_ARGUMENTS, out); + printf(USAGE_ARG_SIZE(_("<size>"))); + + printf(USAGE_MAN_TAIL("ipcmk(1)")); + + exit(EXIT_SUCCESS); +} + +/* + * Parse the options, then create each requested System V IPC object + * (shared memory, message queue, semaphore array) and print its id. + * Any creation failure terminates the program with EXIT_FAILURE. + */ +int main(int argc, char **argv) +{ + int permission = 0644; + int opt; + size_t size = 0; + int nsems = 0; + int ask_shm = 0, ask_msg = 0, ask_sem = 0; + static const struct option longopts[] = { + {"shmem", required_argument, NULL, 'M'}, + {"semaphore", required_argument, NULL, 'S'}, + {"queue", no_argument, NULL, 'Q'}, + {"mode", required_argument, NULL, 'p'}, + {"version", no_argument, NULL, 'V'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while((opt = getopt_long(argc, argv, "hM:QS:p:V", longopts, NULL)) != -1) { + switch(opt) { + case 'M': + size = strtosize_or_err(optarg, _("failed to parse size")); + ask_shm = 1; + break; + case 'Q': + ask_msg = 1; + break; + case 'S': + nsems = strtos32_or_err(optarg, _("failed to parse elements")); + ask_sem = 1; + break; + case 'p': + { + char *end = NULL; + /* the mode is always read as octal */ + errno = 0; + permission = strtoul(optarg, &end, 8); + /* err() appends strerror(errno); use it only when strtoul() really set errno */ + if (errno) + err(EXIT_FAILURE, _("failed to parse mode")); + /* no digits or trailing garbage: errno is 0, so report without an errno suffix */ + if (optarg == end || (end && *end)) + errx(EXIT_FAILURE, _("failed to parse mode")); + break; + } + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + /* at least one of -M, -Q, -S is required */ + if(!ask_shm && !ask_msg && !ask_sem) { + warnx(_("bad usage")); + errtryhelp(EXIT_FAILURE); + } + if (ask_shm) { + int shmid; + if (-1 == (shmid = create_shm(size, permission))) + err(EXIT_FAILURE, _("create share memory failed")); + else + printf(_("Shared memory id: %d\n"), shmid); + } + + if (ask_msg) { 
+ int msgid; + if (-1 == (msgid = create_msg(permission))) + err(EXIT_FAILURE, _("create message queue failed")); + else + printf(_("Message queue id: %d\n"), msgid); + } + + if (ask_sem) { + int semid; + if (-1 == (semid = create_sem(nsems, permission))) + err(EXIT_FAILURE, _("create semaphore failed")); + else + printf(_("Semaphore id: %d\n"), semid); + } + + return EXIT_SUCCESS; +} diff --git a/sys-utils/ipcrm.1 b/sys-utils/ipcrm.1 new file mode 100644 index 0000000..c93563d --- /dev/null +++ b/sys-utils/ipcrm.1 @@ -0,0 +1,119 @@ +'\" t +.\" Title: ipcrm +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "IPCRM" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ipcrm \- remove certain IPC resources +.SH "SYNOPSIS" +.sp +\fBipcrm\fP [options] +.sp +\fBipcrm\fP [\fBshm\fP|\fBmsg\fP|\fBsem\fP] \fIID\fP ... +.SH "DESCRIPTION" +.sp +\fBipcrm\fP removes System V inter\-process communication (IPC) objects and associated data structures from the system. In order to delete such objects, you must be superuser, or the creator or owner of the object. +.sp +System V IPC objects are of three types: shared memory, message queues, and semaphores. Deletion of a message queue or semaphore object is immediate (regardless of whether any process still holds an IPC identifier for the object). A shared memory object is only removed after all currently attached processes have detached (\fBshmdt\fP(2)) the object from their virtual address space. +.sp +Two syntax styles are supported. 
The historical Linux syntax specifies a three\-letter keyword indicating which class of object is to be deleted, followed by one or more IPC identifiers for objects of this type. +.sp +The SUS\-compliant syntax allows the specification of zero or more objects of all three types in a single command line, with objects specified either by key or by identifier (see below). Both keys and identifiers may be specified in decimal, hexadecimal (specified with an initial \(aq0x\(aq or \(aq0X\(aq), or octal (specified with an initial \(aq0\(aq). +.sp +The details of the removal operations are described in \fBshmctl\fP(2), \fBmsgctl\fP(2), and \fBsemctl\fP(2). The identifiers and keys can be found by using \fBipcs\fP(1). +.SH "OPTIONS" +.sp +\fB\-a\fP, \fB\-\-all\fP [\fBshm\fP] [\fBmsg\fP] [\fBsem\fP] +.RS 4 +Remove all resources. When an option argument is provided, the removal is performed only for the specified resource types. +.sp +\fIWarning!\fP Do not use \fB\-a\fP if you are unsure how the software using the resources might react to missing objects. Some programs create these resources at startup and may not have any code to deal with an unexpected disappearance. +.RE +.sp +\fB\-M\fP, \fB\-\-shmem\-key\fP \fIshmkey\fP +.RS 4 +Remove the shared memory segment created with \fIshmkey\fP after the last detach is performed. +.RE +.sp +\fB\-m\fP, \fB\-\-shmem\-id\fP \fIshmid\fP +.RS 4 +Remove the shared memory segment identified by \fIshmid\fP after the last detach is performed. +.RE +.sp +\fB\-Q\fP, \fB\-\-queue\-key\fP \fImsgkey\fP +.RS 4 +Remove the message queue created with \fImsgkey\fP. +.RE +.sp +\fB\-q\fP, \fB\-\-queue\-id\fP \fImsgid\fP +.RS 4 +Remove the message queue identified by \fImsgid\fP. +.RE +.sp +\fB\-S\fP, \fB\-\-semaphore\-key\fP \fIsemkey\fP +.RS 4 +Remove the semaphore created with \fIsemkey\fP. +.RE +.sp +\fB\-s\fP, \fB\-\-semaphore\-id\fP \fIsemid\fP +.RS 4 +Remove the semaphore identified by \fIsemid\fP. 
+.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "NOTES" +.sp +In its first Linux implementation, \fBipcrm\fP used the deprecated syntax shown in the second line of the \fBSYNOPSIS\fP. Functionality present in other *nix implementations of \fBipcrm\fP has since been added, namely the ability to delete resources by key (not just identifier), and to respect the same command\-line syntax. For backward compatibility the previous syntax is still supported. +.SH "SEE ALSO" +.sp +\fBipcmk\fP(1), +\fBipcs\fP(1), +\fBmsgctl\fP(2), +\fBmsgget\fP(2), +\fBsemctl\fP(2), +\fBsemget\fP(2), +\fBshmctl\fP(2), +\fBshmdt\fP(2), +\fBshmget\fP(2), +\fBftok\fP(3), +\fBsysvipc\fP(7) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBipcrm\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/ipcrm.1.adoc b/sys-utils/ipcrm.1.adoc new file mode 100644 index 0000000..98e3638 --- /dev/null +++ b/sys-utils/ipcrm.1.adoc @@ -0,0 +1,88 @@ +//po4a: entry man manual +//// +Copyright 2002 Andre C. Mazzone (linuxdev@karagee.com) +May be distributed under the GNU General Public License +//// += ipcrm(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: ipcrm +:asterisk: * + +== NAME + +ipcrm - remove certain IPC resources + +== SYNOPSIS + +*ipcrm* [options] + +*ipcrm* [*shm*|*msg*|*sem*] _ID_ ... + +== DESCRIPTION + +*ipcrm* removes System V inter-process communication (IPC) objects and associated data structures from the system. In order to delete such objects, you must be superuser, or the creator or owner of the object. + +System V IPC objects are of three types: shared memory, message queues, and semaphores. Deletion of a message queue or semaphore object is immediate (regardless of whether any process still holds an IPC identifier for the object). A shared memory object is only removed after all currently attached processes have detached (*shmdt*(2)) the object from their virtual address space. + +Two syntax styles are supported. The historical Linux syntax specifies a three-letter keyword indicating which class of object is to be deleted, followed by one or more IPC identifiers for objects of this type. + +The SUS-compliant syntax allows the specification of zero or more objects of all three types in a single command line, with objects specified either by key or by identifier (see below). Both keys and identifiers may be specified in decimal, hexadecimal (specified with an initial '0x' or '0X'), or octal (specified with an initial '0'). + +The details of the removal operations are described in *shmctl*(2), *msgctl*(2), and *semctl*(2). The identifiers and keys can be found by using *ipcs*(1). 
+ +== OPTIONS + +*-a*, *--all* [*shm*] [*msg*] [*sem*]:: +Remove all resources. When an option argument is provided, the removal is performed only for the specified resource types. ++ +_Warning!_ Do not use *-a* if you are unsure how the software using the resources might react to missing objects. Some programs create these resources at startup and may not have any code to deal with an unexpected disappearance. + +*-M*, *--shmem-key* _shmkey_:: +Remove the shared memory segment created with _shmkey_ after the last detach is performed. + +*-m*, *--shmem-id* _shmid_:: +Remove the shared memory segment identified by _shmid_ after the last detach is performed. + +*-Q*, *--queue-key* _msgkey_:: +Remove the message queue created with _msgkey_. + +*-q*, *--queue-id* _msgid_:: +Remove the message queue identified by _msgid_. + +*-S*, *--semaphore-key* _semkey_:: +Remove the semaphore created with _semkey_. + +*-s*, *--semaphore-id* _semid_:: +Remove the semaphore identified by _semid_. + +include::man-common/help-version.adoc[] + +== NOTES + +//TRANSLATORS: Keep {asterisk} untranslated; it expands to "*nix". +In its first Linux implementation, *ipcrm* used the deprecated syntax shown in the second line of the *SYNOPSIS*. Functionality present in other {asterisk}nix implementations of *ipcrm* has since been added, namely the ability to delete resources by key (not just identifier), and to respect the same command-line syntax. For backward compatibility the previous syntax is still supported. 
+ +== SEE ALSO + +*ipcmk*(1), +*ipcs*(1), +*msgctl*(2), +*msgget*(2), +*semctl*(2), +*semget*(2), +*shmctl*(2), +*shmdt*(2), +*shmget*(2), +*ftok*(3), +*sysvipc*(7) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/ipcrm.c b/sys-utils/ipcrm.c new file mode 100644 index 0000000..52768a2 --- /dev/null +++ b/sys-utils/ipcrm.c @@ -0,0 +1,424 @@ +/* + * krishna balasubramanian 1993 + * + * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL> + * - added Native Language Support + * + * 1999-04-02 frank zago + * - can now remove several id's in the same call + * + */ + +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ipc.h> +#include <sys/msg.h> +#include <sys/sem.h> +#include <sys/shm.h> +#include <sys/types.h> +#include "c.h" +#include "nls.h" +#include "strutils.h" +#include "closestream.h" + +#ifndef HAVE_UNION_SEMUN +/* according to X/OPEN we have to define it ourselves */ +union semun { + int val; + struct semid_ds *buf; + unsigned short int *array; + struct seminfo *__buf; +}; +#endif + +typedef enum type_id { + SHM, + SEM, + MSG, + ALL +} type_id; + +static int verbose = 0; + +/* print the usage */ +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, _(" %1$s [options]\n" + " %1$s shm|msg|sem <id>...\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Remove certain IPC resources.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -m, --shmem-id <id> remove shared memory segment by id\n"), out); + fputs(_(" -M, --shmem-key <key> remove shared memory segment by key\n"), out); + fputs(_(" -q, --queue-id <id> remove message queue by id\n"), out); + fputs(_(" -Q, --queue-key <key> remove message queue by key\n"), out); + fputs(_(" -s, --semaphore-id <id> remove semaphore by 
id\n"), out); + fputs(_(" -S, --semaphore-key <key> remove semaphore by key\n"), out); + fputs(_(" -a, --all[=shm|msg|sem] remove all (in the specified category)\n"), out); + fputs(_(" -v, --verbose explain what is being done\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(28)); + printf(USAGE_MAN_TAIL("ipcrm(1)")); + + exit(EXIT_SUCCESS); +} + +static int remove_id(int type, int iskey, int id) +{ + int ret; + char *errmsg; + /* needed to delete semaphores */ + union semun arg; + arg.val = 0; + + /* do the removal */ + switch (type) { + case SHM: + if (verbose) + printf(_("removing shared memory segment id `%d'\n"), id); + ret = shmctl(id, IPC_RMID, NULL); + break; + case MSG: + if (verbose) + printf(_("removing message queue id `%d'\n"), id); + ret = msgctl(id, IPC_RMID, NULL); + break; + case SEM: + if (verbose) + printf(_("removing semaphore id `%d'\n"), id); + ret = semctl(id, 0, IPC_RMID, arg); + break; + default: + errx(EXIT_FAILURE, "impossible occurred"); + } + + /* how did the removal go? */ + if (ret < 0) { + switch (errno) { + case EACCES: + case EPERM: + errmsg = iskey ? _("permission denied for key") : _("permission denied for id"); + break; + case EINVAL: + errmsg = iskey ? _("invalid key") : _("invalid id"); + break; + case EIDRM: + errmsg = iskey ? _("already removed key") : _("already removed id"); + break; + default: + err(EXIT_FAILURE, "%s", iskey ? 
_("key failed") : _("id failed")); + } + warnx("%s (%d)", errmsg, id); + return 1; + } + return 0; +} + +static int remove_arg_list(type_id type, int argc, char **argv) +{ + int id; + char *end = NULL; + int nb_errors = 0; + + do { + errno = 0; + id = strtoul(argv[0], &end, 10); + if (errno || !end || *end != 0) { + warnx(_("invalid id: %s"), argv[0]); + nb_errors++; + } else { + if (remove_id(type, 0, id)) + nb_errors++; + } + argc--; + argv++; + } while (argc); + return (nb_errors); +} + +static int deprecated_main(int argc, char **argv) +{ + type_id type; + + if (!strcmp(argv[1], "shm")) + type = SHM; + else if (!strcmp(argv[1], "msg")) + type = MSG; + else if (!strcmp(argv[1], "sem")) + type = SEM; + else + return 0; + + if (argc < 3) { + warnx(_("not enough arguments")); + errtryhelp(EXIT_FAILURE); + } + + if (remove_arg_list(type, argc - 2, &argv[2])) + exit(EXIT_FAILURE); + + printf(_("resource(s) deleted\n")); + return 1; +} + +static unsigned long strtokey(const char *str, const char *errmesg) +{ + unsigned long num; + char *end = NULL; + + if (str == NULL || *str == '\0') + goto err; + errno = 0; + /* keys are in hex or decimal */ + num = strtoul(str, &end, 0); + + if (errno || str == end || (end && *end)) + goto err; + + return num; + err: + if (errno) + err(EXIT_FAILURE, "%s: '%s'", errmesg, str); + else + errx(EXIT_FAILURE, "%s: '%s'", errmesg, str); + return 0; +} + +static int key_to_id(type_id type, char *s) +{ + int id; + /* keys are in hex or decimal */ + key_t key = strtokey(s, "failed to parse argument"); + if (key == IPC_PRIVATE) { + warnx(_("illegal key (%s)"), s); + return -1; + } + switch (type) { + case SHM: + id = shmget(key, 0, 0); + break; + case MSG: + id = msgget(key, 0); + break; + case SEM: + id = semget(key, 0, 0); + break; + case ALL: + abort(); + default: + errx(EXIT_FAILURE, "impossible occurred"); + } + if (id < 0) { + char *errmsg; + switch (errno) { + case EACCES: + errmsg = _("permission denied for key"); + break; + case 
EIDRM: + errmsg = _("already removed key"); + break; + case ENOENT: + errmsg = _("invalid key"); + break; + default: + err(EXIT_FAILURE, _("key failed")); + } + warnx("%s (%s)", errmsg, s); + } + return id; +} + +static int remove_all(type_id type) +{ + int ret = 0; + int id, rm_me, maxid; + + struct shmid_ds shmseg; + + struct semid_ds semary; + struct seminfo seminfo; + union semun arg; + + struct msqid_ds msgque; + struct msginfo msginfo; + + if (type == SHM || type == ALL) { + maxid = shmctl(0, SHM_INFO, &shmseg); + if (maxid < 0) + errx(EXIT_FAILURE, + _("kernel not configured for shared memory")); + for (id = 0; id <= maxid; id++) { + rm_me = shmctl(id, SHM_STAT, &shmseg); + if (rm_me < 0) + continue; + ret |= remove_id(SHM, 0, rm_me); + } + } + if (type == SEM || type == ALL) { + arg.array = (ushort *) (void *)&seminfo; + maxid = semctl(0, 0, SEM_INFO, arg); + if (maxid < 0) + errx(EXIT_FAILURE, + _("kernel not configured for semaphores")); + for (id = 0; id <= maxid; id++) { + arg.buf = (struct semid_ds *)&semary; + rm_me = semctl(id, 0, SEM_STAT, arg); + if (rm_me < 0) + continue; + ret |= remove_id(SEM, 0, rm_me); + } + } +/* kFreeBSD hackery -- ah 20140723 */ +#ifndef MSG_STAT +#define MSG_STAT 11 +#endif +#ifndef MSG_INFO +#define MSG_INFO 12 +#endif + if (type == MSG || type == ALL) { + maxid = + msgctl(0, MSG_INFO, (struct msqid_ds *)(void *)&msginfo); + if (maxid < 0) + errx(EXIT_FAILURE, + _("kernel not configured for message queues")); + for (id = 0; id <= maxid; id++) { + rm_me = msgctl(id, MSG_STAT, &msgque); + if (rm_me < 0) + continue; + ret |= remove_id(MSG, 0, rm_me); + } + } + return ret; +} + +int main(int argc, char **argv) +{ + int c; + int ret = 0; + int id = -1; + int iskey; + int rm_all = 0; + type_id what_all = ALL; + + static const struct option longopts[] = { + {"shmem-id", required_argument, NULL, 'm'}, + {"shmem-key", required_argument, NULL, 'M'}, + {"queue-id", required_argument, NULL, 'q'}, + {"queue-key", required_argument, 
NULL, 'Q'}, + {"semaphore-id", required_argument, NULL, 's'}, + {"semaphore-key", required_argument, NULL, 'S'}, + {"all", optional_argument, NULL, 'a'}, + {"verbose", no_argument, NULL, 'v'}, + {"version", no_argument, NULL, 'V'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + + /* if the command is executed without parameters, do nothing */ + if (argc == 1) + return 0; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + /* check to see if the command is being invoked in the old way if so + * then remove argument list */ + if (deprecated_main(argc, argv)) + return EXIT_SUCCESS; + + /* process new syntax to conform with SYSV ipcrm */ + while((c = getopt_long(argc, argv, "q:m:s:Q:M:S:a::vhV", longopts, NULL)) != -1) { + iskey = 0; + switch (c) { + case 'M': + iskey = 1; + id = key_to_id(SHM, optarg); + if (id < 0) { + ret++; + break; + } + /* fallthrough */ + case 'm': + if (!iskey) + id = strtos32_or_err(optarg, _("failed to parse argument")); + if (remove_id(SHM, iskey, id)) + ret++; + break; + case 'Q': + iskey = 1; + id = key_to_id(MSG, optarg); + if (id < 0) { + ret++; + break; + } + /* fallthrough */ + case 'q': + if (!iskey) + id = strtos32_or_err(optarg, _("failed to parse argument")); + if (remove_id(MSG, iskey, id)) + ret++; + break; + case 'S': + iskey = 1; + id = key_to_id(SEM, optarg); + if (id < 0) { + ret++; + break; + } + /* fallthrough */ + case 's': + if (!iskey) + id = strtos32_or_err(optarg, _("failed to parse argument")); + if (remove_id(SEM, iskey, id)) + ret++; + break; + case 'a': + rm_all = 1; + if (optarg) { + if (!strcmp(optarg, "shm")) + what_all = SHM; + else if (!strcmp(optarg, "msg")) + what_all = MSG; + else if (!strcmp(optarg, "sem")) + what_all = SEM; + else + errx(EXIT_FAILURE, + _("unknown argument: %s"), optarg); + } else { + what_all = ALL; + } + break; + case 'v': + verbose = 1; + break; + + case 'h': + usage(); + case 'V': + 
print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (rm_all && remove_all(what_all)) + ret++; + + /* print usage if we still have some arguments left over */ + if (optind < argc) { + warnx(_("unknown argument: %s"), argv[optind]); + errtryhelp(EXIT_FAILURE); + } + + return ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sys-utils/ipcs.1 b/sys-utils/ipcs.1 new file mode 100644 index 0000000..4b718ea --- /dev/null +++ b/sys-utils/ipcs.1 @@ -0,0 +1,150 @@ +'\" t +.\" Title: ipcs +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "IPCS" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ipcs \- show information on IPC facilities +.SH "SYNOPSIS" +.sp +\fBipcs\fP [options] +.SH "DESCRIPTION" +.sp +\fBipcs\fP shows information on System V inter\-process communication facilities. By default it shows information about all three resources: shared memory segments, message queues, and semaphore arrays. +.SH "OPTIONS" +.sp +\fB\-i\fP, \fB\-\-id\fP \fIid\fP +.RS 4 +Show full details on just the one resource element identified by \fIid\fP. This option needs to be combined with one of the three resource options: \fB\-m\fP, \fB\-q\fP or \fB\-s\fP. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SS "Resource options" +.sp +\fB\-m\fP, \fB\-\-shmems\fP +.RS 4 +Write information about active shared memory segments. +.RE +.sp +\fB\-q\fP, \fB\-\-queues\fP +.RS 4 +Write information about active message queues. 
+.RE +.sp +\fB\-s\fP, \fB\-\-semaphores\fP +.RS 4 +Write information about active semaphore sets. +.RE +.sp +\fB\-a\fP, \fB\-\-all\fP +.RS 4 +Write information about all three resources (default). +.RE +.SS "Output formats" +.sp +Of these options only one takes effect: the last one specified. +.sp +\fB\-c\fP, \fB\-\-creator\fP +.RS 4 +Show creator and owner. +.RE +.sp +\fB\-l\fP, \fB\-\-limits\fP +.RS 4 +Show resource limits. +.RE +.sp +\fB\-p\fP, \fB\-\-pid\fP +.RS 4 +Show PIDs of creator and last operator. +.RE +.sp +\fB\-t\fP, \fB\-\-time\fP +.RS 4 +Write time information. The time of the last control operation that changed the access permissions for all facilities, the time of the last \fBmsgsnd\fP(2) and \fBmsgrcv\fP(2) operations on message queues, the time of the last \fBshmat\fP(2) and \fBshmdt\fP(2) operations on shared memory, and the time of the last \fBsemop\fP(2) operation on semaphores. +.RE +.sp +\fB\-u\fP, \fB\-\-summary\fP +.RS 4 +Show status summary. +.RE +.SS "Representation" +.sp +These affect only the \fB\-l\fP (\fB\-\-limits\fP) option. +.sp +\fB\-b\fP, \fB\-\-bytes\fP +.RS 4 +Print the sizes in bytes rather than in a human\-readable format. +.sp +By default, the unit, sizes are expressed in, is byte, and unit prefixes are in +power of 2^10 (1024). Abbreviations of symbols are exhibited truncated in order +to reach a better readability, by exhibiting alone the first letter of them; +examples: "1 KiB" and "1 MiB" are respectively exhibited as "1 K" and "1 M", +then omitting on purpose the mention "iB", which is part of these abbreviations. +.RE +.sp +\fB\-\-human\fP +.RS 4 +Print sizes in human\-readable format. +.RE +.SH "CONFORMING TO" +.sp +The Linux \fBipcs\fP utility is not fully compatible to the POSIX \fBipcs\fP utility. The Linux version does not support the POSIX \fB\-a\fP, \fB\-b\fP and \fB\-o\fP options, but does support the \fB\-l\fP and \fB\-u\fP options not defined by POSIX. 
A portable application shall not use the \fB\-a\fP, \fB\-b\fP, \fB\-o\fP, \fB\-l\fP, and \fB\-u\fP options. +.SH "NOTES" +.sp +The current implementation of \fBipcs\fP obtains information about available IPC resources by parsing the files in \fI/proc/sysvipc\fP. Before util\-linux version v2.23, an alternate mechanism was used: the \fBIPC_STAT\fP command of \fBmsgctl\fP(2), \fBsemctl\fP(2), and \fBshmctl\fP(2). This mechanism is also used in later util\-linux versions in the case where \fI/proc\fP is unavailable. A limitation of the \fBIPC_STAT\fP mechanism is that it can only be used to retrieve information about IPC resources for which the user has read permission. +.SH "AUTHORS" +.sp +.MTO "balasub\(atcis.ohio\-state.edu" "Krishna Balasubramanian" "" +.SH "SEE ALSO" +.sp +\fBipcmk\fP(1), +\fBipcrm\fP(1), +\fBmsgrcv\fP(2), +\fBmsgsnd\fP(2), +\fBsemget\fP(2), +\fBsemop\fP(2), +\fBshmat\fP(2), +\fBshmdt\fP(2), +\fBshmget\fP(2), +\fBsysvipc\fP(7) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBipcs\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/ipcs.1.adoc b/sys-utils/ipcs.1.adoc new file mode 100644 index 0000000..0234232 --- /dev/null +++ b/sys-utils/ipcs.1.adoc @@ -0,0 +1,107 @@ +//po4a: entry man manual +//// +Copyright 1993 Rickard E. Faith (faith@cs.unc.edu) +May be distributed under the GNU General Public License +//// += ipcs(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: ipcs + +== NAME + +ipcs - show information on IPC facilities + +== SYNOPSIS + +*ipcs* [options] + +== DESCRIPTION + +*ipcs* shows information on System V inter-process communication facilities. By default it shows information about all three resources: shared memory segments, message queues, and semaphore arrays. + +== OPTIONS + +*-i*, *--id* _id_:: +Show full details on just the one resource element identified by _id_. This option needs to be combined with one of the three resource options: *-m*, *-q* or *-s*. + +include::man-common/help-version.adoc[] + +=== Resource options + +*-m*, *--shmems*:: +Write information about active shared memory segments. + +*-q*, *--queues*:: +Write information about active message queues. + +*-s*, *--semaphores*:: +Write information about active semaphore sets. + +*-a*, *--all*:: +Write information about all three resources (default). + +=== Output formats + +Of these options only one takes effect: the last one specified. + +*-c*, *--creator*:: +Show creator and owner. + +*-l*, *--limits*:: +Show resource limits. + +*-p*, *--pid*:: +Show PIDs of creator and last operator. + +*-t*, *--time*:: +Write time information. The time of the last control operation that changed the access permissions for all facilities, the time of the last *msgsnd*(2) and *msgrcv*(2) operations on message queues, the time of the last *shmat*(2) and *shmdt*(2) operations on shared memory, and the time of the last *semop*(2) operation on semaphores. + +*-u*, *--summary*:: +Show status summary. 
+ +=== Representation + +These affect only the *-l* (*--limits*) option. + +*-b*, *--bytes*:: +include::man-common/in-bytes.adoc[] + + +*--human*:: +Print sizes in human-readable format. + +== CONFORMING TO + +The Linux *ipcs* utility is not fully compatible to the POSIX *ipcs* utility. The Linux version does not support the POSIX *-a*, *-b* and *-o* options, but does support the *-l* and *-u* options not defined by POSIX. A portable application shall not use the *-a*, *-b*, *-o*, *-l*, and *-u* options. + +== NOTES + +The current implementation of *ipcs* obtains information about available IPC resources by parsing the files in _/proc/sysvipc_. Before util-linux version v2.23, an alternate mechanism was used: the *IPC_STAT* command of *msgctl*(2), *semctl*(2), and *shmctl*(2). This mechanism is also used in later util-linux versions in the case where _/proc_ is unavailable. A limitation of the *IPC_STAT* mechanism is that it can only be used to retrieve information about IPC resources for which the user has read permission. + +== AUTHORS + +mailto:balasub@cis.ohio-state.edu[Krishna Balasubramanian] + +== SEE ALSO + +*ipcmk*(1), +*ipcrm*(1), +*msgrcv*(2), +*msgsnd*(2), +*semget*(2), +*semop*(2), +*shmat*(2), +*shmdt*(2), +*shmget*(2), +*sysvipc*(7) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/ipcs.c b/sys-utils/ipcs.c new file mode 100644 index 0000000..9380d98 --- /dev/null +++ b/sys-utils/ipcs.c @@ -0,0 +1,681 @@ +/* Original author unknown, may be "krishna balasub@cis.ohio-state.edu" */ +/* + * Modified Sat Oct 9 10:55:28 1993 for 0.99.13 + * + * Patches from Mike Jagdis (jaggy@purplet.demon.co.uk) applied Wed Feb 8 + * 12:12:21 1995 by faith@cs.unc.edu to print numeric uids if no passwd file + * entry. 
+ * + * Patch from arnolds@ifns.de (Heinz-Ado Arnolds) applied Mon Jul 1 19:30:41 + * 1996 by janl@math.uio.no to add code missing in case PID: clauses. + * + * Patched to display the key field -- hy@picksys.com 12/18/96 + * + * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL> + * - added Native Language Support + */ + +#include <errno.h> +#include <getopt.h> + +#include "c.h" +#include "nls.h" +#include "closestream.h" +#include "timeutils.h" +#include "strutils.h" + +#include "ipcutils.h" + +enum output_formats { + NOTSPECIFIED, + LIMITS, + STATUS, + CREATOR, + TIME, + PID +}; +enum { + OPT_HUMAN = CHAR_MAX + 1 +}; + +static void do_shm (char format, int unit); +static void print_shm (int id, int unit); +static void do_sem (char format); +static void print_sem (int id); +static void do_msg (char format, int unit); +static void print_msg (int id, int unit); + +/* + * Format *t with ctime_r() into a static buffer and return that buffer. + * The result is overwritten by the next call; an empty string is returned + * when the time cannot be converted. + */ +static inline char *ctime64(int64_t *t) +{ + static char buf[CTIME_BUFSIZ]; + /* convert by value: time_t may be narrower than int64_t, so casting the pointer would read the wrong bytes */ + time_t tt = (time_t) *t; + + if (!ctime_r(&tt, buf)) + buf[0] = '\0'; + return buf; +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, _(" %1$s [resource-option...] 
[output-option]\n" + " %1$s -m|-q|-s -i <id>\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Show information on IPC facilities.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -i, --id <id> print details on resource identified by <id>\n"), out); + printf(USAGE_HELP_OPTIONS(16)); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Resource options:\n"), out); + fputs(_(" -m, --shmems shared memory segments\n"), out); + fputs(_(" -q, --queues message queues\n"), out); + fputs(_(" -s, --semaphores semaphores\n"), out); + fputs(_(" -a, --all all (default)\n"), out); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Output options:\n"), out); + fputs(_(" -t, --time show attach, detach and change times\n"), out); + fputs(_(" -p, --pid show PIDs of creator and last operator\n"), out); + fputs(_(" -c, --creator show creator and owner\n"), out); + fputs(_(" -l, --limits show resource limits\n"), out); + fputs(_(" -u, --summary show status summary\n"), out); + fputs(_(" --human show sizes in human-readable format\n"), out); + fputs(_(" -b, --bytes show sizes in bytes\n"), out); + printf(USAGE_MAN_TAIL("ipcs(1)")); + + exit(EXIT_SUCCESS); +} + +int main (int argc, char **argv) +{ + int opt, msg = 0, shm = 0, sem = 0, id = 0, specific = 0; + char format = NOTSPECIFIED; + int unit = IPC_UNIT_DEFAULT; + static const struct option longopts[] = { + {"id", required_argument, NULL, 'i'}, + {"queues", no_argument, NULL, 'q'}, + {"shmems", no_argument, NULL, 'm'}, + {"semaphores", no_argument, NULL, 's'}, + {"all", no_argument, NULL, 'a'}, + {"time", no_argument, NULL, 't'}, + {"pid", no_argument, NULL, 'p'}, + {"creator", no_argument, NULL, 'c'}, + {"limits", no_argument, NULL, 'l'}, + {"summary", no_argument, NULL, 'u'}, + {"human", no_argument, NULL, OPT_HUMAN}, + {"bytes", no_argument, NULL, 'b'}, + {"version", no_argument, NULL, 'V'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + char options[] = "i:qmsatpclubVh"; + + setlocale(LC_ALL, 
""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((opt = getopt_long(argc, argv, options, longopts, NULL)) != -1) { + switch (opt) { + case 'i': + id = strtos32_or_err(optarg, _("failed to parse id argument")); + specific = 1; + break; + case 'a': + msg = shm = sem = 1; + break; + case 'q': + msg = 1; + break; + case 'm': + shm = 1; + break; + case 's': + sem = 1; + break; + case 't': + format = TIME; + break; + case 'c': + format = CREATOR; + break; + case 'p': + format = PID; + break; + case 'l': + format = LIMITS; + break; + case 'u': + format = STATUS; + break; + case OPT_HUMAN: + unit = IPC_UNIT_HUMAN; + break; + case 'b': + unit = IPC_UNIT_BYTES; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (specific && (msg + shm + sem != 1)) + errx (EXIT_FAILURE, + _("when using an ID, a single resource must be specified")); + if (specific) { + if (msg) + print_msg (id, unit); + if (shm) + print_shm (id, unit); + if (sem) + print_sem (id); + } else { + if (!msg && !shm && !sem) + msg = shm = sem = 1; + printf ("\n"); + if (msg) { + do_msg (format, unit); + printf ("\n"); + } + if (shm) { + do_shm (format, unit); + printf ("\n"); + } + if (sem) { + do_sem (format); + printf ("\n"); + } + } + return EXIT_SUCCESS; +} + +static void do_shm (char format, int unit) +{ + struct passwd *pw; + struct shm_data *shmds, *shmdsp; + + switch (format) { + case LIMITS: + { + struct ipc_limits lim; + uint64_t tmp, pgsz = getpagesize(); + + if (ipc_shm_get_limits(&lim)) { + printf (_("unable to fetch shared memory limits\n")); + return; + } + printf (_("------ Shared Memory Limits --------\n")); + printf (_("max number of segments = %ju\n"), lim.shmmni); + ipc_print_size(unit == IPC_UNIT_DEFAULT ? 
IPC_UNIT_KB : unit, + _("max seg size"), lim.shmmax, "\n", 0); + + if (unit == IPC_UNIT_KB || unit == IPC_UNIT_DEFAULT) { + tmp = (uint64_t) lim.shmall * (pgsz / 1024); + if (lim.shmall != 0 && tmp / lim.shmall != pgsz / 1024) + tmp = UINT64_MAX - (UINT64_MAX % (pgsz / 1024)); + + ipc_print_size(IPC_UNIT_DEFAULT, _("max total shared memory (kbytes)"), tmp, "\n", 0); + } + else { + tmp = (uint64_t) lim.shmall * pgsz; + /* overflow handling, at least we don't print ridiculous small values */ + if (lim.shmall != 0 && tmp / lim.shmall != pgsz) + tmp = UINT64_MAX - (UINT64_MAX % pgsz); + + ipc_print_size(unit, _("max total shared memory"), tmp, "\n", 0); + } + ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_BYTES : unit, + _("min seg size"), lim.shmmin, "\n", 0); + return; + } + case STATUS: + { + int maxid; + struct shmid_ds shmbuf; + struct shm_info *shm_info; + + maxid = shmctl (0, SHM_INFO, &shmbuf); + shm_info = (struct shm_info *) &shmbuf; + if (maxid < 0) { + printf (_("kernel not configured for shared memory\n")); + return; + } + + printf (_("------ Shared Memory Status --------\n")); + /* + * TRANSLATORS: This output format is maintained for backward + * compatibility as ipcs is used in scripts. 
For consistency + * with the rest, the translated form can follow this model: + * + * "segments allocated = %d\n" + * "pages allocated = %ld\n" + * "pages resident = %ld\n" + * "pages swapped = %ld\n" + * "swap performance = %ld attempts, %ld successes\n" + */ + printf (_("segments allocated %d\n" + "pages allocated %ld\n" + "pages resident %ld\n" + "pages swapped %ld\n" + "Swap performance: %ld attempts\t %ld successes\n"), + shm_info->used_ids, + shm_info->shm_tot, + shm_info->shm_rss, + shm_info->shm_swp, + shm_info->swap_attempts, shm_info->swap_successes); + return; + } + + /* + * Headers only + */ + case CREATOR: + printf (_("------ Shared Memory Segment Creators/Owners --------\n")); + printf ("%-10s %-10s %-10s %-10s %-10s %-10s\n", + _("shmid"),_("perms"),_("cuid"),_("cgid"),_("uid"),_("gid")); + break; + + case TIME: + printf (_("------ Shared Memory Attach/Detach/Change Times --------\n")); + printf ("%-10s %-10s %-20s %-20s %-20s\n", + _("shmid"),_("owner"),_("attached"),_("detached"), + _("changed")); + break; + + case PID: + printf (_("------ Shared Memory Creator/Last-op PIDs --------\n")); + printf ("%-10s %-10s %-10s %-10s\n", + _("shmid"),_("owner"),_("cpid"),_("lpid")); + break; + + default: + printf (_("------ Shared Memory Segments --------\n")); + printf ("%-10s %-10s %-10s %-10s %-10s %-10s %-12s\n", + _("key"),_("shmid"),_("owner"),_("perms"), + unit == IPC_UNIT_HUMAN ? 
_("size") : _("bytes"), + _("nattch"),_("status")); + break; + } + + /* + * Print data + */ + if (ipc_shm_get_info(-1, &shmds) < 1) + return; + + for (shmdsp = shmds; shmdsp->next != NULL; shmdsp = shmdsp->next) { + if (format == CREATOR) { + ipc_print_perms(stdout, &shmdsp->shm_perm); + continue; + } + pw = getpwuid(shmdsp->shm_perm.uid); + switch (format) { + case TIME: + if (pw) + printf ("%-10d %-10.10s", shmdsp->shm_perm.id, pw->pw_name); + else + printf ("%-10d %-10u", shmdsp->shm_perm.id, shmdsp->shm_perm.uid); + /* ctime uses static buffer: use separate calls */ + printf(" %-20.16s", shmdsp->shm_atim + ? ctime64(&shmdsp->shm_atim) + 4 : _("Not set")); + printf(" %-20.16s", shmdsp->shm_dtim + ? ctime64(&shmdsp->shm_dtim) + 4 : _("Not set")); + printf(" %-20.16s\n", shmdsp->shm_ctim + ? ctime64(&shmdsp->shm_ctim) + 4 : _("Not set")); + break; + case PID: + if (pw) + printf ("%-10d %-10.10s", shmdsp->shm_perm.id, pw->pw_name); + else + printf ("%-10d %-10u", shmdsp->shm_perm.id, shmdsp->shm_perm.uid); + printf (" %-10u %-10u\n", + shmdsp->shm_cprid, shmdsp->shm_lprid); + break; + + default: + printf("0x%08x ", shmdsp->shm_perm.key); + if (pw) + printf ("%-10d %-10.10s", shmdsp->shm_perm.id, pw->pw_name); + else + printf ("%-10d %-10u", shmdsp->shm_perm.id, shmdsp->shm_perm.uid); + printf (" %-10o ", shmdsp->shm_perm.mode & 0777); + + if (unit == IPC_UNIT_HUMAN) + ipc_print_size(unit, NULL, shmdsp->shm_segsz, " ", 6); + else + ipc_print_size(unit, NULL, shmdsp->shm_segsz, NULL, -10); + + printf (" %-10ju %-6s %-6s\n", + shmdsp->shm_nattch, + shmdsp->shm_perm.mode & SHM_DEST ? _("dest") : " ", + shmdsp->shm_perm.mode & SHM_LOCKED ? 
_("locked") : " "); + break; + } + } + + ipc_shm_free_info(shmds); +} + +static void do_sem (char format) +{ + struct passwd *pw; + struct sem_data *semds, *semdsp; + + switch (format) { + case LIMITS: + { + struct ipc_limits lim; + + if (ipc_sem_get_limits(&lim)) { + printf (_("unable to fetch semaphore limits\n")); + return; + } + printf (_("------ Semaphore Limits --------\n")); + printf (_("max number of arrays = %d\n"), lim.semmni); + printf (_("max semaphores per array = %d\n"), lim.semmsl); + printf (_("max semaphores system wide = %d\n"), lim.semmns); + printf (_("max ops per semop call = %d\n"), lim.semopm); + printf (_("semaphore max value = %u\n"), lim.semvmx); + return; + } + case STATUS: + { + struct seminfo seminfo; + union semun arg; + arg.array = (ushort *) (void *) &seminfo; + if (semctl (0, 0, SEM_INFO, arg) < 0) { + printf (_("kernel not configured for semaphores\n")); + return; + } + printf (_("------ Semaphore Status --------\n")); + printf (_("used arrays = %d\n"), seminfo.semusz); + printf (_("allocated semaphores = %d\n"), seminfo.semaem); + return; + } + + case CREATOR: + printf (_("------ Semaphore Arrays Creators/Owners --------\n")); + printf ("%-10s %-10s %-10s %-10s %-10s %-10s\n", + _("semid"),_("perms"),_("cuid"),_("cgid"),_("uid"),_("gid")); + break; + + case TIME: + printf (_("------ Semaphore Operation/Change Times --------\n")); + printf ("%-8s %-10s %-26.24s %-26.24s\n", + _("semid"),_("owner"),_("last-op"),_("last-changed")); + break; + + case PID: + break; + + default: + printf (_("------ Semaphore Arrays --------\n")); + printf ("%-10s %-10s %-10s %-10s %-10s\n", + _("key"),_("semid"),_("owner"),_("perms"),_("nsems")); + break; + } + + /* + * Print data + */ + if (ipc_sem_get_info(-1, &semds) < 1) + return; + + for (semdsp = semds; semdsp->next != NULL; semdsp = semdsp->next) { + if (format == CREATOR) { + ipc_print_perms(stdout, &semdsp->sem_perm); + continue; + } + pw = getpwuid(semdsp->sem_perm.uid); + switch (format) { + 
case TIME: + if (pw) + printf ("%-8d %-10.10s", semdsp->sem_perm.id, pw->pw_name); + else + printf ("%-8d %-10u", semdsp->sem_perm.id, semdsp->sem_perm.uid); + printf (" %-26.24s", semdsp->sem_otime + ? ctime64(&semdsp->sem_otime) : _("Not set")); + printf (" %-26.24s\n", semdsp->sem_ctime + ? ctime64( &semdsp->sem_ctime) : _("Not set")); + break; + case PID: + break; + + default: + printf("0x%08x ", semdsp->sem_perm.key); + if (pw) + printf ("%-10d %-10.10s", semdsp->sem_perm.id, pw->pw_name); + else + printf ("%-10d %-10u", semdsp->sem_perm.id, semdsp->sem_perm.uid); + printf (" %-10o %-10ju\n", + semdsp->sem_perm.mode & 0777, + semdsp->sem_nsems); + break; + } + } + + ipc_sem_free_info(semds); +} + +static void do_msg (char format, int unit) +{ + struct passwd *pw; + struct msg_data *msgds, *msgdsp; + + switch (format) { + case LIMITS: + { + struct ipc_limits lim; + + if (ipc_msg_get_limits(&lim)) { + printf (_("unable to fetch message limits\n")); + return; + } + printf (_("------ Messages Limits --------\n")); + printf (_("max queues system wide = %d\n"), lim.msgmni); + ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_BYTES : unit, + _("max size of message"), lim.msgmax, "\n", 0); + ipc_print_size(unit == IPC_UNIT_DEFAULT ? IPC_UNIT_BYTES : unit, + _("default max size of queue"), lim.msgmnb, "\n", 0); + return; + } + case STATUS: + { + struct msginfo msginfo; + if (msgctl (0, MSG_INFO, (struct msqid_ds *) (void *) &msginfo) < 0) { + printf (_("kernel not configured for message queues\n")); + return; + } + printf (_("------ Messages Status --------\n")); +#ifndef __FreeBSD_kernel__ + printf (_("allocated queues = %d\n"), msginfo.msgpool); + printf (_("used headers = %d\n"), msginfo.msgmap); +#endif + ipc_print_size(unit, _("used space"), msginfo.msgtql, + unit == IPC_UNIT_DEFAULT ? 
_(" bytes\n") : "\n", 0); + return; + } + case CREATOR: + printf (_("------ Message Queues Creators/Owners --------\n")); + printf ("%-10s %-10s %-10s %-10s %-10s %-10s\n", + _("msqid"),_("perms"),_("cuid"),_("cgid"),_("uid"),_("gid")); + break; + + case TIME: + printf (_("------ Message Queues Send/Recv/Change Times --------\n")); + printf ("%-8s %-10s %-20s %-20s %-20s\n", + _("msqid"),_("owner"),_("send"),_("recv"),_("change")); + break; + + case PID: + printf (_("------ Message Queues PIDs --------\n")); + printf ("%-10s %-10s %-10s %-10s\n", + _("msqid"),_("owner"),_("lspid"),_("lrpid")); + break; + + default: + printf (_("------ Message Queues --------\n")); + printf ("%-10s %-10s %-10s %-10s %-12s %-12s\n", + _("key"), _("msqid"), _("owner"), _("perms"), + unit == IPC_UNIT_HUMAN ? _("size") : _("used-bytes"), + _("messages")); + break; + } + + /* + * Print data + */ + if (ipc_msg_get_info(-1, &msgds) < 1) + return; + + for (msgdsp = msgds; msgdsp->next != NULL; msgdsp = msgdsp->next) { + if (format == CREATOR) { + ipc_print_perms(stdout, &msgdsp->msg_perm); + continue; + } + pw = getpwuid(msgdsp->msg_perm.uid); + switch (format) { + case TIME: + if (pw) + printf ("%-8d %-10.10s", msgdsp->msg_perm.id, pw->pw_name); + else + printf ("%-8d %-10u", msgdsp->msg_perm.id, msgdsp->msg_perm.uid); + printf (" %-20.16s", msgdsp->q_stime + ? ctime64(&msgdsp->q_stime) + 4 : _("Not set")); + printf (" %-20.16s", msgdsp->q_rtime + ? ctime64(&msgdsp->q_rtime) + 4 : _("Not set")); + printf (" %-20.16s\n", msgdsp->q_ctime + ? 
ctime64(&msgdsp->q_ctime) + 4 : _("Not set")); + break; + case PID: + if (pw) + printf ("%-8d %-10.10s", msgdsp->msg_perm.id, pw->pw_name); + else + printf ("%-8d %-10u", msgdsp->msg_perm.id, msgdsp->msg_perm.uid); + printf (" %5d %5d\n", + msgdsp->q_lspid, msgdsp->q_lrpid); + break; + + default: + printf( "0x%08x ",msgdsp->msg_perm.key ); + if (pw) + printf ("%-10d %-10.10s", msgdsp->msg_perm.id, pw->pw_name); + else + printf ("%-10d %-10u", msgdsp->msg_perm.id, msgdsp->msg_perm.uid); + printf (" %-10o ", msgdsp->msg_perm.mode & 0777); + + if (unit == IPC_UNIT_HUMAN) + ipc_print_size(unit, NULL, msgdsp->q_cbytes, " ", 6); + else + ipc_print_size(unit, NULL, msgdsp->q_cbytes, NULL, -12); + + printf (" %-12ju\n", msgdsp->q_qnum); + break; + } + } + + ipc_msg_free_info(msgds); +} + +static void print_shm(int shmid, int unit) +{ + struct shm_data *shmdata; + + if (ipc_shm_get_info(shmid, &shmdata) < 1) { + warnx(_("id %d not found"), shmid); + return; + } + + printf(_("\nShared memory Segment shmid=%d\n"), shmid); + printf(_("uid=%u\tgid=%u\tcuid=%u\tcgid=%u\n"), + shmdata->shm_perm.uid, shmdata->shm_perm.gid, + shmdata->shm_perm.cuid, shmdata->shm_perm.cgid); + printf(_("mode=%#o\taccess_perms=%#o\n"), shmdata->shm_perm.mode, + shmdata->shm_perm.mode & 0777); + ipc_print_size(unit, unit == IPC_UNIT_HUMAN ? _("size=") : _("bytes="), + shmdata->shm_segsz, "\t", 0); + printf(_("lpid=%u\tcpid=%u\tnattch=%jd\n"), + shmdata->shm_lprid, shmdata->shm_cprid, + shmdata->shm_nattch); + printf(_("att_time=%-26.24s\n"), + shmdata->shm_atim ? ctime64(&(shmdata->shm_atim)) : _("Not set")); + printf(_("det_time=%-26.24s\n"), + shmdata->shm_dtim ? 
ctime64(&shmdata->shm_dtim) : _("Not set")); + printf(_("change_time=%-26.24s\n"), ctime64(&shmdata->shm_ctim)); + printf("\n"); + + ipc_shm_free_info(shmdata); +} + +static void print_msg(int msgid, int unit) +{ + struct msg_data *msgdata; + + if (ipc_msg_get_info(msgid, &msgdata) < 1) { + warnx(_("id %d not found"), msgid); + return; + } + + printf(_("\nMessage Queue msqid=%d\n"), msgid); + printf(_("uid=%u\tgid=%u\tcuid=%u\tcgid=%u\tmode=%#o\n"), + msgdata->msg_perm.uid, msgdata->msg_perm.gid, + msgdata->msg_perm.cuid, msgdata->msg_perm.cgid, + msgdata->msg_perm.mode); + ipc_print_size(unit, unit == IPC_UNIT_HUMAN ? _("csize=") : _("cbytes="), + msgdata->q_cbytes, "\t", 0); + ipc_print_size(unit, unit == IPC_UNIT_HUMAN ? _("qsize=") : _("qbytes="), + msgdata->q_qbytes, "\t", 0); + printf("qnum=%jd\tlspid=%d\tlrpid=%d\n", + msgdata->q_qnum, + msgdata->q_lspid, msgdata->q_lrpid); + printf(_("send_time=%-26.24s\n"), + msgdata->q_stime ? ctime64(&msgdata->q_stime) : _("Not set")); + printf(_("rcv_time=%-26.24s\n"), + msgdata->q_rtime ? ctime64(&msgdata->q_rtime) : _("Not set")); + printf(_("change_time=%-26.24s\n"), + msgdata->q_ctime ? ctime64(&msgdata->q_ctime) : _("Not set")); + printf("\n"); + + ipc_msg_free_info(msgdata); +} + +static void print_sem(int semid) +{ + struct sem_data *semdata; + size_t i; + + if (ipc_sem_get_info(semid, &semdata) < 1) { + warnx(_("id %d not found"), semid); + return; + } + + printf(_("\nSemaphore Array semid=%d\n"), semid); + printf(_("uid=%u\t gid=%u\t cuid=%u\t cgid=%u\n"), + semdata->sem_perm.uid, semdata->sem_perm.gid, + semdata->sem_perm.cuid, semdata->sem_perm.cgid); + printf(_("mode=%#o, access_perms=%#o\n"), + semdata->sem_perm.mode, semdata->sem_perm.mode & 0777); + printf(_("nsems = %ju\n"), semdata->sem_nsems); + printf(_("otime = %-26.24s\n"), + semdata->sem_otime ? 
ctime64(&semdata->sem_otime) : _("Not set")); + printf(_("ctime = %-26.24s\n"), ctime64(&semdata->sem_ctime)); + + printf("%-10s %-10s %-10s %-10s %-10s\n", + _("semnum"), _("value"), _("ncount"), _("zcount"), _("pid")); + + for (i = 0; i < semdata->sem_nsems; i++) { + struct sem_elem *e = &semdata->elements[i]; + printf("%-10zu %-10d %-10d %-10d %-10d\n", + i, e->semval, e->ncount, e->zcount, e->pid); + } + printf("\n"); + ipc_sem_free_info(semdata); +} diff --git a/sys-utils/ipcutils.c b/sys-utils/ipcutils.c new file mode 100644 index 0000000..226a431 --- /dev/null +++ b/sys-utils/ipcutils.c @@ -0,0 +1,536 @@ +#include <inttypes.h> + +#include "c.h" +#include "nls.h" +#include "xalloc.h" +#include "path.h" +#include "pathnames.h" +#include "ipcutils.h" +#include "strutils.h" + +#ifndef SEMVMX +# define SEMVMX 32767 /* <= 32767 semaphore maximum value */ +#endif +#ifndef SHMMIN +# define SHMMIN 1 /* min shared segment size in bytes */ +#endif + + +int ipc_msg_get_limits(struct ipc_limits *lim) +{ + if (access(_PATH_PROC_IPC_MSGMNI, F_OK) == 0 && + access(_PATH_PROC_IPC_MSGMNB, F_OK) == 0 && + access(_PATH_PROC_IPC_MSGMAX, F_OK) == 0) { + + if (ul_path_read_s32(NULL, &lim->msgmni, _PATH_PROC_IPC_MSGMNI) != 0) + return 1; + if (ul_path_read_s32(NULL, &lim->msgmnb, _PATH_PROC_IPC_MSGMNB) != 0) + return 1; + if (ul_path_read_u64(NULL, &lim->msgmax, _PATH_PROC_IPC_MSGMAX) != 0) + return 1; + } else { + struct msginfo msginfo; + + if (msgctl(0, IPC_INFO, (struct msqid_ds *) &msginfo) < 0) + return 1; + lim->msgmni = msginfo.msgmni; + lim->msgmnb = msginfo.msgmnb; + lim->msgmax = msginfo.msgmax; + } + + return 0; +} + +int ipc_sem_get_limits(struct ipc_limits *lim) +{ + FILE *f; + int rc = 0; + + lim->semvmx = SEMVMX; + + f = fopen(_PATH_PROC_IPC_SEM, "r"); + if (f) { + rc = fscanf(f, "%d\t%d\t%d\t%d", + &lim->semmsl, &lim->semmns, &lim->semopm, &lim->semmni); + fclose(f); + } + + if (rc != 4) { + struct seminfo seminfo = { .semmni = 0 }; + union semun arg = { .array = 
(ushort *) &seminfo }; + + if (semctl(0, 0, IPC_INFO, arg) < 0) + return 1; + lim->semmni = seminfo.semmni; + lim->semmsl = seminfo.semmsl; + lim->semmns = seminfo.semmns; + lim->semopm = seminfo.semopm; + } + + return 0; +} + +int ipc_shm_get_limits(struct ipc_limits *lim) +{ + lim->shmmin = SHMMIN; + + if (access(_PATH_PROC_IPC_SHMALL, F_OK) == 0 && + access(_PATH_PROC_IPC_SHMMAX, F_OK) == 0 && + access(_PATH_PROC_IPC_SHMMNI, F_OK) == 0) { + + ul_path_read_u64(NULL, &lim->shmall, _PATH_PROC_IPC_SHMALL); + ul_path_read_u64(NULL, &lim->shmmax, _PATH_PROC_IPC_SHMMAX); + ul_path_read_u64(NULL, &lim->shmmni, _PATH_PROC_IPC_SHMMNI); + + } else { + struct shminfo *shminfo; + struct shmid_ds shmbuf; + + if (shmctl(0, IPC_INFO, &shmbuf) < 0) + return 1; + shminfo = (struct shminfo *) &shmbuf; + lim->shmmni = shminfo->shmmni; + lim->shmall = shminfo->shmall; + lim->shmmax = shminfo->shmmax; + } + + return 0; +} + +int ipc_shm_get_info(int id, struct shm_data **shmds) +{ + FILE *f; + int i = 0, maxid, j; + char buf[BUFSIZ]; + struct shm_data *p; + struct shmid_ds dummy; + + p = *shmds = xcalloc(1, sizeof(struct shm_data)); + p->next = NULL; + + f = fopen(_PATH_PROC_SYSV_SHM, "r"); + if (!f) + goto shm_fallback; + + while (fgetc(f) != '\n'); /* skip header */ + + while (fgets(buf, sizeof(buf), f) != NULL) { + /* scan for the first 14-16 columns (e.g. 
Linux 2.6.32 has 14) */ + p->shm_rss = 0xdead; + p->shm_swp = 0xdead; + if (sscanf(buf, + "%d %d %o %"SCNu64 " %u %u " + "%"SCNu64 " %u %u %u %u %"SCNi64 " %"SCNi64 " %"SCNi64 + " %"SCNu64 " %"SCNu64 "\n", + &p->shm_perm.key, + &p->shm_perm.id, + &p->shm_perm.mode, + &p->shm_segsz, + &p->shm_cprid, + &p->shm_lprid, + &p->shm_nattch, + &p->shm_perm.uid, + &p->shm_perm.gid, + &p->shm_perm.cuid, + &p->shm_perm.cgid, + &p->shm_atim, + &p->shm_dtim, + &p->shm_ctim, + &p->shm_rss, + &p->shm_swp) < 14) + continue; /* invalid line, skipped */ + + if (id > -1) { + /* ID specified */ + if (id == p->shm_perm.id) { + i = 1; + break; + } + continue; + } + + p->next = xcalloc(1, sizeof(struct shm_data)); + p = p->next; + p->next = NULL; + i++; + } + + if (i == 0) + free(*shmds); + fclose(f); + return i; + + /* Fallback; /proc or /sys file(s) missing. */ +shm_fallback: + maxid = shmctl(0, SHM_INFO, &dummy); + + for (j = 0; j <= maxid; j++) { + int shmid; + struct shmid_ds shmseg; + struct ipc_perm *ipcp = &shmseg.shm_perm; + + shmid = shmctl(j, SHM_STAT, &shmseg); + if (shmid < 0 || (id > -1 && shmid != id)) { + continue; + } + + i++; + p->shm_perm.key = ipcp->KEY; + p->shm_perm.id = shmid; + p->shm_perm.mode = ipcp->mode; + p->shm_segsz = shmseg.shm_segsz; + p->shm_cprid = shmseg.shm_cpid; + p->shm_lprid = shmseg.shm_lpid; + p->shm_nattch = shmseg.shm_nattch; + p->shm_perm.uid = ipcp->uid; + p->shm_perm.gid = ipcp->gid; + p->shm_perm.cuid = ipcp->cuid; + p->shm_perm.cgid = ipcp->cuid; + p->shm_atim = shmseg.shm_atime; + p->shm_dtim = shmseg.shm_dtime; + p->shm_ctim = shmseg.shm_ctime; + p->shm_rss = 0xdead; + p->shm_swp = 0xdead; + + if (id < 0) { + p->next = xcalloc(1, sizeof(struct shm_data)); + p = p->next; + p->next = NULL; + } else + break; + } + + if (i == 0) + free(*shmds); + return i; +} + +void ipc_shm_free_info(struct shm_data *shmds) +{ + while (shmds) { + struct shm_data *next = shmds->next; + free(shmds); + shmds = next; + } +} + +static void get_sem_elements(struct 
sem_data *p) +{ + size_t i; + + if (!p || !p->sem_nsems || p->sem_nsems > SIZE_MAX || p->sem_perm.id < 0) + return; + + p->elements = xcalloc(p->sem_nsems, sizeof(struct sem_elem)); + + for (i = 0; i < p->sem_nsems; i++) { + struct sem_elem *e = &p->elements[i]; + union semun arg = { .val = 0 }; + + e->semval = semctl(p->sem_perm.id, i, GETVAL, arg); + if (e->semval < 0) + err(EXIT_FAILURE, _("%s failed"), "semctl(GETVAL)"); + + e->ncount = semctl(p->sem_perm.id, i, GETNCNT, arg); + if (e->ncount < 0) + err(EXIT_FAILURE, _("%s failed"), "semctl(GETNCNT)"); + + e->zcount = semctl(p->sem_perm.id, i, GETZCNT, arg); + if (e->zcount < 0) + err(EXIT_FAILURE, _("%s failed"), "semctl(GETZCNT)"); + + e->pid = semctl(p->sem_perm.id, i, GETPID, arg); + if (e->pid < 0) + err(EXIT_FAILURE, _("%s failed"), "semctl(GETPID)"); + } +} + +int ipc_sem_get_info(int id, struct sem_data **semds) +{ + FILE *f; + int i = 0, maxid, j; + struct sem_data *p; + struct seminfo dummy; + union semun arg; + + p = *semds = xcalloc(1, sizeof(struct sem_data)); + p->next = NULL; + + f = fopen(_PATH_PROC_SYSV_SEM, "r"); + if (!f) + goto sem_fallback; + + while (fgetc(f) != '\n') ; /* skip header */ + + while (feof(f) == 0) { + if (fscanf(f, + "%d %d %o %" SCNu64 " %u %u %u %u %" + SCNi64 " %" SCNi64 "\n", + &p->sem_perm.key, + &p->sem_perm.id, + &p->sem_perm.mode, + &p->sem_nsems, + &p->sem_perm.uid, + &p->sem_perm.gid, + &p->sem_perm.cuid, + &p->sem_perm.cgid, + &p->sem_otime, + &p->sem_ctime) != 10) + continue; + + if (id > -1) { + /* ID specified */ + if (id == p->sem_perm.id) { + get_sem_elements(p); + i = 1; + break; + } + continue; + } + + p->next = xcalloc(1, sizeof(struct sem_data)); + p = p->next; + p->next = NULL; + i++; + } + + if (i == 0) + free(*semds); + fclose(f); + return i; + + /* Fallback; /proc or /sys file(s) missing. 
*/ +sem_fallback: + arg.array = (ushort *) (void *)&dummy; + maxid = semctl(0, 0, SEM_INFO, arg); + + for (j = 0; j <= maxid; j++) { + int semid; + struct semid_ds semseg; + struct ipc_perm *ipcp = &semseg.sem_perm; + arg.buf = (struct semid_ds *)&semseg; + + semid = semctl(j, 0, SEM_STAT, arg); + if (semid < 0 || (id > -1 && semid != id)) { + continue; + } + + i++; + p->sem_perm.key = ipcp->KEY; + p->sem_perm.id = semid; + p->sem_perm.mode = ipcp->mode; + p->sem_nsems = semseg.sem_nsems; + p->sem_perm.uid = ipcp->uid; + p->sem_perm.gid = ipcp->gid; + p->sem_perm.cuid = ipcp->cuid; + p->sem_perm.cgid = ipcp->cuid; + p->sem_otime = semseg.sem_otime; + p->sem_ctime = semseg.sem_ctime; + + if (id < 0) { + p->next = xcalloc(1, sizeof(struct sem_data)); + p = p->next; + p->next = NULL; + i++; + } else { + get_sem_elements(p); + break; + } + } + + if (i == 0) + free(*semds); + return i; +} + +void ipc_sem_free_info(struct sem_data *semds) +{ + while (semds) { + struct sem_data *next = semds->next; + free(semds->elements); + free(semds); + semds = next; + } +} + +int ipc_msg_get_info(int id, struct msg_data **msgds) +{ + FILE *f; + int i = 0, maxid, j; + struct msg_data *p; + struct msqid_ds dummy; + struct msqid_ds msgseg; + + p = *msgds = xcalloc(1, sizeof(struct msg_data)); + p->next = NULL; + + f = fopen(_PATH_PROC_SYSV_MSG, "r"); + if (!f) + goto msg_fallback; + + while (fgetc(f) != '\n') ; /* skip header */ + + while (feof(f) == 0) { + if (fscanf(f, + "%d %d %o %" SCNu64 " %" SCNu64 + " %u %u %u %u %u %u %" SCNi64 " %" SCNi64 " %" SCNi64 "\n", + &p->msg_perm.key, + &p->msg_perm.id, + &p->msg_perm.mode, + &p->q_cbytes, + &p->q_qnum, + &p->q_lspid, + &p->q_lrpid, + &p->msg_perm.uid, + &p->msg_perm.gid, + &p->msg_perm.cuid, + &p->msg_perm.cgid, + &p->q_stime, + &p->q_rtime, + &p->q_ctime) != 14) + continue; + + if (id > -1) { + /* ID specified */ + if (id == p->msg_perm.id) { + if (msgctl(id, IPC_STAT, &msgseg) != -1) + p->q_qbytes = msgseg.msg_qbytes; + i = 1; + 
break; + } + continue; + } + + p->next = xcalloc(1, sizeof(struct msg_data)); + p = p->next; + p->next = NULL; + i++; + } + + if (i == 0) + free(*msgds); + fclose(f); + return i; + + /* Fallback; /proc or /sys file(s) missing. */ +msg_fallback: + maxid = msgctl(0, MSG_INFO, &dummy); + + for (j = 0; j <= maxid; j++) { + int msgid; + struct ipc_perm *ipcp = &msgseg.msg_perm; + + msgid = msgctl(j, MSG_STAT, &msgseg); + if (msgid < 0 || (id > -1 && msgid != id)) { + continue; + } + + i++; + p->msg_perm.key = ipcp->KEY; + p->msg_perm.id = msgid; + p->msg_perm.mode = ipcp->mode; + p->q_cbytes = msgseg.msg_cbytes; + p->q_qnum = msgseg.msg_qnum; + p->q_lspid = msgseg.msg_lspid; + p->q_lrpid = msgseg.msg_lrpid; + p->msg_perm.uid = ipcp->uid; + p->msg_perm.gid = ipcp->gid; + p->msg_perm.cuid = ipcp->cuid; + p->msg_perm.cgid = ipcp->cgid; + p->q_stime = msgseg.msg_stime; + p->q_rtime = msgseg.msg_rtime; + p->q_ctime = msgseg.msg_ctime; + p->q_qbytes = msgseg.msg_qbytes; + + if (id < 0) { + p->next = xcalloc(1, sizeof(struct msg_data)); + p = p->next; + p->next = NULL; + } else + break; + } + + if (i == 0) + free(*msgds); + return i; +} + +void ipc_msg_free_info(struct msg_data *msgds) +{ + while (msgds) { + struct msg_data *next = msgds->next; + free(msgds); + msgds = next; + } +} + +void ipc_print_perms(FILE *f, struct ipc_stat *is) +{ + struct passwd *pw; + struct group *gr; + + fprintf(f, "%-10d %-10o", is->id, is->mode & 0777); + + if ((pw = getpwuid(is->cuid))) + fprintf(f, " %-10s", pw->pw_name); + else + fprintf(f, " %-10u", is->cuid); + + if ((gr = getgrgid(is->cgid))) + fprintf(f, " %-10s", gr->gr_name); + else + fprintf(f, " %-10u", is->cgid); + + if ((pw = getpwuid(is->uid))) + fprintf(f, " %-10s", pw->pw_name); + else + fprintf(f, " %-10u", is->uid); + + if ((gr = getgrgid(is->gid))) + fprintf(f, " %-10s\n", gr->gr_name); + else + fprintf(f, " %-10u\n", is->gid); +} + +void ipc_print_size(int unit, char *msg, uint64_t size, const char *end, + int width) +{ + char 
format[32]; + + if (!msg) + /* NULL */ ; + else if (msg[strlen(msg) - 1] == '=') + printf("%s", msg); + else if (unit == IPC_UNIT_BYTES) + printf(_("%s (bytes) = "), msg); + else if (unit == IPC_UNIT_KB) + printf(_("%s (kbytes) = "), msg); + else + printf("%s = ", msg); + + switch (unit) { + case IPC_UNIT_DEFAULT: + case IPC_UNIT_BYTES: + snprintf(format, sizeof(format), "%%%dju", width); + printf(format, size); + break; + case IPC_UNIT_KB: + snprintf(format, sizeof(format), "%%%dju", width); + printf(format, size / 1024); + break; + case IPC_UNIT_HUMAN: + { + char *tmp; + snprintf(format, sizeof(format), "%%%ds", width); + printf(format, (tmp = size_to_human_string(SIZE_SUFFIX_1LETTER, size))); + free(tmp); + break; + } + default: + /* impossible occurred */ + abort(); + } + + if (end) + printf("%s", end); +} diff --git a/sys-utils/ipcutils.h b/sys-utils/ipcutils.h new file mode 100644 index 0000000..db85f57 --- /dev/null +++ b/sys-utils/ipcutils.h @@ -0,0 +1,187 @@ +#ifndef UTIL_LINUX_IPCUTILS_H +#define UTIL_LINUX_IPCUTILS_H + +#include <stdio.h> +#include <stdlib.h> +#include <sys/ipc.h> +#include <sys/msg.h> +#include <sys/sem.h> +#include <sys/shm.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> +#include <grp.h> +#include <pwd.h> +#include <stdint.h> + +/* + * SHM_DEST and SHM_LOCKED are defined in kernel headers, but inside + * #ifdef __KERNEL__ ... 
#endif + */ +#ifndef SHM_DEST + /* shm_mode upper byte flags */ +# define SHM_DEST 01000 /* segment will be destroyed on last detach */ +# define SHM_LOCKED 02000 /* segment will not be swapped */ +#endif + +/* For older kernels the same holds for the defines below */ +#ifndef MSG_STAT +# define MSG_STAT 11 +# define MSG_INFO 12 +#endif + +#ifndef SHM_STAT +# define SHM_STAT 13 +# define SHM_INFO 14 +struct shm_info { + int used_ids; + unsigned long shm_tot; /* total allocated shm */ + unsigned long shm_rss; /* total resident shm */ + unsigned long shm_swp; /* total swapped shm */ + unsigned long swap_attempts; + unsigned long swap_successes; +}; +#endif + +#ifndef SEM_STAT +# define SEM_STAT 18 +# define SEM_INFO 19 +#endif + +/* Some versions of libc only define IPC_INFO when __USE_GNU is defined. */ +#ifndef IPC_INFO +# define IPC_INFO 3 +#endif + +/* + * * The last arg of semctl is a union semun, but where is it defined? X/OPEN + * * tells us to define it ourselves, but until recently Linux include files + * * would also define it. + * */ +#ifndef HAVE_UNION_SEMUN +/* according to X/OPEN we have to define it ourselves */ +union semun { + int val; + struct semid_ds *buf; + unsigned short int *array; + struct seminfo *__buf; +}; +#endif + +/* + * X/OPEN (Jan 1987) does not define fields key, seq in struct ipc_perm; + * glibc-1.09 has no support for sysv ipc. + * glibc 2 uses __key, __seq + */ +#if defined (__GLIBC__) && __GLIBC__ >= 2 +# define KEY __key +#else +# define KEY key +#endif + +/* Size printing in ipcs is using these. 
*/ +enum { + IPC_UNIT_DEFAULT, + IPC_UNIT_BYTES, + IPC_UNIT_KB, + IPC_UNIT_HUMAN +}; + +struct ipc_limits { + uint64_t shmmni; /* max number of segments */ + uint64_t shmmax; /* max segment size */ + uint64_t shmall; /* max total shared memory */ + uint64_t shmmin; /* min segment size */ + + int semmni; /* max number of arrays */ + int semmsl; /* max semaphores per array */ + int semmns; /* max semaphores system wide */ + int semopm; /* max ops per semop call */ + unsigned int semvmx; /* semaphore max value (constant) */ + + int msgmni; /* max queues system wide */ + uint64_t msgmax; /* max size of message */ + int msgmnb; /* default max size of queue */ +}; + +extern int ipc_msg_get_limits(struct ipc_limits *lim); +extern int ipc_sem_get_limits(struct ipc_limits *lim); +extern int ipc_shm_get_limits(struct ipc_limits *lim); + +struct ipc_stat { + int id; + key_t key; + uid_t uid; /* current uid */ + gid_t gid; /* current gid */ + uid_t cuid; /* creator uid */ + gid_t cgid; /* creator gid */ + unsigned int mode; +}; + +extern void ipc_print_perms(FILE *f, struct ipc_stat *is); +extern void ipc_print_size(int unit, char *msg, uint64_t size, const char *end, int width); + +/* See 'struct shmid_kernel' in kernel sources + */ +struct shm_data { + struct ipc_stat shm_perm; + + uint64_t shm_nattch; + uint64_t shm_segsz; + int64_t shm_atim; /* __kernel_time_t is signed long */ + int64_t shm_dtim; + int64_t shm_ctim; + pid_t shm_cprid; + pid_t shm_lprid; + uint64_t shm_rss; + uint64_t shm_swp; + + struct shm_data *next; +}; + +extern int ipc_shm_get_info(int id, struct shm_data **shmds); +extern void ipc_shm_free_info(struct shm_data *shmds); + +/* See 'struct sem_array' in kernel sources + */ +struct sem_elem { + int semval; + int ncount; /* processes waiting on increase semval */ + int zcount; /* processes waiting on semval set to zero */ + pid_t pid; /* process last executed semop(2) call */ +}; +struct sem_data { + struct ipc_stat sem_perm; + + int64_t sem_ctime; + 
int64_t sem_otime; + uint64_t sem_nsems; + + struct sem_elem *elements; + struct sem_data *next; +}; + +extern int ipc_sem_get_info(int id, struct sem_data **semds); +extern void ipc_sem_free_info(struct sem_data *semds); + +/* See 'struct msg_queue' in kernel sources + */ +struct msg_data { + struct ipc_stat msg_perm; + + int64_t q_stime; + int64_t q_rtime; + int64_t q_ctime; + uint64_t q_cbytes; + uint64_t q_qnum; + uint64_t q_qbytes; + pid_t q_lspid; + pid_t q_lrpid; + + struct msg_data *next; +}; + +extern int ipc_msg_get_info(int id, struct msg_data **msgds); +extern void ipc_msg_free_info(struct msg_data *msgds); + +#endif /* UTIL_LINUX_IPCUTILS_H */ diff --git a/sys-utils/irq-common.c b/sys-utils/irq-common.c new file mode 100644 index 0000000..11a4e4e --- /dev/null +++ b/sys-utils/irq-common.c @@ -0,0 +1,554 @@ +/* + * irq-common.c - functions to display kernel interrupt information. + * + * Copyright (C) 2019 zhenwei pi <pizhenwei@bytedance.com> + * Copyright (C) 2020 Karel Zak <kzak@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#include <libsmartcols.h> + +#include "c.h" +#include "nls.h" +#include "pathnames.h" +#include "strutils.h" +#include "xalloc.h" + +#include "irq-common.h" + +#define IRQ_INFO_LEN 64 + +struct colinfo { + const char *name; + double whint; + int flags; + const char *help; + int json_type; +}; + +static const struct colinfo infos[] = { + [COL_IRQ] = {"IRQ", 0.10, SCOLS_FL_RIGHT, N_("interrupts"), SCOLS_JSON_STRING}, + [COL_TOTAL] = {"TOTAL", 0.10, SCOLS_FL_RIGHT, N_("total count"), SCOLS_JSON_NUMBER}, + [COL_DELTA] = {"DELTA", 0.10, SCOLS_FL_RIGHT, N_("delta count"), SCOLS_JSON_NUMBER}, + [COL_NAME] = {"NAME", 0.70, SCOLS_FL_TRUNC, N_("name"), SCOLS_JSON_STRING}, +}; + +/* make softirq friendly to end-user */ +struct softirq_desc { + char *irq; + char *desc; +} softirq_descs[] = { + { .irq = "HI", .desc = "high priority tasklet softirq" }, + { .irq = "TIMER", .desc = "timer softirq" }, + { .irq = "NET_TX", .desc = "network transmit softirq", }, + { .irq = "NET_RX", .desc = "network receive softirq" }, + { .irq = "BLOCK", .desc = "block device softirq" }, + { .irq = "IRQ_POLL", .desc = "IO poll softirq" }, + { .irq = "TASKLET", .desc = "normal priority tasklet softirq" }, + { .irq = "SCHED", .desc = "schedule softirq" }, + { .irq = "HRTIMER", .desc = "high resolution timer softirq" }, + { .irq = "RCU", .desc = "RCU softirq" }, +}; + +static void get_softirq_desc(struct irq_info *curr) +{ + int i, size = ARRAY_SIZE(softirq_descs); + + for (i = 0; i < size; i++) { + if (!strcmp(curr->irq, softirq_descs[i].irq)) + break; + } + + if (i < size) + 
curr->name = xstrdup(softirq_descs[i].desc); + else + curr->name = xstrdup(""); +} + +int irq_column_name_to_id(const char *name, size_t namesz) +{ + size_t i; + + assert(name); + for (i = 0; i < ARRAY_SIZE(infos); i++) { + const char *cn = infos[i].name; + + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) + return i; + } + warnx(_("unknown column: %s"), name); + return -1; +} + +static inline int get_column_id(struct irq_output *out, size_t const num) +{ + assert(num < out->ncolumns); + assert(out->columns[num] < (int)ARRAY_SIZE(infos)); + + return out->columns[num]; +} + +static inline const struct colinfo *get_column_info( + struct irq_output *out, unsigned num) +{ + return &infos[get_column_id(out, num)]; +} + +void irq_print_columns(FILE *f, int nodelta) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(infos); i++) { + if (nodelta && i == COL_DELTA) + continue; + fprintf(f, " %-5s %s\n", infos[i].name, _(infos[i].help)); + } +} + +static struct libscols_table *new_scols_table(struct irq_output *out) +{ + size_t i; + struct libscols_table *table; + + table = scols_new_table(); + if (!table) { + warn(_("failed to initialize output table")); + return NULL; + } + scols_table_enable_json(table, out->json); + scols_table_enable_noheadings(table, out->no_headings); + scols_table_enable_export(table, out->pairs); + + if (out->json) + scols_table_set_name(table, "interrupts"); + + for (i = 0; i < out->ncolumns; i++) { + const struct colinfo *col = get_column_info(out, i); + int flags = col->flags; + struct libscols_column *cl; + + cl = scols_table_new_column(table, col->name, col->whint, flags); + if (cl == NULL) { + warnx(_("failed to initialize output column")); + goto err; + } + if (out->json) + scols_column_set_json_type(cl, col->json_type); + } + + return table; + err: + scols_unref_table(table); + return NULL; +} + +static struct libscols_line *new_scols_line(struct libscols_table *table) +{ + struct libscols_line *line = scols_table_new_line(table, NULL); + 
if (!line) { + warn(_("failed to add line to output")); + return NULL; + } + return line; +} + +static void add_scols_line(struct irq_output *out, + struct irq_info *info, + struct libscols_table *table) +{ + size_t i; + struct libscols_line *line = new_scols_line(table); + + for (i = 0; i < out->ncolumns; i++) { + char *str = NULL; + + switch (get_column_id(out, i)) { + case COL_IRQ: + xasprintf(&str, "%s", info->irq); + break; + case COL_TOTAL: + xasprintf(&str, "%ld", info->total); + break; + case COL_DELTA: + xasprintf(&str, "%ld", info->delta); + break; + case COL_NAME: + xasprintf(&str, "%s", info->name); + break; + default: + break; + } + + if (str && scols_line_refer_data(line, i, str) != 0) + err_oom(); + } +} + +static char *remove_repeated_spaces(char *str) +{ + char *inp = str, *outp = str; + uint8_t prev_space = 0; + + while (*inp) { + if (isspace(*inp)) { + if (!prev_space) { + *outp++ = ' '; + prev_space = 1; + } + } else { + *outp++ = *inp; + prev_space = 0; + } + ++inp; + } + *outp = '\0'; + return str; +} + +/* + * irqinfo - parse the system's interrupts + */ +static struct irq_stat *get_irqinfo(int softirq) +{ + FILE *irqfile; + char *line = NULL, *tmp; + size_t len = 0; + struct irq_stat *stat; + struct irq_info *curr; + + /* NAME + ':' + 11 bytes/cpu + IRQ_NAME_LEN */ + stat = xcalloc(1, sizeof(*stat)); + + stat->irq_info = xmalloc(sizeof(*stat->irq_info) * IRQ_INFO_LEN); + stat->nr_irq_info = IRQ_INFO_LEN; + + if (softirq) + irqfile = fopen(_PATH_PROC_SOFTIRQS, "r"); + else + irqfile = fopen(_PATH_PROC_INTERRUPTS, "r"); + if (!irqfile) { + warn(_("cannot open %s"), _PATH_PROC_INTERRUPTS); + goto free_stat; + } + + /* read header firstly */ + if (getline(&line, &len, irqfile) < 0) { + warn(_("cannot read %s"), _PATH_PROC_INTERRUPTS); + goto close_file; + } + + tmp = line; + while ((tmp = strstr(tmp, "CPU")) != NULL) { + tmp += 3; /* skip this "CPU", find next */ + stat->nr_active_cpu++; + } + + stat->cpus = xcalloc(stat->nr_active_cpu, 
sizeof(struct irq_cpu)); + + /* parse each line of _PATH_PROC_INTERRUPTS */ + while (getline(&line, &len, irqfile) >= 0) { + unsigned long count; + size_t index; + int length; + + tmp = strchr(line, ':'); + if (!tmp) + continue; + + length = strlen(line); + + curr = stat->irq_info + stat->nr_irq++; + memset(curr, 0, sizeof(*curr)); + *tmp = '\0'; + curr->irq = xstrdup(line); + ltrim_whitespace((unsigned char *)curr->irq); + + tmp += 1; + for (index = 0; (index < stat->nr_active_cpu) && (tmp - line < length); index++) { + struct irq_cpu *cpu = &stat->cpus[index]; + + if (sscanf(tmp, " %10lu", &count) != 1) + continue; + curr->total += count; + cpu->total += count; + stat->total_irq += count; + + tmp += 11; + } + + /* softirq always has no desc, add additional desc for softirq */ + if (softirq) + get_softirq_desc(curr); + else { + if (tmp - line < length) { + /* strip all space before desc */ + while (isspace(*tmp)) + tmp++; + tmp = remove_repeated_spaces(tmp); + rtrim_whitespace((unsigned char *)tmp); + curr->name = xstrdup(tmp); + } else /* no irq name string, we have to set '\0' here */ + curr->name = xstrdup(""); + } + + if (stat->nr_irq == stat->nr_irq_info) { + stat->nr_irq_info *= 2; + stat->irq_info = xrealloc(stat->irq_info, + sizeof(*stat->irq_info) * stat->nr_irq_info); + } + } + fclose(irqfile); + free(line); + return stat; + + close_file: + fclose(irqfile); + free_stat: + free(stat->irq_info); + free(stat->cpus); + free(stat); + free(line); + return NULL; +} + +void free_irqstat(struct irq_stat *stat) +{ + size_t i; + + if (!stat) + return; + + for (i = 0; i < stat->nr_irq; i++) { + free(stat->irq_info[i].name); + free(stat->irq_info[i].irq); + } + + free(stat->irq_info); + free(stat->cpus); + free(stat); +} + +static inline int cmp_name(const struct irq_info *a, + const struct irq_info *b) +{ + return (strcmp(a->name, b->name) > 0) ? 
1 : 0; +} + +static inline int cmp_total(const struct irq_info *a, + const struct irq_info *b) +{ + return a->total < b->total; +} + +static inline int cmp_delta(const struct irq_info *a, + const struct irq_info *b) +{ + return a->delta < b->delta; +} + +static inline int cmp_interrupts(const struct irq_info *a, + const struct irq_info *b) +{ + return (strverscmp(a->irq, b->irq) > 0) ? 1 : 0; +} + +static void sort_result(struct irq_output *out, + struct irq_info *result, + size_t nmemb) +{ + irq_cmp_t *func = cmp_total; /* default */ + + if (out->sort_cmp_func) + func = out->sort_cmp_func; + + qsort(result, nmemb, sizeof(*result), + (int (*)(const void *, const void *)) func); +} + +void set_sort_func_by_name(struct irq_output *out, const char *name) +{ + if (strcasecmp(name, "IRQ") == 0) + out->sort_cmp_func = cmp_interrupts; + else if (strcasecmp(name, "TOTAL") == 0) + out->sort_cmp_func = cmp_total; + else if (strcasecmp(name, "DELTA") == 0) + out->sort_cmp_func = cmp_delta; + else if (strcasecmp(name, "NAME") == 0) + out->sort_cmp_func = cmp_name; + else + errx(EXIT_FAILURE, _("unsupported column name to sort output")); +} + +void set_sort_func_by_key(struct irq_output *out, char c) +{ + switch (c) { + case 'i': + out->sort_cmp_func = cmp_interrupts; + break; + case 't': + out->sort_cmp_func = cmp_total; + break; + case 'd': + out->sort_cmp_func = cmp_delta; + break; + case 'n': + out->sort_cmp_func = cmp_name; + break; + } +} + +struct libscols_table *get_scols_cpus_table(struct irq_output *out, + struct irq_stat *prev, + struct irq_stat *curr) +{ + struct libscols_table *table; + struct libscols_column *cl; + struct libscols_line *ln; + char colname[sizeof("cpu") + sizeof(stringify_value(LONG_MAX))]; + size_t i; + + if (prev) { + for (i = 0; i < curr->nr_active_cpu; i++) { + struct irq_cpu *pre = &prev->cpus[i]; + struct irq_cpu *cur = &curr->cpus[i]; + + cur->delta = cur->total - pre->total; + } + } + + table = scols_new_table(); + if (!table) { + 
warn(_("failed to initialize output table")); + return NULL; + } + scols_table_enable_json(table, out->json); + scols_table_enable_noheadings(table, out->no_headings); + scols_table_enable_export(table, out->pairs); + + if (out->json) + scols_table_set_name(table, _("cpu-interrupts")); + else + scols_table_new_column(table, "", 0, SCOLS_FL_RIGHT); + + for (i = 0; i < curr->nr_active_cpu; i++) { + snprintf(colname, sizeof(colname), "cpu%zu", i); + cl = scols_table_new_column(table, colname, 0, SCOLS_FL_RIGHT); + if (cl == NULL) { + warnx(_("failed to initialize output column")); + goto err; + } + if (out->json) + scols_column_set_json_type(cl, SCOLS_JSON_STRING); + } + + /* per cpu % of total */ + ln = new_scols_line(table); + if (!ln || (!out->json && scols_line_set_data(ln, 0, "%irq:") != 0)) + goto err; + + for (i = 0; i < curr->nr_active_cpu; i++) { + struct irq_cpu *cpu = &curr->cpus[i]; + char *str; + + xasprintf(&str, "%0.1f", (double)((long double) cpu->total / (long double) curr->total_irq * 100.0)); + if (str && scols_line_refer_data(ln, i + 1, str) != 0) + goto err; + } + + /* per cpu % of delta */ + ln = new_scols_line(table); + /* xgettext:no-c-format */ + if (!ln || (!out->json && scols_line_set_data(ln, 0, _("%delta:")) != 0)) + goto err; + + for (i = 0; i < curr->nr_active_cpu; i++) { + struct irq_cpu *cpu = &curr->cpus[i]; + char *str; + + if (!curr->delta_irq) + continue; + xasprintf(&str, "%0.1f", (double)((long double) cpu->delta / (long double) curr->delta_irq * 100.0)); + if (str && scols_line_refer_data(ln, i + 1, str) != 0) + goto err; + } + + return table; + err: + scols_unref_table(table); + return NULL; +} + +struct libscols_table *get_scols_table(struct irq_output *out, + struct irq_stat *prev, + struct irq_stat **xstat, + int softirq) +{ + struct libscols_table *table; + struct irq_info *result; + struct irq_stat *stat; + size_t size; + size_t i; + + /* the stats */ + stat = get_irqinfo(softirq); + if (!stat) + return NULL; + + size = 
sizeof(*stat->irq_info) * stat->nr_irq; + result = xmalloc(size); + memcpy(result, stat->irq_info, size); + + if (prev) { + stat->delta_irq = 0; + for (i = 0; i < stat->nr_irq; i++) { + struct irq_info *cur = &result[i]; + struct irq_info *pre = &prev->irq_info[i]; + + cur->delta = cur->total - pre->total; + stat->delta_irq += cur->delta; + } + } + sort_result(out, result, stat->nr_irq); + + table = new_scols_table(out); + if (!table) { + free(result); + free_irqstat(stat); + return NULL; + } + + for (i = 0; i < stat->nr_irq; i++) + add_scols_line(out, &result[i], table); + + free(result); + + if (xstat) + *xstat = stat; + else + free_irqstat(stat); + + return table; +} diff --git a/sys-utils/irq-common.h b/sys-utils/irq-common.h new file mode 100644 index 0000000..c4f1fa3 --- /dev/null +++ b/sys-utils/irq-common.h @@ -0,0 +1,72 @@ +#ifndef UTIL_LINUX_H_IRQ_COMMON +#define UTIL_LINUX_H_IRQ_COMMON + +#include "c.h" +#include "nls.h" + +/* supported columns */ +enum { + COL_IRQ = 0, + COL_TOTAL, + COL_DELTA, + COL_NAME, + + __COL_COUNT +}; + +struct irq_info { + char *irq; /* short name of this irq */ + char *name; /* descriptive name of this irq */ + unsigned long total; /* total count since system start up */ + unsigned long delta; /* delta count since previous update */ +}; + +struct irq_cpu { + unsigned long total; + unsigned long delta; +}; + +struct irq_stat { + unsigned long nr_irq; /* number of irq vector */ + unsigned long nr_irq_info; /* number of irq info */ + struct irq_info *irq_info; /* array of irq_info */ + struct irq_cpu *cpus; /* array of irq_cpu */ + size_t nr_active_cpu; /* number of active cpu */ + unsigned long total_irq; /* total irqs */ + unsigned long delta_irq; /* delta irqs */ +}; + + +typedef int (irq_cmp_t)(const struct irq_info *, const struct irq_info *); + +/* output definition */ +struct irq_output { + int columns[__COL_COUNT * 2]; + size_t ncolumns; + + irq_cmp_t *sort_cmp_func; + + unsigned int + json:1, /* JSON output */ + pairs:1, 
/* export, NAME="value" aoutput */ + no_headings:1; /* don't print header */ +}; + +int irq_column_name_to_id(char const *const name, size_t const namesz); +void free_irqstat(struct irq_stat *stat); + +void irq_print_columns(FILE *f, int nodelta); + +void set_sort_func_by_name(struct irq_output *out, const char *name); +void set_sort_func_by_key(struct irq_output *out, const char c); + +struct libscols_table *get_scols_table(struct irq_output *out, + struct irq_stat *prev, + struct irq_stat **xstat, + int softirq); + +struct libscols_table *get_scols_cpus_table(struct irq_output *out, + struct irq_stat *prev, + struct irq_stat *curr); + +#endif /* UTIL_LINUX_H_IRQ_COMMON */ diff --git a/sys-utils/irqtop.1 b/sys-utils/irqtop.1 new file mode 100644 index 0000000..055964a --- /dev/null +++ b/sys-utils/irqtop.1 @@ -0,0 +1,114 @@ +'\" t +.\" Title: irqtop +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-08-04 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "IRQTOP" "1" "2022-08-04" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +irqtop \- utility to display kernel interrupt information +.SH "SYNOPSIS" +.sp +\fBirqtop\fP [options] +.SH "DESCRIPTION" +.sp +Display kernel interrupt counter information in \fBtop\fP(1) style view. +.sp +The default output is subject to change. So whenever possible, you should avoid using default outputs in your scripts. Always explicitly define expected columns by using \fB\-\-output\fP. +.SH "OPTIONS" +.sp +\fB\-o\fP, \fB\-\-output\fP \fIlist\fP +.RS 4 +Specify which output columns to print. Use \fB\-\-help\fP to get a list of all supported columns. 
The default list of columns may be extended if list is specified in the format \fI+list\fP. +.RE +.sp +\fB\-c\fP, \fB\-\-cpu\-stat\fP \fImode\fP +.RS 4 +Show per\-cpu statistics by specified mode. Available modes are: \fBauto\fP, \fBenable\fP, \fBdisable\fP. The default option \fBauto\fP detects the width of window, then shows the per\-cpu statistics if the width of window is large enough to show a full line of statistics. +.RE +.sp +\fB\-d\fP, \fB\-\-delay\fP \fIseconds\fP +.RS 4 +Update interrupt output every \fIseconds\fP intervals. +.RE +.sp +\fB\-s\fP, \fB\-\-sort\fP \fIcolumn\fP +.RS 4 +Specify sort criteria by column name. See \fB\-\-help\fP output to get column names. The sort criteria may be changed in interactive mode. +.RE +.sp +\fB\-S\fP, \fB\-\-softirq\fP +.RS 4 +Show softirqs information. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "INTERACTIVE MODE KEY COMMANDS" +.sp +\fBi\fP +.RS 4 +sort by short irq name or number field +.RE +.sp +\fBt\fP +.RS 4 +sort by total count of interrupts (the default) +.RE +.sp +\fBd\fP +.RS 4 +sort by delta count of interrupts +.RE +.sp +\fBn\fP +.RS 4 +sort by long descriptive name field +.RE +.sp +\fBq Q\fP +.RS 4 +stop updates and exit program +.RE +.SH "AUTHORS" +.sp +.MTO "pizhenwei\(atbytedance.com" "Zhenwei Pi" "," +.MTO "kerolasa\(atiki.fi" "Sami Kerola" "," +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBirqtop\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/irqtop.1.adoc b/sys-utils/irqtop.1.adoc new file mode 100644 index 0000000..3f215d2 --- /dev/null +++ b/sys-utils/irqtop.1.adoc @@ -0,0 +1,71 @@ +//po4a: entry man manual += irqtop(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: irqtop + +== NAME + +irqtop - utility to display kernel interrupt information + +== SYNOPSIS + +*irqtop* [options] + +== DESCRIPTION + +Display kernel interrupt counter information in *top*(1) style view. + +The default output is subject to change. So whenever possible, you should avoid using default outputs in your scripts. Always explicitly define expected columns by using *--output*. + +== OPTIONS + +*-o*, *--output* _list_:: +Specify which output columns to print. Use *--help* to get a list of all supported columns. The default list of columns may be extended if list is specified in the format _+list_. + +*-c*, *--cpu-stat* _mode_:: +Show per-cpu statistics by specified mode. Available modes are: *auto*, *enable*, *disable*. The default option *auto* detects the width of window, then shows the per-cpu statistics if the width of window is large enough to show a full line of statistics. + +*-d*, *--delay* _seconds_:: +Update interrupt output every _seconds_ intervals. + +*-s*, *--sort* _column_:: +Specify sort criteria by column name. See *--help* output to get column names. The sort criteria may be changed in interactive mode. + +*-S*, *--softirq*:: +Show softirqs information. 
+ +include::man-common/help-version.adoc[] + +== INTERACTIVE MODE KEY COMMANDS + +*i*:: +sort by short irq name or number field + +*t*:: +sort by total count of interrupts (the default) + +*d*:: +sort by delta count of interrupts + +*n*:: +sort by long descriptive name field + +*q Q*:: +stop updates and exit program + +== AUTHORS + +mailto:pizhenwei@bytedance.com[Zhenwei Pi], +mailto:kerolasa@iki.fi[Sami Kerola], +mailto:kzak@redhat.com[Karel Zak] + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/irqtop.c b/sys-utils/irqtop.c new file mode 100644 index 0000000..adf7559 --- /dev/null +++ b/sys-utils/irqtop.c @@ -0,0 +1,398 @@ +/* + * irqtop.c - utility to display kernel interrupt information. + * + * Copyright (C) 2019 zhenwei pi <pizhenwei@bytedance.com> + * Copyright (C) 2020 Karel Zak <kzak@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include <ctype.h> +#include <errno.h> +#include <getopt.h> +#include <limits.h> +#include <locale.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/select.h> +#include <sys/signalfd.h> +#include <sys/time.h> +#include <sys/timerfd.h> +#include <sys/types.h> +#include <termios.h> +#include <unistd.h> + +#ifdef HAVE_SLCURSES_H +# include <slcurses.h> +#elif defined(HAVE_SLANG_SLCURSES_H) +# include <slang/slcurses.h> +#elif defined(HAVE_NCURSESW_NCURSES_H) && defined(HAVE_WIDECHAR) +# include <ncursesw/ncurses.h> +#elif defined(HAVE_NCURSES_H) +# include <ncurses.h> +#elif defined(HAVE_NCURSES_NCURSES_H) +# include <ncurses/ncurses.h> +#endif + +#ifdef HAVE_WIDECHAR +# include <wctype.h> +# include <wchar.h> +#endif + +#include <libsmartcols.h> + +#include "closestream.h" +#include "monotonic.h" +#include "pathnames.h" +#include "strutils.h" +#include "timeutils.h" +#include "ttyutils.h" +#include "xalloc.h" + +#include "irq-common.h" + +#define MAX_EVENTS 3 + +enum irqtop_cpustat_mode { + IRQTOP_CPUSTAT_AUTO, + IRQTOP_CPUSTAT_ENABLE, + IRQTOP_CPUSTAT_DISABLE, +}; + +/* top control struct */ +struct irqtop_ctl { + WINDOW *win; + int cols; + int rows; + char *hostname; + + struct itimerspec timer; + struct irq_stat *prev_stat; + + enum irqtop_cpustat_mode cpustat_mode; + unsigned int request_exit:1; + unsigned int softirq:1; +}; + +/* user's input parser */ +static void parse_input(struct irqtop_ctl *ctl, struct irq_output *out, char c) +{ + switch (c) { + case 'q': + case 'Q': + ctl->request_exit = 1; + break; + default: + set_sort_func_by_key(out, c); + break; + } +} + +static int update_screen(struct irqtop_ctl *ctl, struct irq_output 
*out) +{ + struct libscols_table *table, *cpus = NULL; + struct irq_stat *stat; + time_t now = time(NULL); + char timestr[64], *data, *data0, *p; + + /* make irqs table */ + table = get_scols_table(out, ctl->prev_stat, &stat, ctl->softirq); + if (!table) { + ctl->request_exit = 1; + return 1; + } + scols_table_enable_maxout(table, 1); + scols_table_enable_nowrap(table, 1); + scols_table_reduce_termwidth(table, 1); + + /* make cpus table */ + if (ctl->cpustat_mode != IRQTOP_CPUSTAT_DISABLE) { + cpus = get_scols_cpus_table(out, ctl->prev_stat, stat); + scols_table_reduce_termwidth(cpus, 1); + if (ctl->cpustat_mode == IRQTOP_CPUSTAT_AUTO) + scols_table_enable_nowrap(cpus, 1); + } + + /* print header */ + move(0, 0); + strtime_iso(&now, ISO_TIMESTAMP, timestr, sizeof(timestr)); + wprintw(ctl->win, _("irqtop | total: %ld delta: %ld | %s | %s\n\n"), + stat->total_irq, stat->delta_irq, ctl->hostname, timestr); + + /* print cpus table or not by -c option */ + if (cpus) { + scols_print_table_to_string(cpus, &data); + wprintw(ctl->win, "%s\n\n", data); + free(data); + } + + /* print irqs table */ + scols_print_table_to_string(table, &data0); + data = data0; + + p = strchr(data, '\n'); + if (p) { + /* print header in reverse mode */ + *p = '\0'; + attron(A_REVERSE); + wprintw(ctl->win, "%s\n", data); + attroff(A_REVERSE); + data = p + 1; + } + + wprintw(ctl->win, "%s", data); + free(data0); + + /* clean up */ + scols_unref_table(table); + if (ctl->prev_stat) + free_irqstat(ctl->prev_stat); + ctl->prev_stat = stat; + return 0; +} + +static int event_loop(struct irqtop_ctl *ctl, struct irq_output *out) +{ + int efd, sfd, tfd; + sigset_t sigmask; + struct signalfd_siginfo siginfo; + struct epoll_event ev, events[MAX_EVENTS]; + long int nr; + uint64_t unused; + int retval = 0; + + efd = epoll_create1(0); + + if ((tfd = timerfd_create(CLOCK_MONOTONIC, 0)) < 0) + err(EXIT_FAILURE, _("cannot not create timerfd")); + if (timerfd_settime(tfd, 0, &ctl->timer, NULL) != 0) + 
err(EXIT_FAILURE, _("cannot set timerfd")); + + ev.events = EPOLLIN; + ev.data.fd = tfd; + if (epoll_ctl(efd, EPOLL_CTL_ADD, tfd, &ev) != 0) + err(EXIT_FAILURE, _("epoll_ctl failed")); + + if (sigfillset(&sigmask) != 0) + err(EXIT_FAILURE, _("sigfillset failed")); + if (sigprocmask(SIG_BLOCK, &sigmask, NULL) != 0) + err(EXIT_FAILURE, _("sigprocmask failed")); + + sigaddset(&sigmask, SIGWINCH); + sigaddset(&sigmask, SIGTERM); + sigaddset(&sigmask, SIGINT); + sigaddset(&sigmask, SIGQUIT); + + if ((sfd = signalfd(-1, &sigmask, SFD_CLOEXEC)) < 0) + err(EXIT_FAILURE, _("cannot not create signalfd")); + + ev.events = EPOLLIN; + ev.data.fd = sfd; + if (epoll_ctl(efd, EPOLL_CTL_ADD, sfd, &ev) != 0) + err(EXIT_FAILURE, _("epoll_ctl failed")); + + ev.events = EPOLLIN; + ev.data.fd = STDIN_FILENO; + if (epoll_ctl(efd, EPOLL_CTL_ADD, STDIN_FILENO, &ev) != 0) + err(EXIT_FAILURE, _("epoll_ctl failed")); + + retval |= update_screen(ctl, out); + refresh(); + + while (!ctl->request_exit) { + const ssize_t nr_events = epoll_wait(efd, events, MAX_EVENTS, -1); + + for (nr = 0; nr < nr_events; nr++) { + if (events[nr].data.fd == tfd) { + if (read(tfd, &unused, sizeof(unused)) < 0) + warn(_("read failed")); + } else if (events[nr].data.fd == sfd) { + if (read(sfd, &siginfo, sizeof(siginfo)) < 0) { + warn(_("read failed")); + continue; + } + if (siginfo.ssi_signo == SIGWINCH) { + get_terminal_dimension(&ctl->cols, &ctl->rows); +#if HAVE_RESIZETERM + resizeterm(ctl->rows, ctl->cols); +#endif + } + else { + ctl->request_exit = 1; + break; + } + } else if (events[nr].data.fd == STDIN_FILENO) { + char c; + + if (read(STDIN_FILENO, &c, 1) != 1) + warn(_("read failed")); + parse_input(ctl, out, c); + } else + abort(); + retval |= update_screen(ctl, out); + refresh(); + } + } + return retval; +} + +static void __attribute__((__noreturn__)) usage(void) +{ + fputs(USAGE_HEADER, stdout); + printf(_(" %s [options]\n"), program_invocation_short_name); + fputs(USAGE_SEPARATOR, stdout); + + 
puts(_("Interactive utility to display kernel interrupt information.")); + + fputs(USAGE_OPTIONS, stdout); + fputs(_(" -c, --cpu-stat <mode> show per-cpu stat (auto, enable, disable)\n"), stdout); + fputs(_(" -d, --delay <secs> delay updates\n"), stdout); + fputs(_(" -o, --output <list> define which output columns to use\n"), stdout); + fputs(_(" -s, --sort <column> specify sort column\n"), stdout); + fputs(_(" -S, --softirq show softirqs instead of interrupts\n"), stdout); + fputs(USAGE_SEPARATOR, stdout); + printf(USAGE_HELP_OPTIONS(22)); + + fputs(_("\nThe following interactive key commands are valid:\n"), stdout); + fputs(_(" i sort by IRQ\n"), stdout); + fputs(_(" t sort by TOTAL\n"), stdout); + fputs(_(" d sort by DELTA\n"), stdout); + fputs(_(" n sort by NAME\n"), stdout); + fputs(_(" q Q quit program\n"), stdout); + + fputs(USAGE_COLUMNS, stdout); + irq_print_columns(stdout, 0); + + printf(USAGE_MAN_TAIL("irqtop(1)")); + exit(EXIT_SUCCESS); +} + +static void parse_args( struct irqtop_ctl *ctl, + struct irq_output *out, + int argc, + char **argv) +{ + const char *outarg = NULL; + static const struct option longopts[] = { + {"cpu-stat", required_argument, NULL, 'c'}, + {"delay", required_argument, NULL, 'd'}, + {"sort", required_argument, NULL, 's'}, + {"output", required_argument, NULL, 'o'}, + {"softirq", no_argument, NULL, 'S'}, + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {NULL, 0, NULL, 0} + }; + int o; + + while ((o = getopt_long(argc, argv, "c:d:o:s:ShV", longopts, NULL)) != -1) { + switch (o) { + case 'c': + if (!strcmp(optarg, "auto")) + ctl->cpustat_mode = IRQTOP_CPUSTAT_AUTO; + else if (!strcmp(optarg, "enable")) + ctl->cpustat_mode = IRQTOP_CPUSTAT_ENABLE; + else if (!strcmp(optarg, "disable")) + ctl->cpustat_mode = IRQTOP_CPUSTAT_DISABLE; + else + errx(EXIT_FAILURE, _("unsupported mode '%s'"), optarg); + break; + case 'd': + { + struct timeval delay; + + strtotimeval_or_err(optarg, &delay, + _("failed to parse 
delay argument")); + TIMEVAL_TO_TIMESPEC(&delay, &ctl->timer.it_interval); + ctl->timer.it_value = ctl->timer.it_interval; + } + break; + case 's': + set_sort_func_by_name(out, optarg); + break; + case 'o': + outarg = optarg; + break; + case 'S': + ctl->softirq = 1; + break; + case 'V': + print_version(EXIT_SUCCESS); + case 'h': + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + } + + /* default */ + if (!out->ncolumns) { + out->columns[out->ncolumns++] = COL_IRQ; + out->columns[out->ncolumns++] = COL_TOTAL; + out->columns[out->ncolumns++] = COL_DELTA; + out->columns[out->ncolumns++] = COL_NAME; + } + + /* add -o [+]<list> to output */ + if (outarg && string_add_to_idarray(outarg, out->columns, + ARRAY_SIZE(out->columns), + &out->ncolumns, + irq_column_name_to_id) < 0) + exit(EXIT_FAILURE); +} + +/* + * irqtop entry point: parse the options, switch the terminal to curses mode, + * run event_loop() until exit is requested, then release the resources and + * restore the terminal settings that were saved at startup. + */ +int main(int argc, char **argv) +{ + int is_tty = 0; + struct termios saved_tty; + struct irq_output out = { + .ncolumns = 0 + }; + struct irqtop_ctl ctl = { + .timer.it_interval = {3, 0}, + .timer.it_value = {3, 0} + }; + + setlocale(LC_ALL, ""); + + parse_args(&ctl, &out, argc, argv); + + is_tty = isatty(STDIN_FILENO); + if (is_tty && tcgetattr(STDIN_FILENO, &saved_tty) == -1) { + fputs(_("terminal setting retrieval"), stdout); + /* saved_tty holds nothing valid, so skip the tcsetattr() restore at exit */ + is_tty = 0; + } + + ctl.win = initscr(); + get_terminal_dimension(&ctl.cols, &ctl.rows); +#if HAVE_RESIZETERM + resizeterm(ctl.rows, ctl.cols); +#endif + curs_set(0); + + ctl.hostname = xgethostname(); + event_loop(&ctl, &out); + + free_irqstat(ctl.prev_stat); + free(ctl.hostname); + + /* only write back settings that tcgetattr() actually filled in */ + if (is_tty) + tcsetattr(STDIN_FILENO, TCSAFLUSH, &saved_tty); + delwin(ctl.win); + endwin(); + + return EXIT_SUCCESS; +} diff --git a/sys-utils/ldattach.8 b/sys-utils/ldattach.8 new file mode 100644 index 0000000..1436090 --- /dev/null +++ b/sys-utils/ldattach.8 @@ -0,0 +1,198 @@ +'\" t +.\" Title: ldattach +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" 
Language: English +.\" +.TH "LDATTACH" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +ldattach \- attach a line discipline to a serial line +.SH "SYNOPSIS" +.sp +\fBldattach\fP [\fB\-1278denoVh\fP] [\fB\-i\fP \fIiflag\fP] [\fB\-s\fP \fIspeed\fP] \fIldisc device\fP +.SH "DESCRIPTION" +.sp +The \fBldattach\fP daemon opens the specified \fIdevice\fP file (which should refer to a serial device) and attaches the line discipline \fIldisc\fP to it for processing of the sent and/or received data. It then goes into the background keeping the device open so that the line discipline stays loaded. +.sp +The line discipline \fIldisc\fP may be specified either by name or by number. +.sp +In order to detach the line discipline, \fBkill\fP(1) the \fBldattach\fP process. +.sp +With no arguments, \fBldattach\fP prints usage information. +.SH "LINE DISCIPLINES" +.sp +Depending on the kernel release, the following line disciplines are supported: +.sp +\fBTTY\fP(\fB0\fP) +.RS 4 +The default line discipline, providing transparent operation (raw mode) as well as the habitual terminal line editing capabilities (cooked mode). +.RE +.sp +\fBSLIP\fP(\fB1\fP) +.RS 4 +Serial Line IP (SLIP) protocol processor for transmitting TCP/IP packets over serial lines. +.RE +.sp +\fBMOUSE\fP(\fB2\fP) +.RS 4 +Device driver for RS232 connected pointing devices (serial mice). +.RE +.sp +\fBPPP\fP(\fB3\fP) +.RS 4 +Point to Point Protocol (PPP) processor for transmitting network packets over serial lines. +.RE +.sp +\fBSTRIP\fP(\fB4\fP); \fBAX25\fP(\fB5\fP); \fBX25\fP(\fB6\fP) +.RS 4 +Line driver for transmitting X.25 packets over asynchronous serial lines. +.RE +.sp +\fB6PACK\fP(\fB7\fP); \fBR3964\fP(\fB9\fP) +.RS 4 +Driver for Simatic R3964 module. 
+.RE +.sp +\fBIRDA\fP(\fB11\fP) +.RS 4 +Linux IrDa (infrared data transmission) driver \- see \c +.URL "http://irda.sourceforge.net/" "" "" +.RE +.sp +\fBHDLC\fP(\fB13\fP) +.RS 4 +Synchronous HDLC driver. +.RE +.sp +\fBSYNC_PPP\fP(\fB14\fP) +.RS 4 +Synchronous PPP driver. +.RE +.sp +\fBHCI\fP(\fB15\fP) +.RS 4 +Bluetooth HCI UART driver. +.RE +.sp +\fBGIGASET_M101\fP(\fB16\fP) +.RS 4 +Driver for Siemens Gigaset M101 serial DECT adapter. +.RE +.sp +\fBPPS\fP(\fB18\fP) +.RS 4 +Driver for serial line Pulse Per Second (PPS) source. +.RE +.sp +\fBGSM0710\fP(\fB21\fP) +.RS 4 +Driver for GSM 07.10 multiplexing protocol modem (CMUX). +.RE +.SH "OPTIONS" +.sp +\fB\-1\fP, \fB\-\-onestopbit\fP +.RS 4 +Set the number of stop bits of the serial line to one. +.RE +.sp +\fB\-2\fP, \fB\-\-twostopbits\fP +.RS 4 +Set the number of stop bits of the serial line to two. +.RE +.sp +\fB\-7\fP, \fB\-\-sevenbits\fP +.RS 4 +Set the character size of the serial line to 7 bits. +.RE +.sp +\fB\-8\fP, \fB\-\-eightbits\fP +.RS 4 +Set the character size of the serial line to 8 bits. +.RE +.sp +\fB\-d\fP, \fB\-\-debug\fP +.RS 4 +Keep \fBldattach\fP in the foreground so that it can be interrupted or debugged, and to print verbose messages about its progress to standard error output. +.RE +.sp +\fB\-e\fP, \fB\-\-evenparity\fP +.RS 4 +Set the parity of the serial line to even. +.RE +.sp +\fB\-i\fP, \fB\-\-iflag\fP \fIvalue\fP... +.RS 4 +Set the specified bits in the c_iflag word of the serial line. The given \fIvalue\fP may be a number or a symbolic name. If \fIvalue\fP is prefixed by a minus sign, the specified bits are cleared instead. Several comma\-separated values may be given in order to set and clear multiple bits. +.RE +.sp +\fB\-n\fP, \fB\-\-noparity\fP +.RS 4 +Set the parity of the serial line to none. +.RE +.sp +\fB\-o\fP, \fB\-\-oddparity\fP +.RS 4 +Set the parity of the serial line to odd. 
+.RE +.sp +\fB\-s\fP, \fB\-\-speed\fP \fIvalue\fP +.RS 4 +Set the speed (the baud rate) of the serial line to the specified \fIvalue\fP. +.RE +.sp +\fB\-c\fP, \fB\-\-intro\-command\fP \fIstring\fP +.RS 4 +Define an intro command that is sent through the serial line before the invocation of \fBldattach\fP. E.g. in conjunction with line discipline GSM0710, the command \(aqAT+CMUX=0\(rsr\(aq is commonly suitable to switch the modem into the CMUX mode. +.RE +.sp +\fB\-p\fP, \fB\-\-pause\fP \fIvalue\fP +.RS 4 +Sleep for \fIvalue\fP seconds before the invocation of \fBldattach\fP. Default is one second. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "AUTHORS" +.sp +.MTO "tilman\(atimap.cc" "Tilman Schmidt" "" +.SH "SEE ALSO" +.sp +\fBinputattach\fP(1), +\fBttys\fP(4) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBldattach\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/ldattach.8.adoc b/sys-utils/ldattach.8.adoc new file mode 100644 index 0000000..07549fb --- /dev/null +++ b/sys-utils/ldattach.8.adoc @@ -0,0 +1,129 @@ +//po4a: entry man manual +//// +Copyright 2008 Tilman Schmidt (tilman@imap.cc) +May be distributed under the GNU General Public License version 2 or later +//// += ldattach(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: ldattach + +== NAME + +ldattach - attach a line discipline to a serial line + +== SYNOPSIS + +*ldattach* [*-1278denoVh*] [*-i* _iflag_] [*-s* _speed_] _ldisc device_ + +== DESCRIPTION + +The *ldattach* daemon opens the specified _device_ file (which should refer to a serial device) and attaches the line discipline _ldisc_ to it for processing of the sent and/or received data. It then goes into the background keeping the device open so that the line discipline stays loaded. + +The line discipline _ldisc_ may be specified either by name or by number. + +In order to detach the line discipline, *kill*(1) the *ldattach* process. + +With no arguments, *ldattach* prints usage information. + +== LINE DISCIPLINES + +Depending on the kernel release, the following line disciplines are supported: + +*TTY*(*0*):: +The default line discipline, providing transparent operation (raw mode) as well as the habitual terminal line editing capabilities (cooked mode). + +*SLIP*(*1*):: +Serial Line IP (SLIP) protocol processor for transmitting TCP/IP packets over serial lines. + +*MOUSE*(*2*):: +Device driver for RS232 connected pointing devices (serial mice). + +*PPP*(*3*):: +Point to Point Protocol (PPP) processor for transmitting network packets over serial lines. + +*STRIP*(*4*); *AX25*(*5*); *X25*(*6*):: +Line driver for transmitting X.25 packets over asynchronous serial lines. + +*6PACK*(*7*); *R3964*(*9*):: +Driver for Simatic R3964 module. 
+ +*IRDA*(*11*):: +Linux IrDa (infrared data transmission) driver - see http://irda.sourceforge.net/ + +*HDLC*(*13*):: +Synchronous HDLC driver. + +*SYNC_PPP*(*14*):: +Synchronous PPP driver. + +*HCI*(*15*):: +Bluetooth HCI UART driver. + +*GIGASET_M101*(*16*):: +Driver for Siemens Gigaset M101 serial DECT adapter. + +*PPS*(*18*):: +Driver for serial line Pulse Per Second (PPS) source. + +*GSM0710*(*21*):: +Driver for GSM 07.10 multiplexing protocol modem (CMUX). + +== OPTIONS + +*-1*, *--onestopbit*:: +Set the number of stop bits of the serial line to one. + +*-2*, *--twostopbits*:: +Set the number of stop bits of the serial line to two. + +*-7*, *--sevenbits*:: +Set the character size of the serial line to 7 bits. + +*-8*, *--eightbits*:: +Set the character size of the serial line to 8 bits. + +*-d*, *--debug*:: +Keep *ldattach* in the foreground so that it can be interrupted or debugged, and to print verbose messages about its progress to standard error output. + +*-e*, *--evenparity*:: +Set the parity of the serial line to even. + +*-i*, *--iflag* [*-*]_value_...:: +Set the specified bits in the c_iflag word of the serial line. The given _value_ may be a number or a symbolic name. If _value_ is prefixed by a minus sign, the specified bits are cleared instead. Several comma-separated values may be given in order to set and clear multiple bits. + +*-n*, *--noparity*:: +Set the parity of the serial line to none. + +*-o*, *--oddparity*:: +Set the parity of the serial line to odd. + +*-s*, *--speed* _value_:: +Set the speed (the baud rate) of the serial line to the specified _value_. + +*-c*, *--intro-command* _string_:: +Define an intro command that is sent through the serial line before the invocation of *ldattach*. E.g. in conjunction with line discipline GSM0710, the command 'AT+CMUX=0\r' is commonly suitable to switch the modem into the CMUX mode. + +*-p*, *--pause* _value_:: +Sleep for _value_ seconds before the invocation of *ldattach*. Default is one second. 
+ +include::man-common/help-version.adoc[] + +== AUTHORS + +mailto:tilman@imap.cc[Tilman Schmidt] + +== SEE ALSO + +*inputattach*(1), +*ttys*(4) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/ldattach.c b/sys-utils/ldattach.c new file mode 100644 index 0000000..3c853f8 --- /dev/null +++ b/sys-utils/ldattach.c @@ -0,0 +1,489 @@ +/* line discipline loading daemon + * open a serial device and attach a line discipline on it + * + * Usage: + * ldattach GIGASET_M101 /dev/ttyS0 + * + * ===================================================================== + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * ===================================================================== + */ + +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <termios.h> +#include <unistd.h> + +#include "c.h" +#include "all-io.h" +#include "nls.h" +#include "strutils.h" +#include "closestream.h" + +#include <signal.h> +#include <sys/socket.h> +#include <linux/if.h> + +#include <linux/tty.h> /* for N_GSM0710 */ + +#ifdef LINUX_GSMMUX_H +# include <linux/gsmmux.h> /* Add by guowenxue */ +#else +struct gsm_config +{ + unsigned int adaption; + unsigned int encapsulation; + unsigned int initiator; + unsigned int t1; + unsigned int t2; + unsigned int t3; + unsigned int n2; + unsigned int mru; + unsigned int mtu; + unsigned int k; + unsigned int i; + unsigned int unused[8]; /* Padding for expansion without + breaking stuff */ +}; +# define GSMIOC_GETCONF _IOR('G', 0, struct gsm_config) +# define GSMIOC_SETCONF _IOW('G', 1, 
struct gsm_config) +#endif +/* fallback definitions, used only when the included headers do not provide these line discipline numbers */ +#ifndef N_GIGASET_M101 +# define N_GIGASET_M101 16 +#endif + +#ifndef N_PPS +# define N_PPS 18 +#endif + +#ifndef N_GSM0710 +# define N_GSM0710 21 +#endif + +#define MAXINTROPARMLEN 32 + +/* attach a line discipline ioctl */ +#ifndef TIOCSETD +# define TIOCSETD 0x5423 +#endif +/* non-zero enables dbg() output; main() sets it for -d/--debug */ +static int debug = 0; +/* one name/value pair; the tables below end with a { NULL, 0 } entry */ +struct ld_table { + const char *name; + int value; +}; + +/* currently supported line disciplines, plus some aliases */ +static const struct ld_table ld_discs[] = { + { "TTY", N_TTY }, + { "SLIP", N_SLIP }, + { "MOUSE", N_MOUSE }, + { "PPP", N_PPP }, + { "STRIP", N_STRIP }, + { "AX25", N_AX25 }, + { "X25", N_X25 }, + { "6PACK", N_6PACK }, + { "R3964", N_R3964 }, + { "IRDA", N_IRDA }, + { "HDLC", N_HDLC }, + { "SYNC_PPP", N_SYNC_PPP }, + { "SYNCPPP", N_SYNC_PPP }, + { "HCI", N_HCI }, + { "GIGASET_M101", N_GIGASET_M101 }, + { "M101", N_GIGASET_M101 }, + { "GIGASET", N_GIGASET_M101 }, + { "PPS", N_PPS }, + { "GSM0710", N_GSM0710}, + { NULL, 0 } +}; + +/* known c_iflag names */ +static const struct ld_table ld_iflags[] = +{ + { "IGNBRK", IGNBRK }, + { "BRKINT", BRKINT }, + { "IGNPAR", IGNPAR }, + { "PARMRK", PARMRK }, + { "INPCK", INPCK }, + { "ISTRIP", ISTRIP }, + { "INLCR", INLCR }, + { "IGNCR", IGNCR }, + { "ICRNL", ICRNL }, + { "IUCLC", IUCLC }, + { "IXON", IXON }, + { "IXANY", IXANY }, + { "IXOFF", IXOFF }, + { "IMAXBEL", IMAXBEL }, + { "IUTF8", IUTF8 }, + { NULL, 0 } +}; +/* printf-style debug message; does nothing unless debug is set */ +static void __attribute__((__format__ (__printf__, 1, 2))) + dbg(char *fmt, ...) 
+{ + va_list args; +/* debug output is disabled unless the file-scope debug flag is set */ + if (debug == 0) + return; + fflush(NULL); + va_start(args, fmt); +#ifdef HAVE_VWARNX + vwarnx(fmt, args); +#else + fprintf(stderr, "%s: ", program_invocation_short_name); + vfprintf(stderr, fmt, args); + fprintf(stderr, "\n"); +#endif + va_end(args); + fflush(NULL); +} +/* Look up str (case-insensitively) in tab, which ends with a NULL name; returns the matching entry's value, or -1 when no name matches. */ +static int lookup_table(const struct ld_table *tab, const char *str) +{ + const struct ld_table *t; + + for (t = tab; t && t->name; t++) + if (!strcasecmp(t->name, str)) + return t->value; + return -1; +} +/* Print every name of tab to out, left-justified in 12-column fields, with a newline after each fifth name. */ +static void print_table(FILE * out, const struct ld_table *tab) +{ + const struct ld_table *t; + int i; + + for (t = tab, i = 1; t && t->name; t++, i++) { + fprintf(out, " %-12s", t->name); + if (!(i % 5)) + fputc('\n', out); + } +} +/* Split the comma-separated list in str (modified in place by strtok) and OR each item into *set_iflag, or into *clr_iflag when the item had a leading '-'. An item is a name from ld_iflags or, failing that, a number parsed by strtos32_or_err(). Always returns 0. */ +static int parse_iflag(char *str, int *set_iflag, int *clr_iflag) +{ + int iflag; + char *s; + + for (s = strtok(str, ","); s != NULL; s = strtok(NULL, ",")) { + if (*s == '-') + s++; + if ((iflag = lookup_table(ld_iflags, s)) < 0) + iflag = strtos32_or_err(s, _("invalid iflag")); + if (s > str && *(s - 1) == '-') + *clr_iflag |= iflag; + else + *set_iflag |= iflag; + } + dbg("iflag (set/clear): %d/%d", *set_iflag, *clr_iflag); + return 0; +} +/* Print the help text, including the known <ldisc> and <iflag> names, to stdout; does not return. */ + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options] <ldisc> <device>\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Attach a line discipline to a serial line.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -d, --debug print verbose messages to stderr\n"), out); + fputs(_(" -s, --speed <value> set serial line speed\n"), out); + fputs(_(" -c, --intro-command <string> intro sent before ldattach\n"), out); + fputs(_(" -p, --pause <seconds> pause between intro and ldattach\n"), out); + fputs(_(" -7, --sevenbits set character size to 7 bits\n"), out); + fputs(_(" -8, --eightbits set character size to 8 bits\n"), out); + fputs(_(" -n, --noparity set parity to none\n"), out); 
+ fputs(_(" -e, --evenparity set parity to even\n"), out); + fputs(_(" -o, --oddparity set parity to odd\n"), out); + fputs(_(" -1, --onestopbit set stop bits to one\n"), out); + fputs(_(" -2, --twostopbits set stop bits to two\n"), out); + fputs(_(" -i, --iflag [-]<iflag> set input mode flag\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(25)); + + fputs(_("\nKnown <ldisc> names:\n"), out); + print_table(out, ld_discs); + fputs(USAGE_SEPARATOR, out); + + fputs(_("\nKnown <iflag> names:\n"), out); + print_table(out, ld_iflags); + + printf(USAGE_MAN_TAIL("ldattach(8)")); + exit(EXIT_SUCCESS); +} + +/* Set the input and output speed in *ts; returns 0 on success and -1 when the speed cannot be set. */ +static int my_cfsetspeed(struct termios *ts, int speed) +{ + /* Standard speeds + * -- cfsetspeed() is able to translate number to Bxxx constants + */ + if (cfsetspeed(ts, speed) == 0) + return 0; + + /* Nonstandard speeds + * -- we have to bypass glibc and set the speed manually (because glibc + * checks for speed and supports Bxxx bit rates only)... + */ +#if _HAVE_STRUCT_TERMIOS_C_ISPEED +# define BOTHER 0010000 /* non standard rate */ + dbg("using non-standard speeds"); + ts->c_ospeed = ts->c_ispeed = speed; + ts->c_cflag &= ~CBAUD; + ts->c_cflag |= BOTHER; + return 0; +#else + return -1; +#endif +} + +/* Signal handler: report the signal in debug mode and exit successfully. */ +static void handler(int s) +{ + dbg("got SIG %i -> exiting", s); + exit(EXIT_SUCCESS); +} + +/* + * Configure the n_gsm line discipline already attached to tty_fd: read the + * current configuration, switch to initiator role with basic encapsulation + * and a 127 byte MRU/MTU, and write it back. Failures are reported with + * warn() and leave the discipline attached with its current configuration. + */ +static void gsm0710_set_conf(int tty_fd) +{ + struct gsm_config c; + + /* Add by guowenxue */ + /* get n_gsm configuration; if this fails "c" is uninitialized and must not be written back */ + if (ioctl(tty_fd, GSMIOC_GETCONF, &c) < 0) { + warn(_("cannot get n_gsm configuration")); + return; + } + /* we are initiator and need encoding 0 (basic) */ + c.initiator = 1; + c.encapsulation = 0; + /* our modem defaults to a maximum size of 127 bytes */ + c.mru = 127; + c.mtu = 127; + /* set the new configuration */ + if (ioctl(tty_fd, GSMIOC_SETCONF, &c) < 0) + warn(_("cannot set n_gsm configuration")); + /* Add by guowenxue end*/ +} + +int main(int argc, char **argv) +{ + int tty_fd; + struct termios ts; + int speed = 0, bits = '-', parity = '-', stop = '-'; + int set_iflag = 0, clr_iflag = 0; + int ldisc; + int optc; + char *dev; + int 
intropause = 1; + char *introparm = NULL; + + /* has_arg must agree with the short-option string passed to getopt_long() below: s, i, c and p take a value */ + static const struct option opttbl[] = { + {"speed", required_argument, NULL, 's'}, + {"sevenbits", no_argument, NULL, '7'}, + {"eightbits", no_argument, NULL, '8'}, + {"noparity", no_argument, NULL, 'n'}, + {"evenparity", no_argument, NULL, 'e'}, + {"oddparity", no_argument, NULL, 'o'}, + {"onestopbit", no_argument, NULL, '1'}, + {"twostopbits", no_argument, NULL, '2'}, + {"iflag", required_argument, NULL, 'i'}, + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {"debug", no_argument, NULL, 'd'}, + {"intro-command", required_argument, NULL, 'c'}, + {"pause", required_argument, NULL, 'p'}, + {NULL, 0, NULL, 0} + }; + + /* SIGKILL cannot be caught; handle SIGTERM, the default signal of kill(1), instead */ + signal(SIGTERM, handler); + signal(SIGINT, handler); + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + /* parse options */ + if (argc == 0) + errx(EXIT_FAILURE, _("bad usage")); + + while ((optc = + getopt_long(argc, argv, "dhV78neo12s:i:c:p:", opttbl, + NULL)) >= 0) { + switch (optc) { + case 'd': + debug = 1; + break; + case '1': + case '2': + stop = optc; + break; + case '7': + case '8': + bits = optc; + break; + case 'n': + case 'e': + case 'o': + parity = optc; + break; + case 's': + speed = strtos32_or_err(optarg, _("invalid speed argument")); + break; + case 'p': + { + /* range-check the unsigned value before narrowing it to int, otherwise a huge number turns into a negative pause that passes the check */ + uint32_t secs = strtou32_or_err(optarg, _("invalid pause argument")); + + if (secs > 10) + errx(EXIT_FAILURE, "invalid pause: %s", optarg); + intropause = secs; + } + break; + case 'c': + introparm = optarg; + break; + case 'i': + parse_iflag(optarg, &set_iflag, &clr_iflag); + break; + + case 'V': + print_version(EXIT_SUCCESS); + case 'h': + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + } + + /* exactly two operands are expected: <ldisc> and <device> */ + if (argc - optind != 2) { + warnx(_("not enough arguments")); + errtryhelp(EXIT_FAILURE); + } + /* parse line discipline specification */ + ldisc = lookup_table(ld_discs, argv[optind]); + if (ldisc < 0) + ldisc = strtos32_or_err(argv[optind], _("invalid line discipline argument")); + + 
/* ldisc specific option settings */ + if (ldisc == N_GIGASET_M101) { + /* device specific defaults for line speed and data format */ + if (speed == 0) + speed = 115200; + if (bits == '-') + bits = '8'; + if (parity == '-') + parity = 'n'; + if (stop == '-') + stop = '1'; + } + + /* open device */ + dev = argv[optind + 1]; + if ((tty_fd = open(dev, O_RDWR | O_NOCTTY)) < 0) + err(EXIT_FAILURE, _("cannot open %s"), dev); + if (!isatty(tty_fd)) + errx(EXIT_FAILURE, _("%s is not a serial line"), dev); + + dbg("opened %s", dev); + + /* set line speed and format */ + if (tcgetattr(tty_fd, &ts) < 0) + err(EXIT_FAILURE, + _("cannot get terminal attributes for %s"), dev); + cfmakeraw(&ts); + if (speed && my_cfsetspeed(&ts, speed) < 0) + errx(EXIT_FAILURE, _("speed %d unsupported"), speed); + + switch (stop) { + case '1': + ts.c_cflag &= ~CSTOPB; + break; + case '2': + ts.c_cflag |= CSTOPB; + break; + case '-': + break; + default: + abort(); + } + switch (bits) { + case '7': + ts.c_cflag = (ts.c_cflag & ~CSIZE) | CS7; + break; + case '8': + ts.c_cflag = (ts.c_cflag & ~CSIZE) | CS8; + break; + case '-': + break; + default: + abort(); + } + switch (parity) { + case 'n': + ts.c_cflag &= ~(PARENB | PARODD); + break; + case 'e': + ts.c_cflag |= PARENB; + ts.c_cflag &= ~PARODD; + break; + case 'o': + ts.c_cflag |= (PARENB | PARODD); + break; + case '-': + break; + default: + abort(); + } + + ts.c_cflag |= CREAD; /* just to be on the safe side */ + ts.c_iflag |= set_iflag; + ts.c_iflag &= ~clr_iflag; + + if (tcsetattr(tty_fd, TCSAFLUSH, &ts) < 0) + err(EXIT_FAILURE, + _("cannot set terminal attributes for %s"), dev); + + dbg("set to raw %d %c%c%c: cflag=0x%x", + speed, bits, parity, stop, ts.c_cflag); + + if (introparm && *introparm) + { + dbg("intro command is '%s'", introparm); + if (write_all(tty_fd, introparm, strlen(introparm)) != 0) + err(EXIT_FAILURE, + _("cannot write intro command to %s"), dev); + + if (intropause) { + dbg("waiting for %d seconds", intropause); + 
sleep(intropause); + } + } + + /* Attach the line discipline. */ + if (ioctl(tty_fd, TIOCSETD, &ldisc) < 0) + err(EXIT_FAILURE, _("cannot set line discipline")); + + dbg("line discipline set to %d", ldisc); + + /* ldisc specific post-attach actions */ + if (ldisc == N_GSM0710) + gsm0710_set_conf(tty_fd); + + /* Go into background if not in debug mode. */ + if (!debug && daemon(0, 0) < 0) + err(EXIT_FAILURE, _("cannot daemonize")); + + /* Sleep to keep the line discipline active. */ + pause(); + + exit(EXIT_SUCCESS); +} diff --git a/sys-utils/losetup.8 b/sys-utils/losetup.8 new file mode 100644 index 0000000..9d701ed --- /dev/null +++ b/sys-utils/losetup.8 @@ -0,0 +1,230 @@ +'\" t +.\" Title: losetup +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-07-20 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "LOSETUP" "8" "2022-07-20" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +losetup \- set up and control loop devices +.SH "SYNOPSIS" +.sp +Get info: +.sp +\fBlosetup\fP [\fIloopdev\fP] +.sp +\fBlosetup\fP \fB\-l\fP [\fB\-a\fP] +.sp +\fBlosetup\fP \fB\-j\fP \fIfile\fP [\fB\-o\fP \fIoffset\fP] +.sp +Detach a loop device: +.sp +\fBlosetup\fP \fB\-d\fP \fIloopdev\fP ... 
+.sp +Detach all associated loop devices: +.sp +\fBlosetup\fP \fB\-D\fP +.sp +Set up a loop device: +.sp +\fBlosetup\fP [\fB\-o\fP \fIoffset\fP] [\fB\-\-sizelimit\fP \fIsize\fP] [\fB\-\-sector\-size\fP \fIsize\fP] [\fB\-Pr\fP] [\fB\-\-show\fP] \fB\-f\fP|\fIloopdev file\fP +.sp +Resize a loop device: +.sp +\fBlosetup\fP \fB\-c\fP \fIloopdev\fP +.SH "DESCRIPTION" +.sp +\fBlosetup\fP is used to associate loop devices with regular files or block devices, to detach loop devices, and to query the status of a loop device. If only the \fIloopdev\fP argument is given, the status of the corresponding loop device is shown. If no option is given, all loop devices are shown. +.sp +Note that the old output format (i.e., \fBlosetup \-a\fP) with comma\-delimited strings is deprecated in favour of the \fB\-\-list\fP output format. +.sp +It\(cqs possible to create more independent loop devices for the same backing file. \fBThis setup may be dangerous, can cause data loss, corruption and overwrites.\fP Use \fB\-\-nooverlap\fP with \fB\-\-find\fP during setup to avoid this problem. +.sp +The loop device setup is not an atomic operation when used with \fB\-\-find\fP, and \fBlosetup\fP does not protect this operation by any lock. The number of attempts is internally restricted to a maximum of 16. It is recommended to use for example \fBflock\fP(1) to avoid a collision in heavily parallel use cases. +.SH "OPTIONS" +.sp +The \fIsize\fP and \fIoffset\fP arguments may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB. +.sp +\fB\-a\fP, \fB\-\-all\fP +.RS 4 +Show the status of all loop devices. Note that not all information is accessible for non\-root users. See also \fB\-\-list\fP. The old output format (as printed without \fB\-\-list\fP) is deprecated. 
+.RE +.sp +\fB\-d\fP, \fB\-\-detach\fP \fIloopdev\fP... +.RS 4 +Detach the file or device associated with the specified loop device(s). Note that since Linux v3.7 kernel uses "lazy device destruction". The detach operation does not return \fBEBUSY\fP error anymore if device is actively used by system, but it is marked by autoclear flag and destroyed later. +.RE +.sp +\fB\-D\fP, \fB\-\-detach\-all\fP +.RS 4 +Detach all associated loop devices. +.RE +.sp +\fB\-f\fP, \fB\-\-find\fP [\fIfile\fP] +.RS 4 +Find the first unused loop device. If a \fIfile\fP argument is present, use the found device as loop device. Otherwise, just print its name. +.RE +.sp +\fB\-\-show\fP +.RS 4 +Display the name of the assigned loop device if the \fB\-f\fP option and a \fIfile\fP argument are present. +.RE +.sp +\fB\-L\fP, \fB\-\-nooverlap\fP +.RS 4 +Check for conflicts between loop devices to avoid situation when the same backing file is shared between more loop devices. If the file is already used by another device then re\-use the device rather than a new one. The option makes sense only with \fB\-\-find\fP. +.RE +.sp +\fB\-j\fP, \fB\-\-associated\fP \fIfile\fP [\fB\-o\fP \fIoffset\fP] +.RS 4 +Show the status of all loop devices associated with the given \fIfile\fP. +.RE +.sp +\fB\-o\fP, \fB\-\-offset\fP \fIoffset\fP +.RS 4 +The data start is moved \fIoffset\fP bytes into the specified file or device. The \fIoffset\fP may be followed by the multiplicative suffixes; see above. +.RE +.sp +\fB\-\-sizelimit\fP \fIsize\fP +.RS 4 +The data end is set to no more than \fIsize\fP bytes after the data start. The \fIsize\fP may be followed by the multiplicative suffixes; see above. +.RE +.sp +\fB\-b\fP, \fB\-\-sector\-size\fP \fIsize\fP +.RS 4 +Set the logical sector size of the loop device in bytes (since Linux 4.14). The option may be used when creating a new loop device as well as a stand\-alone command to modify sector size of the already existing loop device. 
+.RE +.sp +\fB\-c\fP, \fB\-\-set\-capacity\fP \fIloopdev\fP +.RS 4 +Force the loop driver to reread the size of the file associated with the specified loop device. +.RE +.sp +\fB\-P\fP, \fB\-\-partscan\fP +.RS 4 +Force the kernel to scan the partition table on a newly created loop device. Note that the partition table parsing depends on sector sizes. The default sector size is 512 bytes; otherwise you need to use the option \fB\-\-sector\-size\fP together with \fB\-\-partscan\fP. +.RE +.sp +\fB\-r\fP, \fB\-\-read\-only\fP +.RS 4 +Set up a read\-only loop device. +.RE +.sp +\fB\-\-direct\-io\fP[\fB=on\fP|\fBoff\fP] +.RS 4 +Enable or disable direct I/O for the backing file. The optional argument can be either \fBon\fP or \fBoff\fP. If the optional argument is omitted, it defaults to \fBon\fP. +.RE +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Verbose mode. +.RE +.sp +\fB\-l\fP, \fB\-\-list\fP +.RS 4 +If a loop device or the \fB\-a\fP option is specified, print the default columns for either the specified loop device or all loop devices; the default is to print info about all devices. See also \fB\-\-output\fP, \fB\-\-noheadings\fP, \fB\-\-raw\fP, and \fB\-\-json\fP. +.RE +.sp +\fB\-O\fP, \fB\-\-output\fP \fIcolumn\fP[,\fIcolumn\fP]... +.RS 4 +Specify the columns that are to be printed for the \fB\-\-list\fP output. Use \fB\-\-help\fP to get a list of all supported columns. +.RE +.sp +\fB\-\-output\-all\fP +.RS 4 +Output all available columns. +.RE +.sp +\fB\-n\fP, \fB\-\-noheadings\fP +.RS 4 +Don\(cqt print headings for \fB\-\-list\fP output format. +.RE +.sp +\fB\-\-raw\fP +.RS 4 +Use the raw \fB\-\-list\fP output format. +.RE +.sp +\fB\-J\fP, \fB\-\-json\fP +.RS 4 +Use JSON format for \fB\-\-list\fP output. +.RE +.SH "ENCRYPTION" +.sp +\fBCryptoloop is no longer supported in favor of dm\-crypt.\fP For more details see \fBcryptsetup\fP(8). +.SH "EXIT STATUS" +.sp +\fBlosetup\fP returns 0 on success, nonzero on failure. 
When \fBlosetup\fP displays the status of a loop device, it returns 1 if the device is not configured and 2 if an error occurred which prevented determining the status of the device. +.SH "NOTES" +.sp +Since version 2.37 \fBlosetup\fP uses \fBLOOP_CONFIGURE\fP ioctl to setup a new loop device by one ioctl call. The old versions use \fBLOOP_SET_FD\fP and \fBLOOP_SET_STATUS64\fP ioctls to do the same. +.SH "ENVIRONMENT" +.sp +\fBLOOPDEV_DEBUG\fP=all +.RS 4 +enables debug output. +.RE +.SH "FILES" +.sp +\fI/dev/loop[0..N]\fP +.RS 4 +loop block devices +.RE +.sp +\fI/dev/loop\-control\fP +.RS 4 +loop control device +.RE +.SH "EXAMPLE" +.sp +The following commands can be used as an example of using the loop device. +.sp +.if n .RS 4 +.nf +.fam C +# dd if=/dev/zero of=~/file.img bs=1024k count=10 +# losetup \-\-find \-\-show ~/file.img +/dev/loop0 +# mkfs \-t ext2 /dev/loop0 +# mount /dev/loop0 /mnt +\&... +# umount /dev/loop0 +# losetup \-\-detach /dev/loop0 +.fam +.fi +.if n .RE +.SH "AUTHORS" +.sp +.MTO "kzak\(atredhat.com" "Karel Zak" "," +based on the original version from +.MTO "tytso\(atathena.mit.edu" "Theodore Ts\(cqo" "." +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBlosetup\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/losetup.8.adoc b/sys-utils/losetup.8.adoc new file mode 100644 index 0000000..94e8e7b --- /dev/null +++ b/sys-utils/losetup.8.adoc @@ -0,0 +1,164 @@ +//po4a: entry man manual += losetup(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: losetup + +== NAME + +losetup - set up and control loop devices + +== SYNOPSIS + +Get info: + +*losetup* [_loopdev_] + +*losetup* *-l* [*-a*] + +*losetup* *-j* _file_ [*-o* _offset_] + +Detach a loop device: + +*losetup* *-d* _loopdev_ ... + +Detach all associated loop devices: + +*losetup* *-D* + +Set up a loop device: + +*losetup* [*-o* _offset_] [*--sizelimit* _size_] [*--sector-size* _size_] [*-Pr*] [*--show*] *-f*|_loopdev file_ + +Resize a loop device: + +*losetup* *-c* _loopdev_ + +== DESCRIPTION + +*losetup* is used to associate loop devices with regular files or block devices, to detach loop devices, and to query the status of a loop device. If only the _loopdev_ argument is given, the status of the corresponding loop device is shown. If no option is given, all loop devices are shown. + +Note that the old output format (i.e., *losetup -a*) with comma-delimited strings is deprecated in favour of the *--list* output format. + +It's possible to create more independent loop devices for the same backing file. *This setup may be dangerous, can cause data loss, corruption and overwrites.* Use *--nooverlap* with *--find* during setup to avoid this problem. + +The loop device setup is not an atomic operation when used with *--find*, and *losetup* does not protect this operation by any lock. The number of attempts is internally restricted to a maximum of 16. It is recommended to use for example *flock*(1) to avoid a collision in heavily parallel use cases. 
+ +== OPTIONS + +The _size_ and _offset_ arguments may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB. + +*-a*, *--all*:: +Show the status of all loop devices. Note that not all information is accessible for non-root users. See also *--list*. The old output format (as printed without *--list*) is deprecated. + +*-d*, *--detach* _loopdev_...:: +Detach the file or device associated with the specified loop device(s). Note that since Linux v3.7 the kernel uses "lazy device destruction". The detach operation no longer returns an *EBUSY* error if the device is actively used by the system; instead the device is marked with the autoclear flag and destroyed later. + +*-D*, *--detach-all*:: +Detach all associated loop devices. + +*-f*, *--find* [_file_]:: +Find the first unused loop device. If a _file_ argument is present, use the found device as the loop device. Otherwise, just print its name. + +*--show*:: +Display the name of the assigned loop device if the *-f* option and a _file_ argument are present. + +*-L*, *--nooverlap*:: +Check for conflicts between loop devices to avoid the situation where the same backing file is shared between multiple loop devices. If the file is already used by another device, then re-use that device rather than setting up a new one. The option makes sense only with *--find*. + +*-j*, *--associated* _file_ [*-o* _offset_]:: +Show the status of all loop devices associated with the given _file_. + +*-o*, *--offset* _offset_:: +The data start is moved _offset_ bytes into the specified file or device. The _offset_ may be followed by the multiplicative suffixes; see above. + +*--sizelimit* _size_:: +The data end is set to no more than _size_ bytes after the data start. The _size_ may be followed by the multiplicative suffixes; see above.
+ +*-b*, *--sector-size* _size_:: +Set the logical sector size of the loop device in bytes (since Linux 4.14). The option may be used when creating a new loop device as well as a stand-alone command to modify the sector size of an already existing loop device. + +*-c*, *--set-capacity* _loopdev_:: +Force the loop driver to reread the size of the file associated with the specified loop device. + +*-P*, *--partscan*:: +Force the kernel to scan the partition table on a newly created loop device. Note that the partition table parsing depends on sector sizes. The default sector size is 512 bytes; for any other sector size you need to use the option *--sector-size* together with *--partscan*. + +*-r*, *--read-only*:: +Set up a read-only loop device. + +*--direct-io*[**=on**|*off*]:: +Enable or disable direct I/O for the backing file. The optional argument can be either *on* or *off*. If the optional argument is omitted, it defaults to *on*. + +*-v*, *--verbose*:: +Verbose mode. + +*-l*, *--list*:: +If a loop device or the *-a* option is specified, print the default columns for either the specified loop device or all loop devices; the default is to print info about all devices. See also *--output*, *--noheadings*, *--raw*, and *--json*. + +*-O*, *--output* _column_[,_column_]...:: +Specify the columns that are to be printed for the *--list* output. Use *--help* to get a list of all supported columns. + +*--output-all*:: +Output all available columns. + +*-n*, *--noheadings*:: +Don't print headings for *--list* output format. + +*--raw*:: +Use the raw *--list* output format. + +*-J*, *--json*:: +Use JSON format for *--list* output. + +== ENCRYPTION + +*Cryptoloop is no longer supported in favor of dm-crypt.* For more details see *cryptsetup*(8). + +== EXIT STATUS + +*losetup* returns 0 on success, nonzero on failure.
When *losetup* displays the status of a loop device, it returns 1 if the device is not configured and 2 if an error occurred which prevented determining the status of the device. + +== NOTES + +Since version 2.37 *losetup* uses the *LOOP_CONFIGURE* ioctl to set up a new loop device with a single ioctl call. Older versions use the *LOOP_SET_FD* and *LOOP_SET_STATUS64* ioctls to do the same. + +== ENVIRONMENT + +*LOOPDEV_DEBUG*=all:: +enables debug output. + +== FILES + +_/dev/loop[0..N]_:: +loop block devices + +_/dev/loop-control_:: +loop control device + +== EXAMPLE + +The following commands can be used as an example of using the loop device. + + # dd if=/dev/zero of=~/file.img bs=1024k count=10 + # losetup --find --show ~/file.img + /dev/loop0 + # mkfs -t ext2 /dev/loop0 + # mount /dev/loop0 /mnt + ... + # umount /dev/loop0 + # losetup --detach /dev/loop0 + +== AUTHORS + +mailto:kzak@redhat.com[Karel Zak], based on the original version from mailto:tytso@athena.mit.edu[Theodore Ts'o]. + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/losetup.c b/sys-utils/losetup.c new file mode 100644 index 0000000..29bb785 --- /dev/null +++ b/sys-utils/losetup.c @@ -0,0 +1,926 @@ +/* + * Copyright (C) 2011 Karel Zak <kzak@redhat.com> + * Originally from Ted's losetup.c + * + * losetup.c - setup and control loop devices + */ +#include <assert.h> +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <inttypes.h> +#include <getopt.h> + +#include <libsmartcols.h> + +#include "c.h" +#include "nls.h" +#include "strutils.h" +#include "loopdev.h" +#include "closestream.h" +#include "optutils.h" +#include "xalloc.h" +#include "canonicalize.h" +#include "pathnames.h" + +enum { + A_CREATE = 1, /* setup a new device */ + A_DELETE, /* delete given device(s) */ + A_DELETE_ALL,
/* delete all devices */ + A_SHOW, /* list devices */ + A_SHOW_ONE, /* print info about one device */ + A_FIND_FREE, /* find first unused */ + A_SET_CAPACITY, /* set device capacity */ + A_SET_DIRECT_IO, /* set accessing backing file by direct io */ + A_SET_BLOCKSIZE, /* set logical block size of the loop device */ +}; +/* output column ids; used as indexes into infos[] */ +enum { + COL_NAME = 0, + COL_AUTOCLR, + COL_BACK_FILE, + COL_BACK_INO, + COL_BACK_MAJMIN, + COL_MAJMIN, + COL_OFFSET, + COL_PARTSCAN, + COL_RO, + COL_SIZELIMIT, + COL_DIO, + COL_LOGSEC, +}; + +/* basic output flags */ +static int no_headings; +static int raw; +static int json; +/* description of one output column */ +struct colinfo { + const char *name; + double whint; + int flags; + const char *help; + + int json_type; /* default is string */ +}; +/* column descriptions, indexed by COL_* id */ +static struct colinfo infos[] = { + [COL_AUTOCLR] = { "AUTOCLEAR", 1, SCOLS_FL_RIGHT, N_("autoclear flag set"), SCOLS_JSON_BOOLEAN}, + [COL_BACK_FILE] = { "BACK-FILE", 0.3, 0, N_("device backing file")}, + [COL_BACK_INO] = { "BACK-INO", 4, SCOLS_FL_RIGHT, N_("backing file inode number"), SCOLS_JSON_NUMBER}, + [COL_BACK_MAJMIN] = { "BACK-MAJ:MIN", 6, 0, N_("backing file major:minor device number")}, + [COL_NAME] = { "NAME", 0.25, 0, N_("loop device name")}, + [COL_OFFSET] = { "OFFSET", 5, SCOLS_FL_RIGHT, N_("offset from the beginning"), SCOLS_JSON_NUMBER}, + [COL_PARTSCAN] = { "PARTSCAN", 1, SCOLS_FL_RIGHT, N_("partscan flag set"), SCOLS_JSON_BOOLEAN}, + [COL_RO] = { "RO", 1, SCOLS_FL_RIGHT, N_("read-only device"), SCOLS_JSON_BOOLEAN}, + [COL_SIZELIMIT] = { "SIZELIMIT", 5, SCOLS_FL_RIGHT, N_("size limit of the file in bytes"), SCOLS_JSON_NUMBER}, + [COL_MAJMIN] = { "MAJ:MIN", 3, 0, N_("loop device major:minor number")}, + [COL_DIO] = { "DIO", 1, SCOLS_FL_RIGHT, N_("access backing file with direct-io"), SCOLS_JSON_BOOLEAN}, + [COL_LOGSEC] = { "LOG-SEC", 4, SCOLS_FL_RIGHT, N_("logical sector size in bytes"), SCOLS_JSON_NUMBER}, +}; +/* COL_* ids of the columns selected for output; the first ncolumns entries are in use */ +static int columns[ARRAY_SIZE(infos) * 2] = {-1}; +static size_t ncolumns; +/* return the COL_* id stored at position num of columns[] */ +static int get_column_id(int
num) +{ + assert(num >= 0); + assert((size_t) num < ncolumns); + assert(columns[num] < (int) ARRAY_SIZE(infos)); + return columns[num]; +} +/* return the infos[] entry for the column stored at position num of columns[] */ +static struct colinfo *get_column_info(int num) +{ + return &infos[ get_column_id(num) ]; +} +/* case-insensitive lookup of a column name (namesz bytes of name) in infos[]; returns the index, or -1 after a warning if the name is unknown */ +static int column_name_to_id(const char *name, size_t namesz) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(infos); i++) { + const char *cn = infos[i].name; + + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) + return i; + } + warnx(_("unknown column: %s"), name); + return -1; +} +/* print one line about the loop device in the old (non --list) format; returns -EINVAL if no backing file name is available, otherwise 0 */ +static int printf_loopdev(struct loopdev_cxt *lc) +{ + uint64_t x; + dev_t dev = 0; + ino_t ino = 0; + char *fname; + uint32_t type; + + fname = loopcxt_get_backing_file(lc); + if (!fname) + return -EINVAL; + + if (loopcxt_get_backing_devno(lc, &dev) == 0) + loopcxt_get_backing_inode(lc, &ino); + + if (!dev && !ino) { + /* + * Probably non-root user (no permissions to + * call LOOP_GET_STATUS ioctls). + */ + printf("%s: []: (%s)", + loopcxt_get_device(lc), fname); + + if (loopcxt_get_offset(lc, &x) == 0 && x) + printf(_(", offset %ju"), x); + + if (loopcxt_get_sizelimit(lc, &x) == 0 && x) + printf(_(", sizelimit %ju"), x); + goto done; + } + + printf("%s: [%04jd]:%ju (%s)", + loopcxt_get_device(lc), (intmax_t) dev, (uintmax_t) ino, fname); + + if (loopcxt_get_offset(lc, &x) == 0 && x) + printf(_(", offset %ju"), x); + + if (loopcxt_get_sizelimit(lc, &x) == 0 && x) + printf(_(", sizelimit %ju"), x); + + if (loopcxt_get_encrypt_type(lc, &type) == 0) { + const char *e = loopcxt_get_crypt_name(lc); + + if ((!e || !*e) && type == 1) + e = "XOR"; + if (e && *e) + printf(_(", encryption %s (type %u)"), e, type); + } + +done: + free(fname); + printf("\n"); + return 0; +} +/* print the used loop devices in the old format; when file is set, only devices that loopcxt_is_used() reports for it */ +static int show_all_loops(struct loopdev_cxt *lc, const char *file, + uint64_t offset, int flags) +{ + struct stat sbuf, *st = &sbuf; + char *cn_file = NULL; + + if (loopcxt_init_iterator(lc, LOOPITER_FL_USED)) + return -1; + + if (!file || stat(file, st)) + st = NULL; + + while
(loopcxt_next(lc) == 0) { + if (file) { + int used; + const char *bf = cn_file ? cn_file : file; + + used = loopcxt_is_used(lc, st, bf, offset, 0, flags); + if (!used && !cn_file) { + bf = cn_file = canonicalize_path(file); + used = loopcxt_is_used(lc, st, bf, offset, 0, flags); + } + if (!used) + continue; + } + printf_loopdev(lc); + } + loopcxt_deinit_iterator(lc); + free(cn_file); + return 0; +} +/* detach the device held by lc; returns 0 on success, -1 (after a warning) on failure */ +static int delete_loop(struct loopdev_cxt *lc) +{ + if (loopcxt_delete_device(lc)) + warn(_("%s: detach failed"), loopcxt_get_device(lc)); + else + return 0; + + return -1; +} +/* detach every used loop device; returns the sum of the delete_loop() results (0 when all succeeded) or -1 if the iterator cannot be initialized */ +static int delete_all_loops(struct loopdev_cxt *lc) +{ + int res = 0; + + if (loopcxt_init_iterator(lc, LOOPITER_FL_USED)) + return -1; + + while (loopcxt_next(lc) == 0) + res += delete_loop(lc); + + loopcxt_deinit_iterator(lc); + return res; +} +/* fill one table line with the data of the device held by lc, one cell per selected column; returns 0, or -EINVAL for an unknown column id; exits if a cell cannot be stored. Unsigned values are printed with %ju and an explicit (uintmax_t) cast so that large offsets/sizes never show up as negative numbers. */ +static int set_scols_data(struct loopdev_cxt *lc, struct libscols_line *ln) +{ + size_t i; + + for (i = 0; i < ncolumns; i++) { + const char *p = NULL; /* external data */ + char *np = NULL; /* allocated here */ + uint64_t x = 0; + int rc = 0; + + switch(get_column_id(i)) { + case COL_NAME: + p = loopcxt_get_device(lc); + break; + case COL_BACK_FILE: + np = loopcxt_get_backing_file(lc); + break; + case COL_OFFSET: + if (loopcxt_get_offset(lc, &x) == 0) + xasprintf(&np, "%ju", (uintmax_t) x); + break; + case COL_SIZELIMIT: + if (loopcxt_get_sizelimit(lc, &x) == 0) + xasprintf(&np, "%ju", (uintmax_t) x); + break; + case COL_BACK_MAJMIN: + { + dev_t dev = 0; + if (loopcxt_get_backing_devno(lc, &dev) == 0 && dev) + xasprintf(&np, "%8u:%-3u", major(dev), minor(dev)); + break; + } + case COL_MAJMIN: + { + struct stat st; + + if (loopcxt_get_device(lc) + && stat(loopcxt_get_device(lc), &st) == 0 + && S_ISBLK(st.st_mode) + && major(st.st_rdev) == LOOPDEV_MAJOR) + xasprintf(&np, "%3u:%-3u", major(st.st_rdev), + minor(st.st_rdev)); + break; + } + case COL_BACK_INO: + { + ino_t ino = 0; + if (loopcxt_get_backing_inode(lc, &ino) == 0 && ino) + xasprintf(&np, "%ju", (uintmax_t) ino); + break; + }
+ case COL_AUTOCLR: + p = loopcxt_is_autoclear(lc) ? "1" : "0"; + break; + case COL_RO: + p = loopcxt_is_readonly(lc) ? "1" : "0"; + break; + case COL_DIO: + p = loopcxt_is_dio(lc) ? "1" : "0"; + break; + case COL_PARTSCAN: + p = loopcxt_is_partscan(lc) ? "1" : "0"; + break; + case COL_LOGSEC: + if (loopcxt_get_blocksize(lc, &x) == 0) + xasprintf(&np, "%ju", (uintmax_t) x); + break; + default: + return -EINVAL; + } + + + if (p) + rc = scols_line_set_data(ln, i, p); /* calls strdup() */ + else if (np) + rc = scols_line_refer_data(ln, i, np); /* only refers */ + + if (rc) + err(EXIT_FAILURE, _("failed to add output data")); + } + + return 0; +} +/* print the --list table: a single line if lc already holds a device, otherwise one line per used device (filtered by file/offset when file is set) */ +static int show_table(struct loopdev_cxt *lc, + const char *file, + uint64_t offset, + int flags) +{ + struct stat sbuf, *st = &sbuf; + struct libscols_table *tb; + struct libscols_line *ln; + int rc = 0; + size_t i; + + scols_init_debug(0); + + if (!(tb = scols_new_table())) + err(EXIT_FAILURE, _("failed to allocate output table")); + scols_table_enable_raw(tb, raw); + scols_table_enable_json(tb, json); + scols_table_enable_noheadings(tb, no_headings); + + if (json) + scols_table_set_name(tb, "loopdevices"); + + for (i = 0; i < ncolumns; i++) { + struct colinfo *ci = get_column_info(i); + struct libscols_column *cl; + + cl = scols_table_new_column(tb, ci->name, ci->whint, ci->flags); + if (!cl) + err(EXIT_FAILURE, _("failed to allocate output column")); + if (json) + scols_column_set_json_type(cl, ci->json_type); + } + + /* only one loopdev requested (already assigned to loopdev_cxt) */ + if (loopcxt_get_device(lc)) { + ln = scols_table_new_line(tb, NULL); + if (!ln) + err(EXIT_FAILURE, _("failed to allocate output line")); + rc = set_scols_data(lc, ln); + + /* list all loopdevs */ + } else { + char *cn_file = NULL; + + rc = loopcxt_init_iterator(lc, LOOPITER_FL_USED); + if (rc) + goto done; + if (!file || stat(file, st)) + st = NULL; + + while (loopcxt_next(lc) == 0) { + if (file) { + int used; + const char *bf = cn_file ?
cn_file : file; + + used = loopcxt_is_used(lc, st, bf, offset, 0, flags); + if (!used && !cn_file) { + bf = cn_file = canonicalize_path(file); + used = loopcxt_is_used(lc, st, bf, offset, 0, flags); + } + if (!used) + continue; + } + + ln = scols_table_new_line(tb, NULL); + if (!ln) + err(EXIT_FAILURE, _("failed to allocate output line")); + rc = set_scols_data(lc, ln); + if (rc) + break; + } + + loopcxt_deinit_iterator(lc); + free(cn_file); + } +done: /* the table is printed only when no error was recorded above; it is released in any case */ + if (rc == 0) + rc = scols_print_table(tb); + scols_unref_table(tb); + return rc; +} +/* print the help text, including the names and descriptions from infos[], to stdout and exit with EXIT_SUCCESS */ +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + size_t i; + + fputs(USAGE_HEADER, out); + + fprintf(out, + _(" %1$s [options] [<loopdev>]\n" + " %1$s [options] -f | <loopdev> <file>\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Set up and control loop devices.\n"), out); + + /* commands */ + fputs(USAGE_OPTIONS, out); + fputs(_(" -a, --all list all used devices\n"), out); + fputs(_(" -d, --detach <loopdev>... 
detach one or more devices\n"), out); + fputs(_(" -D, --detach-all detach all used devices\n"), out); + fputs(_(" -f, --find find first unused device\n"), out); + fputs(_(" -c, --set-capacity <loopdev> resize the device\n"), out); + fputs(_(" -j, --associated <file> list all devices associated with <file>\n"), out); + fputs(_(" -L, --nooverlap avoid possible conflict between devices\n"), out); + + /* commands options */ + fputs(USAGE_SEPARATOR, out); + fputs(_(" -o, --offset <num> start at offset <num> into file\n"), out); + fputs(_(" --sizelimit <num> device is limited to <num> bytes of the file\n"), out); + fputs(_(" -b, --sector-size <num> set the logical sector size to <num>\n"), out); + fputs(_(" -P, --partscan create a partitioned loop device\n"), out); + fputs(_(" -r, --read-only set up a read-only loop device\n"), out); + fputs(_(" --direct-io[=<on|off>] open backing file with O_DIRECT\n"), out); + fputs(_(" --show print device name after setup (with -f)\n"), out); + fputs(_(" -v, --verbose verbose mode\n"), out); + + /* output options */ + fputs(USAGE_SEPARATOR, out); + fputs(_(" -J, --json use JSON --list output format\n"), out); + fputs(_(" -l, --list list info about all or specified (default)\n"), out); + fputs(_(" -n, --noheadings don't print headings for --list output\n"), out); + fputs(_(" -O, --output <cols> specify columns to output for --list\n"), out); + fputs(_(" --output-all output all columns\n"), out); + fputs(_(" --raw use raw --list output format\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(31)); + + fputs(USAGE_COLUMNS, out); + for (i = 0; i < ARRAY_SIZE(infos); i++) + fprintf(out, " %12s %s\n", infos[i].name, _(infos[i].help)); + + printf(USAGE_MAN_TAIL("losetup(8)")); + + exit(EXIT_SUCCESS); +} + +static void warn_size(const char *filename, uint64_t size, uint64_t offset, int flags) +{ + struct stat st; + + if (!size) { + if (stat(filename, &st) || S_ISBLK(st.st_mode)) + return; + size = st.st_size; + + if
(flags & LOOPDEV_FL_OFFSET) + size -= offset; + } + + if (size < 512) + warnx(_("%s: Warning: file is smaller than 512 bytes; the loop device " + "may be useless or invisible for system tools."), + filename); + else if (size % 512) + warnx(_("%s: Warning: file does not fit into a 512-byte sector; " + "the end of the file will be ignored."), + filename); +} +/* Set up a loop device for file. If lc has no device yet, a free one is taken from loopcxt_find_unused(). With nooverlap, the existing devices are checked through loopcxt_find_overlap() first: a conflicting device terminates the program and, in the --find case, an exactly matching device is re-used. Returns 0 on success, otherwise the non-zero result of the failed loopcxt call. */ +static int create_loop(struct loopdev_cxt *lc, + int nooverlap, int lo_flags, int flags, + const char *file, uint64_t offset, uint64_t sizelimit, + uint64_t blocksize) +{ + int hasdev = loopcxt_has_device(lc); + int rc = 0, ntries = 0; + + /* losetup --find --nooverlap file.img */ + if (!hasdev && nooverlap) { + rc = loopcxt_find_overlap(lc, file, offset, sizelimit); + switch (rc) { + case 0: /* not found */ + break; + + case 1: /* overlap */ + loopcxt_deinit(lc); + errx(EXIT_FAILURE, _("%s: overlapping loop device exists"), file); + + case 2: /* overlap -- full size and offset match (reuse) */ + { + uint32_t lc_encrypt_type; + + /* Once a loop is initialized RO, there is no + * way to change its parameters. */ + if (loopcxt_is_readonly(lc) + && !(lo_flags & LO_FLAGS_READ_ONLY)) { + loopcxt_deinit(lc); + errx(EXIT_FAILURE, _("%s: overlapping read-only loop device exists"), file); + } + + /* This is no longer supported, but check to be safe. 
*/ + if (loopcxt_get_encrypt_type(lc, &lc_encrypt_type) == 0 + && lc_encrypt_type != LO_CRYPT_NONE) { + loopcxt_deinit(lc); + errx(EXIT_FAILURE, _("%s: overlapping encrypted loop device exists"), file); + } + + lc->config.info.lo_flags &= ~LO_FLAGS_AUTOCLEAR; + if (loopcxt_ioctl_status(lc)) { + loopcxt_deinit(lc); + errx(EXIT_FAILURE, _("%s: failed to re-use loop device"), file); + } + return 0; /* success, re-use */ + } + default: /* error */ + loopcxt_deinit(lc); + errx(EXIT_FAILURE, _("failed to inspect loop devices")); + return -errno; + } + } + + if (hasdev) + loopcxt_add_device(lc); + + /* losetup --nooverlap /dev/loopN file.img */ + if (hasdev && nooverlap) { + struct loopdev_cxt lc2; + + if (loopcxt_init(&lc2, 0)) { + loopcxt_deinit(lc); + err(EXIT_FAILURE, _("failed to initialize loopcxt")); + } + rc = loopcxt_find_overlap(&lc2, file, offset, sizelimit); + loopcxt_deinit(&lc2); + + if (rc) { + loopcxt_deinit(lc); + if (rc > 0) + errx(EXIT_FAILURE, _("%s: overlapping loop device exists"), file); + err(EXIT_FAILURE, _("%s: failed to check for conflicting loop devices"), file); + } + } + + /* Create a new device */ + do { + const char *errpre; + + /* Note that loopcxt_{find_unused,set_device}() resets + * loopcxt struct. 
+ */ + if (!hasdev && (rc = loopcxt_find_unused(lc))) { + warnx(_("cannot find an unused loop device")); + break; + } + if (flags & LOOPDEV_FL_OFFSET) + loopcxt_set_offset(lc, offset); + if (flags & LOOPDEV_FL_SIZELIMIT) + loopcxt_set_sizelimit(lc, sizelimit); + if (lo_flags) + loopcxt_set_flags(lc, lo_flags); + if (blocksize > 0) + loopcxt_set_blocksize(lc, blocksize); + + if ((rc = loopcxt_set_backing_file(lc, file))) { + warn(_("%s: failed to use backing file"), file); + break; + } + errno = 0; + rc = loopcxt_setup_device(lc); + if (rc == 0) + break; /* success */ +/* setup failed with EBUSY or EAGAIN on a device picked by loopcxt_find_unused(): call xusleep(200000) and search again, at most 64 times */ + if ((errno == EBUSY || errno == EAGAIN) && !hasdev && ntries < 64) { + xusleep(200000); + ntries++; + continue; + } + + /* errors */ + errpre = hasdev && loopcxt_get_fd(lc) < 0 ? + loopcxt_get_device(lc) : file; + warn(_("%s: failed to set up loop device"), errpre); + break; + } while (hasdev == 0); + + return rc; +} + +int main(int argc, char **argv) +{ + struct loopdev_cxt lc; + int act = 0, flags = 0, no_overlap = 0, c; + char *file = NULL; + uint64_t offset = 0, sizelimit = 0, blocksize = 0; + int res = 0, showdev = 0, lo_flags = 0; + char *outarg = NULL; + int list = 0; + unsigned long use_dio = 0, set_dio = 0, set_blocksize = 0; + + enum { + OPT_SIZELIMIT = CHAR_MAX + 1, + OPT_SHOW, + OPT_RAW, + OPT_DIO, + OPT_OUTPUT_ALL + }; + static const struct option longopts[] = { + { "all", no_argument, NULL, 'a' }, + { "set-capacity", required_argument, NULL, 'c' }, + { "detach", required_argument, NULL, 'd' }, + { "detach-all", no_argument, NULL, 'D' }, + { "find", no_argument, NULL, 'f' }, + { "nooverlap", no_argument, NULL, 'L' }, + { "help", no_argument, NULL, 'h' }, + { "associated", required_argument, NULL, 'j' }, + { "json", no_argument, NULL, 'J' }, + { "list", no_argument, NULL, 'l' }, + { "sector-size", required_argument, NULL, 'b' }, + { "noheadings", no_argument, NULL, 'n' }, + { "offset", required_argument, NULL, 'o' }, + { "output", required_argument, NULL, 'O' }, + { "output-all",
no_argument, NULL, OPT_OUTPUT_ALL }, + { "sizelimit", required_argument, NULL, OPT_SIZELIMIT }, + { "partscan", no_argument, NULL, 'P' }, + { "read-only", no_argument, NULL, 'r' }, + { "direct-io", optional_argument, NULL, OPT_DIO }, + { "raw", no_argument, NULL, OPT_RAW }, + { "show", no_argument, NULL, OPT_SHOW }, + { "verbose", no_argument, NULL, 'v' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'D','a','c','d','f','j' }, + { 'D','c','d','f','l' }, + { 'D','c','d','f','O' }, + { 'J',OPT_RAW }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + if (loopcxt_init(&lc, 0)) + err(EXIT_FAILURE, _("failed to initialize loopcxt")); + + while ((c = getopt_long(argc, argv, "ab:c:d:Dfhj:JlLno:O:PrvV", + longopts, NULL)) != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch (c) { + case 'a': + act = A_SHOW; + break; + case 'b': + set_blocksize = 1; + blocksize = strtosize_or_err(optarg, _("failed to parse logical block size")); + break; + case 'c': + act = A_SET_CAPACITY; + if (!is_loopdev(optarg) || + loopcxt_set_device(&lc, optarg)) + err(EXIT_FAILURE, _("%s: failed to use device"), + optarg); + break; + case 'r': + lo_flags |= LO_FLAGS_READ_ONLY; + break; + case 'd': + act = A_DELETE; + if (!is_loopdev(optarg) || + loopcxt_set_device(&lc, optarg)) + err(EXIT_FAILURE, _("%s: failed to use device"), + optarg); + break; + case 'D': + act = A_DELETE_ALL; + break; + case 'f': + act = A_FIND_FREE; + break; + case 'J': + json = 1; + break; + case 'j': + act = A_SHOW; + file = optarg; + break; + case 'l': + list = 1; + break; + case 'L': + no_overlap = 1; + break; + case 'n': + no_headings = 1; + break; + case OPT_RAW: + raw = 1; + break; + case 'o': + offset = strtosize_or_err(optarg, _("failed to parse offset")); + flags |=
LOOPDEV_FL_OFFSET; + break; + case 'O': + outarg = optarg; + list = 1; + break; + case OPT_OUTPUT_ALL: + for (ncolumns = 0; ncolumns < ARRAY_SIZE(infos); ncolumns++) + columns[ncolumns] = ncolumns; + break; + case 'P': + lo_flags |= LO_FLAGS_PARTSCAN; + break; + case OPT_SHOW: + showdev = 1; + break; + case OPT_DIO: + use_dio = set_dio = 1; + if (optarg) + use_dio = parse_switch(optarg, _("argument error"), "on", "off", NULL); + if (use_dio) + lo_flags |= LO_FLAGS_DIRECT_IO; + break; + case 'v': /* accepted, but nothing is done for it here */ + break; + case OPT_SIZELIMIT: /* --sizelimit */ + sizelimit = strtosize_or_err(optarg, _("failed to parse size")); + flags |= LOOPDEV_FL_SIZELIMIT; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + ul_path_init_debug(); + ul_sysfs_init_debug(); + + /* default is --list --all */ + if (argc == 1) { + act = A_SHOW; + list = 1; + } + + if (!act && argc == 2 && (raw || json)) { + act = A_SHOW; + list = 1; + } + + /* default --list output columns */ + if (list && !ncolumns) { + columns[ncolumns++] = COL_NAME; + columns[ncolumns++] = COL_SIZELIMIT; + columns[ncolumns++] = COL_OFFSET; + columns[ncolumns++] = COL_AUTOCLR; + columns[ncolumns++] = COL_RO; + columns[ncolumns++] = COL_BACK_FILE; + columns[ncolumns++] = COL_DIO; + columns[ncolumns++] = COL_LOGSEC; + } + + if (act == A_FIND_FREE && optind < argc) { + /* + * losetup -f <backing_file> + */ + act = A_CREATE; + file = argv[optind++]; + + if (optind < argc) + errx(EXIT_FAILURE, _("unexpected arguments")); + } + + if (list && !act && optind == argc) + /* + * losetup --list defaults to --all + */ + act = A_SHOW; + + if (!act && optind + 1 == argc) { + /* + * losetup [--list] <device> + * OR + * losetup {--direct-io[=off]|--sector-size=size}... 
<device> + */ + if (set_dio) { + act = A_SET_DIRECT_IO; + lo_flags &= ~LO_FLAGS_DIRECT_IO; /* the requested state is kept in use_dio; clearing the flag keeps the "setup only" option check below from rejecting this action */ + } else if (set_blocksize) + act = A_SET_BLOCKSIZE; + else + act = A_SHOW_ONE; + + if (!is_loopdev(argv[optind]) || + loopcxt_set_device(&lc, argv[optind])) + err(EXIT_FAILURE, _("%s: failed to use device"), + argv[optind]); + optind++; + } + if (!act) { + /* + * losetup <loopdev> <backing_file> + */ + act = A_CREATE; + + if (optind >= argc) + errx(EXIT_FAILURE, _("no loop device specified")); + /* don't use is_loopdev() here, the device does not have to exist yet */ + if (loopcxt_set_device(&lc, argv[optind])) + err(EXIT_FAILURE, _("%s: failed to use device"), + argv[optind]); + optind++; + + if (optind >= argc) + errx(EXIT_FAILURE, _("no file specified")); + file = argv[optind++]; + } + + if (act != A_CREATE && + (sizelimit || lo_flags || showdev)) + errx(EXIT_FAILURE, + _("the options %s are allowed during loop device setup only"), + "--{sizelimit,partscan,read-only,show}"); + + if ((flags & LOOPDEV_FL_OFFSET) && + act != A_CREATE && (act != A_SHOW || !file)) + errx(EXIT_FAILURE, _("the option --offset is not allowed in this context")); + + if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns), + &ncolumns, column_name_to_id) < 0) + return EXIT_FAILURE; + + switch (act) { + case A_CREATE: + res = create_loop(&lc, no_overlap, lo_flags, flags, file, + offset, sizelimit, blocksize); + if (res == 0) { + if (showdev) + printf("%s\n", loopcxt_get_device(&lc)); + warn_size(file, sizelimit, offset, flags); + } + break; + case A_DELETE: + res = delete_loop(&lc); + while (optind < argc) { + if (!is_loopdev(argv[optind]) || + loopcxt_set_device(&lc, argv[optind])) + warn(_("%s: failed to use device"), + argv[optind]); + optind++; + res += delete_loop(&lc); + } + break; + case A_DELETE_ALL: + res = delete_all_loops(&lc); + break; + case A_FIND_FREE: + res = loopcxt_find_unused(&lc); + if (res) { + int errsv = errno; + + if (access(_PATH_DEV_LOOPCTL, F_OK) == 0 && +
access(_PATH_DEV_LOOPCTL, W_OK) != 0) + ; + else + errno = errsv; + + warn(_("cannot find an unused loop device")); + } else + printf("%s\n", loopcxt_get_device(&lc)); + break; + case A_SHOW: + if (list) + res = show_table(&lc, file, offset, flags); + else + res = show_all_loops(&lc, file, offset, flags); + break; + case A_SHOW_ONE: + if (list) + res = show_table(&lc, NULL, 0, 0); + else + res = printf_loopdev(&lc); + if (res) + warn("%s", loopcxt_get_device(&lc)); + break; + case A_SET_CAPACITY: + res = loopcxt_ioctl_capacity(&lc); + if (res) + warn(_("%s: set capacity failed"), + loopcxt_get_device(&lc)); + break; + case A_SET_DIRECT_IO: + res = loopcxt_ioctl_dio(&lc, use_dio); + if (res) + warn(_("%s: set direct io failed"), + loopcxt_get_device(&lc)); + break; + case A_SET_BLOCKSIZE: + res = loopcxt_ioctl_blocksize(&lc, blocksize); + if (res) + warn(_("%s: set logical block size failed"), + loopcxt_get_device(&lc)); + break; + default: + warnx(_("bad usage")); + errtryhelp(EXIT_FAILURE); + break; + } + + loopcxt_deinit(&lc); + return res ? EXIT_FAILURE : EXIT_SUCCESS; +} + diff --git a/sys-utils/lscpu-arm.c b/sys-utils/lscpu-arm.c new file mode 100644 index 0000000..86e5ea7 --- /dev/null +++ b/sys-utils/lscpu-arm.c @@ -0,0 +1,371 @@ +/* + * lscpu-arm.c - ARM CPU identification tables + * + * Copyright (C) 2018 Riku Voipio <riku.voipio@iki.fi> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * The information here is gathered from + * - ARM manuals + * - Linux kernel: arch/armX/include/asm/cputype.h + * - GCC sources: config/arch/arch-cores.def + * - Ancient wisdom + * - SMBIOS tables (if applicable) + */ +#include "lscpu.h" +/* one (part number, printable name) pair; each table is terminated by an entry with id -1 */ +struct id_part { + const int id; + const char* name; +}; +/* part numbers of cores designed by ARM; 0xd46 and 0xd47 are the Cortex-A510 and Cortex-A710 */ +static const struct id_part arm_part[] = { + { 0x810, "ARM810" }, + { 0x920, "ARM920" }, + { 0x922, "ARM922" }, + { 0x926, "ARM926" }, + { 0x940, "ARM940" }, + { 0x946, "ARM946" }, + { 0x966, "ARM966" }, + { 0xa20, "ARM1020" }, + { 0xa22, "ARM1022" }, + { 0xa26, "ARM1026" }, + { 0xb02, "ARM11 MPCore" }, + { 0xb36, "ARM1136" }, + { 0xb56, "ARM1156" }, + { 0xb76, "ARM1176" }, + { 0xc05, "Cortex-A5" }, + { 0xc07, "Cortex-A7" }, + { 0xc08, "Cortex-A8" }, + { 0xc09, "Cortex-A9" }, + { 0xc0d, "Cortex-A17" }, /* Originally A12 */ + { 0xc0f, "Cortex-A15" }, + { 0xc0e, "Cortex-A17" }, + { 0xc14, "Cortex-R4" }, + { 0xc15, "Cortex-R5" }, + { 0xc17, "Cortex-R7" }, + { 0xc18, "Cortex-R8" }, + { 0xc20, "Cortex-M0" }, + { 0xc21, "Cortex-M1" }, + { 0xc23, "Cortex-M3" }, + { 0xc24, "Cortex-M4" }, + { 0xc27, "Cortex-M7" }, + { 0xc60, "Cortex-M0+" }, + { 0xd01, "Cortex-A32" }, + { 0xd03, "Cortex-A53" }, + { 0xd04, "Cortex-A35" }, + { 0xd05, "Cortex-A55" }, + { 0xd06, "Cortex-A65" }, + { 0xd07, "Cortex-A57" }, + { 0xd08, "Cortex-A72" }, + { 0xd09, "Cortex-A73" }, + { 0xd0a, "Cortex-A75" }, + { 0xd0b, "Cortex-A76" }, + { 0xd0c, "Neoverse-N1" }, + { 0xd0d, "Cortex-A77" }, + { 0xd0e, "Cortex-A76AE" }, + { 0xd13, "Cortex-R52" }, + { 0xd20, "Cortex-M23" }, + { 0xd21, "Cortex-M33" }, + { 0xd40, "Neoverse-V1" }, + { 0xd41, "Cortex-A78" }, + { 0xd42, "Cortex-A78AE" }, + { 0xd44, "Cortex-X1" }, + { 0xd46, "Cortex-A510" }, + { 0xd47, "Cortex-A710" }, + { 0xd48, "Cortex-X2" }, + {
0xd49, "Neoverse-N2" }, + { 0xd4a, "Neoverse-E1" }, + { 0xd4b, "Cortex-A78C" }, + { -1, "unknown" }, +}; +/* per-implementer part tables follow; hw_implementer[] maps an implementer id to its table */ +static const struct id_part brcm_part[] = { + { 0x0f, "Brahma B15" }, + { 0x100, "Brahma B53" }, + { 0x516, "ThunderX2" }, + { -1, "unknown" }, +}; + +static const struct id_part dec_part[] = { + { 0xa10, "SA110" }, + { 0xa11, "SA1100" }, + { -1, "unknown" }, +}; + +static const struct id_part cavium_part[] = { + { 0x0a0, "ThunderX" }, + { 0x0a1, "ThunderX 88XX" }, + { 0x0a2, "ThunderX 81XX" }, + { 0x0a3, "ThunderX 83XX" }, + { 0x0af, "ThunderX2 99xx" }, + { -1, "unknown" }, +}; + +static const struct id_part apm_part[] = { + { 0x000, "X-Gene" }, + { -1, "unknown" }, +}; + +static const struct id_part qcom_part[] = { + { 0x00f, "Scorpion" }, + { 0x02d, "Scorpion" }, + { 0x04d, "Krait" }, + { 0x06f, "Krait" }, + { 0x201, "Kryo" }, + { 0x205, "Kryo" }, + { 0x211, "Kryo" }, + { 0x800, "Falkor V1/Kryo" }, + { 0x801, "Kryo V2" }, + { 0xc00, "Falkor" }, + { 0xc01, "Saphira" }, + { -1, "unknown" }, +}; + +static const struct id_part samsung_part[] = { + { 0x001, "exynos-m1" }, + { -1, "unknown" }, +}; + +static const struct id_part nvidia_part[] = { + { 0x000, "Denver" }, + { 0x003, "Denver 2" }, + { 0x004, "Carmel" }, + { -1, "unknown" }, +}; + +static const struct id_part marvell_part[] = { + { 0x131, "Feroceon 88FR131" }, + { 0x581, "PJ4/PJ4b" }, + { 0x584, "PJ4B-MP" }, + { -1, "unknown" }, +}; + +static const struct id_part apple_part[] = { + { 0x022, "Icestorm" }, + { 0x023, "Firestorm" }, + { -1, "unknown" }, +}; + +static const struct id_part faraday_part[] = { + { 0x526, "FA526" }, + { 0x626, "FA626" }, + { -1, "unknown" }, +}; + +static const struct id_part intel_part[] = { + { 0x200, "i80200" }, + { 0x210, "PXA250A" }, + { 0x212, "PXA210A" }, + { 0x242, "i80321-400" }, + { 0x243, "i80321-600" }, + { 0x290, "PXA250B/PXA26x" }, + { 0x292, "PXA210B" }, + { 0x2c2, "i80321-400-B0" }, + { 0x2c3, "i80321-600-B0" }, + { 0x2d0, "PXA250C/PXA255/PXA26x" }, + { 0x2d2,
"PXA210C" }, + { 0x411, "PXA27x" }, + { 0x41c, "IPX425-533" }, + { 0x41d, "IPX425-400" }, + { 0x41f, "IPX425-266" }, + { 0x682, "PXA32x" }, + { 0x683, "PXA930/PXA935" }, + { 0x688, "PXA30x" }, + { 0x689, "PXA31x" }, + { 0xb11, "SA1110" }, + { 0xc12, "IPX1200" }, + { -1, "unknown" }, +}; + +static const struct id_part fujitsu_part[] = { + { 0x001, "A64FX" }, + { -1, "unknown" }, +}; + +static const struct id_part hisi_part[] = { + { 0xd01, "Kunpeng-920" }, /* aka tsv110 */ + { -1, "unknown" }, +}; + +static const struct id_part ft_part[] = { + { 0x660, "FTC660" }, + { 0x661, "FTC661" }, + { 0x662, "FTC662" }, + { 0x663, "FTC663" }, + { -1, "unknown" }, +}; +/* used for implementers without any known part number */ +static const struct id_part unknown_part[] = { + { -1, "unknown" }, +}; +/* implementer id, its part table and its printable name */ +struct hw_impl { + const int id; + const struct id_part *parts; + const char *name; +}; +/* known implementers; the list is terminated by the entry with id -1 */ +static const struct hw_impl hw_implementer[] = { + { 0x41, arm_part, "ARM" }, + { 0x42, brcm_part, "Broadcom" }, + { 0x43, cavium_part, "Cavium" }, + { 0x44, dec_part, "DEC" }, + { 0x46, fujitsu_part, "FUJITSU" }, + { 0x48, hisi_part, "HiSilicon" }, + { 0x49, unknown_part, "Infineon" }, + { 0x4d, unknown_part, "Motorola/Freescale" }, + { 0x4e, nvidia_part, "NVIDIA" }, + { 0x50, apm_part, "APM" }, + { 0x51, qcom_part, "Qualcomm" }, + { 0x53, samsung_part, "Samsung" }, + { 0x56, marvell_part, "Marvell" }, + { 0x61, apple_part, "Apple" }, + { 0x66, faraday_part, "Faraday" }, + { 0x69, intel_part, "Intel" }, + { 0x70, ft_part, "Phytium" }, + { 0xc0, unknown_part, "Ampere" }, + { -1, unknown_part, "unknown" }, +}; +/* convert a string that starts with "0x" using strtol(); returns the value as int, or -EINVAL if str is NULL, lacks the prefix or cannot be converted */ +static int parse_id(const char *str) +{ + int id; + char *end = NULL; + + if (!str || strncmp(str, "0x",2) != 0) + return -EINVAL; + + errno = 0; + id = (int) strtol(str, &end, 0); + if (errno || str == end) + return -EINVAL; + + return id; +} + +#define parse_model_id(_cxt) (parse_id((_cxt)->model)) +/* return ct->vendor_id; while it is still 0 it is first filled from parse_id(ct->vendor) */ +static inline int parse_implementer_id(struct lscpu_cputype *ct) +{ + if (ct->vendor_id) + return ct->vendor_id; + ct->vendor_id =
parse_id(ct->vendor); + return ct->vendor_id; +} + +/* + * Use model and vendor IDs to decode to human readable names. + */ +static int arm_ids_decode(struct lscpu_cputype *ct) +{ + int impl, part, j; + const struct id_part *parts = NULL; + + impl = parse_implementer_id(ct); + if (impl <= 0) + return -EINVAL; /* no ARM or missing ID */ + + /* decode vendor */ + for (j = 0; hw_implementer[j].id != -1; j++) { + if (hw_implementer[j].id == impl) { + parts = hw_implementer[j].parts; + free(ct->vendor); + ct->vendor = xstrdup(hw_implementer[j].name); + break; + } + } + + /* decode model */ + if (!parts) + goto done; + + part = parse_model_id(ct); + if (part <= 0) + goto done; + + for (j = 0; parts[j].id != -1; j++) { + if (parts[j].id == part) { + free(ct->modelname); + ct->modelname = xstrdup(parts[j].name); + break; + } + } +done: + return 0; +} + +/* use "rXpY" string as stepping */ +static int arm_rXpY_decode(struct lscpu_cputype *ct) +{ + int impl, revision, variant; + char *end = NULL; + char buf[8]; + + impl = parse_implementer_id(ct); + + if (impl != 0x41 || !ct->revision || !ct->stepping) + return -EINVAL; + + errno = 0; + revision = (int) strtol(ct->revision, &end, 10); + if (errno || ct->revision == end) + return -EINVAL; + + errno = 0; + variant = (int) strtol(ct->stepping, &end, 0); + if (errno || ct->stepping == end) + return -EINVAL; + + snprintf(buf, sizeof(buf), "r%dp%d", variant, revision); + free(ct->stepping); + ct->stepping = xstrdup(buf); + + return 0; +} + +static void arm_decode(struct lscpu_cxt *cxt, struct lscpu_cputype *ct) +{ + if (!cxt->noalive && access(_PATH_SYS_DMI, R_OK) == 0) + dmi_decode_cputype(ct); + + arm_ids_decode(ct); + arm_rXpY_decode(ct); + + if (!cxt->noalive && cxt->is_cluster) + ct->nr_socket_on_cluster = get_number_of_physical_sockets_from_dmi(); +} + +static int is_cluster_arm(struct lscpu_cxt *cxt) +{ + struct stat st; + + if (!cxt->noalive + && strcmp(cxt->arch->name, "aarch64") == 0 + && stat(_PATH_ACPI_PPTT, &st) < 0 
&& cxt->ncputypes == 1) + return 1; + else + return 0; +} + +void lscpu_decode_arm(struct lscpu_cxt *cxt) +{ + size_t i; + + cxt->is_cluster = is_cluster_arm(cxt); + + for (i = 0; i < cxt->ncputypes; i++) + arm_decode(cxt, cxt->cputypes[i]); +} diff --git a/sys-utils/lscpu-cpu.c b/sys-utils/lscpu-cpu.c new file mode 100644 index 0000000..6250cf7 --- /dev/null +++ b/sys-utils/lscpu-cpu.c @@ -0,0 +1,90 @@ +#include "lscpu.h" + +struct lscpu_cpu *lscpu_new_cpu(int id) +{ + struct lscpu_cpu *cpu; + + cpu = xcalloc(1, sizeof(struct lscpu_cpu)); + cpu->refcount = 1; + cpu->logical_id = id; + cpu->coreid = -1; + cpu->socketid = -1; + cpu->bookid = -1; + cpu->bookid = -1; + cpu->address = -1; + cpu->configured = -1; + + DBG(CPU, ul_debugobj(cpu, "alloc")); + return cpu; +} + +void lscpu_ref_cpu(struct lscpu_cpu *cpu) +{ + if (cpu) + cpu->refcount++; +} + +void lscpu_unref_cpu(struct lscpu_cpu *cpu) +{ + if (!cpu) + return; + + if (--cpu->refcount <= 0) { + DBG(CPU, ul_debugobj(cpu, " freeing #%d", cpu->logical_id)); + lscpu_unref_cputype(cpu->type); + cpu->type = NULL; + free(cpu->dynamic_mhz); + free(cpu->static_mhz); + free(cpu->mhz); + free(cpu->bogomips); + free(cpu); + } +} + +/* + * Create and initialize array with CPU structs according to @cpuset. + */ +int lscpu_create_cpus(struct lscpu_cxt *cxt, cpu_set_t *cpuset, size_t setsize) +{ + size_t n, i; + + assert(!cxt->cpus); + + cxt->npossibles = CPU_COUNT_S(setsize, cpuset); + cxt->cpus = xcalloc(1, cxt->npossibles * sizeof(struct lscpu_cpu *)); + + for (n = 0, i = 0; n < (size_t) cxt->maxcpus && i < cxt->npossibles; n++) { + if (CPU_ISSET_S(n, setsize, cpuset)) + cxt->cpus[i++] = lscpu_new_cpu(n); + } + + return 0; +} + +int lscpu_cpu_set_type(struct lscpu_cpu *cpu, struct lscpu_cputype *type) +{ + if (cpu->type == type) + return 0; + + lscpu_unref_cputype(cpu->type); + cpu->type = type; + lscpu_ref_cputype(type); + + DBG(CPU, ul_debugobj(cpu, "cputype set to %s", type ? 
type->vendor : NULL)); + return 0; +} + +/* don't forget lscpu_ref_cpu() ! */ +struct lscpu_cpu *lscpu_get_cpu(struct lscpu_cxt *cxt, int logical_id) +{ + size_t i; + + for (i = 0; i < cxt->npossibles; i++) { + struct lscpu_cpu *cpu = cxt->cpus[i]; + + if (cpu && cpu->logical_id == logical_id) + return cpu; + } + + return NULL; +} diff --git a/sys-utils/lscpu-cputype.c b/sys-utils/lscpu-cputype.c new file mode 100644 index 0000000..31f4009 --- /dev/null +++ b/sys-utils/lscpu-cputype.c @@ -0,0 +1,911 @@ + +#include <sys/utsname.h> +#include <sys/personality.h> + +#if defined(HAVE_LIBRTAS) +# include <librtas.h> +#endif + +#include "lscpu.h" + +#include "fileutils.h" +#include "c_strtod.h" + +/* Lookup a pattern and get the value for format "<pattern> : <key>" + */ +int lookup(char *line, char *pattern, char **value) +{ + char *p, *v; + int len = strlen(pattern); + + /* don't re-fill already found tags, first one wins */ + if (!*line || *value) + return 0; + /* pattern */ + if (strncmp(line, pattern, len)) + return 0; + /* white spaces */ + for (p = line + len; isspace(*p); p++); + + /* separator */ + if (*p != ':') + return 0; + /* white spaces */ + for (++p; isspace(*p); p++); + + /* value */ + if (!*p) + return 0; + v = p; + + /* end of value */ + len = strlen(line) - 1; + for (p = line + len; isspace(*(p-1)); p--); + *p = '\0'; + + *value = xstrdup(v); + return 1; +} + +struct lscpu_cputype *lscpu_new_cputype(void) +{ + struct lscpu_cputype *ct; + + ct = xcalloc(1, sizeof(struct lscpu_cputype)); + ct->refcount = 1; + ct->dispatching = -1; + ct->freqboost = -1; + + DBG(TYPE, ul_debugobj(ct, "alloc")); + return ct; +} + +void lscpu_ref_cputype(struct lscpu_cputype *ct) +{ + if (ct) { + ct->refcount++; + DBG(TYPE, ul_debugobj(ct, ">>> ref %d", ct->refcount)); + } +} + +void lscpu_unref_cputype(struct lscpu_cputype *ct) +{ + if (!ct) + return; + + /*DBG(TYPE, ul_debugobj(ct, ">>> unref %d", ct->refcount - 1));*/ + + if (--ct->refcount <= 0) { + DBG(TYPE, 
ul_debugobj(ct, " freeing %s/%s", ct->vendor, ct->model)); + lscpu_cputype_free_topology(ct); + free(ct->vendor); + free(ct->bios_vendor); + free(ct->machinetype); /* s390 */ + free(ct->family); + free(ct->model); + free(ct->modelname); + free(ct->bios_modelname); + free(ct->bios_family); + free(ct->revision); /* alternative for model (ppc) */ + free(ct->stepping); + free(ct->bogomips); + free(ct->flags); + free(ct->mtid); /* maximum thread id (s390) */ + free(ct->addrsz); /* address sizes */ + free(ct->static_mhz); + free(ct->dynamic_mhz); + free(ct); + } +} + +struct lscpu_cputype *lscpu_cputype_get_default(struct lscpu_cxt *cxt) +{ + return cxt->cputypes ? cxt->cputypes[0] : NULL; +} + +#define match(astr, bstr) \ + ((!astr && !bstr) || (astr && bstr && strcmp(astr, bstr) == 0)) + +struct lscpu_cputype *lscpu_add_cputype(struct lscpu_cxt *cxt, struct lscpu_cputype *ct) +{ + DBG(TYPE, ul_debugobj(ct, "add new")); + cxt->cputypes = xrealloc(cxt->cputypes, (cxt->ncputypes + 1) + * sizeof(struct lscpu_cputype *)); + cxt->cputypes[cxt->ncputypes] = ct; + cxt->ncputypes++; + lscpu_ref_cputype(ct); + return ct; +} + +static void fprintf_cputypes(FILE *f, struct lscpu_cxt *cxt) +{ + size_t i; + + for (i = 0; i < cxt->ncputypes; i++) { + struct lscpu_cputype *ct = cxt->cputypes[i]; + + fprintf(f, "\n vendor: %s\n", ct->vendor); + fprintf(f, " machinetype: %s\n", ct->machinetype); + fprintf(f, " family: %s\n", ct->family); + fprintf(f, " model: %s\n", ct->model); + fprintf(f, " modelname: %s\n", ct->modelname); + fprintf(f, " revision: %s\n", ct->revision); + fprintf(f, " stepping: %s\n", ct->stepping); + fprintf(f, " mtid: %s\n", ct->mtid); + fprintf(f, " addrsz: %s\n", ct->addrsz); + } +} + +enum { + CPUINFO_LINE_UNKNOWN, /* unknown line */ + CPUINFO_LINE_CPUTYPE, /* line found in type_patterns[] */ + CPUINFO_LINE_CPU, /* line found in cpu_patterns[] */ + CPUINFO_LINE_CACHE /* line found in cache_pattern[] */ +}; + +/* Describes /proc/cpuinfo fields */ +struct 
cpuinfo_pattern { + int id; /* field ID */ + int domain; /* CPUINFO_LINE_* */ + const char *pattern; /* field name as used in /proc/cpuinfo */ + size_t offset; /* offset in lscpu_cputype or lscpu_cpu struct */ +}; + +/* field identifiers (field name may be different on different archs) */ +enum { + PAT_ADDRESS_SIZES, + PAT_BOGOMIPS, /* global */ + PAT_BOGOMIPS_CPU, /* per-cpu */ + PAT_CPU, + PAT_FAMILY, + PAT_FEATURES, + PAT_FLAGS, + PAT_IMPLEMENTER, + PAT_MAX_THREAD_ID, + PAT_MHZ, + PAT_MHZ_DYNAMIC, + PAT_MHZ_STATIC, + PAT_MODEL, + PAT_MODEL_NAME, + PAT_PART, + PAT_PROCESSOR, + PAT_REVISION, + PAT_STEPPING, + PAT_TYPE, + PAT_VARIANT, + PAT_VENDOR, + PAT_CACHE +}; + +/* + * /proc/cpuinfo to lscpu_cputype conversion + */ +#define DEF_PAT_CPUTYPE(_str, _id, _member) \ + { \ + .id = (_id), \ + .domain = CPUINFO_LINE_CPUTYPE, \ + .pattern = (_str), \ + .offset = offsetof(struct lscpu_cputype, _member), \ + } + +static const struct cpuinfo_pattern type_patterns[] = +{ + /* Sort by fields name! 
*/ + DEF_PAT_CPUTYPE( "ASEs implemented", PAT_FLAGS, flags), /* mips */ + DEF_PAT_CPUTYPE( "BogoMIPS", PAT_BOGOMIPS, bogomips), /* aarch64 */ + DEF_PAT_CPUTYPE( "CPU implementer", PAT_IMPLEMENTER,vendor), /* ARM and aarch64 */ + DEF_PAT_CPUTYPE( "CPU part", PAT_PART, model), /* ARM and aarch64 */ + DEF_PAT_CPUTYPE( "CPU revision", PAT_REVISION, revision), /* aarch64 */ + DEF_PAT_CPUTYPE( "CPU variant", PAT_VARIANT, stepping), /* aarch64 */ + DEF_PAT_CPUTYPE( "Features", PAT_FEATURES, flags), /* aarch64 */ + DEF_PAT_CPUTYPE( "address sizes", PAT_ADDRESS_SIZES, addrsz),/* x86 */ + DEF_PAT_CPUTYPE( "bogomips per cpu", PAT_BOGOMIPS, bogomips), /* s390 */ + DEF_PAT_CPUTYPE( "cpu", PAT_CPU, modelname), /* ppc, sparc */ + DEF_PAT_CPUTYPE( "cpu family", PAT_FAMILY, family), + DEF_PAT_CPUTYPE( "cpu model", PAT_MODEL, model), /* mips */ + DEF_PAT_CPUTYPE( "family", PAT_FAMILY, family), + DEF_PAT_CPUTYPE( "features", PAT_FEATURES, flags), /* s390 */ + DEF_PAT_CPUTYPE( "flags", PAT_FLAGS, flags), /* x86 */ + DEF_PAT_CPUTYPE( "max thread id", PAT_MAX_THREAD_ID, mtid), /* s390 */ + DEF_PAT_CPUTYPE( "model", PAT_MODEL, model), + DEF_PAT_CPUTYPE( "model name", PAT_MODEL_NAME, modelname), + DEF_PAT_CPUTYPE( "revision", PAT_REVISION, revision), + DEF_PAT_CPUTYPE( "stepping", PAT_STEPPING, stepping), + DEF_PAT_CPUTYPE( "type", PAT_TYPE, flags), /* sparc64 */ + DEF_PAT_CPUTYPE( "vendor", PAT_VENDOR, vendor), + DEF_PAT_CPUTYPE( "vendor_id", PAT_VENDOR, vendor), /* s390 */ +}; + +/* + * /proc/cpuinfo to lscpu_cpu conversion + */ +#define DEF_PAT_CPU(_str, _id, _member) \ + { \ + .id = (_id), \ + .domain = CPUINFO_LINE_CPU, \ + .pattern = (_str), \ + .offset = offsetof(struct lscpu_cpu, _member), \ + } + +static const struct cpuinfo_pattern cpu_patterns[] = +{ + /* Sort by fields name! 
*/ + DEF_PAT_CPU( "bogomips", PAT_BOGOMIPS_CPU, bogomips), + DEF_PAT_CPU( "cpu MHz", PAT_MHZ, mhz), + DEF_PAT_CPU( "cpu MHz dynamic", PAT_MHZ_DYNAMIC, dynamic_mhz), /* s390 */ + DEF_PAT_CPU( "cpu MHz static", PAT_MHZ_STATIC, static_mhz), /* s390 */ + DEF_PAT_CPU( "cpu number", PAT_PROCESSOR, logical_id), /* s390 */ + DEF_PAT_CPU( "processor", PAT_PROCESSOR, logical_id), + +}; + +/* + * /proc/cpuinfo to lscpu_cache conversion + */ +#define DEF_PAT_CACHE(_str, _id) \ + { \ + .id = (_id), \ + .domain = CPUINFO_LINE_CACHE, \ + .pattern = (_str) \ + } + +static const struct cpuinfo_pattern cache_patterns[] = +{ + /* Sort by fields name! */ + DEF_PAT_CACHE("cache", PAT_CACHE), +}; + +#define CPUTYPE_PATTERN_BUFSZ 32 + +static int cmp_pattern(const void *a0, const void *b0) +{ + const struct cpuinfo_pattern + *a = (const struct cpuinfo_pattern *) a0, + *b = (const struct cpuinfo_pattern *) b0; + return strcmp(a->pattern, b->pattern); +} + +struct cpuinfo_parser { + struct lscpu_cxt *cxt; + struct lscpu_cpu *curr_cpu; + struct lscpu_cputype *curr_type; + unsigned int curr_type_added : 1; +}; + +static int is_different_cputype(struct lscpu_cputype *ct, size_t offset, const char *value) +{ + switch (offset) { + case offsetof(struct lscpu_cputype, vendor): + return ct->vendor && value && strcmp(ct->vendor, value) != 0; + case offsetof(struct lscpu_cputype, model): + return ct->model && value && strcmp(ct->model, value) != 0; + case offsetof(struct lscpu_cputype, modelname): + return ct->modelname && value && strcmp(ct->modelname, value) != 0; + case offsetof(struct lscpu_cputype, stepping): + return ct->stepping && value && strcmp(ct->stepping, value) != 0; + } + return 0; +} + +/* canonicalize @str -- remove number at the end return the + * number by @keynum. 
This is usable for example for "processor 5" or "cache1" + * cpuinfo lines */ +static char *key_cleanup(char *str, int *keynum) +{ + size_t sz = rtrim_whitespace((unsigned char *)str); + size_t i; + + if (!sz) + return str; + + for (i = sz; i > 0; i--) { + if (!isdigit(str[i - 1])) + break; + } + + if (i < sz) { + char *end = NULL, *p = str + i; + int n; + + errno = 0; + n = strtol(p, &end, 10); + if (errno || !end || end == p) + return str; + + *keynum = n; + str[i] = '\0'; + rtrim_whitespace((unsigned char *)str); + } + return str; +} + +static const struct cpuinfo_pattern *cpuinfo_parse_line(char *str, char **value, int *keynum) +{ + struct cpuinfo_pattern key = { .id = 0 }, *pat; + char *p, *v; + char buf[CPUTYPE_PATTERN_BUFSZ] = { 0 }; + + DBG(GATHER, ul_debug("parse \"%s\"", str)); + + if (!str || !*str) + return NULL; + p = (char *) skip_blank(str); + if (!p || !*p) + return NULL; + + v = strchr(p, ':'); + if (!v || !*v) + return NULL; + + /* prepare name of the field */ + xstrncpy(buf, p, sizeof(buf)); + buf[v - p] = '\0'; + v++; + + /* prepare value */ + v = (char *) skip_space(v); + if (!v || !*v) + return NULL; + + key.pattern = key_cleanup(buf, keynum); + /* CPU-type */ + if ((pat = bsearch(&key, type_patterns, + ARRAY_SIZE(type_patterns), + sizeof(struct cpuinfo_pattern), + cmp_pattern))) + goto found; + + /* CPU */ + if ((pat = bsearch(&key, cpu_patterns, + ARRAY_SIZE(cpu_patterns), + sizeof(struct cpuinfo_pattern), + cmp_pattern))) + goto found; + + /* CACHE */ + if ((pat = bsearch(&key, cache_patterns, + ARRAY_SIZE(cache_patterns), + sizeof(struct cpuinfo_pattern), + cmp_pattern))) + goto found; + + return NULL; +found: + rtrim_whitespace((unsigned char *) v); + *value = v; + return pat; +} + +/* Parse extra cache lines contained within /proc/cpuinfo but which are not + * part of the cache topology information within the sysfs filesystem. This is + * true for all shared caches on e.g. s390. 
When there are layers of + * hypervisors in between it is not knows which CPUs share which caches. + * Therefore information about shared caches is only available in + * /proc/cpuinfo. Format is: + * + * cache<nr> : level=<lvl> type=<type> scope=<scope> size=<size> line_size=<lsz> associativity=<as> + * + * the cache<nr> part is parsed in cpuinfo_parse_line, in this function parses part after ":". + */ +static int cpuinfo_parse_cache(struct lscpu_cxt *cxt, int keynum, char *data) +{ + struct lscpu_cache *cache; + long long size; + char *p, type; + int level; + unsigned int line_size, associativity; + + DBG(GATHER, ul_debugobj(cxt, " parse cpuinfo cache '%s'", data)); + + p = strstr(data, "scope=") + 6; + /* Skip private caches, also present in sysfs */ + if (!p || strncmp(p, "Private", 7) == 0) + return 0; + p = strstr(data, "level="); + if (!p || sscanf(p, "level=%d", &level) != 1) + return 0; + p = strstr(data, "type=") + 5; + if (!p || !*p) + return 0; + type = 0; + if (strncmp(p, "Data", 4) == 0) + type = 'd'; + else if (strncmp(p, "Instruction", 11) == 0) + type = 'i'; + else if (strncmp(p, "Unified", 7) == 0) + type = 'u'; + p = strstr(data, "size="); + if (!p || sscanf(p, "size=%lld", &size) != 1) + return 0; + + p = strstr(data, "line_size="); + if (!p || sscanf(p, "line_size=%u", &line_size) != 1) + return 0; + + p = strstr(data, "associativity="); + if (!p || sscanf(p, "associativity=%u", &associativity) != 1) + return 0; + + cxt->necaches++; + cxt->ecaches = xrealloc(cxt->ecaches, + cxt->necaches * sizeof(struct lscpu_cache)); + cache = &cxt->ecaches[cxt->necaches - 1]; + memset(cache, 0 , sizeof(*cache)); + + if (type == 'i' || type == 'd') + xasprintf(&cache->name, "L%d%c", level, type); + else + xasprintf(&cache->name, "L%d", level); + + cache->nth = keynum; + cache->level = level; + cache->size = size * 1024; + cache->ways_of_associativity = associativity; + cache->coherency_line_size = line_size; + /* Number of sets for s390. 
For safety, just check divide by zero */ + cache->number_of_sets = line_size ? (cache->size / line_size): 0; + cache->number_of_sets = associativity ? (cache->number_of_sets / associativity) : 0; + + cache->type = type == 'i' ? xstrdup("Instruction") : + type == 'd' ? xstrdup("Data") : + type == 'u' ? xstrdup("Unified") : NULL; + return 1; +} + +int lscpu_read_cpuinfo(struct lscpu_cxt *cxt) +{ + FILE *fp; + char buf[BUFSIZ]; + size_t i; + struct lscpu_cputype *ct; + struct cpuinfo_parser _pr = { .cxt = cxt }, *pr = &_pr; + + assert(cxt->npossibles); /* lscpu_create_cpus() required */ + assert(cxt->cpus); + + DBG(GATHER, ul_debugobj(cxt, "reading cpuinfo")); + + fp = ul_path_fopen(cxt->procfs, "r", "cpuinfo"); + if (!fp) + err(EXIT_FAILURE, _("cannot open %s"), "/proc/cpuinfo"); + + do { + int keynum = -1; + char *p = NULL, *value = NULL; + const struct cpuinfo_pattern *pattern; + + if (fgets(buf, sizeof(buf), fp) != NULL) + p = (char *) skip_space(buf); + + if (p == NULL || (*buf && !*p)) { + /* Blank line separates information */ + if (p == NULL) + break; /* fgets() returns nothing; EOF */ + continue; + } + + rtrim_whitespace((unsigned char *) buf); + + /* parse */ + pattern = cpuinfo_parse_line(p, &value, &keynum); + if (!pattern) { + DBG(GATHER, ul_debug("'%s' not found", buf)); + continue; + } + + /* set data */ + switch (pattern->domain) { + case CPUINFO_LINE_CPU: + if (pattern->id == PAT_PROCESSOR) { + /* switch CPU */ + int id = 0; + + if (keynum >= 0) + id = keynum; + else { + uint32_t n; + if (ul_strtou32(value, &n, 10) == 0) + id = n; + } + + if (pr->curr_cpu && pr->curr_type) + lscpu_cpu_set_type(pr->curr_cpu, pr->curr_type); + + lscpu_unref_cpu(pr->curr_cpu); + pr->curr_cpu = lscpu_get_cpu(cxt, id); + + if (!pr->curr_cpu) + DBG(GATHER, ul_debug("*** cpu ID '%d' undefined", id)); + else + DBG(GATHER, ul_debug(" switch to CPU %d", id)); + lscpu_ref_cpu(pr->curr_cpu); + break; + } + if (!pr->curr_cpu) + DBG(GATHER, ul_debug("*** cpu data before cpu ID")); 
+ else + strdup_to_offset(pr->curr_cpu, pattern->offset, value); + + if (pattern->id == PAT_MHZ_DYNAMIC && pr->curr_type && !pr->curr_type->dynamic_mhz) + pr->curr_type->dynamic_mhz = xstrdup(value); + if (pattern->id == PAT_MHZ_STATIC && pr->curr_type && !pr->curr_type->static_mhz) + pr->curr_type->static_mhz = xstrdup(value); + if (pattern->id == PAT_BOGOMIPS_CPU && pr->curr_type && !pr->curr_type->bogomips) + pr->curr_type->bogomips = xstrdup(value); + if (pattern->id == PAT_MHZ && pr->curr_cpu && value) { + errno = 0; + pr->curr_cpu->mhz_cur_freq = (float) c_strtod(value, NULL); + if (errno) + pr->curr_cpu->mhz_cur_freq = 0; + } + break; + case CPUINFO_LINE_CPUTYPE: + if (pr->curr_type && is_different_cputype(pr->curr_type, pattern->offset, value)) { + lscpu_unref_cputype(pr->curr_type); + pr->curr_type = NULL; + } + if (!pr->curr_type) { + pr->curr_type = lscpu_new_cputype(); + lscpu_add_cputype(cxt, pr->curr_type); + } + + strdup_to_offset(pr->curr_type, pattern->offset, value); + break; + case CPUINFO_LINE_CACHE: + if (pattern->id != PAT_CACHE) + break; + cpuinfo_parse_cache(cxt, keynum, value); + break; + } + } while (1); + + DBG(GATHER, fprintf_cputypes(stderr, cxt)); + + if (pr->curr_cpu && !pr->curr_cpu->type) + lscpu_cpu_set_type(pr->curr_cpu, pr->curr_type); + + lscpu_unref_cputype(pr->curr_type); + lscpu_unref_cpu(pr->curr_cpu); + + fclose(fp); + lscpu_sort_caches(cxt->ecaches, cxt->necaches); + + /* Set the default type to CPUs which are missing (or not parsed) + * in cpuinfo */ + ct = lscpu_cputype_get_default(cxt); + for (i = 0; ct && i < cxt->npossibles; i++) { + struct lscpu_cpu *cpu = cxt->cpus[i]; + + if (cpu && !cpu->type) + lscpu_cpu_set_type(cpu, ct); + } + + return 0; +} + +struct lscpu_arch *lscpu_read_architecture(struct lscpu_cxt *cxt) +{ + struct utsname utsbuf; + struct lscpu_arch *ar; + struct lscpu_cputype *ct; + + assert(cxt); + + DBG(GATHER, ul_debug("reading architecture")); + + if (uname(&utsbuf) == -1) + err(EXIT_FAILURE, 
_("error: uname failed")); + + ar = xcalloc(1, sizeof(*cxt->arch)); + ar->name = xstrdup(utsbuf.machine); + + if (cxt->noalive) + /* reading info from any /{sys,proc} dump, don't mix it with + * information about our real CPU */ + ; + else { +#if defined(__alpha__) || defined(__ia64__) + ar->bit64 = 1; /* 64bit platforms only */ +#endif + /* platforms with 64bit flag in /proc/cpuinfo, define + * 32bit default here */ +#if defined(__i386__) || defined(__x86_64__) || \ + defined(__s390x__) || defined(__s390__) || defined(__sparc_v9__) + ar->bit32 = 1; +#endif + +#if defined(__aarch64__) + { + /* personality() is the most reliable way (since 4.7) + * to determine aarch32 support */ + int pers = personality(PER_LINUX32); + if (pers != -1) { + personality(pers); + ar->bit32 = 1; + } + ar->bit64 = 1; + } +#endif + } + + ct = lscpu_cputype_get_default(cxt); + if (ct && ct->flags) { + char buf[BUFSIZ]; + + snprintf(buf, sizeof(buf), " %s ", ct->flags); + if (strstr(buf, " lm ")) + ar->bit32 = 1, ar->bit64 = 1; /* x86_64 */ + if (strstr(buf, " zarch ")) + ar->bit32 = 1, ar->bit64 = 1; /* s390x */ + if (strstr(buf, " sun4v ") || strstr(buf, " sun4u ")) + ar->bit32 = 1, ar->bit64 = 1; /* sparc64 */ + } + + if (ar->name && !cxt->noalive) { + if (strcmp(ar->name, "ppc64") == 0) + ar->bit32 = 1, ar->bit64 = 1; + else if (strcmp(ar->name, "ppc") == 0) + ar->bit32 = 1; + } + + DBG(GATHER, ul_debugobj(ar, "arch: name=%s %s %s", + ar->name, + ar->bit64 ? "64-bit" : "", + ar->bit64 ? 
"32-bit" : "")); + return ar; +} + +void lscpu_free_architecture(struct lscpu_arch *ar) +{ + if (!ar) + return; + free(ar->name); + free(ar); +} + +int lscpu_read_cpulists(struct lscpu_cxt *cxt) +{ + cpu_set_t *cpuset = NULL; + + assert(cxt); + DBG(GATHER, ul_debugobj(cxt, "reading cpulists")); + + if (ul_path_read_s32(cxt->syscpu, &cxt->maxcpus, "kernel_max") == 0) + /* note that kernel_max is maximum index [NR_CPUS-1] */ + cxt->maxcpus += 1; + + else if (!cxt->noalive) + /* the root is '/' so we are working with data from the current kernel */ + cxt->maxcpus = get_max_number_of_cpus(); + + if (cxt->maxcpus <= 0) + /* error or we are reading some /sys snapshot instead of the + * real /sys, let's use any crazy number... */ + cxt->maxcpus = 2048; + + cxt->setsize = CPU_ALLOC_SIZE(cxt->maxcpus); + + /* create CPUs from possible mask */ + if (ul_path_readf_cpulist(cxt->syscpu, &cpuset, cxt->maxcpus, "possible") == 0) { + lscpu_create_cpus(cxt, cpuset, cxt->setsize); + cpuset_free(cpuset); + cpuset = NULL; + } else + err(EXIT_FAILURE, _("failed to determine number of CPUs: %s"), + _PATH_SYS_CPU "/possible"); + + + /* get mask for present CPUs */ + if (ul_path_readf_cpulist(cxt->syscpu, &cxt->present, cxt->maxcpus, "present") == 0) + cxt->npresents = CPU_COUNT_S(cxt->setsize, cxt->present); + + /* get mask for online CPUs */ + if (ul_path_readf_cpulist(cxt->syscpu, &cxt->online, cxt->maxcpus, "online") == 0) + cxt->nonlines = CPU_COUNT_S(cxt->setsize, cxt->online); + + return 0; +} + +#if defined(HAVE_LIBRTAS) +# define PROCESSOR_MODULE_INFO 43 +static int strbe16toh(const char *buf, int offset) +{ + return (buf[offset] << 8) + buf[offset+1]; +} +#endif + +/* some extra information for the default CPU type */ +int lscpu_read_archext(struct lscpu_cxt *cxt) +{ + FILE *f; + char buf[BUFSIZ]; + struct lscpu_cputype *ct; + + DBG(GATHER, ul_debugobj(cxt, "reading extra arch info")); + + assert(cxt); + ct = lscpu_cputype_get_default(cxt); + if (!ct) + return -EINVAL; + + /* 
get dispatching mode */ + if (ul_path_read_s32(cxt->syscpu, &ct->dispatching, "dispatching") != 0) + ct->dispatching = -1; + + /* get cpufreq boost mode */ + if (ul_path_read_s32(cxt->syscpu, &ct->freqboost, "cpufreq/boost") != 0) + ct->freqboost = -1; + + if ((f = ul_path_fopen(cxt->procfs, "r", "sysinfo"))) { + while (fgets(buf, sizeof(buf), f) != NULL) { + if (lookup(buf, "Type", &ct->machinetype)) + break; + } + fclose(f); + } + +#if defined(HAVE_LIBRTAS) + /* Get PowerPC specific info */ + if (!cxt->noalive) { + int rc, len, ntypes; + + ct->physsockets = ct->physchips = ct->physcoresperchip = 0; + + rc = rtas_get_sysparm(PROCESSOR_MODULE_INFO, sizeof(buf), buf); + if (rc < 0) + goto nortas; + + len = strbe16toh(buf, 0); + if (len < 8) + goto nortas; + + ntypes = strbe16toh(buf, 2); + if (!ntypes) + goto nortas; + + ct->physsockets = strbe16toh(buf, 4); + ct->physchips = strbe16toh(buf, 6); + ct->physcoresperchip = strbe16toh(buf, 8); + } +nortas: +#endif + return 0; +} + +static int cmp_vulnerability_name(const void *a0, const void *b0) +{ + const struct lscpu_vulnerability + *a = (const struct lscpu_vulnerability *) a0, + *b = (const struct lscpu_vulnerability *) b0; + return strcmp(a->name, b->name); +} + +int lscpu_read_vulnerabilities(struct lscpu_cxt *cxt) +{ + struct dirent *d; + DIR *dir; + size_t n = 0; + + assert(cxt); + + DBG(GATHER, ul_debugobj(cxt, "reading vulnerabilities")); + + dir = ul_path_opendir(cxt->syscpu, "vulnerabilities"); + if (!dir) + return 0; + + cxt->nvuls = n = 0; + while (xreaddir(dir)) + n++; + if (!n) { + closedir(dir); + return 0; + } + + rewinddir(dir); + cxt->vuls = xcalloc(n, sizeof(struct lscpu_vulnerability)); + + while (cxt->nvuls < n && (d = xreaddir(dir))) { + char *str, *p; + struct lscpu_vulnerability *vu; + +#ifdef _DIRENT_HAVE_D_TYPE + if (d->d_type == DT_DIR || d->d_type == DT_UNKNOWN) + continue; +#endif + if (ul_path_readf_string(cxt->syscpu, &str, + "vulnerabilities/%s", d->d_name) <= 0) + continue; + + vu = 
&cxt->vuls[cxt->nvuls++]; + + /* Name */ + vu->name = xstrdup(d->d_name); + *vu->name = toupper(*vu->name); + strrep(vu->name, '_', ' '); + + /* Description */ + vu->text = str; + p = (char *) startswith(vu->text, "Mitigation"); + if (p) { + *p = ';'; + strrem(vu->text, ':'); + } + } + closedir(dir); + + qsort(cxt->vuls, cxt->nvuls, + sizeof(struct lscpu_vulnerability), cmp_vulnerability_name); + + return 0; +} + +static inline int is_node_dirent(struct dirent *d) +{ + return + d && +#ifdef _DIRENT_HAVE_D_TYPE + (d->d_type == DT_DIR || d->d_type == DT_UNKNOWN) && +#endif + strncmp(d->d_name, "node", 4) == 0 && + isdigit_string(d->d_name + 4); +} + +static int nodecmp(const void *ap, const void *bp) +{ + int *a = (int *) ap, *b = (int *) bp; + return *a - *b; +} + +int lscpu_read_numas(struct lscpu_cxt *cxt) +{ + size_t i = 0; + DIR *dir; + struct dirent *d; + struct path_cxt *sys; + + assert(!cxt->nnodes); + + + sys = ul_new_path(_PATH_SYS_NODE); + if (!sys) + err(EXIT_FAILURE, _("failed to initialize %s handler"), _PATH_SYS_NODE); + + ul_path_set_prefix(sys, cxt->prefix); + + dir = ul_path_opendir(sys, NULL); + if (!dir) + goto done; + + while ((d = readdir(dir))) { + if (is_node_dirent(d)) + cxt->nnodes++; + } + + if (!cxt->nnodes) { + closedir(dir); + goto done; + } + + cxt->nodemaps = xcalloc(cxt->nnodes, sizeof(cpu_set_t *)); + cxt->idx2nodenum = xmalloc(cxt->nnodes * sizeof(int)); + + rewinddir(dir); + for (i = 0; (d = readdir(dir)) && i < cxt->nnodes;) { + if (is_node_dirent(d)) + cxt->idx2nodenum[i++] = strtol_or_err(((d->d_name) + 4), + _("Failed to extract the node number")); + } + closedir(dir); + qsort(cxt->idx2nodenum, cxt->nnodes, sizeof(int), nodecmp); + + /* information about how nodes share different CPUs */ + for (i = 0; i < cxt->nnodes; i++) + ul_path_readf_cpuset(sys, &cxt->nodemaps[i], cxt->maxcpus, + "node%d/cpumap", cxt->idx2nodenum[i]); +done: + DBG(GATHER, ul_debugobj(cxt, "read %zu numas", cxt->nnodes)); + + ul_unref_path(sys); + return 0; 
+} diff --git a/sys-utils/lscpu-dmi.c b/sys-utils/lscpu-dmi.c new file mode 100644 index 0000000..9b63dd6 --- /dev/null +++ b/sys-utils/lscpu-dmi.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 FUJITSU LIMITED. All rights reserved. + */ + +#include "lscpu.h" + +void to_dmi_header(struct lscpu_dmi_header *h, uint8_t *data) +{ + h->type = data[0]; + h->length = data[1]; + memcpy(&h->handle, data + 2, sizeof(h->handle)); + h->data = data; +} + +char *dmi_string(const struct lscpu_dmi_header *dm, uint8_t s) +{ + char *bp = (char *)dm->data; + + if (!s || !bp) + return NULL; + + bp += dm->length; + while (s > 1 && *bp) { + bp += strlen(bp); + bp++; + s--; + } + + return !*bp ? NULL : bp; +} + +int parse_dmi_table(uint16_t len, uint16_t num, + uint8_t *data, + struct dmi_info *di) +{ + uint8_t *buf = data; + int rc = -1; + int i = 0; + + /* 4 is the length of an SMBIOS structure header */ + while (i < num && data + 4 <= buf + len) { + uint8_t *next; + struct lscpu_dmi_header h; + + to_dmi_header(&h, data); + + /* + * If a short entry is found (less than 4 bytes), not only it + * is invalid, but we cannot reliably locate the next entry. + * Better stop at this point. 
+ */ + if (h.length < 4) + goto done; + + /* look for the next handle */ + next = data + h.length; + while (next - buf + 1 < len && (next[0] != 0 || next[1] != 0)) + next++; + next += 2; + switch (h.type) { + case 0: + di->vendor = dmi_string(&h, data[0x04]); + break; + case 1: + di->manufacturer = dmi_string(&h, data[0x04]); + di->product = dmi_string(&h, data[0x05]); + break; + case 4: + /* Get the first processor information */ + if (di->sockets == 0) { + di->processor_manufacturer = dmi_string(&h, data[0x7]); + di->processor_version = dmi_string(&h, data[0x10]); + di->current_speed = *((uint16_t *)(&data[0x16])); + di->part_num = dmi_string(&h, data[0x22]); + + if (data[0x6] == 0xfe) + di->processor_family = *((uint16_t *)(&data[0x28])); + else + di->processor_family = data[0x6]; + } + di->sockets++; + break; + default: + break; + } + + data = next; + i++; + } + rc = 0; +done: + return rc; +} + +int dmi_decode_cputype(struct lscpu_cputype *ct) +{ + static char const sys_fw_dmi_tables[] = _PATH_SYS_DMI; + struct dmi_info di = { }; + struct stat st; + uint8_t *data; + int rc = 0; + char buf[100] = { }; + + if (stat(sys_fw_dmi_tables, &st)) + return rc; + + data = get_mem_chunk(0, st.st_size, sys_fw_dmi_tables); + if (!data) + return rc; + + rc = parse_dmi_table(st.st_size, st.st_size/4, data, &di); + if (rc < 0) { + free(data); + return rc; + } + + if (di.processor_manufacturer) + ct->bios_vendor = xstrdup(di.processor_manufacturer); + + snprintf(buf, sizeof(buf), "%s %s CPU @ %d.%dGHz", + (di.processor_version ?: ""), (di.part_num ?: ""), + di.current_speed/1000, (di.current_speed % 1000) / 100); + ct->bios_modelname = xstrdup(buf); + + /* Get CPU family */ + memset(buf, 0, sizeof(buf)); + snprintf(buf, sizeof(buf), "%d", di.processor_family); + ct->bios_family = xstrdup(buf); + + free(data); + return 0; +} + +size_t get_number_of_physical_sockets_from_dmi(void) +{ + static char const sys_fw_dmi_tables[] = _PATH_SYS_DMI; + struct dmi_info di; + struct stat st; + 
uint8_t *data; + int rc = 0; + + if (stat(sys_fw_dmi_tables, &st)) + return rc; + + data = get_mem_chunk(0, st.st_size, sys_fw_dmi_tables); + if (!data) + return rc; + + memset(&di, 0, sizeof(struct dmi_info)); + rc = parse_dmi_table(st.st_size, st.st_size/4, data, &di); + + free(data); + + if ((rc < 0) || !di.sockets) + return 0; + else + return di.sockets; +} diff --git a/sys-utils/lscpu-topology.c b/sys-utils/lscpu-topology.c new file mode 100644 index 0000000..fe4da7c --- /dev/null +++ b/sys-utils/lscpu-topology.c @@ -0,0 +1,661 @@ +#include <errno.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> + +#include "lscpu.h" + +/* add @set to the @ary, unnecessary set is deallocated. */ +static int add_cpuset_to_array(cpu_set_t **ary, size_t *items, cpu_set_t *set, size_t setsize) +{ + size_t i; + + if (!ary) + return -EINVAL; + + for (i = 0; i < *items; i++) { + if (CPU_EQUAL_S(setsize, set, ary[i])) + break; + } + if (i == *items) { + ary[*items] = set; + ++*items; + return 0; + } + CPU_FREE(set); + return 1; +} + +static void free_cpuset_array(cpu_set_t **ary, int items) +{ + int i; + + if (!ary) + return; + for (i = 0; i < items; i++) + free(ary[i]); + free(ary); +} + +void lscpu_cputype_free_topology(struct lscpu_cputype *ct) +{ + if (!ct) + return; + free_cpuset_array(ct->coremaps, ct->ncores); + free_cpuset_array(ct->socketmaps, ct->nsockets); + free_cpuset_array(ct->bookmaps, ct->nbooks); + free_cpuset_array(ct->drawermaps, ct->ndrawers); +} + +void lscpu_free_caches(struct lscpu_cache *caches, size_t n) +{ + size_t i; + + if (!caches) + return; + + for (i = 0; i < n; i++) { + struct lscpu_cache *c = &caches[i]; + + DBG(MISC, ul_debug(" freeing cache #%zu %s::%d", + i, c->name, c->id)); + + free(c->name); + free(c->type); + free(c->allocation_policy); + free(c->write_policy); + free(c->sharedmap); + } + free(caches); +} + +static int cmp_cache(const void 
*a0, const void *b0) +{ + const struct lscpu_cache + *a = (const struct lscpu_cache *) a0, + *b = (const struct lscpu_cache *) b0; + return strcmp(a->name, b->name); +} + +void lscpu_sort_caches(struct lscpu_cache *caches, size_t n) +{ + if (caches && n) + qsort(caches, n, sizeof(struct lscpu_cache), cmp_cache); +} + + +/* Read topology for specified type */ +static int cputype_read_topology(struct lscpu_cxt *cxt, struct lscpu_cputype *ct) +{ + size_t i, npos; + struct path_cxt *sys; + int nthreads = 0, sw_topo = 0; + FILE *fd; + + sys = cxt->syscpu; /* /sys/devices/system/cpu/ */ + npos = cxt->npossibles; /* possible CPUs */ + + DBG(TYPE, ul_debugobj(ct, "reading %s/%s/%s topology", + ct->vendor ?: "", ct->model ?: "", ct->modelname ?:"")); + + for (i = 0; i < cxt->npossibles; i++) { + struct lscpu_cpu *cpu = cxt->cpus[i]; + cpu_set_t *thread_siblings = NULL, *core_siblings = NULL; + cpu_set_t *book_siblings = NULL, *drawer_siblings = NULL; + int num, n; + + if (!cpu || cpu->type != ct) + continue; + + num = cpu->logical_id; + if (ul_path_accessf(sys, F_OK, + "cpu%d/topology/thread_siblings", num) != 0) + continue; + + /* read topology maps */ + ul_path_readf_cpuset(sys, &thread_siblings, cxt->maxcpus, + "cpu%d/topology/thread_siblings", num); + ul_path_readf_cpuset(sys, &core_siblings, cxt->maxcpus, + "cpu%d/topology/core_siblings", num); + ul_path_readf_cpuset(sys, &book_siblings, cxt->maxcpus, + "cpu%d/topology/book_siblings", num); + ul_path_readf_cpuset(sys, &drawer_siblings, cxt->maxcpus, + "cpu%d/topology/drawer_siblings", num); + + n = CPU_COUNT_S(cxt->setsize, thread_siblings); + if (!n) + n = 1; + if (n > nthreads) + nthreads = n; + + /* Allocate arrays for topology maps. + * + * For each map we make sure that it can have up to ncpuspos + * entries. This is because we cannot reliably calculate the + * number of cores, sockets and books on all architectures. + * E.g. 
completely virtualized architectures like s390 may + * have multiple sockets of different sizes. + */ + if (!ct->coremaps) + ct->coremaps = xcalloc(npos, sizeof(cpu_set_t *)); + if (!ct->socketmaps) + ct->socketmaps = xcalloc(npos, sizeof(cpu_set_t *)); + if (!ct->bookmaps && book_siblings) + ct->bookmaps = xcalloc(npos, sizeof(cpu_set_t *)); + if (!ct->drawermaps && drawer_siblings) + ct->drawermaps = xcalloc(npos, sizeof(cpu_set_t *)); + + /* add to topology maps */ + add_cpuset_to_array(ct->coremaps, &ct->ncores, thread_siblings, cxt->setsize); + add_cpuset_to_array(ct->socketmaps, &ct->nsockets, core_siblings, cxt->setsize); + + if (book_siblings) + add_cpuset_to_array(ct->bookmaps, &ct->nbooks, book_siblings, cxt->setsize); + if (drawer_siblings) + add_cpuset_to_array(ct->drawermaps, &ct->ndrawers, drawer_siblings, cxt->setsize); + + } + + /* s390 detects its cpu topology via /proc/sysinfo, if present. + * Using simply the cpu topology masks in sysfs will not give + * usable results since everything is virtualized. E.g. + * virtual core 0 may have only 1 cpu, but virtual core 2 may + * have five cpus. + * If the cpu topology is not exported (e.g. 2nd level guest) + * fall back to old calculation scheme. + */ + if ((fd = ul_path_fopen(cxt->procfs, "r", "sysinfo"))) { + int t0, t1; + char buf[BUFSIZ]; + + DBG(TYPE, ul_debugobj(ct, " reading sysinfo")); + + while (fgets(buf, sizeof(buf), fd) != NULL) { + if (sscanf(buf, "CPU Topology SW: %d %d %zu %zu %zu %zu", + &t0, &t1, + &ct->ndrawers_per_system, + &ct->nbooks_per_drawer, + &ct->nsockets_per_book, + &ct->ncores_per_socket) == 6) { + sw_topo = 1; + DBG(TYPE, ul_debugobj(ct, " using SW topology")); + break; + } + } + if (fd) + fclose(fd); + } + + ct->nthreads_per_core = nthreads; + if (ct->mtid) { + uint64_t x; + if (ul_strtou64(ct->mtid, &x, 10) == 0 && x <= ULONG_MAX) + ct->nthreads_per_core = (size_t) x + 1; + } + + if (!sw_topo) { + ct->ncores_per_socket = ct->nsockets ? 
ct->ncores / ct->nsockets : 0; + ct->nsockets_per_book = ct->nbooks ? ct->nsockets / ct->nbooks : 0; + ct->nbooks_per_drawer = ct->ndrawers ? ct->nbooks / ct->ndrawers : 0; + ct->ndrawers_per_system = ct->ndrawers; + } + + DBG(TYPE, ul_debugobj(ct, " nthreads: %zu (per core)", ct->nthreads_per_core)); + DBG(TYPE, ul_debugobj(ct, " ncores: %zu (%zu per socket)", ct->ncores, ct->ncores_per_socket)); + DBG(TYPE, ul_debugobj(ct, " nsockets: %zu (%zu per books)", ct->nsockets, ct->nsockets_per_book)); + DBG(TYPE, ul_debugobj(ct, " nbooks: %zu (%zu per drawer)", ct->nbooks, ct->nbooks_per_drawer)); + DBG(TYPE, ul_debugobj(ct, " ndrawers: %zu (%zu per system)", ct->ndrawers, ct->ndrawers_per_system)); + + return 0; +} + +/* count size of all instances of the "name" */ +size_t lscpu_get_cache_full_size(struct lscpu_cxt *cxt, const char *name, int *instances) +{ + size_t i, sz = 0; + + if (instances) + *instances = 0; + + for (i = 0; i < cxt->ncaches; i++) { + if (strcmp(cxt->caches[i].name, name) == 0) { + sz += cxt->caches[i].size; + if (instances) + (*instances)++; + } + } + + return sz; +} + +struct lscpu_cache *lscpu_cpu_get_cache(struct lscpu_cxt *cxt, + struct lscpu_cpu *cpu, const char *name) +{ + size_t i; + + for (i = 0; i < cxt->ncaches; i++) { + struct lscpu_cache *ca = &cxt->caches[i]; + + if (strcmp(ca->name, name) == 0 && + CPU_ISSET_S(cpu->logical_id, cxt->setsize, ca->sharedmap)) + return ca; + } + + return NULL; +} + +/* + * The cache is identified by type+level+id. 
+ */ +static struct lscpu_cache *get_cache(struct lscpu_cxt *cxt, + const char *type, int level, int id) +{ + size_t i; + + for (i = 0; i < cxt->ncaches; i++) { + struct lscpu_cache *ca = &cxt->caches[i]; + if (ca->id == id && + ca->level == level && + strcmp(ca->type, type) == 0) + return ca; + } + return NULL; +} + +static struct lscpu_cache *add_cache(struct lscpu_cxt *cxt, + const char *type, int level, int id) +{ + struct lscpu_cache *ca; + + cxt->ncaches++; + cxt->caches = xrealloc(cxt->caches, + cxt->ncaches * sizeof(*cxt->caches)); + + ca = &cxt->caches[cxt->ncaches - 1]; + memset(ca, 0 , sizeof(*ca)); + + ca->id = id; + ca->level = level; + ca->type = xstrdup(type); + + DBG(GATHER, ul_debugobj(cxt, "add cache %s%d::%d", type, level, id)); + return ca; +} + +static int mk_cache_id(struct lscpu_cxt *cxt, struct lscpu_cpu *cpu, char *type, int level) +{ + size_t i; + int idx = 0; + + for (i = 0; i < cxt->ncaches; i++) { + struct lscpu_cache *ca = &cxt->caches[i]; + + if (ca->level != level || strcmp(ca->type, type) != 0) + continue; + + if (ca->sharedmap && + CPU_ISSET_S(cpu->logical_id, cxt->setsize, ca->sharedmap)) + return idx; + idx++; + } + + return idx; +} + +static int read_sparc_onecache(struct lscpu_cxt *cxt, struct lscpu_cpu *cpu, + int level, char *typestr, int type) +{ + struct lscpu_cache *ca; + struct path_cxt *sys = cxt->syscpu; + int num = cpu->logical_id; + uint32_t size; + int rc, id; + char buf[32]; + + if (type) + snprintf(buf, sizeof(buf), "l%d_%c", level, type); + else + snprintf(buf, sizeof(buf), "l%d_", level); + + rc = ul_path_readf_u32(sys, &size, + "cpu%d/%scache_size", num, buf); + if (rc != 0) + return rc; + + DBG(CPU, ul_debugobj(cpu, "#%d reading sparc %s cache", num, buf)); + + id = mk_cache_id(cxt, cpu, typestr, level); + + ca = get_cache(cxt, typestr, level, id); + if (!ca) + ca = add_cache(cxt, typestr, level, id); + + if (!ca->name) { + ul_path_readf_u32(sys, &ca->coherency_line_size, + "cpu%d/%scache_line_size", num, buf); 
+ assert(ca->type); + + if (type) + snprintf(buf, sizeof(buf), "L%d%c", ca->level, type); + else + snprintf(buf, sizeof(buf), "L%d", ca->level); + ca->name = xstrdup(buf); + ca->size = size; + } + /* There is no sharedmap of the cache in /sys, we assume that caches are + * not shared. Send a patch if your /sys provides another information. + */ + if (!ca->sharedmap) { + size_t setsize = 0; + + ca->sharedmap = cpuset_alloc(cxt->maxcpus, &setsize, NULL); + CPU_ZERO_S(setsize, ca->sharedmap); + CPU_SET_S(num, setsize, ca->sharedmap); + } + + return 0; +} + +static int read_sparc_caches(struct lscpu_cxt *cxt, struct lscpu_cpu *cpu) +{ + read_sparc_onecache(cxt, cpu, 1, "Instruction", 'i'); + read_sparc_onecache(cxt, cpu, 1, "Data", 'd'); + read_sparc_onecache(cxt, cpu, 2, "Unified", 0); + read_sparc_onecache(cxt, cpu, 2, "Unified", 0); + + return 0; +} + +static int read_caches(struct lscpu_cxt *cxt, struct lscpu_cpu *cpu) +{ + char buf[256]; + struct path_cxt *sys = cxt->syscpu; + int num = cpu->logical_id; + size_t i, ncaches = 0; + + while (ul_path_accessf(sys, F_OK, + "cpu%d/cache/index%zu", + num, ncaches) == 0) + ncaches++; + + if (ncaches == 0 && ul_path_accessf(sys, F_OK, + "cpu%d/l1_icache_size", num) == 0) + return read_sparc_caches(cxt, cpu); + + DBG(CPU, ul_debugobj(cpu, "#%d reading %zd caches", num, ncaches)); + + for (i = 0; i < ncaches; i++) { + struct lscpu_cache *ca; + int id, level; + + if (ul_path_readf_s32(sys, &id, "cpu%d/cache/index%zu/id", num, i) != 0) + id = -1; + if (ul_path_readf_s32(sys, &level, "cpu%d/cache/index%zu/level", num, i) != 0) + continue; + if (ul_path_readf_buffer(sys, buf, sizeof(buf), + "cpu%d/cache/index%zu/type", num, i) <= 0) + continue; + + if (id == -1) + id = mk_cache_id(cxt, cpu, buf, level); + + ca = get_cache(cxt, buf, level, id); + if (!ca) + ca = add_cache(cxt, buf, level, id); + + if (!ca->name) { + int type = 0; + + assert(ca->type); + + if (!strcmp(ca->type, "Data")) + type = 'd'; + else if (!strcmp(ca->type, 
"Instruction")) + type = 'i'; + + if (type) + snprintf(buf, sizeof(buf), "L%d%c", ca->level, type); + else + snprintf(buf, sizeof(buf), "L%d", ca->level); + + ca->name = xstrdup(buf); + + ul_path_readf_u32(sys, &ca->ways_of_associativity, + "cpu%d/cache/index%zu/ways_of_associativity", num, i); + ul_path_readf_u32(sys, &ca->physical_line_partition, + "cpu%d/cache/index%zu/physical_line_partition", num, i); + ul_path_readf_u32(sys, &ca->number_of_sets, + "cpu%d/cache/index%zu/number_of_sets", num, i); + ul_path_readf_u32(sys, &ca->coherency_line_size, + "cpu%d/cache/index%zu/coherency_line_size", num, i); + + ul_path_readf_string(sys, &ca->allocation_policy, + "cpu%d/cache/index%zu/allocation_policy", num, i); + ul_path_readf_string(sys, &ca->write_policy, + "cpu%d/cache/index%zu/write_policy", num, i); + + /* cache size */ + if (ul_path_readf_buffer(sys, buf, sizeof(buf), + "cpu%d/cache/index%zu/size", num, i) > 0) + parse_size(buf, &ca->size, NULL); + else + ca->size = 0; + } + + if (!ca->sharedmap) + /* information about how CPUs share different caches */ + ul_path_readf_cpuset(sys, &ca->sharedmap, cxt->maxcpus, + "cpu%d/cache/index%zu/shared_cpu_map", num, i); + } + + return 0; +} + +static int read_ids(struct lscpu_cxt *cxt, struct lscpu_cpu *cpu) +{ + struct path_cxt *sys = cxt->syscpu; + int num = cpu->logical_id; + + if (ul_path_accessf(sys, F_OK, "cpu%d/topology", num) != 0) + return 0; + + DBG(CPU, ul_debugobj(cpu, "#%d reading IDs", num)); + + if (ul_path_readf_s32(sys, &cpu->coreid, "cpu%d/topology/core_id", num) != 0) + cpu->coreid = -1; + if (ul_path_readf_s32(sys, &cpu->socketid, "cpu%d/topology/physical_package_id", num) != 0) + cpu->socketid = -1; + if (ul_path_readf_s32(sys, &cpu->bookid, "cpu%d/topology/book_id", num) != 0) + cpu->bookid = -1; + if (ul_path_readf_s32(sys, &cpu->drawerid, "cpu%d/topology/drawer_id", num) != 0) + cpu->drawerid = -1; + + return 0; +} + +static int read_polarization(struct lscpu_cxt *cxt, struct lscpu_cpu *cpu) +{ + 
struct path_cxt *sys = cxt->syscpu; + int num = cpu->logical_id; + char mode[64]; + + if (ul_path_accessf(sys, F_OK, "cpu%d/polarization", num) != 0) + return 0; + + ul_path_readf_buffer(sys, mode, sizeof(mode), "cpu%d/polarization", num); + + DBG(CPU, ul_debugobj(cpu, "#%d reading polar=%s", num, mode)); + + if (strncmp(mode, "vertical:low", sizeof(mode)) == 0) + cpu->polarization = POLAR_VLOW; + else if (strncmp(mode, "vertical:medium", sizeof(mode)) == 0) + cpu->polarization = POLAR_VMEDIUM; + else if (strncmp(mode, "vertical:high", sizeof(mode)) == 0) + cpu->polarization = POLAR_VHIGH; + else if (strncmp(mode, "horizontal", sizeof(mode)) == 0) + cpu->polarization = POLAR_HORIZONTAL; + else + cpu->polarization = POLAR_UNKNOWN; + + if (cpu->type) + cpu->type->has_polarization = 1; + return 0; +} + +static int read_address(struct lscpu_cxt *cxt, struct lscpu_cpu *cpu) +{ + struct path_cxt *sys = cxt->syscpu; + int num = cpu->logical_id; + + if (ul_path_accessf(sys, F_OK, "cpu%d/address", num) != 0) + return 0; + + DBG(CPU, ul_debugobj(cpu, "#%d reading address", num)); + + ul_path_readf_s32(sys, &cpu->address, "cpu%d/address", num); + if (cpu->type) + cpu->type->has_addresses = 1; + return 0; +} + +static int read_configure(struct lscpu_cxt *cxt, struct lscpu_cpu *cpu) +{ + struct path_cxt *sys = cxt->syscpu; + int num = cpu->logical_id; + + if (ul_path_accessf(sys, F_OK, "cpu%d/configure", num) != 0) + return 0; + + DBG(CPU, ul_debugobj(cpu, "#%d reading configure", num)); + + ul_path_readf_s32(sys, &cpu->configured, "cpu%d/configure", num); + if (cpu->type) + cpu->type->has_configured = 1; + return 0; +} + +static int read_mhz(struct lscpu_cxt *cxt, struct lscpu_cpu *cpu) +{ + struct path_cxt *sys = cxt->syscpu; + int num = cpu->logical_id; + int mhz; + + DBG(CPU, ul_debugobj(cpu, "#%d reading mhz", num)); + + if (ul_path_readf_s32(sys, &mhz, "cpu%d/cpufreq/cpuinfo_max_freq", num) == 0) + cpu->mhz_max_freq = (float) mhz / 1000; + if (ul_path_readf_s32(sys, &mhz, 
"cpu%d/cpufreq/cpuinfo_min_freq", num) == 0) + cpu->mhz_min_freq = (float) mhz / 1000; + + /* The default current-frequency value comes is from /proc/cpuinfo (if + * available). This /proc value is usually based on MSR registers + * (APERF/APERF) and it changes pretty often. It seems better to read + * frequency from cpufreq subsystem that provides the current frequency + * for the current policy. There is also cpuinfo_cur_freq in sysfs, but + * it's not always available. + */ + if (ul_path_readf_s32(sys, &mhz, "cpu%d/cpufreq/scaling_cur_freq", num) == 0) + cpu->mhz_cur_freq = (float) mhz / 1000; + + if (cpu->type && (cpu->mhz_min_freq || cpu->mhz_max_freq)) + cpu->type->has_freq = 1; + + return 0; +} + +float lsblk_cputype_get_maxmhz(struct lscpu_cxt *cxt, struct lscpu_cputype *ct) +{ + size_t i; + float res = 0.0; + + for (i = 0; i < cxt->npossibles; i++) { + struct lscpu_cpu *cpu = cxt->cpus[i]; + + if (!cpu || cpu->type != ct || !is_cpu_present(cxt, cpu)) + continue; + res = max(res, cpu->mhz_max_freq); + } + return res; +} + +float lsblk_cputype_get_minmhz(struct lscpu_cxt *cxt, struct lscpu_cputype *ct) +{ + size_t i; + float res = -1.0; + + for (i = 0; i < cxt->npossibles; i++) { + struct lscpu_cpu *cpu = cxt->cpus[i]; + + if (!cpu || cpu->type != ct || !is_cpu_present(cxt, cpu)) + continue; + if (res < 0.0 || cpu->mhz_min_freq < res) + res = cpu->mhz_min_freq; + } + return res; +} + +/* returns scaling (use) of CPUs freq. 
in percent */ +float lsblk_cputype_get_scalmhz(struct lscpu_cxt *cxt, struct lscpu_cputype *ct) +{ + size_t i; + float fmax = 0, fcur = 0; + + for (i = 0; i < cxt->npossibles; i++) { + struct lscpu_cpu *cpu = cxt->cpus[i]; + + if (!cpu || cpu->type != ct || !is_cpu_present(cxt, cpu)) + continue; + if (cpu->mhz_max_freq <= 0.0 || cpu->mhz_cur_freq <= 0.0) + continue; + fmax += cpu->mhz_max_freq; + fcur += cpu->mhz_cur_freq; + } + if (fcur <= 0.0) + return 0.0; + return fcur / fmax * 100; +} + +int lscpu_read_topology(struct lscpu_cxt *cxt) +{ + size_t i; + int rc = 0; + + + for (i = 0; i < cxt->ncputypes; i++) + rc += cputype_read_topology(cxt, cxt->cputypes[i]); + + for (i = 0; rc == 0 && i < cxt->npossibles; i++) { + struct lscpu_cpu *cpu = cxt->cpus[i]; + + if (!cpu || !cpu->type) + continue; + + DBG(CPU, ul_debugobj(cpu, "#%d reading topology", cpu->logical_id)); + + rc = read_ids(cxt, cpu); + if (!rc) + rc = read_polarization(cxt, cpu); + if (!rc) + rc = read_address(cxt, cpu); + if (!rc) + rc = read_configure(cxt, cpu); + if (!rc) + rc = read_mhz(cxt, cpu); + if (!rc) + rc = read_caches(cxt, cpu); + } + + lscpu_sort_caches(cxt->caches, cxt->ncaches); + DBG(GATHER, ul_debugobj(cxt, " L1d: %zu", lscpu_get_cache_full_size(cxt, "L1d", NULL))); + DBG(GATHER, ul_debugobj(cxt, " L1i: %zu", lscpu_get_cache_full_size(cxt, "L1i", NULL))); + DBG(GATHER, ul_debugobj(cxt, " L2: %zu", lscpu_get_cache_full_size(cxt, "L2", NULL))); + DBG(GATHER, ul_debugobj(cxt, " L3: %zu", lscpu_get_cache_full_size(cxt, "L3", NULL))); + + return rc; +} + + diff --git a/sys-utils/lscpu-virt.c b/sys-utils/lscpu-virt.c new file mode 100644 index 0000000..6b6deb8 --- /dev/null +++ b/sys-utils/lscpu-virt.c @@ -0,0 +1,686 @@ +#include <errno.h> +#include <stdlib.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <stdio.h> + +#include "lscpu.h" + +#if (defined(__x86_64__) || defined(__i386__)) +# define INCLUDE_VMWARE_BDOOR 
+#endif + +#ifdef INCLUDE_VMWARE_BDOOR +# include <stdint.h> +# include <signal.h> +# include <strings.h> +# include <setjmp.h> +# ifdef HAVE_SYS_IO_H +# include <sys/io.h> +# endif +#endif + +/* Xen Domain feature flag used for /sys/hypervisor/properties/features */ +#define XENFEAT_supervisor_mode_kernel 3 +#define XENFEAT_mmu_pt_update_preserve_ad 5 +#define XENFEAT_hvm_callback_vector 8 + +#define XEN_FEATURES_PV_MASK (1U << XENFEAT_mmu_pt_update_preserve_ad) +#define XEN_FEATURES_PVH_MASK ( (1U << XENFEAT_supervisor_mode_kernel) \ + | (1U << XENFEAT_hvm_callback_vector) ) +static const int hv_vendor_pci[] = { + [VIRT_VENDOR_NONE] = 0x0000, + [VIRT_VENDOR_XEN] = 0x5853, + [VIRT_VENDOR_KVM] = 0x0000, + [VIRT_VENDOR_MSHV] = 0x1414, + [VIRT_VENDOR_VMWARE] = 0x15ad, + [VIRT_VENDOR_VBOX] = 0x80ee, +}; + +static const int hv_graphics_pci[] = { + [VIRT_VENDOR_NONE] = 0x0000, + [VIRT_VENDOR_XEN] = 0x0001, + [VIRT_VENDOR_KVM] = 0x0000, + [VIRT_VENDOR_MSHV] = 0x5353, + [VIRT_VENDOR_VMWARE] = 0x0710, + [VIRT_VENDOR_VBOX] = 0xbeef, +}; + +#define WORD(x) (uint16_t)(*(const uint16_t *)(x)) +#define DWORD(x) (uint32_t)(*(const uint32_t *)(x)) + +void *get_mem_chunk(size_t base, size_t len, const char *devmem) +{ + void *p = NULL; + int fd; + + if ((fd = open(devmem, O_RDONLY)) < 0) + return NULL; + + if (!(p = malloc(len))) + goto nothing; + if (lseek(fd, base, SEEK_SET) == -1) + goto nothing; + if (read_all(fd, p, len) == -1) + goto nothing; + + close(fd); + return p; + +nothing: + free(p); + close(fd); + return NULL; +} + +static int hypervisor_from_dmi_table(uint32_t base, uint16_t len, + uint16_t num, const char *devmem) +{ + uint8_t *data; + int rc = VIRT_VENDOR_NONE; + struct dmi_info di; + + data = get_mem_chunk(base, len, devmem); + if (!data) + return rc; + + memset(&di, 0, sizeof(struct dmi_info)); + rc = parse_dmi_table(len, num, data, &di); + if (rc < 0) + goto done; + + if (di.manufacturer && !strcmp(di.manufacturer, "innotek GmbH")) + rc = VIRT_VENDOR_INNOTEK; 
+ else if (di.manufacturer && strstr(di.manufacturer, "HITACHI") && + di.product && strstr(di.product, "LPAR")) + rc = VIRT_VENDOR_HITACHI; + else if (di.vendor && !strcmp(di.vendor, "Parallels")) + rc = VIRT_VENDOR_PARALLELS; +done: + free(data); + return rc; +} + +static int checksum(const uint8_t *buf, size_t len) +{ + uint8_t sum = 0; + size_t a; + + for (a = 0; a < len; a++) + sum += buf[a]; + return (sum == 0); +} + +#if defined(__x86_64__) || defined(__i386__) +static int hypervisor_decode_legacy(uint8_t *buf, const char *devmem) +{ + if (!checksum(buf, 0x0F)) + return -1; + + return hypervisor_from_dmi_table(DWORD(buf + 0x08), WORD(buf + 0x06), + WORD(buf + 0x0C), + devmem); +} +#endif + +static int hypervisor_decode_smbios(uint8_t *buf, const char *devmem) +{ + if (!checksum(buf, buf[0x05]) + || memcmp(buf + 0x10, "_DMI_", 5) != 0 + || !checksum(buf + 0x10, 0x0F)) + return -1; + + return hypervisor_from_dmi_table(DWORD(buf + 0x18), WORD(buf + 0x16), + WORD(buf + 0x1C), + devmem); +} + +/* + * Probe for EFI interface + */ +#define EFI_NOT_FOUND (-1) +#define EFI_NO_SMBIOS (-2) +static int address_from_efi(size_t *address) +{ + FILE *tab; + char linebuf[64]; + int ret; + + *address = 0; /* Prevent compiler warning */ + + /* + * Linux up to 2.6.6: /proc/efi/systab + * Linux 2.6.7 and up: /sys/firmware/efi/systab + */ + if (!(tab = fopen("/sys/firmware/efi/systab", "r")) && + !(tab = fopen("/proc/efi/systab", "r"))) + return EFI_NOT_FOUND; /* No EFI interface */ + + ret = EFI_NO_SMBIOS; + while ((fgets(linebuf, sizeof(linebuf) - 1, tab)) != NULL) { + char *addrp = strchr(linebuf, '='); + if (!addrp) + continue; + *(addrp++) = '\0'; + if (strcmp(linebuf, "SMBIOS") == 0) { + errno = 0; + *address = strtoul(addrp, NULL, 0); + if (errno) + continue; + ret = 0; + break; + } + } + + fclose(tab); + return ret; +} + +static int read_hypervisor_dmi_from_devmem(void) +{ + int rc = VIRT_VENDOR_NONE; + uint8_t *buf = NULL; + size_t fp = 0; + + /* First try EFI (ia64, 
Intel-based Mac) */ + switch (address_from_efi(&fp)) { + case EFI_NOT_FOUND: + goto memory_scan; + case EFI_NO_SMBIOS: + goto done; + } + + buf = get_mem_chunk(fp, 0x20, _PATH_DEV_MEM); + if (!buf) + goto done; + + rc = hypervisor_decode_smbios(buf, _PATH_DEV_MEM); + if (rc >= VIRT_VENDOR_NONE) + goto done; + + free(buf); + buf = NULL; +memory_scan: +#if defined(__x86_64__) || defined(__i386__) + /* Fallback to memory scan (x86, x86_64) */ + buf = get_mem_chunk(0xF0000, 0x10000, _PATH_DEV_MEM); + if (!buf) + goto done; + + for (fp = 0; fp <= 0xFFF0; fp += 16) { + if (memcmp(buf + fp, "_SM_", 4) == 0 && fp <= 0xFFE0) { + rc = hypervisor_decode_smbios(buf + fp, _PATH_DEV_MEM); + if (rc < 0) + fp += 16; + + } else if (memcmp(buf + fp, "_DMI_", 5) == 0) + rc = hypervisor_decode_legacy(buf + fp, _PATH_DEV_MEM); + + if (rc >= VIRT_VENDOR_NONE) + break; + } +#endif +done: + free(buf); + return rc; +} + +static int read_hypervisor_dmi_from_sysfw(void) +{ + static char const sys_fw_dmi_tables[] = _PATH_SYS_DMI; + struct stat st; + + if (stat(sys_fw_dmi_tables, &st)) + return -1; + + return hypervisor_from_dmi_table(0, st.st_size, st.st_size / 4, + sys_fw_dmi_tables); +} + +static int read_hypervisor_dmi(void) +{ + int rc; + + if (sizeof(uint8_t) != 1 + || sizeof(uint16_t) != 2 + || sizeof(uint32_t) != 4 + || '\0' != 0) + return VIRT_VENDOR_NONE; + + /* -1 : no DMI in /sys, + * 0 : DMI exist, nothing detected (VIRT_VENDOR_NONE) + * >0 : hypervisor detected + */ + rc = read_hypervisor_dmi_from_sysfw(); + if (rc < 0) + rc = read_hypervisor_dmi_from_devmem(); + + return rc < 0 ? 
VIRT_VENDOR_NONE : rc; +} + +static int has_pci_device(struct lscpu_cxt *cxt, + unsigned int vendor, unsigned int device) +{ + FILE *f; + unsigned int num, fn, ven, dev; + int res = 1; + + f = ul_path_fopen(cxt->procfs, "r", "bus/pci/devices"); + if (!f) + return 0; + + /* for more details about bus/pci/devices format see + * drivers/pci/proc.c in linux kernel + */ + while(fscanf(f, "%02x%02x\t%04x%04x\t%*[^\n]", + &num, &fn, &ven, &dev) == 4) { + + if (ven == vendor && dev == device) + goto found; + } + + res = 0; +found: + fclose(f); + return res; +} + +#if defined(__x86_64__) || defined(__i386__) +/* + * This CPUID leaf returns the information about the hypervisor. + * EAX : maximum input value for CPUID supported by the hypervisor. + * EBX, ECX, EDX : Hypervisor vendor ID signature. E.g. VMwareVMware. + */ +#define HYPERVISOR_INFO_LEAF 0x40000000 + +static inline void cpuid(unsigned int op, unsigned int *eax, + unsigned int *ebx, unsigned int *ecx, + unsigned int *edx) +{ + __asm__( +#if defined(__PIC__) && defined(__i386__) + /* x86 PIC cannot clobber ebx -- gcc bitches */ + "xchg %%ebx, %%esi;" + "cpuid;" + "xchg %%esi, %%ebx;" + : "=S" (*ebx), +#else + "cpuid;" + : "=b" (*ebx), +#endif + "=a" (*eax), + "=c" (*ecx), + "=d" (*edx) + : "1" (op), "c"(0)); +} + +static int read_hypervisor_cpuid(void) +{ + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + char hyper_vendor_id[13] = { 0 }; + + cpuid(HYPERVISOR_INFO_LEAF, &eax, &ebx, &ecx, &edx); + memcpy(hyper_vendor_id + 0, &ebx, 4); + memcpy(hyper_vendor_id + 4, &ecx, 4); + memcpy(hyper_vendor_id + 8, &edx, 4); + hyper_vendor_id[12] = '\0'; + + if (!hyper_vendor_id[0]) + goto none; + + if (!strncmp("XenVMMXenVMM", hyper_vendor_id, 12)) + return VIRT_VENDOR_XEN; + else if (!strncmp("KVMKVMKVM", hyper_vendor_id, 9)) + return VIRT_VENDOR_KVM; + else if (!strncmp("Microsoft Hv", hyper_vendor_id, 12)) + return VIRT_VENDOR_MSHV; + else if (!strncmp("VMwareVMware", hyper_vendor_id, 12)) + return VIRT_VENDOR_VMWARE; + 
else if (!strncmp("UnisysSpar64", hyper_vendor_id, 12)) + return VIRT_VENDOR_SPAR; +none: + return VIRT_VENDOR_NONE; +} + +#else /* ! (__x86_64__ || __i386__) */ +static int read_hypervisor_cpuid(void) +{ + return 0; +} +#endif + +static int is_devtree_compatible(struct lscpu_cxt *cxt, const char *str) +{ + FILE *fd = ul_path_fopen(cxt->procfs, "r", "device-tree/compatible"); + + if (fd) { + char buf[256]; + size_t i, len; + + memset(buf, 0, sizeof(buf)); + len = fread(buf, 1, sizeof(buf) - 1, fd); + fclose(fd); + + for (i = 0; i < len;) { + if (!strcmp(&buf[i], str)) + return 1; + i += strlen(&buf[i]); + i++; + } + } + + return 0; +} + +static int read_hypervisor_powerpc(struct lscpu_cxt *cxt, int *type) +{ + int vendor = VIRT_VENDOR_NONE; + + *type = VIRT_TYPE_NONE; + + /* IBM iSeries: legacy, para-virtualized on top of OS/400 */ + if (ul_path_access(cxt->procfs, F_OK, "iSeries") == 0) { + vendor = VIRT_VENDOR_OS400; + *type = VIRT_TYPE_PARA; + + /* PowerNV (POWER Non-Virtualized, bare-metal) */ + } else if (is_devtree_compatible(cxt, "ibm,powernv") != 0) { + ; + + /* PowerVM (IBM's proprietary hypervisor, aka pHyp) */ + } else if (ul_path_access(cxt->procfs, F_OK, "device-tree/ibm,partition-name") == 0 + && ul_path_access(cxt->procfs, F_OK, "device-tree/hmc-managed?") == 0 + && ul_path_access(cxt->procfs, F_OK, "device-tree/chosen/qemu,graphic-width") != 0) { + + FILE *fd; + vendor = VIRT_VENDOR_PHYP; + *type = VIRT_TYPE_PARA; + + fd = ul_path_fopen(cxt->procfs, "r", "device-tree/ibm,partition-name"); + if (fd) { + char buf[256]; + if (fscanf(fd, "%255s", buf) == 1 && !strcmp(buf, "full")) + *type = VIRT_TYPE_NONE; + fclose(fd); + } + + /* Qemu */ + } else if (is_devtree_compatible(cxt, "qemu,pseries")) { + vendor = VIRT_VENDOR_KVM; + *type = VIRT_TYPE_PARA; + } + + return vendor; +} + +#ifdef INCLUDE_VMWARE_BDOOR + +#define VMWARE_BDOOR_MAGIC 0x564D5868 +#define VMWARE_BDOOR_PORT 0x5658 +#define VMWARE_BDOOR_CMD_GETVERSION 10 + +static UL_ASAN_BLACKLIST +void 
vmware_bdoor(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) +{ + __asm__( +#if defined(__PIC__) && defined(__i386__) + /* x86 PIC cannot clobber ebx -- gcc bitches */ + "xchg %%ebx, %%esi;" + "inl (%%dx), %%eax;" + "xchg %%esi, %%ebx;" + : "=S" (*ebx), +#else + "inl (%%dx), %%eax;" + : "=b" (*ebx), +#endif + "=a" (*eax), + "=c" (*ecx), + "=d" (*edx) + : "0" (VMWARE_BDOOR_MAGIC), + "1" (VMWARE_BDOOR_CMD_GETVERSION), + "2" (VMWARE_BDOOR_PORT), + "3" (0) + : "memory"); +} + +static jmp_buf segv_handler_env; + +static void +segv_handler(__attribute__((__unused__)) int sig, + __attribute__((__unused__)) siginfo_t *info, + __attribute__((__unused__)) void *ignored) +{ + siglongjmp(segv_handler_env, 1); +} + +static int is_vmware_platform(void) +{ + uint32_t eax, ebx, ecx, edx; + struct sigaction act, oact; + + /* + * FIXME: Not reliable for non-root users. Note it works as expected if + * vmware_bdoor() is not optimized for PIE, but then it fails to build + * on 32bit x86 systems. See lscpu git log for more details (commit + * 7845b91dbc7690064a2be6df690e4aaba728fb04). kzak [3-Nov-2016] + */ + if (getuid() != 0) + return 0; + + /* + * The assembly routine for vmware detection works + * fine under vmware, even if ran as regular user. But + * on real HW or under other hypervisors, it segfaults (which is + * expected). So we temporarily install SIGSEGV handler to catch + * the signal. All this magic is needed because lscpu + * isn't supposed to require root privileges. + */ + if (sigsetjmp(segv_handler_env, 1)) + return 0; + + memset(&act, 0, sizeof(act)); + act.sa_sigaction = segv_handler; + act.sa_flags = SA_SIGINFO; + + if (sigaction(SIGSEGV, &act, &oact)) + err(EXIT_FAILURE, _("cannot set signal handler")); + + vmware_bdoor(&eax, &ebx, &ecx, &edx); + + if (sigaction(SIGSEGV, &oact, NULL)) + err(EXIT_FAILURE, _("cannot restore signal handler")); + + return eax != (uint32_t)-1 && ebx == VMWARE_BDOOR_MAGIC; +} + +#else /* ! 
INCLUDE_VMWARE_BDOOR */ + +static int is_vmware_platform(void) +{ + return 0; +} + +#endif /* INCLUDE_VMWARE_BDOOR */ + +/* + * Detect the virtualization environment. + * + * Records the CPU virtualization flag (svm/vmx) and then tries the probes + * below in order; the first one that matches sets vendor and type. + * Returns a newly allocated description, or NULL when nothing at all was + * detected. Release it with lscpu_free_virtualization(). + */ +struct lscpu_virt *lscpu_read_virtualization(struct lscpu_cxt *cxt) +{ + char buf[BUFSIZ]; + struct lscpu_cputype *ct; + struct lscpu_virt *virt; + FILE *fd; + + DBG(VIRT, ul_debug("reading virtualization")); + virt = xcalloc(1, sizeof(*virt)); + + /* CPU flags */ + ct = lscpu_cputype_get_default(cxt); + if (ct && ct->flags) { + snprintf(buf, sizeof(buf), " %s ", ct->flags); + if (strstr(buf, " svm ")) + virt->cpuflag = xstrdup("svm"); + else if (strstr(buf, " vmx ")) + virt->cpuflag = xstrdup("vmx"); + } + + + /* We have to detect WSL first. is_vmware_platform() crashes on Windows 10. */ + fd = ul_path_fopen(cxt->procfs, "r", "sys/kernel/osrelease"); + if (fd) { + if (fgets(buf, sizeof(buf), fd) && strstr(buf, "Microsoft")) { + virt->vendor = VIRT_VENDOR_WSL; + virt->type = VIRT_TYPE_CONTAINER; + } + fclose(fd); + if (virt->type) + goto done; + } + + if (!cxt->noalive) { + virt->vendor = read_hypervisor_cpuid(); + if (!virt->vendor) + virt->vendor = read_hypervisor_dmi(); + if (!virt->vendor && is_vmware_platform()) + virt->vendor = VIRT_VENDOR_VMWARE; + } + + if (virt->vendor) { + virt->type = VIRT_TYPE_FULL; + + if (virt->vendor == VIRT_VENDOR_XEN) { + uint32_t features; + + fd = ul_prefix_fopen(cxt->prefix, "r", _PATH_SYS_HYP_FEATURES); + + if (fd && fscanf(fd, "%x", &features) == 1) { + /* Xen PV domain */ + if (features & XEN_FEATURES_PV_MASK) + virt->type = VIRT_TYPE_PARA; + /* Xen PVH domain */ + else if ((features & XEN_FEATURES_PVH_MASK) + == XEN_FEATURES_PVH_MASK) + virt->type = VIRT_TYPE_PARA; + } + if (fd) + fclose(fd); + } + } else if ((virt->vendor = read_hypervisor_powerpc(cxt, &virt->type))) { + ; + + /* Xen para-virt or dom0 */ + } else if (ul_path_access(cxt->procfs, F_OK, "xen") == 0) { + int dom0 = 0; + + fd = ul_path_fopen(cxt->procfs, "r", "xen/capabilities"); + if (fd) { + char xenbuf[256]; + + if (fscanf(fd, "%255s",
xenbuf) == 1 && + !strcmp(xenbuf, "control_d")) + dom0 = 1; + fclose(fd); + } + virt->type = dom0 ? VIRT_TYPE_NONE : VIRT_TYPE_PARA; + virt->vendor = VIRT_VENDOR_XEN; + + /* Xen full-virt on non-x86_64 */ + } else if (has_pci_device(cxt, hv_vendor_pci[VIRT_VENDOR_XEN], hv_graphics_pci[VIRT_VENDOR_XEN])) { + virt->vendor = VIRT_VENDOR_XEN; + virt->type = VIRT_TYPE_FULL; + } else if (has_pci_device(cxt, hv_vendor_pci[VIRT_VENDOR_VMWARE], hv_graphics_pci[VIRT_VENDOR_VMWARE])) { + virt->vendor = VIRT_VENDOR_VMWARE; + virt->type = VIRT_TYPE_FULL; + } else if (has_pci_device(cxt, hv_vendor_pci[VIRT_VENDOR_VBOX], hv_graphics_pci[VIRT_VENDOR_VBOX])) { + virt->vendor = VIRT_VENDOR_VBOX; + virt->type = VIRT_TYPE_FULL; + + /* IBM PR/SM */ + } else if ((fd = ul_path_fopen(cxt->procfs, "r", "sysinfo"))) { + + virt->vendor = VIRT_VENDOR_IBM; + virt->hypervisor = "PR/SM"; + virt->type = VIRT_TYPE_FULL; + + while (fgets(buf, sizeof(buf), fd) != NULL) { + if (!strstr(buf, "Control Program:")) + continue; + virt->vendor = strstr(buf, "KVM") ?
VIRT_VENDOR_KVM : VIRT_VENDOR_IBM; + virt->hypervisor = strchr(buf, ':'); + + if (virt->hypervisor) { + virt->hypervisor++; + normalize_whitespace((unsigned char *) virt->hypervisor); + break; + } + } + if (virt->hypervisor) + virt->hypervisor = xstrdup(virt->hypervisor); + fclose(fd); + } + + /* OpenVZ/Virtuozzo - /proc/vz dir should exist + * /proc/bc should not */ + else if (ul_path_access(cxt->procfs, F_OK, "vz") == 0 && + ul_path_access(cxt->procfs, F_OK, "bc") != 0) { + virt->vendor = VIRT_VENDOR_PARALLELS; + virt->type = VIRT_TYPE_CONTAINER; + + /* IBM */ + } else if (virt->hypervisor && + (strcmp(virt->hypervisor, "PowerVM Lx86") == 0 || + strcmp(virt->hypervisor, "IBM/S390") == 0)) { + virt->vendor = VIRT_VENDOR_IBM; + virt->type = VIRT_TYPE_FULL; + + /* User-mode-linux */ + } else if (ct && ct->modelname && strstr(ct->modelname, "UML")) { + virt->vendor = VIRT_VENDOR_UML; + virt->type = VIRT_TYPE_PARA; + + /* Linux-VServer */ + } else if ((fd = ul_path_fopen(cxt->procfs, "r", "self/status"))) { + char *val = NULL; + + while (fgets(buf, sizeof(buf), fd) != NULL) { + if (lookup(buf, "VxID", &val)) + break; + } + fclose(fd); + + if (val) { + char *org = val; + + /* ctype functions need a value representable as unsigned char */ + while (isdigit((unsigned char) *val)) + ++val; + if (!*val) { + virt->vendor = VIRT_VENDOR_VSERVER; + virt->type = VIRT_TYPE_CONTAINER; + } + free(org); + } + } +done: + DBG(VIRT, ul_debugobj(virt, "virt: cpu='%s' hypervisor='%s' vendor=%d type=%d", + virt->cpuflag, + virt->hypervisor, + virt->vendor, + virt->type)); + + /* nothing detected at all: report that as NULL rather than an empty object */ + if (!virt->cpuflag && !virt->hypervisor && !virt->vendor && !virt->type) { + lscpu_free_virtualization(virt); + virt = NULL; + } + return virt; +} + +/* Free a description returned by lscpu_read_virtualization(); NULL is accepted. */ +void lscpu_free_virtualization(struct lscpu_virt *virt) +{ + if (!virt) + return; + + free(virt->cpuflag); + free(virt->hypervisor); + free(virt); +} + diff --git a/sys-utils/lscpu.1 b/sys-utils/lscpu.1 new file mode 100644 index 0000000..b0cbd34 --- /dev/null +++ b/sys-utils/lscpu.1 @@ -0,0 +1,157 @@ +'\" t +.\" Title: lscpu +.\" Author: [see the
"AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-08-04 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "LSCPU" "1" "2022-08-04" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +lscpu \- display information about the CPU architecture +.SH "SYNOPSIS" +.sp +\fBlscpu\fP [options] +.SH "DESCRIPTION" +.sp +\fBlscpu\fP gathers CPU architecture information from \fIsysfs\fP, \fI/proc/cpuinfo\fP and any applicable architecture\-specific libraries (e.g. \fBlibrtas\fP on Powerpc). The command output can be optimized for parsing or for easy readability by humans. The information includes, for example, the number of CPUs, threads, cores, sockets, and Non\-Uniform Memory Access (NUMA) nodes. There is also information about the CPU caches and cache sharing, family, model, bogoMIPS, byte order, and stepping. +.sp +The default output formatting on terminal is subject to change and maybe optimized for better readability. The output for non\-terminals (e.g., pipes) is never affected by this optimization and it is always in "Field: data\(rsn" format. Use for example "\fBlscpu | less\fP" to see the default output without optimizations. +.sp +In virtualized environments, the CPU architecture information displayed reflects the configuration of the guest operating system which is typically different from the physical (host) system. On architectures that support retrieving physical topology information, \fBlscpu\fP also displays the number of physical sockets, chips, cores in the host system. +.sp +Options that result in an output table have a \fIlist\fP argument. Use this argument to customize the command output. 
Specify a comma\-separated list of column labels to limit the output table to only the specified columns, arranged in the specified order. See \fBCOLUMNS\fP for a list of valid column labels. The column labels are not case sensitive. +.sp +Not all columns are supported on all architectures. If an unsupported column is specified, \fBlscpu\fP prints the column but does not provide any data for it. +.sp +The cache sizes are reported as summary from all CPUs. The versions before v2.34 reported per\-core sizes, but this output was confusing due to complicated CPUs topology and the way how caches are shared between CPUs. For more details about caches see \fB\-\-cache\fP. Since version v2.37 \fBlscpu\fP follows cache IDs as provided by Linux kernel and it does not always start from zero. +.SH "OPTIONS" +.sp +\fB\-a\fP, \fB\-\-all\fP +.RS 4 +Include lines for online and offline CPUs in the output (default for \fB\-e\fP). This option may only be specified together with option \fB\-e\fP or \fB\-p\fP. +.RE +.sp +\fB\-B\fP, \fB\-\-bytes\fP +.RS 4 +Print the sizes in bytes rather than in a human\-readable format. +.sp +By default, the unit, sizes are expressed in, is byte, and unit prefixes are in +power of 2^10 (1024). Abbreviations of symbols are exhibited truncated in order +to reach a better readability, by exhibiting alone the first letter of them; +examples: "1 KiB" and "1 MiB" are respectively exhibited as "1 K" and "1 M", +then omitting on purpose the mention "iB", which is part of these abbreviations. +.RE +.sp +\fB\-b\fP, \fB\-\-online\fP +.RS 4 +Limit the output to online CPUs (default for \fB\-p\fP). This option may only be specified together with option \fB\-e\fP or \fB\-p\fP. +.RE +.sp +\fB\-C\fP, \fB\-\-caches\fP[=\fIlist\fP] +.RS 4 +Display details about CPU caches. For details about available information see \fB\-\-help\fP output. +.sp +If the \fIlist\fP argument is omitted, all columns for which data is available are included in the command output. 
+.sp +When specifying the \fIlist\fP argument, the string of option, equal sign (=), and \fIlist\fP must not contain any blanks or other whitespace. Examples: \fB\-C=NAME,ONE\-SIZE\fP or \fB\-\-caches=NAME,ONE\-SIZE\fP. +.sp +The default list of columns may be extended if list is specified in the format +list (e.g., \fBlscpu \-C=+ALLOC\-POLICY\fP). +.RE +.sp +\fB\-c\fP, \fB\-\-offline\fP +.RS 4 +Limit the output to offline CPUs. This option may only be specified together with option \fB\-e\fP or \fB\-p\fP. +.RE +.sp +\fB\-e\fP, \fB\-\-extended\fP[=\fIlist\fP] +.RS 4 +Display the CPU information in human\-readable format. +.sp +If the \fIlist\fP argument is omitted, the default columns are included in the command output. The default output is subject to change. +.sp +When specifying the \fIlist\fP argument, the string of option, equal sign (=), and \fIlist\fP must not contain any blanks or other whitespace. Examples: \(aq\fB\-e=cpu,node\fP\(aq or \(aq\fB\-\-extended=cpu,node\fP\(aq. +.sp +The default list of columns may be extended if list is specified in the format +list (e.g., lscpu \-e=+MHZ). +.RE +.sp +\fB\-J\fP, \fB\-\-json\fP +.RS 4 +Use JSON output format for the default summary or extended output (see \fB\-\-extended\fP). +.RE +.sp +\fB\-p\fP, \fB\-\-parse\fP[=\fIlist\fP] +.RS 4 +Optimize the command output for easy parsing. +.sp +If the \fIlist\fP argument is omitted, the command output is compatible with earlier versions of \fBlscpu\fP. In this compatible format, two commas are used to separate CPU cache columns. If no CPU caches are identified the cache column is omitted. If the \fIlist\fP argument is used, cache columns are separated with a colon (:). +.sp +When specifying the \fIlist\fP argument, the string of option, equal sign (=), and \fIlist\fP must not contain any blanks or other whitespace. Examples: \(aq\fB\-p=cpu,node\fP\(aq or \(aq\fB\-\-parse=cpu,node\fP\(aq. 
+.sp +The default list of columns may be extended if list is specified in the format +list (e.g., lscpu \-p=+MHZ). +.RE +.sp +\fB\-s\fP, \fB\-\-sysroot\fP \fIdirectory\fP +.RS 4 +Gather CPU data for a Linux instance other than the instance from which the \fBlscpu\fP command is issued. The specified \fIdirectory\fP is the system root of the Linux instance to be inspected. +.RE +.sp +\fB\-x\fP, \fB\-\-hex\fP +.RS 4 +Use hexadecimal masks for CPU sets (for example "ff"). The default is to print the sets in list format (for example 0,1). Note that before version 2.30 the mask has been printed with 0x prefix. +.RE +.sp +\fB\-y\fP, \fB\-\-physical\fP +.RS 4 +Display physical IDs for all columns with topology elements (core, socket, etc.). Other than logical IDs, which are assigned by \fBlscpu\fP, physical IDs are platform\-specific values that are provided by the kernel. Physical IDs are not necessarily unique and they might not be arranged sequentially. If the kernel could not retrieve a physical ID for an element \fBlscpu\fP prints the dash (\-) character. +.sp +The CPU logical numbers are not affected by this option. +.RE +.sp +\fB\-\-output\-all\fP +.RS 4 +Output all available columns. This option must be combined with either \fB\-\-extended\fP, \fB\-\-parse\fP or \fB\-\-caches\fP. +.RE +.SH "BUGS" +.sp +The basic overview of CPU family, model, etc. is always based on the first CPU only. +.sp +Sometimes in Xen Dom0 the kernel reports wrong data. +.sp +On virtual hardware the number of cores per socket, etc. can be wrong. +.SH "AUTHORS" +.sp +.MTO "qcai\(atredhat.com" "Cai Qian" "," +.MTO "kzak\(atredhat.com" "Karel Zak" "," +.MTO "heiko.carstens\(atde.ibm.com" "Heiko Carstens" "" +.SH "SEE ALSO" +.sp +\fBchcpu\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." 
+.SH "AVAILABILITY" +.sp +The \fBlscpu\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/lscpu.1.adoc b/sys-utils/lscpu.1.adoc new file mode 100644 index 0000000..88a999e --- /dev/null +++ b/sys-utils/lscpu.1.adoc @@ -0,0 +1,113 @@ +//po4a: entry man manual += lscpu(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: lscpu + +== NAME + +lscpu - display information about the CPU architecture + +== SYNOPSIS + +*lscpu* [options] + +== DESCRIPTION + +*lscpu* gathers CPU architecture information from _sysfs_, _/proc/cpuinfo_ and any applicable architecture-specific libraries (e.g. *librtas* on Powerpc). The command output can be optimized for parsing or for easy readability by humans. The information includes, for example, the number of CPUs, threads, cores, sockets, and Non-Uniform Memory Access (NUMA) nodes. There is also information about the CPU caches and cache sharing, family, model, bogoMIPS, byte order, and stepping. + +The default output formatting on terminal is subject to change and may be optimized for better readability. The output for non-terminals (e.g., pipes) is never affected by this optimization and it is always in "Field: data\n" format. Use for example "*lscpu | less*" to see the default output without optimizations. + +In virtualized environments, the CPU architecture information displayed reflects the configuration of the guest operating system which is typically different from the physical (host) system. On architectures that support retrieving physical topology information, *lscpu* also displays the number of physical sockets, chips, cores in the host system. + +Options that result in an output table have a _list_ argument. Use this argument to customize the command output. Specify a comma-separated list of column labels to limit the output table to only the specified columns, arranged in the specified order. See the *--help* output for a list of valid column labels. The column labels are not case sensitive.
+ +Not all columns are supported on all architectures. If an unsupported column is specified, *lscpu* prints the column but does not provide any data for it. + +The cache sizes are reported as a summary from all CPUs. The versions before v2.34 reported per-core sizes, but this output was confusing due to complicated CPU topology and the way caches are shared between CPUs. For more details about caches see *--caches*. Since version v2.37 *lscpu* follows cache IDs as provided by the Linux kernel, and they do not always start from zero. + +== OPTIONS + +*-a*, *--all*:: +Include lines for online and offline CPUs in the output (default for *-e*). This option may only be specified together with option *-e* or *-p*. + +*-B*, *--bytes*:: +include::man-common/in-bytes.adoc[] + +*-b*, *--online*:: +Limit the output to online CPUs (default for *-p*). This option may only be specified together with option *-e* or *-p*. + +*-C*, *--caches*[=_list_]:: +Display details about CPU caches. For details about available information see *--help* output. ++ +If the _list_ argument is omitted, all columns for which data is available are included in the command output. ++ +When specifying the _list_ argument, the string of option, equal sign (=), and _list_ must not contain any blanks or other whitespace. Examples: *-C=NAME,ONE-SIZE* or *--caches=NAME,ONE-SIZE*. ++ +The default list of columns may be extended if list is specified in the format +list (e.g., **lscpu -C=+ALLOC-POLICY**). + +*-c*, *--offline*:: +Limit the output to offline CPUs. This option may only be specified together with option *-e* or *-p*. + +*-e*, *--extended*[=_list_]:: +Display the CPU information in human-readable format. ++ +If the _list_ argument is omitted, the default columns are included in the command output. The default output is subject to change. ++ +When specifying the _list_ argument, the string of option, equal sign (=), and _list_ must not contain any blanks or other whitespace.
Examples: '*-e=cpu,node*' or '*--extended=cpu,node*'. ++ +The default list of columns may be extended if list is specified in the format +list (e.g., lscpu -e=+MHZ). + +*-J*, *--json*:: +Use JSON output format for the default summary or extended output (see *--extended*). + +*-p*, *--parse*[=_list_]:: +Optimize the command output for easy parsing. ++ +If the _list_ argument is omitted, the command output is compatible with earlier versions of *lscpu*. In this compatible format, two commas are used to separate CPU cache columns. If no CPU caches are identified the cache column is omitted. If the _list_ argument is used, cache columns are separated with a colon (:). ++ +When specifying the _list_ argument, the string of option, equal sign (=), and _list_ must not contain any blanks or other whitespace. Examples: '*-p=cpu,node*' or '*--parse=cpu,node*'. ++ +The default list of columns may be extended if list is specified in the format +list (e.g., lscpu -p=+MHZ). + +*-s*, *--sysroot* _directory_:: +Gather CPU data for a Linux instance other than the instance from which the *lscpu* command is issued. The specified _directory_ is the system root of the Linux instance to be inspected. + +*-x*, *--hex*:: +Use hexadecimal masks for CPU sets (for example "ff"). The default is to print the sets in list format (for example 0,1). Note that before version 2.30 the mask has been printed with 0x prefix. + +*-y*, *--physical*:: +Display physical IDs for all columns with topology elements (core, socket, etc.). Other than logical IDs, which are assigned by *lscpu*, physical IDs are platform-specific values that are provided by the kernel. Physical IDs are not necessarily unique and they might not be arranged sequentially. If the kernel could not retrieve a physical ID for an element *lscpu* prints the dash (-) character. ++ +The CPU logical numbers are not affected by this option. + +*--output-all*:: +Output all available columns. 
This option must be combined with either *--extended*, *--parse* or *--caches*. + +== BUGS + +The basic overview of CPU family, model, etc. is always based on the first CPU only. + +Sometimes in Xen Dom0 the kernel reports wrong data. + +On virtual hardware the number of cores per socket, etc. can be wrong. + +== AUTHORS + +mailto:qcai@redhat.com[Cai Qian], +mailto:kzak@redhat.com[Karel Zak], +mailto:heiko.carstens@de.ibm.com[Heiko Carstens] + +== SEE ALSO + +*chcpu*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/lscpu.c b/sys-utils/lscpu.c new file mode 100644 index 0000000..86fd0b9 --- /dev/null +++ b/sys-utils/lscpu.c @@ -0,0 +1,1437 @@ +/* + * lscpu - CPU architecture information helper + * + * Copyright (C) 2008 Cai Qian <qcai@redhat.com> + * Copyright (C) 2008 Karel Zak <kzak@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <assert.h> +#include <ctype.h> +#include <dirent.h> +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/utsname.h> +#include <unistd.h> +#include <stdarg.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/personality.h> + +#include <libsmartcols.h> + +#include "closestream.h" +#include "optutils.h" +#include "c_strtod.h" + +#include "lscpu.h" +/* names for the VIRT_TYPE_* values; marked for translation with N_() */ +static const char *virt_types[] = { + [VIRT_TYPE_NONE] = N_("none"), + [VIRT_TYPE_PARA] = N_("para"), + [VIRT_TYPE_FULL] = N_("full"), + [VIRT_TYPE_CONTAINER] = N_("container"), +}; +/* names for the VIRT_VENDOR_* values; VIRT_VENDOR_NONE has no name (NULL) */ +static const char *hv_vendors[] = { + [VIRT_VENDOR_NONE] = NULL, + [VIRT_VENDOR_XEN] = "Xen", + [VIRT_VENDOR_KVM] = "KVM", + [VIRT_VENDOR_MSHV] = "Microsoft", + [VIRT_VENDOR_VMWARE] = "VMware", + [VIRT_VENDOR_IBM] = "IBM", + [VIRT_VENDOR_VSERVER] = "Linux-VServer", + [VIRT_VENDOR_UML] = "User-mode Linux", + [VIRT_VENDOR_INNOTEK] = "Innotek GmbH", + [VIRT_VENDOR_HITACHI] = "Hitachi", + [VIRT_VENDOR_PARALLELS] = "Parallels", + [VIRT_VENDOR_VBOX] = "Oracle", + [VIRT_VENDOR_OS400] = "OS/400", + [VIRT_VENDOR_PHYP] = "pHyp", + [VIRT_VENDOR_SPAR] = "Unisys s-Par", + [VIRT_VENDOR_WSL] = "Windows Subsystem for Linux" +}; + +/* dispatching modes, indexed by DISP_* */ +static const char *disp_modes[] = { + [DISP_HORIZONTAL] = N_("horizontal"), + [DISP_VERTICAL] = N_("vertical") +}; +/* one polarization name in two spellings: short for parsable output, long otherwise */ +struct polarization_modes { + char *parsable; + char *readable; +}; +/* indexed by POLAR_* */ +static struct polarization_modes polar_modes[] = { + [POLAR_UNKNOWN] = {"U", "-"}, + [POLAR_VLOW] = {"VL", "vert-low"}, + [POLAR_VMEDIUM] = {"VM", "vert-medium"}, + [POLAR_VHIGH] = {"VH", "vert-high"}, + [POLAR_HORIZONTAL] = {"H", "horizontal"}, +}; + +/* + * Column IDs; the COL_CPU_* values index coldescs_cpu[] + */ +enum { + COL_CPU_BOGOMIPS, + COL_CPU_CPU, + COL_CPU_CORE, + COL_CPU_SOCKET, + COL_CPU_CLUSTER, + COL_CPU_NODE, + COL_CPU_BOOK, + COL_CPU_DRAWER, + COL_CPU_CACHE, + COL_CPU_POLARIZATION, + COL_CPU_ADDRESS, + COL_CPU_CONFIGURED, +
COL_CPU_ONLINE, + COL_CPU_MHZ, + COL_CPU_SCALMHZ, + COL_CPU_MAXMHZ, + COL_CPU_MINMHZ, +}; + +enum { + COL_CACHE_ALLSIZE, + COL_CACHE_LEVEL, + COL_CACHE_NAME, + COL_CACHE_ONESIZE, + COL_CACHE_TYPE, + COL_CACHE_WAYS, + COL_CACHE_ALLOCPOL, + COL_CACHE_WRITEPOL, + COL_CACHE_PHYLINE, + COL_CACHE_SETS, + COL_CACHE_COHERENCYSIZE +}; + + +/* column description + */ +struct lscpu_coldesc { + const char *name; + const char *help; + + int flags; + unsigned int is_abbr:1; /* name is abbreviation */ + int json_type; +}; + +/* + * CPU columns, indexed by COL_CPU_*. The initializers are positional: + * name, help, flags, is_abbr, json_type -- is_abbr must be spelled out + * whenever json_type is given, otherwise the JSON type lands in is_abbr. + */ +static struct lscpu_coldesc coldescs_cpu[] = +{ + [COL_CPU_BOGOMIPS] = { "BOGOMIPS", N_("crude measurement of CPU speed"), SCOLS_FL_RIGHT, 1, SCOLS_JSON_NUMBER }, + [COL_CPU_CPU] = { "CPU", N_("logical CPU number"), SCOLS_FL_RIGHT, 1, SCOLS_JSON_NUMBER }, + [COL_CPU_CORE] = { "CORE", N_("logical core number"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER }, + [COL_CPU_CLUSTER] = { "CLUSTER", N_("logical cluster number"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER }, + [COL_CPU_SOCKET] = { "SOCKET", N_("logical socket number"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER }, + [COL_CPU_NODE] = { "NODE", N_("logical NUMA node number"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER }, + [COL_CPU_BOOK] = { "BOOK", N_("logical book number"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER }, + [COL_CPU_DRAWER] = { "DRAWER", N_("logical drawer number"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER }, + [COL_CPU_CACHE] = { "CACHE", N_("shows how caches are shared between CPUs") }, + [COL_CPU_POLARIZATION] = { "POLARIZATION", N_("CPU dispatching mode on virtual hardware") }, + [COL_CPU_ADDRESS] = { "ADDRESS", N_("physical address of a CPU") }, + [COL_CPU_CONFIGURED] = { "CONFIGURED", N_("shows if the hypervisor has allocated the CPU"), 0, 0, SCOLS_JSON_BOOLEAN_OPTIONAL }, + [COL_CPU_ONLINE] = { "ONLINE", N_("shows if Linux currently makes use of the CPU"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_BOOLEAN_OPTIONAL }, + [COL_CPU_MHZ] = { "MHZ", N_("shows the currently MHz of the CPU"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER }, +
[COL_CPU_SCALMHZ] = { "SCALMHZ%", N_("shows scaling percentage of the CPU frequency"), SCOLS_FL_RIGHT, 0 }, /* the cell text ends with '%', so it is not given a JSON number type */ + [COL_CPU_MAXMHZ] = { "MAXMHZ", N_("shows the maximum MHz of the CPU"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER }, + [COL_CPU_MINMHZ] = { "MINMHZ", N_("shows the minimum MHz of the CPU"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER } +}; + +/* cache columns, indexed by COL_CACHE_*; same positional layout as coldescs_cpu[] */ +static struct lscpu_coldesc coldescs_cache[] = +{ + [COL_CACHE_ALLSIZE] = { "ALL-SIZE", N_("size of all system caches"), SCOLS_FL_RIGHT }, + [COL_CACHE_LEVEL] = { "LEVEL", N_("cache level"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER }, + [COL_CACHE_NAME] = { "NAME", N_("cache name") }, + [COL_CACHE_ONESIZE] = { "ONE-SIZE", N_("size of one cache"), SCOLS_FL_RIGHT }, + [COL_CACHE_TYPE] = { "TYPE", N_("cache type") }, + [COL_CACHE_WAYS] = { "WAYS", N_("ways of associativity"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER }, + [COL_CACHE_ALLOCPOL] = { "ALLOC-POLICY", N_("allocation policy") }, + [COL_CACHE_WRITEPOL] = { "WRITE-POLICY", N_("write policy") }, + [COL_CACHE_PHYLINE] = { "PHY-LINE", N_("number of physical cache line per cache tag"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER }, + [COL_CACHE_SETS] = { "SETS", N_("number of sets in the cache; set lines has the same cache index"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER }, + [COL_CACHE_COHERENCYSIZE] = { "COHERENCY-SIZE", N_("minimum amount of data in bytes transferred from memory to cache"), SCOLS_FL_RIGHT, 0, SCOLS_JSON_NUMBER } +}; + +static int is_term = 0; + +UL_DEBUG_DEFINE_MASK(lscpu); +UL_DEBUG_DEFINE_MASKNAMES(lscpu) = UL_DEBUG_EMPTY_MASKNAMES; + +static void lscpu_init_debug(void) +{ + __UL_INIT_DEBUG_FROM_ENV(lscpu, LSCPU_DEBUG_, 0, LSCPU_DEBUG); +} + +/* + * Map a CPU column name (the first namesz bytes of name, case-insensitive) + * to its COL_CPU_* index; warns and returns -1 when nothing matches. + */ +static int +cpu_column_name_to_id(const char *name, size_t namesz) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(coldescs_cpu); i++) { + const char *cn = coldescs_cpu[i].name; + + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) + return i; + } + warnx(_("unknown column: %s"), name); + return -1; +} + +static int
+cache_column_name_to_id(const char *name, size_t namesz) +{ + size_t i; + /* case-insensitive match of the first namesz bytes of name; the table name must end exactly there */ + for (i = 0; i < ARRAY_SIZE(coldescs_cache); i++) { + const char *cn = coldescs_cache[i].name; + + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) + return i; + } + warnx(_("unknown column: %s"), name); + return -1; +} +/* Create the path handlers for _PATH_SYS_CPU and /proc, applying cxt->prefix when set; exits on failure. */ +static void lscpu_context_init_paths(struct lscpu_cxt *cxt) +{ + DBG(MISC, ul_debugobj(cxt, "initialize paths")); + ul_path_init_debug(); + + /* /sys/devices/system/cpu */ + cxt->syscpu = ul_new_path(_PATH_SYS_CPU); + if (!cxt->syscpu) + err(EXIT_FAILURE, _("failed to initialize CPUs sysfs handler")); + if (cxt->prefix) + ul_path_set_prefix(cxt->syscpu, cxt->prefix); + + /* /proc */ + cxt->procfs = ul_new_path("/proc"); + if (!cxt->procfs) + err(EXIT_FAILURE, _("failed to initialize procfs handler")); + if (cxt->prefix) + ul_path_set_prefix(cxt->procfs, cxt->prefix); +} +/* Allocate a zero-filled context. */ +static struct lscpu_cxt *lscpu_new_context(void) +{ + return xcalloc(1, sizeof(struct lscpu_cxt)); +} +/* Release everything the context refers to, then the context itself; NULL is accepted. */ +static void lscpu_free_context(struct lscpu_cxt *cxt) +{ + size_t i; + + if (!cxt) + return; + + DBG(MISC, ul_debugobj(cxt, "freeing context")); + + DBG(MISC, ul_debugobj(cxt, " de-initialize paths")); + ul_unref_path(cxt->syscpu); + ul_unref_path(cxt->procfs); + + DBG(MISC, ul_debugobj(cxt, " freeing cpus")); + for (i = 0; i < cxt->npossibles; i++) { + lscpu_unref_cpu(cxt->cpus[i]); + cxt->cpus[i] = NULL; + } + DBG(MISC, ul_debugobj(cxt, " freeing types")); + for (i = 0; i < cxt->ncputypes; i++) { + lscpu_unref_cputype(cxt->cputypes[i]); + cxt->cputypes[i] = NULL; + } + + free(cxt->present); + free(cxt->online); + free(cxt->cputypes); + free(cxt->cpus); + + for (i = 0; i < cxt->nvuls; i++) { + free(cxt->vuls[i].name); + free(cxt->vuls[i].text); + } + free(cxt->vuls); + + for (i = 0; i < cxt->nnodes; i++) + free(cxt->nodemaps[i]); + + free(cxt->nodemaps); + free(cxt->idx2nodenum); + + lscpu_free_virtualization(cxt->virt); + lscpu_free_architecture(cxt->arch); + + lscpu_free_caches(cxt->ecaches,
cxt->necaches); + lscpu_free_caches(cxt->caches, cxt->ncaches); + + free(cxt); +} +/* Format an ID into buf: the raw id (a dash when negative) in physical mode, otherwise the index that cpuset_ary_isset() reports for map; buf stays empty when neither applies. */ +static void __fill_id( struct lscpu_cxt *cxt, + struct lscpu_cpu *cpu, + int id, cpu_set_t **map, + size_t nitems, + char *buf, size_t bufsz) +{ + *buf = '\0'; + + if (cxt->show_physical) { + if (id < 0) + snprintf(buf, bufsz, "-"); + else + snprintf(buf, bufsz, "%d", id); + } else if (map) { + size_t i; + + if (cpuset_ary_isset(cpu->logical_id, map, nitems, + cxt->setsize, &i) == 0) + snprintf(buf, bufsz, "%zu", i); + } +} +/* Write Y/N (parsable or JSON mode) or yes/no into buf; buf is left untouched when has_data is zero. */ +static void get_cell_boolean( + struct lscpu_cxt *cxt, + int has_data, int data, + char *buf, size_t bufsz) +{ + if (!has_data) + return; + + if (cxt->mode == LSCPU_OUTPUT_PARSABLE || cxt->json) + snprintf(buf, bufsz, "%s", + data ? _("Y") : _("N")); + else + snprintf(buf, bufsz, "%s", + data ? _("yes") : _("no")); +} + /* fill_id(): paste NAME (core, socket, ...) into the matching id, maps and count members for __fill_id() */ +#define fill_id(_cxt, _cpu, NAME, _buf, _bufsz) \ + __fill_id(_cxt, (_cpu), \ + (_cpu)-> NAME ## id, \ + (_cpu)->type-> NAME ## maps, \ + (_cpu)->type->n ## NAME ## s, \ + _buf, _bufsz) +/* Format the cell of column col for cpu into buf; returns buf, or NULL when the CPU has no type or the cache ID list does not fit. */ +static char *get_cell_data( + struct lscpu_cxt *cxt, + struct lscpu_cpu *cpu, int col, + char *buf, size_t bufsz) +{ + size_t i; + + *buf = '\0'; + + if (!cpu->type) + return NULL; + + switch (col) { + case COL_CPU_CPU: + snprintf(buf, bufsz, "%d", cpu->logical_id); + break; + case COL_CPU_BOGOMIPS: + if (!cpu->bogomips && !cpu->type->bogomips) + break; + snprintf(buf, bufsz, "%.2f", (float) c_strtod( + cpu->bogomips ?
: cpu->type->bogomips, NULL)); + break; + case COL_CPU_CORE: + fill_id(cxt, cpu, core, buf, bufsz); + break; + case COL_CPU_SOCKET: + fill_id(cxt, cpu, socket, buf, bufsz); + break; + case COL_CPU_CLUSTER: /* filled from the socket id/maps, and only when cxt->is_cluster is set */ + if (cxt->is_cluster) + fill_id(cxt, cpu, socket, buf, bufsz); + break; + case COL_CPU_DRAWER: + fill_id(cxt, cpu, drawer, buf, bufsz); + break; + case COL_CPU_BOOK: + fill_id(cxt, cpu, book, buf, bufsz); + break; + case COL_CPU_NODE: /* index into nodemaps is translated through idx2nodenum[] */ + if (cpuset_ary_isset(cpu->logical_id, cxt->nodemaps, + cxt->nnodes, cxt->setsize, &i) == 0) + snprintf(buf, bufsz, "%d", cxt->idx2nodenum[i]); + break; + case COL_CPU_CACHE: + { + const char *last = NULL; + char *p = buf; + size_t sz = bufsz; + /* one cache ID per run of equal cache names, joined by ',' (compatible format) or ':' */ + for (i = 0; i < cxt->ncaches; i++) { + int x; + struct lscpu_cache *ca; + const char *name = cxt->caches[i].name; + + if (last && strcmp(last, name) == 0) + continue; + last = name; + ca = lscpu_cpu_get_cache(cxt, cpu, name); + if (!ca) + continue; + x = snprintf(p, sz, "%d", ca->id); + if (x < 0 || (size_t) x >= sz) + return NULL; + p += x; + sz -= x; + if (sz < 2) + return NULL; + *p++ = cxt->show_compatible ? ',' : ':'; + *p = '\0'; + sz--; + } + if (p > buf && (*(p - 1) == ',' || *(p - 1) == ':')) + *(p - 1) = '\0'; /* drop the trailing separator */ + break; + } + case COL_CPU_POLARIZATION: + if (cpu->polarization < 0) + break; + snprintf(buf, bufsz, "%s", + cxt->mode == LSCPU_OUTPUT_PARSABLE ?
+ polar_modes[cpu->polarization].parsable : + polar_modes[cpu->polarization].readable); + break; + case COL_CPU_ADDRESS: + if (cpu->address < 0) + break; + snprintf(buf, bufsz, "%d", cpu->address); + break; + case COL_CPU_CONFIGURED: + get_cell_boolean(cxt, cpu->configured >= 0, cpu->configured, buf, bufsz); + break; + case COL_CPU_ONLINE: + get_cell_boolean(cxt, !!cxt->online, is_cpu_online(cxt, cpu), buf, bufsz); + break; + case COL_CPU_MHZ: + if (cpu->mhz_cur_freq) + snprintf(buf, bufsz, "%.4f", cpu->mhz_cur_freq); + break; + case COL_CPU_SCALMHZ: /* current/maximum frequency as a percentage, printed with a trailing '%' */ + if (cpu->mhz_cur_freq && cpu->mhz_max_freq) + snprintf(buf, bufsz, "%.0f%%", cpu->mhz_cur_freq / cpu->mhz_max_freq * 100); + break; + case COL_CPU_MAXMHZ: + if (cpu->mhz_max_freq) + snprintf(buf, bufsz, "%.4f", cpu->mhz_max_freq); + break; + case COL_CPU_MINMHZ: + if (cpu->mhz_min_freq) + snprintf(buf, bufsz, "%.4f", cpu->mhz_min_freq); + break; + } + return buf; +} +/* Write the header of column col into buf: the joined cache names for COL_CPU_CACHE when caches exist, otherwise the column name; returns NULL when the cache names do not fit. */ +static char *get_cell_header( + struct lscpu_cxt *cxt, int col, + char *buf, size_t bufsz) +{ + *buf = '\0'; + + if (col == COL_CPU_CACHE) { + const char *last = NULL; + char *p = buf; + size_t sz = bufsz; + size_t i; + + for (i = 0; i < cxt->ncaches; i++) { + struct lscpu_cache *ca = &cxt->caches[i]; + int x; + + if (last && strcmp(last, ca->name) == 0) + continue; + last = ca->name; + + x = snprintf(p, sz, "%s", ca->name); + if (x < 0 || (size_t) x >= sz) + return NULL; + sz -= x; + p += x; + if (sz < 2) + return NULL; + *p++ = cxt->show_compatible ?
',' : ':'; + *p = '\0'; + sz--; + } + if (p > buf && (*(p - 1) == ',' || *(p - 1) == ':')) + *(p - 1) = '\0'; /* drop the trailing separator */ + if (cxt->ncaches) + return buf; + } + snprintf(buf, bufsz, "%s", coldescs_cpu[col].name); + return buf; +} + +/* Append one table line for cache ca and fill the requested columns; a column without a value keeps an unset cell. Exits on allocation failure. */ +static void caches_add_line(struct lscpu_cxt *cxt, + struct libscols_table *tb, + struct lscpu_cache *ca, + int cols[], size_t ncols) +{ + struct libscols_line *ln; + size_t c; + + ln = scols_table_new_line(tb, NULL); + if (!ln) + err(EXIT_FAILURE, _("failed to allocate output line")); + + for (c = 0; c < ncols; c++) { + char *data = NULL; + int col = cols[c]; + + switch (col) { + case COL_CACHE_NAME: + if (ca->name) + data = xstrdup(ca->name); + break; + case COL_CACHE_ONESIZE: + if (!ca->size) + break; + if (cxt->bytes) + xasprintf(&data, "%" PRIu64, ca->size); + else + data = size_to_human_string(SIZE_SUFFIX_1LETTER, ca->size); + break; + case COL_CACHE_ALLSIZE: + { + uint64_t sz = 0; + if (ca->name) + sz = lscpu_get_cache_full_size(cxt, ca->name, NULL); + if (!sz) + break; + if (cxt->bytes) + xasprintf(&data, "%" PRIu64, sz); + else + data = size_to_human_string(SIZE_SUFFIX_1LETTER, sz); + break; + } + case COL_CACHE_WAYS: + if (ca->ways_of_associativity) + xasprintf(&data, "%u", ca->ways_of_associativity); + break; + + case COL_CACHE_TYPE: + if (ca->type) + data = xstrdup(ca->type); + break; + case COL_CACHE_LEVEL: + if (ca->level) + xasprintf(&data, "%d", ca->level); + break; + case COL_CACHE_ALLOCPOL: + if (ca->allocation_policy) + data = xstrdup(ca->allocation_policy); + break; + case COL_CACHE_WRITEPOL: + if (ca->write_policy) + data = xstrdup(ca->write_policy); + break; + case COL_CACHE_PHYLINE: + if (ca->physical_line_partition) + xasprintf(&data, "%u", ca->physical_line_partition); + break; + case COL_CACHE_SETS: + if (ca->number_of_sets) + xasprintf(&data, "%u", ca->number_of_sets); + break; + case COL_CACHE_COHERENCYSIZE: + if (ca->coherency_line_size) + xasprintf(&data, "%u", ca->coherency_line_size); + break; + } + + if (data &&
scols_line_refer_data(ln, c, data)) + err(EXIT_FAILURE, _("failed to add output data")); + } +} + + +/* + * [-C] backend: print the cache table with the requested columns, one line + * per run of equal cache names -- standard caches first, then the extra ones. + */ +static void print_caches_readable(struct lscpu_cxt *cxt, int cols[], size_t ncols) +{ + size_t i; + struct libscols_table *tb; + const char *last = NULL; + + scols_init_debug(0); + + tb = scols_new_table(); + if (!tb) + err(EXIT_FAILURE, _("failed to allocate output table")); + if (cxt->json) { + scols_table_enable_json(tb, 1); + scols_table_set_name(tb, "caches"); + } + + for (i = 0; i < ncols; i++) { + struct lscpu_coldesc *cd = &coldescs_cache[cols[i]]; + struct libscols_column *cl; + + cl = scols_table_new_column(tb, cd->name, 0, cd->flags); + if (cl == NULL) + err(EXIT_FAILURE, _("failed to allocate output column")); + if (cxt->json) + scols_column_set_json_type(cl, cd->json_type); + } + + /* standard caches */ + for (i = 0; i < cxt->ncaches; i++) { + struct lscpu_cache *ca = &cxt->caches[i]; + + if (last && strcmp(last, ca->name) == 0) + continue; + last = ca->name; + caches_add_line(cxt, tb, ca, cols, ncols); + } + + /* extra caches; "last" deliberately carries over from the loop above */ + for (i = 0; i < cxt->necaches; i++) { + struct lscpu_cache *ca = &cxt->ecaches[i]; + + if (last && strcmp(last, ca->name) == 0) + continue; + last = ca->name; + caches_add_line(cxt, tb, ca, cols, ncols); + } + + scols_print_table(tb); + scols_unref_table(tb); +} + +/* + * [-p] backend, we support two parsable formats: + * + * 1) "compatible" -- this format is compatible with the original lscpu(1) + * output and it contains fixed set of the columns. The CACHE columns are at + * the end of the line and the CACHE is not printed if the number of the caches + * is zero. The CACHE columns are separated by two commas, for example: + * + * $ lscpu --parse + * # CPU,Core,Socket,Node,,L1d,L1i,L2 + * 0,0,0,0,,0,0,0 + * 1,1,0,0,,1,1,0 + * + * 2) "user defined output" -- this format prints always all columns without + * special prefix for CACHE column.
If there are not CACHEs then the column is + * empty and the header "Cache" is printed rather than a real name of the cache. + * The CACHE columns are separated by ':'. + * + * $ lscpu --parse=CPU,CORE,SOCKET,NODE,CACHE + * # CPU,Core,Socket,Node,L1d:L1i:L2 + * 0,0,0,0,0:0:0 + * 1,1,0,0,1:1:0 + */ +static void print_cpus_parsable(struct lscpu_cxt *cxt, int cols[], size_t ncols) +{ + char buf[BUFSIZ], *data; + size_t i; + + /* + * Header + */ + printf(_( + "# The following is the parsable format, which can be fed to other\n" + "# programs. Each different item in every column has an unique ID\n" + "# starting usually from zero.\n")); + + fputs("# ", stdout); + for (i = 0; i < ncols; i++) { + int col = cols[i]; + + if (col == COL_CPU_CACHE) { + if (cxt->show_compatible && !cxt->ncaches) + continue; + if (cxt->show_compatible && i != 0) + putchar(','); + } + if (i > 0) + putchar(','); + + data = get_cell_header(cxt, col, buf, sizeof(buf)); + if (data && * data && col != COL_CPU_CACHE && + !coldescs_cpu[col].is_abbr) { + /* + * For normal column names use mixed case (e.g. "Socket") + */ + char *p = data + 1; + + while (p && *p != '\0') { + *p = tolower((unsigned int) *p); + p++; + } + } + fputs(data && *data ? data : "", stdout); + } + putchar('\n'); + + /* + * Data + */ + for (i = 0; i < cxt->npossibles; i++) { + struct lscpu_cpu *cpu = cxt->cpus[i]; + size_t c; + + if (cxt->online) { + if (!cxt->show_offline && !is_cpu_online(cxt, cpu)) + continue; + if (!cxt->show_online && is_cpu_online(cxt, cpu)) + continue; + } + if (cxt->present && !is_cpu_present(cxt, cpu)) + continue; + + for (c = 0; c < ncols; c++) { + if (cxt->show_compatible && cols[c] == COL_CPU_CACHE) { + if (!cxt->ncaches) + continue; + if (c > 0) + putchar(','); + } + if (c > 0) + putchar(','); + + data = get_cell_data(cxt, cpu, cols[c], buf, sizeof(buf)); + fputs(data && *data ? 
data : "", stdout); + *buf = '\0'; + } + putchar('\n'); + } +} + +/* + * [-e] backend + */ +static void print_cpus_readable(struct lscpu_cxt *cxt, int cols[], size_t ncols) +{ + size_t i; + char buf[BUFSIZ]; + const char *data; + struct libscols_table *tb; + + scols_init_debug(0); + + tb = scols_new_table(); + if (!tb) + err(EXIT_FAILURE, _("failed to allocate output table")); + if (cxt->json) { + scols_table_enable_json(tb, 1); + scols_table_set_name(tb, "cpus"); + } + + for (i = 0; i < ncols; i++) { + data = get_cell_header(cxt, cols[i], buf, sizeof(buf)); + struct lscpu_coldesc *cd = &coldescs_cpu[cols[i]]; + struct libscols_column *cl; + + cl = scols_table_new_column(tb, data, 0, cd->flags); + if (cl == NULL) + err(EXIT_FAILURE, _("failed to allocate output column")); + if (cxt->json) + scols_column_set_json_type(cl, cd->json_type); + } + + for (i = 0; i < cxt->npossibles; i++) { + size_t c; + struct libscols_line *ln; + struct lscpu_cpu *cpu = cxt->cpus[i]; + + if (cxt->online) { + if (!cxt->show_offline && !is_cpu_online(cxt, cpu)) + continue; + if (!cxt->show_online && is_cpu_online(cxt, cpu)) + continue; + } + + if (cxt->present && !is_cpu_present(cxt, cpu)) + continue; + + ln = scols_table_new_line(tb, NULL); + if (!ln) + err(EXIT_FAILURE, _("failed to allocate output line")); + + for (c = 0; c < ncols; c++) { + data = get_cell_data(cxt, cpu, cols[c], buf, sizeof(buf)); + if (!data || !*data) + data = "-"; + if (scols_line_set_data(ln, c, data)) + err(EXIT_FAILURE, _("failed to add output data")); + } + } + + scols_print_table(tb); + scols_unref_table(tb); +} + +static struct libscols_line * + __attribute__ ((__format__(printf, 4, 5))) + add_summary_sprint(struct libscols_table *tb, + struct libscols_line *sec, + const char *txt, + const char *fmt, + ...) 
+{ + struct libscols_line *ln; + va_list args; + + /* Don't print section lines without data on non-terminal output */ + if (!is_term && fmt == NULL) + return NULL; + + ln = scols_table_new_line(tb, sec); + if (!ln) + err(EXIT_FAILURE, _("failed to allocate output line")); + + /* description column */ + if (txt && scols_line_set_data(ln, 0, txt)) + err(EXIT_FAILURE, _("failed to add output data")); + + /* data column */ + if (fmt) { + char *data; + va_start(args, fmt); + xvasprintf(&data, fmt, args); + va_end(args); + + if (data && scols_line_refer_data(ln, 1, data)) + err(EXIT_FAILURE, _("failed to add output data")); + } + + return ln; +} + +#define add_summary_e(tb, sec, txt) add_summary_sprint(tb, sec, txt, NULL) +#define add_summary_n(tb, sec, txt, num) add_summary_sprint(tb, sec, txt, "%zu", num) +#define add_summary_s(tb, sec, txt, str) add_summary_sprint(tb, sec, txt, "%s", str) +#define add_summary_x(tb, sec, txt, fmt, x) add_summary_sprint(tb, sec, txt, fmt, x) + +static void +print_cpuset(struct lscpu_cxt *cxt, + struct libscols_table *tb, + struct libscols_line *sec, + const char *key, cpu_set_t *set) +{ + size_t setbuflen = 7 * cxt->maxcpus; + char setbuf[setbuflen], *p; + + assert(set); + assert(key); + assert(tb); + assert(cxt); + + if (cxt->hex) { + p = cpumask_create(setbuf, setbuflen, set, cxt->setsize); + add_summary_s(tb, sec, key, p); + } else { + p = cpulist_create(setbuf, setbuflen, set, cxt->setsize); + add_summary_s(tb, sec, key, p); + } +} + +static void +print_summary_cputype(struct lscpu_cxt *cxt, + struct lscpu_cputype *ct, + struct libscols_table *tb, + struct libscols_line *sec) +{ + sec = add_summary_s(tb, sec, _("Model name:"), ct->modelname ? 
ct->modelname : "-"); + if (ct->bios_modelname) + add_summary_s(tb, sec, _("BIOS Model name:"), ct->bios_modelname); + if (ct->bios_family) + add_summary_s(tb, sec, _("BIOS CPU family:"), ct->bios_family); + if (ct->machinetype) + add_summary_s(tb, sec, _("Machine type:"), ct->machinetype); + if (ct->family) + add_summary_s(tb, sec, _("CPU family:"), ct->family); + if (ct->model || ct->revision) + add_summary_s(tb, sec, _("Model:"), ct->revision ? ct->revision : ct->model); + + add_summary_n(tb, sec, _("Thread(s) per core:"), ct->nthreads_per_core); + if (cxt->is_cluster) + add_summary_n(tb, sec, _("Core(s) per cluster:"), ct->ncores_per_socket); + else + add_summary_n(tb, sec, _("Core(s) per socket:"), ct->ncores_per_socket); + + if (ct->nbooks) { + add_summary_n(tb, sec, _("Socket(s) per book:"), ct->nsockets_per_book); + if (ct->ndrawers_per_system || ct->ndrawers) { + add_summary_n(tb, sec, _("Book(s) per drawer:"), ct->nbooks_per_drawer); + add_summary_n(tb, sec, _("Drawer(s):"), ct->ndrawers_per_system ?: ct->ndrawers); + } else + add_summary_n(tb, sec, _("Book(s):"), ct->nbooks_per_drawer ?: ct->nbooks); + } else { + if (cxt->is_cluster) { + if (ct->nr_socket_on_cluster > 0) + add_summary_n(tb, sec, _("Socket(s):"), ct->nr_socket_on_cluster); + else + add_summary_s(tb, sec, _("Socket(s):"), "-"); + + add_summary_n(tb, sec, _("Cluster(s):"), + ct->nsockets_per_book ?: ct->nsockets); + } else + add_summary_n(tb, sec, _("Socket(s):"), + ct->nsockets_per_book ?: ct->nsockets); + } + + if (ct->stepping) + add_summary_s(tb, sec, _("Stepping:"), ct->stepping); + if (ct->freqboost >= 0) + add_summary_s(tb, sec, _("Frequency boost:"), ct->freqboost ? 
+ _("enabled") : _("disabled")); + + /* s390 -- from the first CPU where is dynamic/static MHz */ + if (ct->dynamic_mhz) + add_summary_s(tb, sec, _("CPU dynamic MHz:"), ct->dynamic_mhz); + if (ct->static_mhz) + add_summary_s(tb, sec, _("CPU static MHz:"), ct->static_mhz); + + if (ct->has_freq) { + float scal = lsblk_cputype_get_scalmhz(cxt, ct); + if (scal > 0.0) + add_summary_x(tb, sec, _("CPU(s) scaling MHz:"), "%.0f%%", scal); + add_summary_x(tb, sec, _("CPU max MHz:"), "%.4f", lsblk_cputype_get_maxmhz(cxt, ct)); + add_summary_x(tb, sec, _("CPU min MHz:"), "%.4f", lsblk_cputype_get_minmhz(cxt, ct)); + } + if (ct->bogomips) + add_summary_x(tb, sec, _("BogoMIPS:"), "%.2f", (float) c_strtod(ct->bogomips, NULL)); + + if (ct->dispatching >= 0) + add_summary_s(tb, sec, _("Dispatching mode:"), _(disp_modes[ct->dispatching])); + + if (ct->physsockets) { + add_summary_n(tb, sec, _("Physical sockets:"), ct->physsockets); + add_summary_n(tb, sec, _("Physical chips:"), ct->physchips); + add_summary_n(tb, sec, _("Physical cores/chip:"), ct->physcoresperchip); + } + + if (ct->flags) + add_summary_s(tb, sec, _("Flags:"), ct->flags); +} + +/* + * default output + */ +static void print_summary(struct lscpu_cxt *cxt) +{ + struct lscpu_cputype *ct; + char field[256]; + size_t i = 0; + struct libscols_table *tb; + struct libscols_line *sec = NULL; + int hdr_caches = 0; + + scols_init_debug(0); + + tb = scols_new_table(); + if (!tb) + err(EXIT_FAILURE, _("failed to allocate output table")); + + scols_table_enable_noheadings(tb, 1); + if (cxt->json) { + scols_table_enable_json(tb, 1); + scols_table_set_name(tb, "lscpu"); + } else if (is_term) { + struct libscols_symbols *sy = scols_new_symbols(); + + if (!sy) + err_oom(); + scols_symbols_set_branch(sy, " "); + scols_symbols_set_vertical(sy, " "); + scols_symbols_set_right(sy, " "); + scols_table_set_symbols(tb, sy); + scols_unref_symbols(sy); + } + + if (scols_table_new_column(tb, "field", 0, is_term ? 
SCOLS_FL_TREE : 0) == NULL || + scols_table_new_column(tb, "data", 0, SCOLS_FL_NOEXTREMES | SCOLS_FL_WRAP) == NULL) + err(EXIT_FAILURE, _("failed to initialize output column")); + + ct = lscpu_cputype_get_default(cxt); + + /* Section: architecture */ + if (cxt->arch) + sec = add_summary_s(tb, NULL, _("Architecture:"), cxt->arch->name); + if (cxt->arch && (cxt->arch->bit32 || cxt->arch->bit64)) { + char buf[32], *p = buf; + + if (cxt->arch->bit32) { + strcpy(p, "32-bit, "); + p += 8; + } + if (cxt->arch->bit64) { + strcpy(p, "64-bit, "); + p += 8; + } + *(p - 2) = '\0'; + add_summary_s(tb, sec, _("CPU op-mode(s):"), buf); + } + if (ct && ct->addrsz) + add_summary_s(tb, sec, _("Address sizes:"), ct->addrsz); +#if !defined(WORDS_BIGENDIAN) + add_summary_s(tb, sec, _("Byte Order:"), "Little Endian"); +#else + add_summary_s(tb, sec, _("Byte Order:"), "Big Endian"); +#endif + + /* Section: CPU lists */ + sec = add_summary_n(tb, NULL, _("CPU(s):"), cxt->npresents); + + if (cxt->online) + print_cpuset(cxt, tb, sec, + cxt->hex ? _("On-line CPU(s) mask:") : + _("On-line CPU(s) list:"), + cxt->online); + + if (cxt->online && cxt->nonlines != cxt->npresents) { + cpu_set_t *set; + + /* Linux kernel provides cpuset of off-line CPUs that contains + * all configured CPUs (see /sys/devices/system/cpu/offline), + * but want to print real (present in system) off-line CPUs only. + */ + set = cpuset_alloc(cxt->maxcpus, NULL, NULL); + if (!set) + err(EXIT_FAILURE, _("failed to callocate cpu set")); + CPU_ZERO_S(cxt->setsize, set); + for (i = 0; i < cxt->npossibles; i++) { + struct lscpu_cpu *cpu = cxt->cpus[i]; + + if (cpu && is_cpu_present(cxt, cpu) && !is_cpu_online(cxt, cpu)) + CPU_SET_S(cpu->logical_id, cxt->setsize, set); + } + print_cpuset(cxt, tb, sec, + cxt->hex ? 
_("Off-line CPU(s) mask:") : + _("Off-line CPU(s) list:"), set); + cpuset_free(set); + } + sec = NULL; + + /* Section: cpu type description */ + if (ct && ct->vendor) + sec = add_summary_s(tb, NULL, _("Vendor ID:"), ct->vendor); + if (ct && ct->bios_vendor) + add_summary_s(tb, sec, _("BIOS Vendor ID:"), ct->bios_vendor); + + for (i = 0; i < cxt->ncputypes; i++) + print_summary_cputype(cxt, cxt->cputypes[i], tb, sec); + sec = NULL; + + /* Section: vitualiazation */ + if (cxt->virt) { + sec = add_summary_e(tb, NULL, _("Virtualization features:")); + if (cxt->virt->cpuflag && !strcmp(cxt->virt->cpuflag, "svm")) + add_summary_s(tb, sec, _("Virtualization:"), "AMD-V"); + else if (cxt->virt->cpuflag && !strcmp(cxt->virt->cpuflag, "vmx")) + add_summary_s(tb, sec, _("Virtualization:"), "VT-x"); + + if (cxt->virt->hypervisor) + add_summary_s(tb, sec, _("Hypervisor:"), cxt->virt->hypervisor); + if (cxt->virt->vendor) { + add_summary_s(tb, sec, _("Hypervisor vendor:"), hv_vendors[cxt->virt->vendor]); + add_summary_s(tb, sec, _("Virtualization type:"), _(virt_types[cxt->virt->type])); + } + sec = NULL; + } + + /* Section: caches */ + if (cxt->ncaches) { + const char *last = NULL; + + /* The caches are sorted by name, cxt->caches[] may contains + * multiple instances for the same name. + */ + for (i = 0; i < cxt->ncaches; i++) { + const char *name = cxt->caches[i].name; + uint64_t sz; + int n = 0; + + if (last && strcmp(last, name) == 0) + continue; + sz = lscpu_get_cache_full_size(cxt, name, &n); + if (!sz) + continue; + if (!hdr_caches) { + sec = add_summary_e(tb, NULL, _("Caches (sum of all):")); + hdr_caches = 1; + } + + snprintf(field, sizeof(field), is_term ? 
_("%s:") : _("%s cache:"), name); + if (cxt->bytes) + add_summary_sprint(tb, sec, field, + P_("%" PRIu64 " (%d instance)", + "%" PRIu64 " (%d instances)", n), + sz, n); + else { + char *tmp = size_to_human_string( + SIZE_SUFFIX_3LETTER | + SIZE_SUFFIX_SPACE, + sz); + add_summary_sprint(tb, sec, field, + P_("%s (%d instance)", + "%s (%d instances)", n), + tmp, n); + free(tmp); + } + last = name; + } + } + + for (i = 0; i < cxt->necaches; i++) { + struct lscpu_cache *ca = &cxt->ecaches[i]; + + if (ca->size == 0) + continue; + if (!hdr_caches) { + sec = add_summary_e(tb, NULL, _("Caches:")); + hdr_caches = 1; + } + snprintf(field, sizeof(field), is_term ? _("%s:") : _("%s cache:"), ca->name); + if (cxt->bytes) + add_summary_x(tb, sec, field, "%" PRIu64, ca->size); + else { + char *tmp = size_to_human_string( + SIZE_SUFFIX_3LETTER | + SIZE_SUFFIX_SPACE, + ca->size); + add_summary_s(tb, sec, field, tmp); + free(tmp); + } + } + sec = NULL; + + /* Section: NUMA modes */ + if (cxt->nnodes) { + sec = add_summary_e(tb, NULL, _("NUMA:")); + + add_summary_n(tb, sec,_("NUMA node(s):"), cxt->nnodes); + for (i = 0; i < cxt->nnodes; i++) { + snprintf(field, sizeof(field), _("NUMA node%d CPU(s):"), cxt->idx2nodenum[i]); + print_cpuset(cxt, tb, sec, field, cxt->nodemaps[i]); + } + sec = NULL; + } + + /* Section: Vulnerabilities */ + if (cxt->vuls) { + sec = add_summary_e(tb, NULL, _("Vulnerabilities:")); + + for (i = 0; i < cxt->nvuls; i++) { + snprintf(field, sizeof(field), is_term ? 
+ _("%s:") : _("Vulnerability %s:"), cxt->vuls[i].name); + add_summary_s(tb, sec, field, cxt->vuls[i].text); + } + sec = NULL; + } + scols_print_table(tb); + scols_unref_table(tb); +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + size_t i; + + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Display information about the CPU architecture.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -a, --all print both online and offline CPUs (default for -e)\n"), out); + fputs(_(" -b, --online print online CPUs only (default for -p)\n"), out); + fputs(_(" -B, --bytes print sizes in bytes rather than in human readable format\n"), out); + fputs(_(" -C, --caches[=<list>] info about caches in extended readable format\n"), out); + fputs(_(" -c, --offline print offline CPUs only\n"), out); + fputs(_(" -J, --json use JSON for default or extended format\n"), out); + fputs(_(" -e, --extended[=<list>] print out an extended readable format\n"), out); + fputs(_(" -p, --parse[=<list>] print out a parsable format\n"), out); + fputs(_(" -s, --sysroot <dir> use specified directory as system root\n"), out); + fputs(_(" -x, --hex print hexadecimal masks rather than lists of CPUs\n"), out); + fputs(_(" -y, --physical print physical instead of logical IDs\n"), out); + fputs(_(" --output-all print all available columns for -e, -p or -C\n"), out); + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(25)); + + fputs(_("\nAvailable output columns for -e or -p:\n"), out); + for (i = 0; i < ARRAY_SIZE(coldescs_cpu); i++) + fprintf(out, " %13s %s\n", coldescs_cpu[i].name, _(coldescs_cpu[i].help)); + + fputs(_("\nAvailable output columns for -C:\n"), out); + for (i = 0; i < ARRAY_SIZE(coldescs_cache); i++) + fprintf(out, " %13s %s\n", coldescs_cache[i].name, _(coldescs_cache[i].help)); + + printf(USAGE_MAN_TAIL("lscpu(1)")); + + exit(EXIT_SUCCESS); +} + +int main(int 
argc, char *argv[]) +{ + struct lscpu_cxt *cxt; + int c, all = 0; + int columns[ARRAY_SIZE(coldescs_cpu)]; + int cpu_modifier_specified = 0; + char *outarg = NULL; + size_t i, ncolumns = 0; + enum { + OPT_OUTPUT_ALL = CHAR_MAX + 1, + }; + static const struct option longopts[] = { + { "all", no_argument, NULL, 'a' }, + { "online", no_argument, NULL, 'b' }, + { "bytes", no_argument, NULL, 'B' }, + { "caches", optional_argument, NULL, 'C' }, + { "offline", no_argument, NULL, 'c' }, + { "help", no_argument, NULL, 'h' }, + { "extended", optional_argument, NULL, 'e' }, + { "json", no_argument, NULL, 'J' }, + { "parse", optional_argument, NULL, 'p' }, + { "sysroot", required_argument, NULL, 's' }, + { "physical", no_argument, NULL, 'y' }, + { "hex", no_argument, NULL, 'x' }, + { "version", no_argument, NULL, 'V' }, + { "output-all", no_argument, NULL, OPT_OUTPUT_ALL }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'C','e','p' }, + { 'a','b','c' }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + cxt = lscpu_new_context(); + + while ((c = getopt_long(argc, argv, "aBbC::ce::hJp::s:xyV", longopts, NULL)) != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch (c) { + case 'a': + cxt->show_online = cxt->show_offline = 1; + cpu_modifier_specified = 1; + break; + case 'B': + cxt->bytes = 1; + break; + case 'b': + cxt->show_online = 1; + cpu_modifier_specified = 1; + break; + case 'c': + cxt->show_offline = 1; + cpu_modifier_specified = 1; + break; + case 'C': + if (optarg) { + if (*optarg == '=') + optarg++; + outarg = optarg; + } + cxt->mode = LSCPU_OUTPUT_CACHES; + break; + case 'J': + cxt->json = 1; + break; + case 'p': + case 'e': + if (optarg) { + if (*optarg == '=') + optarg++; + outarg = optarg; + } + cxt->mode = c == 'p' ? 
LSCPU_OUTPUT_PARSABLE : LSCPU_OUTPUT_READABLE; + break; + case 's': + cxt->prefix = optarg; + cxt->noalive = 1; + break; + case 'x': + cxt->hex = 1; + break; + case 'y': + cxt->show_physical = 1; + break; + case OPT_OUTPUT_ALL: + all = 1; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (all && ncolumns == 0) { + size_t maxsz = cxt->mode == LSCPU_OUTPUT_CACHES ? + ARRAY_SIZE(coldescs_cache) : + ARRAY_SIZE(coldescs_cpu); + + for (i = 0; i < maxsz; i++) + columns[ncolumns++] = i; + } + + if (cpu_modifier_specified && cxt->mode == LSCPU_OUTPUT_SUMMARY) { + fprintf(stderr, + _("%s: options --all, --online and --offline may only " + "be used with options --extended or --parse.\n"), + program_invocation_short_name); + return EXIT_FAILURE; + } + + if (argc != optind) { + warnx(_("bad usage")); + errtryhelp(EXIT_FAILURE); + } + + /* set default cpu display mode if none was specified */ + if (!cxt->show_online && !cxt->show_offline) { + cxt->show_online = 1; + cxt->show_offline = cxt->mode == LSCPU_OUTPUT_READABLE ? 
1 : 0; + } + + is_term = isatty(STDOUT_FILENO); /* global variable */ + + lscpu_init_debug(); + + lscpu_context_init_paths(cxt); + + lscpu_read_cpulists(cxt); + lscpu_read_cpuinfo(cxt); + cxt->arch = lscpu_read_architecture(cxt); + + lscpu_read_archext(cxt); + lscpu_read_vulnerabilities(cxt); + lscpu_read_numas(cxt); + lscpu_read_topology(cxt); + + lscpu_decode_arm(cxt); + + cxt->virt = lscpu_read_virtualization(cxt); + + switch(cxt->mode) { + case LSCPU_OUTPUT_SUMMARY: + print_summary(cxt); + break; + case LSCPU_OUTPUT_CACHES: + if (!ncolumns) { + columns[ncolumns++] = COL_CACHE_NAME; + columns[ncolumns++] = COL_CACHE_ONESIZE; + columns[ncolumns++] = COL_CACHE_ALLSIZE; + columns[ncolumns++] = COL_CACHE_WAYS; + columns[ncolumns++] = COL_CACHE_TYPE; + columns[ncolumns++] = COL_CACHE_LEVEL; + columns[ncolumns++] = COL_CACHE_SETS; + columns[ncolumns++] = COL_CACHE_PHYLINE; + columns[ncolumns++] = COL_CACHE_COHERENCYSIZE; + } + if (outarg && string_add_to_idarray(outarg, columns, + ARRAY_SIZE(columns), + &ncolumns, cache_column_name_to_id) < 0) + return EXIT_FAILURE; + + print_caches_readable(cxt, columns, ncolumns); + break; + case LSCPU_OUTPUT_READABLE: + if (!ncolumns) { + /* No list was given. Just print whatever is there. 
*/ + struct lscpu_cputype *ct = lscpu_cputype_get_default(cxt); + + columns[ncolumns++] = COL_CPU_CPU; + if (cxt->nnodes) + columns[ncolumns++] = COL_CPU_NODE; + if (ct && ct->ndrawers) + columns[ncolumns++] = COL_CPU_DRAWER; + if (ct && ct->nbooks) + columns[ncolumns++] = COL_CPU_BOOK; + if (ct && ct->nsockets) { + if (cxt->is_cluster) + columns[ncolumns++] = COL_CPU_CLUSTER; + else + columns[ncolumns++] = COL_CPU_SOCKET; + } + if (ct && ct->ncores) + columns[ncolumns++] = COL_CPU_CORE; + if (cxt->ncaches) + columns[ncolumns++] = COL_CPU_CACHE; + if (cxt->online) + columns[ncolumns++] = COL_CPU_ONLINE; + if (ct && ct->has_configured) + columns[ncolumns++] = COL_CPU_CONFIGURED; + if (ct && ct->has_polarization) + columns[ncolumns++] = COL_CPU_POLARIZATION; + if (ct && ct->has_addresses) + columns[ncolumns++] = COL_CPU_ADDRESS; + if (ct && ct->has_freq) { + columns[ncolumns++] = COL_CPU_MAXMHZ; + columns[ncolumns++] = COL_CPU_MINMHZ; + columns[ncolumns++] = COL_CPU_MHZ; + } + } + if (outarg && string_add_to_idarray(outarg, columns, + ARRAY_SIZE(columns), + &ncolumns, cpu_column_name_to_id) < 0) + return EXIT_FAILURE; + print_cpus_readable(cxt, columns, ncolumns); + break; + case LSCPU_OUTPUT_PARSABLE: + if (!ncolumns) { + columns[ncolumns++] = COL_CPU_CPU; + columns[ncolumns++] = COL_CPU_CORE; + if (cxt->is_cluster) + columns[ncolumns++] = COL_CPU_CLUSTER; + else + columns[ncolumns++] = COL_CPU_SOCKET; + columns[ncolumns++] = COL_CPU_NODE; + columns[ncolumns++] = COL_CPU_CACHE; + cxt->show_compatible = 1; + } + if (outarg && string_add_to_idarray(outarg, columns, + ARRAY_SIZE(columns), + &ncolumns, cpu_column_name_to_id) < 0) + return EXIT_FAILURE; + + print_cpus_parsable(cxt, columns, ncolumns); + break; + } + + lscpu_free_context(cxt); + + return EXIT_SUCCESS; +} diff --git a/sys-utils/lscpu.h b/sys-utils/lscpu.h new file mode 100644 index 0000000..8cb0d63 --- /dev/null +++ b/sys-utils/lscpu.h @@ -0,0 +1,334 @@ +#ifndef LSCPU_H +#define LSCPU_H + +#include "c.h" 
+#include "nls.h" +#include "cpuset.h" +#include "xalloc.h" +#include "strutils.h" +#include "bitops.h" +#include "path.h" +#include "pathnames.h" +#include "all-io.h" +#include "debug.h" + +#define LSCPU_DEBUG_INIT (1 << 1) +#define LSCPU_DEBUG_MISC (1 << 2) +#define LSCPU_DEBUG_GATHER (1 << 3) +#define LSCPU_DEBUG_TYPE (1 << 4) +#define LSCPU_DEBUG_CPU (1 << 5) +#define LSCPU_DEBUG_VIRT (1 << 6) +#define LSBLK_DEBUG_ALL 0xFFFF + +UL_DEBUG_DECLARE_MASK(lscpu); +#define DBG(m, x) __UL_DBG(lscpu, LSCPU_DEBUG_, m, x) +#define ON_DBG(m, x) __UL_DBG_CALL(lscpu, LSCPU_DEBUG_, m, x) + +#define UL_DEBUG_CURRENT_MASK UL_DEBUG_MASK(lscpu) +#include "debugobj.h" + +#define _PATH_SYS_SYSTEM "/sys/devices/system" +#define _PATH_SYS_HYP_FEATURES "/sys/hypervisor/properties/features" +#define _PATH_SYS_CPU _PATH_SYS_SYSTEM "/cpu" +#define _PATH_SYS_NODE _PATH_SYS_SYSTEM "/node" +#define _PATH_SYS_DMI "/sys/firmware/dmi/tables/DMI" +#define _PATH_ACPI_PPTT "/sys/firmware/acpi/tables/PPTT" + +struct lscpu_cache { + int id; /* unique identifier */ + int nth; /* cache<number> from cpuinfo */ + char *name; + char *type; + char *allocation_policy; + char *write_policy; + + int level; + uint64_t size; + + unsigned int ways_of_associativity; + unsigned int physical_line_partition; + unsigned int number_of_sets; + unsigned int coherency_line_size; + + cpu_set_t *sharedmap; +}; + +struct lscpu_cputype { + int refcount; + + char *vendor; + int vendor_id; /* created by lscpu_decode_arm() */ + char *bios_vendor; /* aarch64 */ + char *machinetype; /* s390 */ + char *family; + char *model; + char *modelname; + char *bios_modelname; /* aarch64 */ + char *bios_family; /* aarch64 */ + char *revision; /* alternative for model (ppc) */ + char *stepping; + char *bogomips; + char *flags; + char *mtid; /* maximum thread id (s390) */ + char *addrsz; /* address sizes */ + int dispatching; /* -1 if not evailable, DIST_* */ + int freqboost; /* -1 if not evailable */ + + size_t physsockets; /* Physical 
sockets (modules) */ + size_t physchips; /* Physical chips */ + size_t physcoresperchip; /* Physical cores per chip */ + + size_t nthreads_per_core; + size_t ncores_per_socket; + size_t nsockets_per_book; + size_t nbooks_per_drawer; + size_t ndrawers_per_system; + + char *dynamic_mhz; /* s390; copy from the first CPU */ + char *static_mhz; /* s390; copy from the first CPU */ + + /* siblings maps */ + size_t ncores; + cpu_set_t **coremaps; + size_t nsockets; + cpu_set_t **socketmaps; + size_t nbooks; + cpu_set_t **bookmaps; + size_t ndrawers; + cpu_set_t **drawermaps; + + unsigned int has_freq : 1, + has_configured : 1, + has_polarization : 1, + has_addresses : 1; + + size_t nr_socket_on_cluster; /* the number of sockets if the is_cluster is 1 */ +}; + +/* dispatching modes */ +enum { + DISP_HORIZONTAL = 0, + DISP_VERTICAL = 1 +}; + +/* cpu polarization */ +enum { + POLAR_UNKNOWN = 0, + POLAR_VLOW, + POLAR_VMEDIUM, + POLAR_VHIGH, + POLAR_HORIZONTAL +}; + +struct lscpu_cpu { + int refcount; + struct lscpu_cputype *type; + + int logical_id; + + char *bogomips; /* per-CPU bogomips */ + char *mhz; /* freq from cpuinfo */ + char *dynamic_mhz; /* from cpuinf for s390 */ + char *static_mhz; /* from cpuinf for s390 */ + float mhz_max_freq; /* realtime freq from /sys/.../cpuinfo_max_freq */ + float mhz_min_freq; /* realtime freq from /sys/.../cpuinfo_min_freq */ + float mhz_cur_freq; + + int coreid; + int socketid; + int bookid; + int drawerid; + + int polarization; /* POLAR_* */ + int address; /* physical cpu address */ + int configured; /* cpu configured */ +}; + +struct lscpu_arch { + char *name; /* uname() .machine */ + + unsigned int bit32:1, + bit64:1; +}; + +struct lscpu_vulnerability { + char *name; + char *text; +}; + +/* virtualization types */ +enum { + VIRT_TYPE_NONE = 0, + VIRT_TYPE_PARA, + VIRT_TYPE_FULL, + VIRT_TYPE_CONTAINER +}; + +/* hypervisor vendors */ +enum { + VIRT_VENDOR_NONE = 0, + VIRT_VENDOR_XEN, + VIRT_VENDOR_KVM, + VIRT_VENDOR_MSHV, + 
VIRT_VENDOR_VMWARE, + VIRT_VENDOR_IBM, /* sys-z powervm */ + VIRT_VENDOR_VSERVER, + VIRT_VENDOR_UML, + VIRT_VENDOR_INNOTEK, /* VBOX */ + VIRT_VENDOR_HITACHI, + VIRT_VENDOR_PARALLELS, /* OpenVZ/VIrtuozzo */ + VIRT_VENDOR_VBOX, + VIRT_VENDOR_OS400, + VIRT_VENDOR_PHYP, + VIRT_VENDOR_SPAR, + VIRT_VENDOR_WSL, +}; + +struct lscpu_virt { + char *cpuflag; /* virtualization flag (vmx, svm) */ + char *hypervisor; /* hypervisor software */ + int vendor; /* VIRT_VENDOR_* */ + int type; /* VIRT_TYPE_* ? */ + +}; + +enum { + LSCPU_OUTPUT_SUMMARY = 0, /* default */ + LSCPU_OUTPUT_CACHES, + LSCPU_OUTPUT_PARSABLE, + LSCPU_OUTPUT_READABLE +}; + +struct lscpu_cxt { + int maxcpus; /* size in bits of kernel cpu mask */ + size_t setsize; + const char *prefix; /* path to /sys and /proc snapshot or NULL */ + + struct path_cxt *syscpu; /* _PATH_SYS_CPU path handler */ + struct path_cxt *procfs; /* /proc path handler */ + + size_t ncputypes; + struct lscpu_cputype **cputypes; + + size_t npossibles; /* number of possible CPUs */ + struct lscpu_cpu **cpus; /* possible CPUs, contains gaps (cups[n]=NULL) */ + + size_t npresents; + cpu_set_t *present; /* mask with present CPUs */ + + size_t nonlines; /* aka number of trhreads */ + cpu_set_t *online; /* mask with online CPUs */ + + struct lscpu_arch *arch; + struct lscpu_virt *virt; + + struct lscpu_vulnerability *vuls; /* array of CPU vulnerabilities */ + size_t nvuls; /* number of CPU vulnerabilities */ + + struct lscpu_cache *caches; /* all instances of the all caches from /sys */ + size_t ncaches; + + struct lscpu_cache *ecaches; + size_t necaches; /* extra caches (s390) from /proc/cpuinfo */ + + size_t nnodes; /* number of NUMA modes */ + int *idx2nodenum; /* Support for discontinuous nodes */ + cpu_set_t **nodemaps; /* array with NUMA nodes */ + + int mode; /* LSCPU_OUTPUT_* */ + + unsigned int noalive : 1, + show_online : 1, + show_offline : 1, + show_physical : 1, + show_compatible : 1, + hex : 1, + json : 1, + bytes : 1; + + int 
is_cluster; /* For aarch64 if the machine doesn't have ACPI PPTT */ +}; + +#define is_cpu_online(_cxt, _cpu) \ + ((_cxt) && (_cpu) && (_cxt)->online && \ + CPU_ISSET_S((_cpu)->logical_id, (_cxt)->setsize, (_cxt)->online)) + +#define is_cpu_present(_cxt, _cpu) \ + ((_cxt) && (_cpu) && (_cxt)->present && \ + CPU_ISSET_S((_cpu)->logical_id, (_cxt)->setsize, (_cxt)->present)) + +struct lscpu_cputype *lscpu_new_cputype(void); +void lscpu_ref_cputype(struct lscpu_cputype *ct); +void lscpu_unref_cputype(struct lscpu_cputype *ct); +struct lscpu_cputype *lscpu_add_cputype(struct lscpu_cxt *cxt, struct lscpu_cputype *ct); +struct lscpu_cputype *lscpu_cputype_get_default(struct lscpu_cxt *cxt); + +int lscpu_read_cpuinfo(struct lscpu_cxt *cxt); +int lscpu_read_cpulists(struct lscpu_cxt *cxt); +int lscpu_read_archext(struct lscpu_cxt *cxt); +int lscpu_read_vulnerabilities(struct lscpu_cxt *cxt); +int lscpu_read_numas(struct lscpu_cxt *cxt); + +void lscpu_free_caches(struct lscpu_cache *caches, size_t n); +void lscpu_sort_caches(struct lscpu_cache *caches, size_t n); + +size_t lscpu_get_cache_full_size(struct lscpu_cxt *cxt, const char *name, int *instances); +struct lscpu_cache *lscpu_cpu_get_cache(struct lscpu_cxt *cxt, + struct lscpu_cpu *cpu, const char *name); + +int lscpu_read_topology(struct lscpu_cxt *cxt); +void lscpu_cputype_free_topology(struct lscpu_cputype *ct); + +float lsblk_cputype_get_maxmhz(struct lscpu_cxt *cxt, struct lscpu_cputype *ct); +float lsblk_cputype_get_minmhz(struct lscpu_cxt *cxt, struct lscpu_cputype *ct); +float lsblk_cputype_get_scalmhz(struct lscpu_cxt *cxt, struct lscpu_cputype *ct); + +struct lscpu_arch *lscpu_read_architecture(struct lscpu_cxt *cxt); +void lscpu_free_architecture(struct lscpu_arch *ar); + +struct lscpu_virt *lscpu_read_virtualization(struct lscpu_cxt *cxt); +void lscpu_free_virtualization(struct lscpu_virt *virt); + +struct lscpu_cpu *lscpu_new_cpu(int id); +void lscpu_ref_cpu(struct lscpu_cpu *cpu); +void 
lscpu_unref_cpu(struct lscpu_cpu *cpu); +struct lscpu_cpu *lscpu_get_cpu(struct lscpu_cxt *cxt, int logical_id); +int lscpu_cpu_set_type(struct lscpu_cpu *cpu, struct lscpu_cputype *type); +int lscpu_create_cpus(struct lscpu_cxt *cxt, cpu_set_t *cpuset, size_t setsize); +struct lscpu_cpu *lscpu_cpus_loopup_by_type(struct lscpu_cxt *cxt, struct lscpu_cputype *ct); + +void lscpu_decode_arm(struct lscpu_cxt *cxt); + +int lookup(char *line, char *pattern, char **value); + +void *get_mem_chunk(size_t base, size_t len, const char *devmem); + +struct lscpu_dmi_header +{ + uint8_t type; + uint8_t length; + uint16_t handle; + uint8_t *data; +}; + +struct dmi_info { + char *vendor; + char *product; + char *manufacturer; + int sockets; + + /* Processor Information */ + uint16_t processor_family; + char *processor_manufacturer; + char *processor_version; + uint16_t current_speed; + char *part_num; +}; + + +void to_dmi_header(struct lscpu_dmi_header *h, uint8_t *data); +char *dmi_string(const struct lscpu_dmi_header *dm, uint8_t s); +int parse_dmi_table(uint16_t len, uint16_t num, uint8_t *data, struct dmi_info *di); +size_t get_number_of_physical_sockets_from_dmi(void); +int dmi_decode_cputype(struct lscpu_cputype *); +#endif /* LSCPU_H */ diff --git a/sys-utils/lsipc.1 b/sys-utils/lsipc.1 new file mode 100644 index 0000000..60bfbb4 --- /dev/null +++ b/sys-utils/lsipc.1 @@ -0,0 +1,188 @@ +'\" t +.\" Title: lsipc +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "LSIPC" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. 
LINKSTYLE blue R < > +.\} +.SH "NAME" +lsipc \- show information on IPC facilities currently employed in the system +.SH "SYNOPSIS" +.sp +\fBlsipc\fP [options] +.SH "DESCRIPTION" +.sp +\fBlsipc\fP shows information on the System V inter\-process communication facilities for which the calling process has read access. +.SH "OPTIONS" +.sp +\fB\-i\fP, \fB\-\-id\fP \fIid\fP +.RS 4 +Show full details on just the one resource element identified by \fIid\fP. This option needs to be combined with one of the three resource options: \fB\-m\fP, \fB\-q\fP or \fB\-s\fP. It is possible to override the default output format for this option with the \fB\-\-list\fP, \fB\-\-raw\fP, \fB\-\-json\fP or \fB\-\-export\fP option. +.RE +.sp +\fB\-g\fP, \fB\-\-global\fP +.RS 4 +Show system\-wide usage and limits of IPC resources. This option may be combined with one of the three resource options: \fB\-m\fP, \fB\-q\fP or \fB\-s\fP. The default is to show information about all resources. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SS "Resource options" +.sp +\fB\-m\fP, \fB\-\-shmems\fP +.RS 4 +Write information about active shared memory segments. +.RE +.sp +\fB\-q\fP, \fB\-\-queues\fP +.RS 4 +Write information about active message queues. +.RE +.sp +\fB\-s\fP, \fB\-\-semaphores\fP +.RS 4 +Write information about active semaphore sets. +.RE +.SS "Output formatting" +.sp +\fB\-c\fP, \fB\-\-creator\fP +.RS 4 +Show creator and owner. +.RE +.sp +\fB\-e\fP, \fB\-\-export\fP +.RS 4 +Produce output in the form of key="value" pairs. All potentially unsafe value characters are hex\-escaped (\(rsx<code>). See also option \fB\-\-shell\fP. +.RE +.sp +\fB\-J\fP, \fB\-\-json\fP +.RS 4 +Use the JSON output format. +.RE +.sp +\fB\-l\fP, \fB\-\-list\fP +.RS 4 +Use the list output format. This is the default, except when \fB\-\-id\fP is used. 
+.RE +.sp +\fB\-n\fP, \fB\-\-newline\fP +.RS 4 +Display each piece of information on a separate line. +.RE +.sp +\fB\-\-noheadings\fP +.RS 4 +Do not print a header line. +.RE +.sp +\fB\-\-notruncate\fP +.RS 4 +Don\(cqt truncate output. +.RE +.sp +\fB\-o\fP, \fB\-\-output\fP \fIlist\fP +.RS 4 +Specify which output columns to print. Use \fB\-\-help\fP to get a list of all supported columns. +.RE +.sp +\fB\-b\fP, \fB\-\-bytes\fP +.RS 4 +Print size in bytes rather than in human readable format. +.RE +.sp +\fB\-r\fP, \fB\-\-raw\fP +.RS 4 +Raw output (no columnation). +.RE +.sp +\fB\-t\fP, \fB\-\-time\fP +.RS 4 +Write time information. The time of the last control operation that changed the access permissions for all facilities, the time of the last \fBmsgsnd\fP(2) and \fBmsgrcv\fP(2) operations on message queues, the time of the last \fBshmat\fP(2) and \fBshmdt\fP(2) operations on shared memory, and the time of the last \fBsemop\fP(2) operation on semaphores. +.RE +.sp +\fB\-\-time\-format\fP \fItype\fP +.RS 4 +Display dates in short, full or iso format. The default is short, this time format is designed to be space efficient and human readable. +.RE +.sp +\fB\-P\fP, \fB\-\-numeric\-perms\fP +.RS 4 +Print numeric permissions in PERMS column. +.RE +.sp +\fB\-y\fP, \fB\-\-shell\fP +.RS 4 +The column name will be modified to contain only characters allowed for shell variable identifiers. This is usable, for example, with \fB\-\-export\fP. Note that this feature has been automatically enabled for \fB\-\-export\fP in version 2.37, but due to compatibility issues, now it\(cqs necessary to request this behavior by \fB\-\-shell\fP. +.RE +.SH "EXIT STATUS" +.sp +0 +.RS 4 +if OK, +.RE +.sp +1 +.RS 4 +if incorrect arguments specified, +.RE +.sp +2 +.RS 4 +if a serious error occurs. +.RE +.SH "HISTORY" +.sp +The \fBlsipc\fP utility is inspired by the \fBipcs\fP(1) utility. 
+.SH "AUTHORS" +.sp +.MTO "ooprala\(atredhat.com" "Ondrej Oprala" "," +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +\fBipcmk\fP(1), +\fBipcrm\fP(1), +\fBmsgrcv\fP(2), +\fBmsgsnd\fP(2), +\fBsemget\fP(2), +\fBsemop\fP(2), +\fBshmat\fP(2), +\fBshmdt\fP(2), +\fBshmget\fP(2), +\fBsysvipc\fP(7) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBlsipc\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/lsipc.1.adoc b/sys-utils/lsipc.1.adoc new file mode 100644 index 0000000..3ee790e --- /dev/null +++ b/sys-utils/lsipc.1.adoc @@ -0,0 +1,125 @@ +//po4a: entry man manual += lsipc(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: lsipc + +== NAME + +lsipc - show information on IPC facilities currently employed in the system + +== SYNOPSIS + +*lsipc* [options] + +== DESCRIPTION + +*lsipc* shows information on the System V inter-process communication facilities for which the calling process has read access. + +== OPTIONS + +*-i*, *--id* _id_:: +Show full details on just the one resource element identified by _id_. This option needs to be combined with one of the three resource options: *-m*, *-q* or *-s*. It is possible to override the default output format for this option with the *--list*, *--raw*, *--json* or *--export* option. + +*-g*, *--global*:: +Show system-wide usage and limits of IPC resources. This option may be combined with one of the three resource options: *-m*, *-q* or *-s*. The default is to show information about all resources. + +include::man-common/help-version.adoc[] + +=== Resource options + +*-m*, *--shmems*:: +Write information about active shared memory segments. + +*-q*, *--queues*:: +Write information about active message queues. + +*-s*, *--semaphores*:: +Write information about active semaphore sets. + +=== Output formatting + +*-c*, *--creator*:: +Show creator and owner. + +*-e*, *--export*:: +Produce output in the form of key="value" pairs. All potentially unsafe value characters are hex-escaped (\x<code>). See also option *--shell*. + +*-J*, *--json*:: +Use the JSON output format. + +*-l*, *--list*:: +Use the list output format. This is the default, except when *--id* is used. + +*-n*, *--newline*:: +Display each piece of information on a separate line. + +*--noheadings*:: +Do not print a header line. 
+ +*--notruncate*:: +Don't truncate output. + +*-o*, *--output* _list_:: +Specify which output columns to print. Use *--help* to get a list of all supported columns. + +*-b*, *--bytes*:: +Print size in bytes rather than in human readable format. + +*-r*, *--raw*:: +Raw output (no columnation). + +*-t*, *--time*:: +Write time information. The time of the last control operation that changed the access permissions for all facilities, the time of the last *msgsnd*(2) and *msgrcv*(2) operations on message queues, the time of the last *shmat*(2) and *shmdt*(2) operations on shared memory, and the time of the last *semop*(2) operation on semaphores. + +*--time-format* _type_:: +Display dates in short, full or iso format. The default is short; this time format is designed to be space efficient and human readable. + +*-P*, *--numeric-perms*:: +Print numeric permissions in PERMS column. + +*-y*, *--shell*:: +The column name will be modified to contain only characters allowed for shell variable identifiers. This is usable, for example, with *--export*. Note that this feature was automatically enabled for *--export* in version 2.37, but due to compatibility issues it is now necessary to request this behavior with *--shell*. + +== EXIT STATUS + +0:: +if OK, + +1:: +if incorrect arguments are specified, + +2:: +if a serious error occurs. + +== HISTORY + +The *lsipc* utility is inspired by the *ipcs*(1) utility. 
+ +== AUTHORS + +mailto:ooprala@redhat.com[Ondrej Oprala], +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +*ipcmk*(1), +*ipcrm*(1), +*msgrcv*(2), +*msgsnd*(2), +*semget*(2), +*semop*(2), +*shmat*(2), +*shmdt*(2), +*shmget*(2), +*sysvipc*(7) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/lsipc.c b/sys-utils/lsipc.c new file mode 100644 index 0000000..0f3cd94 --- /dev/null +++ b/sys-utils/lsipc.c @@ -0,0 +1,1357 @@ +/* + * lsipc - List information about IPC instances employed in the system + * + * Copyright (C) 2015 Ondrej Oprala <ooprala@redhat.com> + * Copyright (C) 2015 Karel Zak <ooprala@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * + * lsipc is inspired by the ipcs utility. The aim is to create + * a utility unencumbered by a standard to provide more flexible + * means of controlling the output. 
+ */ + +#include <errno.h> +#include <getopt.h> +#include <sys/time.h> +#include <unistd.h> + +#include <libsmartcols.h> + +#include "c.h" +#include "nls.h" +#include "closestream.h" +#include "strutils.h" +#include "optutils.h" +#include "xalloc.h" +#include "procfs.h" +#include "ipcutils.h" +#include "timeutils.h" + +/* + * time modes + * */ +enum { + TIME_INVALID = 0, + TIME_SHORT, + TIME_FULL, + TIME_ISO +}; + +/* + * IDs + */ +enum { + /* generic */ + COLDESC_IDX_GEN_FIRST = 0, + COL_KEY = COLDESC_IDX_GEN_FIRST, + COL_ID, + COL_OWNER, + COL_PERMS, + COL_CUID, + COL_CUSER, + COL_CGID, + COL_CGROUP, + COL_UID, + COL_USER, + COL_GID, + COL_GROUP, + COL_CTIME, + COLDESC_IDX_GEN_LAST = COL_CTIME, + + /* msgq-specific */ + COLDESC_IDX_MSG_FIRST, + COL_USEDBYTES = COLDESC_IDX_MSG_FIRST, + COL_MSGS, + COL_SEND, + COL_RECV, + COL_LSPID, + COL_LRPID, + COLDESC_IDX_MSG_LAST = COL_LRPID, + + /* shm-specific */ + COLDESC_IDX_SHM_FIRST, + COL_SIZE = COLDESC_IDX_SHM_FIRST, + COL_NATTCH, + COL_STATUS, + COL_ATTACH, + COL_DETACH, + COL_COMMAND, + COL_CPID, + COL_LPID, + COLDESC_IDX_SHM_LAST = COL_LPID, + + /* sem-specific */ + COLDESC_IDX_SEM_FIRST, + COL_NSEMS = COLDESC_IDX_SEM_FIRST, + COL_OTIME, + COLDESC_IDX_SEM_LAST = COL_OTIME, + + /* summary (--global) */ + COLDESC_IDX_SUM_FIRST, + COL_RESOURCE = COLDESC_IDX_SUM_FIRST, + COL_DESC, + COL_LIMIT, + COL_USED, + COL_USEPERC, + COLDESC_IDX_SUM_LAST = COL_USEPERC +}; + +/* not all columns apply to all options, so we specify a legal range for each */ +static size_t LOWER, UPPER; + +/* + * output modes + */ +enum { + OUT_EXPORT = 1, + OUT_NEWLINE, + OUT_RAW, + OUT_JSON, + OUT_PRETTY, + OUT_LIST +}; + +struct lsipc_control { + int outmode; + unsigned int noheadings : 1, /* don't print header line */ + notrunc : 1, /* don't truncate columns */ + shellvar : 1, /* use shell compatible colum names */ + bytes : 1, /* SIZE in bytes */ + numperms : 1, /* numeric permissions */ + time_mode : 2; +}; + +struct lsipc_coldesc { + const char 
*name; + const char *help; + const char *pretty_name; + + double whint; /* width hint */ + long flag; +}; + +static const struct lsipc_coldesc coldescs[] = +{ + /* common */ + [COL_KEY] = { "KEY", N_("Resource key"), N_("Key"), 1}, + [COL_ID] = { "ID", N_("Resource ID"), N_("ID"), 1}, + [COL_OWNER] = { "OWNER", N_("Owner's username or UID"), N_("Owner"), 1, SCOLS_FL_RIGHT}, + [COL_PERMS] = { "PERMS", N_("Permissions"), N_("Permissions"), 1, SCOLS_FL_RIGHT}, + [COL_CUID] = { "CUID", N_("Creator UID"), N_("Creator UID"), 1, SCOLS_FL_RIGHT}, + [COL_CUSER] = { "CUSER", N_("Creator user"), N_("Creator user"), 1 }, + [COL_CGID] = { "CGID", N_("Creator GID"), N_("Creator GID"), 1, SCOLS_FL_RIGHT}, + [COL_CGROUP] = { "CGROUP", N_("Creator group"), N_("Creator group"), 1 }, + [COL_UID] = { "UID", N_("User ID"), N_("UID"), 1, SCOLS_FL_RIGHT}, + [COL_USER] = { "USER", N_("User name"), N_("User name"), 1}, + [COL_GID] = { "GID", N_("Group ID"), N_("GID"), 1, SCOLS_FL_RIGHT}, + [COL_GROUP] = { "GROUP", N_("Group name"), N_("Group name"), 1}, + [COL_CTIME] = { "CTIME", N_("Time of the last change"), N_("Last change"), 1, SCOLS_FL_RIGHT}, + + /* msgq-specific */ + [COL_USEDBYTES] = { "USEDBYTES",N_("Bytes used"), N_("Bytes used"), 1, SCOLS_FL_RIGHT}, + [COL_MSGS] = { "MSGS", N_("Number of messages"), N_("Messages"), 1}, + [COL_SEND] = { "SEND", N_("Time of last msg sent"), N_("Msg sent"), 1, SCOLS_FL_RIGHT}, + [COL_RECV] = { "RECV", N_("Time of last msg received"), N_("Msg received"), 1, SCOLS_FL_RIGHT}, + [COL_LSPID] = { "LSPID", N_("PID of the last msg sender"), N_("Msg sender"), 1, SCOLS_FL_RIGHT}, + [COL_LRPID] = { "LRPID", N_("PID of the last msg receiver"), N_("Msg receiver"), 1, SCOLS_FL_RIGHT}, + + /* shm-specific */ + [COL_SIZE] = { "SIZE", N_("Segment size"), N_("Segment size"), 1, SCOLS_FL_RIGHT}, + [COL_NATTCH] = { "NATTCH", N_("Number of attached processes"), N_("Attached processes"), 1, SCOLS_FL_RIGHT}, + [COL_STATUS] = { "STATUS", N_("Status"), N_("Status"), 1, 
SCOLS_FL_NOEXTREMES}, + [COL_ATTACH] = { "ATTACH", N_("Attach time"), N_("Attach time"), 1, SCOLS_FL_RIGHT}, + [COL_DETACH] = { "DETACH", N_("Detach time"), N_("Detach time"), 1, SCOLS_FL_RIGHT}, + [COL_COMMAND] = { "COMMAND", N_("Creator command line"), N_("Creator command"), 0, SCOLS_FL_TRUNC}, + [COL_CPID] = { "CPID", N_("PID of the creator"), N_("Creator PID"), 1, SCOLS_FL_RIGHT}, + [COL_LPID] = { "LPID", N_("PID of last user"), N_("Last user PID"), 1, SCOLS_FL_RIGHT}, + + /* sem-specific */ + [COL_NSEMS] = { "NSEMS", N_("Number of semaphores"), N_("Semaphores"), 1, SCOLS_FL_RIGHT}, + [COL_OTIME] = { "OTIME", N_("Time of the last operation"), N_("Last operation"), 1, SCOLS_FL_RIGHT}, + + /* cols for summarized information */ + [COL_RESOURCE] = { "RESOURCE", N_("Resource name"), N_("Resource"), 1 }, + [COL_DESC] = { "DESCRIPTION",N_("Resource description"), N_("Description"), 1 }, + [COL_USED] = { "USED", N_("Currently used"), N_("Used"), 1, SCOLS_FL_RIGHT }, + [COL_USEPERC] = { "USE%", N_("Currently use percentage"), N_("Use"), 1, SCOLS_FL_RIGHT }, + [COL_LIMIT] = { "LIMIT", N_("System-wide limit"), N_("Limit"), 1, SCOLS_FL_RIGHT }, +}; + + +/* columns[] array specifies all currently wanted output column. The columns + * are defined by coldescs[] array and you can specify (on command line) each + * column twice. 
That's enough, dynamically allocated array of the columns is + * unnecessary overkill and over-engineering in this case */ +static int columns[ARRAY_SIZE(coldescs) * 2]; +static size_t ncolumns; + +static inline size_t err_columns_index(size_t arysz, size_t idx) +{ + if (idx >= arysz) + errx(EXIT_FAILURE, _("too many columns specified, " + "the limit is %zu columns"), + arysz - 1); + return idx; +} + +#define add_column(ary, n, id) \ + ((ary)[ err_columns_index(ARRAY_SIZE(ary), (n)) ] = (id)) + +static int column_name_to_id(const char *name, size_t namesz) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(coldescs); i++) { + const char *cn = coldescs[i].name; + + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) { + if (i > COL_CTIME) { + if (i >= LOWER && i <= UPPER) + return i; + + warnx(_("column %s does not apply to the specified IPC"), name); + return -1; + } + + return i; + } + } + warnx(_("unknown column: %s"), name); + return -1; +} + +static int get_column_id(int num) +{ + assert(num >= 0); + assert((size_t) num < ncolumns); + assert((size_t) columns[num] < ARRAY_SIZE(coldescs)); + return columns[num]; +} + +static const struct lsipc_coldesc *get_column_desc(int num) +{ + return &coldescs[ get_column_id(num) ]; +} + +static char *get_username(struct passwd **pw, uid_t id) +{ + if (!*pw || (*pw)->pw_uid != id) + *pw = getpwuid(id); + + return *pw ? xstrdup((*pw)->pw_name) : NULL; +} + +static char *get_groupname(struct group **gr, gid_t id) +{ + if (!*gr || (*gr)->gr_gid != id) + *gr = getgrgid(id); + + return *gr ? 
xstrdup((*gr)->gr_name) : NULL; +} + +static int parse_time_mode(const char *s) +{ + struct lsipc_timefmt { + const char *name; + const int val; + }; + static const struct lsipc_timefmt timefmts[] = { + {"iso", TIME_ISO}, + {"full", TIME_FULL}, + {"short", TIME_SHORT}, + }; + size_t i; + + for (i = 0; i < ARRAY_SIZE(timefmts); i++) { + if (strcmp(timefmts[i].name, s) == 0) + return timefmts[i].val; + } + errx(EXIT_FAILURE, _("unknown time format: %s"), s); +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + size_t i; + + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Show information on IPC facilities.\n"), out); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Resource options:\n"), out); + fputs(_(" -m, --shmems shared memory segments\n"), out); + fputs(_(" -q, --queues message queues\n"), out); + fputs(_(" -s, --semaphores semaphores\n"), out); + fputs(_(" -g, --global info about system-wide usage (may be used with -m, -q and -s)\n"), out); + fputs(_(" -i, --id <id> print details on resource identified by <id>\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" --noheadings don't print headings\n"), out); + fputs(_(" --notruncate don't truncate output\n"), out); + fputs(_(" --time-format=<type> display dates in short, full or iso format\n"), out); + fputs(_(" -b, --bytes print SIZE in bytes rather than in human readable format\n"), out); + fputs(_(" -c, --creator show creator and owner\n"), out); + fputs(_(" -e, --export display in an export-able output format\n"), out); + fputs(_(" -J, --json use the JSON output format\n"), out); + fputs(_(" -n, --newline display each piece of information on a new line\n"), out); + fputs(_(" -l, --list force list output format (for example with --id)\n"), out); + fputs(_(" -o, --output[=<list>] define the columns to output\n"), out); + fputs(_(" -P, --numeric-perms print numeric permissions (PERMS 
column)\n"), out); + fputs(_(" -r, --raw display in raw mode\n"), out); + fputs(_(" -t, --time show attach, detach and change times\n"), out); + fputs(_(" -y, --shell use column names to be usable as shell variable identifiers\n"), out); + + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(26)); + + fprintf(out, _("\nGeneric columns:\n")); + for (i = COLDESC_IDX_GEN_FIRST; i <= COLDESC_IDX_GEN_LAST; i++) + fprintf(out, " %14s %s\n", coldescs[i].name, _(coldescs[i].help)); + + fprintf(out, _("\nShared-memory columns (--shmems):\n")); + for (i = COLDESC_IDX_SHM_FIRST; i <= COLDESC_IDX_SHM_LAST; i++) + fprintf(out, " %14s %s\n", coldescs[i].name, _(coldescs[i].help)); + + fprintf(out, _("\nMessage-queue columns (--queues):\n")); + for (i = COLDESC_IDX_MSG_FIRST; i <= COLDESC_IDX_MSG_LAST; i++) + fprintf(out, " %14s %s\n", coldescs[i].name, _(coldescs[i].help)); + + fprintf(out, _("\nSemaphore columns (--semaphores):\n")); + for (i = COLDESC_IDX_SEM_FIRST; i <= COLDESC_IDX_SEM_LAST; i++) + fprintf(out, " %14s %s\n", coldescs[i].name, _(coldescs[i].help)); + + fprintf(out, _("\nSummary columns (--global):\n")); + for (i = COLDESC_IDX_SUM_FIRST; i <= COLDESC_IDX_SUM_LAST; i++) + fprintf(out, " %14s %s\n", coldescs[i].name, _(coldescs[i].help)); + + printf(USAGE_MAN_TAIL("lsipc(1)")); + exit(EXIT_SUCCESS); +} + +static struct libscols_table *new_table(struct lsipc_control *ctl) +{ + struct libscols_table *table = scols_new_table(); + + if (!table) + err(EXIT_FAILURE, _("failed to allocate output table")); + + if (ctl->noheadings) + scols_table_enable_noheadings(table, 1); + if (ctl->shellvar) + scols_table_enable_shellvar(table, 1); + + switch(ctl->outmode) { + case OUT_NEWLINE: + scols_table_set_column_separator(table, "\n"); + /* fallthrough */ + case OUT_EXPORT: + scols_table_enable_export(table, 1); + break; + case OUT_RAW: + scols_table_enable_raw(table, 1); + break; + case OUT_PRETTY: + scols_table_enable_noheadings(table, 1); + break; + case OUT_JSON: + 
scols_table_enable_json(table, 1); + break; + default: + break; + } + return table; +} + +static struct libscols_table *setup_table(struct lsipc_control *ctl) +{ + struct libscols_table *table = new_table(ctl); + size_t n; + + for (n = 0; n < ncolumns; n++) { + const struct lsipc_coldesc *desc = get_column_desc(n); + int flags = desc->flag; + + if (ctl->notrunc) + flags &= ~SCOLS_FL_TRUNC; + if (!scols_table_new_column(table, desc->name, desc->whint, flags)) + goto fail; + } + return table; +fail: + scols_unref_table(table); + return NULL; +} + +/* Print the first line of @table as "Label: value" rows, one per column that has data. */ +static int print_pretty(struct libscols_table *table) +{ + struct libscols_iter *itr = scols_new_iter(SCOLS_ITER_FORWARD); + struct libscols_column *col; + struct libscols_cell *data; + struct libscols_line *ln; + const char *hstr, *dstr; + int n = 0; + + ln = scols_table_get_line(table, 0); + while (!scols_table_next_column(table, itr, &col)) { + + data = scols_line_get_cell(ln, n); + + /* pretty_name is only marked with N_() in coldescs[]; translate it here */ + hstr = _(get_column_desc(n)->pretty_name); + dstr = scols_cell_get_data(data); + + if (dstr) + printf("%s:%*c%-36s\n", hstr, 35 - (int)strlen(hstr), ' ', dstr); + ++n; + } + + /* this is used to pretty-print detailed info about a semaphore array */ + if (ln) { + struct libscols_table *subtab = scols_line_get_userdata(ln); + if (subtab) { + printf(_("Elements:\n\n")); + scols_print_table(subtab); + } + } + + scols_free_iter(itr); + return 0; + +} + +static int print_table(struct lsipc_control *ctl, struct libscols_table *tb) +{ + if (ctl->outmode == OUT_PRETTY) + print_pretty(tb); + else + scols_print_table(tb); + return 0; +} +static struct timeval now; + +static char *make_time(int mode, time_t time) +{ + char buf[64] = {0}; + + switch(mode) { + case TIME_FULL: + { + struct tm tm; + char *s; + + localtime_r(&time, &tm); + asctime_r(&tm, buf); + if (*(s = buf + strlen(buf) - 1) == '\n') + *s = '\0'; + break; + } + case TIME_SHORT: + strtime_short(&time, &now, 0, buf, sizeof(buf)); + break; + case TIME_ISO: + strtime_iso(&time, 
ISO_TIMESTAMP_T, buf, sizeof(buf)); + break; + default: + errx(EXIT_FAILURE, _("unsupported time type")); + } + return xstrdup(buf); +} + +static void global_set_data(struct lsipc_control *ctl, struct libscols_table *tb, + const char *resource, const char *desc, uintmax_t used, + uintmax_t limit, int usage, int byte_unit) +{ + struct libscols_line *ln; + size_t n; + + ln = scols_table_new_line(tb, NULL); + if (!ln) + err(EXIT_FAILURE, _("failed to allocate output line")); + + for (n = 0; n < ncolumns; n++) { + int rc = 0; + char *arg = NULL; + + switch (get_column_id(n)) { + case COL_RESOURCE: + rc = scols_line_set_data(ln, n, resource); + break; + case COL_DESC: + rc = scols_line_set_data(ln, n, desc); + break; + case COL_USED: + if (usage) { + if (!byte_unit || ctl->bytes) + xasprintf(&arg, "%ju", used); + else + arg = size_to_human_string(SIZE_SUFFIX_1LETTER, used); + rc = scols_line_refer_data(ln, n, arg); + } else + rc = scols_line_set_data(ln, n, "-"); + break; + case COL_USEPERC: + if (usage) { + xasprintf(&arg, "%2.2f%%", (double) used / limit * 100); + rc = scols_line_refer_data(ln, n, arg); + } else + rc = scols_line_set_data(ln, n, "-"); + break; + case COL_LIMIT: + if (!byte_unit || ctl->bytes) + xasprintf(&arg, "%ju", limit); + else + arg = size_to_human_string(SIZE_SUFFIX_1LETTER, limit); + rc = scols_line_refer_data(ln, n, arg); + break; + } + + if (rc != 0) + err(EXIT_FAILURE, _("failed to add output data")); + } +} + +static void setup_sem_elements_columns(struct libscols_table *tb) +{ + scols_table_set_name(tb, "elements"); + if (!scols_table_new_column(tb, "SEMNUM", 0, SCOLS_FL_RIGHT)) + err_oom(); + if (!scols_table_new_column(tb, "VALUE", 0, SCOLS_FL_RIGHT)) + err_oom(); + if (!scols_table_new_column(tb, "NCOUNT", 0, SCOLS_FL_RIGHT)) + err_oom(); + if (!scols_table_new_column(tb, "ZCOUNT", 0, SCOLS_FL_RIGHT)) + err_oom(); + if (!scols_table_new_column(tb, "PID", 0, SCOLS_FL_RIGHT)) + err_oom(); + if (!scols_table_new_column(tb, "COMMAND", 0, 
SCOLS_FL_RIGHT)) + err_oom(); +} + +static void do_sem(int id, struct lsipc_control *ctl, struct libscols_table *tb) +{ + struct libscols_line *ln; + struct passwd *pw = NULL, *cpw = NULL; + struct group *gr = NULL, *cgr = NULL; + struct sem_data *semds, *semdsp; + char *arg = NULL; + + scols_table_set_name(tb, "semaphores"); + + if (ipc_sem_get_info(id, &semds) < 1) { + if (id > -1) + warnx(_("id %d not found"), id); + return; + } + for (semdsp = semds; semdsp->next != NULL || id > -1; semdsp = semdsp->next) { + size_t n; + + ln = scols_table_new_line(tb, NULL); + if (!ln) + err(EXIT_FAILURE, _("failed to allocate output line")); + + for (n = 0; n < ncolumns; n++) { + int rc = 0; + switch (get_column_id(n)) { + case COL_KEY: + xasprintf(&arg, "0x%08x",semdsp->sem_perm.key); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_ID: + xasprintf(&arg, "%d",semdsp->sem_perm.id); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_OWNER: + arg = get_username(&pw, semdsp->sem_perm.uid); + if (!arg) + xasprintf(&arg, "%u", semdsp->sem_perm.uid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_PERMS: + if (ctl->numperms) + xasprintf(&arg, "%#o", semdsp->sem_perm.mode & 0777); + else { + arg = xmalloc(11); + xstrmode(semdsp->sem_perm.mode & 0777, arg); + } + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CUID: + xasprintf(&arg, "%u", semdsp->sem_perm.cuid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CUSER: + arg = get_username(&cpw, semdsp->sem_perm.cuid); + if (arg) + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CGID: + xasprintf(&arg, "%u", semdsp->sem_perm.cgid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CGROUP: + arg = get_groupname(&cgr, semdsp->sem_perm.cgid); + if (arg) + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_UID: + xasprintf(&arg, "%u", semdsp->sem_perm.uid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_USER: + arg = 
get_username(&pw, semdsp->sem_perm.uid); + if (arg) + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_GID: + xasprintf(&arg, "%u", semdsp->sem_perm.gid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_GROUP: + arg = get_groupname(&gr, semdsp->sem_perm.gid); + if (arg) + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CTIME: + if (semdsp->sem_ctime != 0) { + rc = scols_line_refer_data(ln, n, + make_time(ctl->time_mode, + (time_t)semdsp->sem_ctime)); + } + break; + case COL_NSEMS: + xasprintf(&arg, "%ju", semdsp->sem_nsems); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_OTIME: + if (semdsp->sem_otime != 0) { + rc = scols_line_refer_data(ln, n, + make_time(ctl->time_mode, + (time_t)semdsp->sem_otime)); + } + break; + } + if (rc != 0) + err(EXIT_FAILURE, _("failed to add output data")); + arg = NULL; + } + + if (id > -1 && semds->sem_nsems) { + /* Create extra table with ID specific semaphore elements */ + struct libscols_table *sub = new_table(ctl); + size_t i; + int rc = 0; + + scols_table_enable_noheadings(sub, 0); + setup_sem_elements_columns(sub); + + for (i = 0; i < semds->sem_nsems; i++) { + struct sem_elem *e = &semds->elements[i]; + struct libscols_line *sln = scols_table_new_line(sub, NULL); + + if (!sln) + err(EXIT_FAILURE, _("failed to allocate output line")); + + /* SEMNUM */ + xasprintf(&arg, "%zu", i); + rc = scols_line_refer_data(sln, 0, arg); + if (rc) + break; + + /* VALUE */ + xasprintf(&arg, "%d", e->semval); + rc = scols_line_refer_data(sln, 1, arg); + if (rc) + break; + + /* NCOUNT */ + xasprintf(&arg, "%d", e->ncount); + rc = scols_line_refer_data(sln, 2, arg); + if (rc) + break; + + /* ZCOUNT */ + xasprintf(&arg, "%d", e->zcount); + rc = scols_line_refer_data(sln, 3, arg); + if (rc) + break; + + /* PID */ + xasprintf(&arg, "%d", e->pid); + rc = scols_line_refer_data(sln, 4, arg); + if (rc) + break; + + /* COMMAND */ + arg = pid_get_cmdline(e->pid); + rc = scols_line_refer_data(sln, 5, arg); 
+ if (rc) + break; + } + + if (rc != 0) + err(EXIT_FAILURE, _("failed to set data")); + + scols_line_set_userdata(ln, (void *)sub); + break; + } + } + ipc_sem_free_info(semds); +} + +static void do_sem_global(struct lsipc_control *ctl, struct libscols_table *tb) +{ + struct sem_data *semds, *semdsp; + struct ipc_limits lim; + int nsems = 0, nsets = 0; + + ipc_sem_get_limits(&lim); + + if (ipc_sem_get_info(-1, &semds) > 0) { + for (semdsp = semds; semdsp->next != NULL; semdsp = semdsp->next) { + ++nsets; + nsems += semds->sem_nsems; + } + ipc_sem_free_info(semds); + } + + global_set_data(ctl, tb, "SEMMNI", _("Number of semaphore identifiers"), nsets, lim.semmni, 1, 0); + global_set_data(ctl, tb, "SEMMNS", _("Total number of semaphores"), nsems, lim.semmns, 1, 0); + global_set_data(ctl, tb, "SEMMSL", _("Max semaphores per semaphore set."), 0, lim.semmsl, 0, 0); + global_set_data(ctl, tb, "SEMOPM", _("Max number of operations per semop(2)"), 0, lim.semopm, 0, 0); + global_set_data(ctl, tb, "SEMVMX", _("Semaphore max value"), 0, lim.semvmx, 0, 0); +} + +static void do_msg(int id, struct lsipc_control *ctl, struct libscols_table *tb) +{ + struct libscols_line *ln; + struct passwd *pw = NULL; + struct group *gr = NULL; + struct msg_data *msgds, *msgdsp; + char *arg = NULL; + + if (ipc_msg_get_info(id, &msgds) < 1) { + if (id > -1) + warnx(_("id %d not found"), id); + return; + } + scols_table_set_name(tb, "messages"); + + for (msgdsp = msgds; msgdsp->next != NULL || id > -1 ; msgdsp = msgdsp->next) { + size_t n; + ln = scols_table_new_line(tb, NULL); + + if (!ln) + err(EXIT_FAILURE, _("failed to allocate output line")); + + /* no need to call getpwuid() for the same user */ + if (!(pw && pw->pw_uid == msgdsp->msg_perm.uid)) + pw = getpwuid(msgdsp->msg_perm.uid); + + /* no need to call getgrgid() for the same user */ + if (!(gr && gr->gr_gid == msgdsp->msg_perm.gid)) + gr = getgrgid(msgdsp->msg_perm.gid); + + for (n = 0; n < ncolumns; n++) { + int rc = 0; + + switch 
(get_column_id(n)) { + case COL_KEY: + xasprintf(&arg, "0x%08x",msgdsp->msg_perm.key); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_ID: + xasprintf(&arg, "%d",msgdsp->msg_perm.id); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_OWNER: + arg = get_username(&pw, msgdsp->msg_perm.uid); + if (!arg) + xasprintf(&arg, "%u", msgdsp->msg_perm.uid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_PERMS: + if (ctl->numperms) + xasprintf(&arg, "%#o", msgdsp->msg_perm.mode & 0777); + else { + arg = xmalloc(11); + xstrmode(msgdsp->msg_perm.mode & 0777, arg); + rc = scols_line_refer_data(ln, n, arg); + } + break; + case COL_CUID: + xasprintf(&arg, "%u", msgdsp->msg_perm.cuid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CUSER: + arg = get_username(&pw, msgdsp->msg_perm.cuid); + if (arg) + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CGID: + xasprintf(&arg, "%u", msgdsp->msg_perm.cuid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CGROUP: + arg = get_groupname(&gr, msgdsp->msg_perm.cgid); + if (arg) + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_UID: + xasprintf(&arg, "%u", msgdsp->msg_perm.uid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_USER: + arg = get_username(&pw, msgdsp->msg_perm.uid); + if (arg) + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_GID: + xasprintf(&arg, "%u", msgdsp->msg_perm.gid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_GROUP: + arg = get_groupname(&gr,msgdsp->msg_perm.gid); + if (arg) + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CTIME: + if (msgdsp->q_ctime != 0) + rc = scols_line_refer_data(ln, n, + make_time(ctl->time_mode, + (time_t)msgdsp->q_ctime)); + break; + case COL_USEDBYTES: + if (ctl->bytes) + xasprintf(&arg, "%ju", msgdsp->q_cbytes); + else + arg = size_to_human_string(SIZE_SUFFIX_1LETTER, msgdsp->q_cbytes); + rc = scols_line_refer_data(ln, n, arg); + break; + case 
COL_MSGS: + xasprintf(&arg, "%ju", msgdsp->q_qnum); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_SEND: + if (msgdsp->q_stime != 0) + rc = scols_line_refer_data(ln, n, + make_time(ctl->time_mode, + (time_t)msgdsp->q_stime)); + break; + case COL_RECV: + if (msgdsp->q_rtime != 0) + rc = scols_line_refer_data(ln, n, + make_time(ctl->time_mode, + (time_t)msgdsp->q_rtime)); + break; + case COL_LSPID: + xasprintf(&arg, "%u", msgdsp->q_lspid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_LRPID: + xasprintf(&arg, "%u", msgdsp->q_lrpid); + rc = scols_line_refer_data(ln, n, arg); + break; + } + if (rc != 0) + err(EXIT_FAILURE, _("failed to set data")); + arg = NULL; + } + if (id > -1) + break; + } + ipc_msg_free_info(msgds); +} + + +static void do_msg_global(struct lsipc_control *ctl, struct libscols_table *tb) +{ + struct msg_data *msgds, *msgdsp; + struct ipc_limits lim; + int msgqs = 0; + + ipc_msg_get_limits(&lim); + + /* count number of used queues */ + if (ipc_msg_get_info(-1, &msgds) > 0) { + for (msgdsp = msgds; msgdsp->next != NULL; msgdsp = msgdsp->next) + ++msgqs; + ipc_msg_free_info(msgds); + } + + global_set_data(ctl, tb, "MSGMNI", _("Number of message queues"), msgqs, lim.msgmni, 1, 0); + global_set_data(ctl, tb, "MSGMAX", _("Max size of message (bytes)"), 0, lim.msgmax, 0, 1); + global_set_data(ctl, tb, "MSGMNB", _("Default max size of queue (bytes)"), 0, lim.msgmnb, 0, 1); +} + + +static void do_shm(int id, struct lsipc_control *ctl, struct libscols_table *tb) +{ + struct libscols_line *ln; + struct passwd *pw = NULL; + struct group *gr = NULL; + struct shm_data *shmds, *shmdsp; + char *arg = NULL; + + if (ipc_shm_get_info(id, &shmds) < 1) { + if (id > -1) + warnx(_("id %d not found"), id); + return; + } + + scols_table_set_name(tb, "sharedmemory"); + + for (shmdsp = shmds; shmdsp->next != NULL || id > -1 ; shmdsp = shmdsp->next) { + size_t n; + ln = scols_table_new_line(tb, NULL); + + if (!ln) + err(EXIT_FAILURE, _("failed to 
allocate output line")); + + for (n = 0; n < ncolumns; n++) { + int rc = 0; + + switch (get_column_id(n)) { + case COL_KEY: + xasprintf(&arg, "0x%08x",shmdsp->shm_perm.key); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_ID: + xasprintf(&arg, "%d",shmdsp->shm_perm.id); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_OWNER: + arg = get_username(&pw, shmdsp->shm_perm.uid); + if (!arg) + xasprintf(&arg, "%u", shmdsp->shm_perm.uid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_PERMS: + if (ctl->numperms) + xasprintf(&arg, "%#o", shmdsp->shm_perm.mode & 0777); + else { + arg = xmalloc(11); + xstrmode(shmdsp->shm_perm.mode & 0777, arg); + } + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CUID: + xasprintf(&arg, "%u", shmdsp->shm_perm.cuid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CUSER: + arg = get_username(&pw, shmdsp->shm_perm.cuid); + if (arg) + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CGID: + xasprintf(&arg, "%u", shmdsp->shm_perm.cuid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CGROUP: + arg = get_groupname(&gr, shmdsp->shm_perm.cgid); + if (arg) + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_UID: + xasprintf(&arg, "%u", shmdsp->shm_perm.uid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_USER: + arg = get_username(&pw, shmdsp->shm_perm.uid); + if (arg) + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_GID: + xasprintf(&arg, "%u", shmdsp->shm_perm.gid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_GROUP: + arg = get_groupname(&gr, shmdsp->shm_perm.gid); + if (arg) + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_CTIME: + if (shmdsp->shm_ctim != 0) + rc = scols_line_refer_data(ln, n, + make_time(ctl->time_mode, + (time_t)shmdsp->shm_ctim)); + break; + case COL_SIZE: + if (ctl->bytes) + xasprintf(&arg, "%ju", shmdsp->shm_segsz); + else + arg = 
size_to_human_string(SIZE_SUFFIX_1LETTER, shmdsp->shm_segsz); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_NATTCH: + xasprintf(&arg, "%ju", shmdsp->shm_nattch); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_STATUS: { + int comma = 0; + size_t offt = 0; + + free(arg); + arg = xcalloc(1, sizeof(char) * strlen(_("dest")) + + strlen(_("locked")) + + strlen(_("hugetlb")) + + strlen(_("noreserve")) + 4); +#ifdef SHM_DEST + if (shmdsp->shm_perm.mode & SHM_DEST) { + offt += sprintf(arg, "%s", _("dest")); + comma++; + } +#endif +#ifdef SHM_LOCKED + if (shmdsp->shm_perm.mode & SHM_LOCKED) { + if (comma) + arg[offt++] = ','; + offt += sprintf(arg + offt, "%s", _("locked")); + } +#endif +#ifdef SHM_HUGETLB + if (shmdsp->shm_perm.mode & SHM_HUGETLB) { + if (comma) + arg[offt++] = ','; + offt += sprintf(arg + offt, "%s", _("hugetlb")); + } +#endif +#ifdef SHM_NORESERVE + if (shmdsp->shm_perm.mode & SHM_NORESERVE) { + if (comma) + arg[offt++] = ','; + sprintf(arg + offt, "%s", _("noreserve")); + } +#endif + rc = scols_line_refer_data(ln, n, arg); + } + break; + case COL_ATTACH: + if (shmdsp->shm_atim != 0) + rc = scols_line_refer_data(ln, n, + make_time(ctl->time_mode, + (time_t)shmdsp->shm_atim)); + break; + case COL_DETACH: + if (shmdsp->shm_dtim != 0) + rc = scols_line_refer_data(ln, n, + make_time(ctl->time_mode, + (time_t)shmdsp->shm_dtim)); + break; + case COL_CPID: + xasprintf(&arg, "%u", shmdsp->shm_cprid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_LPID: + xasprintf(&arg, "%u", shmdsp->shm_lprid); + rc = scols_line_refer_data(ln, n, arg); + break; + case COL_COMMAND: + arg = pid_get_cmdline(shmdsp->shm_cprid); + rc = scols_line_refer_data(ln, n, arg); + break; + } + if (rc != 0) + err(EXIT_FAILURE, _("failed to set data")); + arg = NULL; + } + if (id > -1) + break; + } + ipc_shm_free_info(shmds); +} + +static void do_shm_global(struct lsipc_control *ctl, struct libscols_table *tb) +{ + struct shm_data *shmds, *shmdsp; + 
uint64_t nsegs = 0, sum_segsz = 0; + struct ipc_limits lim; + + ipc_shm_get_limits(&lim); + + if (ipc_shm_get_info(-1, &shmds) > 0) { + for (shmdsp = shmds; shmdsp->next != NULL; shmdsp = shmdsp->next) { + ++nsegs; + sum_segsz += shmdsp->shm_segsz; + } + ipc_shm_free_info(shmds); + } + + global_set_data(ctl, tb, "SHMMNI", _("Shared memory segments"), nsegs, lim.shmmni, 1, 0); + global_set_data(ctl, tb, "SHMALL", _("Shared memory pages"), sum_segsz / getpagesize(), lim.shmall, 1, 0); + global_set_data(ctl, tb, "SHMMAX", _("Max size of shared memory segment (bytes)"), 0, lim.shmmax, 0, 1); + global_set_data(ctl, tb, "SHMMIN", _("Min size of shared memory segment (bytes)"), 0, lim.shmmin, 0, 1); +} + +int main(int argc, char *argv[]) +{ + int opt, msg = 0, sem = 0, shm = 0, id = -1; + int show_time = 0, show_creat = 0, global = 0; + size_t i; + struct lsipc_control *ctl = xcalloc(1, sizeof(struct lsipc_control)); + static struct libscols_table *tb; + char *outarg = NULL; + + /* long only options. 
*/ + enum { + OPT_NOTRUNC = CHAR_MAX + 1, + OPT_NOHEAD, + OPT_TIME_FMT + }; + + static const struct option longopts[] = { + { "bytes", no_argument, NULL, 'b' }, + { "creator", no_argument, NULL, 'c' }, + { "export", no_argument, NULL, 'e' }, + { "global", no_argument, NULL, 'g' }, + { "help", no_argument, NULL, 'h' }, + { "id", required_argument, NULL, 'i' }, + { "json", no_argument, NULL, 'J' }, + { "list", no_argument, NULL, 'l' }, + { "newline", no_argument, NULL, 'n' }, + { "noheadings", no_argument, NULL, OPT_NOHEAD }, + { "notruncate", no_argument, NULL, OPT_NOTRUNC }, + { "numeric-perms", no_argument, NULL, 'P' }, + { "output", required_argument, NULL, 'o' }, + { "queues", no_argument, NULL, 'q' }, + { "raw", no_argument, NULL, 'r' }, + { "semaphores", no_argument, NULL, 's' }, + { "shmems", no_argument, NULL, 'm' }, + { "time", no_argument, NULL, 't' }, + { "time-format", required_argument, NULL, OPT_TIME_FMT }, + { "version", no_argument, NULL, 'V' }, + { "shell", no_argument, NULL, 'y' }, + {NULL, 0, NULL, 0} + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'J', 'e', 'l', 'n', 'r' }, + { 'g', 'i' }, + { 'c', 'o', 't' }, + { 'm', 'q', 's' }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + ctl->time_mode = 0; + + scols_init_debug(0); + + while ((opt = getopt_long(argc, argv, "bceghi:Jlmno:PqrstVy", longopts, NULL)) != -1) { + + err_exclusive_options(opt, longopts, excl, excl_st); + + switch (opt) { + case 'b': + ctl->bytes = 1; + break; + case 'i': + id = strtos32_or_err(optarg, _("failed to parse IPC identifier")); + break; + case 'e': + ctl->outmode = OUT_EXPORT; + break; + case 'r': + ctl->outmode = OUT_RAW; + break; + case 'o': + outarg = optarg; + break; + case 'g': + global = 1; + break; + case 'q': + msg = 1; + add_column(columns, ncolumns++, COL_KEY); + add_column(columns, 
ncolumns++, COL_ID); + add_column(columns, ncolumns++, COL_PERMS); + add_column(columns, ncolumns++, COL_OWNER); + add_column(columns, ncolumns++, COL_USEDBYTES); + add_column(columns, ncolumns++, COL_MSGS); + add_column(columns, ncolumns++, COL_LSPID); + add_column(columns, ncolumns++, COL_LRPID); + LOWER = COLDESC_IDX_MSG_FIRST; + UPPER = COLDESC_IDX_MSG_LAST; + break; + case 'l': + ctl->outmode = OUT_LIST; + break; + case 'm': + shm = 1; + add_column(columns, ncolumns++, COL_KEY); + add_column(columns, ncolumns++, COL_ID); + add_column(columns, ncolumns++, COL_PERMS); + add_column(columns, ncolumns++, COL_OWNER); + add_column(columns, ncolumns++, COL_SIZE); + add_column(columns, ncolumns++, COL_NATTCH); + add_column(columns, ncolumns++, COL_STATUS); + add_column(columns, ncolumns++, COL_CTIME); + add_column(columns, ncolumns++, COL_CPID); + add_column(columns, ncolumns++, COL_LPID); + add_column(columns, ncolumns++, COL_COMMAND); + LOWER = COLDESC_IDX_SHM_FIRST; + UPPER = COLDESC_IDX_SHM_LAST; + break; + case 'n': + ctl->outmode = OUT_NEWLINE; + break; + case 'P': + ctl->numperms = 1; + break; + case 's': + sem = 1; + add_column(columns, ncolumns++, COL_KEY); + add_column(columns, ncolumns++, COL_ID); + add_column(columns, ncolumns++, COL_PERMS); + add_column(columns, ncolumns++, COL_OWNER); + add_column(columns, ncolumns++, COL_NSEMS); + LOWER = COLDESC_IDX_SEM_FIRST; + UPPER = COLDESC_IDX_SEM_LAST; + break; + case OPT_NOTRUNC: + ctl->notrunc = 1; + break; + case OPT_NOHEAD: + ctl->noheadings = 1; + break; + case OPT_TIME_FMT: + ctl->time_mode = parse_time_mode(optarg); + break; + case 'J': + ctl->outmode = OUT_JSON; + break; + case 't': + show_time = 1; + break; + case 'c': + show_creat = 1; + break; + case 'y': + ctl->shellvar = 1; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + /* default is global */ + if (msg + shm + sem == 0) { + msg = shm = sem = global = 1; + if (show_time 
|| show_creat || id != -1) + errx(EXIT_FAILURE, _("--global is mutually exclusive with --creator, --id and --time")); + } + if (global) { + add_column(columns, ncolumns++, COL_RESOURCE); + add_column(columns, ncolumns++, COL_DESC); + add_column(columns, ncolumns++, COL_LIMIT); + add_column(columns, ncolumns++, COL_USED); + add_column(columns, ncolumns++, COL_USEPERC); + LOWER = COLDESC_IDX_SUM_FIRST; + UPPER = COLDESC_IDX_SUM_LAST; + } + + /* default to pretty-print if --id specified */ + if (id != -1 && !ctl->outmode) + ctl->outmode = OUT_PRETTY; + + if (!ctl->time_mode) + ctl->time_mode = ctl->outmode == OUT_PRETTY ? TIME_FULL : TIME_SHORT; + + if (ctl->outmode == OUT_PRETTY && !(optarg || show_creat || show_time)) { + /* all columns for lsipc --<RESOURCE> --id <ID> */ + for (ncolumns = 0, i = 0; i < ARRAY_SIZE(coldescs); i++) + columns[ncolumns++] = i; + } else { + if (show_creat) { + add_column(columns, ncolumns++, COL_CUID); + add_column(columns, ncolumns++, COL_CGID); + add_column(columns, ncolumns++, COL_UID); + add_column(columns, ncolumns++, COL_GID); + } + if (msg && show_time) { + add_column(columns, ncolumns++, COL_SEND); + add_column(columns, ncolumns++, COL_RECV); + add_column(columns, ncolumns++, COL_CTIME); + } + if (shm && show_time) { + /* keep "COMMAND" as last column */ + size_t cmd = columns[ncolumns - 1] == COL_COMMAND; + + if (cmd) + ncolumns--; + add_column(columns, ncolumns++, COL_ATTACH); + add_column(columns, ncolumns++, COL_DETACH); + if (cmd) + add_column(columns, ncolumns++, COL_COMMAND); + } + if (sem && show_time) { + add_column(columns, ncolumns++, COL_OTIME); + add_column(columns, ncolumns++, COL_CTIME); + } + } + + if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns), + &ncolumns, column_name_to_id) < 0) + return EXIT_FAILURE; + + tb = setup_table(ctl); + if (!tb) + return EXIT_FAILURE; + + if (global) + scols_table_set_name(tb, "ipclimits"); + + if (msg) { + if (global) + do_msg_global(ctl, tb); + else + 
do_msg(id, ctl, tb); + } + if (shm) { + if (global) + do_shm_global(ctl ,tb); + else + do_shm(id, ctl, tb); + } + if (sem) { + if (global) + do_sem_global(ctl, tb); + else + do_sem(id, ctl, tb); + } + + print_table(ctl, tb); + + scols_unref_table(tb); + free(ctl); + + return EXIT_SUCCESS; +} + diff --git a/sys-utils/lsirq.1 b/sys-utils/lsirq.1 new file mode 100644 index 0000000..b27f7ca --- /dev/null +++ b/sys-utils/lsirq.1 @@ -0,0 +1,93 @@ +'\" t +.\" Title: lsirq +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "LSIRQ" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +lsirq \- utility to display kernel interrupt information +.SH "SYNOPSIS" +.sp +\fBlsirq\fP [options] +.SH "DESCRIPTION" +.sp +Display kernel interrupt counter information. +.sp +The default output is subject to change. So whenever possible, you should avoid using default outputs in your scripts. Always explicitly define expected columns by using \fB\-\-output\fP. +.SH "OPTIONS" +.sp +\fB\-n\fP, \fB\-\-noheadings\fP +.RS 4 +Don\(cqt print headings. +.RE +.sp +\fB\-o\fP, \fB\-\-output\fP \fIlist\fP +.RS 4 +Specify which output columns to print. Use \fB\-\-help\fP to get a list of all supported columns. The default list of columns may be extended if list is specified in the format \fI+list\fP. +.RE +.sp +\fB\-s\fP, \fB\-\-sort\fP \fIcolumn\fP +.RS 4 +Specify sort criteria by column name. See \fB\-\-help\fP output to get column names. +.RE +.sp +\fB\-J\fP, \fB\-\-json\fP +.RS 4 +Use JSON output format. +.RE +.sp +\fB\-P\fP, \fB\-\-pairs\fP +.RS 4 +Produce output in the form of key="value" pairs. 
All potentially unsafe characters are hex\-escaped (\(rsx<code>). +.RE +.sp +\fB\-S\fP, \fB\-\-softirq\fP +.RS 4 +Show softirqs information. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "AUTHORS" +.sp +.MTO "pizhenwei\(atbytedance.com" "Zhenwei Pi" "," +.MTO "kerolasa\(atiki.fi" "Sami Kerola" "," +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBlsirq\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/lsirq.1.adoc b/sys-utils/lsirq.1.adoc new file mode 100644 index 0000000..67bd690 --- /dev/null +++ b/sys-utils/lsirq.1.adoc @@ -0,0 +1,57 @@ +//po4a: entry man manual += lsirq(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: lsirq + +== NAME + +lsirq - utility to display kernel interrupt information + +== SYNOPSIS + +*lsirq* [options] + +== DESCRIPTION + +Display kernel interrupt counter information. + +The default output is subject to change. So whenever possible, you should avoid using default outputs in your scripts. Always explicitly define expected columns by using *--output*. + +== OPTIONS + +*-n*, *--noheadings*:: +Don't print headings. + +*-o*, *--output* _list_:: +Specify which output columns to print. Use *--help* to get a list of all supported columns. The default list of columns may be extended if list is specified in the format _+list_. + +*-s*, *--sort* _column_:: +Specify sort criteria by column name. See *--help* output to get column names. + +*-J*, *--json*:: +Use JSON output format. + +*-P*, *--pairs*:: +Produce output in the form of key="value" pairs. All potentially unsafe characters are hex-escaped (\x<code>). + +*-S*, *--softirq*:: +Show softirqs information. + +include::man-common/help-version.adoc[] + +== AUTHORS + +mailto:pizhenwei@bytedance.com[Zhenwei Pi], +mailto:kerolasa@iki.fi[Sami Kerola], +mailto:kzak@redhat.com[Karel Zak] + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/lsirq.c b/sys-utils/lsirq.c new file mode 100644 index 0000000..1a90efe --- /dev/null +++ b/sys-utils/lsirq.c @@ -0,0 +1,152 @@ +/* + * lsirq - utility to display kernel interrupt information. 
/*
 * Build the interrupt table described by @out and print it.
 *
 * @softirq is forwarded unchanged to get_scols_table().
 * Returns 0 once the table has been printed and released, -1 when no table
 * could be obtained.
 */
static int print_irq_data(struct irq_output *out, int softirq)
{
	struct libscols_table *tb = get_scols_table(out, NULL, NULL, softirq);

	if (tb == NULL)
		return -1;

	scols_print_table(tb);
	scols_unref_table(tb);

	return 0;
}
<list> define which output columns to use\n"), stdout); + fputs(_(" -s, --sort <column> specify sort column\n"), stdout); + fputs(_(" -S, --softirq show softirqs instead of interrupts\n"), stdout); + fputs(USAGE_SEPARATOR, stdout); + printf(USAGE_HELP_OPTIONS(22)); + + fputs(USAGE_COLUMNS, stdout); + irq_print_columns(stdout, 1); + + printf(USAGE_MAN_TAIL("lsirq(1)")); + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + struct irq_output out = { + .ncolumns = 0 + }; + static const struct option longopts[] = { + {"sort", required_argument, NULL, 's'}, + {"noheadings", no_argument, NULL, 'n'}, + {"output", required_argument, NULL, 'o'}, + {"softirq", no_argument, NULL, 'S'}, + {"json", no_argument, NULL, 'J'}, + {"pairs", no_argument, NULL, 'P'}, + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {NULL, 0, NULL, 0} + }; + int c; + const char *outarg = NULL; + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + {'J', 'P'}, + {0} + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + int softirq = 0; + + setlocale(LC_ALL, ""); + + while ((c = getopt_long(argc, argv, "no:s:ShJPV", longopts, NULL)) != -1) { + err_exclusive_options(c, longopts, excl, excl_st); + + switch (c) { + case 'J': + out.json = 1; + break; + case 'P': + out.pairs = 1; + break; + case 'n': + out.no_headings = 1; + break; + case 'o': + outarg = optarg; + break; + case 's': + set_sort_func_by_name(&out, optarg); + break; + case 'S': + softirq = 1; + break; + case 'V': + print_version(EXIT_SUCCESS); + case 'h': + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + } + + /* default */ + if (!out.ncolumns) { + out.columns[out.ncolumns++] = COL_IRQ; + out.columns[out.ncolumns++] = COL_TOTAL; + out.columns[out.ncolumns++] = COL_NAME; + } + + /* add -o [+]<list> to putput */ + if (outarg && string_add_to_idarray(outarg, out.columns, + ARRAY_SIZE(out.columns), + &out.ncolumns, + irq_column_name_to_id) < 0) + exit(EXIT_FAILURE); + + return 
print_irq_data(&out, softirq) == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sys-utils/lsmem.1 b/sys-utils/lsmem.1 new file mode 100644 index 0000000..5aed3b4 --- /dev/null +++ b/sys-utils/lsmem.1 @@ -0,0 +1,133 @@ +'\" t +.\" Title: lsmem +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "LSMEM" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +lsmem \- list the ranges of available memory with their online status +.SH "SYNOPSIS" +.sp +\fBlsmem\fP [options] +.SH "DESCRIPTION" +.sp +The \fBlsmem\fP command lists the ranges of available memory with their online status. The listed memory blocks correspond to the memory block representation in sysfs. The command also shows the memory block size and the amount of memory in online and offline state. +.sp +The default output is compatible with original implementation from s390\-tools, but it\(cqs strongly recommended to avoid using default outputs in your scripts. Always explicitly define expected columns by using the \fB\-\-output\fP option together with a columns list in environments where a stable output is required. +.sp +The \fBlsmem\fP command lists a new memory range always when the current memory block distinguish from the previous block by some output column. This default behavior is possible to override by the \fB\-\-split\fP option (e.g., \fBlsmem \-\-split=ZONES\fP). The special word "none" may be used to ignore all differences between memory blocks and to create as large as possible continuous ranges. The opposite semantic is \fB\-\-all\fP to list individual memory blocks. 
+.sp +Note that some output columns may provide inaccurate information if a split policy forces \fBlsmem\fP to ignore differences in some attributes. For example, if you merge removable and non\-removable memory blocks into one range, then the whole range will be marked as non\-removable in the \fBlsmem\fP output. +.sp +Not all columns are supported on all systems. If an unsupported column is specified, \fBlsmem\fP prints the column but does not provide any data for it. +.sp +Use the \fB\-\-help\fP option to see the column descriptions. +.SH "OPTIONS" +.sp +\fB\-a\fP, \fB\-\-all\fP +.RS 4 +List each individual memory block, instead of combining memory blocks with similar attributes. +.RE +.sp +\fB\-b\fP, \fB\-\-bytes\fP +.RS 4 +Print the sizes in bytes rather than in a human\-readable format. +.sp +By default, sizes are expressed in bytes, and unit prefixes are +powers of 2^10 (1024). Unit symbols are abbreviated for better +readability by showing only their first letter; +for example, "1 KiB" and "1 MiB" are shown as "1 K" and "1 M", +deliberately omitting the "iB" part of these abbreviations. +.RE +.sp +\fB\-J\fP, \fB\-\-json\fP +.RS 4 +Use JSON output format. +.RE +.sp +\fB\-n\fP, \fB\-\-noheadings\fP +.RS 4 +Do not print a header line. +.RE +.sp +\fB\-o\fP, \fB\-\-output\fP \fIlist\fP +.RS 4 +Specify which output columns to print. Use \fB\-\-help\fP to get a list of all supported columns. The default list of columns may be extended if \fIlist\fP is specified in the format \fB+\fP\fIlist\fP (e.g., \fBlsmem \-o +NODE\fP). +.RE +.sp +\fB\-\-output\-all\fP +.RS 4 +Output all available columns. +.RE +.sp +\fB\-P\fP, \fB\-\-pairs\fP +.RS 4 +Produce output in the form of key="value" pairs. All potentially unsafe value characters are hex\-escaped (\(rsx<code>). +.RE +.sp +\fB\-r\fP, \fB\-\-raw\fP +.RS 4 +Produce output in raw format.
All potentially unsafe characters are hex\-escaped (\(rsx<code>). +.RE +.sp +\fB\-S\fP, \fB\-\-split\fP \fIlist\fP +.RS 4 +Specify which columns (attributes) use to split memory blocks to ranges. The supported columns are STATE, REMOVABLE, NODE and ZONES, or "none". The other columns are silently ignored. For more details see \fBDESCRIPTION\fP above. +.RE +.sp +\fB\-s\fP, \fB\-\-sysroot\fP \fIdirectory\fP +.RS 4 +Gather memory data for a Linux instance other than the instance from which the \fBlsmem\fP command is issued. The specified \fIdirectory\fP is the system root of the Linux instance to be inspected. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.sp +\fB\-\-summary\fP[=\fIwhen\fP] +.RS 4 +This option controls summary lines output. The optional argument \fIwhen\fP can be \fBnever\fP, \fBalways\fP or \fBonly\fP. If the \fIwhen\fP argument is omitted, it defaults to \fB"only"\fP. The summary output is suppressed for \fB\-\-raw\fP, \fB\-\-pairs\fP and \fB\-\-json\fP. +.RE +.SH "AUTHORS" +.sp +\fBlsmem\fP was originally written by Gerald Schaefer for s390\-tools in Perl. The C version for util\-linux was written by Clemens von Mann, Heiko Carstens and Karel Zak. +.SH "SEE ALSO" +.sp +\fBchmem\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBlsmem\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/lsmem.1.adoc b/sys-utils/lsmem.1.adoc new file mode 100644 index 0000000..f1ece8f --- /dev/null +++ b/sys-utils/lsmem.1.adoc @@ -0,0 +1,82 @@ +//po4a: entry man manual += lsmem(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: lsmem + +== NAME + +lsmem - list the ranges of available memory with their online status + +== SYNOPSIS + +*lsmem* [options] + +== DESCRIPTION + +The *lsmem* command lists the ranges of available memory with their online status. The listed memory blocks correspond to the memory block representation in sysfs. The command also shows the memory block size and the amount of memory in online and offline state. + +The default output is compatible with original implementation from s390-tools, but it's strongly recommended to avoid using default outputs in your scripts. Always explicitly define expected columns by using the *--output* option together with a columns list in environments where a stable output is required. + +The *lsmem* command lists a new memory range always when the current memory block distinguish from the previous block by some output column. This default behavior is possible to override by the *--split* option (e.g., *lsmem --split=ZONES*). The special word "none" may be used to ignore all differences between memory blocks and to create as large as possible continuous ranges. The opposite semantic is *--all* to list individual memory blocks. + +Note that some output columns may provide inaccurate information if a split policy forces *lsmem* to ignore differences in some attributes. For example if you merge removable and non-removable memory blocks to the one range than all the range will be marked as non-removable on *lsmem* output. + +Not all columns are supported on all systems. If an unsupported column is specified, *lsmem* prints the column but does not provide any data for it. 
+ +Use the *--help* option to see the columns description. + +== OPTIONS + +*-a*, *--all*:: +List each individual memory block, instead of combining memory blocks with similar attributes. + +*-b*, *--bytes*:: +include::man-common/in-bytes.adoc[] + +*-J*, *--json*:: +Use JSON output format. + +*-n*, *--noheadings*:: +Do not print a header line. + +*-o*, *--output* _list_:: +Specify which output columns to print. Use *--help* to get a list of all supported columns. The default list of columns may be extended if _list_ is specified in the format **+**__list__ (e.g., *lsmem -o +NODE*). + +*--output-all*:: +Output all available columns. + +*-P*, *--pairs*:: +Produce output in the form of key="value" pairs. All potentially unsafe value characters are hex-escaped (\x<code>). + +*-r*, *--raw*:: +Produce output in raw format. All potentially unsafe characters are hex-escaped (\x<code>). + +*-S*, *--split* _list_:: +Specify which columns (attributes) use to split memory blocks to ranges. The supported columns are STATE, REMOVABLE, NODE and ZONES, or "none". The other columns are silently ignored. For more details see *DESCRIPTION* above. + +*-s*, *--sysroot* _directory_:: +Gather memory data for a Linux instance other than the instance from which the *lsmem* command is issued. The specified _directory_ is the system root of the Linux instance to be inspected. + +include::man-common/help-version.adoc[] + +*--summary*[=_when_]:: +This option controls summary lines output. The optional argument _when_ can be *never*, *always* or *only*. If the _when_ argument is omitted, it defaults to *"only"*. The summary output is suppressed for *--raw*, *--pairs* and *--json*. + +== AUTHORS + +*lsmem* was originally written by Gerald Schaefer for s390-tools in Perl. The C version for util-linux was written by Clemens von Mann, Heiko Carstens and Karel Zak. 
+ +== SEE ALSO + +*chmem*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/lsmem.c b/sys-utils/lsmem.c new file mode 100644 index 0000000..379d616 --- /dev/null +++ b/sys-utils/lsmem.c @@ -0,0 +1,774 @@ +/* + * lsmem - Show memory configuration + * + * Copyright IBM Corp. 2016 + * Copyright (C) 2016 Karel Zak <kzak@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#include <c.h> +#include <nls.h> +#include <path.h> +#include <strutils.h> +#include <closestream.h> +#include <xalloc.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <dirent.h> +#include <fcntl.h> +#include <inttypes.h> +#include <assert.h> +#include <optutils.h> +#include <libsmartcols.h> + +#define _PATH_SYS_MEMORY "/sys/devices/system/memory" + +#define MEMORY_STATE_ONLINE 0 +#define MEMORY_STATE_OFFLINE 1 +#define MEMORY_STATE_GOING_OFFLINE 2 +#define MEMORY_STATE_UNKNOWN 3 + +enum zone_id { + ZONE_DMA = 0, + ZONE_DMA32, + ZONE_NORMAL, + ZONE_HIGHMEM, + ZONE_MOVABLE, + ZONE_DEVICE, + ZONE_NONE, + ZONE_UNKNOWN, + MAX_NR_ZONES, +}; + +struct memory_block { + uint64_t index; + uint64_t count; + int state; + int node; + int nr_zones; + int zones[MAX_NR_ZONES]; + unsigned int removable:1; +}; + +struct lsmem { + struct path_cxt *sysmem; /* _PATH_SYS_MEMORY directory handler */ + struct dirent **dirs; + int ndirs; + struct memory_block *blocks; + int nblocks; + uint64_t block_size; + uint64_t mem_online; + uint64_t mem_offline; + + struct libscols_table *table; + unsigned int have_nodes : 1, + raw : 1, + export : 1, + json : 1, + noheadings : 1, + summary : 1, + list_all : 1, + bytes : 1, + want_summary : 1, + want_table : 1, + split_by_node : 1, + split_by_state : 1, + split_by_removable : 1, + split_by_zones : 1, + have_zones : 1; +}; + + +enum { + COL_RANGE, + COL_SIZE, + COL_STATE, + COL_REMOVABLE, + COL_BLOCK, + COL_NODE, + COL_ZONES, +}; + +static char *zone_names[] = { + [ZONE_DMA] = "DMA", + [ZONE_DMA32] = "DMA32", + [ZONE_NORMAL] = "Normal", + [ZONE_HIGHMEM] = "Highmem", + [ZONE_MOVABLE] = "Movable", + [ZONE_DEVICE] = "Device", + [ZONE_NONE] = "None", /* block contains more than one zone, can't be offlined */ + [ZONE_UNKNOWN] = "Unknown", +}; + +/* column names */ +struct coldesc { + const char *name; /* header */ + double whint; /* width hint (N < 1 is in percent of termwidth) */ + int flags; /* SCOLS_FL_* */ + const char 
*help; +}; + +/* columns descriptions */ +static struct coldesc coldescs[] = { + [COL_RANGE] = { "RANGE", 0, 0, N_("start and end address of the memory range")}, + [COL_SIZE] = { "SIZE", 5, SCOLS_FL_RIGHT, N_("size of the memory range")}, + [COL_STATE] = { "STATE", 0, SCOLS_FL_RIGHT, N_("online status of the memory range")}, + [COL_REMOVABLE] = { "REMOVABLE", 0, SCOLS_FL_RIGHT, N_("memory is removable")}, + [COL_BLOCK] = { "BLOCK", 0, SCOLS_FL_RIGHT, N_("memory block number or blocks range")}, + [COL_NODE] = { "NODE", 0, SCOLS_FL_RIGHT, N_("numa node of memory")}, + [COL_ZONES] = { "ZONES", 0, SCOLS_FL_RIGHT, N_("valid zones for the memory range")}, +}; + +/* columns[] array specifies all currently wanted output column. The columns + * are defined by coldescs[] array and you can specify (on command line) each + * column twice. That's enough, dynamically allocated array of the columns is + * unnecessary overkill and over-engineering in this case */ +static int columns[ARRAY_SIZE(coldescs) * 2]; +static size_t ncolumns; + +static inline size_t err_columns_index(size_t arysz, size_t idx) +{ + if (idx >= arysz) + errx(EXIT_FAILURE, _("too many columns specified, " + "the limit is %zu columns"), + arysz - 1); + return idx; +} + +/* + * name must be null-terminated + */ +static int zone_name_to_id(const char *name) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(zone_names); i++) { + if (!strcasecmp(name, zone_names[i])) + return i; + } + return ZONE_UNKNOWN; +} + +#define add_column(ary, n, id) \ + ((ary)[ err_columns_index(ARRAY_SIZE(ary), (n)) ] = (id)) + +static int column_name_to_id(const char *name, size_t namesz) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(coldescs); i++) { + const char *cn = coldescs[i].name; + + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) + return i; + } + warnx(_("unknown column: %s"), name); + return -1; +} + +static inline int get_column_id(int num) +{ + assert(num >= 0); + assert((size_t) num < ncolumns); + assert(columns[num] < 
(int) ARRAY_SIZE(coldescs)); + + return columns[num]; +} + +static inline struct coldesc *get_column_desc(int num) +{ + return &coldescs[ get_column_id(num) ]; +} + +static inline void reset_split_policy(struct lsmem *l, int enable) +{ + l->split_by_state = enable; + l->split_by_node = enable; + l->split_by_removable = enable; + l->split_by_zones = enable; +} + +static void set_split_policy(struct lsmem *l, int cols[], size_t ncols) +{ + size_t i; + + reset_split_policy(l, 0); + + for (i = 0; i < ncols; i++) { + switch (cols[i]) { + case COL_STATE: + l->split_by_state = 1; + break; + case COL_NODE: + l->split_by_node = 1; + break; + case COL_REMOVABLE: + l->split_by_removable = 1; + break; + case COL_ZONES: + l->split_by_zones = 1; + break; + default: + break; + } + } +} + +static void add_scols_line(struct lsmem *lsmem, struct memory_block *blk) +{ + size_t i; + struct libscols_line *line; + + line = scols_table_new_line(lsmem->table, NULL); + if (!line) + err_oom(); + + for (i = 0; i < ncolumns; i++) { + char *str = NULL; + + switch (get_column_id(i)) { + case COL_RANGE: + { + uint64_t start = blk->index * lsmem->block_size; + uint64_t size = blk->count * lsmem->block_size; + xasprintf(&str, "0x%016"PRIx64"-0x%016"PRIx64, start, start + size - 1); + break; + } + case COL_SIZE: + if (lsmem->bytes) + xasprintf(&str, "%"PRId64, (uint64_t) blk->count * lsmem->block_size); + else + str = size_to_human_string(SIZE_SUFFIX_1LETTER, + (uint64_t) blk->count * lsmem->block_size); + break; + case COL_STATE: + str = xstrdup( + blk->state == MEMORY_STATE_ONLINE ? _("online") : + blk->state == MEMORY_STATE_OFFLINE ? _("offline") : + blk->state == MEMORY_STATE_GOING_OFFLINE ? _("on->off") : + "?"); + break; + case COL_REMOVABLE: + if (blk->state == MEMORY_STATE_ONLINE) + str = xstrdup(blk->removable ? 
_("yes") : _("no")); + break; + case COL_BLOCK: + if (blk->count == 1) + xasprintf(&str, "%"PRId64, blk->index); + else + xasprintf(&str, "%"PRId64"-%"PRId64, + blk->index, blk->index + blk->count - 1); + break; + case COL_NODE: + if (lsmem->have_nodes) + xasprintf(&str, "%d", blk->node); + break; + case COL_ZONES: + if (lsmem->have_zones) { + char valid_zones[BUFSIZ]; + int j, zone_id; + + valid_zones[0] = '\0'; + for (j = 0; j < blk->nr_zones; j++) { + zone_id = blk->zones[j]; + if (strlen(valid_zones) + + strlen(zone_names[zone_id]) > BUFSIZ - 2) + break; + strcat(valid_zones, zone_names[zone_id]); + if (j + 1 < blk->nr_zones) + strcat(valid_zones, "/"); + } + str = xstrdup(valid_zones); + } + break; + } + + if (str && scols_line_refer_data(line, i, str) != 0) + err_oom(); + } +} + +static void fill_scols_table(struct lsmem *lsmem) +{ + int i; + + for (i = 0; i < lsmem->nblocks; i++) + add_scols_line(lsmem, &lsmem->blocks[i]); +} + +static void print_summary(struct lsmem *lsmem) +{ + if (lsmem->bytes) { + printf("%-23s %15"PRId64"\n",_("Memory block size:"), lsmem->block_size); + printf("%-23s %15"PRId64"\n",_("Total online memory:"), lsmem->mem_online); + printf("%-23s %15"PRId64"\n",_("Total offline memory:"), lsmem->mem_offline); + } else { + char *p; + + if ((p = size_to_human_string(SIZE_SUFFIX_1LETTER, lsmem->block_size))) + printf("%-23s %5s\n",_("Memory block size:"), p); + free(p); + + if ((p = size_to_human_string(SIZE_SUFFIX_1LETTER, lsmem->mem_online))) + printf("%-23s %5s\n",_("Total online memory:"), p); + free(p); + + if ((p = size_to_human_string(SIZE_SUFFIX_1LETTER, lsmem->mem_offline))) + printf("%-23s %5s\n",_("Total offline memory:"), p); + free(p); + } +} + +static int memory_block_get_node(struct lsmem *lsmem, char *name) +{ + struct dirent *de; + DIR *dir; + int node; + + dir = ul_path_opendir(lsmem->sysmem, name); + if (!dir) + err(EXIT_FAILURE, _("Failed to open %s"), name); + + node = -1; + while ((de = readdir(dir)) != NULL) { + if 
(strncmp("node", de->d_name, 4) != 0) + continue; + if (!isdigit_string(de->d_name + 4)) + continue; + errno = 0; + node = strtol(de->d_name + 4, NULL, 10); + if (errno) + continue; + break; + } + closedir(dir); + return node; +} + +static int memory_block_read_attrs(struct lsmem *lsmem, char *name, + struct memory_block *blk) +{ + char *line = NULL; + int i, x = 0, rc = 0; + + memset(blk, 0, sizeof(*blk)); + + errno = 0; + blk->count = 1; + blk->state = MEMORY_STATE_UNKNOWN; + blk->index = strtoumax(name + 6, NULL, 10); /* get <num> of "memory<num>" */ + + if (errno) + rc = -errno; + + if (ul_path_readf_s32(lsmem->sysmem, &x, "%s/removable", name) == 0) + blk->removable = x == 1; + + if (ul_path_readf_string(lsmem->sysmem, &line, "%s/state", name) > 0 && line) { + if (strcmp(line, "offline") == 0) + blk->state = MEMORY_STATE_OFFLINE; + else if (strcmp(line, "online") == 0) + blk->state = MEMORY_STATE_ONLINE; + else if (strcmp(line, "going-offline") == 0) + blk->state = MEMORY_STATE_GOING_OFFLINE; + free(line); + } + + if (lsmem->have_nodes) + blk->node = memory_block_get_node(lsmem, name); + + blk->nr_zones = 0; + if (lsmem->have_zones + && ul_path_readf_string(lsmem->sysmem, &line, "%s/valid_zones", name) > 0 + && line) { + + char *token = strtok(line, " "); + + for (i = 0; token && i < MAX_NR_ZONES; i++) { + blk->zones[i] = zone_name_to_id(token); + blk->nr_zones++; + token = strtok(NULL, " "); + } + free(line); + } + + return rc; +} + +static int is_mergeable(struct lsmem *lsmem, struct memory_block *blk) +{ + struct memory_block *curr; + int i; + + if (!lsmem->nblocks) + return 0; + curr = &lsmem->blocks[lsmem->nblocks - 1]; + if (lsmem->list_all) + return 0; + if (curr->index + curr->count != blk->index) + return 0; + if (lsmem->split_by_state && curr->state != blk->state) + return 0; + if (lsmem->split_by_removable && curr->removable != blk->removable) + return 0; + if (lsmem->split_by_node && lsmem->have_nodes) { + if (curr->node != blk->node) + return 0; + 
} + if (lsmem->split_by_zones && lsmem->have_zones) { + if (curr->nr_zones != blk->nr_zones) + return 0; + for (i = 0; i < curr->nr_zones; i++) { + if (curr->zones[i] == ZONE_UNKNOWN || + curr->zones[i] != blk->zones[i]) + return 0; + } + } + return 1; +} + +static void free_info(struct lsmem *lsmem) +{ + int i; + + if (!lsmem) + return; + free(lsmem->blocks); + for (i = 0; i < lsmem->ndirs; i++) + free(lsmem->dirs[i]); + free(lsmem->dirs); +} + +static void read_info(struct lsmem *lsmem) +{ + struct memory_block blk; + char buf[128]; + int i; + + if (ul_path_read_buffer(lsmem->sysmem, buf, sizeof(buf), "block_size_bytes") <= 0) + err(EXIT_FAILURE, _("failed to read memory block size")); + + errno = 0; + lsmem->block_size = strtoumax(buf, NULL, 16); + if (errno) + err(EXIT_FAILURE, _("failed to read memory block size")); + + for (i = 0; i < lsmem->ndirs; i++) { + memory_block_read_attrs(lsmem, lsmem->dirs[i]->d_name, &blk); + if (blk.state == MEMORY_STATE_ONLINE) + lsmem->mem_online += lsmem->block_size; + else + lsmem->mem_offline += lsmem->block_size; + if (is_mergeable(lsmem, &blk)) { + lsmem->blocks[lsmem->nblocks - 1].count++; + continue; + } + lsmem->nblocks++; + lsmem->blocks = xrealloc(lsmem->blocks, lsmem->nblocks * sizeof(blk)); + *&lsmem->blocks[lsmem->nblocks - 1] = blk; + } +} + +static int memory_block_filter(const struct dirent *de) +{ + if (strncmp("memory", de->d_name, 6) != 0) + return 0; + return isdigit_string(de->d_name + 6); +} + +static void read_basic_info(struct lsmem *lsmem) +{ + char dir[PATH_MAX]; + + if (ul_path_access(lsmem->sysmem, F_OK, "block_size_bytes") != 0) + errx(EXIT_FAILURE, _("This system does not support memory blocks")); + + ul_path_get_abspath(lsmem->sysmem, dir, sizeof(dir), NULL); + + lsmem->ndirs = scandir(dir, &lsmem->dirs, memory_block_filter, versionsort); + if (lsmem->ndirs <= 0) + err(EXIT_FAILURE, _("Failed to read %s"), dir); + + if (memory_block_get_node(lsmem, lsmem->dirs[0]->d_name) != -1) + lsmem->have_nodes 
= 1; + + /* The valid_zones sysmem attribute was introduced with kernel 3.18 */ + if (ul_path_access(lsmem->sysmem, F_OK, "memory0/valid_zones") == 0) + lsmem->have_zones = 1; +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + size_t i; + + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("List the ranges of available memory with their online status.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -J, --json use JSON output format\n"), out); + fputs(_(" -P, --pairs use key=\"value\" output format\n"), out); + fputs(_(" -a, --all list each individual memory block\n"), out); + fputs(_(" -b, --bytes print SIZE in bytes rather than in human readable format\n"), out); + fputs(_(" -n, --noheadings don't print headings\n"), out); + fputs(_(" -o, --output <list> output columns\n"), out); + fputs(_(" --output-all output all columns\n"), out); + fputs(_(" -r, --raw use raw output format\n"), out); + fputs(_(" -S, --split <list> split ranges by specified columns\n"), out); + fputs(_(" -s, --sysroot <dir> use the specified directory as system root\n"), out); + fputs(_(" --summary[=when] print summary information (never,always or only)\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(22)); + + fputs(USAGE_COLUMNS, out); + for (i = 0; i < ARRAY_SIZE(coldescs); i++) + fprintf(out, " %10s %s\n", coldescs[i].name, _(coldescs[i].help)); + + printf(USAGE_MAN_TAIL("lsmem(1)")); + + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + struct lsmem _lsmem = { + .want_table = 1, + .want_summary = 1 + }, *lsmem = &_lsmem; + + const char *outarg = NULL, *splitarg = NULL, *prefix = NULL; + int c; + size_t i; + + enum { + LSMEM_OPT_SUMARRY = CHAR_MAX + 1, + OPT_OUTPUT_ALL + }; + + static const struct option longopts[] = { + {"all", no_argument, NULL, 'a'}, + {"bytes", no_argument, NULL, 'b'}, + {"help", no_argument, NULL, 'h'}, + 
{"json", no_argument, NULL, 'J'}, + {"noheadings", no_argument, NULL, 'n'}, + {"output", required_argument, NULL, 'o'}, + {"output-all", no_argument, NULL, OPT_OUTPUT_ALL}, + {"pairs", no_argument, NULL, 'P'}, + {"raw", no_argument, NULL, 'r'}, + {"sysroot", required_argument, NULL, 's'}, + {"split", required_argument, NULL, 'S'}, + {"version", no_argument, NULL, 'V'}, + {"summary", optional_argument, NULL, LSMEM_OPT_SUMARRY }, + {NULL, 0, NULL, 0} + }; + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'J', 'P', 'r' }, + { 'S', 'a' }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, "abhJno:PrS:s:V", longopts, NULL)) != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch (c) { + case 'a': + lsmem->list_all = 1; + break; + case 'b': + lsmem->bytes = 1; + break; + case 'J': + lsmem->json = 1; + lsmem->want_summary = 0; + break; + case 'n': + lsmem->noheadings = 1; + break; + case 'o': + outarg = optarg; + break; + case OPT_OUTPUT_ALL: + for (ncolumns = 0; (size_t)ncolumns < ARRAY_SIZE(coldescs); ncolumns++) + columns[ncolumns] = ncolumns; + break; + case 'P': + lsmem->export = 1; + lsmem->want_summary = 0; + break; + case 'r': + lsmem->raw = 1; + lsmem->want_summary = 0; + break; + case 's': + prefix = optarg; + break; + case 'S': + splitarg = optarg; + break; + case LSMEM_OPT_SUMARRY: + if (optarg) { + if (strcmp(optarg, "never") == 0) + lsmem->want_summary = 0; + else if (strcmp(optarg, "only") == 0) + lsmem->want_table = 0; + else if (strcmp(optarg, "always") == 0) + lsmem->want_summary = 1; + else + errx(EXIT_FAILURE, _("unsupported --summary argument")); + } else + lsmem->want_table = 0; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (argc != optind) { + warnx(_("bad 
usage")); + errtryhelp(EXIT_FAILURE); + } + + if (lsmem->want_table + lsmem->want_summary == 0) + errx(EXIT_FAILURE, _("options --{raw,json,pairs} and --summary=only are mutually exclusive")); + + ul_path_init_debug(); + + lsmem->sysmem = ul_new_path(_PATH_SYS_MEMORY); + if (!lsmem->sysmem) + err(EXIT_FAILURE, _("failed to initialize %s handler"), _PATH_SYS_MEMORY); + if (prefix && ul_path_set_prefix(lsmem->sysmem, prefix) != 0) + err(EXIT_FAILURE, _("invalid argument to --sysroot")); + if (!ul_path_is_accessible(lsmem->sysmem)) + err(EXIT_FAILURE, _("cannot open %s"), _PATH_SYS_MEMORY); + + /* Shortcut to avoid scols machinery on --summary=only */ + if (lsmem->want_table == 0 && lsmem->want_summary) { + read_basic_info(lsmem); + read_info(lsmem); + print_summary(lsmem); + return EXIT_SUCCESS; + } + + /* + * Default columns + */ + if (!ncolumns) { + add_column(columns, ncolumns++, COL_RANGE); + add_column(columns, ncolumns++, COL_SIZE); + add_column(columns, ncolumns++, COL_STATE); + add_column(columns, ncolumns++, COL_REMOVABLE); + add_column(columns, ncolumns++, COL_BLOCK); + } + + if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns), + &ncolumns, column_name_to_id) < 0) + return EXIT_FAILURE; + + /* + * Initialize output + */ + scols_init_debug(0); + + if (!(lsmem->table = scols_new_table())) + errx(EXIT_FAILURE, _("failed to initialize output table")); + scols_table_enable_raw(lsmem->table, lsmem->raw); + scols_table_enable_export(lsmem->table, lsmem->export); + scols_table_enable_json(lsmem->table, lsmem->json); + scols_table_enable_noheadings(lsmem->table, lsmem->noheadings); + + if (lsmem->json) + scols_table_set_name(lsmem->table, "memory"); + + for (i = 0; i < ncolumns; i++) { + struct coldesc *ci = get_column_desc(i); + struct libscols_column *cl; + + cl = scols_table_new_column(lsmem->table, ci->name, ci->whint, ci->flags); + if (!cl) + err(EXIT_FAILURE, _("Failed to initialize output column")); + + if (lsmem->json) { + int id = 
get_column_id(i); + + switch (id) { + case COL_SIZE: + if (!lsmem->bytes) + break; + /* fallthrough */ + case COL_NODE: + scols_column_set_json_type(cl, SCOLS_JSON_NUMBER); + break; + case COL_REMOVABLE: + scols_column_set_json_type(cl, SCOLS_JSON_BOOLEAN); + break; + } + } + } + + if (splitarg) { + int split[ARRAY_SIZE(coldescs)] = { 0 }; + static size_t nsplits = 0; + + if (strcasecmp(splitarg, "none") == 0) + ; + else if (string_add_to_idarray(splitarg, split, ARRAY_SIZE(split), + &nsplits, column_name_to_id) < 0) + return EXIT_FAILURE; + + set_split_policy(lsmem, split, nsplits); + + } else + /* follow output columns */ + set_split_policy(lsmem, columns, ncolumns); + + /* + * Read data and print output + */ + read_basic_info(lsmem); + read_info(lsmem); + + if (lsmem->want_table) { + fill_scols_table(lsmem); + scols_print_table(lsmem->table); + + if (lsmem->want_summary) + fputc('\n', stdout); + } + + if (lsmem->want_summary) + print_summary(lsmem); + + scols_unref_table(lsmem->table); + ul_unref_path(lsmem->sysmem); + free_info(lsmem); + return 0; +} diff --git a/sys-utils/lsns.8 b/sys-utils/lsns.8 new file mode 100644 index 0000000..6c1ed21 --- /dev/null +++ b/sys-utils/lsns.8 @@ -0,0 +1,130 @@ +'\" t +.\" Title: lsns +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-07-20 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "LSNS" "8" "2022-07-20" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +lsns \- list namespaces +.SH "SYNOPSIS" +.sp +\fBlsns\fP [options] \fInamespace\fP +.SH "DESCRIPTION" +.sp +\fBlsns\fP lists information about all the currently accessible namespaces or about the given \fInamespace\fP. 
The \fInamespace\fP identifier is an inode number. +.sp +The default output is subject to change. So whenever possible, you should avoid using default outputs in your scripts. Always explicitly define expected output mode (\fB\-\-tree\fP or \fB\-\-list\fP) and columns by using the \fB\-\-output\fP option together with a columns list in environments where a stable output is required. +.sp +The \fBNSFS\fP column, printed when \fBnet\fP is specified for the \fB\-\-type\fP option, is special; it uses multi\-line cells. Use the option \fB\-\-nowrap\fP to switch to ","\-separated single\-line representation. +.sp +Note that \fBlsns\fP reads information directly from the \fI/proc\fP filesystem and for non\-root users it may return incomplete information. The current \fI/proc\fP filesystem may be unshared and affected by a PID namespace (see \fBunshare \-\-mount\-proc\fP for more details). \fBlsns\fP is not able to see persistent namespaces without processes where the namespace instance is held by a bind mount to /proc/\fIpid\fP/ns/\fItype\fP. +.SH "OPTIONS" +.sp +\fB\-J\fP, \fB\-\-json\fP +.RS 4 +Use JSON output format. +.RE +.sp +\fB\-l\fP, \fB\-\-list\fP +.RS 4 +Use list output format. +.RE +.sp +\fB\-n\fP, \fB\-\-noheadings\fP +.RS 4 +Do not print a header line. +.RE +.sp +\fB\-o\fP, \fB\-\-output\fP \fIlist\fP +.RS 4 +Specify which output columns to print. Use \fB\-\-help\fP to get a list of all supported columns. +.sp +The default list of columns may be extended if \fIlist\fP is specified in the format \fB+\fP\fIlist\fP (e.g., \fBlsns \-o +PATH\fP). +.RE +.sp +\fB\-\-output\-all\fP +.RS 4 +Output all available columns. +.RE +.sp +\fB\-p\fP, \fB\-\-task\fP \fIPID\fP +.RS 4 +Display only the namespaces held by the process with this \fIPID\fP. +.RE +.sp +\fB\-r\fP, \fB\-\-raw\fP +.RS 4 +Use the raw output format. +.RE +.sp +\fB\-t\fP, \fB\-\-type\fP \fItype\fP +.RS 4 +Display the specified \fItype\fP of namespaces only. 
The supported types are \fBmnt\fP, \fBnet\fP, \fBipc\fP, \fBuser\fP, \fBpid\fP, \fButs\fP, \fBcgroup\fP and \fBtime\fP. This option may be given more than once. +.RE +.sp +\fB\-u\fP, \fB\-\-notruncate\fP +.RS 4 +Do not truncate text in columns. +.RE +.sp +\fB\-W\fP, \fB\-\-nowrap\fP +.RS 4 +Do not use multi\-line text in columns. +.RE +.sp +\fB\-T\fP, \fB\-\-tree\fP \fIrel\fP +.RS 4 +Use tree\-like output format. If \fBprocess\fP is given as \fIrel\fP, print process tree(s) in each namespace. This is the default when \fB\-\-tree\fP is not specified. If \fBparent\fP is given, print tree(s) constructed by the parent/child relationship. If \fBowner\fP is given, print tree(s) constructed by the owner/owned relationship. \fBowner\fP is used as the default when \fIrel\fP is omitted. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "AUTHORS" +.sp +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +\fBnsenter\fP(1), +\fBunshare\fP(1), +\fBclone\fP(2), +\fBnamespaces\fP(7), +\fBioctl_ns\fP(2), +\fBip\-netns\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBlsns\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/lsns.8.adoc b/sys-utils/lsns.8.adoc new file mode 100644 index 0000000..bcbc909 --- /dev/null +++ b/sys-utils/lsns.8.adoc @@ -0,0 +1,90 @@ +//po4a: entry man manual +//// +Man page for the lsns command. +Copyright 2015 Karel Zak <kzak@redhat.com> +May be distributed under the GNU General Public License +//// += lsns(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: lsns + +== NAME + +lsns - list namespaces + +== SYNOPSIS + +*lsns* [options] _namespace_ + +== DESCRIPTION + +*lsns* lists information about all the currently accessible namespaces or about the given _namespace_. The _namespace_ identifier is an inode number. + +The default output is subject to change. So whenever possible, you should avoid using default outputs in your scripts. Always explicitly define expected output mode (*--tree* or *--list*) and columns by using the *--output* option together with a columns list in environments where a stable output is required. + +The *NSFS* column, printed when *net* is specified for the *--type* option, is special; it uses multi-line cells. Use the option *--nowrap* to switch to ","-separated single-line representation. + +Note that *lsns* reads information directly from the _/proc_ filesystem and for non-root users it may return incomplete information. The current _/proc_ filesystem may be unshared and affected by a PID namespace (see *unshare --mount-proc* for more details). *lsns* is not able to see persistent namespaces without processes where the namespace instance is held by a bind mount to /proc/_pid_/ns/_type_. + +== OPTIONS + +*-J*, *--json*:: +Use JSON output format. + +*-l*, *--list*:: +Use list output format. + +*-n*, *--noheadings*:: +Do not print a header line. + +*-o*, *--output* _list_:: +Specify which output columns to print. Use *--help* to get a list of all supported columns. 
++ +The default list of columns may be extended if _list_ is specified in the format **+**__list__ (e.g., *lsns -o +PATH*). + +*--output-all*:: +Output all available columns. + +*-p*, *--task* _PID_:: +Display only the namespaces held by the process with this _PID_. + +*-r*, *--raw*:: +Use the raw output format. + +*-t*, *--type* _type_:: +Display the specified _type_ of namespaces only. The supported types are *mnt*, *net*, *ipc*, *user*, *pid*, *uts*, *cgroup* and *time*. This option may be given more than once. + +*-u*, *--notruncate*:: +Do not truncate text in columns. + +*-W*, *--nowrap*:: +Do not use multi-line text in columns. + +*-T*, *--tree* _rel_:: +Use tree-like output format. If *process* is given as _rel_, print process tree(s) in each namespace. This is the default when *--tree* is not specified. If *parent* is given, print tree(s) constructed by the parent/child relationship. If *owner* is given, print tree(s) constructed by the owner/owned relationship. *owner* is used as the default when _rel_ is omitted. + +include::man-common/help-version.adoc[] + +== AUTHORS + +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +*nsenter*(1), +*unshare*(1), +*clone*(2), +*namespaces*(7), +*ioctl_ns*(2), +*ip-netns*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/lsns.c b/sys-utils/lsns.c new file mode 100644 index 0000000..75625b3 --- /dev/null +++ b/sys-utils/lsns.c @@ -0,0 +1,1436 @@ +/* + * lsns(8) - list system namespaces + * + * Copyright (C) 2015 Karel Zak <kzak@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +#include <stdio.h> +#include <string.h> +#include <getopt.h> +#include <stdlib.h> +#include <assert.h> +#include <dirent.h> +#include <unistd.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <wchar.h> +#include <libsmartcols.h> +#include <libmount.h> + +#ifdef HAVE_LINUX_NET_NAMESPACE_H +# include <stdbool.h> +# include <sys/socket.h> +# include <linux/netlink.h> +# include <linux/rtnetlink.h> +# include <linux/net_namespace.h> +#endif + +#ifdef HAVE_LINUX_NSFS_H +# include <linux/nsfs.h> +# if defined(NS_GET_NSTYPE) && defined(NS_GET_OWNER_UID) +# define USE_NS_GET_API 1 +# endif +#endif + +#include "pathnames.h" +#include "nls.h" +#include "xalloc.h" +#include "c.h" +#include "list.h" +#include "closestream.h" +#include "optutils.h" +#include "procfs.h" +#include "strutils.h" +#include "namespace.h" +#include "idcache.h" +#include "fileutils.h" + +#include "debug.h" + +static UL_DEBUG_DEFINE_MASK(lsns); +UL_DEBUG_DEFINE_MASKNAMES(lsns) = UL_DEBUG_EMPTY_MASKNAMES; + +#define LSNS_DEBUG_INIT (1 << 1) +#define LSNS_DEBUG_PROC (1 << 2) +#define LSNS_DEBUG_NS (1 << 3) +#define LSNS_DEBUG_ALL 0xFFFF + +#define LSNS_NETNS_UNUSABLE -2 + +#define DBG(m, x) __UL_DBG(lsns, LSNS_DEBUG_, m, x) +#define ON_DBG(m, x) __UL_DBG_CALL(lsns, LSNS_DEBUG_, m, x) + +#define UL_DEBUG_CURRENT_MASK UL_DEBUG_MASK(lsns) +#include "debugobj.h" + +static struct idcache *uid_cache = NULL; + +/* column IDs */ +enum { + COL_NS = 0, + COL_TYPE, + COL_PATH, + COL_NPROCS, + COL_PID, + COL_PPID, + COL_COMMAND, + COL_UID, + 
COL_USER, + COL_NETNSID, + COL_NSFS, + COL_PNS, /* parent namespace */ + COL_ONS, /* owner namespace */ +}; + +/* column names */ +struct colinfo { + const char *name; /* header */ + double whint; /* width hint (N < 1 is in percent of termwidth) */ + int flags; /* SCOLS_FL_* */ + const char *help; + int json_type; +}; + +/* columns descriptions */ +static const struct colinfo infos[] = { + [COL_NS] = { "NS", 10, SCOLS_FL_RIGHT, N_("namespace identifier (inode number)"), SCOLS_JSON_NUMBER }, + [COL_TYPE] = { "TYPE", 5, 0, N_("kind of namespace") }, + [COL_PATH] = { "PATH", 0, 0, N_("path to the namespace")}, + [COL_NPROCS] = { "NPROCS", 5, SCOLS_FL_RIGHT, N_("number of processes in the namespace"), SCOLS_JSON_NUMBER }, + [COL_PID] = { "PID", 5, SCOLS_FL_RIGHT, N_("lowest PID in the namespace"), SCOLS_JSON_NUMBER }, + [COL_PPID] = { "PPID", 5, SCOLS_FL_RIGHT, N_("PPID of the PID"), SCOLS_JSON_NUMBER }, + [COL_COMMAND] = { "COMMAND", 0, SCOLS_FL_TRUNC, N_("command line of the PID")}, + [COL_UID] = { "UID", 0, SCOLS_FL_RIGHT, N_("UID of the PID"), SCOLS_JSON_NUMBER}, + [COL_USER] = { "USER", 0, 0, N_("username of the PID")}, + [COL_NETNSID] = { "NETNSID", 0, SCOLS_FL_RIGHT, N_("namespace ID as used by network subsystem")}, + [COL_NSFS] = { "NSFS", 0, SCOLS_FL_WRAP, N_("nsfs mountpoint (usually used network subsystem)")}, + [COL_PNS] = { "PNS", 10, SCOLS_FL_RIGHT, N_("parent namespace identifier (inode number)"), SCOLS_JSON_NUMBER }, + [COL_ONS] = { "ONS", 10, SCOLS_FL_RIGHT, N_("owner namespace identifier (inode number)"), SCOLS_JSON_NUMBER }, +}; + +static int columns[ARRAY_SIZE(infos) * 2]; +static size_t ncolumns; + +enum { + LSNS_ID_MNT = 0, + LSNS_ID_NET, + LSNS_ID_PID, + LSNS_ID_UTS, + LSNS_ID_IPC, + LSNS_ID_USER, + LSNS_ID_CGROUP, + LSNS_ID_TIME +}; + +static char *ns_names[] = { + [LSNS_ID_MNT] = "mnt", + [LSNS_ID_NET] = "net", + [LSNS_ID_PID] = "pid", + [LSNS_ID_UTS] = "uts", + [LSNS_ID_IPC] = "ipc", + [LSNS_ID_USER] = "user", + [LSNS_ID_CGROUP] = "cgroup", + 
[LSNS_ID_TIME] = "time" +}; + +enum { + RELA_PARENT, + RELA_OWNER, + MAX_RELA +}; + +struct lsns_namespace { + ino_t id; + int type; /* LSNS_* */ + int nprocs; + int netnsid; + ino_t related_id[MAX_RELA]; + + struct lsns_process *proc; + + struct lsns_namespace *related_ns[MAX_RELA]; + struct libscols_line *ns_outline; + uid_t uid_fallback; /* refer this member if `proc' is NULL. */ + + struct list_head namespaces; /* lsns->processes member */ + struct list_head processes; /* head of lsns_process *siblings */ +}; + +struct lsns_process { + pid_t pid; /* process PID */ + pid_t ppid; /* parent's PID */ + pid_t tpid; /* thread group */ + char state; + uid_t uid; + + ino_t ns_ids[ARRAY_SIZE(ns_names)]; + ino_t ns_pids[ARRAY_SIZE(ns_names)]; + ino_t ns_oids[ARRAY_SIZE(ns_names)]; + + struct list_head ns_siblings[ARRAY_SIZE(ns_names)]; + + struct list_head processes; /* list of processes */ + + struct libscols_line *outline; + struct lsns_process *parent; + + int netnsid; +}; + + +enum { + LSNS_TREE_NONE, + LSNS_TREE_PROCESS, + LSNS_TREE_OWNER, + LSNS_TREE_PARENT, +}; + +struct lsns { + struct list_head processes; + struct list_head namespaces; + + pid_t fltr_pid; /* filter out by PID */ + ino_t fltr_ns; /* filter out by namespace */ + int fltr_types[ARRAY_SIZE(ns_names)]; + int fltr_ntypes; + + unsigned int raw : 1, + json : 1, + tree : 2, + no_trunc : 1, + no_headings: 1, + no_wrap : 1; + + + struct libmnt_table *tab; +}; + +struct netnsid_cache { + ino_t ino; + int id; + struct list_head netnsids; +}; + +static struct list_head netnsids_cache; + +static int netlink_fd = -1; + +static void lsns_init_debug(void) +{ + __UL_INIT_DEBUG_FROM_ENV(lsns, LSNS_DEBUG_, 0, LSNS_DEBUG); +} + +static int ns_name2type(const char *name) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(ns_names); i++) { + if (strcmp(ns_names[i], name) == 0) + return i; + } + return -1; +} + +static int column_name_to_id(const char *name, size_t namesz) +{ + size_t i; + + assert(name); + + for (i = 0; i < 
ARRAY_SIZE(infos); i++) { + const char *cn = infos[i].name; + + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) + return i; + } + warnx(_("unknown column: %s"), name); + return -1; +} + +static int has_column(int id) +{ + size_t i; + + for (i = 0; i < ncolumns; i++) { + if (columns[i] == id) + return 1; + } + return 0; +} + +static inline int get_column_id(int num) +{ + assert(num >= 0); + assert((size_t) num < ncolumns); + assert(columns[num] < (int) ARRAY_SIZE(infos)); + + return columns[num]; +} + +static inline const struct colinfo *get_column_info(unsigned num) +{ + return &infos[ get_column_id(num) ]; +} + +static int get_ns_ino(int dir, const char *nsname, ino_t *ino, ino_t *pino, ino_t *oino) +{ + struct stat st; + char path[16]; + + snprintf(path, sizeof(path), "ns/%s", nsname); + + if (fstatat(dir, path, &st, 0) != 0) + return -errno; + *ino = st.st_ino; + + *pino = 0; + *oino = 0; + +#ifdef USE_NS_GET_API + int fd, pfd, ofd; + fd = openat(dir, path, 0); + if (fd < 0) + return -errno; + if (strcmp(nsname, "pid") == 0 || strcmp(nsname, "user") == 0) { + if ((pfd = ioctl(fd, NS_GET_PARENT)) < 0) { + if (errno == EPERM) + goto user; + close(fd); + return -errno; + } + if (fstat(pfd, &st) < 0) { + close(pfd); + close(fd); + return -errno; + } + *pino = st.st_ino; + close(pfd); + } + user: + if ((ofd = ioctl(fd, NS_GET_USERNS)) < 0) { + if (errno == EPERM) + goto out; + close(fd); + return -errno; + } + if (fstat(ofd, &st) < 0) { + close(ofd); + close(fd); + return -errno; + } + *oino = st.st_ino; + close(ofd); + out: + close(fd); +#endif + return 0; +} + +static int parse_proc_stat(FILE *fp, pid_t *pid, char *state, pid_t *ppid) +{ + char *line = NULL, *p; + size_t len = 0; + int rc; + + if (getline(&line, &len, fp) < 0) { + rc = -errno; + goto error; + } + + p = strrchr(line, ')'); + if (p == NULL || + sscanf(line, "%d (", pid) != 1 || + sscanf(p, ") %c %d*[^\n]", state, ppid) != 2) { + rc = -EINVAL; + goto error; + } + rc = 0; + +error: + free(line); 
+ return rc; +} + +#ifdef HAVE_LINUX_NET_NAMESPACE_H +static int netnsid_cache_find(ino_t netino, int *netnsid) +{ + struct list_head *p; + + list_for_each(p, &netnsids_cache) { + struct netnsid_cache *e = list_entry(p, + struct netnsid_cache, + netnsids); + if (e->ino == netino) { + *netnsid = e->id; + return 1; + } + } + + return 0; +} + +static void netnsid_cache_add(ino_t netino, int netnsid) +{ + struct netnsid_cache *e; + + e = xcalloc(1, sizeof(*e)); + e->ino = netino; + e->id = netnsid; + INIT_LIST_HEAD(&e->netnsids); + list_add(&e->netnsids, &netnsids_cache); +} + +static int get_netnsid_via_netlink_send_request(int target_fd) +{ + unsigned char req[NLMSG_SPACE(sizeof(struct rtgenmsg)) + + RTA_SPACE(sizeof(int32_t))]; + + struct nlmsghdr *nlh = (struct nlmsghdr *)req; + struct rtgenmsg *rt = NLMSG_DATA(req); + struct rtattr *rta = (struct rtattr *) + (req + NLMSG_SPACE(sizeof(struct rtgenmsg))); + int32_t *fd = RTA_DATA(rta); + + nlh->nlmsg_len = sizeof(req); + nlh->nlmsg_flags = NLM_F_REQUEST; + nlh->nlmsg_type = RTM_GETNSID; + rt->rtgen_family = AF_UNSPEC; + rta->rta_type = NETNSA_FD; + rta->rta_len = RTA_SPACE(sizeof(int32_t)); + *fd = target_fd; + + if (send(netlink_fd, req, sizeof(req), 0) < 0) + return -1; + return 0; +} + +static int get_netnsid_via_netlink_recv_response(int *netnsid) +{ + unsigned char res[NLMSG_SPACE(sizeof(struct rtgenmsg)) + + ((RTA_SPACE(sizeof(int32_t)) + < RTA_SPACE(sizeof(struct nlmsgerr))) + ? 
RTA_SPACE(sizeof(struct nlmsgerr)) + : RTA_SPACE(sizeof(int32_t)))]; + int rtalen; + ssize_t reslen; + + struct nlmsghdr *nlh; + struct rtattr *rta; + + reslen = recv(netlink_fd, res, sizeof(res), 0); + if (reslen < 0) + return -1; + + nlh = (struct nlmsghdr *)res; + if (!(NLMSG_OK(nlh, (size_t)reslen) + && nlh->nlmsg_type == RTM_NEWNSID)) + return -1; + + rtalen = NLMSG_PAYLOAD(nlh, sizeof(struct rtgenmsg)); + rta = (struct rtattr *)(res + NLMSG_SPACE(sizeof(struct rtgenmsg))); + if (!(RTA_OK(rta, rtalen) + && rta->rta_type == NETNSA_NSID)) + return -1; + + *netnsid = *(int *)RTA_DATA(rta); + + return 0; +} + +static int get_netnsid_via_netlink(int dir, const char *path) +{ + int netnsid; + int target_fd; + + if (netlink_fd < 0) + return LSNS_NETNS_UNUSABLE; + + target_fd = openat(dir, path, O_RDONLY); + if (target_fd < 0) + return LSNS_NETNS_UNUSABLE; + + if (get_netnsid_via_netlink_send_request(target_fd) < 0) { + netnsid = LSNS_NETNS_UNUSABLE; + goto out; + } + + if (get_netnsid_via_netlink_recv_response(&netnsid) < 0) { + netnsid = LSNS_NETNS_UNUSABLE; + goto out; + } + + out: + close(target_fd); + return netnsid; +} + +static int get_netnsid(int dir, ino_t netino) +{ + int netnsid; + + if (!netnsid_cache_find(netino, &netnsid)) { + netnsid = get_netnsid_via_netlink(dir, "ns/net"); + netnsid_cache_add(netino, netnsid); + } + + return netnsid; +} +#else +static int get_netnsid(int dir __attribute__((__unused__)), + ino_t netino __attribute__((__unused__))) +{ + return LSNS_NETNS_UNUSABLE; +} +#endif /* HAVE_LINUX_NET_NAMESPACE_H */ + +static int read_process(struct lsns *ls, pid_t pid) +{ + struct lsns_process *p = NULL; + char buf[BUFSIZ]; + DIR *dir; + int rc = 0, fd; + FILE *f = NULL; + size_t i; + struct stat st; + + DBG(PROC, ul_debug("reading %d", (int) pid)); + + snprintf(buf, sizeof(buf), "/proc/%d", pid); + dir = opendir(buf); + if (!dir) + return -errno; + + p = xcalloc(1, sizeof(*p)); + p->netnsid = LSNS_NETNS_UNUSABLE; + + if (fstat(dirfd(dir), &st) 
== 0) { + p->uid = st.st_uid; + add_uid(uid_cache, st.st_uid); + } + + fd = openat(dirfd(dir), "stat", O_RDONLY); + if (fd < 0) { + rc = -errno; + goto done; + } + if (!(f = fdopen(fd, "r"))) { + rc = -errno; + goto done; + } + rc = parse_proc_stat(f, &p->pid, &p->state, &p->ppid); + if (rc < 0) + goto done; + rc = 0; + + for (i = 0; i < ARRAY_SIZE(p->ns_ids); i++) { + INIT_LIST_HEAD(&p->ns_siblings[i]); + + if (!ls->fltr_types[i]) + continue; + + rc = get_ns_ino(dirfd(dir), ns_names[i], &p->ns_ids[i], + &p->ns_pids[i], &p->ns_oids[i]); + if (rc && rc != -EACCES && rc != -ENOENT) + goto done; + if (i == LSNS_ID_NET) + p->netnsid = get_netnsid(dirfd(dir), p->ns_ids[i]); + rc = 0; + } + + INIT_LIST_HEAD(&p->processes); + + DBG(PROC, ul_debugobj(p, "new pid=%d", p->pid)); + list_add_tail(&p->processes, &ls->processes); +done: + if (f) + fclose(f); + closedir(dir); + if (rc) + free(p); + return rc; +} + +static int read_processes(struct lsns *ls) +{ + DIR *dir; + struct dirent *d; + int rc = 0; + + DBG(PROC, ul_debug("opening /proc")); + + dir = opendir(_PATH_PROC); + if (!dir) + return -errno; + + while ((d = xreaddir(dir))) { + pid_t pid = 0; + + if (procfs_dirent_get_pid(d, &pid) != 0) + continue; + + /* TODO: use ul_new_procfs_path(pid, NULL) to read files from /proc/pid/ + */ + rc = read_process(ls, pid); + if (rc && rc != -EACCES && rc != -ENOENT) + break; + rc = 0; + } + + DBG(PROC, ul_debug("closing /proc")); + closedir(dir); + return rc; +} + +static struct lsns_namespace *get_namespace(struct lsns *ls, ino_t ino) +{ + struct list_head *p; + + list_for_each(p, &ls->namespaces) { + struct lsns_namespace *ns = list_entry(p, struct lsns_namespace, namespaces); + + if (ns->id == ino) + return ns; + } + return NULL; +} + +static int namespace_has_process(struct lsns_namespace *ns, pid_t pid) +{ + struct list_head *p; + + list_for_each(p, &ns->processes) { + struct lsns_process *proc = list_entry(p, struct lsns_process, ns_siblings[ns->type]); + + if (proc->pid == 
pid) + return 1; + } + return 0; +} + +static struct lsns_namespace *add_namespace(struct lsns *ls, int type, ino_t ino, + ino_t parent_ino, ino_t owner_ino) +{ + struct lsns_namespace *ns = xcalloc(1, sizeof(*ns)); + + if (!ns) + return NULL; + + DBG(NS, ul_debugobj(ns, "new %s[%ju]", ns_names[type], (uintmax_t)ino)); + + INIT_LIST_HEAD(&ns->processes); + INIT_LIST_HEAD(&ns->namespaces); + + ns->type = type; + ns->id = ino; + ns->related_id[RELA_PARENT] = parent_ino; + ns->related_id[RELA_OWNER] = owner_ino; + + list_add_tail(&ns->namespaces, &ls->namespaces); + return ns; +} + +static int add_process_to_namespace(struct lsns *ls, struct lsns_namespace *ns, struct lsns_process *proc) +{ + struct list_head *p; + + DBG(NS, ul_debugobj(ns, "add process [%p] pid=%d to %s[%ju]", + proc, proc->pid, ns_names[ns->type], (uintmax_t)ns->id)); + + list_for_each(p, &ls->processes) { + struct lsns_process *xproc = list_entry(p, struct lsns_process, processes); + + if (xproc->pid == proc->ppid) /* my parent */ + proc->parent = xproc; + else if (xproc->ppid == proc->pid) /* my child */ + xproc->parent = proc; + } + + list_add_tail(&proc->ns_siblings[ns->type], &ns->processes); + ns->nprocs++; + + if (!ns->proc || ns->proc->pid > proc->pid) + ns->proc = proc; + + return 0; +} + +static int cmp_namespaces(struct list_head *a, struct list_head *b, + __attribute__((__unused__)) void *data) +{ + struct lsns_namespace *xa = list_entry(a, struct lsns_namespace, namespaces), + *xb = list_entry(b, struct lsns_namespace, namespaces); + + return cmp_numbers(xa->id, xb->id); +} + +static int netnsid_xasputs(char **str, int netnsid) +{ + if (netnsid >= 0) + return xasprintf(str, "%d", netnsid); +#ifdef NETNSA_NSID_NOT_ASSIGNED + if (netnsid == NETNSA_NSID_NOT_ASSIGNED) + return xasprintf(str, "%s", "unassigned"); +#endif + return 0; +} + +#ifdef USE_NS_GET_API +static int clone_type_to_lsns_type(int clone_type) +{ + switch (clone_type) { + case CLONE_NEWNS: + return LSNS_ID_MNT; + case 
CLONE_NEWCGROUP: + return LSNS_ID_CGROUP; + case CLONE_NEWUTS: + return LSNS_ID_UTS; + case CLONE_NEWIPC: + return LSNS_ID_IPC; + case CLONE_NEWUSER: + return LSNS_ID_USER; + case CLONE_NEWPID: + return LSNS_ID_PID; + case CLONE_NEWNET: + return LSNS_ID_NET; + default: + return -1; + } +} + +static struct lsns_namespace *add_namespace_for_nsfd(struct lsns *ls, int fd, ino_t ino) +{ + int fd_owner = -1, fd_parent = -1; + struct stat st_owner, st_parent; + ino_t ino_owner = 0, ino_parent = 0; + struct lsns_namespace *ns; + int clone_type, lsns_type; + + clone_type = ioctl(fd, NS_GET_NSTYPE); + if (clone_type < 0) + return NULL; + lsns_type = clone_type_to_lsns_type(clone_type); + if (lsns_type < 0) + return NULL; + + fd_owner = ioctl(fd, NS_GET_USERNS); + if (fd_owner < 0) + goto parent; + if (fstat(fd_owner, &st_owner) < 0) + goto parent; + ino_owner = st_owner.st_ino; + + parent: + fd_parent = ioctl(fd, NS_GET_PARENT); + if (fd_parent < 0) + goto add_ns; + if (fstat(fd_parent, &st_parent) < 0) + goto add_ns; + ino_parent = st_parent.st_ino; + + add_ns: + ns = add_namespace(ls, lsns_type, ino, ino_parent, ino_owner); + ioctl(fd, NS_GET_OWNER_UID, &ns->uid_fallback); + add_uid(uid_cache, ns->uid_fallback); + + if ((lsns_type == LSNS_ID_USER || lsns_type == LSNS_ID_PID) + && ino_parent != ino && ino_parent != 0) { + ns->related_ns[RELA_PARENT] = get_namespace(ls, ino_parent); + if (!ns->related_ns[RELA_PARENT]) { + ns->related_ns[RELA_PARENT] = add_namespace_for_nsfd(ls, fd_parent, ino_parent); + if (ino_parent == ino_owner) + ns->related_ns[RELA_OWNER] = ns->related_ns[RELA_PARENT]; + } + } + + if (ns->related_ns[RELA_OWNER] == NULL && ino_owner != 0) { + ns->related_ns[RELA_OWNER] = get_namespace(ls, ino_owner); + if (!ns->related_ns[RELA_OWNER]) + ns->related_ns[RELA_OWNER] = add_namespace_for_nsfd(ls, fd_owner, ino_owner); + } + + if (fd_owner >= 0) + close(fd_owner); + if (fd_parent >= 0) + close(fd_parent); + + return ns; +} + +static void 
interpolate_missing_namespaces(struct lsns *ls, struct lsns_namespace *orphan, int rela) +{ + const int cmd[MAX_RELA] = { + [RELA_PARENT] = NS_GET_PARENT, + [RELA_OWNER] = NS_GET_USERNS + }; + char buf[BUFSIZ]; + int fd_orphan, fd_missing; + struct stat st; + + orphan->related_ns[rela] = get_namespace(ls, orphan->related_id[rela]); + if (orphan->related_ns[rela]) + return; + + snprintf(buf, sizeof(buf), "/proc/%d/ns/%s", orphan->proc->pid, ns_names[orphan->type]); + fd_orphan = open(buf, O_RDONLY); + if (fd_orphan < 0) + return; + + fd_missing = ioctl(fd_orphan, cmd[rela]); + close(fd_orphan); + if (fd_missing < 0) + return; + + if (fstat(fd_missing, &st) < 0 + || st.st_ino != orphan->related_id[rela]) { + close(fd_missing); + return; + } + + orphan->related_ns[rela] = add_namespace_for_nsfd(ls, fd_missing, orphan->related_id[rela]); + close(fd_missing); +} + +static void read_related_namespaces(struct lsns *ls) +{ + struct list_head *p; + struct lsns_namespace *orphan[2] = {NULL, NULL}; + int rela; + + list_for_each(p, &ls->namespaces) { + struct lsns_namespace *ns = list_entry(p, struct lsns_namespace, namespaces); + struct list_head *pp; + list_for_each(pp, &ls->namespaces) { + struct lsns_namespace *pns = list_entry(pp, struct lsns_namespace, namespaces); + if (ns->type == LSNS_ID_USER + || ns->type == LSNS_ID_PID) { + if (ns->related_id[RELA_PARENT] == pns->id) + ns->related_ns[RELA_PARENT] = pns; + if (ns->related_id[RELA_OWNER] == pns->id) + ns->related_ns[RELA_OWNER] = pns; + if (ns->related_ns[RELA_PARENT] && ns->related_ns[RELA_OWNER]) + break; + } else { + if (ns->related_id[RELA_OWNER] == pns->id) { + ns->related_ns[RELA_OWNER] = pns; + break; + } + } + } + + /* lsns scans /proc/[0-9]+ for finding namespaces. + * So if a namespace has no process, lsns cannot + * find it. Here we call it a missing namespace. + * + * If the id for a related namesspce is known but + * namespace for the id is not found, there must + * be orphan namespaces. 
A missing namespace is an + * owner or a parent of the orphan namespace. + */ + for (rela = 0; rela < MAX_RELA; rela++) { + if (ns->related_id[rela] != 0 + && ns->related_ns[rela] == NULL) { + ns->related_ns[rela] = orphan[rela]; + orphan[rela] = ns; + } + } + } + + for (rela = 0; rela < MAX_RELA; rela++) { + while (orphan[rela]) { + struct lsns_namespace *current = orphan[rela]; + orphan[rela] = orphan[rela]->related_ns[rela]; + current->related_ns[rela] = NULL; + interpolate_missing_namespaces(ls, current, rela); + } + } +} + +#endif /* USE_NS_GET_API */ + +static int read_namespaces(struct lsns *ls) +{ + struct list_head *p; + + DBG(NS, ul_debug("reading namespace")); + + list_for_each(p, &ls->processes) { + size_t i; + struct lsns_namespace *ns; + struct lsns_process *proc = list_entry(p, struct lsns_process, processes); + + for (i = 0; i < ARRAY_SIZE(proc->ns_ids); i++) { + if (proc->ns_ids[i] == 0) + continue; + if (!(ns = get_namespace(ls, proc->ns_ids[i]))) { + ns = add_namespace(ls, i, proc->ns_ids[i], + proc->ns_pids[i], proc->ns_oids[i]); + if (!ns) + return -ENOMEM; + } + add_process_to_namespace(ls, ns, proc); + } + } + +#ifdef USE_NS_GET_API + if (ls->tree == LSNS_TREE_OWNER || ls->tree == LSNS_TREE_PARENT) + read_related_namespaces(ls); +#endif + list_sort(&ls->namespaces, cmp_namespaces, NULL); + + return 0; +} + +static int is_nsfs_root(struct libmnt_fs *fs, void *data) +{ + if (!mnt_fs_match_fstype(fs, "nsfs") || !mnt_fs_get_root(fs)) + return 0; + + return (strcmp(mnt_fs_get_root(fs), (char *)data) == 0); +} + +static int is_path_included(const char *path_set, const char *elt, + const char sep) +{ + size_t elt_len; + size_t path_set_len; + char *tmp; + + + tmp = strstr(path_set, elt); + if (!tmp) + return 0; + + elt_len = strlen(elt); + path_set_len = strlen(path_set); + + /* path_set includes only elt or + * path_set includes elt as the first element. 
+ */ + if (tmp == path_set + && ((path_set_len == elt_len) + || (path_set[elt_len] == sep))) + return 1; + + /* path_set includes elt at the middle + * or as the last element. + */ + if ((*(tmp - 1) == sep) + && ((*(tmp + elt_len) == sep) + || (*(tmp + elt_len) == '\0'))) + return 1; + + return 0; +} + +static int nsfs_xasputs(char **str, + struct lsns_namespace *ns, + struct libmnt_table *tab, + char sep) +{ + struct libmnt_iter *itr = mnt_new_iter(MNT_ITER_FORWARD); + char *expected_root; + struct libmnt_fs *fs = NULL; + + xasprintf(&expected_root, "%s:[%ju]", ns_names[ns->type], (uintmax_t)ns->id); + *str = NULL; + + while (mnt_table_find_next_fs(tab, itr, is_nsfs_root, + expected_root, &fs) == 0) { + + const char *tgt = mnt_fs_get_target(fs); + + if (!*str) + xasprintf(str, "%s", tgt); + + else if (!is_path_included(*str, tgt, sep)) { + char *tmp = NULL; + + xasprintf(&tmp, "%s%c%s", *str, sep, tgt); + free(*str); + *str = tmp; + } + } + free(expected_root); + mnt_free_iter(itr); + + return 1; +} +static void add_scols_line(struct lsns *ls, struct libscols_table *table, + struct lsns_namespace *ns, struct lsns_process *proc) +{ + size_t i; + struct libscols_line *line; + + assert(ns); + assert(table); + + line = scols_table_new_line(table, + (ls->tree == LSNS_TREE_PROCESS && proc) && proc->parent ? proc->parent->outline: + (ls->tree == LSNS_TREE_PARENT) && ns->related_ns[RELA_PARENT] ? ns->related_ns[RELA_PARENT]->ns_outline: + (ls->tree == LSNS_TREE_OWNER) && ns->related_ns[RELA_OWNER] ? 
ns->related_ns[RELA_OWNER]->ns_outline: + NULL); + if (!line) { + warn(_("failed to add line to output")); + return; + } + + for (i = 0; i < ncolumns; i++) { + char *str = NULL; + + switch (get_column_id(i)) { + case COL_NS: + xasprintf(&str, "%ju", (uintmax_t)ns->id); + break; + case COL_PID: + if (proc) + xasprintf(&str, "%d", (int) proc->pid); + break; + case COL_PPID: + if (proc) + xasprintf(&str, "%d", (int) proc->ppid); + break; + case COL_TYPE: + xasprintf(&str, "%s", ns_names[ns->type]); + break; + case COL_NPROCS: + xasprintf(&str, "%d", ns->nprocs); + break; + case COL_COMMAND: + if (!proc) + break; + str = pid_get_cmdline(proc->pid); + if (!str) + str = pid_get_cmdname(proc->pid); + break; + case COL_PATH: + if (!proc) + break; + xasprintf(&str, "/proc/%d/ns/%s", (int) proc->pid, ns_names[ns->type]); + break; + case COL_UID: + xasprintf(&str, "%d", proc? (int) proc->uid: (int) ns->uid_fallback); + break; + case COL_USER: + xasprintf(&str, "%s", get_id(uid_cache, proc? proc->uid: ns->uid_fallback)->name); + break; + case COL_NETNSID: + if (!proc) + break; + if (ns->type == LSNS_ID_NET) + netnsid_xasputs(&str, proc->netnsid); + break; + case COL_NSFS: + nsfs_xasputs(&str, ns, ls->tab, ls->no_wrap ? 
',' : '\n'); + break; + case COL_PNS: + xasprintf(&str, "%ju", (uintmax_t)ns->related_id[RELA_PARENT]); + break; + case COL_ONS: + xasprintf(&str, "%ju", (uintmax_t)ns->related_id[RELA_OWNER]); + break; + default: + break; + } + + if (str && scols_line_refer_data(line, i, str) != 0) + err_oom(); + } + + if (ls->tree == LSNS_TREE_OWNER || ls->tree == LSNS_TREE_PARENT) + ns->ns_outline = line; + else if (proc) + proc->outline = line; +} + +static struct libscols_table *init_scols_table(struct lsns *ls) +{ + struct libscols_table *tab; + size_t i; + + tab = scols_new_table(); + if (!tab) { + warn(_("failed to initialize output table")); + return NULL; + } + + scols_table_enable_raw(tab, ls->raw); + scols_table_enable_json(tab, ls->json); + scols_table_enable_noheadings(tab, ls->no_headings); + + if (ls->json) + scols_table_set_name(tab, "namespaces"); + + for (i = 0; i < ncolumns; i++) { + const struct colinfo *col = get_column_info(i); + int flags = col->flags; + struct libscols_column *cl; + + if (ls->no_trunc) + flags &= ~SCOLS_FL_TRUNC; + if (ls->tree == LSNS_TREE_PROCESS && get_column_id(i) == COL_COMMAND) + flags |= SCOLS_FL_TREE; + if (ls->no_wrap) + flags &= ~SCOLS_FL_WRAP; + if ((ls->tree == LSNS_TREE_OWNER || ls->tree == LSNS_TREE_PARENT) + && get_column_id(i) == COL_NS) { + flags |= SCOLS_FL_TREE; + flags &= ~SCOLS_FL_RIGHT; + } + + cl = scols_table_new_column(tab, col->name, col->whint, flags); + if (cl == NULL) { + warnx(_("failed to initialize output column")); + goto err; + } + if (ls->json) + scols_column_set_json_type(cl, col->json_type); + + if (!ls->no_wrap && get_column_id(i) == COL_NSFS) { + scols_column_set_wrapfunc(cl, + scols_wrapnl_chunksize, + scols_wrapnl_nextchunk, + NULL); + scols_column_set_safechars(cl, "\n"); + } + } + + return tab; +err: + scols_unref_table(tab); + return NULL; +} + +static void show_namespace(struct lsns *ls, struct libscols_table *tab, + struct lsns_namespace *ns, struct lsns_process *proc) +{ + /* + * create a tree 
from owner->owned and/or parent->child relation + */ + if (ls->tree == LSNS_TREE_OWNER + && ns->related_ns[RELA_OWNER] + && !ns->related_ns[RELA_OWNER]->ns_outline) + show_namespace(ls, tab, ns->related_ns[RELA_OWNER], ns->related_ns[RELA_OWNER]->proc); + else if (ls->tree == LSNS_TREE_PARENT) { + if (ns->related_ns[RELA_PARENT]) { + if (!ns->related_ns[RELA_PARENT]->ns_outline) + show_namespace(ls, tab, ns->related_ns[RELA_PARENT], ns->related_ns[RELA_PARENT]->proc); + } + else if (ns->related_ns[RELA_OWNER] && !ns->related_ns[RELA_OWNER]->ns_outline) + show_namespace(ls, tab, ns->related_ns[RELA_OWNER], ns->related_ns[RELA_OWNER]->proc); + } + + add_scols_line(ls, tab, ns, proc); +} + +static int show_namespaces(struct lsns *ls) +{ + struct libscols_table *tab; + struct list_head *p; + int rc = 0; + + tab = init_scols_table(ls); + if (!tab) + return -ENOMEM; + + list_for_each(p, &ls->namespaces) { + struct lsns_namespace *ns = list_entry(p, struct lsns_namespace, namespaces); + + if (ls->fltr_pid != 0 && !namespace_has_process(ns, ls->fltr_pid)) + continue; + + if (!ns->ns_outline) + show_namespace(ls, tab, ns, ns->proc); + } + + scols_print_table(tab); + scols_unref_table(tab); + return rc; +} + +static void show_process(struct lsns *ls, struct libscols_table *tab, + struct lsns_process *proc, struct lsns_namespace *ns) +{ + /* + * create a tree from parent->child relation, but only if the parent is + * within the same namespace + */ + if (ls->tree == LSNS_TREE_PROCESS + && proc->parent + && !proc->parent->outline + && proc->parent->ns_ids[ns->type] == proc->ns_ids[ns->type]) + show_process(ls, tab, proc->parent, ns); + + add_scols_line(ls, tab, ns, proc); +} + + +static int show_namespace_processes(struct lsns *ls, struct lsns_namespace *ns) +{ + struct libscols_table *tab; + struct list_head *p; + + tab = init_scols_table(ls); + if (!tab) + return -ENOMEM; + + list_for_each(p, &ns->processes) { + struct lsns_process *proc = list_entry(p, struct lsns_process, 
ns_siblings[ns->type]); + + if (!proc->outline) + show_process(ls, tab, proc, ns); + } + + + scols_print_table(tab); + scols_unref_table(tab); + return 0; +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + size_t i; + + fputs(USAGE_HEADER, out); + + fprintf(out, + _(" %s [options] [<namespace>]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("List system namespaces.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -J, --json use JSON output format\n"), out); + fputs(_(" -l, --list use list format output\n"), out); + fputs(_(" -n, --noheadings don't print headings\n"), out); + fputs(_(" -o, --output <list> define which output columns to use\n"), out); + fputs(_(" --output-all output all columns\n"), out); + fputs(_(" -p, --task <pid> print process namespaces\n"), out); + fputs(_(" -r, --raw use the raw output format\n"), out); + fputs(_(" -u, --notruncate don't truncate text in columns\n"), out); + fputs(_(" -W, --nowrap don't use multi-line representation\n"), out); + fputs(_(" -t, --type <name> namespace type (mnt, net, ipc, user, pid, uts, cgroup, time)\n"), out); + fputs(_(" -T, --tree <rel> use tree format (parent, owner, or process)\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(24)); + + fputs(USAGE_COLUMNS, out); + for (i = 0; i < ARRAY_SIZE(infos); i++) + fprintf(out, " %11s %s\n", infos[i].name, _(infos[i].help)); + + printf(USAGE_MAN_TAIL("lsns(8)")); + + exit(EXIT_SUCCESS); +} + + +int main(int argc, char *argv[]) +{ + struct lsns ls; + int c, force_list = 0; + int r = 0; + char *outarg = NULL; + enum { + OPT_OUTPUT_ALL = CHAR_MAX + 1 + }; + static const struct option long_opts[] = { + { "json", no_argument, NULL, 'J' }, + { "task", required_argument, NULL, 'p' }, + { "help", no_argument, NULL, 'h' }, + { "output", required_argument, NULL, 'o' }, + { "output-all", no_argument, NULL, OPT_OUTPUT_ALL }, + { "notruncate", no_argument, NULL, 'u' }, + { "version", 
no_argument, NULL, 'V' }, + { "noheadings", no_argument, NULL, 'n' }, + { "nowrap", no_argument, NULL, 'W' }, + { "list", no_argument, NULL, 'l' }, + { "raw", no_argument, NULL, 'r' }, + { "type", required_argument, NULL, 't' }, + { "tree", optional_argument, NULL, 'T' }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'J','r' }, + { 'l','T' }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + int is_net = 0; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + lsns_init_debug(); + memset(&ls, 0, sizeof(ls)); + + INIT_LIST_HEAD(&ls.processes); + INIT_LIST_HEAD(&ls.namespaces); + INIT_LIST_HEAD(&netnsids_cache); + + while ((c = getopt_long(argc, argv, + "Jlp:o:nruhVt:T::W", long_opts, NULL)) != -1) { + + err_exclusive_options(c, long_opts, excl, excl_st); + + switch(c) { + case 'J': + ls.json = 1; + break; + case 'l': + force_list = 1; + break; + case 'o': + outarg = optarg; + break; + case OPT_OUTPUT_ALL: + for (ncolumns = 0; ncolumns < ARRAY_SIZE(infos); ncolumns++) + columns[ncolumns] = ncolumns; + break; + case 'p': + ls.fltr_pid = strtos32_or_err(optarg, _("invalid PID argument")); + break; + case 'n': + ls.no_headings = 1; + break; + case 'r': + ls.no_wrap = ls.raw = 1; + break; + case 'u': + ls.no_trunc = 1; + break; + case 't': + { + int type = ns_name2type(optarg); + if (type < 0) + errx(EXIT_FAILURE, _("unknown namespace type: %s"), optarg); + ls.fltr_types[type] = 1; + ls.fltr_ntypes++; + if (type == LSNS_ID_NET) + is_net = 1; + break; + } + case 'W': + ls.no_wrap = 1; + break; + case 'T': + ls.tree = LSNS_TREE_OWNER; + if (optarg) { + if (*optarg == '=') + optarg++; + if (strcmp (optarg, "parent") == 0) + ls.tree = LSNS_TREE_PARENT; + else if (strcmp (optarg, "process") == 0) + ls.tree = LSNS_TREE_PROCESS; + else if (strcmp (optarg, "owner") != 0) + errx(EXIT_FAILURE, _("unknown tree type: %s"), optarg); + } + 
break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (!ls.fltr_ntypes) { + size_t i; + + for (i = 0; i < ARRAY_SIZE(ns_names); i++) + ls.fltr_types[i] = 1; + } + + if (optind < argc) { + if (ls.fltr_pid) + errx(EXIT_FAILURE, _("--task is mutually exclusive with <namespace>")); + ls.fltr_ns = strtou64_or_err(argv[optind], _("invalid namespace argument")); + if (!ls.tree && !force_list) + ls.tree = LSNS_TREE_PROCESS; + + if (!ncolumns) { + columns[ncolumns++] = COL_PID; + columns[ncolumns++] = COL_PPID; + columns[ncolumns++] = COL_USER; + columns[ncolumns++] = COL_COMMAND; + } + } + + if (!ncolumns) { + columns[ncolumns++] = COL_NS; + columns[ncolumns++] = COL_TYPE; + columns[ncolumns++] = COL_NPROCS; + columns[ncolumns++] = COL_PID; + columns[ncolumns++] = COL_USER; + if (is_net) { + columns[ncolumns++] = COL_NETNSID; + columns[ncolumns++] = COL_NSFS; + } + columns[ncolumns++] = COL_COMMAND; + + if (!ls.tree && !force_list) + ls.tree = LSNS_TREE_PROCESS; + } + +#ifndef USE_NS_GET_API + if (ls.tree && ls.tree != LSNS_TREE_PROCESS) + errx(EXIT_FAILURE, _("--tree={parent|owner} is unsupported for your system")); +#endif + if (outarg && string_add_to_idarray(outarg, columns, ARRAY_SIZE(columns), + &ncolumns, column_name_to_id) < 0) + return EXIT_FAILURE; + + scols_init_debug(0); + + uid_cache = new_idcache(); + if (!uid_cache) + err(EXIT_FAILURE, _("failed to allocate UID cache")); + +#ifdef HAVE_LINUX_NET_NAMESPACE_H + if (has_column(COL_NETNSID)) + netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); +#endif + if (has_column(COL_NSFS)) { + ls.tab = mnt_new_table_from_file(_PATH_PROC_MOUNTINFO); + if (!ls.tab) + err(MNT_EX_FAIL, _("failed to parse %s"), _PATH_PROC_MOUNTINFO); + } + + r = read_processes(&ls); + if (!r) + r = read_namespaces(&ls); + if (!r) { + if (ls.fltr_ns) { + struct lsns_namespace *ns = get_namespace(&ls, ls.fltr_ns); + + if (!ns) + errx(EXIT_FAILURE, _("not found 
namespace: %ju"), (uintmax_t) ls.fltr_ns); + r = show_namespace_processes(&ls, ns); + } else + r = show_namespaces(&ls); + } + + mnt_free_table(ls.tab); + if (netlink_fd >= 0) + close(netlink_fd); + free_idcache(uid_cache); + return r == 0 ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/sys-utils/meson.build b/sys-utils/meson.build new file mode 100644 index 0000000..4b6cb7a --- /dev/null +++ b/sys-utils/meson.build @@ -0,0 +1,233 @@ +dir_sys_utils = include_directories('.') + +hwclock_parse_date = bison_gen.process('hwclock-parse-date.y') + +lsmem_sources = files( + 'lsmem.c', +) + +chmem_sources = files( + 'chmem.c', +) + +choom_sources = files( + 'choom.c', +) + +ipcmk_sources = files( + 'ipcmk.c', +) + +ipcrm_sources = files( + 'ipcrm.c', +) + +ipcs_sources = files( + 'ipcs.c', + 'ipcutils.c', + 'ipcutils.h', +) + +rfkill_sources = files( + 'rfkill.c', +) + +renice_sources = files( + 'renice.c', +) + +setsid_sources = files( + 'setsid.c', +) + +readprofile_sources = files( + 'readprofile.c', +) + +tunelp_sources = files( + 'tunelp.c', +) + +fstrim_sources = files( + 'fstrim.c', +) + +dmesg_sources = files( + 'dmesg.c', +) + \ + monotonic_c + +ctrlaltdel_sources = files( + 'ctrlaltdel.c', +) + +fsfreeze_sources = files( + 'fsfreeze.c', +) + +blkdiscard_sources = files( + 'blkdiscard.c', +) + \ + monotonic_c + +blkzone_sources = files( + 'blkzone.c', +) + +ldattach_sources = files( + 'ldattach.c', +) + +rtcwake_sources = files( + 'rtcwake.c', +) + +setarch_sources = files( + 'setarch.c', +) + +eject_sources = files( + 'eject.c', +) + \ + monotonic_c + +losetup_sources = files( + 'losetup.c', +) + +zramctl_sources = files( + 'zramctl.c', +) + \ + ismounted_c + +prlimit_sources = files( + 'prlimit.c', +) + +lsns_sources = files( + 'lsns.c', +) + +mount_sources = files( + 'mount.c', +) + +umount_sources = files( + 'umount.c', +) +swapon_sources = files( + 'swapon.c', + 'swapon-common.c', + 'swapon-common.h', +) + \ + swapprober_c + +swapoff_sources = files( + 
'swapoff.c', + 'swapon-common.c', + 'swapon-common.h', +) + \ + swapprober_c + +lscpu_sources = files( + 'lscpu.c', + 'lscpu.h', + 'lscpu-cputype.c', + 'lscpu-cpu.c', + 'lscpu-topology.c', + 'lscpu-virt.c', + 'lscpu-arm.c', + 'lscpu-dmi.c', +) + +chcpu_sources = files( + 'chcpu.c', +) + +wdctl_sources = files( + 'wdctl.c', +) + +mountpoint_sources = files( + 'mountpoint.c', +) + +fallocate_sources = files( + 'fallocate.c', +) + +pivot_root_sources = files( + 'pivot_root.c', +) + +switch_root_sources = files( + 'switch_root.c', +) + +unshare_sources = files( + 'unshare.c', +) + \ + exec_shell_c + +nsenter_sources = files( + 'nsenter.c', +) + \ + exec_shell_c + +setpriv_sources = files( + 'setpriv.c', +) + +flock_sources = files( + 'flock.c', +) + \ + monotonic_c + \ + timer_c + +lsipc_sources = files( + 'lsipc.c', + 'ipcutils.c', + 'ipcutils.h', +) + +lsirq_sources = files( + 'lsirq.c', + 'irq-common.c', + 'irq-common.h', +) + +irqtop_sources = files( + 'irqtop.c', + 'irq-common.c', + 'irq-common.h', +) + \ + monotonic_c + +hwclock_sources = [ + 'sys-utils/hwclock.c', + 'sys-utils/hwclock.h', + hwclock_parse_date, +] +if use_hwclock_cmos + hwclock_sources += [ + 'sys-utils/hwclock-cmos.c', + ] +endif +if LINUX + hwclock_sources += [ + 'sys-utils/hwclock-rtc.c', + monotonic_c, + ] +endif + +if systemd.found() + fstrim_service = configure_file( + input : 'fstrim.service.in', + output : 'fstrim.service', + configuration : conf) + + install_data(fstrim_service, + install_dir : systemdsystemunitdir) + install_data('fstrim.timer', + install_dir : systemdsystemunitdir) +endif diff --git a/sys-utils/mount.8 b/sys-utils/mount.8 new file mode 100644 index 0000000..c8abc7c --- /dev/null +++ b/sys-utils/mount.8 @@ -0,0 +1,2418 @@ +'\" t +.\" Title: mount +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-08-04 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "MOUNT" "8" "2022-08-04" 
"util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +mount \- mount a filesystem +.SH "SYNOPSIS" +.sp +\fBmount\fP [\fB\-h\fP|\fB\-V\fP] +.sp +\fBmount\fP [\fB\-l\fP] [\fB\-t\fP \fIfstype\fP] +.sp +\fBmount\fP \fB\-a\fP [\fB\-fFnrsvw\fP] [\fB\-t\fP \fIfstype\fP] [\fB\-O\fP \fIoptlist\fP] +.sp +\fBmount\fP [\fB\-fnrsvw\fP] [\fB\-o\fP \fIoptions\fP] \fIdevice\fP|\fImountpoint\fP +.sp +\fBmount\fP [\fB\-fnrsvw\fP] [\fB\-t\fP \fIfstype\fP] [\fB\-o\fP \fIoptions\fP] \fIdevice mountpoint\fP +.sp +\fBmount\fP \fB\-\-bind\fP|\fB\-\-rbind\fP|\fB\-\-move\fP \fIolddir newdir\fP +.sp +\fBmount\fP \fB\-\-make\-\fP[\fBshared\fP|\fBslave\fP|\fBprivate\fP|\fBunbindable\fP|\fBrshared\fP|\fBrslave\fP|\fBrprivate\fP|\fBrunbindable\fP] \fImountpoint\fP +.SH "DESCRIPTION" +.sp +All files accessible in a Unix system are arranged in one big tree, the file hierarchy, rooted at \fI/\fP. These files can be spread out over several devices. The \fBmount\fP command serves to attach the filesystem found on some device to the big file tree. Conversely, the \fBumount\fP(8) command will detach it again. The filesystem is used to control how data is stored on the device or provided in a virtual way by network or other services. +.sp +The standard form of the \fBmount\fP command is: +.RS 3 +.ll -.6i +.sp +\fBmount \-t\fP \fItype device dir\fP +.br +.RE +.ll +.sp +This tells the kernel to attach the filesystem found on \fIdevice\fP (which is of type \fItype\fP) at the directory \fIdir\fP. The option \fB\-t\fP \fItype\fP is optional. The \fBmount\fP command is usually able to detect a filesystem. The root permissions are necessary to mount a filesystem by default. See section "Non\-superuser mounts" below for more details. 
The previous contents (if any) and owner and mode of \fIdir\fP become invisible, and as long as this filesystem remains mounted, the pathname \fIdir\fP refers to the root of the filesystem on \fIdevice\fP. +.sp +If only the directory or the device is given, for example: +.RS 3 +.ll -.6i +.sp +\fBmount\fP \fI/dir\fP +.br +.RE +.ll +.sp +then \fBmount\fP looks for a mountpoint (and if not found then for a device) in the \fI/etc/fstab\fP file. It\(cqs possible to use the \fB\-\-target\fP or \fB\-\-source\fP options to avoid ambiguous interpretation of the given argument. For example: +.RS 3 +.ll -.6i +.sp +\fBmount \-\-target\fP \fI/mountpoint\fP +.br +.RE +.ll +.sp +The same filesystem may be mounted more than once, and in some cases (e.g., network filesystems) the same filesystem may be mounted on the same mountpoint multiple times. The \fBmount\fP command does not implement any policy to control this behavior. All behavior is controlled by the kernel and it is usually specific to the filesystem driver. The exception is \fB\-\-all\fP, in this case already mounted filesystems are ignored (see \fB\-\-all\fP below for more details). +.SS "Listing the mounts" +.sp +The listing mode is maintained for backward compatibility only. +.sp +For more robust and customizable output use \fBfindmnt\fP(8), \fBespecially in your scripts\fP. Note that control characters in the mountpoint name are replaced with \(aq?\(aq. +.sp +The following command lists all mounted filesystems (of type \fItype\fP): +.RS 3 +.ll -.6i +.sp +\fBmount\fP [\fB\-l\fP] [\fB\-t\fP \fItype\fP] +.br +.RE +.ll +.sp +The option \fB\-l\fP adds labels to this listing. See below. +.SS "Indicating the device and filesystem" +.sp +Most devices are indicated by a filename (of a block special device), like \fI/dev/sda1\fP, but there are other possibilities. For example, in the case of an NFS mount, \fIdevice\fP may look like \fIknuth.cwi.nl:/dir\fP. 
+.sp +The device names of disk partitions are unstable; hardware reconfiguration, and adding or removing a device can cause changes in names. This is the reason why it\(cqs strongly recommended to use filesystem or partition identifiers like UUID or LABEL. Currently supported identifiers (tags): +.sp +LABEL=\fIlabel\fP +.RS 4 +Human readable filesystem identifier. See also \fB\-L\fP. +.RE +.sp +UUID=\fIuuid\fP +.RS 4 +Filesystem universally unique identifier. The format of the UUID is usually a series of hex digits separated by hyphens. See also \fB\-U\fP. +.sp +Note that \fBmount\fP uses UUIDs as strings. The UUIDs from the command line or from \fBfstab\fP(5) are not converted to internal binary representation. The string representation of the UUID should be based on lower case characters. +.RE +.sp +PARTLABEL=\fIlabel\fP +.RS 4 +Human readable partition identifier. This identifier is independent of the filesystem and is not changed by \fBmkfs\fP or \fBmkswap\fP operations. It\(cqs supported for example for GUID Partition Tables (GPT). +.RE +.sp +PARTUUID=\fIuuid\fP +.RS 4 +Partition universally unique identifier. This identifier is independent of the filesystem and is not changed by \fBmkfs\fP or \fBmkswap\fP operations. It\(cqs supported for example for GUID Partition Tables (GPT). +.RE +.sp +ID=\fIid\fP +.RS 4 +Hardware block device ID as generated by udevd. This identifier is usually based on WWN (unique storage identifier) and assigned by the hardware manufacturer. See \fBls /dev/disk/by\-id\fP for more details; this directory and a running udevd are required. This identifier is not recommended for generic use as the identifier is not strictly defined and it depends on udev, udev rules and hardware. +.RE +.sp +The command \fBlsblk \-\-fs\fP provides an overview of filesystems, LABELs and UUIDs on available block devices. The command \fBblkid \-p <device>\fP provides details about a filesystem on the specified device.
+.sp +Don\(cqt forget that there is no guarantee that UUIDs and labels are really unique, especially if you move, share or copy the device. Use \fBlsblk \-o +UUID,PARTUUID\fP to verify that the UUIDs are really unique in your system. +.sp +The recommended setup is to use tags (e.g. \fBUUID\fP=\fIuuid\fP) rather than \fI/dev/disk/by\-{label,uuid,id,partuuid,partlabel}\fP udev symlinks in the \fI/etc/fstab\fP file. Tags are more readable, robust and portable. The \fBmount\fP(8) command internally uses udev symlinks, so the use of symlinks in \fI/etc/fstab\fP has no advantage over tags. For more details see \fBlibblkid\fP(3). +.sp +The \fIproc\fP filesystem is not associated with a special device, and when mounting it, an arbitrary keyword \- for example, \fIproc\fP \- can be used instead of a device specification. (The customary choice \fInone\fP is less fortunate: the error message \(aqnone already mounted\(aq from \fBmount\fP can be confusing.) +.SS "The files /etc/fstab, /etc/mtab and /proc/mounts" +.sp +The file \fI/etc/fstab\fP (see \fBfstab\fP(5)), may contain lines describing what devices are usually mounted where, using which options. The default location of the \fBfstab\fP(5) file can be overridden with the \fB\-\-fstab\fP \fIpath\fP command\-line option (see below for more details). +.sp +The command +.RS 3 +.ll -.6i +.sp +\fBmount \-a\fP [\fB\-t\fP \fItype\fP] [\fB\-O\fP \fIoptlist\fP] +.br +.RE +.ll +.sp +(usually given in a bootscript) causes all filesystems mentioned in \fIfstab\fP (of the proper type and/or having or not having the proper options) to be mounted as indicated, except for those whose line contains the \fBnoauto\fP keyword. Adding the \fB\-F\fP option will make \fBmount\fP fork, so that the filesystems are mounted in parallel. +.sp +When mounting a filesystem mentioned in \fIfstab\fP or \fImtab\fP, it suffices to specify on the command line only the device, or only the mount point. 
+.sp +The programs \fBmount\fP and \fBumount\fP(8) traditionally maintained a list of currently mounted filesystems in the file \fI/etc/mtab\fP. The support for regular classic \fI/etc/mtab\fP is completely disabled at compile time by default, because on current Linux systems it is better to make \fI/etc/mtab\fP a symlink to \fI/proc/mounts\fP instead. The regular \fImtab\fP file maintained in userspace cannot reliably work with namespaces, containers and other advanced Linux features. If the regular \fImtab\fP support is enabled, then it\(cqs possible to use the file as well as the symlink. +.sp +If no arguments are given to \fBmount\fP, the list of mounted filesystems is printed. +.sp +If you want to override mount options from \fI/etc/fstab\fP, you have to use the \fB\-o\fP option: +.RS 3 +.ll -.6i +.sp +\fBmount\fP \fIdevice\fP|\fIdir\fP \fB\-o\fP \fIoptions\fP +.br +.RE +.ll +.sp +and then the mount options from the command line will be appended to the list of options from \fI/etc/fstab\fP. This default behaviour can be changed using the \fB\-\-options\-mode\fP command\-line option. The usual behavior is that the last option wins if there are conflicting ones. +.sp +The \fBmount\fP program does not read the \fI/etc/fstab\fP file if both \fIdevice\fP (or LABEL, UUID, ID, PARTUUID or PARTLABEL) and \fIdir\fP are specified. For example, to mount device \fBfoo\fP at \fB/dir\fP: +.RS 3 +.ll -.6i +.sp +\fBmount /dev/foo /dir\fP +.br +.RE +.ll +.sp +This default behaviour can be changed by using the \fB\-\-options\-source\-force\fP command\-line option to always read configuration from \fIfstab\fP. For non\-root users \fBmount\fP always reads the \fIfstab\fP configuration. +.SS "Non\-superuser mounts" +.sp +Normally, only the superuser can mount filesystems. However, when \fIfstab\fP contains the \fBuser\fP option on a line, anybody can mount the corresponding filesystem. 
+.sp +Thus, given a line +.RS 3 +.ll -.6i +.sp +\fB/dev/cdrom /cd iso9660 ro,user,noauto,unhide\fP +.br +.RE +.ll +.sp +any user can mount the iso9660 filesystem found on an inserted CDROM using the command: +.RS 3 +.ll -.6i +.sp +\fBmount /cd\fP +.br +.RE +.ll +.sp +Note that \fBmount\fP is very strict about non\-root users and all paths specified on command line are verified before \fIfstab\fP is parsed or a helper program is executed. It\(cqs strongly recommended to use a valid mountpoint to specify filesystem, otherwise \fBmount\fP may fail. For example it\(cqs a bad idea to use NFS or CIFS source on command line. +.sp +Since util\-linux 2.35, \fBmount\fP does not exit when user permissions are inadequate according to libmount\(cqs internal security rules. Instead, it drops suid permissions and continues as regular non\-root user. This behavior supports use\-cases where root permissions are not necessary (e.g., fuse filesystems, user namespaces, etc). +.sp +For more details, see \fBfstab\fP(5). Only the user that mounted a filesystem can unmount it again. If any user should be able to unmount it, then use \fBusers\fP instead of \fBuser\fP in the \fIfstab\fP line. The \fBowner\fP option is similar to the \fBuser\fP option, with the restriction that the user must be the owner of the special file. This may be useful e.g. for \fI/dev/fd\fP if a login script makes the console user owner of this device. The \fBgroup\fP option is similar, with the restriction that the user must be a member of the group of the special file. +.SS "Bind mount operation" +.sp +Remount part of the file hierarchy somewhere else. The call is: +.RS 3 +.ll -.6i +.sp +\fBmount \-\-bind\fP \fIolddir newdir\fP +.br +.RE +.ll +.sp +or by using this \fIfstab\fP entry: +.RS 3 +.ll -.6i +.sp +\fB/\fP\fIolddir\fP \fB/\fP\fInewdir\fP \fBnone bind\fP +.br +.RE +.ll +.sp +After this call the same contents are accessible in two places. 
+.sp +It is important to understand that "bind" does not create any second\-class or special node in the kernel VFS. The "bind" is just another operation to attach a filesystem. No information is stored anywhere to record that the filesystem has been attached by a "bind" operation. The \fIolddir\fP and \fInewdir\fP are independent and the \fIolddir\fP may be unmounted. +.sp +One can also remount a single file (on a single file). It\(cqs also possible to use a bind mount to create a mountpoint from a regular directory, for example: +.RS 3 +.ll -.6i +.sp +\fBmount \-\-bind foo foo\fP +.br +.RE +.ll +.sp +The bind mount call attaches only (part of) a single filesystem, not possible submounts. The entire file hierarchy including submounts can be attached in a second place by using: +.RS 3 +.ll -.6i +.sp +\fBmount \-\-rbind\fP \fIolddir newdir\fP +.br +.RE +.ll +.sp +Note that the filesystem mount options maintained by the kernel will remain the same as those on the original mount point. The userspace mount options (e.g., _netdev) will not be copied by \fBmount\fP and it\(cqs necessary to explicitly specify the options on the \fBmount\fP command line. +.sp +Since util\-linux 2.27 \fBmount\fP permits changing the mount options by passing the relevant options along with \fB\-\-bind\fP. For example: +.RS 3 +.ll -.6i +.sp +\fBmount \-o bind,ro foo foo\fP +.br +.RE +.ll +.sp +This feature is not supported by the Linux kernel; it is implemented in userspace by an additional \fBmount\fP(2) remounting system call. This solution is not atomic.
+.sp +The alternative (classic) way to create a read\-only bind mount is to use the remount operation, for example: +.RS 3 +.ll -.6i +.sp +\fBmount \-\-bind\fP \fIolddir newdir\fP +.sp +\fBmount \-o remount,bind,ro\fP \fIolddir newdir\fP +.br +.RE +.ll +.sp +Note that a read\-only bind will create a read\-only mountpoint (VFS entry), but the original filesystem superblock will still be writable, meaning that the \fIolddir\fP will be writable, but the \fInewdir\fP will be read\-only. +.sp +It\(cqs also possible to change nosuid, nodev, noexec, noatime, nodiratime, relatime and nosymfollow VFS entry flags via a "remount,bind" operation. The other flags (for example filesystem\-specific flags) are silently ignored. It\(cqs impossible to change mount options recursively (for example with \fB\-o rbind,ro\fP). +.sp +Since util\-linux 2.31, \fBmount\fP ignores the \fBbind\fP flag from \fI/etc/fstab\fP on a \fBremount\fP operation (if \fB\-o remount\fP is specified on command line). This is necessary to fully control mount options on remount by command line. In previous versions the bind flag has been always applied and it was impossible to re\-define mount options without interaction with the bind semantic. This \fBmount\fP behavior does not affect situations when "remount,bind" is specified in the \fI/etc/fstab\fP file. +.SS "The move operation" +.sp +Move a \fBmounted tree\fP to another place (atomically). The call is: +.RS 3 +.ll -.6i +.sp +\fBmount \-\-move\fP \fIolddir newdir\fP +.br +.RE +.ll +.sp +This will cause the contents which previously appeared under \fIolddir\fP to now be accessible under \fInewdir\fP. The physical location of the files is not changed. Note that \fIolddir\fP has to be a mountpoint. +.sp +Note also that moving a mount residing under a shared mount is invalid and unsupported. Use \fBfindmnt \-o TARGET,PROPAGATION\fP to see the current propagation flags. 
+.SS "Shared subtree operations" +.sp +Since Linux 2.6.15 it is possible to mark a mount and its submounts as shared, private, slave or unbindable. A shared mount provides the ability to create mirrors of that mount such that mounts and unmounts within any of the mirrors propagate to the other mirror. A slave mount receives propagation from its master, but not vice versa. A private mount carries no propagation abilities. An unbindable mount is a private mount which cannot be cloned through a bind operation. The detailed semantics are documented in \fIDocumentation/filesystems/sharedsubtree.txt\fP file in the kernel source tree; see also \fBmount_namespaces\fP(7). +.sp +Supported operations are: +.sp +.if n .RS 4 +.nf +.fam C +mount \-\-make\-shared mountpoint +mount \-\-make\-slave mountpoint +mount \-\-make\-private mountpoint +mount \-\-make\-unbindable mountpoint +.fam +.fi +.if n .RE +.sp +The following commands allow one to recursively change the type of all the mounts under a given mountpoint. +.sp +.if n .RS 4 +.nf +.fam C +mount \-\-make\-rshared mountpoint +mount \-\-make\-rslave mountpoint +mount \-\-make\-rprivate mountpoint +mount \-\-make\-runbindable mountpoint +.fam +.fi +.if n .RE +.sp +\fBmount\fP \fBdoes not read\fP \fBfstab\fP(5) when a \fB\-\-make\-\fP* operation is requested. All necessary information has to be specified on the command line. +.sp +Note that the Linux kernel does not allow changing multiple propagation flags with a single \fBmount\fP(2) system call, and the flags cannot be mixed with other mount options and operations. +.sp +Since util\-linux 2.23 the \fBmount\fP command can be used to do more propagation (topology) changes by one \fBmount\fP(8) call and do it also together with other mount operations. The propagation flags are applied by additional \fBmount\fP(2) system calls when the preceding mount operations were successful. Note that this use case is not atomic. 
It is possible to specify the propagation flags in \fBfstab\fP(5) as mount options (\fBprivate\fP, \fBslave\fP, \fBshared\fP, \fBunbindable\fP, \fBrprivate\fP, \fBrslave\fP, \fBrshared\fP, \fBrunbindable\fP). +.sp +For example: +.sp +.if n .RS 4 +.nf +.fam C +mount \-\-make\-private \-\-make\-unbindable /dev/sda1 /foo +.fam +.fi +.if n .RE +.sp +is the same as: +.sp +.if n .RS 4 +.nf +.fam C +mount /dev/sda1 /foo +mount \-\-make\-private /foo +mount \-\-make\-unbindable /foo +.fam +.fi +.if n .RE +.SH "COMMAND\-LINE OPTIONS" +.sp +The full set of mount options used by an invocation of \fBmount\fP is determined by first extracting the mount options for the filesystem from the \fIfstab\fP table, then applying any options specified by the \fB\-o\fP argument, and finally applying a \fB\-r\fP or \fB\-w\fP option, when present. +.sp +The \fBmount\fP command does not pass all command\-line options to the \fB/sbin/mount.\fP\fIsuffix\fP mount helpers. The interface between \fBmount\fP and the mount helpers is described below in the \fBEXTERNAL HELPERS\fP section. +.sp +Command\-line options available for the \fBmount\fP command are: +.sp +\fB\-a\fP, \fB\-\-all\fP +.RS 4 +Mount all filesystems (of the given types) mentioned in \fIfstab\fP (except for those whose line contains the \fBnoauto\fP keyword). The filesystems are mounted following their order in \fIfstab\fP. The \fBmount\fP command compares filesystem source, target (and fs root for bind mount or btrfs) to detect already mounted filesystems. The kernel table with already mounted filesystems is cached during \fBmount \-\-all\fP. This means that all duplicated \fIfstab\fP entries will be mounted. +.sp +The correct functionality depends on \fI/proc\fP (to detect already mounted filesystems) and on \fI/sys\fP (to evaluate filesystem tags like UUID= or LABEL=). 
It\(cqs strongly recommended to mount \fI/proc\fP and \fI/sys\fP filesystems before \fBmount \-a\fP is executed, or keep /proc and /sys at the beginning of \fIfstab\fP. +.sp +The option \fB\-\-all\fP can also be used for remount operations. In this case all filters (\fB\-t\fP and \fB\-O\fP) are applied to the table of already mounted filesystems. +.sp +Since version 2.35 it is possible to use the command line option \fB\-o\fP to alter mount options from \fIfstab\fP (see also \fB\-\-options\-mode\fP). +.sp +Note that it is a bad practice to use \fBmount \-a\fP for \fIfstab\fP checking. The recommended solution is \fBfindmnt \-\-verify\fP. +.RE +.sp +\fB\-B\fP, \fB\-\-bind\fP +.RS 4 +Remount a subtree somewhere else (so that its contents are available in both places). See above, the subsection \fBBind mount operation\fP. +.RE +.sp +\fB\-c\fP, \fB\-\-no\-canonicalize\fP +.RS 4 +Don\(cqt canonicalize paths. The \fBmount\fP command canonicalizes all paths (from the command line or \fIfstab\fP) by default. This option can be used together with the \fB\-f\fP flag for already canonicalized absolute paths. The option is designed for mount helpers which call \fBmount \-i\fP. It is strongly recommended to not use this command\-line option for normal mount operations. +.sp +Note that \fBmount\fP does not pass this option to the \fB/sbin/mount.\fP\fItype\fP helpers. +.RE +.sp +\fB\-F\fP, \fB\-\-fork\fP +.RS 4 +(Used in conjunction with \fB\-a\fP.) Fork off a new incarnation of \fBmount\fP for each device. This will do the mounts on different devices or different NFS servers in parallel. This has the advantage that it is faster; also NFS timeouts proceed in parallel. A disadvantage is that the order of the mount operations is undefined. Thus, you cannot use this option if you want to mount both \fI/usr\fP and \fI/usr/spool\fP. +.RE +.sp +\fB\-f, \-\-fake\fP +.RS 4 +Causes everything to be done except for the actual system call; if it\(cqs not obvious, this "fakes" mounting the filesystem.
This option is useful in conjunction with the \fB\-v\fP flag to determine what the \fBmount\fP command is trying to do. It can also be used to add entries for devices that were mounted earlier with the \fB\-n\fP option. The \fB\-f\fP option checks for an existing record in \fI/etc/mtab\fP and fails when the record already exists (with a regular non\-fake mount, this check is done by the kernel). +.RE +.sp +\fB\-i, \-\-internal\-only\fP +.RS 4 +Don\(cqt call the \fB/sbin/mount.\fP\fIfilesystem\fP helper even if it exists. +.RE +.sp +\fB\-L\fP, \fB\-\-label\fP \fIlabel\fP +.RS 4 +Mount the partition that has the specified \fIlabel\fP. +.RE +.sp +\fB\-l\fP, \fB\-\-show\-labels\fP +.RS 4 +Add the labels in the mount output. \fBmount\fP must have permission to read the disk device (e.g. be set\-user\-ID root) for this to work. One can set such a label for ext2, ext3 or ext4 using the \fBe2label\fP(8) utility, or for XFS using \fBxfs_admin\fP(8), or for reiserfs using \fBreiserfstune\fP(8). +.RE +.sp +\fB\-M\fP, \fB\-\-move\fP +.RS 4 +Move a subtree to some other place. See above, the subsection \fBThe move operation\fP. +.RE +.sp +\fB\-m\fP, \fB\-\-mkdir\fP[=\fImode\fP] +.RS 4 +Allow \fBmount\fP to create the target directory (mountpoint) if it does not exist yet. Alias to "\-o X\-mount.mkdir[=mode]", the default mode is 0755. For more details see \fBX\-mount.mkdir\fP below. +.RE +.sp +\fB\-n\fP, \fB\-\-no\-mtab\fP +.RS 4 +Mount without writing in \fI/etc/mtab\fP. This is necessary for example when \fI/etc\fP is on a read\-only filesystem. +.RE +.sp +\fB\-N\fP, \fB\-\-namespace\fP \fIns\fP +.RS 4 +Perform the mount operation in the mount namespace specified by \fIns\fP. \fIns\fP is either the PID of a process running in that namespace or a special file representing that namespace. +.sp +\fBmount\fP switches to the mount namespace when it reads \fI/etc/fstab\fP, writes \fI/etc/mtab\fP (or writes to \fI/run/mount\fP) and calls \fBmount\fP(2), otherwise it runs in the original mount namespace.
This means that the target namespace does not have to contain any libraries or other requirements necessary to execute the \fBmount\fP(2) call. +.sp +See \fBmount_namespaces\fP(7) for more information. +.RE +.sp +\fB\-O\fP, \fB\-\-test\-opts\fP \fIopts\fP +.RS 4 +Limit the set of filesystems to which the \fB\-a\fP option applies. In this regard it is like the \fB\-t\fP option except that \fB\-O\fP is useless without \fB\-a\fP. For example, the command +.sp +\fBmount \-a \-O no_netdev\fP +.sp +mounts all filesystems except those which have the option \fI_netdev\fP specified in the options field in the \fI/etc/fstab\fP file. +.sp +It is different from \fB\-t\fP in that each option is matched exactly; a leading \fBno\fP at the beginning of one option does not negate the rest. +.sp +The \fB\-t\fP and \fB\-O\fP options are cumulative in effect; that is, the command +.sp +\fBmount \-a \-t ext2 \-O _netdev\fP +.sp +mounts all ext2 filesystems with the _netdev option, not all filesystems that are either ext2 or have the _netdev option specified. +.RE +.sp +\fB\-o\fP, \fB\-\-options\fP \fIopts\fP +.RS 4 +Use the specified mount options. The \fIopts\fP argument is a comma\-separated list. For example: +.sp +\fBmount LABEL=mydisk \-o noatime,nodev,nosuid\fP +.sp +For more details, see the \fBFILESYSTEM\-INDEPENDENT MOUNT OPTIONS\fP and \fBFILESYSTEM\-SPECIFIC MOUNT OPTIONS\fP sections. +.RE +.sp +\fB\-\-options\-mode\fP \fImode\fP +.RS 4 +Controls how to combine options from \fIfstab\fP/\fImtab\fP with options from the command line. \fImode\fP can be one of \fBignore\fP, \fBappend\fP, \fBprepend\fP or \fBreplace\fP. For example, \fBappend\fP means that options from \fIfstab\fP are appended to options from the command line. The default value is \fBprepend\fP \(em it means command line options are evaluated after \fIfstab\fP options. Note that the last option wins if there are conflicting ones. +.RE +.sp +\fB\-\-options\-source\fP \fIsource\fP +.RS 4 +Source of default options.
\fIsource\fP is a comma\-separated list of \fBfstab\fP, \fBmtab\fP and \fBdisable\fP. \fBdisable\fP disables \fBfstab\fP and \fBmtab\fP and enables \fB\-\-options\-source\-force\fP. The default value is \fBfstab,mtab\fP. +.RE +.sp +\fB\-\-options\-source\-force\fP +.RS 4 +Use options from \fIfstab\fP/\fImtab\fP even if both \fIdevice\fP and \fIdir\fP are specified. +.RE +.sp +\fB\-R\fP, \fB\-\-rbind\fP +.RS 4 +Remount a subtree and all possible submounts somewhere else (so that its contents are available in both places). See above, the subsection \fBBind mount operation\fP. +.RE +.sp +\fB\-r\fP, \fB\-\-read\-only\fP +.RS 4 +Mount the filesystem read\-only. A synonym is \fB\-o ro\fP. +.sp +Note that, depending on the filesystem type, state and kernel behavior, the system may still write to the device. For example, ext3 and ext4 will replay the journal if the filesystem is dirty. To prevent this kind of write access, you may want to mount an ext3 or ext4 filesystem with the \fBro,noload\fP mount options or set the block device itself to read\-only mode, see the \fBblockdev\fP(8) command. +.RE +.sp +\fB\-s\fP +.RS 4 +Tolerate sloppy mount options rather than failing. This will ignore mount options not supported by a filesystem type. Not all filesystems support this option. Currently it\(cqs supported by the \fBmount.nfs\fP mount helper only. +.RE +.sp +\fB\-\-source\fP \fIdevice\fP +.RS 4 +If only one argument for the \fBmount\fP command is given, then the argument might be interpreted as the target (mountpoint) or source (device). This option allows you to explicitly define that the argument is the mount source. +.RE +.sp +\fB\-\-target\fP \fIdirectory\fP +.RS 4 +If only one argument for the \fBmount\fP command is given, then the argument might be interpreted as the target (mountpoint) or source (device). This option allows you to explicitly define that the argument is the mount target.
+.RE +.sp +\fB\-\-target\-prefix\fP \fIdirectory\fP +.RS 4 +Prepend the specified directory to all mount targets. This option can be used to follow \fIfstab\fP, but mount operations are done in another place, for example: +.sp +\fBmount \-\-all \-\-target\-prefix /chroot \-o X\-mount.mkdir\fP +.sp +mounts all filesystems from the system \fIfstab\fP under \fI/chroot\fP; all missing mountpoints are created (due to X\-mount.mkdir). See also \fB\-\-fstab\fP to use an alternative \fIfstab\fP. +.RE +.sp +\fB\-T\fP, \fB\-\-fstab\fP \fIpath\fP +.RS 4 +Specifies an alternative \fIfstab\fP file. If \fIpath\fP is a directory, then the files in the directory are sorted by \fBstrverscmp\fP(3); files that start with "." or without an \fI.fstab\fP extension are ignored. The option can be specified more than once. This option is mostly designed for initramfs or chroot scripts where additional configuration is specified beyond standard system configuration. +.sp +Note that \fBmount\fP does not pass the option \fB\-\-fstab\fP to the \fB/sbin/mount.\fP\fItype\fP helpers, meaning that the alternative \fIfstab\fP files will be invisible for the helpers. This is no problem for normal mounts, but user (non\-root) mounts always require \fIfstab\fP to verify the user\(cqs rights. +.RE +.sp +\fB\-t\fP, \fB\-\-types\fP \fIfstype\fP +.RS 4 +The argument following the \fB\-t\fP is used to indicate the filesystem type. The filesystem types which are currently supported depend on the running kernel. See \fI/proc/filesystems\fP and \fI/lib/modules/$(uname \-r)/kernel/fs\fP for a complete list of the filesystems. The most common are ext2, ext3, ext4, xfs, btrfs, vfat, sysfs, proc, nfs and cifs. +.sp +The programs \fBmount\fP and \fBumount\fP(8) support filesystem subtypes. The subtype is defined by a \(aq.subtype\(aq suffix. For example \(aqfuse.sshfs\(aq. It\(cqs recommended to use subtype notation rather than add any prefix to the mount source (for example \(aqsshfs#example.com\(aq is deprecated).
+.sp +If no \fB\-t\fP option is given, or if the \fBauto\fP type is specified, \fBmount\fP will try to guess the desired type. \fBmount\fP uses the \fBlibblkid\fP(3) library for guessing the filesystem type; if that does not turn up anything that looks familiar, \fBmount\fP will try to read the file \fI/etc/filesystems\fP, or, if that does not exist, \fI/proc/filesystems\fP. All of the filesystem types listed there will be tried, except for those that are labeled "nodev" (e.g. \fIdevpts\fP, \fIproc\fP and \fInfs\fP). If \fI/etc/filesystems\fP ends in a line with a single *, mount will read \fI/proc/filesystems\fP afterwards. While trying, all filesystem types will be mounted with the mount option \fBsilent\fP. +.sp +The \fBauto\fP type may be useful for user\-mounted floppies. Creating a file \fI/etc/filesystems\fP can be useful to change the probe order (e.g., to try vfat before msdos or ext3 before ext2) or if you use a kernel module autoloader. +.sp +More than one type may be specified in a comma\-separated list, for the \fB\-t\fP option as well as in an \fI/etc/fstab\fP entry. The list of filesystem types for the \fB\-t\fP option can be prefixed with \fBno\fP to specify the filesystem types on which no action should be taken. The prefix \fBno\fP has no effect when specified in an \fI/etc/fstab\fP entry. +.sp +The prefix \fBno\fP can be meaningful with the \fB\-a\fP option. For example, the command +.sp +\fBmount \-a \-t nomsdos,smbfs\fP +.sp +mounts all filesystems except those of type \fImsdos\fP and \fIsmbfs\fP. +.sp +For most types all the \fBmount\fP program has to do is issue a simple \fBmount\fP(2) system call, and no detailed knowledge of the filesystem type is required. For a few types however (like nfs, nfs4, cifs, smbfs, ncpfs) ad hoc code is necessary. The nfs, nfs4, cifs, smbfs, and ncpfs filesystems have a separate mount program.
In order to make it possible to treat all types in a uniform way, \fBmount\fP will execute the program \fB/sbin/mount.\fP\fItype\fP (if that exists) when called with type \fItype\fP. Since different versions of the \fBsmbmount\fP program have different calling conventions, \fB/sbin/mount.smbfs\fP may have to be a shell script that sets up the desired call. +.RE +.sp +\fB\-U\fP, \fB\-\-uuid\fP \fIuuid\fP +.RS 4 +Mount the partition that has the specified \fIuuid\fP. +.RE +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Verbose mode. +.RE +.sp +\fB\-w\fP, \fB\-\-rw\fP, \fB\-\-read\-write\fP +.RS 4 +Mount the filesystem read/write. Read\-write is the kernel default and the \fBmount\fP default is to try read\-only if the previous \fBmount\fP(2) syscall with read\-write flags on write\-protected devices failed. +.sp +A synonym is \fB\-o rw\fP. +.sp +Note that specifying \fB\-w\fP on the command line forces \fBmount\fP to never try read\-only mount on write\-protected devices or already mounted read\-only filesystems. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "FILESYSTEM\-INDEPENDENT MOUNT OPTIONS" +.sp +Some of these options are only useful when they appear in the \fI/etc/fstab\fP file. +.sp +Some of these options could be enabled or disabled by default in the system kernel. To check the current setting see the options in \fI/proc/mounts\fP. Note that filesystems also have per\-filesystem specific default mount options (see for example \fBtune2fs \-l\fP output for ext\fIN\fP filesystems). +.sp +The following options apply to any filesystem that is being mounted (but not every filesystem actually honors them \- e.g., the \fBsync\fP option today has an effect only for ext2, ext3, ext4, fat, vfat, ufs and xfs): +.sp +\fBasync\fP +.RS 4 +All I/O to the filesystem should be done asynchronously. (See also the \fBsync\fP option.) 
+.RE +.sp +\fBatime\fP +.RS 4 +Do not use the \fBnoatime\fP feature, so the inode access time is controlled by kernel defaults. See also the descriptions of the \fBrelatime\fP and \fBstrictatime\fP mount options. +.RE +.sp +\fBnoatime\fP +.RS 4 +Do not update inode access times on this filesystem (e.g. for faster access on the news spool to speed up news servers). This works for all inode types (directories too), so it implies \fBnodiratime\fP. +.RE +.sp +\fBauto\fP +.RS 4 +Can be mounted with the \fB\-a\fP option. +.RE +.sp +\fBnoauto\fP +.RS 4 +Can only be mounted explicitly (i.e., the \fB\-a\fP option will not cause the filesystem to be mounted). +.RE +.sp +\fBcontext=\fP\fIcontext\fP, \fBfscontext=\fP\fIcontext\fP, \fBdefcontext=\fP\fIcontext\fP, and \fBrootcontext=\fP\fIcontext\fP +.RS 4 +The \fBcontext=\fP option is useful when mounting filesystems that do not support extended attributes, such as a floppy or hard disk formatted with VFAT, or systems that are not normally running under SELinux, such as an ext3 or ext4 formatted disk from a non\-SELinux workstation. You can also use \fBcontext=\fP on filesystems you do not trust, such as a floppy. It also helps in compatibility with xattr\-supporting filesystems on earlier 2.4.<x> kernel versions. Even where xattrs are supported, you can save time not having to label every file by assigning the entire disk one security context. +.sp +A commonly used option for removable media is \fBcontext="system_u:object_r:removable_t"\fP. +.sp +The \fBfscontext=\fP option works for all filesystems, regardless of their xattr support. The fscontext option sets the overarching filesystem label to a specific security context. This filesystem label is separate from the individual labels on the files. It represents the entire filesystem for certain kinds of permission checks, such as during mount or file creation. Individual file labels are still obtained from the xattrs on the files themselves.
The context option actually sets the aggregate context that fscontext provides, in addition to supplying the same label for individual files. +.sp +You can set the default security context for unlabeled files using \fBdefcontext=\fP option. This overrides the value set for unlabeled files in the policy and requires a filesystem that supports xattr labeling. +.sp +The \fBrootcontext=\fP option allows you to explicitly label the root inode of a FS being mounted before that FS or inode becomes visible to userspace. This was found to be useful for things like stateless Linux. +.sp +Note that the kernel rejects any remount request that includes the context option, \fBeven\fP when unchanged from the current context. +.sp +\fBWarning: the\fP \fIcontext\fP \fBvalue might contain commas\fP, in which case the value has to be properly quoted, otherwise \fBmount\fP will interpret the comma as a separator between mount options. Don\(cqt forget that the shell strips off quotes and thus \fBdouble quoting is required\fP. For example: +.RE +.RS 3 +.ll -.6i +.sp +mount \-t tmpfs none /mnt \-o \(rs +\(aqcontext="system_u:object_r:tmp_t:s0:c127,c456",noexec\(aq +.br +.RE +.ll +.sp +For more details, see \fBselinux\fP(8). +.sp +\fBdefaults\fP +.RS 4 +Use the default options: \fBrw\fP, \fBsuid\fP, \fBdev\fP, \fBexec\fP, \fBauto\fP, \fBnouser\fP, and \fBasync\fP. +.sp +Note that the real set of all default mount options depends on the kernel and filesystem type. See the beginning of this section for more details. +.RE +.sp +\fBdev\fP +.RS 4 +Interpret character or block special devices on the filesystem. +.RE +.sp +\fBnodev\fP +.RS 4 +Do not interpret character or block special devices on the filesystem. +.RE +.sp +\fBdiratime\fP +.RS 4 +Update directory inode access times on this filesystem. This is the default. (This option is ignored when \fBnoatime\fP is set.) +.RE +.sp +\fBnodiratime\fP +.RS 4 +Do not update directory inode access times on this filesystem. 
(This option is implied when \fBnoatime\fP is set.) +.RE +.sp +\fBdirsync\fP +.RS 4 +All directory updates within the filesystem should be done synchronously. This affects the following system calls: \fBcreat\fP(2), \fBlink\fP(2), \fBunlink\fP(2), \fBsymlink\fP(2), \fBmkdir\fP(2), \fBrmdir\fP(2), \fBmknod\fP(2) and \fBrename\fP(2). +.RE +.sp +\fBexec\fP +.RS 4 +Permit execution of binaries and other executable files. +.RE +.sp +\fBnoexec\fP +.RS 4 +Do not permit direct execution of any binaries on the mounted filesystem. +.RE +.sp +\fBgroup\fP +.RS 4 +Allow an ordinary user to mount the filesystem if one of that user\(cqs groups matches the group of the device. This option implies the options \fBnosuid\fP and \fBnodev\fP (unless overridden by subsequent options, as in the option line \fBgroup,dev,suid\fP). +.RE +.sp +\fBiversion\fP +.RS 4 +Every time the inode is modified, the i_version field will be incremented. +.RE +.sp +\fBnoiversion\fP +.RS 4 +Do not increment the i_version inode field. +.RE +.sp +\fBmand\fP +.RS 4 +Allow mandatory locks on this filesystem. See \fBfcntl\fP(2). This option was deprecated in Linux 5.15. +.RE +.sp +\fBnomand\fP +.RS 4 +Do not allow mandatory locks on this filesystem. +.RE +.sp +\fB_netdev\fP +.RS 4 +The filesystem resides on a device that requires network access (used to prevent the system from attempting to mount these filesystems until the network has been enabled on the system). +.RE +.sp +\fBnofail\fP +.RS 4 +Do not report errors for this device if it does not exist. +.RE +.sp +\fBrelatime\fP +.RS 4 +Update inode access times relative to modify or change time. Access time is only updated if the previous access time was earlier than the current modify or change time. (Similar to \fBnoatime\fP, but it doesn\(cqt break \fBmutt\fP(1) or other applications that need to know if a file has been read since the last time it was modified.) 
+.sp +Since Linux 2.6.30, the kernel defaults to the behavior provided by this option (unless \fBnoatime\fP was specified), and the \fBstrictatime\fP option is required to obtain traditional semantics. In addition, since Linux 2.6.30, the file\(cqs last access time is always updated if it is more than 1 day old. +.RE +.sp +\fBnorelatime\fP +.RS 4 +Do not use the \fBrelatime\fP feature. See also the \fBstrictatime\fP mount option. +.RE +.sp +\fBstrictatime\fP +.RS 4 +Allows to explicitly request full atime updates. This makes it possible for the kernel to default to \fBrelatime\fP or \fBnoatime\fP but still allow userspace to override it. For more details about the default system mount options see \fI/proc/mounts\fP. +.RE +.sp +\fBnostrictatime\fP +.RS 4 +Use the kernel\(cqs default behavior for inode access time updates. +.RE +.sp +\fBlazytime\fP +.RS 4 +Only update times (atime, mtime, ctime) on the in\-memory version of the file inode. +.sp +This mount option significantly reduces writes to the inode table for workloads that perform frequent random writes to preallocated files. +.sp +The on\-disk timestamps are updated only when: +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +the inode needs to be updated for some change unrelated to file timestamps +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +the application employs \fBfsync\fP(2), \fBsyncfs\fP(2), or \fBsync\fP(2) +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +an undeleted inode is evicted from memory +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +more than 24 hours have passed since the inode was written to disk. +.RE +.RE +.sp +\fBnolazytime\fP +.RS 4 +Do not use the lazytime feature. +.RE +.sp +\fBsuid\fP +.RS 4 +Honor set\-user\-ID and set\-group\-ID bits or file capabilities when executing programs from this filesystem. 
+.RE +.sp +\fBnosuid\fP +.RS 4 +Do not honor set\-user\-ID and set\-group\-ID bits or file capabilities when executing programs from this filesystem. In addition, SELinux domain transitions require permission \fInosuid_transition\fP, which in turn needs also policy capability \fInnp_nosuid_transition\fP. +.RE +.sp +\fBsilent\fP +.RS 4 +Turn on the silent flag. +.RE +.sp +\fBloud\fP +.RS 4 +Turn off the silent flag. +.RE +.sp +\fBowner\fP +.RS 4 +Allow an ordinary user to mount the filesystem if that user is the owner of the device. This option implies the options \fBnosuid\fP and \fBnodev\fP (unless overridden by subsequent options, as in the option line \fBowner,dev,suid\fP). +.RE +.sp +\fBremount\fP +.RS 4 +Attempt to remount an already\-mounted filesystem. This is commonly used to change the mount flags for a filesystem, especially to make a readonly filesystem writable. It does not change device or mount point. +.sp +The remount operation together with the \fBbind\fP flag has special semantics. See above, the subsection \fBBind mounts\fP. +.sp +The remount functionality follows the standard way the \fBmount\fP command works with options from \fIfstab\fP. This means that \fBmount\fP does not read \fIfstab\fP (or \fImtab\fP) only when both \fIdevice\fP and \fIdir\fP are specified. +.sp +\fBmount \-o remount,rw /dev/foo /dir\fP +.sp +After this call all old mount options are replaced and arbitrary stuff from \fIfstab\fP (or \fImtab\fP) is ignored, except the \fBloop=\fP option which is internally generated and maintained by the \fBmount\fP command. +.sp +\fBmount \-o remount,rw /dir\fP +.sp +After this call, \fBmount\fP reads \fIfstab\fP and merges these options with the options from the command line (\fB\-o\fP). If no mountpoint is found in \fIfstab\fP, then a remount with unspecified source is allowed. +.sp +\fBmount\fP allows the use of \fB\-\-all\fP to remount all already mounted filesystems which match a specified filter (\fB\-O\fP and \fB\-t\fP). 
For example: +.sp +\fBmount \-\-all \-o remount,ro \-t vfat\fP +.sp +remounts all already mounted vfat filesystems in read\-only mode. Each of the filesystems is remounted by \fBmount \-o remount,ro\fP \fI/dir\fP semantic. This means the \fBmount\fP command reads \fIfstab\fP or \fImtab\fP and merges these options with the options from the command line. +.RE +.sp +\fBro\fP +.RS 4 +Mount the filesystem read\-only. +.RE +.sp +\fBrw\fP +.RS 4 +Mount the filesystem read\-write. +.RE +.sp +\fBsync\fP +.RS 4 +All I/O to the filesystem should be done synchronously. In the case of media with a limited number of write cycles (e.g. some flash drives), \fBsync\fP may cause life\-cycle shortening. +.RE +.sp +\fBuser\fP +.RS 4 +Allow an ordinary user to mount the filesystem. The name of the mounting user is written to the \fImtab\fP file (or to the private libmount file in \fI/run/mount\fP on systems without a regular \fImtab\fP) so that this same user can unmount the filesystem again. This option implies the options \fBnoexec\fP, \fBnosuid\fP, and \fBnodev\fP (unless overridden by subsequent options, as in the option line \fBuser,exec,dev,suid\fP). +.RE +.sp +\fBnouser\fP +.RS 4 +Forbid an ordinary user to mount the filesystem. This is the default; it does not imply any other options. +.RE +.sp +\fBusers\fP +.RS 4 +Allow any user to mount and to unmount the filesystem, even when some other ordinary user mounted it. This option implies the options \fBnoexec\fP, \fBnosuid\fP, and \fBnodev\fP (unless overridden by subsequent options, as in the option line \fBusers,exec,dev,suid\fP). +.RE +.sp +\fBX\-\fP* +.RS 4 +All options prefixed with "X\-" are interpreted as comments or as userspace application\-specific options. These options are not stored in user space (e.g., \fImtab\fP file), nor sent to the mount.\fItype\fP helpers nor to the \fBmount\fP(2) system call. The suggested format is \fBX\-\fP\fIappname\fP.\fIoption\fP. 
+.RE +.sp +\fBx\-\fP* +.RS 4 +The same as \fBX\-\fP* options, but stored permanently in user space. This means the options are also available for \fBumount\fP(8) or other operations. Note that maintaining mount options in user space is tricky, because it\(cqs necessary to use libmount\-based tools and there is no guarantee that the options will be always available (for example after a move mount operation or in unshared namespace). +.sp +Note that before util\-linux v2.30 the x\-* options have not been maintained by libmount and stored in user space (functionality was the same as for X\-* now), but due to the growing number of use\-cases (in initrd, systemd etc.) the functionality has been extended to keep existing \fIfstab\fP configurations usable without a change. +.RE +.sp +\fBX\-mount.mkdir\fP[=\fImode\fP] +.RS 4 +Allow to make a target directory (mountpoint) if it does not exist yet. The optional argument \fImode\fP specifies the filesystem access mode used for \fBmkdir\fP(2) in octal notation. The default mode is 0755. This functionality is supported only for root users or when \fBmount\fP is executed without suid permissions. The option is also supported as \fBx\-mount.mkdir\fP, but this notation is deprecated since v2.30. See also \fB\-\-mkdir\fP command line option. +.RE +.sp +\fBX\-mount.subdir=\fP\fIdirectory\fP +.RS 4 +Allow mounting sub\-directory from a filesystem instead of the root directory. For now, this feature is implemented by temporary filesystem root directory mount in unshared namespace and then bind the sub\-directory to the final mount point and umount the root of the filesystem. The sub\-directory mount shows up atomically for the rest of the system although it is implemented by multiple \fBmount\fP(2) syscalls. This feature is EXPERIMENTAL. +.RE +.sp +\fBnosymfollow\fP +.RS 4 +Do not follow symlinks when resolving paths.
Symlinks can still be created, and \fBreadlink\fP(1), \fBreadlink\fP(2), \fBrealpath\fP(1), and \fBrealpath\fP(3) all still work properly. +.RE +.SH "FILESYSTEM\-SPECIFIC MOUNT OPTIONS" +.sp +This section lists options that are specific to particular filesystems. Where possible, you should first consult filesystem\-specific manual pages for details. Some of those pages are listed in the following table. +.TS +allbox tab(:); +lt lt. +T{ +.sp +\fBFilesystem(s)\fP +T}:T{ +.sp +\fBManual page\fP +T} +T{ +.sp +btrfs +T}:T{ +.sp +\fBbtrfs\fP(5) +T} +T{ +.sp +cifs +T}:T{ +.sp +\fBmount.cifs\fP(8) +T} +T{ +.sp +ext2, ext3, ext4 +T}:T{ +.sp +\fBext4\fP(5) +T} +T{ +.sp +fuse +T}:T{ +.sp +\fBfuse\fP(8) +T} +T{ +.sp +nfs +T}:T{ +.sp +\fBnfs\fP(5) +T} +T{ +.sp +tmpfs +T}:T{ +.sp +\fBtmpfs\fP(5) +T} +T{ +.sp +xfs +T}:T{ +.sp +\fBxfs\fP(5) +T} +.TE +.sp +.sp +Note that some of the pages listed above might be available only after you install the respective userland tools. +.sp +The following options apply only to certain filesystems. We sort them by filesystem. All options follow the \fB\-o\fP flag. +.sp +What options are supported depends a bit on the running kernel. Further information may be available in filesystem\-specific files in the kernel source subdirectory \fIDocumentation/filesystems\fP. +.SS "Mount options for adfs" +.sp +\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +.RS 4 +Set the owner and group of the files in the filesystem (default: uid=gid=0). +.RE +.sp +\fBownmask=\fP\fIvalue\fP and \fBothmask=\fP\fIvalue\fP +.RS 4 +Set the permission mask for ADFS \(aqowner\(aq permissions and \(aqother\(aq permissions, respectively (default: 0700 and 0077, respectively). See also \fI/usr/src/linux/Documentation/filesystems/adfs.rst\fP. 
+.RE +.SS "Mount options for affs" +.sp +\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +.RS 4 +Set the owner and group of the root of the filesystem (default: uid=gid=0, but with option \fBuid\fP or \fBgid\fP without specified value, the UID and GID of the current process are taken). +.RE +.sp +\fBsetuid=\fP\fIvalue\fP and \fBsetgid=\fP\fIvalue\fP +.RS 4 +Set the owner and group of all files. +.RE +.sp +\fBmode=\fP\fIvalue\fP +.RS 4 +Set the mode of all files to \fIvalue\fP & 0777 disregarding the original permissions. Add search permission to directories that have read permission. The value is given in octal. +.RE +.sp +\fBprotect\fP +.RS 4 +Do not allow any changes to the protection bits on the filesystem. +.RE +.sp +\fBusemp\fP +.RS 4 +Set UID and GID of the root of the filesystem to the UID and GID of the mount point upon the first sync or umount, and then clear this option. Strange... +.RE +.sp +\fBverbose\fP +.RS 4 +Print an informational message for each successful mount. +.RE +.sp +\fBprefix=\fP\fIstring\fP +.RS 4 +Prefix used before volume name, when following a link. +.RE +.sp +\fBvolume=\fP\fIstring\fP +.RS 4 +Prefix (of length at most 30) used before \(aq/\(aq when following a symbolic link. +.RE +.sp +\fBreserved=\fP\fIvalue\fP +.RS 4 +(Default: 2.) Number of unused blocks at the start of the device. +.RE +.sp +\fBroot=\fP\fIvalue\fP +.RS 4 +Give explicitly the location of the root block. +.RE +.sp +\fBbs=\fP\fIvalue\fP +.RS 4 +Give blocksize. Allowed values are 512, 1024, 2048, 4096. +.RE +.sp +\fBgrpquota\fP|\fBnoquota\fP|\fBquota\fP|\fBusrquota\fP +.RS 4 +These options are accepted but ignored. (However, quota utilities may react to such strings in \fI/etc/fstab\fP.) +.RE +.SS "Mount options for debugfs" +.sp +The debugfs filesystem is a pseudo filesystem, traditionally mounted on \fI/sys/kernel/debug\fP. 
As of kernel version 3.4, debugfs has the following options: +.sp +\fBuid=\fP\fIn\fP\fB, gid=\fP\fIn\fP +.RS 4 +Set the owner and group of the mountpoint. +.RE +.sp +\fBmode=\fP\fIvalue\fP +.RS 4 +Sets the mode of the mountpoint. +.RE +.SS "Mount options for devpts" +.sp +The devpts filesystem is a pseudo filesystem, traditionally mounted on \fI/dev/pts\fP. In order to acquire a pseudo terminal, a process opens \fI/dev/ptmx\fP; the number of the pseudo terminal is then made available to the process and the pseudo terminal slave can be accessed as \fI/dev/pts/\fP<number>. +.sp +\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +.RS 4 +This sets the owner or the group of newly created pseudo terminals to the specified values. When nothing is specified, they will be set to the UID and GID of the creating process. For example, if there is a tty group with GID 5, then \fBgid=5\fP will cause newly created pseudo terminals to belong to the tty group. +.RE +.sp +\fBmode=\fP\fIvalue\fP +.RS 4 +Set the mode of newly created pseudo terminals to the specified value. The default is 0600. A value of \fBmode=620\fP and \fBgid=5\fP makes "mesg y" the default on newly created pseudo terminals. +.RE +.sp +\fBnewinstance\fP +.RS 4 +Create a private instance of the devpts filesystem, such that indices of pseudo terminals allocated in this new instance are independent of indices created in other instances of devpts. +.sp +All mounts of devpts without this \fBnewinstance\fP option share the same set of pseudo terminal indices (i.e., legacy mode). Each mount of devpts with the \fBnewinstance\fP option has a private set of pseudo terminal indices. +.sp +This option is mainly used to support containers in the Linux kernel. It is implemented in Linux kernel versions starting with 2.6.29. Further, this mount option is valid only if \fBCONFIG_DEVPTS_MULTIPLE_INSTANCES\fP is enabled in the kernel configuration. 
+.sp +To use this option effectively, \fI/dev/ptmx\fP must be a symbolic link to \fIpts/ptmx\fP. See \fIDocumentation/filesystems/devpts.txt\fP in the Linux kernel source tree for details. +.RE +.sp +\fBptmxmode=\fP\fIvalue\fP +.RS 4 +Set the mode for the new \fIptmx\fP device node in the devpts filesystem. +.sp +With the support for multiple instances of devpts (see \fBnewinstance\fP option above), each instance has a private \fIptmx\fP node in the root of the devpts filesystem (typically \fI/dev/pts/ptmx\fP). +.sp +For compatibility with older versions of the kernel, the default mode of the new \fIptmx\fP node is 0000. \fBptmxmode=\fP\fIvalue\fP specifies a more useful mode for the \fIptmx\fP node and is highly recommended when the \fBnewinstance\fP option is specified. +.sp +This option is only implemented in Linux kernel versions starting with 2.6.29. Further, this option is valid only if \fBCONFIG_DEVPTS_MULTIPLE_INSTANCES\fP is enabled in the kernel configuration. +.RE +.SS "Mount options for fat" +.sp +(Note: \fIfat\fP is not a separate filesystem, but a common part of the \fImsdos\fP, \fIumsdos\fP and \fIvfat\fP filesystems.) +.sp +\fBblocksize=\fP{\fB512\fP|\fB1024\fP|\fB2048\fP} +.RS 4 +Set blocksize (default 512). This option is obsolete. +.RE +.sp +\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +.RS 4 +Set the owner and group of all files. (Default: the UID and GID of the current process.) +.RE +.sp +\fBumask=\fP\fIvalue\fP +.RS 4 +Set the umask (the bitmask of the permissions that are \fBnot\fP present). The default is the umask of the current process. The value is given in octal. +.RE +.sp +\fBdmask=\fP\fIvalue\fP +.RS 4 +Set the umask applied to directories only. The default is the umask of the current process. The value is given in octal. +.RE +.sp +\fBfmask=\fP\fIvalue\fP +.RS 4 +Set the umask applied to regular files only. The default is the umask of the current process. The value is given in octal. 
+.RE +.sp +\fBallow_utime=\fP\fIvalue\fP +.RS 4 +This option controls the permission check of mtime/atime. +.sp +\fB20\fP +.RS 4 +If current process is in group of file\(cqs group ID, you can change timestamp. +.RE +.sp +\fB2\fP +.RS 4 +Other users can change timestamp. +.RE +.RE +.sp +The default is set from \(aqdmask\(aq option. (If the directory is writable, \fButime\fP(2) is also allowed. I.e. ~dmask & 022) +.sp +Normally \fButime\fP(2) checks that the current process is owner of the file, or that it has the \fBCAP_FOWNER\fP capability. But FAT filesystems don\(cqt have UID/GID on disk, so the normal check is too inflexible. With this option you can relax it. +.sp +\fBcheck=\fP\fIvalue\fP +.RS 4 +Three different levels of pickiness can be chosen: +.sp +\fBr\fP[\fBelaxed\fP] +.RS 4 +Upper and lower case are accepted and equivalent, long name parts are truncated (e.g. \fIverylongname.foobar\fP becomes \fIverylong.foo\fP), leading and embedded spaces are accepted in each name part (name and extension). +.RE +.sp +\fBn\fP[\fBormal\fP] +.RS 4 +Like "relaxed", but many special characters (*, ?, <, spaces, etc.) are rejected. This is the default. +.RE +.sp +\fBs\fP[\fBtrict\fP] +.RS 4 +Like "normal", but names that contain long parts or special characters that are sometimes used on Linux but are not accepted by MS\-DOS (+, =, etc.) are rejected. +.RE +.RE +.sp +\fBcodepage=\fP\fIvalue\fP +.RS 4 +Sets the codepage for converting to shortname characters on FAT and VFAT filesystems. By default, codepage 437 is used. +.RE +.sp +\fBconv=\fP\fImode\fP +.RS 4 +This option is obsolete and may fail or be ignored. +.RE +.sp +\fBcvf_format=\fP\fImodule\fP +.RS 4 +Forces the driver to use the CVF (Compressed Volume File) module cvf\fI_module\fP instead of auto\-detection. If the kernel supports \fBkmod\fP, the \fBcvf_format=\fP\fIxxx\fP option also controls on\-demand CVF module loading. This option is obsolete. 
+.RE +.sp +\fBcvf_option=\fP\fIoption\fP +.RS 4 +Option passed to the CVF module. This option is obsolete. +.RE +.sp +\fBdebug\fP +.RS 4 +Turn on the \fIdebug\fP flag. A version string and a list of filesystem parameters will be printed (these data are also printed if the parameters appear to be inconsistent). +.RE +.sp +\fBdiscard\fP +.RS 4 +If set, causes discard/TRIM commands to be issued to the block device when blocks are freed. This is useful for SSD devices and sparse/thinly\-provisioned LUNs. +.RE +.sp +\fBdos1xfloppy\fP +.RS 4 +If set, use a fallback default BIOS Parameter Block configuration, determined by backing device size. These static parameters match defaults assumed by DOS 1.x for 160 kiB, 180 kiB, 320 kiB, and 360 kiB floppies and floppy images. +.RE +.sp +\fBerrors=\fP{\fBpanic\fP|\fBcontinue\fP|\fBremount\-ro\fP} +.RS 4 +Specify FAT behavior on critical errors: panic, continue without doing anything, or remount the partition in read\-only mode (default behavior). +.RE +.sp +\fBfat=\fP{\fB12\fP|\fB16\fP|\fB32\fP} +.RS 4 +Specify a 12, 16 or 32 bit fat. This overrides the automatic FAT type detection routine. Use with caution! +.RE +.sp +\fBiocharset=\fP\fIvalue\fP +.RS 4 +Character set to use for converting between 8 bit characters and 16 bit Unicode characters. The default is iso8859\-1. Long filenames are stored on disk in Unicode format. +.RE +.sp +\fBnfs=\fP{\fBstale_rw\fP|\fBnostale_ro\fP} +.RS 4 +Enable this only if you want to export the FAT filesystem over NFS. +.sp +\fBstale_rw\fP: This option maintains an index (cache) of directory inodes which is used by the nfs\-related code to improve look\-ups. Full file operations (read/write) over NFS are supported but with cache eviction at NFS server, this could result in spurious \fBESTALE\fP errors. +.sp +\fBnostale_ro\fP: This option bases the inode number and file handle on the on\-disk location of a file in the FAT directory entry. 
This ensures that \fBESTALE\fP will not be returned after a file is evicted from the inode cache. However, it means that operations such as rename, create and unlink could cause file handles that previously pointed at one file to point at a different file, potentially causing data corruption. For this reason, this option also mounts the filesystem readonly. +.sp +To maintain backward compatibility, \fB\-o nfs\fP is also accepted, defaulting to \fBstale_rw\fP. +.RE +.sp +\fBtz=UTC\fP +.RS 4 +This option disables the conversion of timestamps between local time (as used by Windows on FAT) and UTC (which Linux uses internally). This is particularly useful when mounting devices (like digital cameras) that are set to UTC in order to avoid the pitfalls of local time. +.RE +.sp +\fBtime_offset=\fP\fIminutes\fP +.RS 4 +Set offset for conversion of timestamps from local time used by FAT to UTC. I.e., \fIminutes\fP will be subtracted from each timestamp to convert it to UTC used internally by Linux. This is useful when the time zone set in the kernel via \fBsettimeofday\fP(2) is not the time zone used by the filesystem. Note that this option still does not provide correct time stamps in all cases in presence of DST \- time stamps in a different DST setting will be off by one hour. +.RE +.sp +\fBquiet\fP +.RS 4 +Turn on the \fIquiet\fP flag. Attempts to chown or chmod files do not return errors, although they fail. Use with caution! +.RE +.sp +\fBrodir\fP +.RS 4 +FAT has the \fBATTR_RO\fP (read\-only) attribute. On Windows, the \fBATTR_RO\fP of the directory will just be ignored, and is used only by applications as a flag (e.g. it\(cqs set for the customized folder). +.sp +If you want to use \fBATTR_RO\fP as read\-only flag even for the directory, set this option. +.RE +.sp +\fBshowexec\fP +.RS 4 +If set, the execute permission bits of the file will be allowed only if the extension part of the name is .EXE, .COM, or .BAT. Not set by default. 
+.RE +.sp +\fBsys_immutable\fP +.RS 4 +If set, \fBATTR_SYS\fP attribute on FAT is handled as \fBIMMUTABLE\fP flag on Linux. Not set by default. +.RE +.sp +\fBflush\fP +.RS 4 +If set, the filesystem will try to flush to disk earlier than normal. Not set by default. +.RE +.sp +\fBusefree\fP +.RS 4 +Use the "free clusters" value stored on \fBFSINFO\fP. It\(cqll be used to determine number of free clusters without scanning disk. But it\(cqs not used by default, because recent Windows don\(cqt update it correctly in some cases. If you are sure the "free clusters" on \fBFSINFO\fP is correct, by this option you can avoid scanning disk. +.RE +.sp +\fBdots\fP, \fBnodots\fP, \fBdotsOK=\fP[\fByes\fP|\fBno\fP] +.RS 4 +Various misguided attempts to force Unix or DOS conventions onto a FAT filesystem. +.RE +.SS "Mount options for hfs" +.sp +\fBcreator=\fP\fIcccc\fP\fB, type=\fP\fIcccc\fP +.RS 4 +Set the creator/type values as shown by the MacOS finder used for creating new files. Default values: \(aq????\(aq. +.RE +.sp +\fBuid=\fP\fIn\fP\fB, gid=\fP\fIn\fP +.RS 4 +Set the owner and group of all files. (Default: the UID and GID of the current process.) +.RE +.sp +\fBdir_umask=\fP\fIn\fP\fB, file_umask=\fP\fIn\fP\fB, umask=\fP\fIn\fP +.RS 4 +Set the umask used for all directories, all regular files, or all files and directories. Defaults to the umask of the current process. +.RE +.sp +\fBsession=\fP\fIn\fP +.RS 4 +Select the CDROM session to mount. Defaults to leaving that decision to the CDROM driver. This option will fail with anything but a CDROM as underlying device. +.RE +.sp +\fBpart=\fP\fIn\fP +.RS 4 +Select partition number n from the device. Only makes sense for CDROMs. Defaults to not parsing the partition table at all. +.RE +.sp +\fBquiet\fP +.RS 4 +Don\(cqt complain about invalid mount options. +.RE +.SS "Mount options for hpfs" +.sp +\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +.RS 4 +Set the owner and group of all files.
(Default: the UID and GID of the current process.) +.RE +.sp +\fBumask=\fP\fIvalue\fP +.RS 4 +Set the umask (the bitmask of the permissions that are \fBnot\fP present). The default is the umask of the current process. The value is given in octal. +.RE +.sp +\fBcase=\fP{\fBlower\fP|\fBasis\fP} +.RS 4 +Convert all file names to lower case, or leave them. (Default: \fBcase=lower\fP.) +.RE +.sp +\fBconv=\fP\fImode\fP +.RS 4 +This option is obsolete and may fail or be ignored. +.RE +.sp +\fBnocheck\fP +.RS 4 +Do not abort mounting when certain consistency checks fail. +.RE +.SS "Mount options for iso9660" +.sp +ISO 9660 is a standard describing a filesystem structure to be used on CD\-ROMs. (This filesystem type is also seen on some DVDs. See also the \fIudf\fP filesystem.) +.sp +Normal \fIiso9660\fP filenames appear in an 8.3 format (i.e., DOS\-like restrictions on filename length), and in addition all characters are in upper case. Also there is no field for file ownership, protection, number of links, provision for block/character devices, etc. +.sp +Rock Ridge is an extension to iso9660 that provides all of these UNIX\-like features. Basically there are extensions to each directory record that supply all of the additional information, and when Rock Ridge is in use, the filesystem is indistinguishable from a normal UNIX filesystem (except that it is read\-only, of course). +.sp +\fBnorock\fP +.RS 4 +Disable the use of Rock Ridge extensions, even if available. Cf. \fBmap\fP. +.RE +.sp +\fBnojoliet\fP +.RS 4 +Disable the use of Microsoft Joliet extensions, even if available. Cf. \fBmap\fP. +.RE +.sp +\fBcheck=\fP{\fBr\fP[\fBelaxed\fP]|\fBs\fP[\fBtrict\fP]} +.RS 4 +With \fBcheck=relaxed\fP, a filename is first converted to lower case before doing the lookup. This is probably only meaningful together with \fBnorock\fP and \fBmap=normal\fP. (Default: \fBcheck=strict\fP.)
+.RE +.sp +\fBuid=\fP\fIvalue\fP and \fBgid=\fP\fIvalue\fP +.RS 4 +Give all files in the filesystem the indicated user or group id, possibly overriding the information found in the Rock Ridge extensions. (Default: \fBuid=0,gid=0\fP.) +.RE +.sp +\fBmap=\fP{\fBn\fP[\fBormal\fP]|\fBo\fP[\fBff\fP]|\fBa\fP[\fBcorn\fP]} +.RS 4 +For non\-Rock Ridge volumes, normal name translation maps upper to lower case ASCII, drops a trailing \(aq;1\(aq, and converts \(aq;\(aq to \(aq.\(aq. With \fBmap=off\fP no name translation is done. See \fBnorock\fP. (Default: \fBmap=normal\fP.) \fBmap=acorn\fP is like \fBmap=normal\fP but also applies Acorn extensions if present. +.RE +.sp +\fBmode=\fP\fIvalue\fP +.RS 4 +For non\-Rock Ridge volumes, give all files the indicated mode. (Default: read and execute permission for everybody.) Octal mode values require a leading 0. +.RE +.sp +\fBunhide\fP +.RS 4 +Also show hidden and associated files. (If the ordinary files and the associated or hidden files have the same filenames, this may make the ordinary files inaccessible.) +.RE +.sp +\fBblock=\fP{\fB512\fP|\fB1024\fP|\fB2048\fP} +.RS 4 +Set the block size to the indicated value. (Default: \fBblock=1024\fP.) +.RE +.sp +\fBconv=\fP\fImode\fP +.RS 4 +This option is obsolete and may fail or be ignored. +.RE +.sp +\fBcruft\fP +.RS 4 +If the high byte of the file length contains other garbage, set this mount option to ignore the high order bits of the file length. This implies that a file cannot be larger than 16 MB. +.RE +.sp +\fBsession=\fP\fIx\fP +.RS 4 +Select number of session on a multisession CD. +.RE +.sp +\fBsbsector=\fP\fIxxx\fP +.RS 4 +Session begins from sector xxx. +.RE +.sp +The following options are the same as for vfat and specifying them only makes sense when using discs encoded using Microsoft\(cqs Joliet extensions. +.sp +\fBiocharset=\fP\fIvalue\fP +.RS 4 +Character set to use for converting 16 bit Unicode characters on CD to 8 bit characters. The default is iso8859\-1. 
+.RE +.sp +\fButf8\fP +.RS 4 +Convert 16 bit Unicode characters on CD to UTF\-8. +.RE +.SS "Mount options for jfs" +.sp +\fBiocharset=\fP\fIname\fP +.RS 4 +Character set to use for converting from Unicode to ASCII. The default is to do no conversion. Use \fBiocharset=utf8\fP for UTF8 translations. This requires \fBCONFIG_NLS_UTF8\fP to be set in the kernel \fI.config\fP file. +.RE +.sp +\fBresize=\fP\fIvalue\fP +.RS 4 +Resize the volume to \fIvalue\fP blocks. JFS only supports growing a volume, not shrinking it. This option is only valid during a remount, when the volume is mounted read\-write. The \fBresize\fP keyword with no value will grow the volume to the full size of the partition. +.RE +.sp +\fBnointegrity\fP +.RS 4 +Do not write to the journal. The primary use of this option is to allow for higher performance when restoring a volume from backup media. The integrity of the volume is not guaranteed if the system abnormally ends. +.RE +.sp +\fBintegrity\fP +.RS 4 +Default. Commit metadata changes to the journal. Use this option to remount a volume where the \fBnointegrity\fP option was previously specified in order to restore normal behavior. +.RE +.sp +\fBerrors=\fP{\fBcontinue\fP|\fBremount\-ro\fP|\fBpanic\fP} +.RS 4 +Define the behavior when an error is encountered. (Either ignore errors and just mark the filesystem erroneous and continue, or remount the filesystem read\-only, or panic and halt the system.) +.RE +.sp +\fBnoquota\fP|\fBquota\fP|\fBusrquota\fP|\fBgrpquota\fP +.RS 4 +These options are accepted but ignored. +.RE +.SS "Mount options for msdos" +.sp +See mount options for fat. If the \fImsdos\fP filesystem detects an inconsistency, it reports an error and sets the file system read\-only. The filesystem can be made writable again by remounting it. +.SS "Mount options for ncpfs" +.sp +Just like \fInfs\fP, the \fIncpfs\fP implementation expects a binary argument (a \fIstruct ncp_mount_data\fP) to the \fBmount\fP(2) system call. 
This argument is constructed by \fBncpmount\fP(8) and the current version of \fBmount\fP (2.12) does not know anything about ncpfs. +.SS "Mount options for ntfs" +.sp +\fBiocharset=\fP\fIname\fP +.RS 4 +Character set to use when returning file names. Unlike VFAT, NTFS suppresses names that contain nonconvertible characters. Deprecated. +.RE +.sp +\fBnls=\fP\fIname\fP +.RS 4 +New name for the option earlier called \fIiocharset\fP. +.RE +.sp +\fButf8\fP +.RS 4 +Use UTF\-8 for converting file names. +.RE +.sp +\fBuni_xlate=\fP{\fB0\fP|\fB1\fP|\fB2\fP} +.RS 4 +For 0 (or \(aqno\(aq or \(aqfalse\(aq), do not use escape sequences for unknown Unicode characters. For 1 (or \(aqyes\(aq or \(aqtrue\(aq) or 2, use vfat\-style 4\-byte escape sequences starting with ":". Here 2 gives a little\-endian encoding and 1 a byteswapped bigendian encoding. +.RE +.sp +\fBposix=[0|1]\fP +.RS 4 +If enabled (posix=1), the filesystem distinguishes between upper and lower case. The 8.3 alias names are presented as hard links instead of being suppressed. This option is obsolete. +.RE +.sp +\fBuid=\fP\fIvalue\fP, \fBgid=\fP\fIvalue\fP and \fBumask=\fP\fIvalue\fP +.RS 4 +Set the file permission on the filesystem. The umask value is given in octal. By default, the files are owned by root and not readable by somebody else. +.RE +.SS "Mount options for overlay" +.sp +Since Linux 3.18 the overlay pseudo filesystem implements a union mount for other filesystems. +.sp +An overlay filesystem combines two filesystems \- an \fBupper\fP filesystem and a \fBlower\fP filesystem. When a name exists in both filesystems, the object in the upper filesystem is visible while the object in the lower filesystem is either hidden or, in the case of directories, merged with the upper object. +.sp +The lower filesystem can be any filesystem supported by Linux and does not need to be writable. The lower filesystem can even be another overlayfs. 
The upper filesystem will normally be writable and if it is it must support the creation of trusted.* extended attributes, and must provide a valid d_type in readdir responses, so NFS is not suitable. +.sp +A read\-only overlay of two read\-only filesystems may use any filesystem type. The options \fBlowerdir\fP and \fBupperdir\fP are combined into a merged directory by using: +.RS 3 +.ll -.6i +.sp +.if n .RS 4 +.nf +.fam C +mount \-t overlay overlay \(rs + \-olowerdir=/lower,upperdir=/upper,workdir=/work /merged +.fam +.fi +.if n .RE +.br +.RE +.ll +.sp +\fBlowerdir=\fP\fIdirectory\fP +.RS 4 +Any filesystem, does not need to be on a writable filesystem. +.RE +.sp +\fBupperdir=\fP\fIdirectory\fP +.RS 4 +The upperdir is normally on a writable filesystem. +.RE +.sp +\fBworkdir=\fP\fIdirectory\fP +.RS 4 +The workdir needs to be an empty directory on the same filesystem as upperdir. +.RE +.sp +\fBuserxattr\fP +.RS 4 +Use the "\fBuser.overlay.\fP" xattr namespace instead of "\fBtrusted.overlay.\fP". This is useful for unprivileged mounting of overlayfs. +.RE +.sp +\fBredirect_dir=\fP{\fBon\fP|\fBoff\fP|\fBfollow\fP|\fBnofollow\fP} +.RS 4 +If the \fIredirect_dir\fP feature is enabled, then the directory will be copied up (but not the contents). Then the "{\fBtrusted\fP|\fBuser\fP}.overlay.redirect" extended attribute is set to the path of the original location from the root of the overlay. Finally the directory is moved to the new location. +.sp +\fBon\fP +.RS 4 +Redirects are enabled. +.RE +.sp +\fBoff\fP +.RS 4 +Redirects are not created and only followed if "redirect_always_follow" feature is enabled in the kernel/module config. +.RE +.sp +\fBfollow\fP +.RS 4 +Redirects are not created, but followed. +.RE +.sp +\fBnofollow\fP +.RS 4 +Redirects are not created and not followed (equivalent to "redirect_dir=off" if "redirect_always_follow" feature is not enabled). +.RE +.RE +.sp +\fBindex=\fP{\fBon\fP|\fBoff\fP} +.RS 4 +Inode index. 
If this feature is disabled and a file with multiple hard links is copied up, then this will "break" the link. Changes will not be propagated to other names referring to the same inode. +.RE +.sp +\fBuuid=\fP{\fBon\fP|\fBoff\fP} +.RS 4 +Can be used to replace UUID of the underlying filesystem in file handles with null, and effectively disable UUID checks. This can be useful in case the underlying disk is copied and the UUID of this copy is changed. This is only applicable if all lower/upper/work directories are on the same filesystem, otherwise it will fallback to normal behaviour. +.RE +.sp +\fBnfs_export=\fP{\fBon\fP|\fBoff\fP} +.RS 4 +When the underlying filesystems supports NFS export and the "nfs_export" +feature is enabled, an overlay filesystem may be exported to NFS. +.sp +With the "nfs_export" feature, on copy_up of any lower object, an index entry +is created under the index directory. The index entry name is the hexadecimal +representation of the copy up origin file handle. For a non\-directory object, +the index entry is a hard link to the upper inode. For a directory object, the +index entry has an extended attribute "{\fBtrusted\fP|\fBuser\fP}.overlay.upper" +with an encoded file handle of the upper directory inode. +.sp +When encoding a file handle from an overlay filesystem object, the following rules apply +.RS 4 +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +For a non\-upper object, encode a lower file handle from lower inode +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +For an indexed object, encode a lower file handle from copy_up origin +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. 
IP \(bu 2.3 +.\} +For a pure\-upper object and for an existing non\-indexed upper object, encode an upper file handle from upper inode +.RE +.RE +.sp +The encoded overlay file handle includes +.RS 4 +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Header including path type information (e.g. lower/upper) +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +UUID of the underlying filesystem +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Underlying filesystem encoding of underlying inode +.RE +.RE +.sp +This encoding format is identical to the encoding format of file handles that are stored in extended attribute "{\fBtrusted\fP|\fBuser\fP}.overlay.origin". When decoding an overlay file handle, the following steps are followed +.RS 4 +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Find underlying layer by UUID and path type information. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Decode the underlying filesystem file handle to underlying dentry. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +For a lower file handle, lookup the handle in index directory by name. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +If a whiteout is found in index, return \fBESTALE\fP. This represents an overlay object that was deleted after its file handle was encoded. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +For a non\-directory, instantiate a disconnected overlay dentry from the decoded underlying dentry, the path type and index inode, if found. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. 
IP \(bu 2.3 +.\} +For a directory, use the connected underlying decoded dentry, path type and index, to lookup a connected overlay dentry. +.RE +.RE +.sp +Decoding a non\-directory file handle may return a disconnected dentry. copy_up +of that disconnected dentry will create an upper index entry with no upper +alias. +.sp +When overlay filesystem has multiple lower layers, a middle layer directory may +have a "redirect" to lower directory. Because middle layer "redirects" are not +indexed, a lower file handle that was encoded from the "redirect" origin +directory, cannot be used to find the middle or upper layer directory. +Similarly, a lower file handle that was encoded from a descendant of the +"redirect" origin directory, cannot be used to reconstruct a connected overlay +path. To mitigate the cases of directories that cannot be decoded from a lower +file handle, these directories are copied up on encode and encoded as an upper +file handle. On an overlay filesystem with no upper layer this mitigation +cannot be used. NFS export in this setup requires turning off redirect follow +(e.g. "\fIredirect_dir=nofollow\fP"). +.sp +The overlay filesystem does not support non\-directory connectable file handles, so exporting with the \fIsubtree_check\fP exportfs configuration will cause failures to lookup files over NFS. +.sp +When the NFS export feature is enabled, all directory index entries are verified on mount time to check that upper file handles are not stale. This verification may cause significant overhead in some cases. +.sp +Note: the mount options \fIindex=off,nfs_export=on\fP are conflicting for a +read\-write mount and will result in an error. +.RE +.sp +\fBxino=\fP{\fBon\fP|\fBoff\fP|\fBauto\fP} +.RS 4 +The "xino" feature composes a unique object identifier from the real object st_ino and an underlying fsid index. The "xino" feature uses the high inode number bits for fsid, because the underlying filesystems rarely use the high inode number bits. 
In case the underlying inode number does overflow into the high xino bits, overlay filesystem will fall back to the non xino behavior for that inode. +.sp +For a detailed description of the effect of this option please refer to \c +.URL "https://www.kernel.org/doc/html/latest/filesystems/overlayfs.html?highlight=overlayfs" "" "" +.RE +.sp +\fBmetacopy=\fP{\fBon\fP|\fBoff\fP} +.RS 4 +When metadata only copy up feature is enabled, overlayfs will only copy up metadata (as opposed to whole file), when a metadata specific operation like chown/chmod is performed. Full file will be copied up later when file is opened for WRITE operation. +.sp +In other words, this is delayed data copy up operation and data is copied up when there is a need to actually modify data. +.RE +.sp +\fBvolatile\fP +.RS 4 +Volatile mounts are not guaranteed to survive a crash. It is strongly recommended that volatile mounts are only used if data written to the overlay can be recreated without significant effort. +.sp +The advantage of mounting with the "volatile" option is that all forms of sync calls to the upper filesystem are omitted. +.sp +In order to avoid giving a false sense of safety, the syncfs (and fsync) semantics of volatile mounts are slightly different than that of the rest of VFS. If any writeback error occurs on the upperdir\(cqs filesystem after a volatile mount takes place, all sync functions will return an error. Once this condition is reached, the filesystem will not recover, and every subsequent sync call will return an error, even if the upperdir has not experienced a new error since the last sync call. +.sp +When overlay is mounted with "volatile" option, the directory "$workdir/work/incompat/volatile" is created. During next mount, overlay checks for this directory and refuses to mount if present. This is a strong indicator that the user should throw away the upper and work directories and create fresh ones. 
In very limited cases where the user knows that the system has not crashed and contents of upperdir are intact, the "volatile" directory can be removed. +.RE +.SS "Mount options for reiserfs" +.sp +Reiserfs is a journaling filesystem. +.sp +\fBconv\fP +.RS 4 +Instructs version 3.6 reiserfs software to mount a version 3.5 filesystem, using the 3.6 format for newly created objects. This filesystem will no longer be compatible with reiserfs 3.5 tools. +.RE +.sp +\fBhash=\fP{\fBrupasov\fP|\fBtea\fP|\fBr5\fP|\fBdetect\fP} +.RS 4 +Choose which hash function reiserfs will use to find files within directories. +.sp +\fBrupasov\fP +.RS 4 +A hash invented by Yury Yu. Rupasov. It is fast and preserves locality, mapping lexicographically close file names to close hash values. This option should not be used, as it causes a high probability of hash collisions. +.RE +.sp +\fBtea\fP +.RS 4 +A Davis\-Meyer function implemented by Jeremy Fitzhardinge. It uses hash permuting bits in the name. It gets high randomness and, therefore, low probability of hash collisions at some CPU cost. This may be used if \fBEHASHCOLLISION\fP errors are experienced with the r5 hash. +.RE +.sp +\fBr5\fP +.RS 4 +A modified version of the rupasov hash. It is used by default and is the best choice unless the filesystem has huge directories and unusual file\-name patterns. +.RE +.sp +\fBdetect\fP +.RS 4 +Instructs \fBmount\fP to detect which hash function is in use by examining the filesystem being mounted, and to write this information into the reiserfs superblock. This is only useful on the first mount of an old format filesystem. +.RE +.RE +.sp +\fBhashed_relocation\fP +.RS 4 +Tunes the block allocator. This may provide performance improvements in some situations. +.RE +.sp +\fBno_unhashed_relocation\fP +.RS 4 +Tunes the block allocator. This may provide performance improvements in some situations. +.RE +.sp +\fBnoborder\fP +.RS 4 +Disable the border allocator algorithm invented by Yury Yu. Rupasov. 
This may provide performance improvements in some situations. +.RE +.sp +\fBnolog\fP +.RS 4 +Disable journaling. This will provide slight performance improvements in some situations at the cost of losing reiserfs\(cqs fast recovery from crashes. Even with this option turned on, reiserfs still performs all journaling operations, save for actual writes into its journaling area. Implementation of \fInolog\fP is a work in progress. +.RE +.sp +\fBnotail\fP +.RS 4 +By default, reiserfs stores small files and \(aqfile tails\(aq directly into its tree. This confuses some utilities such as \fBlilo\fP(8). This option is used to disable packing of files into the tree. +.RE +.sp +\fBreplayonly\fP +.RS 4 +Replay the transactions which are in the journal, but do not actually mount the filesystem. Mainly used by \fIreiserfsck\fP. +.RE +.sp +\fBresize=\fP\fInumber\fP +.RS 4 +A remount option which permits online expansion of reiserfs partitions. Instructs reiserfs to assume that the device has \fInumber\fP blocks. This option is designed for use with devices which are under logical volume management (LVM). There is a special \fIresizer\fP utility which can be obtained from \fI\c +.URL "ftp://ftp.namesys.com/pub/reiserfsprogs" "" "\fP." +.RE +.sp +\fBuser_xattr\fP +.RS 4 +Enable Extended User Attributes. See the \fBattr\fP(1) manual page. +.RE +.sp +\fBacl\fP +.RS 4 +Enable POSIX Access Control Lists. See the \fBacl\fP(5) manual page. +.RE +.sp +\fBbarrier=none\fP / \fBbarrier=flush\fP +.RS 4 +This disables / enables the use of write barriers in the journaling code. \fBbarrier=none\fP disables, \fBbarrier=flush\fP enables (default). This also requires an IO stack which can support barriers, and if reiserfs gets an error on a barrier write, it will disable barriers again with a warning. Write barriers enforce proper on\-disk ordering of journal commits, making volatile disk write caches safe to use, at some performance penalty. 
If your disks are battery\-backed in one way or another, disabling barriers may safely improve performance. +.RE +.SS "Mount options for ubifs" +.sp +UBIFS is a flash filesystem which works on top of UBI volumes. Note that \fBatime\fP is not supported and is always turned off. +.sp +The device name may be specified as +.RS 3 +.ll -.6i +.sp +\fBubiX_Y\fP +.RS 4 +UBI device number \fBX\fP, volume number \fBY\fP +.RE +.sp +\fBubiY\fP +.RS 4 +UBI device number \fB0\fP, volume number \fBY\fP +.RE +.sp +\fBubiX:NAME\fP +.RS 4 +UBI device number \fBX\fP, volume with name \fBNAME\fP +.RE +.sp +\fBubi:NAME\fP +.RS 4 +UBI device number \fB0\fP, volume with name \fBNAME\fP +.RE +.br +.RE +.ll +.sp +Alternative \fB!\fP separator may be used instead of \fB:\fP. +.sp +The following mount options are available: +.sp +\fBbulk_read\fP +.RS 4 +Enable bulk\-read. VFS read\-ahead is disabled because it slows down the filesystem. Bulk\-Read is an internal optimization. Some flashes may read faster if the data are read at one go, rather than at several read requests. For example, OneNAND can do "read\-while\-load" if it reads more than one NAND page. +.RE +.sp +\fBno_bulk_read\fP +.RS 4 +Do not bulk\-read. This is the default. +.RE +.sp +\fBchk_data_crc\fP +.RS 4 +Check data CRC\-32 checksums. This is the default. +.RE +.sp +\fBno_chk_data_crc\fP +.RS 4 +Do not check data CRC\-32 checksums. With this option, the filesystem does not check CRC\-32 checksum for data, but it does check it for the internal indexing information. This option only affects reading, not writing. CRC\-32 is always calculated when writing the data. +.RE +.sp +\fBcompr=\fP{\fBnone\fP|\fBlzo\fP|\fBzlib\fP} +.RS 4 +Select the default compressor which is used when new files are written. It is still possible to read compressed files if mounted with the \fBnone\fP option. 
+.RE +.SS "Mount options for udf" +.sp +UDF is the "Universal Disk Format" filesystem defined by OSTA, the Optical Storage Technology Association, and is often used for DVD\-ROM, frequently in the form of a hybrid UDF/ISO\-9660 filesystem. It is, however, perfectly usable by itself on disk drives, flash drives and other block devices. See also \fIiso9660\fP. +.sp +\fBuid=\fP +.RS 4 +Make all files in the filesystem belong to the given user. uid=forget can be specified independently of (or usually in addition to) uid=<user> and results in UDF not storing uids to the media. In fact the recorded uid is the 32\-bit overflow uid \-1 as defined by the UDF standard. The value is given as either <user> which is a valid user name or the corresponding decimal user id, or the special string "forget". +.RE +.sp +\fBgid=\fP +.RS 4 +Make all files in the filesystem belong to the given group. gid=forget can be specified independently of (or usually in addition to) gid=<group> and results in UDF not storing gids to the media. In fact the recorded gid is the 32\-bit overflow gid \-1 as defined by the UDF standard. The value is given as either <group> which is a valid group name or the corresponding decimal group id, or the special string "forget". +.RE +.sp +\fBumask=\fP +.RS 4 +Mask out the given permissions from all inodes read from the filesystem. The value is given in octal. +.RE +.sp +\fBmode=\fP +.RS 4 +If \fBmode=\fP is set the permissions of all non\-directory inodes read from the filesystem will be set to the given mode. The value is given in octal. +.RE +.sp +\fBdmode=\fP +.RS 4 +If \fBdmode=\fP is set the permissions of all directory inodes read from the filesystem will be set to the given dmode. The value is given in octal. +.RE +.sp +\fBbs=\fP +.RS 4 +Set the block size. Default value prior to kernel version 2.6.30 was 2048. Since 2.6.30 and prior to 4.11 it was logical device block size with fallback to 2048. 
Since 4.11 it is logical block size with fallback to any valid block size between logical device block size and 4096. +.sp +For other details see the \fBmkudffs\fP(8) 2.0+ manpage, see the \fBCOMPATIBILITY\fP and \fBBLOCK SIZE\fP sections. +.RE +.sp +\fBunhide\fP +.RS 4 +Show otherwise hidden files. +.RE +.sp +\fBundelete\fP +.RS 4 +Show deleted files in lists. +.RE +.sp +\fBadinicb\fP +.RS 4 +Embed data in the inode. (default) +.RE +.sp +\fBnoadinicb\fP +.RS 4 +Don\(cqt embed data in the inode. +.RE +.sp +\fBshortad\fP +.RS 4 +Use short UDF address descriptors. +.RE +.sp +\fBlongad\fP +.RS 4 +Use long UDF address descriptors. (default) +.RE +.sp +\fBnostrict\fP +.RS 4 +Unset strict conformance. +.RE +.sp +\fBiocharset=\fP +.RS 4 +Set the NLS character set. This requires kernel compiled with \fBCONFIG_UDF_NLS\fP option. +.RE +.sp +\fButf8\fP +.RS 4 +Set the UTF\-8 character set. +.RE +.SS "Mount options for debugging and disaster recovery" +.sp +\fBnovrs\fP +.RS 4 +Ignore the Volume Recognition Sequence and attempt to mount anyway. +.RE +.sp +\fBsession=\fP +.RS 4 +Select the session number for multi\-session recorded optical media. (default= last session) +.RE +.sp +\fBanchor=\fP +.RS 4 +Override standard anchor location. (default= 256) +.RE +.sp +\fBlastblock=\fP +.RS 4 +Set the last block of the filesystem. +.RE +.SS "Unused historical mount options that may be encountered and should be removed" +.sp +\fBuid=ignore\fP +.RS 4 +Ignored, use uid=<user> instead. +.RE +.sp +\fBgid=ignore\fP +.RS 4 +Ignored, use gid=<group> instead. +.RE +.sp +\fBvolume=\fP +.RS 4 +Unimplemented and ignored. +.RE +.sp +\fBpartition=\fP +.RS 4 +Unimplemented and ignored. +.RE +.sp +\fBfileset=\fP +.RS 4 +Unimplemented and ignored. +.RE +.sp +\fBrootdir=\fP +.RS 4 +Unimplemented and ignored. +.RE +.SS "Mount options for ufs" +.sp +\fBufstype=\fP\fIvalue\fP +.RS 4 +UFS is a filesystem widely used in different operating systems. The problem are differences among implementations. 
Features of some implementations are undocumented, so it\(cqs hard to recognize the type of ufs automatically. That\(cqs why the user must specify the type of ufs by mount option. Possible values are: +.sp +\fBold\fP +.RS 4 +Old format of ufs, this is the default, read only. (Don\(cqt forget to give the \fB\-r\fP option.) +.RE +.sp +\fB44bsd\fP +.RS 4 +For filesystems created by a BSD\-like system (NetBSD, FreeBSD, OpenBSD). +.RE +.sp +\fBufs2\fP +.RS 4 +Used in FreeBSD 5.x supported as read\-write. +.RE +.sp +\fB5xbsd\fP +.RS 4 +Synonym for ufs2. +.RE +.sp +\fBsun\fP +.RS 4 +For filesystems created by SunOS or Solaris on Sparc. +.RE +.sp +\fBsunx86\fP +.RS 4 +For filesystems created by Solaris on x86. +.RE +.sp +\fBhp\fP +.RS 4 +For filesystems created by HP\-UX, read\-only. +.RE +.sp +\fBnextstep\fP +.RS 4 +For filesystems created by NeXTStep (on NeXT station) (currently read only). +.RE +.sp +\fBnextstep\-cd\fP +.RS 4 +For NextStep CDROMs (block_size == 2048), read\-only. +.RE +.sp +\fBopenstep\fP +.RS 4 +For filesystems created by OpenStep (currently read only). The same filesystem type is also used by macOS. +.RE +.RE +.sp +\fBonerror=\fP\fIvalue\fP +.RS 4 +Set behavior on error: +.sp +\fBpanic\fP +.RS 4 +If an error is encountered, cause a kernel panic. +.RE +.sp +[\fBlock\fP|\fBumount\fP|\fBrepair\fP] +.RS 4 +These mount options don\(cqt do anything at present; when an error is encountered only a console message is printed. +.RE +.RE +.SS "Mount options for umsdos" +.sp +See mount options for msdos. The \fBdotsOK\fP option is explicitly killed by \fIumsdos\fP. +.SS "Mount options for vfat" +.sp +First of all, the mount options for \fIfat\fP are recognized. The \fBdotsOK\fP option is explicitly killed by \fIvfat\fP. Furthermore, there are +.sp +\fBuni_xlate\fP +.RS 4 +Translate unhandled Unicode characters to special escaped sequences. This lets you backup and restore filenames that are created with any Unicode characters. 
Without this option, a \(aq?\(aq is used when no translation is possible. The escape character is \(aq:\(aq because it is otherwise invalid on the vfat filesystem. The escape sequence that gets used, where u is the Unicode character, is: \(aq:\(aq, (u & 0x3f), ((u>>6) & 0x3f), (u>>12). +.RE +.sp +\fBposix\fP +.RS 4 +Allow two files with names that only differ in case. This option is obsolete. +.RE +.sp +\fBnonumtail\fP +.RS 4 +First try to make a short name without sequence number, before trying \fIname~num.ext\fP. +.RE +.sp +\fButf8\fP +.RS 4 +UTF8 is the filesystem safe 8\-bit encoding of Unicode that is used by the console. It can be enabled for the filesystem with this option or disabled with utf8=0, utf8=no or utf8=false. If \fIuni_xlate\fP gets set, UTF8 gets disabled. +.RE +.sp +\fBshortname=\fP\fImode\fP +.RS 4 +Defines the behavior for creation and display of filenames which fit into 8.3 characters. If a long name for a file exists, it will always be the preferred one for display. There are four \fImode\fPs: +.sp +\fBlower\fP +.RS 4 +Force the short name to lower case upon display; store a long name when the short name is not all upper case. +.RE +.sp +\fBwin95\fP +.RS 4 +Force the short name to upper case upon display; store a long name when the short name is not all upper case. +.RE +.sp +\fBwinnt\fP +.RS 4 +Display the short name as is; store a long name when the short name is not all lower case or all upper case. +.RE +.sp +\fBmixed\fP +.RS 4 +Display the short name as is; store a long name when the short name is not all upper case. This mode is the default since Linux 2.6.32. +.RE +.RE +.SS "Mount options for usbfs" +.sp +\fBdevuid=\fP\fIuid\fP and \fBdevgid=\fP\fIgid\fP and \fBdevmode=\fP\fImode\fP +.RS 4 +Set the owner and group and mode of the device files in the usbfs filesystem (default: uid=gid=0, mode=0644). The mode is given in octal. 
+.RE +.sp +\fBbusuid=\fP\fIuid\fP and \fBbusgid=\fP\fIgid\fP and \fBbusmode=\fP\fImode\fP +.RS 4 +Set the owner and group and mode of the bus directories in the usbfs filesystem (default: uid=gid=0, mode=0555). The mode is given in octal. +.RE +.sp +\fBlistuid=\fP\fIuid\fP and \fBlistgid=\fP\fIgid\fP and \fBlistmode=\fP\fImode\fP +.RS 4 +Set the owner and group and mode of the file \fIdevices\fP (default: uid=gid=0, mode=0444). The mode is given in octal. +.RE +.SH "DM\-VERITY SUPPORT" +.sp +The device\-mapper verity target provides read\-only transparent integrity checking of block devices using kernel crypto API. The \fBmount\fP command can open the dm\-verity device and do the integrity verification before the device filesystem is mounted. Requires libcryptsetup within libmount (optionally via \fBdlopen\fP(3)). If libcryptsetup supports extracting the root hash of an already mounted device, existing devices will be automatically reused in case of a match. Mount options for dm\-verity: +.sp +\fBverity.hashdevice=\fP\fIpath\fP +.RS 4 +Path to the hash tree device associated with the source volume to pass to dm\-verity. +.RE +.sp +\fBverity.roothash=\fP\fIhex\fP +.RS 4 +Hex\-encoded hash of the root of \fIverity.hashdevice\fP. Mutually exclusive with \fIverity.roothashfile.\fP +.RE +.sp +\fBverity.roothashfile=\fP\fIpath\fP +.RS 4 +Path to file containing the hex\-encoded hash of the root of \fIverity.hashdevice.\fP Mutually exclusive with \fIverity.roothash.\fP +.RE +.sp +\fBverity.hashoffset=\fP\fIoffset\fP +.RS 4 +If the hash tree device is embedded in the source volume, \fIoffset\fP (default: 0) is used by dm\-verity to get to the tree. +.RE +.sp +\fBverity.fecdevice=\fP\fIpath\fP +.RS 4 +Path to the Forward Error Correction (FEC) device associated with the source volume to pass to dm\-verity. Optional. Requires kernel built with \fBCONFIG_DM_VERITY_FEC\fP. 
+.RE +.sp +\fBverity.fecoffset=\fP\fIoffset\fP +.RS 4 +If the FEC device is embedded in the source volume, \fIoffset\fP (default: 0) is used by dm\-verity to get to the FEC area. Optional. +.RE +.sp +\fBverity.fecroots=\fP\fIvalue\fP +.RS 4 +Parity bytes for FEC (default: 2). Optional. +.RE +.sp +\fBverity.roothashsig=\fP\fIpath\fP +.RS 4 +Path to \fBpkcs7\fP(1ssl) signature of root hash hex string. Requires crypt_activate_by_signed_key() from cryptsetup and kernel built with \fBCONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG\fP. For device reuse, signatures have to be either used by all mounts of a device or by none. Optional. +.RE +.sp +\fBverity.oncorruption=\fP\fIignore\fP|\fIrestart\fP|\fIpanic\fP +.RS 4 +Instruct the kernel to ignore, reboot or panic when corruption is detected. By default the I/O operation simply fails. Requires Linux 4.1 or newer, and libcryptsetup 2.3.4 or newer. Optional. +.RE +.sp +Supported since util\-linux v2.35. +.sp +For example commands: +.sp +.if n .RS 4 +.nf +.fam C +mksquashfs /etc /tmp/etc.squashfs +dd if=/dev/zero of=/tmp/etc.hash bs=1M count=10 +veritysetup format /tmp/etc.squashfs /tmp/etc.hash +openssl smime \-sign \-in <hash> \-nocerts \-inkey private.key \(rs +\-signer private.crt \-noattr \-binary \-outform der \-out /tmp/etc.roothash.p7s +mount \-o verity.hashdevice=/tmp/etc.hash,verity.roothash=<hash>,\(rs +verity.roothashsig=/tmp/etc.roothash.p7s /tmp/etc.squashfs /mnt +.fam +.fi +.if n .RE +.sp +create squashfs image from \fI/etc\fP directory, verity hash device and mount verified filesystem image to \fI/mnt\fP. The kernel will verify that the root hash is signed by a key from the kernel keyring if roothashsig is used. +.SH "LOOP\-DEVICE SUPPORT" +.sp +One further possible type is a mount via the loop device. 
For example, the command +.RS 3 +.ll -.6i +.sp +\fBmount /tmp/disk.img /mnt \-t vfat \-o loop=/dev/loop3\fP +.br +.RE +.ll +.sp +will set up the loop device \fI/dev/loop3\fP to correspond to the file \fI/tmp/disk.img\fP, and then mount this device on \fI/mnt\fP. +.sp +If no explicit loop device is mentioned (but just an option \(aq\fB\-o loop\fP\(aq is given), then \fBmount\fP will try to find some unused loop device and use that, for example +.RS 3 +.ll -.6i +.sp +\fBmount /tmp/disk.img /mnt \-o loop\fP +.br +.RE +.ll +.sp +The \fBmount\fP command \fBautomatically\fP creates a loop device from a regular file if a filesystem type is not specified or the filesystem is known for libblkid, for example: +.RS 3 +.ll -.6i +.sp +\fBmount /tmp/disk.img /mnt\fP +.sp +\fBmount \-t ext4 /tmp/disk.img /mnt\fP +.br +.RE +.ll +.sp +This type of mount knows about three options, namely \fBloop\fP, \fBoffset\fP and \fBsizelimit\fP, that are really options to \fBlosetup\fP(8). (These options can be used in addition to those specific to the filesystem type.) +.sp +Since Linux 2.6.25 auto\-destruction of loop devices is supported, meaning that any loop device allocated by \fBmount\fP will be freed by \fBumount\fP independently of \fI/etc/mtab\fP. +.sp +You can also free a loop device by hand, using \fBlosetup \-d\fP or \fBumount \-d\fP. +.sp +Since util\-linux v2.29, \fBmount\fP re\-uses the loop device rather than initializing a new device if the same backing file is already used for some loop device with the same offset and sizelimit. This is necessary to avoid a filesystem corruption. 
+.SH "EXIT STATUS" +.sp +\fBmount\fP has the following exit status values (the bits can be ORed): +.sp +\fB0\fP +.RS 4 +success +.RE +.sp +\fB1\fP +.RS 4 +incorrect invocation or permissions +.RE +.sp +\fB2\fP +.RS 4 +system error (out of memory, cannot fork, no more loop devices) +.RE +.sp +\fB4\fP +.RS 4 +internal \fBmount\fP bug +.RE +.sp +\fB8\fP +.RS 4 +user interrupt +.RE +.sp +\fB16\fP +.RS 4 +problems writing or locking \fI/etc/mtab\fP +.RE +.sp +\fB32\fP +.RS 4 +mount failure +.RE +.sp +\fB64\fP +.RS 4 +some mount succeeded +.sp +The command \fBmount \-a\fP returns 0 (all succeeded), 32 (all failed), or 64 (some failed, some succeeded). +.RE +.SH "EXTERNAL HELPERS" +.sp +The syntax of external mount helpers is: +.sp +\fB/sbin/mount.\fP\fIsuffix\fP \fIspec dir\fP [\fB\-sfnv\fP] [\fB\-N\fP \fInamespace\fP] [\fB\-o\fP \fIoptions\fP] [\fB\-t\fP \fItype\fP\fB.\fP\fIsubtype\fP] +.sp +where the \fIsuffix\fP is the filesystem type and the \fB\-sfnvoN\fP options have the same meaning as the normal mount options. The \fB\-t\fP option is used for filesystems with subtypes support (for example \fB/sbin/mount.fuse \-t fuse.sshfs\fP). +.sp +The command \fBmount\fP does not pass the mount options \fBunbindable\fP, \fBrunbindable\fP, \fBprivate\fP, \fBrprivate\fP, \fBslave\fP, \fBrslave\fP, \fBshared\fP, \fBrshared\fP, \fBauto\fP, \fBnoauto\fP, \fBcomment\fP, \fBx\-\fP*, \fBloop\fP, \fBoffset\fP and \fBsizelimit\fP to the mount.<suffix> helpers. All other options are used in a comma\-separated list as an argument to the \fB\-o\fP option. 
+.SH "ENVIRONMENT" +.sp +\fBLIBMOUNT_FSTAB\fP=<path> +.RS 4 +overrides the default location of the \fIfstab\fP file (ignored for suid) +.RE +.sp +\fBLIBMOUNT_MTAB\fP=<path> +.RS 4 +overrides the default location of the \fImtab\fP file (ignored for suid) +.RE +.sp +\fBLIBMOUNT_DEBUG\fP=all +.RS 4 +enables libmount debug output +.RE +.sp +\fBLIBBLKID_DEBUG\fP=all +.RS 4 +enables libblkid debug output +.RE +.sp +\fBLOOPDEV_DEBUG\fP=all +.RS 4 +enables loop device setup debug output +.RE +.SH "FILES" +.sp +See also "\fBThe files /etc/fstab, /etc/mtab and /proc/mounts\fP" section above. +.sp +\fI/etc/fstab\fP +.RS 4 +filesystem table +.RE +.sp +\fI/run/mount\fP +.RS 4 +libmount private runtime directory +.RE +.sp +\fI/etc/mtab\fP +.RS 4 +table of mounted filesystems or symlink to \fI/proc/mounts\fP +.RE +.sp +\fI/etc/mtab~\fP +.RS 4 +lock file (unused on systems with \fImtab\fP symlink) +.RE +.sp +\fI/etc/mtab.tmp\fP +.RS 4 +temporary file (unused on systems with \fImtab\fP symlink) +.RE +.sp +\fI/etc/filesystems\fP +.RS 4 +a list of filesystem types to try +.RE +.SH "HISTORY" +.sp +A \fBmount\fP command existed in Version 5 AT&T UNIX. +.SH "BUGS" +.sp +It is possible for a corrupted filesystem to cause a crash. +.sp +Some Linux filesystems don\(cqt support \fB\-o sync\fP and \fB\-o dirsync\fP (the ext2, ext3, ext4, fat and vfat filesystems \fIdo\fP support synchronous updates (a la BSD) when mounted with the \fBsync\fP option). +.sp +The \fB\-o remount\fP may not be able to change mount parameters (all \fIext2fs\fP\-specific parameters, except \fBsb\fP, are changeable with a remount, for example, but you can\(cqt change \fBgid\fP or \fBumask\fP for the \fIfatfs\fP). +.sp +It is possible that the files \fI/etc/mtab\fP and \fI/proc/mounts\fP don\(cqt match on systems with a regular \fImtab\fP file. The first file is based only on the \fBmount\fP command options, but the content of the second file also depends on the kernel and other settings (e.g.
on a remote NFS server \(em in certain cases the \fBmount\fP command may report unreliable information about an NFS mount point and the \fI/proc/mounts\fP file usually contains more reliable information.) This is another reason to replace the \fImtab\fP file with a symlink to the \fI/proc/mounts\fP file. +.sp +Checking files on NFS filesystems referenced by file descriptors (i.e. the \fBfcntl\fP and \fBioctl\fP families of functions) may lead to inconsistent results due to the lack of a consistency check in the kernel even if the \fBnoac\fP mount option is used. +.sp +The \fBloop\fP option with the \fBoffset\fP or \fBsizelimit\fP options used may fail when using older kernels if the \fBmount\fP command can\(cqt confirm that the size of the block device has been configured as requested. This situation can be worked around by using the \fBlosetup\fP(8) command manually before calling \fBmount\fP with the configured loop device. +.SH "AUTHORS" +.sp +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +\fBmount\fP(2), +\fBumount\fP(2), +\fBfilesystems\fP(5), +\fBfstab\fP(5), +\fBnfs\fP(5), +\fBxfs\fP(5), +\fBmount_namespaces\fP(7), +\fBxattr\fP(7), +\fBe2label\fP(8), +\fBfindmnt\fP(8), +\fBlosetup\fP(8), +\fBlsblk\fP(8), +\fBmke2fs\fP(8), +\fBmountd\fP(8), +\fBnfsd\fP(8), +\fBswapon\fP(8), +\fBtune2fs\fP(8), +\fBumount\fP(8), +\fBxfs_admin\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBmount\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/mount.8.adoc b/sys-utils/mount.8.adoc new file mode 100644 index 0000000..343d7e2 --- /dev/null +++ b/sys-utils/mount.8.adoc @@ -0,0 +1,1594 @@ +//po4a: entry man manual +//// +Copyright (c) 1996-2004 Andries Brouwer +Copyright (C) 2006-2012 Karel Zak <kzak@redhat.com> + +This page is somewhat derived from a page that was +(c) 1980, 1989, 1991 The Regents of the University of California +and had been heavily modified by Rik Faith and myself. +(Probably no BSD text remains.) +Fragments of text were written by Werner Almesberger, Remy Card, +Stephen Tweedie and Eric Youngdale. + +This is free documentation; you can redistribute it and/or +modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of +the License, or (at your option) any later version. + +The GNU General Public License's references to "object code" +and "executables" are to be interpreted as the output of any +document formatting or typesetting system, including +intermediate and printed output. + +This manual is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+//// += mount(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: mount +:asterisk: * + +== NAME + +mount - mount a filesystem + +== SYNOPSIS + +*mount* [*-h*|*-V*] + +*mount* [*-l*] [*-t* _fstype_] + +*mount* *-a* [*-fFnrsvw*] [*-t* _fstype_] [*-O* _optlist_] + +*mount* [*-fnrsvw*] [*-o* _options_] _device_|_mountpoint_ + +*mount* [*-fnrsvw*] [*-t* _fstype_] [*-o* _options_] _device mountpoint_ + +*mount* *--bind*|*--rbind*|*--move* _olddir newdir_ + +*mount* *--make-*[*shared*|*slave*|*private*|*unbindable*|*rshared*|*rslave*|*rprivate*|*runbindable*] _mountpoint_ + +== DESCRIPTION + +All files accessible in a Unix system are arranged in one big tree, the file hierarchy, rooted at _/_. These files can be spread out over several devices. The *mount* command serves to attach the filesystem found on some device to the big file tree. Conversely, the *umount*(8) command will detach it again. The filesystem is used to control how data is stored on the device or provided in a virtual way by network or other services. + +The standard form of the *mount* command is: + +____ +*mount -t* _type device dir_ +____ + +This tells the kernel to attach the filesystem found on _device_ (which is of type _type_) at the directory _dir_. The option *-t* _type_ is optional. The *mount* command is usually able to detect a filesystem. The root permissions are necessary to mount a filesystem by default. See section "Non-superuser mounts" below for more details. The previous contents (if any) and owner and mode of _dir_ become invisible, and as long as this filesystem remains mounted, the pathname _dir_ refers to the root of the filesystem on _device_. + +If only the directory or the device is given, for example: + +____ +*mount* _/dir_ +____ + +then *mount* looks for a mountpoint (and if not found then for a device) in the _/etc/fstab_ file. 
It's possible to use the *--target* or *--source* options to avoid ambiguous interpretation of the given argument. For example: + +____ +*mount --target* _/mountpoint_ +____ + +The same filesystem may be mounted more than once, and in some cases (e.g., network filesystems) the same filesystem may be mounted on the same mountpoint multiple times. The *mount* command does not implement any policy to control this behavior. All behavior is controlled by the kernel and it is usually specific to the filesystem driver. The exception is *--all*, in this case already mounted filesystems are ignored (see *--all* below for more details). + +=== Listing the mounts + +The listing mode is maintained for backward compatibility only. + +For more robust and customizable output use *findmnt*(8), *especially in your scripts*. Note that control characters in the mountpoint name are replaced with '?'. + +The following command lists all mounted filesystems (of type _type_): + +____ +*mount* [*-l*] [*-t* _type_] +____ + +The option *-l* adds labels to this listing. See below. + +=== Indicating the device and filesystem + +Most devices are indicated by a filename (of a block special device), like _/dev/sda1_, but there are other possibilities. For example, in the case of an NFS mount, _device_ may look like _knuth.cwi.nl:/dir_. + +The device names of disk partitions are unstable; hardware reconfiguration, and adding or removing a device can cause changes in names. This is the reason why it's strongly recommended to use filesystem or partition identifiers like UUID or LABEL. Currently supported identifiers (tags): + +LABEL=__label__:: +Human readable filesystem identifier. See also *-L*. + +UUID=__uuid__:: +Filesystem universally unique identifier. The format of the UUID is usually a series of hex digits separated by hyphens. See also *-U*. ++ +Note that *mount* uses UUIDs as strings. The UUIDs from the command line or from *fstab*(5) are not converted to internal binary representation. 
The string representation of the UUID should be based on lower case characters. + +PARTLABEL=__label__:: +Human readable partition identifier. This identifier is independent on filesystem and does not change by *mkfs* or *mkswap* operations. It's supported for example for GUID Partition Tables (GPT). + +PARTUUID=__uuid__:: +Partition universally unique identifier. This identifier is independent on filesystem and does not change by *mkfs* or *mkswap* operations. It's supported for example for GUID Partition Tables (GPT). + +ID=__id__:: +Hardware block device ID as generated by udevd. This identifier is usually based on WWN (unique storage identifier) and assigned by the hardware manufacturer. See *ls /dev/disk/by-id* for more details, this directory and running udevd is required. This identifier is not recommended for generic use as the identifier is not strictly defined and it depends on udev, udev rules and hardware. + +The command *lsblk --fs* provides an overview of filesystems, LABELs and UUIDs on available block devices. The command *blkid -p <device>* provides details about a filesystem on the specified device. + +Don't forget that there is no guarantee that UUIDs and labels are really unique, especially if you move, share or copy the device. Use *lsblk -o +UUID,PARTUUID* to verify that the UUIDs are really unique in your system. + +The recommended setup is to use tags (e.g. *UUID*=_uuid_) rather than _/dev/disk/by-{label,uuid,id,partuuid,partlabel}_ udev symlinks in the _/etc/fstab_ file. Tags are more readable, robust and portable. The *mount*(8) command internally uses udev symlinks, so the use of symlinks in _/etc/fstab_ has no advantage over tags. For more details see *libblkid*(3). + +The _proc_ filesystem is not associated with a special device, and when mounting it, an arbitrary keyword - for example, __proc__ - can be used instead of a device specification. 
(The customary choice _none_ is less fortunate: the error message 'none already mounted' from *mount* can be confusing.) + +=== The files /etc/fstab, /etc/mtab and /proc/mounts + +The file _/etc/fstab_ (see *fstab*(5)), may contain lines describing what devices are usually mounted where, using which options. The default location of the *fstab*(5) file can be overridden with the *--fstab* _path_ command-line option (see below for more details). + +The command + +____ +*mount -a* [*-t* _type_] [*-O* _optlist_] +____ + +(usually given in a bootscript) causes all filesystems mentioned in _fstab_ (of the proper type and/or having or not having the proper options) to be mounted as indicated, except for those whose line contains the *noauto* keyword. Adding the *-F* option will make *mount* fork, so that the filesystems are mounted in parallel. + +When mounting a filesystem mentioned in _fstab_ or _mtab_, it suffices to specify on the command line only the device, or only the mount point. + +The programs *mount* and *umount*(8) traditionally maintained a list of currently mounted filesystems in the file _/etc/mtab_. The support for regular classic _/etc/mtab_ is completely disabled at compile time by default, because on current Linux systems it is better to make _/etc/mtab_ a symlink to _/proc/mounts_ instead. The regular _mtab_ file maintained in userspace cannot reliably work with namespaces, containers and other advanced Linux features. If the regular _mtab_ support is enabled, then it's possible to use the file as well as the symlink. + +If no arguments are given to *mount*, the list of mounted filesystems is printed. + +If you want to override mount options from _/etc/fstab_, you have to use the *-o* option: + +____ +*mount* __device__|__dir__ *-o* _options_ +____ + +and then the mount options from the command line will be appended to the list of options from _/etc/fstab_. This default behaviour can be changed using the *--options-mode* command-line option. 
The usual behavior is that the last option wins if there are conflicting ones. + +The *mount* program does not read the _/etc/fstab_ file if both _device_ (or LABEL, UUID, ID, PARTUUID or PARTLABEL) and _dir_ are specified. For example, to mount device *foo* at */dir*: + +____ +*mount /dev/foo /dir* +____ + +This default behaviour can be changed by using the *--options-source-force* command-line option to always read configuration from _fstab_. For non-root users *mount* always reads the _fstab_ configuration. + +=== Non-superuser mounts + +Normally, only the superuser can mount filesystems. However, when _fstab_ contains the *user* option on a line, anybody can mount the corresponding filesystem. + +Thus, given a line + +____ +*/dev/cdrom /cd iso9660 ro,user,noauto,unhide* +____ + +any user can mount the iso9660 filesystem found on an inserted CDROM using the command: + +____ +*mount /cd* +____ + +Note that *mount* is very strict about non-root users and all paths specified on command line are verified before _fstab_ is parsed or a helper program is executed. It's strongly recommended to use a valid mountpoint to specify filesystem, otherwise *mount* may fail. For example it's a bad idea to use NFS or CIFS source on command line. + +Since util-linux 2.35, *mount* does not exit when user permissions are inadequate according to libmount's internal security rules. Instead, it drops suid permissions and continues as regular non-root user. This behavior supports use-cases where root permissions are not necessary (e.g., fuse filesystems, user namespaces, etc). + +For more details, see *fstab*(5). Only the user that mounted a filesystem can unmount it again. If any user should be able to unmount it, then use *users* instead of *user* in the _fstab_ line. The *owner* option is similar to the *user* option, with the restriction that the user must be the owner of the special file. This may be useful e.g. 
for _/dev/fd_ if a login script makes the console user owner of this device. The *group* option is similar, with the restriction that the user must be a member of the group of the special file. + +=== Bind mount operation + +Remount part of the file hierarchy somewhere else. The call is: + +____ +*mount --bind* _olddir newdir_ +____ + +or by using this _fstab_ entry: + +____ +**/**__olddir__ **/**__newdir__ *none bind* +____ + +After this call the same contents are accessible in two places. + +It is important to understand that "bind" does not create any second-class or special node in the kernel VFS. The "bind" is just another operation to attach a filesystem. No information is stored anywhere that the filesystem has been attached by a "bind" operation. The _olddir_ and _newdir_ are independent and the _olddir_ may be unmounted. + +One can also remount a single file (on a single file). It's also possible to use a bind mount to create a mountpoint from a regular directory, for example: + +____ +*mount --bind foo foo* +____ + +The bind mount call attaches only (part of) a single filesystem, not possible submounts. The entire file hierarchy including submounts can be attached to a second place by using: + +____ +*mount --rbind* _olddir newdir_ +____ + +Note that the filesystem mount options maintained by the kernel will remain the same as those on the original mount point. The userspace mount options (e.g., _netdev) will not be copied by *mount* and it's necessary to explicitly specify the options on the *mount* command line. + +Since util-linux 2.27 *mount* permits changing the mount options by passing the relevant options along with *--bind*. For example: + +____ +*mount -o bind,ro foo foo* +____ + +This feature is not supported by the Linux kernel; it is implemented in userspace by an additional *mount*(2) remounting system call. This solution is not atomic.
+ +The alternative (classic) way to create a read-only bind mount is to use the remount operation, for example: + +____ +*mount --bind* _olddir newdir_ + +*mount -o remount,bind,ro* _olddir newdir_ +____ + +Note that a read-only bind will create a read-only mountpoint (VFS entry), but the original filesystem superblock will still be writable, meaning that the _olddir_ will be writable, but the _newdir_ will be read-only. + +It's also possible to change nosuid, nodev, noexec, noatime, nodiratime, relatime and nosymfollow VFS entry flags via a "remount,bind" operation. The other flags (for example filesystem-specific flags) are silently ignored. It's impossible to change mount options recursively (for example with *-o rbind,ro*). + +Since util-linux 2.31, *mount* ignores the *bind* flag from _/etc/fstab_ on a *remount* operation (if *-o remount* is specified on command line). This is necessary to fully control mount options on remount by command line. In previous versions the bind flag has been always applied and it was impossible to re-define mount options without interaction with the bind semantic. This *mount* behavior does not affect situations when "remount,bind" is specified in the _/etc/fstab_ file. + +=== The move operation + +Move a *mounted tree* to another place (atomically). The call is: + +____ +*mount --move* _olddir newdir_ +____ + +This will cause the contents which previously appeared under _olddir_ to now be accessible under _newdir_. The physical location of the files is not changed. Note that _olddir_ has to be a mountpoint. + +Note also that moving a mount residing under a shared mount is invalid and unsupported. Use *findmnt -o TARGET,PROPAGATION* to see the current propagation flags. + +=== Shared subtree operations + +Since Linux 2.6.15 it is possible to mark a mount and its submounts as shared, private, slave or unbindable. 
A shared mount provides the ability to create mirrors of that mount such that mounts and unmounts within any of the mirrors propagate to the other mirror. A slave mount receives propagation from its master, but not vice versa. A private mount carries no propagation abilities. An unbindable mount is a private mount which cannot be cloned through a bind operation. The detailed semantics are documented in _Documentation/filesystems/sharedsubtree.txt_ file in the kernel source tree; see also *mount_namespaces*(7). + +Supported operations are: + +.... +mount --make-shared mountpoint +mount --make-slave mountpoint +mount --make-private mountpoint +mount --make-unbindable mountpoint +.... + +The following commands allow one to recursively change the type of all the mounts under a given mountpoint. + +.... +mount --make-rshared mountpoint +mount --make-rslave mountpoint +mount --make-rprivate mountpoint +mount --make-runbindable mountpoint +.... + +*mount* *does not read* *fstab*(5) when a *--make-** operation is requested. All necessary information has to be specified on the command line. + +Note that the Linux kernel does not allow changing multiple propagation flags with a single *mount*(2) system call, and the flags cannot be mixed with other mount options and operations. + +Since util-linux 2.23 the *mount* command can be used to do more propagation (topology) changes by one *mount*(8) call and do it also together with other mount operations. The propagation flags are applied by additional *mount*(2) system calls when the preceding mount operations were successful. Note that this use case is not atomic. It is possible to specify the propagation flags in *fstab*(5) as mount options (*private*, *slave*, *shared*, *unbindable*, *rprivate*, *rslave*, *rshared*, *runbindable*). + +For example: + +.... +mount --make-private --make-unbindable /dev/sda1 /foo +.... + +is the same as: + +.... +mount /dev/sda1 /foo +mount --make-private /foo +mount --make-unbindable /foo +.... 
+ +== COMMAND-LINE OPTIONS + +The full set of mount options used by an invocation of *mount* is determined by first extracting the mount options for the filesystem from the _fstab_ table, then applying any options specified by the *-o* argument, and finally applying a *-r* or *-w* option, when present. + +The *mount* command does not pass all command-line options to the **/sbin/mount.**__suffix__ mount helpers. The interface between *mount* and the mount helpers is described below in the *EXTERNAL HELPERS* section. + +Command-line options available for the *mount* command are: + +*-a*, *--all*:: +Mount all filesystems (of the given types) mentioned in _fstab_ (except for those whose line contains the *noauto* keyword). The filesystems are mounted following their order in _fstab_. The *mount* command compares filesystem source, target (and fs root for bind mount or btrfs) to detect already mounted filesystems. The kernel table with already mounted filesystems is cached during *mount --all*. This means that all duplicated _fstab_ entries will be mounted. ++ +The correct functionality depends on _/proc_ (to detect already mounted filesystems) and on _/sys_ (to evaluate filesystem tags like UUID= or LABEL=). It's strongly recommended to mount _/proc_ and _/sys_ filesystems before *mount -a* is executed, or keep /proc and /sys at the beginning of _fstab_. ++ +The option *--all* is possible to use for remount operation too. In this case all filters (*-t* and *-O*) are applied to the table of already mounted filesystems. ++ +Since version 2.35 it is possible to use the command line option *-o* to alter mount options from _fstab_ (see also *--options-mode*). ++ +Note that it is a bad practice to use *mount -a* for _fstab_ checking. The recommended solution is *findmnt --verify*. + +*-B*, *--bind*:: +Remount a subtree somewhere else (so that its contents are available in both places). See above, under *Bind mounts*. + +*-c*, *--no-canonicalize*:: +Don't canonicalize paths. 
The *mount* command canonicalizes all paths (from the command line or _fstab_) by default. This option can be used together with the *-f* flag for already canonicalized absolute paths. The option is designed for mount helpers which call *mount -i*. It is strongly recommended to not use this command-line option for normal mount operations. ++ +Note that *mount* does not pass this option to the **/sbin/mount.**__type__ helpers. + +*-F*, *--fork*:: +(Used in conjunction with *-a*.) Fork off a new incarnation of *mount* for each device. This will do the mounts on different devices or different NFS servers in parallel. This has the advantage that it is faster; also NFS timeouts proceed in parallel. A disadvantage is that the order of the mount operations is undefined. Thus, you cannot use this option if you want to mount both _/usr_ and _/usr/spool_. + +*-f, --fake*:: +Causes everything to be done except for the actual system call; if it's not obvious, this "fakes" mounting the filesystem. This option is useful in conjunction with the *-v* flag to determine what the *mount* command is trying to do. It can also be used to add entries for devices that were mounted earlier with the *-n* option. The *-f* option checks for an existing record in _/etc/mtab_ and fails when the record already exists (with a regular non-fake mount, this check is done by the kernel). + +*-i, --internal-only*:: +Don't call the **/sbin/mount.**__filesystem__ helper even if it exists. + +*-L*, *--label* _label_:: +Mount the partition that has the specified _label_. + +*-l*, *--show-labels*:: +Add the labels in the mount output. *mount* must have permission to read the disk device (e.g. be set-user-ID root) for this to work. One can set such a label for ext2, ext3 or ext4 using the *e2label*(8) utility, or for XFS using *xfs_admin*(8), or for reiserfs using *reiserfstune*(8). + +*-M*, *--move*:: +Move a subtree to some other place. See above, the subsection *The move operation*. 
+ +*-m*, **--mkdir**[=__mode__]:: +Allow to make a target directory (mountpoint) if it does not exist yet. Alias to "-o X-mount.mkdir[=mode]", the default mode is 0755. For more details see *X-mount.mkdir* below. + +*-n*, *--no-mtab*:: +Mount without writing in _/etc/mtab_. This is necessary for example when _/etc_ is on a read-only filesystem. + +*-N*, *--namespace* _ns_:: +Perform the mount operation in the mount namespace specified by _ns_. _ns_ is either the PID of a process running in that namespace or a special file representing that namespace. ++ +*mount* switches to the mount namespace when it reads _/etc/fstab_, writes _/etc/mtab_ (or writes to _/run/mount_) and calls *mount*(2), otherwise it runs in the original mount namespace. This means that the target namespace does not have to contain any libraries or other requirements necessary to execute the *mount*(2) call. ++ +See *mount_namespaces*(7) for more information. + +*-O*, *--test-opts* _opts_:: +Limit the set of filesystems to which the *-a* option applies. In this regard it is like the *-t* option except that *-O* is useless without *-a*. For example, the command ++ +*mount -a -O no_netdev* ++ +mounts all filesystems except those which have the option _netdev_ specified in the options field in the _/etc/fstab_ file. ++ +It is different from *-t* in that each option is matched exactly; a leading *no* at the beginning of one option does not negate the rest. ++ +The *-t* and *-O* options are cumulative in effect; that is, the command ++ +*mount -a -t ext2 -O _netdev* ++ +mounts all ext2 filesystems with the _netdev option, not all filesystems that are either ext2 or have the _netdev option specified. + +*-o*, *--options* _opts_:: +Use the specified mount options. The _opts_ argument is a comma-separated list. For example: ++ +*mount LABEL=mydisk -o noatime,nodev,nosuid* ++ +For more details, see the *FILESYSTEM-INDEPENDENT MOUNT OPTIONS* and *FILESYSTEM-SPECIFIC MOUNT OPTIONS* sections.
+ +*--options-mode* _mode_:: +Controls how to combine options from _fstab_/_mtab_ with options from the command line. _mode_ can be one of *ignore*, *append*, *prepend* or *replace*. For example, *append* means that options from _fstab_ are appended to options from the command line. The default value is *prepend* -- it means command line options are evaluated after _fstab_ options. Note that the last option wins if there are conflicting ones. + +*--options-source* _source_:: +Source of default options. _source_ is a comma-separated list of *fstab*, *mtab* and *disable*. *disable* disables *fstab* and *mtab* and enables *--options-source-force*. The default value is *fstab,mtab*. + +*--options-source-force*:: +Use options from _fstab_/_mtab_ even if both _device_ and _dir_ are specified. + +*-R*, *--rbind*:: +Remount a subtree and all possible submounts somewhere else (so that its contents are available in both places). See above, the subsection *Bind mounts*. + +*-r*, *--read-only*:: +Mount the filesystem read-only. A synonym is *-o ro*. ++ +Note that, depending on the filesystem type, state and kernel behavior, the system may still write to the device. For example, ext3 and ext4 will replay the journal if the filesystem is dirty. To prevent this kind of write access, you may want to mount an ext3 or ext4 filesystem with the *ro,noload* mount options or set the block device itself to read-only mode, see the *blockdev*(8) command. + +*-s*:: +Tolerate sloppy mount options rather than failing. This will ignore mount options not supported by a filesystem type. Not all filesystems support this option. Currently it's supported by the *mount.nfs* mount helper only. + +*--source* _device_:: +If only one argument for the *mount* command is given, then the argument might be interpreted as the target (mountpoint) or source (device). This option allows you to explicitly define that the argument is the mount source. 
+ +*--target* _directory_:: +If only one argument for the mount command is given, then the argument might be interpreted as the target (mountpoint) or source (device). This option allows you to explicitly define that the argument is the mount target. + +*--target-prefix* _directory_:: +Prepend the specified directory to all mount targets. This option can be used to follow _fstab_, but mount operations are done in another place, for example: ++ +*mount --all --target-prefix /chroot -o X-mount.mkdir* ++ +mounts all from system _fstab_ to _/chroot_; all missing mountpoints are created (due to X-mount.mkdir). See also *--fstab* to use an alternative _fstab_. + +*-T*, *--fstab* _path_:: +Specifies an alternative _fstab_ file. If _path_ is a directory, then the files in the directory are sorted by *strverscmp*(3); files that start with "." or without an _.fstab_ extension are ignored. The option can be specified more than once. This option is mostly designed for initramfs or chroot scripts where additional configuration is specified beyond standard system configuration. ++ +Note that *mount* does not pass the option *--fstab* to the **/sbin/mount.**__type__ helpers, meaning that the alternative _fstab_ files will be invisible for the helpers. This is no problem for normal mounts, but user (non-root) mounts always require _fstab_ to verify the user's rights. + +*-t*, *--types* _fstype_:: +The argument following the *-t* is used to indicate the filesystem type. The filesystem types which are currently supported depend on the running kernel. See _/proc/filesystems_ and _/lib/modules/$(uname -r)/kernel/fs_ for a complete list of the filesystems. The most common are ext2, ext3, ext4, xfs, btrfs, vfat, sysfs, proc, nfs and cifs. ++ +The programs *mount* and *umount*(8) support filesystem subtypes. The subtype is defined by a '.subtype' suffix. For example 'fuse.sshfs'.
It's recommended to use subtype notation rather than add any prefix to the mount source (for example 'sshfs#example.com' is deprecated). ++ +If no *-t* option is given, or if the *auto* type is specified, *mount* will try to guess the desired type. *mount* uses the *libblkid*(3) library for guessing the filesystem type; if that does not turn up anything that looks familiar, *mount* will try to read the file _/etc/filesystems_, or, if that does not exist, _/proc/filesystems_. All of the filesystem types listed there will be tried, except for those that are labeled "nodev" (e.g. _devpts_, _proc_ and _nfs_). If _/etc/filesystems_ ends in a line with a single {asterisk}, mount will read _/proc/filesystems_ afterwards. While trying, all filesystem types will be mounted with the mount option *silent*. +//TRANSLATORS: Keep {asterisk} untranslated. ++ +The *auto* type may be useful for user-mounted floppies. Creating a file _/etc/filesystems_ can be useful to change the probe order (e.g., to try vfat before msdos or ext3 before ext2) or if you use a kernel module autoloader. ++ +More than one type may be specified in a comma-separated list, for the *-t* option as well as in an _/etc/fstab_ entry. The list of filesystem types for the *-t* option can be prefixed with *no* to specify the filesystem types on which no action should be taken. The prefix *no* has no effect when specified in an _/etc/fstab_ entry. ++ +The prefix *no* can be meaningful with the *-a* option. For example, the command ++ +*mount -a -t nomsdos,smbfs* ++ +mounts all filesystems except those of type _msdos_ and _smbfs_. ++ +For most types all the *mount* program has to do is issue a simple *mount*(2) system call, and no detailed knowledge of the filesystem type is required. For a few types however (like nfs, nfs4, cifs, smbfs, ncpfs) an ad hoc code is necessary. The nfs, nfs4, cifs, smbfs, and ncpfs filesystems have a separate mount program. 
In order to make it possible to treat all types in a uniform way, *mount* will execute the program **/sbin/mount.**__type__ (if that exists) when called with type _type_. Since different versions of the *smbmount* program have different calling conventions, */sbin/mount.smbfs* may have to be a shell script that sets up the desired call. + +*-U*, *--uuid* _uuid_:: +Mount the partition that has the specified _uuid_. + +*-v*, *--verbose*:: +Verbose mode. + +*-w*, *--rw*, *--read-write*:: +Mount the filesystem read/write. Read-write is the kernel default and the *mount* default is to try read-only if the previous *mount*(2) syscall with read-write flags on write-protected devices failed. ++ +A synonym is *-o rw*. ++ +Note that specifying *-w* on the command line forces *mount* to never try read-only mount on write-protected devices or already mounted read-only filesystems. + +include::man-common/help-version.adoc[] + +== FILESYSTEM-INDEPENDENT MOUNT OPTIONS + +Some of these options are only useful when they appear in the _/etc/fstab_ file. + +Some of these options could be enabled or disabled by default in the system kernel. To check the current setting see the options in _/proc/mounts_. Note that filesystems also have per-filesystem specific default mount options (see for example *tune2fs -l* output for ext__N__ filesystems). + +The following options apply to any filesystem that is being mounted (but not every filesystem actually honors them - e.g., the *sync* option today has an effect only for ext2, ext3, ext4, fat, vfat, ufs and xfs): + +*async*:: +All I/O to the filesystem should be done asynchronously. (See also the *sync* option.) + +*atime*:: +Do not use the *noatime* feature, so the inode access time is controlled by kernel defaults. See also the descriptions of the *relatime* and *strictatime* mount options. + +*noatime*:: +Do not update inode access times on this filesystem (e.g. for faster access on the news spool to speed up news servers). 
This works for all inode types (directories too), so it implies *nodiratime*. + +*auto*:: +Can be mounted with the *-a* option. + +*noauto*:: +Can only be mounted explicitly (i.e., the *-a* option will not cause the filesystem to be mounted). + +**context=**__context__, **fscontext=**__context__, **defcontext=**__context__, and **rootcontext=**__context__:: +The *context=* option is useful when mounting filesystems that do not support extended attributes, such as a floppy or hard disk formatted with VFAT, or systems that are not normally running under SELinux, such as an ext3 or ext4 formatted disk from a non-SELinux workstation. You can also use *context=* on filesystems you do not trust, such as a floppy. It also helps in compatibility with xattr-supporting filesystems on earlier 2.4.<x> kernel versions. Even where xattrs are supported, you can save time not having to label every file by assigning the entire disk one security context. ++ +A commonly used option for removable media is *context="system_u:object_r:removable_t"*. ++ +The *fscontext=* option works for all filesystems, regardless of their xattr support. The fscontext option sets the overarching filesystem label to a specific security context. This filesystem label is separate from the individual labels on the files. It represents the entire filesystem for certain kinds of permission checks, such as during mount or file creation. Individual file labels are still obtained from the xattrs on the files themselves. The context option actually sets the aggregate context that fscontext provides, in addition to supplying the same label for individual files. ++ +You can set the default security context for unlabeled files using the *defcontext=* option. This overrides the value set for unlabeled files in the policy and requires a filesystem that supports xattr labeling.
++ +The *rootcontext=* option allows you to explicitly label the root inode of a FS being mounted before that FS or inode becomes visible to userspace. This was found to be useful for things like stateless Linux. ++ +Note that the kernel rejects any remount request that includes the context option, *even* when unchanged from the current context. ++ +*Warning: the* _context_ *value might contain commas*, in which case the value has to be properly quoted, otherwise *mount* will interpret the comma as a separator between mount options. Don't forget that the shell strips off quotes and thus *double quoting is required*. For example: +____ +mount -t tmpfs none /mnt -o \ +'context="system_u:object_r:tmp_t:s0:c127,c456",noexec' +____ + +For more details, see *selinux*(8). + +*defaults*:: +Use the default options: *rw*, *suid*, *dev*, *exec*, *auto*, *nouser*, and *async*. ++ +Note that the real set of all default mount options depends on the kernel and filesystem type. See the beginning of this section for more details. + +*dev*:: +Interpret character or block special devices on the filesystem. + +*nodev*:: +Do not interpret character or block special devices on the filesystem. + +*diratime*:: +Update directory inode access times on this filesystem. This is the default. (This option is ignored when *noatime* is set.) + +*nodiratime*:: +Do not update directory inode access times on this filesystem. (This option is implied when *noatime* is set.) + +*dirsync*:: +All directory updates within the filesystem should be done synchronously. This affects the following system calls: *creat*(2), *link*(2), *unlink*(2), *symlink*(2), *mkdir*(2), *rmdir*(2), *mknod*(2) and *rename*(2). + +*exec*:: +Permit execution of binaries and other executable files. + +*noexec*:: +Do not permit direct execution of any binaries on the mounted filesystem. + +*group*:: +Allow an ordinary user to mount the filesystem if one of that user's groups matches the group of the device. 
This option implies the options *nosuid* and *nodev* (unless overridden by subsequent options, as in the option line *group,dev,suid*). + +*iversion*:: +Every time the inode is modified, the i_version field will be incremented. + +*noiversion*:: +Do not increment the i_version inode field. + +*mand*:: +Allow mandatory locks on this filesystem. See *fcntl*(2). This option was deprecated in Linux 5.15. + +*nomand*:: +Do not allow mandatory locks on this filesystem. + +*_netdev*:: +The filesystem resides on a device that requires network access (used to prevent the system from attempting to mount these filesystems until the network has been enabled on the system). + +*nofail*:: +Do not report errors for this device if it does not exist. + +*relatime*:: +Update inode access times relative to modify or change time. Access time is only updated if the previous access time was earlier than the current modify or change time. (Similar to *noatime*, but it doesn't break *mutt*(1) or other applications that need to know if a file has been read since the last time it was modified.) ++ +Since Linux 2.6.30, the kernel defaults to the behavior provided by this option (unless *noatime* was specified), and the *strictatime* option is required to obtain traditional semantics. In addition, since Linux 2.6.30, the file's last access time is always updated if it is more than 1 day old. + +*norelatime*:: +Do not use the *relatime* feature. See also the *strictatime* mount option. + +*strictatime*:: +Explicitly request full atime updates. This makes it possible for the kernel to default to *relatime* or *noatime* but still allow userspace to override it. For more details about the default system mount options see _/proc/mounts_. + +*nostrictatime*:: +Use the kernel's default behavior for inode access time updates. + +*lazytime*:: +Only update times (atime, mtime, ctime) on the in-memory version of the file inode.
++ +This mount option significantly reduces writes to the inode table for workloads that perform frequent random writes to preallocated files. ++ +The on-disk timestamps are updated only when: ++ +* the inode needs to be updated for some change unrelated to file timestamps +* the application employs *fsync*(2), *syncfs*(2), or *sync*(2) +* an undeleted inode is evicted from memory +* more than 24 hours have passed since the inode was written to disk. + +*nolazytime*:: +Do not use the lazytime feature. + +*suid*:: +Honor set-user-ID and set-group-ID bits or file capabilities when executing programs from this filesystem. + +*nosuid*:: +Do not honor set-user-ID and set-group-ID bits or file capabilities when executing programs from this filesystem. In addition, SELinux domain transitions require permission _nosuid_transition_, which in turn needs also policy capability _nnp_nosuid_transition_. + +*silent*:: +Turn on the silent flag. + +*loud*:: +Turn off the silent flag. + +*owner*:: +Allow an ordinary user to mount the filesystem if that user is the owner of the device. This option implies the options *nosuid* and *nodev* (unless overridden by subsequent options, as in the option line *owner,dev,suid*). + +*remount*:: +Attempt to remount an already-mounted filesystem. This is commonly used to change the mount flags for a filesystem, especially to make a readonly filesystem writable. It does not change device or mount point. ++ +The remount operation together with the *bind* flag has special semantics. See above, the subsection *Bind mounts*. ++ +The remount functionality follows the standard way the *mount* command works with options from _fstab_. This means that *mount* does not read _fstab_ (or _mtab_) only when both _device_ and _dir_ are specified. 
++ +*mount -o remount,rw /dev/foo /dir* ++ +After this call all old mount options are replaced and arbitrary stuff from _fstab_ (or _mtab_) is ignored, except the *loop=* option which is internally generated and maintained by the *mount* command. ++ +*mount -o remount,rw /dir* ++ +After this call, *mount* reads _fstab_ and merges these options with the options from the command line (*-o*). If no mountpoint is found in _fstab_, then a remount with unspecified source is allowed. ++ +*mount* allows the use of *--all* to remount all already mounted filesystems which match a specified filter (*-O* and *-t*). For example: ++ +*mount --all -o remount,ro -t vfat* ++ +remounts all already mounted vfat filesystems in read-only mode. Each of the filesystems is remounted by *mount -o remount,ro* _/dir_ semantic. This means the *mount* command reads _fstab_ or _mtab_ and merges these options with the options from the command line. + +*ro*:: +Mount the filesystem read-only. + +*rw*:: +Mount the filesystem read-write. + +*sync*:: +All I/O to the filesystem should be done synchronously. In the case of media with a limited number of write cycles (e.g. some flash drives), *sync* may cause life-cycle shortening. + +*user*:: +Allow an ordinary user to mount the filesystem. The name of the mounting user is written to the _mtab_ file (or to the private libmount file in _/run/mount_ on systems without a regular _mtab_) so that this same user can unmount the filesystem again. This option implies the options *noexec*, *nosuid*, and *nodev* (unless overridden by subsequent options, as in the option line *user,exec,dev,suid*). + +*nouser*:: +Forbid an ordinary user to mount the filesystem. This is the default; it does not imply any other options. + +*users*:: +Allow any user to mount and to unmount the filesystem, even when some other ordinary user mounted it. 
This option implies the options *noexec*, *nosuid*, and *nodev* (unless overridden by subsequent options, as in the option line *users,exec,dev,suid*). + +*X-**:: +All options prefixed with "X-" are interpreted as comments or as userspace application-specific options. These options are not stored in user space (e.g., _mtab_ file), nor sent to the mount._type_ helpers nor to the *mount*(2) system call. The suggested format is **X-**__appname__._option_. + +*x-**:: +The same as *X-** options, but stored permanently in user space. This means the options are also available for *umount*(8) or other operations. Note that maintaining mount options in user space is tricky, because it's necessary to use libmount-based tools and there is no guarantee that the options will always be available (for example after a move mount operation or in an unshared namespace). ++ +Note that before util-linux v2.30 the x-* options were not maintained by libmount or stored in user space (functionality was the same as for X-* now), but due to the growing number of use-cases (in initrd, systemd etc.) the functionality has been extended to keep existing _fstab_ configurations usable without a change. + +*X-mount.mkdir*[=_mode_]:: +Allow creating the target directory (mountpoint) if it does not exist yet. The optional argument _mode_ specifies the filesystem access mode used for *mkdir*(2) in octal notation. The default mode is 0755. This functionality is supported only for root users or when *mount* is executed without suid permissions. The option is also supported as *x-mount.mkdir*, but this notation is deprecated since v2.30. See also the *--mkdir* command line option. + +**X-mount.subdir=**__directory__:: +Allow mounting a sub-directory from a filesystem instead of the root directory. For now, this feature is implemented by temporarily mounting the filesystem root directory in an unshared namespace, then binding the sub-directory to the final mount point and unmounting the root of the filesystem.
The sub-directory mount shows up atomically for the rest of the system although it is implemented by multiple *mount*(2) syscalls. This feature is EXPERIMENTAL. + +*nosymfollow*:: +Do not follow symlinks when resolving paths. Symlinks can still be created, and *readlink*(1), *readlink*(2), *realpath*(1), and *realpath*(3) all still work properly. + +== FILESYSTEM-SPECIFIC MOUNT OPTIONS + +This section lists options that are specific to particular filesystems. Where possible, you should first consult filesystem-specific manual pages for details. Some of those pages are listed in the following table. + +[cols=",",options="header",] +|=== +|*Filesystem(s)* |*Manual page* +|btrfs |*btrfs*(5) +|cifs |*mount.cifs*(8) +|ext2, ext3, ext4 |*ext4*(5) +|fuse |*fuse*(8) +|nfs |*nfs*(5) +|tmpfs |*tmpfs*(5) +|xfs |*xfs*(5) +|=== + +Note that some of the pages listed above might be available only after you install the respective userland tools. + +The following options apply only to certain filesystems. We sort them by filesystem. All options follow the *-o* flag. + +What options are supported depends a bit on the running kernel. Further information may be available in filesystem-specific files in the kernel source subdirectory _Documentation/filesystems_. + +=== Mount options for adfs + +**uid=**__value__ and **gid=**__value__:: +Set the owner and group of the files in the filesystem (default: uid=gid=0). + +**ownmask=**__value__ and **othmask=**__value__:: +Set the permission mask for ADFS 'owner' permissions and 'other' permissions, respectively (default: 0700 and 0077, respectively). See also _/usr/src/linux/Documentation/filesystems/adfs.rst_. + +=== Mount options for affs + +**uid=**__value__ and **gid=**__value__:: +Set the owner and group of the root of the filesystem (default: uid=gid=0, but with option *uid* or *gid* without specified value, the UID and GID of the current process are taken). 
+ +**setuid=**__value__ and **setgid=**__value__:: +Set the owner and group of all files. + +**mode=**__value__:: +Set the mode of all files to _value_ & 0777 disregarding the original permissions. Add search permission to directories that have read permission. The value is given in octal. + +*protect*:: +Do not allow any changes to the protection bits on the filesystem. + +*usemp*:: +Set UID and GID of the root of the filesystem to the UID and GID of the mount point upon the first sync or umount, and then clear this option. Strange... + +*verbose*:: +Print an informational message for each successful mount. + +**prefix=**__string__:: +Prefix used before volume name, when following a link. + +**volume=**__string__:: +Prefix (of length at most 30) used before '/' when following a symbolic link. + +**reserved=**__value__:: +(Default: 2.) Number of unused blocks at the start of the device. + +**root=**__value__:: +Give explicitly the location of the root block. + +**bs=**__value__:: +Give blocksize. Allowed values are 512, 1024, 2048, 4096. + +**grpquota**|**noquota**|**quota**|*usrquota*:: +These options are accepted but ignored. (However, quota utilities may react to such strings in _/etc/fstab_.) + +=== Mount options for debugfs + +The debugfs filesystem is a pseudo filesystem, traditionally mounted on _/sys/kernel/debug_. As of kernel version 3.4, debugfs has the following options: + +**uid=**__n__**, gid=**__n__:: +Set the owner and group of the mountpoint. + +**mode=**__value__:: +Sets the mode of the mountpoint. + +=== Mount options for devpts + +The devpts filesystem is a pseudo filesystem, traditionally mounted on _/dev/pts_. In order to acquire a pseudo terminal, a process opens _/dev/ptmx_; the number of the pseudo terminal is then made available to the process and the pseudo terminal slave can be accessed as _/dev/pts/_<number>. 
+ +**uid=**__value__ and **gid=**__value__:: +This sets the owner or the group of newly created pseudo terminals to the specified values. When nothing is specified, they will be set to the UID and GID of the creating process. For example, if there is a tty group with GID 5, then *gid=5* will cause newly created pseudo terminals to belong to the tty group. + +**mode=**__value__:: +Set the mode of newly created pseudo terminals to the specified value. The default is 0600. A value of *mode=620* and *gid=5* makes "mesg y" the default on newly created pseudo terminals. + +*newinstance*:: +Create a private instance of the devpts filesystem, such that indices of pseudo terminals allocated in this new instance are independent of indices created in other instances of devpts. ++ +All mounts of devpts without this *newinstance* option share the same set of pseudo terminal indices (i.e., legacy mode). Each mount of devpts with the *newinstance* option has a private set of pseudo terminal indices. ++ +This option is mainly used to support containers in the Linux kernel. It is implemented in Linux kernel versions starting with 2.6.29. Further, this mount option is valid only if *CONFIG_DEVPTS_MULTIPLE_INSTANCES* is enabled in the kernel configuration. ++ +To use this option effectively, _/dev/ptmx_ must be a symbolic link to _pts/ptmx_. See _Documentation/filesystems/devpts.txt_ in the Linux kernel source tree for details. + +**ptmxmode=**__value__:: +Set the mode for the new _ptmx_ device node in the devpts filesystem. ++ +With the support for multiple instances of devpts (see *newinstance* option above), each instance has a private _ptmx_ node in the root of the devpts filesystem (typically _/dev/pts/ptmx_). ++ +For compatibility with older versions of the kernel, the default mode of the new _ptmx_ node is 0000. **ptmxmode=**__value__ specifies a more useful mode for the _ptmx_ node and is highly recommended when the *newinstance* option is specified. 
++ +This option is only implemented in Linux kernel versions starting with 2.6.29. Further, this option is valid only if *CONFIG_DEVPTS_MULTIPLE_INSTANCES* is enabled in the kernel configuration. + +=== Mount options for fat + +(Note: _fat_ is not a separate filesystem, but a common part of the _msdos_, _umsdos_ and _vfat_ filesystems.) + +*blocksize=*{**512**|**1024**|*2048*}:: +Set blocksize (default 512). This option is obsolete. + +**uid=**__value__ and **gid=**__value__:: +Set the owner and group of all files. (Default: the UID and GID of the current process.) + +**umask=**__value__:: +Set the umask (the bitmask of the permissions that are *not* present). The default is the umask of the current process. The value is given in octal. + +**dmask=**__value__:: +Set the umask applied to directories only. The default is the umask of the current process. The value is given in octal. + +**fmask=**__value__:: +Set the umask applied to regular files only. The default is the umask of the current process. The value is given in octal. + +**allow_utime=**__value__:: +This option controls the permission check of mtime/atime. + +*20*;; +If current process is in group of file's group ID, you can change timestamp. + +*2*;; +Other users can change timestamp. + +The default is set from 'dmask' option. (If the directory is writable, *utime*(2) is also allowed. I.e. ~dmask & 022) + +Normally *utime*(2) checks that the current process is owner of the file, or that it has the *CAP_FOWNER* capability. But FAT filesystems don't have UID/GID on disk, so the normal check is too inflexible. With this option you can relax it. + +**check=**__value__:: +Three different levels of pickiness can be chosen: + +*r*[*elaxed*];; +Upper and lower case are accepted and equivalent, long name parts are truncated (e.g. _verylongname.foobar_ becomes _verylong.foo_), leading and embedded spaces are accepted in each name part (name and extension). 
+ +*n*[*ormal*];; +Like "relaxed", but many special characters (*, ?, <, spaces, etc.) are rejected. This is the default. + +*s*[*trict*];; +Like "normal", but names that contain long parts or special characters that are sometimes used on Linux but are not accepted by MS-DOS (+, =, etc.) are rejected. + +**codepage=**__value__:: +Sets the codepage for converting to shortname characters on FAT and VFAT filesystems. By default, codepage 437 is used. + +**conv=**__mode__:: +This option is obsolete and may fail or be ignored. + +**cvf_format=**__module__:: +Forces the driver to use the CVF (Compressed Volume File) module cvf___module__ instead of auto-detection. If the kernel supports *kmod*, the **cvf_format=**__xxx__ option also controls on-demand CVF module loading. This option is obsolete. + +**cvf_option=**__option__:: +Option passed to the CVF module. This option is obsolete. + +*debug*:: +Turn on the _debug_ flag. A version string and a list of filesystem parameters will be printed (these data are also printed if the parameters appear to be inconsistent). + +*discard*:: +If set, causes discard/TRIM commands to be issued to the block device when blocks are freed. This is useful for SSD devices and sparse/thinly-provisioned LUNs. + +*dos1xfloppy*:: +If set, use a fallback default BIOS Parameter Block configuration, determined by backing device size. These static parameters match defaults assumed by DOS 1.x for 160 kiB, 180 kiB, 320 kiB, and 360 kiB floppies and floppy images. + +*errors=*{**panic**|**continue**|*remount-ro*}:: +Specify FAT behavior on critical errors: panic, continue without doing anything, or remount the partition in read-only mode (default behavior). + +*fat=*{**12**|**16**|*32*}:: +Specify a 12, 16 or 32 bit fat. This overrides the automatic FAT type detection routine. Use with caution! + +**iocharset=**__value__:: +Character set to use for converting between 8 bit characters and 16 bit Unicode characters. The default is iso8859-1. 
Long filenames are stored on disk in Unicode format. + +*nfs=*{**stale_rw**|*nostale_ro*}:: +Enable this only if you want to export the FAT filesystem over NFS. ++ +*stale_rw*: This option maintains an index (cache) of directory inodes which is used by the nfs-related code to improve look-ups. Full file operations (read/write) over NFS are supported but with cache eviction at NFS server, this could result in spurious *ESTALE* errors. ++ +*nostale_ro*: This option bases the inode number and file handle on the on-disk location of a file in the FAT directory entry. This ensures that *ESTALE* will not be returned after a file is evicted from the inode cache. However, it means that operations such as rename, create and unlink could cause file handles that previously pointed at one file to point at a different file, potentially causing data corruption. For this reason, this option also mounts the filesystem readonly. ++ +To maintain backward compatibility, *-o nfs* is also accepted, defaulting to *stale_rw*. + +*tz=UTC*:: +This option disables the conversion of timestamps between local time (as used by Windows on FAT) and UTC (which Linux uses internally). This is particularly useful when mounting devices (like digital cameras) that are set to UTC in order to avoid the pitfalls of local time. + +**time_offset=**__minutes__:: +Set offset for conversion of timestamps from local time used by FAT to UTC. I.e., _minutes_ will be subtracted from each timestamp to convert it to UTC used internally by Linux. This is useful when the time zone set in the kernel via *settimeofday*(2) is not the time zone used by the filesystem. Note that this option still does not provide correct time stamps in all cases in presence of DST - time stamps in a different DST setting will be off by one hour. + +*quiet*:: +Turn on the _quiet_ flag. Attempts to chown or chmod files do not return errors, although they fail. Use with caution! + +*rodir*:: +FAT has the *ATTR_RO* (read-only) attribute. 
On Windows, the *ATTR_RO* of the directory will just be ignored, and is used only by applications as a flag (e.g. it's set for the customized folder). ++ +If you want to use *ATTR_RO* as a read-only flag even for the directory, set this option. + +*showexec*:: +If set, the execute permission bits of the file will be allowed only if the extension part of the name is .EXE, .COM, or .BAT. Not set by default. + +*sys_immutable*:: +If set, the *ATTR_SYS* attribute on FAT is handled as the *IMMUTABLE* flag on Linux. Not set by default. + +*flush*:: +If set, the filesystem will try to flush to disk earlier than normal. Not set by default. + +*usefree*:: +Use the "free clusters" value stored on *FSINFO*. It'll be used to determine the number of free clusters without scanning the disk. But it's not used by default, because recent versions of Windows don't update it correctly in some cases. If you are sure the "free clusters" on *FSINFO* is correct, you can use this option to avoid scanning the disk. + +*dots*, *nodots*, *dotsOK=*[**yes**|*no*]:: +Various misguided attempts to force Unix or DOS conventions onto a FAT filesystem. + +=== Mount options for hfs + +**creator=**__cccc__**, type=**__cccc__:: +Set the creator/type values as shown by the MacOS finder used for creating new files. Default values: '????'. + +**uid=**__n__**, gid=**__n__:: +Set the owner and group of all files. (Default: the UID and GID of the current process.) + +**dir_umask=**__n__**, file_umask=**__n__**, umask=**__n__:: +Set the umask used for all directories, all regular files, or all files and directories. Defaults to the umask of the current process. + +**session=**__n__:: +Select the CDROM session to mount. Defaults to leaving that decision to the CDROM driver. This option will fail with anything but a CDROM as underlying device. + +**part=**__n__:: +Select partition number n from the device. Only makes sense for CDROMs. Defaults to not parsing the partition table at all. + +*quiet*:: +Don't complain about invalid mount options.
+ +=== Mount options for hpfs + +**uid=**__value__ and **gid=**__value__:: +Set the owner and group of all files. (Default: the UID and GID of the current process.) + +**umask=**__value__:: +Set the umask (the bitmask of the permissions that are *not* present). The default is the umask of the current process. The value is given in octal. + +*case=*{**lower**|*asis*}:: +Convert all file names to lower case, or leave them as they are. (Default: *case=lower*.) + +**conv=**__mode__:: +This option is obsolete and may fail or be ignored. + +*nocheck*:: +Do not abort mounting when certain consistency checks fail. + +=== Mount options for iso9660 + +ISO 9660 is a standard describing a filesystem structure to be used on CD-ROMs. (This filesystem type is also seen on some DVDs. See also the _udf_ filesystem.) + +Normal _iso9660_ filenames appear in an 8.3 format (i.e., DOS-like restrictions on filename length), and in addition all characters are in upper case. Also there is no field for file ownership, protection, number of links, provision for block/character devices, etc. + +Rock Ridge is an extension to iso9660 that provides all of these UNIX-like features. Basically there are extensions to each directory record that supply all of the additional information, and when Rock Ridge is in use, the filesystem is indistinguishable from a normal UNIX filesystem (except that it is read-only, of course). + +*norock*:: +Disable the use of Rock Ridge extensions, even if available. Cf. *map*. + +*nojoliet*:: +Disable the use of Microsoft Joliet extensions, even if available. Cf. *map*. + +*check=*{*r*[*elaxed*]|*s*[*trict*]}:: +With *check=relaxed*, a filename is first converted to lower case before doing the lookup. This is probably only meaningful together with *norock* and *map=normal*. (Default: *check=strict*.)
+ +**uid=**__value__ and **gid=**__value__:: +Give all files in the filesystem the indicated user or group id, possibly overriding the information found in the Rock Ridge extensions. (Default: *uid=0,gid=0*.) + +*map=*{*n*[*ormal*]|*o*[*ff*]|*a*[*corn*]}:: +For non-Rock Ridge volumes, normal name translation maps upper to lower case ASCII, drops a trailing ';1', and converts ';' to '.'. With *map=off* no name translation is done. See *norock*. (Default: *map=normal*.) *map=acorn* is like *map=normal* but also applies Acorn extensions if present. + +**mode=**__value__:: +For non-Rock Ridge volumes, give all files the indicated mode. (Default: read and execute permission for everybody.) Octal mode values require a leading 0. + +*unhide*:: +Also show hidden and associated files. (If the ordinary files and the associated or hidden files have the same filenames, this may make the ordinary files inaccessible.) + +*block=*{**512**|**1024**|*2048*}:: +Set the block size to the indicated value. (Default: *block=1024*.) + +**conv=**__mode__:: +This option is obsolete and may fail or be ignored. + +*cruft*:: +If the high byte of the file length contains other garbage, set this mount option to ignore the high order bits of the file length. This implies that a file cannot be larger than 16 MB. + +**session=**__x__:: +Select the number of the session on a multisession CD. + +**sbsector=**__xxx__:: +Session begins from sector xxx. + +The following options are the same as for vfat and specifying them only makes sense when using discs encoded using Microsoft's Joliet extensions. + +**iocharset=**__value__:: +Character set to use for converting 16 bit Unicode characters on CD to 8 bit characters. The default is iso8859-1. + +*utf8*:: +Convert 16 bit Unicode characters on CD to UTF-8. + +=== Mount options for jfs + +**iocharset=**__name__:: +Character set to use for converting from Unicode to ASCII. The default is to do no conversion. Use *iocharset=utf8* for UTF8 translations.
This requires *CONFIG_NLS_UTF8* to be set in the kernel _.config_ file. + +**resize=**__value__:: +Resize the volume to _value_ blocks. JFS only supports growing a volume, not shrinking it. This option is only valid during a remount, when the volume is mounted read-write. The *resize* keyword with no value will grow the volume to the full size of the partition. + +*nointegrity*:: +Do not write to the journal. The primary use of this option is to allow for higher performance when restoring a volume from backup media. The integrity of the volume is not guaranteed if the system abnormally ends. + +*integrity*:: +Default. Commit metadata changes to the journal. Use this option to remount a volume where the *nointegrity* option was previously specified in order to restore normal behavior. + +*errors=*{**continue**|**remount-ro**|*panic*}:: +Define the behavior when an error is encountered. (Either ignore errors and just mark the filesystem erroneous and continue, or remount the filesystem read-only, or panic and halt the system.) + +**noquota**|**quota**|**usrquota**|*grpquota*:: +These options are accepted but ignored. + +=== Mount options for msdos + +See mount options for fat. If the _msdos_ filesystem detects an inconsistency, it reports an error and sets the file system read-only. The filesystem can be made writable again by remounting it. + +=== Mount options for ncpfs + +Just like _nfs_, the _ncpfs_ implementation expects a binary argument (a _struct ncp_mount_data_) to the *mount*(2) system call. This argument is constructed by *ncpmount*(8) and the current version of *mount* (2.12) does not know anything about ncpfs. + +=== Mount options for ntfs + +**iocharset=**__name__:: +Character set to use when returning file names. Unlike VFAT, NTFS suppresses names that contain nonconvertible characters. Deprecated. + +**nls=**__name__:: +New name for the option earlier called _iocharset_. + +*utf8*:: +Use UTF-8 for converting file names. 
+ +*uni_xlate=*{**0**|**1**|*2*}:: +For 0 (or 'no' or 'false'), do not use escape sequences for unknown Unicode characters. For 1 (or 'yes' or 'true') or 2, use vfat-style 4-byte escape sequences starting with ":". Here 2 gives a little-endian encoding and 1 a byteswapped bigendian encoding. + +*posix=[0|1]*:: +If enabled (posix=1), the filesystem distinguishes between upper and lower case. The 8.3 alias names are presented as hard links instead of being suppressed. This option is obsolete. + +**uid=**__value__, **gid=**__value__ and **umask=**__value__:: +Set the file permission on the filesystem. The umask value is given in octal. By default, the files are owned by root and not readable by somebody else. + +=== Mount options for overlay + +Since Linux 3.18 the overlay pseudo filesystem implements a union mount for other filesystems. + +An overlay filesystem combines two filesystems - an *upper* filesystem and a *lower* filesystem. When a name exists in both filesystems, the object in the upper filesystem is visible while the object in the lower filesystem is either hidden or, in the case of directories, merged with the upper object. + +The lower filesystem can be any filesystem supported by Linux and does not need to be writable. The lower filesystem can even be another overlayfs. The upper filesystem will normally be writable and if it is it must support the creation of trusted.* extended attributes, and must provide a valid d_type in readdir responses, so NFS is not suitable. + +A read-only overlay of two read-only filesystems may use any filesystem type. The options *lowerdir* and *upperdir* are combined into a merged directory by using: + +____ +.... +mount -t overlay overlay \ + -olowerdir=/lower,upperdir=/upper,workdir=/work /merged +.... +____ + +**lowerdir=**__directory__:: +Any filesystem, does not need to be on a writable filesystem. + +**upperdir=**__directory__:: +The upperdir is normally on a writable filesystem. 
+ +**workdir=**__directory__:: +The workdir needs to be an empty directory on the same filesystem as upperdir. + +*userxattr*:: +Use the "*user.overlay.*" xattr namespace instead of "*trusted.overlay.*". This is useful for unprivileged mounting of overlayfs. + +*redirect_dir=*{**on**|**off**|**follow**|**nofollow**}:: +If the _redirect_dir_ feature is enabled, then the directory will be copied up (but not the contents). Then the "{**trusted**|**user**}.overlay.redirect" extended attribute is set to the path of the original location from the root of the overlay. Finally the directory is moved to the new location. ++ +*on*;; +Redirects are enabled. + +*off*;; +Redirects are not created and only followed if "redirect_always_follow" feature is enabled in the kernel/module config. + +*follow*;; +Redirects are not created, but followed. + +*nofollow*;; +Redirects are not created and not followed (equivalent to "redirect_dir=off" if "redirect_always_follow" feature is not enabled). + +*index=*{**on**|**off**}:: +Inode index. If this feature is disabled and a file with multiple hard links is copied up, then this will "break" the link. Changes will not be propagated to other names referring to the same inode. + +*uuid=*{**on**|**off**}:: +Can be used to replace UUID of the underlying filesystem in file handles with null, and effectively disable UUID checks. This can be useful in case the underlying disk is copied and the UUID of this copy is changed. This is only applicable if all lower/upper/work directories are on the same filesystem, otherwise it will fallback to normal behaviour. + +*nfs_export=*{**on**|**off**}:: +When the underlying filesystems supports NFS export and the "nfs_export" +feature is enabled, an overlay filesystem may be exported to NFS. ++ +With the "nfs_export" feature, on copy_up of any lower object, an index entry +is created under the index directory. The index entry name is the hexadecimal +representation of the copy up origin file handle. 
For a non-directory object, +the index entry is a hard link to the upper inode. For a directory object, the +index entry has an extended attribute "{**trusted**|**user**}.overlay.upper" +with an encoded file handle of the upper directory inode. ++ +When encoding a file handle from an overlay filesystem object, the following rules apply;; + +* For a non-upper object, encode a lower file handle from lower inode +* For an indexed object, encode a lower file handle from copy_up origin +* For a pure-upper object and for an existing non-indexed upper object, encode an upper file handle from upper inode + ++ +The encoded overlay file handle includes;; + +* Header including path type information (e.g. lower/upper) +* UUID of the underlying filesystem +* Underlying filesystem encoding of underlying inode + ++ +This encoding format is identical to the encoding format of file handles that are stored in extended attribute "{**trusted**|**user**}.overlay.origin". When decoding an overlay file handle, the following steps are followed;; + +* Find underlying layer by UUID and path type information. +* Decode the underlying filesystem file handle to underlying dentry. +* For a lower file handle, lookup the handle in index directory by name. +* If a whiteout is found in index, return **ESTALE**. This represents an overlay object that was deleted after its file handle was encoded. +* For a non-directory, instantiate a disconnected overlay dentry from the decoded underlying dentry, the path type and index inode, if found. +* For a directory, use the connected underlying decoded dentry, path type and index, to lookup a connected overlay dentry. + ++ +-- +Decoding a non-directory file handle may return a disconnected dentry. copy_up +of that disconnected dentry will create an upper index entry with no upper +alias. + +When overlay filesystem has multiple lower layers, a middle layer directory may +have a "redirect" to lower directory. 
Because middle layer "redirects" are not +indexed, a lower file handle that was encoded from the "redirect" origin +directory, cannot be used to find the middle or upper layer directory. +Similarly, a lower file handle that was encoded from a descendant of the +"redirect" origin directory, cannot be used to reconstruct a connected overlay +path. To mitigate the cases of directories that cannot be decoded from a lower +file handle, these directories are copied up on encode and encoded as an upper +file handle. On an overlay filesystem with no upper layer this mitigation +cannot be used. NFS export in this setup requires turning off redirect follow +(e.g. "__redirect_dir=nofollow__"). + +The overlay filesystem does not support non-directory connectable file handles, so exporting with the _subtree_check_ exportfs configuration will cause failures to lookup files over NFS. + +When the NFS export feature is enabled, all directory index entries are verified on mount time to check that upper file handles are not stale. This verification may cause significant overhead in some cases. + +Note: the mount options __index=off,nfs_export=on__ are conflicting for a +read-write mount and will result in an error. +-- + +*xino=*{**on**|**off**|**auto**}:: +The "xino" feature composes a unique object identifier from the real object st_ino and an underlying fsid index. The "xino" feature uses the high inode number bits for fsid, because the underlying filesystems rarely use the high inode number bits. In case the underlying inode number does overflow into the high xino bits, overlay filesystem will fall back to the non xino behavior for that inode. 
++ +For a detailed description of the effect of this option please refer to https://www.kernel.org/doc/html/latest/filesystems/overlayfs.html?highlight=overlayfs + +*metacopy=*{**on**|**off**}:: +When metadata only copy up feature is enabled, overlayfs will only copy up metadata (as opposed to whole file), when a metadata specific operation like chown/chmod is performed. Full file will be copied up later when file is opened for WRITE operation. ++ +In other words, this is delayed data copy up operation and data is copied up when there is a need to actually modify data. + +*volatile*:: +Volatile mounts are not guaranteed to survive a crash. It is strongly recommended that volatile mounts are only used if data written to the overlay can be recreated without significant effort. ++ +The advantage of mounting with the "volatile" option is that all forms of sync calls to the upper filesystem are omitted. ++ +In order to avoid giving a false sense of safety, the syncfs (and fsync) semantics of volatile mounts are slightly different than that of the rest of VFS. If any writeback error occurs on the upperdir’s filesystem after a volatile mount takes place, all sync functions will return an error. Once this condition is reached, the filesystem will not recover, and every subsequent sync call will return an error, even if the upperdir has not experienced a new error since the last sync call. ++ +When overlay is mounted with "volatile" option, the directory "$workdir/work/incompat/volatile" is created. During next mount, overlay checks for this directory and refuses to mount if present. This is a strong indicator that user should throw away upper and work directories and create fresh ones. In very limited cases where the user knows that the system has not crashed and contents of upperdir are intact, the "volatile" directory can be removed. + +=== Mount options for reiserfs + +Reiserfs is a journaling filesystem. 
+ +*conv*:: +Instructs version 3.6 reiserfs software to mount a version 3.5 filesystem, using the 3.6 format for newly created objects. This filesystem will no longer be compatible with reiserfs 3.5 tools. + +*hash=*{**rupasov**|**tea**|**r5**|*detect*}:: +Choose which hash function reiserfs will use to find files within directories. ++ +*rupasov*;; +A hash invented by Yury Yu. Rupasov. It is fast and preserves locality, mapping lexicographically close file names to close hash values. This option should not be used, as it causes a high probability of hash collisions. + +*tea*;; +A Davis-Meyer function implemented by Jeremy Fitzhardinge. It uses hash permuting bits in the name. It gets high randomness and, therefore, low probability of hash collisions at some CPU cost. This may be used if *EHASHCOLLISION* errors are experienced with the r5 hash. + +*r5*;; +A modified version of the rupasov hash. It is used by default and is the best choice unless the filesystem has huge directories and unusual file-name patterns. + +*detect*;; +Instructs *mount* to detect which hash function is in use by examining the filesystem being mounted, and to write this information into the reiserfs superblock. This is only useful on the first mount of an old format filesystem. + +*hashed_relocation*:: +Tunes the block allocator. This may provide performance improvements in some situations. + +*no_unhashed_relocation*:: +Tunes the block allocator. This may provide performance improvements in some situations. + +*noborder*:: +Disable the border allocator algorithm invented by Yury Yu. Rupasov. This may provide performance improvements in some situations. + +*nolog*:: +Disable journaling. This will provide slight performance improvements in some situations at the cost of losing reiserfs's fast recovery from crashes. Even with this option turned on, reiserfs still performs all journaling operations, save for actual writes into its journaling area. 
Implementation of _nolog_ is a work in progress. + +*notail*:: +By default, reiserfs stores small files and 'file tails' directly into its tree. This confuses some utilities such as *lilo*(8). This option is used to disable packing of files into the tree. + +*replayonly*:: +Replay the transactions which are in the journal, but do not actually mount the filesystem. Mainly used by _reiserfsck_. + +**resize=**__number__:: +A remount option which permits online expansion of reiserfs partitions. Instructs reiserfs to assume that the device has _number_ blocks. This option is designed for use with devices which are under logical volume management (LVM). There is a special _resizer_ utility which can be obtained from _ftp://ftp.namesys.com/pub/reiserfsprogs_. + +*user_xattr*:: +Enable Extended User Attributes. See the *attr*(1) manual page. + +*acl*:: +Enable POSIX Access Control Lists. See the *acl*(5) manual page. + +*barrier=none* / *barrier=flush*:: +This disables / enables the use of write barriers in the journaling code. *barrier=none* disables, *barrier=flush* enables (default). This also requires an IO stack which can support barriers, and if reiserfs gets an error on a barrier write, it will disable barriers again with a warning. Write barriers enforce proper on-disk ordering of journal commits, making volatile disk write caches safe to use, at some performance penalty. If your disks are battery-backed in one way or another, disabling barriers may safely improve performance. + +=== Mount options for ubifs + +UBIFS is a flash filesystem which works on top of UBI volumes. Note that *atime* is not supported and is always turned off. 
+ +The device name may be specified as + +____ +*ubiX_Y*:: + UBI device number *X*, volume number *Y* +*ubiY*:: + UBI device number *0*, volume number *Y* +*ubiX:NAME*:: + UBI device number *X*, volume with name *NAME* +*ubi:NAME*:: + UBI device number *0*, volume with name *NAME* +____ + +Alternative *!* separator may be used instead of *:*. + +The following mount options are available: + +*bulk_read*:: +Enable bulk-read. VFS read-ahead is disabled because it slows down the filesystem. Bulk-Read is an internal optimization. Some flashes may read faster if the data are read at one go, rather than at several read requests. For example, OneNAND can do "read-while-load" if it reads more than one NAND page. + +*no_bulk_read*:: +Do not bulk-read. This is the default. + +*chk_data_crc*:: +Check data CRC-32 checksums. This is the default. + +*no_chk_data_crc*:: +Do not check data CRC-32 checksums. With this option, the filesystem does not check CRC-32 checksum for data, but it does check it for the internal indexing information. This option only affects reading, not writing. CRC-32 is always calculated when writing the data. + +*compr=*{**none**|**lzo**|*zlib*}:: +Select the default compressor which is used when new files are written. It is still possible to read compressed files if mounted with the *none* option. + +=== Mount options for udf + +UDF is the "Universal Disk Format" filesystem defined by OSTA, the Optical Storage Technology Association, and is often used for DVD-ROM, frequently in the form of a hybrid UDF/ISO-9660 filesystem. It is, however, perfectly usable by itself on disk drives, flash drives and other block devices. See also _iso9660_. + +*uid=*:: +Make all files in the filesystem belong to the given user. uid=forget can be specified independently of (or usually in addition to) uid=<user> and results in UDF not storing uids to the media. In fact the recorded uid is the 32-bit overflow uid -1 as defined by the UDF standard. 
The value is given as either <user> which is a valid user name or the corresponding decimal user id, or the special string "forget". + +*gid=*:: +Make all files in the filesystem belong to the given group. gid=forget can be specified independently of (or usually in addition to) gid=<group> and results in UDF not storing gids to the media. In fact the recorded gid is the 32-bit overflow gid -1 as defined by the UDF standard. The value is given as either <group> which is a valid group name or the corresponding decimal group id, or the special string "forget". + +*umask=*:: +Mask out the given permissions from all inodes read from the filesystem. The value is given in octal. + +*mode=*:: +If *mode=* is set the permissions of all non-directory inodes read from the filesystem will be set to the given mode. The value is given in octal. + +*dmode=*:: +If *dmode=* is set the permissions of all directory inodes read from the filesystem will be set to the given dmode. The value is given in octal. + +*bs=*:: +Set the block size. Default value prior to kernel version 2.6.30 was 2048. Since 2.6.30 and prior to 4.11 it was logical device block size with fallback to 2048. Since 4.11 it is logical block size with fallback to any valid block size between logical device block size and 4096. ++ +For other details see the *mkudffs*(8) 2.0+ manpage, see the *COMPATIBILITY* and *BLOCK SIZE* sections. + +*unhide*:: +Show otherwise hidden files. + +*undelete*:: +Show deleted files in lists. + +*adinicb*:: +Embed data in the inode. (default) + +*noadinicb*:: +Don't embed data in the inode. + +*shortad*:: +Use short UDF address descriptors. + +*longad*:: +Use long UDF address descriptors. (default) + +*nostrict*:: +Unset strict conformance. + +*iocharset=*:: +Set the NLS character set. This requires kernel compiled with *CONFIG_UDF_NLS* option. + +*utf8*:: +Set the UTF-8 character set. 
+ +=== Mount options for debugging and disaster recovery + +*novrs*:: +Ignore the Volume Recognition Sequence and attempt to mount anyway. + +*session=*:: +Select the session number for multi-session recorded optical media. (default= last session) + +*anchor=*:: +Override standard anchor location. (default= 256) + +*lastblock=*:: +Set the last block of the filesystem. + +=== Unused historical mount options that may be encountered and should be removed + +*uid=ignore*:: +Ignored, use uid=<user> instead. + +*gid=ignore*:: +Ignored, use gid=<group> instead. + +*volume=*:: +Unimplemented and ignored. + +*partition=*:: +Unimplemented and ignored. + +*fileset=*:: +Unimplemented and ignored. + +*rootdir=*:: +Unimplemented and ignored. + +=== Mount options for ufs + +**ufstype=**__value__:: +UFS is a filesystem widely used in different operating systems. The problem is the differences among implementations. Features of some implementations are undocumented, so it's hard to recognize the type of ufs automatically. That's why the user must specify the type of ufs by mount option. Possible values are: ++ +*old*;; +Old format of ufs, this is the default, read only. (Don't forget to give the *-r* option.) + +*44bsd*;; +For filesystems created by a BSD-like system (NetBSD, FreeBSD, OpenBSD). + +*ufs2*;; +Used in FreeBSD 5.x supported as read-write. + +*5xbsd*;; +Synonym for ufs2. + +*sun*;; +For filesystems created by SunOS or Solaris on Sparc. + +*sunx86*;; +For filesystems created by Solaris on x86. + +*hp*;; +For filesystems created by HP-UX, read-only. + +*nextstep*;; +For filesystems created by NeXTStep (on NeXT station) (currently read only). + +*nextstep-cd*;; +For NextStep CDROMs (block_size == 2048), read-only. + +*openstep*;; +For filesystems created by OpenStep (currently read only). The same filesystem type is also used by macOS. + +**onerror=**__value__:: +Set behavior on error: + +*panic*;; +If an error is encountered, cause a kernel panic. 
+ +[**lock**|**umount**|*repair*];; +These mount options don't do anything at present; when an error is encountered only a console message is printed. + +=== Mount options for umsdos + +See mount options for msdos. The *dotsOK* option is explicitly killed by _umsdos_. + +=== Mount options for vfat + +First of all, the mount options for _fat_ are recognized. The *dotsOK* option is explicitly killed by _vfat_. Furthermore, there are + +*uni_xlate*:: +Translate unhandled Unicode characters to special escaped sequences. This lets you backup and restore filenames that are created with any Unicode characters. Without this option, a '?' is used when no translation is possible. The escape character is ':' because it is otherwise invalid on the vfat filesystem. The escape sequence that gets used, where u is the Unicode character, is: ':', (u & 0x3f), ((u>>6) & 0x3f), (u>>12). + +*posix*:: +Allow two files with names that only differ in case. This option is obsolete. + +*nonumtail*:: +First try to make a short name without sequence number, before trying _name~num.ext_. + +*utf8*:: +UTF8 is the filesystem safe 8-bit encoding of Unicode that is used by the console. It can be enabled for the filesystem with this option or disabled with utf8=0, utf8=no or utf8=false. If _uni_xlate_ gets set, UTF8 gets disabled. + +**shortname=**__mode__:: +Defines the behavior for creation and display of filenames which fit into 8.3 characters. If a long name for a file exists, it will always be the preferred one for display. There are four __mode__s: + +*lower*;; +Force the short name to lower case upon display; store a long name when the short name is not all upper case. + +*win95*;; +Force the short name to upper case upon display; store a long name when the short name is not all upper case. + +*winnt*;; +Display the short name as is; store a long name when the short name is not all lower case or all upper case. 
+ +*mixed*;; +Display the short name as is; store a long name when the short name is not all upper case. This mode is the default since Linux 2.6.32. + +=== Mount options for usbfs + +**devuid=**__uid__ and **devgid=**__gid__ and **devmode=**__mode__:: +Set the owner and group and mode of the device files in the usbfs filesystem (default: uid=gid=0, mode=0644). The mode is given in octal. + +**busuid=**__uid__ and **busgid=**__gid__ and **busmode=**__mode__:: +Set the owner and group and mode of the bus directories in the usbfs filesystem (default: uid=gid=0, mode=0555). The mode is given in octal. + +**listuid=**__uid__ and **listgid=**__gid__ and **listmode=**__mode__:: +Set the owner and group and mode of the file _devices_ (default: uid=gid=0, mode=0444). The mode is given in octal. + +== DM-VERITY SUPPORT + +The device-mapper verity target provides read-only transparent integrity checking of block devices using kernel crypto API. The *mount* command can open the dm-verity device and do the integrity verification before the device filesystem is mounted. Requires libcryptsetup within libmount (optionally via *dlopen*(3)). If libcryptsetup supports extracting the root hash of an already mounted device, existing devices will be automatically reused in case of a match. Mount options for dm-verity: + +**verity.hashdevice=**__path__:: +Path to the hash tree device associated with the source volume to pass to dm-verity. + +**verity.roothash=**__hex__:: +Hex-encoded hash of the root of _verity.hashdevice_. Mutually exclusive with _verity.roothashfile._ + +**verity.roothashfile=**__path__:: +Path to file containing the hex-encoded hash of the root of _verity.hashdevice._ Mutually exclusive with _verity.roothash._ + +**verity.hashoffset=**__offset__:: +If the hash tree device is embedded in the source volume, _offset_ (default: 0) is used by dm-verity to get to the tree. 
+ +**verity.fecdevice=**__path__:: +Path to the Forward Error Correction (FEC) device associated with the source volume to pass to dm-verity. Optional. Requires kernel built with *CONFIG_DM_VERITY_FEC*. + +**verity.fecoffset=**__offset__:: +If the FEC device is embedded in the source volume, _offset_ (default: 0) is used by dm-verity to get to the FEC area. Optional. + +**verity.fecroots=**__value__:: +Parity bytes for FEC (default: 2). Optional. + +**verity.roothashsig=**__path__:: +Path to *pkcs7*(1ssl) signature of root hash hex string. Requires crypt_activate_by_signed_key() from cryptsetup and kernel built with *CONFIG_DM_VERITY_VERIFY_ROOTHASH_SIG*. For device reuse, signatures have to be either used by all mounts of a device or by none. Optional. + +**verity.oncorruption=**__ignore__|__restart__|__panic__:: +Instruct the kernel to ignore, reboot or panic when corruption is detected. By default the I/O operation simply fails. Requires Linux 4.1 or newer, and libcryptsetup 2.3.4 or newer. Optional. + +Supported since util-linux v2.35. + +For example commands: + +.... +mksquashfs /etc /tmp/etc.squashfs +dd if=/dev/zero of=/tmp/etc.hash bs=1M count=10 +veritysetup format /tmp/etc.squashfs /tmp/etc.hash +openssl smime -sign -in <hash> -nocerts -inkey private.key \ +-signer private.crt -noattr -binary -outform der -out /tmp/etc.roothash.p7s +mount -o verity.hashdevice=/tmp/etc.hash,verity.roothash=<hash>,\ +verity.roothashsig=/tmp/etc.roothash.p7s /tmp/etc.squashfs /mnt +.... + +create squashfs image from _/etc_ directory, verity hash device and mount verified filesystem image to _/mnt_. The kernel will verify that the root hash is signed by a key from the kernel keyring if roothashsig is used. + +== LOOP-DEVICE SUPPORT + +One further possible type is a mount via the loop device. 
For example, the command + +____ +*mount /tmp/disk.img /mnt -t vfat -o loop=/dev/loop3* +____ + +will set up the loop device _/dev/loop3_ to correspond to the file _/tmp/disk.img_, and then mount this device on _/mnt_. + +If no explicit loop device is mentioned (but just an option '**-o loop**' is given), then *mount* will try to find some unused loop device and use that, for example + +____ +*mount /tmp/disk.img /mnt -o loop* +____ + +The *mount* command *automatically* creates a loop device from a regular file if a filesystem type is not specified or the filesystem is known for libblkid, for example: + +____ +*mount /tmp/disk.img /mnt* + +*mount -t ext4 /tmp/disk.img /mnt* +____ + +This type of mount knows about three options, namely *loop*, *offset* and *sizelimit*, that are really options to *losetup*(8). (These options can be used in addition to those specific to the filesystem type.) + +Since Linux 2.6.25 auto-destruction of loop devices is supported, meaning that any loop device allocated by *mount* will be freed by *umount* independently of _/etc/mtab_. + +You can also free a loop device by hand, using *losetup -d* or *umount -d*. + +Since util-linux v2.29, *mount* re-uses the loop device rather than initializing a new device if the same backing file is already used for some loop device with the same offset and sizelimit. This is necessary to avoid a filesystem corruption. + +== EXIT STATUS + +*mount* has the following exit status values (the bits can be ORed): + +*0*:: +success + +*1*:: +incorrect invocation or permissions + +*2*:: +system error (out of memory, cannot fork, no more loop devices) + +*4*:: +internal *mount* bug + +*8*:: +user interrupt + +*16*:: +problems writing or locking _/etc/mtab_ + +*32*:: +mount failure + +*64*:: +some mount succeeded ++ +The command *mount -a* returns 0 (all succeeded), 32 (all failed), or 64 (some failed, some succeeded). 
+ +== EXTERNAL HELPERS + +The syntax of external mount helpers is: + +**/sbin/mount.**__suffix__ _spec dir_ [*-sfnv*] [*-N* _namespace_] [*-o* _options_] [*-t* __type__**.**_subtype_] + +where the _suffix_ is the filesystem type and the *-sfnvoN* options have the same meaning as the normal mount options. The *-t* option is used for filesystems with subtypes support (for example */sbin/mount.fuse -t fuse.sshfs*). + +The command *mount* does not pass the mount options *unbindable*, *runbindable*, *private*, *rprivate*, *slave*, *rslave*, *shared*, *rshared*, *auto*, *noauto*, *comment*, *x-**, *loop*, *offset* and *sizelimit* to the mount.<suffix> helpers. All other options are used in a comma-separated list as an argument to the *-o* option. + +== ENVIRONMENT + +*LIBMOUNT_FSTAB*=<path>:: +overrides the default location of the _fstab_ file (ignored for suid) + +*LIBMOUNT_MTAB*=<path>:: +overrides the default location of the _mtab_ file (ignored for suid) + +*LIBMOUNT_DEBUG*=all:: +enables libmount debug output + +*LIBBLKID_DEBUG*=all:: +enables libblkid debug output + +*LOOPDEV_DEBUG*=all:: +enables loop device setup debug output + +== FILES + +See also "*The files /etc/fstab, /etc/mtab and /proc/mounts*" section above. + +_/etc/fstab_:: +filesystem table + +_/run/mount_:: +libmount private runtime directory + +_/etc/mtab_:: +table of mounted filesystems or symlink to _/proc/mounts_ + +_/etc/mtab~_:: +lock file (unused on systems with _mtab_ symlink) + +_/etc/mtab.tmp_:: +temporary file (unused on systems with _mtab_ symlink) + +_/etc/filesystems_:: +a list of filesystem types to try + +== HISTORY + +A *mount* command existed in Version 5 AT&T UNIX. + +== BUGS + +It is possible for a corrupted filesystem to cause a crash. + +Some Linux filesystems don't support *-o sync* and *-o dirsync* (the ext2, ext3, ext4, fat and vfat filesystems _do_ support synchronous updates (a la BSD) when mounted with the *sync* option). 
+ +The *-o remount* may not be able to change mount parameters (all _ext2fs_-specific parameters, except *sb*, are changeable with a remount, for example, but you can't change *gid* or *umask* for the _fatfs_). + +It is possible that the files _/etc/mtab_ and _/proc/mounts_ don't match on systems with a regular _mtab_ file. The first file is based only on the *mount* command options, but the content of the second file also depends on the kernel and other settings (e.g. on a remote NFS server -- in certain cases the *mount* command may report unreliable information about an NFS mount point and the _/proc/mounts_ file usually contains more reliable information.) This is another reason to replace the _mtab_ file with a symlink to the _/proc/mounts_ file. + +Checking files on NFS filesystems referenced by file descriptors (i.e. the *fcntl* and *ioctl* families of functions) may lead to inconsistent results due to the lack of a consistency check in the kernel even if the *noac* mount option is used. + +The *loop* option with the *offset* or *sizelimit* options used may fail when using older kernels if the *mount* command can't confirm that the size of the block device has been configured as requested. This situation can be worked around by using the *losetup*(8) command manually before calling *mount* with the configured loop device. 
+ +== AUTHORS + +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +*mount*(2), +*umount*(2), +*filesystems*(5), +*fstab*(5), +*nfs*(5), +*xfs*(5), +*mount_namespaces*(7), +*xattr*(7), +*e2label*(8), +*findmnt*(8), +*losetup*(8), +*lsblk*(8), +*mke2fs*(8), +*mountd*(8), +*nfsd*(8), +*swapon*(8), +*tune2fs*(8), +*umount*(8), +*xfs_admin*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/mount.c b/sys-utils/mount.c new file mode 100644 index 0000000..ba4c78e --- /dev/null +++ b/sys-utils/mount.c @@ -0,0 +1,1049 @@ +/* + * mount(8) -- mount a filesystem + * + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Written by Karel Zak <kzak@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <stdarg.h> +#include <libmount.h> +#include <ctype.h> + +#include "nls.h" +#include "c.h" +#include "env.h" +#include "strutils.h" +#include "closestream.h" +#include "canonicalize.h" +#include "pathnames.h" + +#define XALLOC_EXIT_CODE MNT_EX_SYSERR +#include "xalloc.h" + +#define OPTUTILS_EXIT_CODE MNT_EX_USAGE +#include "optutils.h" + +static struct ul_env_list *envs_removed; + +static int mk_exit_code(struct libmnt_context *cxt, int rc); + +static void suid_drop(struct libmnt_context *cxt) +{ + const uid_t ruid = getuid(); + const uid_t euid = geteuid(); + + if (ruid != 0 && euid == 0 && drop_permissions() != 0) + err(MNT_EX_FAIL, _("drop permissions failed")); + + /* be paranoid and check it, setuid(0) has to fail */ + if (ruid != 0 && setuid(0) == 0) + errx(MNT_EX_FAIL, _("drop permissions failed.")); + + mnt_context_force_unrestricted(cxt); + + /* restore "bad" environment variables */ + if (envs_removed) { + env_list_setenv(envs_removed); + env_list_free(envs_removed); + envs_removed = NULL; + } +} + +static void __attribute__((__noreturn__)) mount_print_version(void) +{ + const char *ver = NULL; + const char **features = NULL, **p; + + mnt_get_library_version(&ver); + mnt_get_library_features(&features); + + printf(_("%s from %s (libmount %s"), + program_invocation_short_name, + PACKAGE_STRING, + ver); + p = features; + while (p && *p) { + fputs(p == features ? ": " : ", ", stdout); + fputs(*p++, stdout); + } + fputs(")\n", stdout); + exit(MNT_EX_SUCCESS); +} + +static int table_parser_errcb(struct libmnt_table *tb __attribute__((__unused__)), + const char *filename, int line) +{ + if (filename) + warnx(_("%s: parse error at line %d -- ignored"), filename, line); + return 1; +} + +/* + * Replace control chars with '?' to be compatible with coreutils. 
For more + * robust solution use findmnt(1) where we use \x?? hex encoding. + */ +static void safe_fputs(const char *data) +{ + const char *p; + + for (p = data; p && *p; p++) { + if (iscntrl((unsigned char) *p)) + fputc('?', stdout); + else + fputc(*p, stdout); + } +} + +static void print_all(struct libmnt_context *cxt, char *pattern, int show_label) +{ + struct libmnt_table *tb; + struct libmnt_iter *itr = NULL; + struct libmnt_fs *fs; + struct libmnt_cache *cache = NULL; + + if (mnt_context_get_mtab(cxt, &tb)) + err(MNT_EX_SYSERR, _("failed to read mtab")); + + itr = mnt_new_iter(MNT_ITER_FORWARD); + if (!itr) + err(MNT_EX_SYSERR, _("failed to initialize libmount iterator")); + if (show_label) + cache = mnt_new_cache(); + + while (mnt_table_next_fs(tb, itr, &fs) == 0) { + const char *type = mnt_fs_get_fstype(fs); + const char *src = mnt_fs_get_source(fs); + const char *optstr = mnt_fs_get_options(fs); + char *xsrc = NULL; + + if (type && pattern && !mnt_match_fstype(type, pattern)) + continue; + + if (mnt_fs_is_regularfs(fs)) + xsrc = mnt_pretty_path(src, cache); + printf ("%s on ", xsrc ? 
xsrc : src); + safe_fputs(mnt_fs_get_target(fs)); + + if (type) + printf (" type %s", type); + if (optstr) + printf (" (%s)", optstr); + if (show_label && src) { + char *lb = mnt_cache_find_tag_value(cache, src, "LABEL"); + if (lb) + printf (" [%s]", lb); + } + fputc('\n', stdout); + free(xsrc); + } + + mnt_unref_cache(cache); + mnt_free_iter(itr); +} + +/* + * mount -a [-F] + */ +static int mount_all(struct libmnt_context *cxt) +{ + struct libmnt_iter *itr; + struct libmnt_fs *fs; + int mntrc, ignored, rc = MNT_EX_SUCCESS; + + int nsucc = 0, nerrs = 0; + + itr = mnt_new_iter(MNT_ITER_FORWARD); + if (!itr) { + warn(_("failed to initialize libmount iterator")); + return MNT_EX_SYSERR; + } + + while (mnt_context_next_mount(cxt, itr, &fs, &mntrc, &ignored) == 0) { + + const char *tgt = mnt_fs_get_target(fs); + + if (ignored) { + if (mnt_context_is_verbose(cxt)) + printf(ignored == 1 ? _("%-25s: ignored\n") : + _("%-25s: already mounted\n"), + tgt); + } else if (mnt_context_is_fork(cxt)) { + if (mnt_context_is_verbose(cxt)) + printf("%-25s: mount successfully forked\n", tgt); + } else { + if (mk_exit_code(cxt, mntrc) == MNT_EX_SUCCESS) { + nsucc++; + + /* Note that MNT_EX_SUCCESS return code does + * not mean that FS has been really mounted + * (e.g. 
nofail option) */ + if (mnt_context_get_status(cxt) + && mnt_context_is_verbose(cxt)) + printf("%-25s: successfully mounted\n", tgt); + } else + nerrs++; + } + } + + if (mnt_context_is_parent(cxt)) { + /* wait for mount --fork children */ + int nchildren = 0; + + nerrs = 0, nsucc = 0; + + rc = mnt_context_wait_for_children(cxt, &nchildren, &nerrs); + if (!rc && nchildren) + nsucc = nchildren - nerrs; + } + + if (nerrs == 0) + rc = MNT_EX_SUCCESS; /* all success */ + else if (nsucc == 0) + rc = MNT_EX_FAIL; /* all failed */ + else + rc = MNT_EX_SOMEOK; /* some success, some failed */ + + mnt_free_iter(itr); + return rc; +} + + +/* + * mount -a -o remount + */ +static int remount_all(struct libmnt_context *cxt) +{ + struct libmnt_iter *itr; + struct libmnt_fs *fs; + int mntrc, ignored, rc = MNT_EX_SUCCESS; + + int nsucc = 0, nerrs = 0; + + itr = mnt_new_iter(MNT_ITER_FORWARD); + if (!itr) { + warn(_("failed to initialize libmount iterator")); + return MNT_EX_SYSERR; + } + + while (mnt_context_next_remount(cxt, itr, &fs, &mntrc, &ignored) == 0) { + + const char *tgt = mnt_fs_get_target(fs); + + if (ignored) { + if (mnt_context_is_verbose(cxt)) + printf(_("%-25s: ignored\n"), tgt); + } else { + if (mk_exit_code(cxt, mntrc) == MNT_EX_SUCCESS) { + nsucc++; + + /* Note that MNT_EX_SUCCESS return code does + * not mean that FS has been really mounted + * (e.g. 
nofail option) */ + if (mnt_context_get_status(cxt) + && mnt_context_is_verbose(cxt)) + printf("%-25s: successfully remounted\n", tgt); + } else + nerrs++; + } + } + + if (nerrs == 0) + rc = MNT_EX_SUCCESS; /* all success */ + else if (nsucc == 0) + rc = MNT_EX_FAIL; /* all failed */ + else + rc = MNT_EX_SOMEOK; /* some success, some failed */ + + mnt_free_iter(itr); + return rc; +} + +static void success_message(struct libmnt_context *cxt) +{ + unsigned long mflags = 0; + const char *tgt, *src, *pr = program_invocation_short_name; + + if (mnt_context_helper_executed(cxt) + || mnt_context_get_status(cxt) != 1) + return; + + mnt_context_get_mflags(cxt, &mflags); + tgt = mnt_context_get_target(cxt); + src = mnt_context_get_source(cxt); + + if (mflags & MS_MOVE) + printf(_("%s: %s moved to %s.\n"), pr, src, tgt); + else if (mflags & MS_BIND) + printf(_("%s: %s bound on %s.\n"), pr, src, tgt); + else if (mflags & MS_PROPAGATION) { + if (src && strcmp(src, "none") != 0 && tgt) + printf(_("%s: %s mounted on %s.\n"), pr, src, tgt); + + printf(_("%s: %s propagation flags changed.\n"), pr, tgt); + } else + printf(_("%s: %s mounted on %s.\n"), pr, src, tgt); +} + +#if defined(HAVE_LIBSELINUX) && defined(HAVE_SECURITY_GET_INITIAL_CONTEXT) +# include <selinux/selinux.h> +# include <selinux/context.h> + +static void selinux_warning(struct libmnt_context *cxt, const char *tgt) +{ + + if (tgt && mnt_context_is_verbose(cxt) && is_selinux_enabled() > 0) { + char *raw = NULL, *def = NULL; + + if (getfilecon(tgt, &raw) > 0 + && security_get_initial_context("file", &def) == 0) { + + if (!selinux_file_context_cmp(raw, def)) + printf(_( + "mount: %s does not contain SELinux labels.\n" + " You just mounted a file system that supports labels which does not\n" + " contain labels, onto an SELinux box. It is likely that confined\n" + " applications will generate AVC messages and not be allowed access to\n" + " this file system. 
For more details see restorecon(8) and mount(8).\n"), + tgt); + } + freecon(raw); + freecon(def); + } +} +#else +# define selinux_warning(_x, _y) +#endif + + +#ifdef USE_SYSTEMD +static void systemd_hint(void) +{ + static int fstab_check_done = 0; + + if (fstab_check_done == 0) { + struct stat a, b; + + if (isatty(STDERR_FILENO) && + stat(_PATH_SD_UNITSLOAD, &a) == 0 && + stat(_PATH_MNTTAB, &b) == 0 && + cmp_stat_mtime(&a, &b, <)) + printf(_( + "mount: (hint) your fstab has been modified, but systemd still uses\n" + " the old version; use 'systemctl daemon-reload' to reload.\n")); + + fstab_check_done = 1; + } +} +#else +# define systemd_hint() +#endif + + +/* + * Returns exit status (MNT_EX_*) and/or prints error message. + */ +static int mk_exit_code(struct libmnt_context *cxt, int rc) +{ + const char *tgt; + char buf[BUFSIZ] = { 0 }; + + rc = mnt_context_get_excode(cxt, rc, buf, sizeof(buf)); + tgt = mnt_context_get_target(cxt); + + if (*buf) { + const char *spec = tgt; + if (!spec) + spec = mnt_context_get_source(cxt); + if (!spec) + spec = "???"; + warnx("%s: %s.", spec, buf); + + if (mnt_context_syscall_called(cxt) && + mnt_context_get_syscall_errno(cxt) != 0) + fprintf(stderr, _(" dmesg(1) may have more information after failed mount system call.\n")); + } + + if (rc == MNT_EX_SUCCESS && mnt_context_get_status(cxt) == 1) { + selinux_warning(cxt, tgt); + } + + systemd_hint(); + + return rc; +} + +static struct libmnt_table *append_fstab(struct libmnt_context *cxt, + struct libmnt_table *fstab, + const char *path) +{ + + if (!fstab) { + fstab = mnt_new_table(); + if (!fstab) + err(MNT_EX_SYSERR, _("failed to initialize libmount table")); + + mnt_table_set_parser_errcb(fstab, table_parser_errcb); + mnt_context_set_fstab(cxt, fstab); + + mnt_unref_table(fstab); /* reference is handled by @cxt now */ + } + + if (mnt_table_parse_fstab(fstab, path)) + errx(MNT_EX_USAGE,_("%s: failed to parse"), path); + + return fstab; +} + +/* + * Check source and target paths -- 
non-root user should not be able to + * resolve paths which are unreadable for them. + */ +static int sanitize_paths(struct libmnt_context *cxt) +{ + const char *p; + struct libmnt_fs *fs = mnt_context_get_fs(cxt); + + if (!fs) + return 0; + + p = mnt_fs_get_target(fs); + if (p) { + char *np = canonicalize_path_restricted(p); + if (!np) + return -EPERM; + mnt_fs_set_target(fs, np); + free(np); + } + + p = mnt_fs_get_srcpath(fs); + if (p) { + char *np = canonicalize_path_restricted(p); + if (!np) + return -EPERM; + mnt_fs_set_source(fs, np); + free(np); + } + return 0; +} + +static void append_option(struct libmnt_context *cxt, const char *opt, const char *arg) +{ + char *o = NULL; + + if (opt && (*opt == '=' || *opt == '\'' || *opt == '\"' || isblank(*opt))) + errx(MNT_EX_USAGE, _("unsupported option format: %s"), opt); + + if (arg && *arg) + xasprintf(&o, "%s=\"%s\"", opt, arg); + + if (mnt_context_append_options(cxt, o ? : opt)) + err(MNT_EX_SYSERR, _("failed to append option '%s'"), o ? 
: opt); + + free(o); +} + +static int has_remount_flag(struct libmnt_context *cxt) +{ + unsigned long mflags = 0; + + if (mnt_context_get_mflags(cxt, &mflags)) + return 0; + + return mflags & MS_REMOUNT; +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, _( + " %1$s [-lhV]\n" + " %1$s -a [options]\n" + " %1$s [options] [--source] <source> | [--target] <directory>\n" + " %1$s [options] <source> <directory>\n" + " %1$s <operation> <mountpoint> [<target>]\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Mount a filesystem.\n"), out); + + fputs(USAGE_OPTIONS, out); + fprintf(out, _( + " -a, --all mount all filesystems mentioned in fstab\n" + " -c, --no-canonicalize don't canonicalize paths\n" + " -f, --fake dry run; skip the mount(2) syscall\n" + " -F, --fork fork off for each device (use with -a)\n" + " -T, --fstab <path> alternative file to /etc/fstab\n")); + fprintf(out, _( + " -i, --internal-only don't call the mount.<type> helpers\n")); + fprintf(out, _( + " -l, --show-labels show also filesystem labels\n")); + fprintf(out, _( + " -m, --mkdir[=<mode>] alias to '-o X-mount.mkdir[=<mode>]'\n")); + fprintf(out, _( + " -n, --no-mtab don't write to /etc/mtab\n")); + fprintf(out, _( + " --options-mode <mode>\n" + " what to do with options loaded from fstab\n" + " --options-source <source>\n" + " mount options source\n" + " --options-source-force\n" + " force use of options from fstab/mtab\n")); + fprintf(out, _( + " -o, --options <list> comma-separated list of mount options\n" + " -O, --test-opts <list> limit the set of filesystems (use with -a)\n" + " -r, --read-only mount the filesystem read-only (same as -o ro)\n" + " -t, --types <list> limit the set of filesystem types\n")); + fprintf(out, _( + " --source <src> explicitly specifies source (path, label, uuid)\n" + " --target <target> explicitly specifies mountpoint\n")); + fprintf(out, _( + " --target-prefix 
<path>\n" + " specifies path used for all mountpoints\n")); + fprintf(out, _( + " -v, --verbose say what is being done\n")); + fprintf(out, _( + " -w, --rw, --read-write mount the filesystem read-write (default)\n")); + fprintf(out, _( + " -N, --namespace <ns> perform mount in another namespace\n")); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(25)); + + fprintf(out, _( + "\nSource:\n" + " -L, --label <label> synonym for LABEL=<label>\n" + " -U, --uuid <uuid> synonym for UUID=<uuid>\n" + " LABEL=<label> specifies device by filesystem label\n" + " UUID=<uuid> specifies device by filesystem UUID\n" + " PARTLABEL=<label> specifies device by partition label\n" + " PARTUUID=<uuid> specifies device by partition UUID\n" + " ID=<id> specifies device by udev hardware ID\n")); + + fprintf(out, _( + " <device> specifies device by path\n" + " <directory> mountpoint for bind mounts (see --bind/rbind)\n" + " <file> regular file for loopdev setup\n")); + + fprintf(out, _( + "\nOperations:\n" + " -B, --bind mount a subtree somewhere else (same as -o bind)\n" + " -M, --move move a subtree to some other place\n" + " -R, --rbind mount a subtree and all submounts somewhere else\n")); + fprintf(out, _( + " --make-shared mark a subtree as shared\n" + " --make-slave mark a subtree as slave\n" + " --make-private mark a subtree as private\n" + " --make-unbindable mark a subtree as unbindable\n")); + fprintf(out, _( + " --make-rshared recursively mark a whole subtree as shared\n" + " --make-rslave recursively mark a whole subtree as slave\n" + " --make-rprivate recursively mark a whole subtree as private\n" + " --make-runbindable recursively mark a whole subtree as unbindable\n")); + + printf(USAGE_MAN_TAIL("mount(8)")); + + exit(MNT_EX_SUCCESS); +} + +struct flag_str { + int value; + char *str; +}; + +static int omode2mask(const char *str) +{ + size_t i; + + static const struct flag_str flags[] = { + { MNT_OMODE_IGNORE, "ignore" }, + { MNT_OMODE_APPEND, "append" }, + { 
MNT_OMODE_PREPEND, "prepend" }, + { MNT_OMODE_REPLACE, "replace" }, + }; + + for (i = 0; i < ARRAY_SIZE(flags); i++) { + if (!strcmp(str, flags[i].str)) + return flags[i].value; + } + return -EINVAL; +} + +static long osrc2mask(const char *str, size_t len) +{ + size_t i; + + static const struct flag_str flags[] = { + { MNT_OMODE_FSTAB, "fstab" }, + { MNT_OMODE_MTAB, "mtab" }, + { MNT_OMODE_NOTAB, "disable" }, + }; + + for (i = 0; i < ARRAY_SIZE(flags); i++) { + if (!strncmp(str, flags[i].str, len) && !flags[i].str[len]) + return flags[i].value; + } + return -EINVAL; +} + +static pid_t parse_pid(const char *str) /* returns 0 unless str is entirely a decimal number in 0..INT_MAX */ +{ + char *end; + long ret; /* parse wide first so out-of-range input is rejected instead of truncated */ + + errno = 0; + ret = strtol(str, &end, 10); + + if (ret < 0 || ret > INT_MAX || errno || end == str || (end && *end)) + return 0; /* not a usable PID; the -N handler then uses the argument as a path */ + return ret; +} + +int main(int argc, char **argv) +{ + int c, rc = MNT_EX_SUCCESS, all = 0, show_labels = 0; + struct libmnt_context *cxt; + struct libmnt_table *fstab = NULL; + char *srcbuf = NULL; + char *types = NULL; + int oper = 0, is_move = 0; + int propa = 0; + int optmode = 0, optmode_mode = 0, optmode_src = 0; + + enum { + MOUNT_OPT_SHARED = CHAR_MAX + 1, + MOUNT_OPT_SLAVE, + MOUNT_OPT_PRIVATE, + MOUNT_OPT_UNBINDABLE, + MOUNT_OPT_RSHARED, + MOUNT_OPT_RSLAVE, + MOUNT_OPT_RPRIVATE, + MOUNT_OPT_RUNBINDABLE, + MOUNT_OPT_TARGET, + MOUNT_OPT_TARGET_PREFIX, + MOUNT_OPT_SOURCE, + MOUNT_OPT_OPTMODE, + MOUNT_OPT_OPTSRC, + MOUNT_OPT_OPTSRC_FORCE + }; + + static const struct option longopts[] = { + { "all", no_argument, NULL, 'a' }, + { "fake", no_argument, NULL, 'f' }, + { "fstab", required_argument, NULL, 'T' }, + { "fork", no_argument, NULL, 'F' }, + { "help", no_argument, NULL, 'h' }, + { "no-mtab", no_argument, NULL, 'n' }, + { "read-only", no_argument, NULL, 'r' }, + { "ro", no_argument, NULL, 'r' }, + { "verbose", no_argument, NULL, 'v' }, + { "version", no_argument, NULL, 'V' }, + { "read-write", no_argument, NULL, 'w' }, + { "rw", no_argument, NULL, 'w' }, + { "options", required_argument, NULL, 'o' }, + { 
"test-opts", required_argument, NULL, 'O' }, + { "types", required_argument, NULL, 't' }, + { "uuid", required_argument, NULL, 'U' }, + { "label", required_argument, NULL, 'L' }, + { "bind", no_argument, NULL, 'B' }, + { "move", no_argument, NULL, 'M' }, + { "rbind", no_argument, NULL, 'R' }, + { "make-shared", no_argument, NULL, MOUNT_OPT_SHARED }, + { "make-slave", no_argument, NULL, MOUNT_OPT_SLAVE }, + { "make-private", no_argument, NULL, MOUNT_OPT_PRIVATE }, + { "make-unbindable", no_argument, NULL, MOUNT_OPT_UNBINDABLE }, + { "make-rshared", no_argument, NULL, MOUNT_OPT_RSHARED }, + { "make-rslave", no_argument, NULL, MOUNT_OPT_RSLAVE }, + { "make-rprivate", no_argument, NULL, MOUNT_OPT_RPRIVATE }, + { "make-runbindable", no_argument, NULL, MOUNT_OPT_RUNBINDABLE }, + { "mkdir", optional_argument, NULL, 'm' }, + { "no-canonicalize", no_argument, NULL, 'c' }, + { "internal-only", no_argument, NULL, 'i' }, + { "show-labels", no_argument, NULL, 'l' }, + { "target", required_argument, NULL, MOUNT_OPT_TARGET }, + { "target-prefix", required_argument, NULL, MOUNT_OPT_TARGET_PREFIX }, + { "source", required_argument, NULL, MOUNT_OPT_SOURCE }, + { "options-mode", required_argument, NULL, MOUNT_OPT_OPTMODE }, + { "options-source", required_argument, NULL, MOUNT_OPT_OPTSRC }, + { "options-source-force", no_argument, NULL, MOUNT_OPT_OPTSRC_FORCE}, + { "namespace", required_argument, NULL, 'N' }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'B','M','R' }, /* bind,move,rbind */ + { 'L','U', MOUNT_OPT_SOURCE }, /* label,uuid,source */ + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + __sanitize_env(&envs_removed); + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + strutils_set_exitcode(MNT_EX_USAGE); + + mnt_init_debug(0); + cxt = mnt_new_context(); + if (!cxt) + err(MNT_EX_SYSERR, _("libmount context allocation failed")); + + 
mnt_context_set_tables_errcb(cxt, table_parser_errcb); + + while ((c = getopt_long(argc, argv, "aBcfFhilL:m::Mno:O:rRsU:vVwt:T:N:", + longopts, NULL)) != -1) { + + /* only few options are allowed for non-root users */ + if (mnt_context_is_restricted(cxt) && + !strchr("hlLUVvrist", c) && + c != MOUNT_OPT_TARGET && + c != MOUNT_OPT_SOURCE) + suid_drop(cxt); + + err_exclusive_options(c, longopts, excl, excl_st); + + switch(c) { + case 'a': + all = 1; + break; + case 'c': + mnt_context_disable_canonicalize(cxt, TRUE); + break; + case 'f': + mnt_context_enable_fake(cxt, TRUE); + break; + case 'F': + mnt_context_enable_fork(cxt, TRUE); + break; + case 'i': + mnt_context_disable_helpers(cxt, TRUE); + break; + case 'n': + mnt_context_disable_mtab(cxt, TRUE); + break; + case 'r': + append_option(cxt, "ro", NULL); + mnt_context_enable_rwonly_mount(cxt, FALSE); + break; + case 'v': + mnt_context_enable_verbose(cxt, TRUE); + break; + case 'w': + append_option(cxt, "rw", NULL); + mnt_context_enable_rwonly_mount(cxt, TRUE); + break; + case 'o': + /* "move" is not supported as option string in libmount + * to avoid use in fstab */ + if (mnt_optstr_get_option(optarg, "move", NULL, 0) == 0) { + char *o = xstrdup(optarg); + + mnt_optstr_remove_option(&o, "move"); + if (o && *o) + append_option(cxt, o, NULL); + oper = is_move = 1; + free(o); + } else + append_option(cxt, optarg, NULL); + break; + case 'O': + if (mnt_context_set_options_pattern(cxt, optarg)) + err(MNT_EX_SYSERR, _("failed to set options pattern")); + break; + case 'L': + xasprintf(&srcbuf, "LABEL=\"%s\"", optarg); + mnt_context_disable_swapmatch(cxt, 1); + mnt_context_set_source(cxt, srcbuf); + free(srcbuf); + break; + case 'U': + xasprintf(&srcbuf, "UUID=\"%s\"", optarg); + mnt_context_disable_swapmatch(cxt, 1); + mnt_context_set_source(cxt, srcbuf); + free(srcbuf); + break; + case 'l': + show_labels = 1; + break; + case 't': + types = optarg; + break; + case 'T': + fstab = append_fstab(cxt, fstab, optarg); + break; 
+ case 's': + mnt_context_enable_sloppy(cxt, TRUE); + break; + case 'B': + oper = 1; + append_option(cxt, "bind", NULL); + break; + case 'M': + oper = 1; + is_move = 1; + break; + case 'm': + if (optarg && *optarg == '=') + optarg++; + append_option(cxt, "X-mount.mkdir", optarg); + break; + case 'R': + oper = 1; + append_option(cxt, "rbind", NULL); + break; + case 'N': + { + char path[PATH_MAX]; + pid_t pid = parse_pid(optarg); + + if (pid) + snprintf(path, sizeof(path), "/proc/%i/ns/mnt", pid); + + if (mnt_context_set_target_ns(cxt, pid ? path : optarg)) + err(MNT_EX_SYSERR, _("failed to set target namespace to %s"), pid ? path : optarg); + break; + } + case MOUNT_OPT_SHARED: + append_option(cxt, "shared", NULL); + propa = 1; + break; + case MOUNT_OPT_SLAVE: + append_option(cxt, "slave", NULL); + propa = 1; + break; + case MOUNT_OPT_PRIVATE: + append_option(cxt, "private", NULL); + propa = 1; + break; + case MOUNT_OPT_UNBINDABLE: + append_option(cxt, "unbindable", NULL); + propa = 1; + break; + case MOUNT_OPT_RSHARED: + append_option(cxt, "rshared", NULL); + propa = 1; + break; + case MOUNT_OPT_RSLAVE: + append_option(cxt, "rslave", NULL); + propa = 1; + break; + case MOUNT_OPT_RPRIVATE: + append_option(cxt, "rprivate", NULL); + propa = 1; + break; + case MOUNT_OPT_RUNBINDABLE: + append_option(cxt, "runbindable", NULL); + propa = 1; + break; + case MOUNT_OPT_TARGET: + mnt_context_disable_swapmatch(cxt, 1); + mnt_context_set_target(cxt, optarg); + break; + case MOUNT_OPT_TARGET_PREFIX: + mnt_context_set_target_prefix(cxt, optarg); + break; + case MOUNT_OPT_SOURCE: + mnt_context_disable_swapmatch(cxt, 1); + mnt_context_set_source(cxt, optarg); + break; + case MOUNT_OPT_OPTMODE: + optmode_mode = omode2mask(optarg); + if (optmode_mode == -EINVAL) { + warnx(_("bad usage")); + errtryhelp(MNT_EX_USAGE); + } + break; + case MOUNT_OPT_OPTSRC: + { + unsigned long tmp = 0; + if (string_to_bitmask(optarg, &tmp, osrc2mask)) { + warnx(_("bad usage")); + 
errtryhelp(MNT_EX_USAGE); + } + optmode_src = tmp; + break; + } + case MOUNT_OPT_OPTSRC_FORCE: + optmode |= MNT_OMODE_FORCE; + break; + + case 'h': + mnt_free_context(cxt); + usage(); + case 'V': + mnt_free_context(cxt); + mount_print_version(); + default: + errtryhelp(MNT_EX_USAGE); + } + } + + argc -= optind; + argv += optind; + + optmode |= optmode_mode | optmode_src; + if (optmode) { + if (!optmode_mode) + optmode |= MNT_OMODE_PREPEND; + if (!optmode_src) + optmode |= MNT_OMODE_FSTAB | MNT_OMODE_MTAB; + mnt_context_set_optsmode(cxt, optmode); + } + + if (fstab && !mnt_context_is_nocanonicalize(cxt)) { + /* + * We have external (context independent) fstab instance, let's + * make a connection between the fstab and the canonicalization + * cache. + */ + mnt_table_set_cache(fstab, mnt_context_get_cache(cxt)); + } + + if (!mnt_context_get_source(cxt) && + !mnt_context_get_target(cxt) && + !argc && + !all) { + if (oper || mnt_context_get_options(cxt)) { + warnx(_("bad usage")); + errtryhelp(MNT_EX_USAGE); + } + print_all(cxt, types, show_labels); + goto done; + } + + /* Non-root users are allowed to use -t to print_all(), + but not to mount */ + if (mnt_context_is_restricted(cxt) && types) + suid_drop(cxt); + + if (oper && (types || all || mnt_context_get_source(cxt))) { + warnx(_("bad usage")); + errtryhelp(MNT_EX_USAGE); + } + + if (types && (all || strchr(types, ',') || + strncmp(types, "no", 2) == 0)) + mnt_context_set_fstype_pattern(cxt, types); + else if (types) + mnt_context_set_fstype(cxt, types); + + if (all) { + /* + * A) Mount all + */ + if (has_remount_flag(cxt)) + rc = remount_all(cxt); + else + rc = mount_all(cxt); + goto done; + + } else if (argc == 0 && (mnt_context_get_source(cxt) || + mnt_context_get_target(cxt))) { + /* + * B) mount -L|-U|--source|--target + * + * non-root may specify source *or* target, but not both + */ + if (mnt_context_is_restricted(cxt) && + mnt_context_get_source(cxt) && + mnt_context_get_target(cxt)) + suid_drop(cxt); + + } 
else if (argc == 1 && (!mnt_context_get_source(cxt) || + !mnt_context_get_target(cxt))) { + /* + * C) mount [-L|-U|--source] <target> + * mount [--target <dir>] <source> + * mount <source|target> + * + * non-root may specify source *or* target, but not both + * + * It does not matter for libmount if we set source or target + * here (the library is able to swap it), but it matters for + * sanitize_paths(). + */ + int istag = mnt_tag_is_valid(argv[0]); + + if (istag && mnt_context_get_source(cxt)) + /* -L, -U or --source together with LABEL= or UUID= */ + errx(MNT_EX_USAGE, _("source specified more than once")); + else if (istag || mnt_context_get_target(cxt)) + mnt_context_set_source(cxt, argv[0]); + else + mnt_context_set_target(cxt, argv[0]); + + if (mnt_context_is_restricted(cxt) && + mnt_context_get_source(cxt) && + mnt_context_get_target(cxt)) + suid_drop(cxt); + + } else if (argc == 2 && !mnt_context_get_source(cxt) + && !mnt_context_get_target(cxt)) { + /* + * D) mount <source> <target> + */ + if (mnt_context_is_restricted(cxt)) + suid_drop(cxt); + + mnt_context_set_source(cxt, argv[0]); + mnt_context_set_target(cxt, argv[1]); + + } else { + warnx(_("bad usage")); + errtryhelp(MNT_EX_USAGE); + } + + if (mnt_context_is_restricted(cxt) && sanitize_paths(cxt) != 0) + suid_drop(cxt); + + if (is_move) + /* "move" as option string is not supported by libmount */ + mnt_context_set_mflags(cxt, MS_MOVE); + + if ((oper && !has_remount_flag(cxt)) || propa) + /* For --make-* or --bind is fstab/mtab unnecessary */ + mnt_context_set_optsmode(cxt, MNT_OMODE_NOTAB); + + rc = mnt_context_mount(cxt); + + if (rc == -EPERM + && mnt_context_is_restricted(cxt) + && !mnt_context_syscall_called(cxt)) { + /* Try it again without permissions */ + suid_drop(cxt); + rc = mnt_context_mount(cxt); + } + rc = mk_exit_code(cxt, rc); + + if (rc == MNT_EX_SUCCESS && mnt_context_is_verbose(cxt)) + success_message(cxt); +done: + mnt_free_context(cxt); + env_list_free(envs_removed); + return rc; 
+} diff --git a/sys-utils/mountpoint.1 b/sys-utils/mountpoint.1 new file mode 100644 index 0000000..c32243d --- /dev/null +++ b/sys-utils/mountpoint.1 @@ -0,0 +1,111 @@ +'\" t +.\" Title: mountpoint +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "MOUNTPOINT" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +mountpoint \- see if a directory or file is a mountpoint +.SH "SYNOPSIS" +.sp +\fBmountpoint\fP [\fB\-d\fP|\fB\-q\fP] \fIdirectory\fP|\fIfile\fP +.sp +\fBmountpoint\fP \fB\-x\fP \fIdevice\fP +.SH "DESCRIPTION" +.sp +\fBmountpoint\fP checks whether the given \fIdirectory\fP or \fIfile\fP is mentioned in the \fI/proc/self/mountinfo\fP file. +.SH "OPTIONS" +.sp +\fB\-d\fP, \fB\-\-fs\-devno\fP +.RS 4 +Show the major/minor numbers of the device that is mounted on the given directory. +.RE +.sp +\fB\-q\fP, \fB\-\-quiet\fP +.RS 4 +Be quiet \- don\(cqt print anything. +.RE +.sp +\fB\-\-nofollow\fP +.RS 4 +Do not follow symbolic link if it the last element of the \fIdirectory\fP path. +.RE +.sp +\fB\-x\fP, \fB\-\-devno\fP +.RS 4 +Show the major/minor numbers of the given blockdevice on standard output. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. 
+.RE +.SH "EXIT STATUS" +.sp +\fBmountpoint\fP has the following exit status values: +.sp +\fB0\fP +.RS 4 +success; the directory is a mountpoint, or device is block device on \fB\-\-devno\fP +.RE +.sp +\fB1\fP +.RS 4 +failure; incorrect invocation, permissions or system error +.RE +.sp +\fB32\fP +.RS 4 +failure; the directory is not a mountpoint, or device is not a block device on \fB\-\-devno\fP +.RE +.SH "ENVIRONMENT" +.sp +\fBLIBMOUNT_DEBUG\fP=all +.RS 4 +enables libmount debug output. +.RE +.SH "NOTES" +.sp +The util\-linux \fBmountpoint\fP implementation was written from scratch for libmount. The original version for sysvinit suite was written by Miquel van Smoorenburg. +.SH "AUTHORS" +.sp +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +\fBmount\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBmountpoint\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/mountpoint.1.adoc b/sys-utils/mountpoint.1.adoc new file mode 100644 index 0000000..bc8a2e9 --- /dev/null +++ b/sys-utils/mountpoint.1.adoc @@ -0,0 +1,75 @@ +//po4a: entry man manual += mountpoint(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: mountpoint + +== NAME + +mountpoint - see if a directory or file is a mountpoint + +== SYNOPSIS + +*mountpoint* [*-d*|*-q*] _directory_|_file_ + +*mountpoint* *-x* _device_ + +== DESCRIPTION + +*mountpoint* checks whether the given _directory_ or _file_ is mentioned in the _/proc/self/mountinfo_ file. + +== OPTIONS + +*-d*, *--fs-devno*:: +Show the major/minor numbers of the device that is mounted on the given directory. + +*-q*, *--quiet*:: +Be quiet - don't print anything. + +*--nofollow*:: +Do not follow a symbolic link if it is the last element of the _directory_ path. + +*-x*, *--devno*:: +Show the major/minor numbers of the given block device on standard output. + +include::man-common/help-version.adoc[] + +== EXIT STATUS + +*mountpoint* has the following exit status values: + +*0*:: +success; the directory is a mountpoint, or device is a block device on *--devno* + +*1*:: +failure; incorrect invocation, permissions or system error + +*32*:: +failure; the directory is not a mountpoint, or device is not a block device on *--devno* + +== ENVIRONMENT + +*LIBMOUNT_DEBUG*=all:: +enables libmount debug output. + +== NOTES + +The util-linux *mountpoint* implementation was written from scratch for libmount. The original version for the sysvinit suite was written by Miquel van Smoorenburg. 
+ +== AUTHORS + +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +*mount*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/mountpoint.c b/sys-utils/mountpoint.c new file mode 100644 index 0000000..b9904f3 --- /dev/null +++ b/sys-utils/mountpoint.c @@ -0,0 +1,219 @@ +/* + * mountpoint(1) - see if a directory is a mountpoint + * + * This is libmount based reimplementation of the mountpoint(1) + * from sysvinit project. + * + * + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Written by Karel Zak <kzak@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> + +#include <libmount.h> + +#include "nls.h" +#include "xalloc.h" +#include "c.h" +#include "closestream.h" +#include "pathnames.h" + +#define MOUNTPOINT_EXIT_NOMNT 32 + +struct mountpoint_control { + char *path; + dev_t dev; + struct stat st; + unsigned int + dev_devno:1, + fs_devno:1, + nofollow:1, + quiet:1; +}; + +static int dir_to_device(struct mountpoint_control *ctl) +{ + struct libmnt_table *tb = mnt_new_table_from_file(_PATH_PROC_MOUNTINFO); + struct libmnt_fs *fs; + struct libmnt_cache *cache; + int rc = -1; + + if (!tb) { + /* + * Fallback. Traditional way to detect mountpoints. This way + * is independent on /proc, but not able to detect bind mounts. + */ + struct stat pst; + char buf[PATH_MAX], *cn; + int len; + + cn = mnt_resolve_path(ctl->path, NULL); /* canonicalize */ + + len = snprintf(buf, sizeof(buf), "%s/..", cn ? 
cn : ctl->path); + free(cn); + + if (len < 0 || (size_t) len >= sizeof(buf)) + return -1; + if (stat(buf, &pst) !=0) + return -1; + + if (ctl->st.st_dev != pst.st_dev || ctl->st.st_ino == pst.st_ino) { + ctl->dev = ctl->st.st_dev; + return 0; + } + + return -1; + } + + /* to canonicalize all necessary paths */ + cache = mnt_new_cache(); + mnt_table_set_cache(tb, cache); + mnt_unref_cache(cache); + + fs = mnt_table_find_target(tb, ctl->path, MNT_ITER_BACKWARD); + if (fs && mnt_fs_get_target(fs)) { + ctl->dev = mnt_fs_get_devno(fs); + rc = 0; + } + + mnt_unref_table(tb); + return rc; +} + +static int print_devno(const struct mountpoint_control *ctl) +{ + if (!S_ISBLK(ctl->st.st_mode)) { + if (!ctl->quiet) + warnx(_("%s: not a block device"), ctl->path); + return -1; + } + printf("%u:%u\n", major(ctl->st.st_rdev), minor(ctl->st.st_rdev)); + return 0; +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, + _(" %1$s [-qd] /path/to/directory\n" + " %1$s -x /dev/device\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Check whether a directory or file is a mountpoint.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -q, --quiet quiet mode - don't print anything\n" + " --nofollow do not follow symlink\n" + " -d, --fs-devno print maj:min device number of the filesystem\n" + " -x, --devno print maj:min device number of the block device\n"), out); + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(20)); + printf(USAGE_MAN_TAIL("mountpoint(1)")); + + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + int c; + struct mountpoint_control ctl = { NULL }; + + enum { + OPT_NOFOLLOW = CHAR_MAX + 1 + }; + + static const struct option longopts[] = { + { "quiet", no_argument, NULL, 'q' }, + { "nofollow", no_argument, NULL, OPT_NOFOLLOW }, + { "fs-devno", no_argument, NULL, 'd' }, + { "devno", no_argument, NULL, 'x' }, + { "help", no_argument, NULL, 'h' }, + { 
"version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + mnt_init_debug(0); + + while ((c = getopt_long(argc, argv, "qdxhV", longopts, NULL)) != -1) { + + switch(c) { + case 'q': + ctl.quiet = 1; + break; + case OPT_NOFOLLOW: + ctl.nofollow = 1; + break; + case 'd': + ctl.fs_devno = 1; + break; + case 'x': + ctl.dev_devno = 1; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (optind + 1 != argc) { + warnx(_("bad usage")); + errtryhelp(EXIT_FAILURE); + } + if (ctl.nofollow && ctl.dev_devno) + errx(EXIT_FAILURE, _("%s and %s are mutually exclusive"), + "--devno", "--nofollow"); + + ctl.path = argv[optind]; + c = ctl.nofollow ? lstat(ctl.path, &ctl.st) : stat(ctl.path, &ctl.st); + if (c) { + if (!ctl.quiet) + err(EXIT_FAILURE, "%s", ctl.path); + return EXIT_FAILURE; + } + if (ctl.dev_devno) + return print_devno(&ctl) ? MOUNTPOINT_EXIT_NOMNT : EXIT_SUCCESS; + + if ((ctl.nofollow && S_ISLNK(ctl.st.st_mode)) || dir_to_device(&ctl)) { + if (!ctl.quiet) + printf(_("%s is not a mountpoint\n"), ctl.path); + return MOUNTPOINT_EXIT_NOMNT; + } + if (ctl.fs_devno) + printf("%u:%u\n", major(ctl.dev), minor(ctl.dev)); + else if (!ctl.quiet) + printf(_("%s is a mountpoint\n"), ctl.path); + + return EXIT_SUCCESS; +} diff --git a/sys-utils/nsenter.1 b/sys-utils/nsenter.1 new file mode 100644 index 0000000..a696670 --- /dev/null +++ b/sys-utils/nsenter.1 @@ -0,0 +1,251 @@ +'\" t +.\" Title: nsenter +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "NSENTER" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. 
mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +nsenter \- run program in different namespaces +.SH "SYNOPSIS" +.sp +\fBnsenter\fP [options] [\fIprogram\fP [\fIarguments\fP]] +.SH "DESCRIPTION" +.sp +The \fBnsenter\fP command executes \fIprogram\fP in the namespace(s) that are specified in the command\-line options (described below). If \fIprogram\fP is not given, then "${SHELL}" is run (default: \fI/bin/sh\fP). +.sp +Enterable namespaces are: +.sp +\fBmount namespace\fP +.RS 4 +Mounting and unmounting filesystems will not affect the rest of the system, except for filesystems which are explicitly marked as shared (with \fBmount \-\-make\-shared\fP; see \fI/proc/self/mountinfo\fP for the \fBshared\fP flag). For further details, see \fBmount_namespaces\fP(7) and the discussion of the \fBCLONE_NEWNS\fP flag in \fBclone\fP(2). +.RE +.sp +\fBUTS namespace\fP +.RS 4 +Setting hostname or domainname will not affect the rest of the system. For further details, see \fButs_namespaces\fP(7). +.RE +.sp +\fBIPC namespace\fP +.RS 4 +The process will have an independent namespace for POSIX message queues as well as System V message queues, semaphore sets and shared memory segments. For further details, see \fBipc_namespaces\fP(7). +.RE +.sp +\fBnetwork namespace\fP +.RS 4 +The process will have independent IPv4 and IPv6 stacks, IP routing tables, firewall rules, the \fI/proc/net\fP and \fI/sys/class/net\fP directory trees, sockets, etc. For further details, see \fBnetwork_namespaces\fP(7). +.RE +.sp +\fBPID namespace\fP +.RS 4 +Children will have a set of PID to process mappings separate from the \fBnsenter\fP process. \fBnsenter\fP will fork by default if changing the PID namespace, so that the new program and its children share the same PID namespace and are visible to each other. If \fB\-\-no\-fork\fP is used, the new program will be exec\(cqed without forking. For further details, see \fBpid_namespaces\fP(7). 
+.RE +.sp +\fBuser namespace\fP +.RS 4 +The process will have a distinct set of UIDs, GIDs and capabilities. For further details, see \fBuser_namespaces\fP(7). +.RE +.sp +\fBcgroup namespace\fP +.RS 4 +The process will have a virtualized view of \fI/proc/self/cgroup\fP, and new cgroup mounts will be rooted at the namespace cgroup root. For further details, see \fBcgroup_namespaces\fP(7). +.RE +.sp +\fBtime namespace\fP +.RS 4 +The process can have a distinct view of \fBCLOCK_MONOTONIC\fP and/or \fBCLOCK_BOOTTIME\fP which can be changed using \fI/proc/self/timens_offsets\fP. For further details, see \fBtime_namespaces\fP(7). +.RE +.SH "OPTIONS" +.sp +Various of the options below that relate to namespaces take an optional \fIfile\fP argument. This should be one of the \fI/proc/[pid]/ns/*\fP files described in \fBnamespaces\fP(7), or the pathname of a bind mount that was created on one of those files. +.sp +\fB\-a\fP, \fB\-\-all\fP +.RS 4 +Enter all namespaces of the target process by the default \fI/proc/[pid]/ns/*\fP namespace paths. The default paths to the target process namespaces may be overwritten by namespace specific options (e.g., \fB\-\-all \-\-mount\fP=[\fIpath\fP]). +.sp +The user namespace will be ignored if the same as the caller\(cqs current user namespace. It prevents a caller that has dropped capabilities from regaining those capabilities via a call to setns(). See \fBsetns\fP(2) for more details. +.RE +.sp +\fB\-t\fP, \fB\-\-target\fP \fIPID\fP +.RS 4 +Specify a target process to get contexts from. 
The paths to the contexts specified by \fIpid\fP are: +.sp +\fI/proc/pid/ns/mnt\fP +.RS 4 +the mount namespace +.RE +.sp +\fI/proc/pid/ns/uts\fP +.RS 4 +the UTS namespace +.RE +.sp +\fI/proc/pid/ns/ipc\fP +.RS 4 +the IPC namespace +.RE +.sp +\fI/proc/pid/ns/net\fP +.RS 4 +the network namespace +.RE +.sp +\fI/proc/pid/ns/pid\fP +.RS 4 +the PID namespace +.RE +.sp +\fI/proc/pid/ns/user\fP +.RS 4 +the user namespace +.RE +.sp +\fI/proc/pid/ns/cgroup\fP +.RS 4 +the cgroup namespace +.RE +.sp +\fI/proc/pid/ns/time\fP +.RS 4 +the time namespace +.RE +.sp +\fI/proc/pid/root\fP +.RS 4 +the root directory +.RE +.sp +\fI/proc/pid/cwd\fP +.RS 4 +the working directory respectively +.RE +.RE +.sp +\fB\-m\fP, \fB\-\-mount\fP[=\fIfile\fP] +.RS 4 +Enter the mount namespace. If no file is specified, enter the mount namespace of the target process. If \fIfile\fP is specified, enter the mount namespace specified by \fIfile\fP. +.RE +.sp +\fB\-u\fP, \fB\-\-uts\fP[=\fIfile\fP] +.RS 4 +Enter the UTS namespace. If no file is specified, enter the UTS namespace of the target process. If \fIfile\fP is specified, enter the UTS namespace specified by \fIfile\fP. +.RE +.sp +\fB\-i\fP, \fB\-\-ipc\fP[=\fIfile\fP] +.RS 4 +Enter the IPC namespace. If no file is specified, enter the IPC namespace of the target process. If \fIfile\fP is specified, enter the IPC namespace specified by \fIfile\fP. +.RE +.sp +\fB\-n\fP, \fB\-\-net\fP[=\fIfile\fP] +.RS 4 +Enter the network namespace. If no file is specified, enter the network namespace of the target process. If \fIfile\fP is specified, enter the network namespace specified by \fIfile\fP. +.RE +.sp +\fB\-p\fP, \fB\-\-pid\fP[=\fIfile\fP] +.RS 4 +Enter the PID namespace. If no file is specified, enter the PID namespace of the target process. If \fIfile\fP is specified, enter the PID namespace specified by \fIfile\fP. +.RE +.sp +\fB\-U\fP, \fB\-\-user\fP[=\fIfile\fP] +.RS 4 +Enter the user namespace. 
If no file is specified, enter the user namespace of the target process. If \fIfile\fP is specified, enter the user namespace specified by \fIfile\fP. See also the \fB\-\-setuid\fP and \fB\-\-setgid\fP options. +.RE +.sp +\fB\-C\fP, \fB\-\-cgroup\fP[=\fIfile\fP] +.RS 4 +Enter the cgroup namespace. If no file is specified, enter the cgroup namespace of the target process. If \fIfile\fP is specified, enter the cgroup namespace specified by \fIfile\fP. +.RE +.sp +\fB\-T\fP, \fB\-\-time\fP[=\fIfile\fP] +.RS 4 +Enter the time namespace. If no file is specified, enter the time namespace of the target process. If \fIfile\fP is specified, enter the time namespace specified by \fIfile\fP. +.RE +.sp +\fB\-G\fP, \fB\-\-setgid\fP \fIgid\fP +.RS 4 +Set the group ID which will be used in the entered namespace and drop supplementary groups. \fBnsenter\fP always sets GID for user namespaces, the default is 0. +.RE +.sp +\fB\-S\fP, \fB\-\-setuid\fP \fIuid\fP +.RS 4 +Set the user ID which will be used in the entered namespace. \fBnsenter\fP always sets UID for user namespaces, the default is 0. +.RE +.sp +\fB\-\-preserve\-credentials\fP +.RS 4 +Don\(cqt modify UID and GID when entering a user namespace. The default is to drop supplementary groups and set GID and UID to 0. +.RE +.sp +\fB\-r\fP, \fB\-\-root\fP[=\fIdirectory\fP] +.RS 4 +Set the root directory. If no directory is specified, set the root directory to the root directory of the target process. If directory is specified, set the root directory to the specified directory. The specified \fIdirectory\fP is opened before switching to the requested namespaces. +.RE +.sp +\fB\-w\fP, \fB\-\-wd\fP[=\fIdirectory\fP] +.RS 4 +Set the working directory. If no directory is specified, set the working directory to the working directory of the target process. If directory is specified, set the working directory to the specified directory. 
The specified \fIdirectory\fP is opened before switching to the requested namespaces, which means the specified directory works as a "tunnel" to the current namespace. See also \fB\-\-wdns\fP. +.RE +.sp +\fB\-W\fP, \fB\-\-wdns\fP[=\fIdirectory\fP] +.RS 4 +Set the working directory. The \fIdirectory\fP is opened after switching to the requested namespaces and after the \fBchroot\fP(2) call. The options \fB\-\-wd\fP and \fB\-\-wdns\fP are mutually exclusive. +.RE +.sp +\fB\-F\fP, \fB\-\-no\-fork\fP +.RS 4 +Do not fork before exec\(cqing the specified program. By default, when entering a PID namespace, \fBnsenter\fP calls \fBfork\fP before calling \fBexec\fP so that any children will also be in the newly entered PID namespace. +.RE +.sp +\fB\-Z\fP, \fB\-\-follow\-context\fP +.RS 4 +Set the SELinux security context used for executing a new process according to the already running process specified by \fB\-\-target\fP PID. (The util\-linux has to be compiled with SELinux support otherwise the option is unavailable.) +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "AUTHORS" +.sp +.MTO "biederm\(atxmission.com" "Eric Biederman" "," +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +\fBclone\fP(2), +\fBsetns\fP(2), +\fBnamespaces\fP(7) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBnsenter\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/nsenter.1.adoc b/sys-utils/nsenter.1.adoc new file mode 100644 index 0000000..2500ff2 --- /dev/null +++ b/sys-utils/nsenter.1.adoc @@ -0,0 +1,149 @@ +//po4a: entry man manual += nsenter(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: nsenter + +== NAME + +nsenter - run program in different namespaces + +== SYNOPSIS + +*nsenter* [options] [_program_ [_arguments_]] + +== DESCRIPTION + +The *nsenter* command executes _program_ in the namespace(s) that are specified in the command-line options (described below). If _program_ is not given, then "$\{SHELL}" is run (default: _/bin/sh_). + +Enterable namespaces are: + +*mount namespace*:: +Mounting and unmounting filesystems will not affect the rest of the system, except for filesystems which are explicitly marked as shared (with *mount --make-shared*; see _/proc/self/mountinfo_ for the *shared* flag). For further details, see *mount_namespaces*(7) and the discussion of the *CLONE_NEWNS* flag in *clone*(2). + +*UTS namespace*:: +Setting hostname or domainname will not affect the rest of the system. For further details, see *uts_namespaces*(7). + +*IPC namespace*:: +The process will have an independent namespace for POSIX message queues as well as System V message queues, semaphore sets and shared memory segments. For further details, see *ipc_namespaces*(7). + +*network namespace*:: +The process will have independent IPv4 and IPv6 stacks, IP routing tables, firewall rules, the _/proc/net_ and _/sys/class/net_ directory trees, sockets, etc. For further details, see *network_namespaces*(7). + +*PID namespace*:: +Children will have a set of PID to process mappings separate from the *nsenter* process. *nsenter* will fork by default if changing the PID namespace, so that the new program and its children share the same PID namespace and are visible to each other. 
If *--no-fork* is used, the new program will be exec'ed without forking. For further details, see *pid_namespaces*(7). + +*user namespace*:: +The process will have a distinct set of UIDs, GIDs and capabilities. For further details, see *user_namespaces*(7). + +*cgroup namespace*:: +The process will have a virtualized view of _/proc/self/cgroup_, and new cgroup mounts will be rooted at the namespace cgroup root. For further details, see *cgroup_namespaces*(7). + +*time namespace*:: +The process can have a distinct view of *CLOCK_MONOTONIC* and/or *CLOCK_BOOTTIME* which can be changed using _/proc/self/timens_offsets_. For further details, see *time_namespaces*(7). + +== OPTIONS + +//TRANSLATORS: Keep {asterisk} untranslated. +Various of the options below that relate to namespaces take an optional _file_ argument. This should be one of the _/proc/[pid]/ns/{asterisk}_ files described in *namespaces*(7), or the pathname of a bind mount that was created on one of those files. + +//TRANSLATORS: Keep {asterisk} untranslated. +*-a*, *--all*:: +Enter all namespaces of the target process by the default _/proc/[pid]/ns/{asterisk}_ namespace paths. The default paths to the target process namespaces may be overwritten by namespace specific options (e.g., *--all --mount*=[_path_]). ++ +The user namespace will be ignored if the same as the caller's current user namespace. It prevents a caller that has dropped capabilities from regaining those capabilities via a call to setns(). See *setns*(2) for more details. + +*-t*, *--target* _PID_:: +Specify a target process to get contexts from. 
The paths to the contexts specified by _pid_ are: + +_/proc/pid/ns/mnt_;; +the mount namespace +_/proc/pid/ns/uts_;; +the UTS namespace +_/proc/pid/ns/ipc_;; +the IPC namespace +_/proc/pid/ns/net_;; +the network namespace +_/proc/pid/ns/pid_;; +the PID namespace +_/proc/pid/ns/user_;; +the user namespace +_/proc/pid/ns/cgroup_;; +the cgroup namespace +_/proc/pid/ns/time_;; +the time namespace +_/proc/pid/root_;; +the root directory +_/proc/pid/cwd_;; +the working directory respectively + +*-m*, *--mount*[=_file_]:: +Enter the mount namespace. If no file is specified, enter the mount namespace of the target process. If _file_ is specified, enter the mount namespace specified by _file_. + +*-u*, *--uts*[=_file_]:: +Enter the UTS namespace. If no file is specified, enter the UTS namespace of the target process. If _file_ is specified, enter the UTS namespace specified by _file_. + +*-i*, *--ipc*[=_file_]:: +Enter the IPC namespace. If no file is specified, enter the IPC namespace of the target process. If _file_ is specified, enter the IPC namespace specified by _file_. + +*-n*, *--net*[=_file_]:: +Enter the network namespace. If no file is specified, enter the network namespace of the target process. If _file_ is specified, enter the network namespace specified by _file_. + +*-p*, *--pid*[=_file_]:: +Enter the PID namespace. If no file is specified, enter the PID namespace of the target process. If _file_ is specified, enter the PID namespace specified by _file_. + +*-U*, *--user*[=_file_]:: +Enter the user namespace. If no file is specified, enter the user namespace of the target process. If _file_ is specified, enter the user namespace specified by _file_. See also the *--setuid* and *--setgid* options. + +*-C*, *--cgroup*[=_file_]:: +Enter the cgroup namespace. If no file is specified, enter the cgroup namespace of the target process. If _file_ is specified, enter the cgroup namespace specified by _file_. + +*-T*, *--time*[=_file_]:: +Enter the time namespace. 
If no file is specified, enter the time namespace of the target process. If _file_ is specified, enter the time namespace specified by _file_. + +*-G*, *--setgid* _gid_:: +Set the group ID which will be used in the entered namespace and drop supplementary groups. *nsenter* always sets GID for user namespaces, the default is 0. + +*-S*, *--setuid* _uid_:: +Set the user ID which will be used in the entered namespace. *nsenter* always sets UID for user namespaces, the default is 0. + +*--preserve-credentials*:: +Don't modify UID and GID when entering a user namespace. The default is to drop supplementary groups and set GID and UID to 0. + +*-r*, *--root*[=_directory_]:: +Set the root directory. If no directory is specified, set the root directory to the root directory of the target process. If directory is specified, set the root directory to the specified directory. The specified _directory_ is opened before switching to the requested namespaces. + +*-w*, *--wd*[=_directory_]:: +Set the working directory. If no directory is specified, set the working directory to the working directory of the target process. If directory is specified, set the working directory to the specified directory. The specified _directory_ is opened before switching to the requested namespaces, which means the specified directory works as a "tunnel" to the current namespace. See also *--wdns*. + +*-W*, *--wdns*[=_directory_]:: +Set the working directory. The _directory_ is opened after switching to the requested namespaces and after the *chroot*(2) call. The options *--wd* and *--wdns* are mutually exclusive. + +*-F*, *--no-fork*:: +Do not fork before exec'ing the specified program. By default, when entering a PID namespace, *nsenter* calls *fork* before calling *exec* so that any children will also be in the newly entered PID namespace. + +*-Z*, *--follow-context*:: +Set the SELinux security context used for executing a new process according to the already running process specified by *--target* PID. 
(The util-linux has to be compiled with SELinux support otherwise the option is unavailable.) + +include::man-common/help-version.adoc[] + +== AUTHORS + +mailto:biederm@xmission.com[Eric Biederman], +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +*clone*(2), +*setns*(2), +*namespaces*(7) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/nsenter.c b/sys-utils/nsenter.c new file mode 100644 index 0000000..440bf3b --- /dev/null +++ b/sys-utils/nsenter.c @@ -0,0 +1,530 @@ +/* + * nsenter(1) - command-line interface for setns(2) + * + * Copyright (C) 2012-2013 Eric Biederman <ebiederm@xmission.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; version 2. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <dirent.h> +#include <errno.h> +#include <getopt.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdbool.h> +#include <unistd.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <grp.h> +#include <sys/stat.h> + +#ifdef HAVE_LIBSELINUX +# include <selinux/selinux.h> +#endif + +#include "strutils.h" +#include "nls.h" +#include "c.h" +#include "closestream.h" +#include "namespace.h" +#include "exec_shell.h" +#include "optutils.h" + +static struct namespace_file { + int nstype; + const char *name; + int fd; +} namespace_files[] = { + /* Careful the order is significant in this array. + * + * The user namespace comes either first or last: first if + * you're using it to increase your privilege and last if + * you're using it to decrease. We enter the namespaces in + * two passes starting initially from offset 1 and then offset + * 0 if that fails. + */ + { .nstype = CLONE_NEWUSER, .name = "ns/user", .fd = -1 }, + { .nstype = CLONE_NEWCGROUP,.name = "ns/cgroup", .fd = -1 }, + { .nstype = CLONE_NEWIPC, .name = "ns/ipc", .fd = -1 }, + { .nstype = CLONE_NEWUTS, .name = "ns/uts", .fd = -1 }, + { .nstype = CLONE_NEWNET, .name = "ns/net", .fd = -1 }, + { .nstype = CLONE_NEWPID, .name = "ns/pid", .fd = -1 }, + { .nstype = CLONE_NEWNS, .name = "ns/mnt", .fd = -1 }, + { .nstype = CLONE_NEWTIME, .name = "ns/time", .fd = -1 }, + { .nstype = 0, .name = NULL, .fd = -1 } +}; + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options] [<program> [<argument>...]]\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Run a program with namespaces of other processes.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -a, --all enter all namespaces\n"), out); + fputs(_(" -t, --target <pid> target process to get namespaces from\n"), out); + fputs(_(" -m, --mount[=<file>] enter mount namespace\n"), out); + 
fputs(_(" -u, --uts[=<file>] enter UTS namespace (hostname etc)\n"), out); + fputs(_(" -i, --ipc[=<file>] enter System V IPC namespace\n"), out); + fputs(_(" -n, --net[=<file>] enter network namespace\n"), out); + fputs(_(" -p, --pid[=<file>] enter pid namespace\n"), out); + fputs(_(" -C, --cgroup[=<file>] enter cgroup namespace\n"), out); + fputs(_(" -U, --user[=<file>] enter user namespace\n"), out); + fputs(_(" -T, --time[=<file>] enter time namespace\n"), out); + fputs(_(" -S, --setuid <uid> set uid in entered namespace\n"), out); + fputs(_(" -G, --setgid <gid> set gid in entered namespace\n"), out); + fputs(_(" --preserve-credentials do not touch uids or gids\n"), out); + fputs(_(" -r, --root[=<dir>] set the root directory\n"), out); + fputs(_(" -w, --wd[=<dir>] set the working directory\n"), out); + fputs(_(" -W. --wdns <dir> set the working directory in namespace\n"), out); + fputs(_(" -F, --no-fork do not fork before exec'ing <program>\n"), out); +#ifdef HAVE_LIBSELINUX + fputs(_(" -Z, --follow-context set SELinux context according to --target PID\n"), out); +#endif + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(24)); + printf(USAGE_MAN_TAIL("nsenter(1)")); + + exit(EXIT_SUCCESS); +} + +static pid_t namespace_target_pid = 0; +static int root_fd = -1; +static int wd_fd = -1; + +static void open_target_fd(int *fd, const char *type, const char *path) +{ + char pathbuf[PATH_MAX]; + + if (!path && namespace_target_pid) { + snprintf(pathbuf, sizeof(pathbuf), "/proc/%u/%s", + namespace_target_pid, type); + path = pathbuf; + } + if (!path) + errx(EXIT_FAILURE, + _("neither filename nor target pid supplied for %s"), + type); + + if (*fd >= 0) + close(*fd); + + *fd = open(path, O_RDONLY); + if (*fd < 0) + err(EXIT_FAILURE, _("cannot open %s"), path); +} + +static void open_namespace_fd(int nstype, const char *path) +{ + struct namespace_file *nsfile; + + for (nsfile = namespace_files; nsfile->nstype; nsfile++) { + if (nstype != nsfile->nstype) + 
continue; + + open_target_fd(&nsfile->fd, nsfile->name, path); + return; + } + /* This should never happen */ + assert(nsfile->nstype); +} + +static int get_ns_ino(const char *path, ino_t *ino) +{ + struct stat st; + + if (stat(path, &st) != 0) + return -errno; + *ino = st.st_ino; + return 0; +} + +static int is_usable_namespace(pid_t target, const struct namespace_file *nsfile) +{ + char path[PATH_MAX]; + ino_t my_ino = 0; + int rc; + + /* Check NS accessibility */ + snprintf(path, sizeof(path), "/proc/%u/%s", getpid(), nsfile->name); + rc = get_ns_ino(path, &my_ino); + if (rc == -ENOENT) + return false; /* Unsupported NS */ + + /* It is not permitted to use setns(2) to reenter the caller's + * current user namespace; see setns(2) man page for more details. + */ + if (nsfile->nstype & CLONE_NEWUSER) { + ino_t target_ino = 0; + + snprintf(path, sizeof(path), "/proc/%u/%s", target, nsfile->name); + if (get_ns_ino(path, &target_ino) != 0) + err(EXIT_FAILURE, _("stat of %s failed"), path); + + if (my_ino == target_ino) + return false; + } + + return true; /* All pass */ +} + +static void continue_as_child(void) +{ + pid_t child; + int status; + pid_t ret; + + /* Clear any inherited settings */ + signal(SIGCHLD, SIG_DFL); + + child = fork(); + if (child < 0) + err(EXIT_FAILURE, _("fork failed")); + + /* Only the child returns */ + if (child == 0) + return; + + for (;;) { + ret = waitpid(child, &status, WUNTRACED); + if ((ret == child) && (WIFSTOPPED(status))) { + /* The child suspended so suspend us as well */ + kill(getpid(), SIGSTOP); + kill(child, SIGCONT); + } else { + break; + } + } + /* Return the child's exit code if possible */ + if (WIFEXITED(status)) { + exit(WEXITSTATUS(status)); + } else if (WIFSIGNALED(status)) { + kill(getpid(), WTERMSIG(status)); + } + exit(EXIT_FAILURE); +} + +int main(int argc, char *argv[]) +{ + enum { + OPT_PRESERVE_CRED = CHAR_MAX + 1 + }; + static const struct option longopts[] = { + { "all", no_argument, NULL, 'a' }, + { "help", 
no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V'}, + { "target", required_argument, NULL, 't' }, + { "mount", optional_argument, NULL, 'm' }, + { "uts", optional_argument, NULL, 'u' }, + { "ipc", optional_argument, NULL, 'i' }, + { "net", optional_argument, NULL, 'n' }, + { "pid", optional_argument, NULL, 'p' }, + { "user", optional_argument, NULL, 'U' }, + { "cgroup", optional_argument, NULL, 'C' }, + { "time", optional_argument, NULL, 'T' }, + { "setuid", required_argument, NULL, 'S' }, + { "setgid", required_argument, NULL, 'G' }, + { "root", optional_argument, NULL, 'r' }, + { "wd", optional_argument, NULL, 'w' }, + { "wdns", optional_argument, NULL, 'W' }, + { "no-fork", no_argument, NULL, 'F' }, + { "preserve-credentials", no_argument, NULL, OPT_PRESERVE_CRED }, +#ifdef HAVE_LIBSELINUX + { "follow-context", no_argument, NULL, 'Z' }, +#endif + { NULL, 0, NULL, 0 } + }; + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'W', 'w' }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + struct namespace_file *nsfile; + int c, pass, namespaces = 0, setgroups_nerrs = 0, preserve_cred = 0; + bool do_rd = false, do_wd = false, force_uid = false, force_gid = false; + bool do_all = false; + int do_fork = -1; /* unknown yet */ + char *wdns = NULL; + uid_t uid = 0; + gid_t gid = 0; +#ifdef HAVE_LIBSELINUX + bool selinux = 0; +#endif + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = + getopt_long(argc, argv, "+ahVt:m::u::i::n::p::C::U::T::S:G:r::w::W:FZ", + longopts, NULL)) != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch (c) { + case 'a': + do_all = true; + break; + case 't': + namespace_target_pid = + strtoul_or_err(optarg, _("failed to parse pid")); + break; + case 'm': + if (optarg) + open_namespace_fd(CLONE_NEWNS, optarg); + else + namespaces |= CLONE_NEWNS; + break; + case 'u': + if (optarg) + 
open_namespace_fd(CLONE_NEWUTS, optarg); + else + namespaces |= CLONE_NEWUTS; + break; + case 'i': + if (optarg) + open_namespace_fd(CLONE_NEWIPC, optarg); + else + namespaces |= CLONE_NEWIPC; + break; + case 'n': + if (optarg) + open_namespace_fd(CLONE_NEWNET, optarg); + else + namespaces |= CLONE_NEWNET; + break; + case 'p': + if (optarg) + open_namespace_fd(CLONE_NEWPID, optarg); + else + namespaces |= CLONE_NEWPID; + break; + case 'C': + if (optarg) + open_namespace_fd(CLONE_NEWCGROUP, optarg); + else + namespaces |= CLONE_NEWCGROUP; + break; + case 'U': + if (optarg) + open_namespace_fd(CLONE_NEWUSER, optarg); + else + namespaces |= CLONE_NEWUSER; + break; + case 'T': + if (optarg) + open_namespace_fd(CLONE_NEWTIME, optarg); + else + namespaces |= CLONE_NEWTIME; + break; + case 'S': + uid = strtoul_or_err(optarg, _("failed to parse uid")); + force_uid = true; + break; + case 'G': + gid = strtoul_or_err(optarg, _("failed to parse gid")); + force_gid = true; + break; + case 'F': + do_fork = 0; + break; + case 'r': + if (optarg) + open_target_fd(&root_fd, "root", optarg); + else + do_rd = true; + break; + case 'w': + if (optarg) + open_target_fd(&wd_fd, "cwd", optarg); + else + do_wd = true; + break; + case 'W': + wdns = optarg; + break; + case OPT_PRESERVE_CRED: + preserve_cred = 1; + break; +#ifdef HAVE_LIBSELINUX + case 'Z': + selinux = 1; + break; +#endif + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + +#ifdef HAVE_LIBSELINUX + if (selinux && is_selinux_enabled() > 0) { + char *scon = NULL; + + if (!namespace_target_pid) + errx(EXIT_FAILURE, _("no target PID specified for --follow-context")); + if (getpidcon(namespace_target_pid, &scon) < 0) + errx(EXIT_FAILURE, _("failed to get %d SELinux context"), + (int) namespace_target_pid); + if (setexeccon(scon) < 0) + errx(EXIT_FAILURE, _("failed to set exec context to '%s'"), scon); + freecon(scon); + } +#endif + + if (do_all) { + if 
(!namespace_target_pid) + errx(EXIT_FAILURE, _("no target PID specified for --all")); + for (nsfile = namespace_files; nsfile->nstype; nsfile++) { + if (nsfile->fd >= 0) + continue; /* namespace already specified */ + + if (!is_usable_namespace(namespace_target_pid, nsfile)) + continue; + + namespaces |= nsfile->nstype; + } + } + + /* + * Open remaining namespace and directory descriptors. + */ + for (nsfile = namespace_files; nsfile->nstype; nsfile++) + if (nsfile->nstype & namespaces) + open_namespace_fd(nsfile->nstype, NULL); + if (do_rd) + open_target_fd(&root_fd, "root", NULL); + if (do_wd) + open_target_fd(&wd_fd, "cwd", NULL); + + /* + * Update namespaces variable to contain all requested namespaces + */ + for (nsfile = namespace_files; nsfile->nstype; nsfile++) { + if (nsfile->fd < 0) + continue; + namespaces |= nsfile->nstype; + } + + /* for user namespaces we always set UID and GID (default is 0) + * and clear root's groups if --preserve-credentials is no specified */ + if ((namespaces & CLONE_NEWUSER) && !preserve_cred) { + force_uid = true, force_gid = true; + + /* We call setgroups() before and after we enter user namespace, + * let's complain only if both fail */ + if (setgroups(0, NULL) != 0) + setgroups_nerrs++; + } + + /* + * Now that we know which namespaces we want to enter, enter + * them. Do this in two passes, not entering the user + * namespace on the first pass. So if we're deprivileging the + * container we'll enter the user namespace last and if we're + * privileging it then we enter the user namespace first + * (because the initial setns will fail). 
+ */ + for (pass = 0; pass < 2; pass ++) { + for (nsfile = namespace_files + 1 - pass; nsfile->nstype; nsfile++) { + if (nsfile->fd < 0) + continue; + if (nsfile->nstype == CLONE_NEWPID && do_fork == -1) + do_fork = 1; + if (setns(nsfile->fd, nsfile->nstype)) { + if (pass != 0) + err(EXIT_FAILURE, + _("reassociate to namespace '%s' failed"), + nsfile->name); + else + continue; + } + + close(nsfile->fd); + nsfile->fd = -1; + } + } + + /* Remember the current working directory if I'm not changing it */ + if (root_fd >= 0 && wd_fd < 0 && wdns == NULL) { + wd_fd = open(".", O_RDONLY); + if (wd_fd < 0) + err(EXIT_FAILURE, + _("cannot open current working directory")); + } + + /* Change the root directory */ + if (root_fd >= 0) { + if (fchdir(root_fd) < 0) + err(EXIT_FAILURE, + _("change directory by root file descriptor failed")); + + if (chroot(".") < 0) + err(EXIT_FAILURE, _("chroot failed")); + if (chdir("/")) + err(EXIT_FAILURE, _("cannot change directory to %s"), "/"); + + close(root_fd); + root_fd = -1; + } + + /* working directory specified as in-namespace path */ + if (wdns) { + wd_fd = open(wdns, O_RDONLY); + if (wd_fd < 0) + err(EXIT_FAILURE, + _("cannot open current working directory")); + } + + /* Change the working directory */ + if (wd_fd >= 0) { + if (fchdir(wd_fd) < 0) + err(EXIT_FAILURE, + _("change directory by working directory file descriptor failed")); + + close(wd_fd); + wd_fd = -1; + } + + if (do_fork == 1) + continue_as_child(); + + if (force_uid || force_gid) { + if (force_gid && setgroups(0, NULL) != 0 && setgroups_nerrs) /* drop supplementary groups */ + err(EXIT_FAILURE, _("setgroups failed")); + if (force_gid && setgid(gid) < 0) /* change GID */ + err(EXIT_FAILURE, _("setgid failed")); + if (force_uid && setuid(uid) < 0) /* change UID */ + err(EXIT_FAILURE, _("setuid failed")); + } + + if (optind < argc) { + execvp(argv[optind], argv + optind); + errexec(argv[optind]); + } + exec_shell(); +} diff --git a/sys-utils/pivot_root.8 
b/sys-utils/pivot_root.8 new file mode 100644 index 0000000..d816504 --- /dev/null +++ b/sys-utils/pivot_root.8 @@ -0,0 +1,112 @@ +'\" t +.\" Title: pivot_root +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "PIVOT_ROOT" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +pivot_root \- change the root filesystem +.SH "SYNOPSIS" +.sp +\fBpivot_root\fP \fInew_root\fP \fIput_old\fP +.SH "DESCRIPTION" +.sp +\fBpivot_root\fP moves the root file system of the current process to the directory \fIput_old\fP and makes \fInew_root\fP the new root file system. Since \fBpivot_root\fP(8) simply calls \fBpivot_root\fP(2), we refer to the man page of the latter for further details. +.sp +Note that, depending on the implementation of \fBpivot_root\fP, root and current working directory of the caller may or may not change. The following is a sequence for invoking \fBpivot_root\fP that works in either case, assuming that \fBpivot_root\fP and \fBchroot\fP are in the current \fBPATH\fP: +.sp +.if n .RS 4 +.nf +.fam C +cd new_root +pivot_root . put_old +exec chroot . command +.fam +.fi +.if n .RE +.sp +Note that \fBchroot\fP must be available under the old root and under the new root, because \fBpivot_root\fP may or may not have implicitly changed the root directory of the shell. +.sp +Note that \fBexec chroot\fP changes the running executable, which is necessary if the old root directory should be unmounted afterwards. Also note that standard input, output, and error may still point to a device on the old root file system, keeping it busy. 
They can easily be changed when invoking \fBchroot\fP (see below; note the absence of leading slashes to make it work whether \fBpivot_root\fP has changed the shell\(cqs root or not). +.SH "OPTIONS" +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "EXAMPLE" +.sp +Change the root file system to \fI/dev/hda1\fP from an interactive shell: +.sp +.if n .RS 4 +.nf +.fam C +mount /dev/hda1 /new\-root +cd /new\-root +pivot_root . old\-root +exec chroot . sh <dev/console >dev/console 2>&1 +umount /old\-root +.fam +.fi +.if n .RE +.sp +Mount the new root file system over NFS from 10.0.0.1:/my_root and run \fBinit\fP: +.sp +.if n .RS 4 +.nf +.fam C +ifconfig lo 127.0.0.1 up # for portmap +# configure Ethernet or such +portmap # for lockd (implicitly started by mount) +mount \-o ro 10.0.0.1:/my_root /mnt +killall portmap # portmap keeps old root busy +cd /mnt +pivot_root . old_root +exec chroot . sh \-c \(aqumount /old_root; exec /sbin/init\(aq \(rs + <dev/console >dev/console 2>&1 +.fam +.fi +.if n .RE +.SH "SEE ALSO" +.sp +\fBchroot\fP(1), +\fBpivot_root\fP(2), +\fBmount\fP(8), +\fBswitch_root\fP(8), +\fBumount\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBpivot_root\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/pivot_root.8.adoc b/sys-utils/pivot_root.8.adoc new file mode 100644 index 0000000..9eba290 --- /dev/null +++ b/sys-utils/pivot_root.8.adoc @@ -0,0 +1,77 @@ +//po4a: entry man manual += pivot_root(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: pivot_root + +== NAME + +pivot_root - change the root filesystem + +== SYNOPSIS + +*pivot_root* _new_root_ _put_old_ + +== DESCRIPTION + +*pivot_root* moves the root file system of the current process to the directory _put_old_ and makes _new_root_ the new root file system. Since *pivot_root*(8) simply calls *pivot_root*(2), we refer to the man page of the latter for further details. + +Note that, depending on the implementation of *pivot_root*, root and current working directory of the caller may or may not change. The following is a sequence for invoking *pivot_root* that works in either case, assuming that *pivot_root* and *chroot* are in the current *PATH*: + +.... +cd new_root +pivot_root . put_old +exec chroot . command +.... + +Note that *chroot* must be available under the old root and under the new root, because *pivot_root* may or may not have implicitly changed the root directory of the shell. + +Note that *exec chroot* changes the running executable, which is necessary if the old root directory should be unmounted afterwards. Also note that standard input, output, and error may still point to a device on the old root file system, keeping it busy. They can easily be changed when invoking *chroot* (see below; note the absence of leading slashes to make it work whether *pivot_root* has changed the shell's root or not). + +== OPTIONS + +include::man-common/help-version.adoc[] + +== EXAMPLE + +Change the root file system to _/dev/hda1_ from an interactive shell: + +.... +mount /dev/hda1 /new-root +cd /new-root +pivot_root . old-root +exec chroot . 
sh <dev/console >dev/console 2>&1 +umount /old-root +.... + +Mount the new root file system over NFS from 10.0.0.1:/my_root and run *init*: + +.... +ifconfig lo 127.0.0.1 up # for portmap +# configure Ethernet or such +portmap # for lockd (implicitly started by mount) +mount -o ro 10.0.0.1:/my_root /mnt +killall portmap # portmap keeps old root busy +cd /mnt +pivot_root . old_root +exec chroot . sh -c 'umount /old_root; exec /sbin/init' \ + <dev/console >dev/console 2>&1 +.... + +== SEE ALSO + +*chroot*(1), +*pivot_root*(2), +*mount*(8), +*switch_root*(8), +*umount*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/pivot_root.c b/sys-utils/pivot_root.c new file mode 100644 index 0000000..aef1b12 --- /dev/null +++ b/sys-utils/pivot_root.c @@ -0,0 +1,79 @@ +/* + * pivot_root.c - Change the root file system + * + * Copyright (C) 2000 Werner Almesberger + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#include <err.h> +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/syscall.h> +#include <unistd.h> + +#include "c.h" +#include "nls.h" +#include "closestream.h" + +#define pivot_root(new_root,put_old) syscall(SYS_pivot_root,new_root,put_old) + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options] new_root put_old\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Change the root filesystem.\n"), out); + + fputs(USAGE_OPTIONS, out); + printf(USAGE_HELP_OPTIONS(16)); + printf(USAGE_MAN_TAIL("pivot_root(8)")); + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + int ch; + static const struct option longopts[] = { + {"version", no_argument, NULL, 'V'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((ch = getopt_long(argc, argv, "Vh", longopts, NULL)) != -1) + switch (ch) { + case 'V': + print_version(EXIT_SUCCESS); + case 'h': + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + + if (argc != 3) { + warnx(_("bad usage")); + errtryhelp(EXIT_FAILURE); + } + if (pivot_root(argv[1], argv[2]) < 0) + err(EXIT_FAILURE, _("failed to change root from `%s' to `%s'"), + argv[1], argv[2]); + + return EXIT_SUCCESS; +} diff --git a/sys-utils/prlimit.1 b/sys-utils/prlimit.1 new file mode 100644 index 0000000..3a8d347 --- /dev/null +++ b/sys-utils/prlimit.1 @@ -0,0 +1,251 @@ +'\" t +.\" Title: prlimit +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "PRLIMIT" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. 
+.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +prlimit \- get and set process resource limits +.SH "SYNOPSIS" +.sp +\fBprlimit\fP [options] [\fB\-\-resource\fP[=\fIlimits\fP]] [\fB\-\-pid\fP \fIPID\fP] +.sp +\fBprlimit\fP [options] [\fB\-\-resource\fP[=\fIlimits\fP]] \fIcommand\fP [\fIargument\fP...] +.SH "DESCRIPTION" +.sp +Given a process ID and one or more resources, \fBprlimit\fP tries to retrieve and/or modify the limits. +.sp +When \fIcommand\fP is given, \fBprlimit\fP will run this command with the given arguments. +.sp +The \fIlimits\fP parameter is composed of a soft and a hard value, separated by a colon (:), in order to modify the existing values. If no \fIlimits\fP are given, \fBprlimit\fP will display the current values. If one of the values is not given, then the existing one will be used. To specify the unlimited or infinity limit (\fBRLIM_INFINITY\fP), the \-1 or \(aqunlimited\(aq string can be passed. +.sp +Because of the nature of limits, the soft limit must be lower than or equal to the hard limit (also called the ceiling). To see all available resource limits, refer to the \fBRESOURCE OPTIONS\fP section. +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +\fIsoft\fP:\fIhard\fP Specify both limits. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +\fIsoft\fP: Specify only the soft limit. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +:\fIhard\fP Specify only the hard limit. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +\fIvalue\fP Specify both limits to the same value. +.RE +.SH "GENERAL OPTIONS" +.sp +\fB\-\-noheadings\fP +.RS 4 +Do not print a header line. +.RE +.sp +\fB\-o, \-\-output\fP \fIlist\fP +.RS 4 +Define the output columns to use. 
If no output arrangement is specified, then a default set is used. Use \fB\-\-help\fP to get a list of all supported columns. +.RE +.sp +\fB\-p, \-\-pid\fP +.RS 4 +Specify the process id; if none is given, the running process will be used. +.RE +.sp +\fB\-\-raw\fP +.RS 4 +Use the raw output format. +.RE +.sp +\fB\-\-verbose\fP +.RS 4 +Verbose mode. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "RESOURCE OPTIONS" +.sp +\fB\-c, \-\-core\fP[=\fIlimits\fP] +.RS 4 +Maximum size of a core file. +.RE +.sp +\fB\-d, \-\-data\fP[=\fIlimits\fP] +.RS 4 +Maximum data size. +.RE +.sp +\fB\-e, \-\-nice\fP[=\fIlimits\fP] +.RS 4 +Maximum nice priority allowed to raise. +.RE +.sp +\fB\-f, \-\-fsize\fP[=\fIlimits\fP] +.RS 4 +Maximum file size. +.RE +.sp +\fB\-i, \-\-sigpending\fP[=\fIlimits\fP] +.RS 4 +Maximum number of pending signals. +.RE +.sp +\fB\-l, \-\-memlock\fP[=\fIlimits\fP] +.RS 4 +Maximum locked\-in\-memory address space. +.RE +.sp +\fB\-m, \-\-rss\fP[=\fIlimits\fP] +.RS 4 +Maximum Resident Set Size (RSS). +.RE +.sp +\fB\-n, \-\-nofile\fP[=\fIlimits\fP] +.RS 4 +Maximum number of open files. +.RE +.sp +\fB\-q, \-\-msgqueue\fP[=\fIlimits\fP] +.RS 4 +Maximum number of bytes in POSIX message queues. +.RE +.sp +\fB\-r, \-\-rtprio\fP[=\fIlimits\fP] +.RS 4 +Maximum real\-time priority. +.RE +.sp +\fB\-s, \-\-stack\fP[=\fIlimits\fP] +.RS 4 +Maximum size of the stack. +.RE +.sp +\fB\-t, \-\-cpu\fP[=\fIlimits\fP] +.RS 4 +CPU time, in seconds. +.RE +.sp +\fB\-u, \-\-nproc\fP[=\fIlimits\fP] +.RS 4 +Maximum number of processes. +.RE +.sp +\fB\-v, \-\-as\fP[=\fIlimits\fP] +.RS 4 +Address space limit. +.RE +.sp +\fB\-x, \-\-locks\fP[=\fIlimits\fP] +.RS 4 +Maximum number of file locks held. +.RE +.sp +\fB\-y, \-\-rttime\fP[=\fIlimits\fP] +.RS 4 +Timeout for real\-time tasks. 
+.RE +.SH "NOTES" +.sp +The \fBprlimit\fP(2) system call is supported since Linux 2.6.36; this program does not work on older kernels. +.SH "EXAMPLES" +.sp +\fBprlimit \-\-pid 13134\fP +.RS 4 +Display limit values for all current resources. +.RE +.sp +\fBprlimit \-\-pid 13134 \-\-rss \-\-nofile=1024:4095\fP +.RS 4 +Display the limits of the RSS, and set the soft and hard limits for the number of open files to 1024 and 4095, respectively. +.RE +.sp +\fBprlimit \-\-pid 13134 \-\-nproc=512:\fP +.RS 4 +Modify only the soft limit for the number of processes. +.RE +.sp +\fBprlimit \-\-pid $$ \-\-nproc=unlimited\fP +.RS 4 +Set for the current process both the soft and ceiling values for the number of processes to unlimited. +.RE +.sp +\fBprlimit \-\-cpu=10 sort \-u hugefile\fP +.RS 4 +Set both the soft and hard CPU time limit to ten seconds and run \fBsort\fP(1). +.RE +.SH "AUTHORS" +.sp +.MTO "dave\(atgnu.org" "Davidlohr Bueso" "" +\- In memory of Dennis M. Ritchie. +.SH "SEE ALSO" +.sp +\fBulimit\fP(1p), +\fBprlimit\fP(2) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBprlimit\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/prlimit.1.adoc b/sys-utils/prlimit.1.adoc new file mode 100644 index 0000000..bc8c9a1 --- /dev/null +++ b/sys-utils/prlimit.1.adoc @@ -0,0 +1,146 @@ +//po4a: entry man manual +//// +prlimit.1 -- +Copyright 2011 Davidlohr Bueso <dave@gnu.org> +May be distributed under the GNU General Public License +//// += prlimit(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: prlimit +:colon: : + +== NAME + +prlimit - get and set process resource limits + +== SYNOPSIS + +*prlimit* [options] [*--resource*[=_limits_]] [*--pid* _PID_] + +*prlimit* [options] [*--resource*[=_limits_]] _command_ [_argument_...] + +== DESCRIPTION + +Given a process ID and one or more resources, *prlimit* tries to retrieve and/or modify the limits. + +When _command_ is given, *prlimit* will run this command with the given arguments. + +The _limits_ parameter is composed of a soft and a hard value, separated by a colon (:), in order to modify the existing values. If no _limits_ are given, *prlimit* will display the current values. If one of the values is not given, then the existing one will be used. To specify the unlimited or infinity limit (*RLIM_INFINITY*), the -1 or 'unlimited' string can be passed. + +Because of the nature of limits, the soft limit must be lower than or equal to the hard limit (also called the ceiling). To see all available resource limits, refer to the *RESOURCE OPTIONS* section. + +//TRANSLATORS: Keep {colon} untranslated. +* _soft_{colon}__hard__ Specify both limits. +* _soft_{colon} Specify only the soft limit. +* {colon}__hard__ Specify only the hard limit. +* _value_ Specify both limits to the same value. + +== GENERAL OPTIONS + +*--noheadings*:: +Do not print a header line. + +*-o, --output* _list_:: +Define the output columns to use. If no output arrangement is specified, then a default set is used. Use *--help* to get a list of all supported columns. 
+ +*-p, --pid*:: +Specify the process id; if none is given, the running process will be used. + +*--raw*:: +Use the raw output format. + +*--verbose*:: +Verbose mode. + +include::man-common/help-version.adoc[] + +== RESOURCE OPTIONS + +*-c, --core*[=_limits_]:: +Maximum size of a core file. + +*-d, --data*[=_limits_]:: +Maximum data size. + +*-e, --nice*[=_limits_]:: +Maximum nice priority allowed to raise. + +*-f, --fsize*[=_limits_]:: +Maximum file size. + +*-i, --sigpending*[=_limits_]:: +Maximum number of pending signals. + +*-l, --memlock*[=_limits_]:: +Maximum locked-in-memory address space. + +*-m, --rss*[=_limits_]:: +Maximum Resident Set Size (RSS). + +*-n, --nofile*[=_limits_]:: +Maximum number of open files. + +*-q, --msgqueue*[=_limits_]:: +Maximum number of bytes in POSIX message queues. + +*-r, --rtprio*[=_limits_]:: +Maximum real-time priority. + +*-s, --stack*[=_limits_]:: +Maximum size of the stack. + +*-t, --cpu*[=_limits_]:: +CPU time, in seconds. + +*-u, --nproc*[=_limits_]:: +Maximum number of processes. + +*-v, --as*[=_limits_]:: +Address space limit. + +*-x, --locks*[=_limits_]:: +Maximum number of file locks held. + +*-y, --rttime*[=_limits_]:: +Timeout for real-time tasks. + +== NOTES + +The *prlimit*(2) system call is supported since Linux 2.6.36; this program does not work on older kernels. + +== EXAMPLES + +*prlimit --pid 13134*:: +Display limit values for all current resources. + +*prlimit --pid 13134 --rss --nofile=1024:4095*:: +Display the limits of the RSS, and set the soft and hard limits for the number of open files to 1024 and 4095, respectively. + +*prlimit --pid 13134 --nproc=512:*:: +Modify only the soft limit for the number of processes. + +*prlimit --pid $$ --nproc=unlimited*:: +Set for the current process both the soft and ceiling values for the number of processes to unlimited. + +*prlimit --cpu=10 sort -u hugefile*:: +Set both the soft and hard CPU time limit to ten seconds and run *sort*(1). 
+ +== AUTHORS + +mailto:dave@gnu.org[Davidlohr Bueso] - In memory of Dennis M. Ritchie. + +== SEE ALSO + +*ulimit*(1p), +*prlimit*(2) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/prlimit.c b/sys-utils/prlimit.c new file mode 100644 index 0000000..ca69ccf --- /dev/null +++ b/sys-utils/prlimit.c @@ -0,0 +1,664 @@ +/* + * prlimit - get/set process resource limits. + * + * Copyright (C) 2011 Davidlohr Bueso <dave@gnu.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#include <assert.h> +#include <unistd.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/resource.h> + +#include <libsmartcols.h> + +#include "c.h" +#include "nls.h" +#include "xalloc.h" +#include "strutils.h" +#include "list.h" +#include "closestream.h" + +#ifndef RLIMIT_RTTIME +# define RLIMIT_RTTIME 15 +#endif + +enum { + AS, + CORE, + CPU, + DATA, + FSIZE, + LOCKS, + MEMLOCK, + MSGQUEUE, + NICE, + NOFILE, + NPROC, + RSS, + RTPRIO, + RTTIME, + SIGPENDING, + STACK +}; + +/* basic output flags */ +static int no_headings; +static int raw; + +struct prlimit_desc { + const char *name; + const char *help; + const char *unit; + int resource; +}; + +static struct prlimit_desc prlimit_desc[] = +{ + [AS] = { "AS", N_("address space limit"), N_("bytes"), RLIMIT_AS }, + [CORE] = { "CORE", N_("max core file size"), N_("bytes"), RLIMIT_CORE }, + [CPU] = { "CPU", N_("CPU time"), N_("seconds"), RLIMIT_CPU }, + [DATA] = { "DATA", N_("max data size"), N_("bytes"), RLIMIT_DATA }, + [FSIZE] = { "FSIZE", N_("max file size"), N_("bytes"), RLIMIT_FSIZE }, + [LOCKS] = { "LOCKS", N_("max number of file locks held"), N_("locks"), RLIMIT_LOCKS }, + [MEMLOCK] = { "MEMLOCK", N_("max locked-in-memory address space"), N_("bytes"), RLIMIT_MEMLOCK }, + [MSGQUEUE] = { "MSGQUEUE", N_("max bytes in POSIX mqueues"), N_("bytes"), RLIMIT_MSGQUEUE }, + [NICE] = { "NICE", N_("max nice prio allowed to raise"), NULL, RLIMIT_NICE }, + [NOFILE] = { "NOFILE", N_("max number of open files"), N_("files"), RLIMIT_NOFILE }, + [NPROC] = { "NPROC", N_("max number of processes"), N_("processes"), RLIMIT_NPROC }, + [RSS] = { "RSS", N_("max resident set size"), N_("bytes"), RLIMIT_RSS }, + [RTPRIO] = { "RTPRIO", N_("max real-time priority"), NULL, RLIMIT_RTPRIO }, + [RTTIME] = { "RTTIME", N_("timeout for real-time tasks"), N_("microsecs"), RLIMIT_RTTIME }, + [SIGPENDING] = { "SIGPENDING", 
N_("max number of pending signals"), N_("signals"), RLIMIT_SIGPENDING }, + [STACK] = { "STACK", N_("max stack size"), N_("bytes"), RLIMIT_STACK } +}; + +#define MAX_RESOURCES ARRAY_SIZE(prlimit_desc) + +struct prlimit { + struct list_head lims; + + struct rlimit rlim; + struct prlimit_desc *desc; + int modify; /* PRLIMIT_{SOFT,HARD} mask */ +}; + +#define PRLIMIT_EMPTY_LIMIT {{ 0, 0, }, NULL, 0 } + +enum { + COL_HELP, + COL_RES, + COL_SOFT, + COL_HARD, + COL_UNITS, +}; + +/* column names */ +struct colinfo { + const char *name; /* header */ + double whint; /* width hint (N < 1 is in percent of termwidth) */ + int flags; /* SCOLS_FL_* */ + const char *help; +}; + +/* columns descriptions */ +static struct colinfo infos[] = { + [COL_RES] = { "RESOURCE", 0.25, SCOLS_FL_TRUNC, N_("resource name") }, + [COL_HELP] = { "DESCRIPTION", 0.1, SCOLS_FL_TRUNC, N_("resource description")}, + [COL_SOFT] = { "SOFT", 0.1, SCOLS_FL_RIGHT, N_("soft limit")}, + [COL_HARD] = { "HARD", 1, SCOLS_FL_RIGHT, N_("hard limit (ceiling)")}, + [COL_UNITS] = { "UNITS", 0.1, SCOLS_FL_TRUNC, N_("units")}, +}; + +static int columns[ARRAY_SIZE(infos) * 2]; +static int ncolumns; + + + +#define INFINITY_STR "unlimited" +#define INFINITY_STRLEN (sizeof(INFINITY_STR) - 1) + +#define PRLIMIT_SOFT (1 << 1) +#define PRLIMIT_HARD (1 << 2) + +static pid_t pid; /* calling process (default) */ +static int verbose; + +#ifdef HAVE_SYS_SYSCALL_H +# include <sys/syscall.h> +# if defined(SYS_prlimit64) +# ifndef HAVE_PRLIMIT +static int prlimit(pid_t p, int resource, + const struct rlimit *new_limit, + struct rlimit *old_limit) +{ + return syscall(SYS_prlimit64, p, resource, new_limit, old_limit); +} +# endif /* !HAVE_PRLIMIT */ +# endif /* SYS_prlimit64 */ +#endif /* HAVE_SYS_SYSCALL_H */ + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + size_t i; + + fputs(USAGE_HEADER, out); + + fprintf(out, + _(" %s [options] [--<resource>=<limit>] [-p PID]\n"), program_invocation_short_name); + 
fprintf(out, + _(" %s [options] [--<resource>=<limit>] COMMAND\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Show or change the resource limits of a process.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -p, --pid <pid> process id\n" + " -o, --output <list> define which output columns to use\n" + " --noheadings don't print headings\n" + " --raw use the raw output format\n" + " --verbose verbose output\n" + ), out); + printf(USAGE_HELP_OPTIONS(24)); + + fputs(_("\nResources:\n"), out); + fputs(_(" -c, --core maximum size of core files created\n" + " -d, --data maximum size of a process's data segment\n" + " -e, --nice maximum nice priority allowed to raise\n" + " -f, --fsize maximum size of files written by the process\n" + " -i, --sigpending maximum number of pending signals\n" + " -l, --memlock maximum size a process may lock into memory\n" + " -m, --rss maximum resident set size\n" + " -n, --nofile maximum number of open files\n" + " -q, --msgqueue maximum bytes in POSIX message queues\n" + " -r, --rtprio maximum real-time scheduling priority\n" + " -s, --stack maximum stack size\n" + " -t, --cpu maximum amount of CPU time in seconds\n" + " -u, --nproc maximum number of user processes\n" + " -v, --as size of virtual memory\n" + " -x, --locks maximum number of file locks\n" + " -y, --rttime CPU time in microseconds a process scheduled\n" + " under real-time scheduling\n"), out); + + fputs(USAGE_ARGUMENTS, out); + fputs(_( + " <limit> is defined as a range soft:hard, soft:, :hard or a value to\n" + " define both limits (e.g. 
-e=0:10 -r=:10).\n"), out); + + fputs(USAGE_COLUMNS, out); + for (i = 0; i < ARRAY_SIZE(infos); i++) + fprintf(out, " %11s %s\n", infos[i].name, _(infos[i].help)); + + printf(USAGE_MAN_TAIL("prlimit(1)")); + + exit(EXIT_SUCCESS); +} + +static inline int get_column_id(int num) +{ + assert(num < ncolumns); + assert(columns[num] < (int) ARRAY_SIZE(infos)); + + return columns[num]; +} + +static inline struct colinfo *get_column_info(unsigned num) +{ + return &infos[ get_column_id(num) ]; +} + +static void add_scols_line(struct libscols_table *table, struct prlimit *l) +{ + int i; + struct libscols_line *line; + + assert(table); + assert(l); + + line = scols_table_new_line(table, NULL); + if (!line) + err(EXIT_FAILURE, _("failed to allocate output line")); + + for (i = 0; i < ncolumns; i++) { + char *str = NULL; + + switch (get_column_id(i)) { + case COL_RES: + if (l->desc->name) + str = xstrdup(l->desc->name); + break; + case COL_HELP: + if (l->desc->help) + str = xstrdup(_(l->desc->help)); + break; + case COL_SOFT: + if (l->rlim.rlim_cur == RLIM_INFINITY) + str = xstrdup(_("unlimited")); + else + xasprintf(&str, "%llu", (unsigned long long) l->rlim.rlim_cur); + break; + case COL_HARD: + if (l->rlim.rlim_max == RLIM_INFINITY) + str = xstrdup(_("unlimited")); + else + xasprintf(&str, "%llu", (unsigned long long) l->rlim.rlim_max); + break; + case COL_UNITS: + if (l->desc->unit) + str = xstrdup(_(l->desc->unit)); + break; + default: + break; + } + + if (str && scols_line_refer_data(line, i, str)) + err(EXIT_FAILURE, _("failed to add output data")); + } +} + +static int column_name_to_id(const char *name, size_t namesz) +{ + size_t i; + + assert(name); + + for (i = 0; i < ARRAY_SIZE(infos); i++) { + const char *cn = infos[i].name; + + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) + return i; + } + warnx(_("unknown column: %s"), name); + return -1; +} + +static void rem_prlim(struct prlimit *lim) +{ + if (!lim) + return; + list_del(&lim->lims); + free(lim); +} + 
+static int show_limits(struct list_head *lims) +{ + int i; + struct list_head *p, *pnext; + struct libscols_table *table; + + table = scols_new_table(); + if (!table) + err(EXIT_FAILURE, _("failed to allocate output table")); + + scols_table_enable_raw(table, raw); + scols_table_enable_noheadings(table, no_headings); + + for (i = 0; i < ncolumns; i++) { + struct colinfo *col = get_column_info(i); + + if (!scols_table_new_column(table, col->name, col->whint, col->flags)) + err(EXIT_FAILURE, _("failed to allocate output column")); + } + + list_for_each_safe(p, pnext, lims) { + struct prlimit *lim = list_entry(p, struct prlimit, lims); + + add_scols_line(table, lim); + rem_prlim(lim); + } + + scols_print_table(table); + scols_unref_table(table); + return 0; +} + +/* + * If one of the limits is unknown (default value for not being passed), we + * need to get the current limit and use it. I see no other way other than + * using prlimit(2). + */ +static void get_unknown_hardsoft(struct prlimit *lim) +{ + struct rlimit old; + + if (prlimit(pid, lim->desc->resource, NULL, &old) == -1) + err(EXIT_FAILURE, _("failed to get old %s limit"), + lim->desc->name); + + if (!(lim->modify & PRLIMIT_SOFT)) + lim->rlim.rlim_cur = old.rlim_cur; + else if (!(lim->modify & PRLIMIT_HARD)) + lim->rlim.rlim_max = old.rlim_max; +} + +static void do_prlimit(struct list_head *lims) +{ + struct list_head *p, *pnext; + + list_for_each_safe(p, pnext, lims) { + struct rlimit *new = NULL, *old = NULL; + struct prlimit *lim = list_entry(p, struct prlimit, lims); + + if (lim->modify) { + if (lim->modify != (PRLIMIT_HARD | PRLIMIT_SOFT)) + get_unknown_hardsoft(lim); + + if ((lim->rlim.rlim_cur > lim->rlim.rlim_max) && + (lim->rlim.rlim_cur != RLIM_INFINITY || + lim->rlim.rlim_max != RLIM_INFINITY)) + errx(EXIT_FAILURE, _("the soft limit %s cannot exceed the hard limit"), + lim->desc->name); + new = &lim->rlim; + } else + old = &lim->rlim; + + if (verbose && new) { + printf(_("New %s limit for pid %d: 
"), lim->desc->name, + pid ? pid : getpid()); + if (new->rlim_cur == RLIM_INFINITY) + printf("<%s", _("unlimited")); + else + printf("<%ju", (uintmax_t)new->rlim_cur); + + if (new->rlim_max == RLIM_INFINITY) + printf(":%s>\n", _("unlimited")); + else + printf(":%ju>\n", (uintmax_t)new->rlim_max); + } + + if (prlimit(pid, lim->desc->resource, new, old) == -1) + err(EXIT_FAILURE, lim->modify ? + _("failed to set the %s resource limit") : + _("failed to get the %s resource limit"), + lim->desc->name); + + if (lim->modify) + rem_prlim(lim); /* modify only; don't show */ + } +} + +static int get_range(char *str, rlim_t *soft, rlim_t *hard, int *found) +{ + char *end = NULL; + + if (!str) + return 0; + + *found = errno = 0; + *soft = *hard = RLIM_INFINITY; + + if (!strcmp(str, INFINITY_STR)) { /* <unlimited> */ + *found |= PRLIMIT_SOFT | PRLIMIT_HARD; + return 0; + + } + + if (*str == ':') { /* <:hard> */ + str++; + + if (strcmp(str, INFINITY_STR) != 0) { + *hard = strtoull(str, &end, 10); + + if (errno || !end || *end || end == str) + return -1; + } + *found |= PRLIMIT_HARD; + return 0; + + } + + if (strncmp(str, INFINITY_STR, INFINITY_STRLEN) == 0) { + /* <unlimited> or <unlimited:> */ + end = str + INFINITY_STRLEN; + } else { + /* <value> or <soft:> */ + *hard = *soft = strtoull(str, &end, 10); + if (errno || !end || end == str) + return -1; + } + + if (*end == ':' && !*(end + 1)) /* <soft:> */ + *found |= PRLIMIT_SOFT; + + else if (*end == ':') { /* <soft:hard> */ + str = end + 1; + + if (!strcmp(str, INFINITY_STR)) + *hard = RLIM_INFINITY; + else { + end = NULL; + errno = 0; + *hard = strtoull(str, &end, 10); + + if (errno || !end || *end || end == str) + return -1; + } + *found |= PRLIMIT_SOFT | PRLIMIT_HARD; + + } else /* <value> */ + *found |= PRLIMIT_SOFT | PRLIMIT_HARD; + + return 0; +} + + +static int parse_prlim(struct rlimit *lim, char *ops, size_t id) +{ + rlim_t soft = 0, hard = 0; + int found = 0; + + if (ops && *ops == '=') + ops++; + + if 
(get_range(ops, &soft, &hard, &found)) + errx(EXIT_FAILURE, _("failed to parse %s limit"), + prlimit_desc[id].name); + + lim->rlim_cur = soft; + lim->rlim_max = hard; + + return found; +} + +static int add_prlim(char *ops, struct list_head *lims, size_t id) +{ + struct prlimit *lim = xcalloc(1, sizeof(*lim)); + + INIT_LIST_HEAD(&lim->lims); + lim->desc = &prlimit_desc[id]; + + if (ops) + lim->modify = parse_prlim(&lim->rlim, ops, id); + + list_add_tail(&lim->lims, lims); + return 0; +} + +int main(int argc, char **argv) +{ + int opt; + struct list_head lims; + + enum { + VERBOSE_OPTION = CHAR_MAX + 1, + RAW_OPTION, + NOHEADINGS_OPTION + }; + + static const struct option longopts[] = { + { "pid", required_argument, NULL, 'p' }, + { "output", required_argument, NULL, 'o' }, + { "as", optional_argument, NULL, 'v' }, + { "core", optional_argument, NULL, 'c' }, + { "cpu", optional_argument, NULL, 't' }, + { "data", optional_argument, NULL, 'd' }, + { "fsize", optional_argument, NULL, 'f' }, + { "locks", optional_argument, NULL, 'x' }, + { "memlock", optional_argument, NULL, 'l' }, + { "msgqueue", optional_argument, NULL, 'q' }, + { "nice", optional_argument, NULL, 'e' }, + { "nofile", optional_argument, NULL, 'n' }, + { "nproc", optional_argument, NULL, 'u' }, + { "rss", optional_argument, NULL, 'm' }, + { "rtprio", optional_argument, NULL, 'r' }, + { "rttime", optional_argument, NULL, 'y' }, + { "sigpending", optional_argument, NULL, 'i' }, + { "stack", optional_argument, NULL, 's' }, + { "version", no_argument, NULL, 'V' }, + { "help", no_argument, NULL, 'h' }, + { "noheadings", no_argument, NULL, NOHEADINGS_OPTION }, + { "raw", no_argument, NULL, RAW_OPTION }, + { "verbose", no_argument, NULL, VERBOSE_OPTION }, + { NULL, 0, NULL, 0 } + }; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + INIT_LIST_HEAD(&lims); + + /* + * Something is very wrong if this doesn't succeed, + * assuming STACK is the last 
resource, of course. + */ + assert(MAX_RESOURCES == STACK + 1); + + while((opt = getopt_long(argc, argv, + "+c::d::e::f::i::l::m::n::q::r::s::t::u::v::x::y::p:o:vVh", + longopts, NULL)) != -1) { + switch(opt) { + case 'c': + add_prlim(optarg, &lims, CORE); + break; + case 'd': + add_prlim(optarg, &lims, DATA); + break; + case 'e': + add_prlim(optarg, &lims, NICE); + break; + case 'f': + add_prlim(optarg, &lims, FSIZE); + break; + case 'i': + add_prlim(optarg, &lims, SIGPENDING); + break; + case 'l': + add_prlim(optarg, &lims, MEMLOCK); + break; + case 'm': + add_prlim(optarg, &lims, RSS); + break; + case 'n': + add_prlim(optarg, &lims, NOFILE); + break; + case 'q': + add_prlim(optarg, &lims, MSGQUEUE); + break; + case 'r': + add_prlim(optarg, &lims, RTPRIO); + break; + case 's': + add_prlim(optarg, &lims, STACK); + break; + case 't': + add_prlim(optarg, &lims, CPU); + break; + case 'u': + add_prlim(optarg, &lims, NPROC); + break; + case 'v': + add_prlim(optarg, &lims, AS); + break; + case 'x': + add_prlim(optarg, &lims, LOCKS); + break; + case 'y': + add_prlim(optarg, &lims, RTTIME); + break; + + case 'p': + if (pid) + errx(EXIT_FAILURE, _("option --pid may be specified only once")); + pid = strtos32_or_err(optarg, _("invalid PID argument")); + break; + case 'o': + ncolumns = string_to_idarray(optarg, + columns, ARRAY_SIZE(columns), + column_name_to_id); + if (ncolumns < 0) + return EXIT_FAILURE; + break; + case NOHEADINGS_OPTION: + no_headings = 1; + break; + case VERBOSE_OPTION: + verbose++; + break; + case RAW_OPTION: + raw = 1; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + if (argc > optind && pid) + errx(EXIT_FAILURE, _("options --pid and COMMAND are mutually exclusive")); + if (!ncolumns) { + /* default columns */ + columns[ncolumns++] = COL_RES; + columns[ncolumns++] = COL_HELP; + columns[ncolumns++] = COL_SOFT; + columns[ncolumns++] = COL_HARD; + columns[ncolumns++] = COL_UNITS; 
+ } + + scols_init_debug(0); + + if (list_empty(&lims)) { + /* default is to print all resources */ + size_t n; + + for (n = 0; n < MAX_RESOURCES; n++) + add_prlim(NULL, &lims, n); + } + + do_prlimit(&lims); + + if (!list_empty(&lims)) + show_limits(&lims); + + if (argc > optind) { + /* prlimit [options] COMMAND */ + execvp(argv[optind], &argv[optind]); + errexec(argv[optind]); + } + + return EXIT_SUCCESS; +} diff --git a/sys-utils/readprofile.8 b/sys-utils/readprofile.8 new file mode 100644 index 0000000..ed1e66f --- /dev/null +++ b/sys-utils/readprofile.8 @@ -0,0 +1,190 @@ +'\" t +.\" Title: readprofile +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "READPROFILE" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +readprofile \- read kernel profiling information +.SH "SYNOPSIS" +.sp +\fBreadprofile\fP [options] +.SH "VERSION" +.sp +This manpage documents version 2.0 of the program. +.SH "DESCRIPTION" +.sp +The \fBreadprofile\fP command uses the \fI/proc/profile\fP information to print ascii data on standard output. The output is organized in three columns: the first is the number of clock ticks, the second is the name of the C function in the kernel where those many ticks occurred, and the third is the normalized `load\(aq of the procedure, calculated as a ratio between the number of ticks and the length of the procedure. The output is filled with blanks to ease readability. +.SH "OPTIONS" +.sp +\fB\-a\fP, \fB\-\-all\fP +.RS 4 +Print all symbols in the mapfile. By default the procedures with reported ticks are not printed. 
+.RE +.sp +\fB\-b\fP, \fB\-\-histbin\fP +.RS 4 +Print individual histogram\-bin counts. +.RE +.sp +\fB\-i\fP, \fB\-\-info\fP +.RS 4 +Info. This makes \fBreadprofile\fP only print the profiling step used by the kernel. The profiling step is the resolution of the profiling buffer, and is chosen during kernel configuration (through \fBmake config\fP), or in the kernel\(cqs command line. If the \fB\-t\fP (terse) switch is used together with \fB\-i\fP only the decimal number is printed. +.RE +.sp +\fB\-m\fP, \fB\-\-mapfile\fP \fImapfile\fP +.RS 4 +Specify a mapfile, which by default is \fI/usr/src/linux/System.map\fP. You should specify the map file on cmdline if your current kernel isn\(cqt the last one you compiled, or if you keep System.map elsewhere. If the name of the map file ends with \fI.gz\fP it is decompressed on the fly. +.RE +.sp +\fB\-M\fP, \fB\-\-multiplier\fP \fImultiplier\fP +.RS 4 +On some architectures it is possible to alter the frequency at which the kernel delivers profiling interrupts to each CPU. This option allows you to set the frequency, as a multiplier of the system clock frequency, HZ. Linux 2.6.16 dropped multiplier support for most systems. This option also resets the profiling buffer, and requires superuser privileges. +.RE +.sp +\fB\-p\fP, \fB\-\-profile\fP \fIpro\-file\fP +.RS 4 +Specify a different profiling buffer, which by default is \fI/proc/profile\fP. Using a different pro\-file is useful if you want to `freeze\(aq the kernel profiling at some time and read it later. The \fI/proc/profile\fP file can be copied using \fBcat\fP(1) or \fBcp\fP(1). There is no more support for compressed profile buffers, like in \fBreadprofile\-1.1\fP, because the program needs to know the size of the buffer in advance. +.RE +.sp +\fB\-r\fP, \fB\-\-reset\fP +.RS 4 +Reset the profiling buffer. This can only be invoked by root, because \fI/proc/profile\fP is readable by everybody but writable only by the superuser. 
However, you can make \fBreadprofile\fP set\-user\-ID 0, in order to reset the buffer without gaining privileges. +.RE +.sp +\fB\-s, \-\-counters\fP +.RS 4 +Print individual counters within functions. +.RE +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Verbose. The output is organized in four columns and filled with blanks. The first column is the RAM address of a kernel function, the second is the name of the function, the third is the number of clock ticks and the last is the normalized load. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "FILES" +.sp +\fI/proc/profile\fP +.RS 4 +A binary snapshot of the profiling buffer. +.RE +.sp +\fI/usr/src/linux/System.map\fP +.RS 4 +The symbol table for the kernel. +.RE +.sp +\fI/usr/src/linux/*\fP +.RS 4 +The program being profiled :\-) +.RE +.SH "BUGS" +.sp +\fBreadprofile\fP only works with a 1.3.x or newer kernel, because \fI/proc/profile\fP changed in the step from 1.2 to 1.3. +.sp +This program only works with ELF kernels. The change for a.out kernels is trivial, and left as an exercise to the a.out user. +.sp +To enable profiling, the kernel must be rebooted, because no profiling module is available, and it wouldn\(cqt be easy to build. To enable profiling, you can specify \fBprofile\fP=\fI2\fP (or another number) on the kernel commandline. The number you specify is the two\-exponent used as profiling step. +.sp +Profiling is disabled when interrupts are inhibited. This means that many profiling ticks happen when interrupts are re\-enabled. Watch out for misleading information. 
+.SH "EXAMPLE" +.sp +Browse the profiling buffer ordering by clock ticks: +.sp +.if n .RS 4 +.nf +.fam C + readprofile | sort \-nr | less +.fam +.fi +.if n .RE +.sp +Print the 20 most loaded procedures: +.sp +.if n .RS 4 +.nf +.fam C + readprofile | sort \-nr +2 | head \-20 +.fam +.fi +.if n .RE +.sp +Print only filesystem profile: +.sp +.if n .RS 4 +.nf +.fam C + readprofile | grep _ext2 +.fam +.fi +.if n .RE +.sp +Look at all the kernel information, with ram addresses: +.sp +.if n .RS 4 +.nf +.fam C + readprofile \-av | less +.fam +.fi +.if n .RE +.sp +Browse a \(aqfrozen\(aq profile buffer for a non current kernel: +.sp +.if n .RS 4 +.nf +.fam C + readprofile \-p ~/profile.freeze \-m /zImage.map.gz +.fam +.fi +.if n .RE +.sp +Request profiling at 2kHz per CPU, and reset the profiling buffer: +.sp +.if n .RS 4 +.nf +.fam C + sudo readprofile \-M 20 +.fam +.fi +.if n .RE +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBreadprofile\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/readprofile.8.adoc b/sys-utils/readprofile.8.adoc new file mode 100644 index 0000000..62613c3 --- /dev/null +++ b/sys-utils/readprofile.8.adoc @@ -0,0 +1,121 @@ +//po4a: entry man manual += readprofile(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: readprofile + +== NAME + +readprofile - read kernel profiling information + +== SYNOPSIS + +*readprofile* [options] + +== VERSION + +This manpage documents version 2.0 of the program. + +== DESCRIPTION + +The *readprofile* command uses the _/proc/profile_ information to print ASCII data on standard output. The output is organized in three columns: the first is the number of clock ticks, the second is the name of the C function in the kernel where that many ticks occurred, and the third is the normalized `load' of the procedure, calculated as a ratio between the number of ticks and the length of the procedure. The output is filled with blanks to ease readability. + +== OPTIONS + +*-a*, *--all*:: +Print all symbols in the mapfile. By default the procedures with 0 reported ticks are not printed. 
+ +*-M*, *--multiplier* _multiplier_:: +On some architectures it is possible to alter the frequency at which the kernel delivers profiling interrupts to each CPU. This option allows you to set the frequency, as a multiplier of the system clock frequency, HZ. Linux 2.6.16 dropped multiplier support for most systems. This option also resets the profiling buffer, and requires superuser privileges. + +*-p*, *--profile* _pro-file_:: +Specify a different profiling buffer, which by default is _/proc/profile_. Using a different pro-file is useful if you want to `freeze' the kernel profiling at some time and read it later. The _/proc/profile_ file can be copied using *cat*(1) or *cp*(1). There is no more support for compressed profile buffers, like in *readprofile-1.1*, because the program needs to know the size of the buffer in advance. + +*-r*, *--reset*:: +Reset the profiling buffer. This can only be invoked by root, because _/proc/profile_ is readable by everybody but writable only by the superuser. However, you can make *readprofile* set-user-ID 0, in order to reset the buffer without gaining privileges. + +*-s, --counters*:: +Print individual counters within functions. + +*-v*, *--verbose*:: +Verbose. The output is organized in four columns and filled with blanks. The first column is the RAM address of a kernel function, the second is the name of the function, the third is the number of clock ticks and the last is the normalized load. + +include::man-common/help-version.adoc[] + +== FILES + +_/proc/profile_:: +A binary snapshot of the profiling buffer. + +_/usr/src/linux/System.map_:: +The symbol table for the kernel. + +_/usr/src/linux/*_:: +The program being profiled :-) + +== BUGS + +*readprofile* only works with a 1.3.x or newer kernel, because _/proc/profile_ changed in the step from 1.2 to 1.3. + +This program only works with ELF kernels. The change for a.out kernels is trivial, and left as an exercise to the a.out user. 
+ +To enable profiling, the kernel must be rebooted, because no profiling module is available, and it wouldn't be easy to build. To enable profiling, you can specify *profile*=_2_ (or another number) on the kernel commandline. The number you specify is the two-exponent used as profiling step. + +Profiling is disabled when interrupts are inhibited. This means that many profiling ticks happen when interrupts are re-enabled. Watch out for misleading information. + +== EXAMPLE + +Browse the profiling buffer ordering by clock ticks: + +.... + readprofile | sort -nr | less +.... + +Print the 20 most loaded procedures: + +.... + readprofile | sort -nr +2 | head -20 +.... + +Print only filesystem profile: + +.... + readprofile | grep _ext2 +.... + +Look at all the kernel information, with ram addresses: + +.... + readprofile -av | less +.... + +Browse a 'frozen' profile buffer for a non current kernel: + +.... + readprofile -p ~/profile.freeze -m /zImage.map.gz +.... + +Request profiling at 2kHz per CPU, and reset the profiling buffer: + +.... + sudo readprofile -M 20 +.... + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/readprofile.c b/sys-utils/readprofile.c new file mode 100644 index 0000000..579902d --- /dev/null +++ b/sys-utils/readprofile.c @@ -0,0 +1,415 @@ +/* + * readprofile.c - used to read /proc/profile + * + * Copyright (C) 1994,1996 Alessandro Rubini (rubini@ipvvis.unipv.it) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* + * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL> + * - added Native Language Support + * 1999-09-01 Stephane Eranian <eranian@cello.hpl.hp.com> + * - 64bit clean patch + * 3Feb2001 Andrew Morton <andrewm@uow.edu.au> + * - -M option to write profile multiplier. + * 2001-11-07 Werner Almesberger <wa@almesberger.net> + * - byte order auto-detection and -n option + * 2001-11-09 Werner Almesberger <wa@almesberger.net> + * - skip step size (index 0) + * 2002-03-09 John Levon <moz@compsoc.man.ac.uk> + * - make maplineno do something + * 2002-11-28 Mads Martin Joergensen + + * - also try /boot/System.map-`uname -r` + * 2003-04-09 Werner Almesberger <wa@almesberger.net> + * - fixed off-by eight error and improved heuristics in byte order detection + * 2003-08-12 Nikita Danilov <Nikita@Namesys.COM> + * - added -s option; example of use: + * "readprofile -s -m /boot/System.map-test | grep __d_lookup | sort -n -k3" + */ + +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <sys/utsname.h> +#include <unistd.h> + +#include "c.h" +#include "strutils.h" +#include "nls.h" +#include "xalloc.h" +#include "closestream.h" + +#define S_LEN 128 + +/* These are the defaults */ +static char defaultmap[]="/boot/System.map"; +static char defaultpro[]="/proc/profile"; + +static FILE *myopen(char *name, char *mode, int *flag) +{ + int len = strlen(name); + + if (!strcmp(name + len - 3, ".gz")) { + FILE *res; + char *cmdline = xmalloc(len + 6); + snprintf(cmdline, len + 6, "zcat %s", name); + res = popen(cmdline, mode); + free(cmdline); + *flag = 1; + return res; + } + *flag = 0; 
+ return fopen(name, mode); +} + +#ifndef BOOT_SYSTEM_MAP +#define BOOT_SYSTEM_MAP "/boot/System.map-" +#endif + +static char *boot_uname_r_str(void) +{ + struct utsname uname_info; + char *s; + size_t len; + + if (uname(&uname_info)) + return ""; + len = strlen(BOOT_SYSTEM_MAP) + strlen(uname_info.release) + 1; + s = xmalloc(len); + strcpy(s, BOOT_SYSTEM_MAP); + strcat(s, uname_info.release); + return s; +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Display kernel profiling information.\n"), out); + + fputs(USAGE_OPTIONS, out); + fprintf(out, + _(" -m, --mapfile <mapfile> (defaults: \"%s\" and\n"), defaultmap); + fprintf(out, + _(" \"%s\")\n"), boot_uname_r_str()); + fprintf(out, + _(" -p, --profile <pro-file> (default: \"%s\")\n"), defaultpro); + fputs(_(" -M, --multiplier <mult> set the profiling multiplier to <mult>\n"), out); + fputs(_(" -i, --info print only info about the sampling step\n"), out); + fputs(_(" -v, --verbose print verbose data\n"), out); + fputs(_(" -a, --all print all symbols, even if count is 0\n"), out); + fputs(_(" -b, --histbin print individual histogram-bin counts\n"), out); + fputs(_(" -s, --counters print individual counters within functions\n"), out); + fputs(_(" -r, --reset reset all the counters (root only)\n"), out); + fputs(_(" -n, --no-auto disable byte order auto-detection\n"), out); + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(27)); + printf(USAGE_MAN_TAIL("readprofile(8)")); + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + FILE *map; + int proFd, has_mult = 0, multiplier = 0; + char *mapFile, *proFile; + size_t len = 0, indx = 1; + unsigned long long add0 = 0; + unsigned int step; + unsigned int *buf, total, fn_len; + unsigned long long fn_add = 0, next_add; /* current and next address */ + char fn_name[S_LEN], 
next_name[S_LEN]; /* current and next name */ + char mode[8]; + int c; + ssize_t rc; + int optAll = 0, optInfo = 0, optReset = 0, optVerbose = 0, optNative = 0; + int optBins = 0, optSub = 0; + char mapline[S_LEN]; + int maplineno = 1; + int popenMap; /* flag to tell if popen() has been used */ + int header_printed; + double rep = 0; + + static const struct option longopts[] = { + {"mapfile", required_argument, NULL, 'm'}, + {"profile", required_argument, NULL, 'p'}, + {"multiplier", required_argument, NULL, 'M'}, + {"info", no_argument, NULL, 'i'}, + {"verbose", no_argument, NULL, 'v'}, + {"all", no_argument, NULL, 'a'}, + {"histbin", no_argument, NULL, 'b'}, + {"counters", no_argument, NULL, 's'}, + {"reset", no_argument, NULL, 'r'}, + {"no-auto", no_argument, NULL, 'n'}, + {"version", no_argument, NULL, 'V'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + +#define next (current^1) + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + proFile = defaultpro; + mapFile = defaultmap; + + while ((c = getopt_long(argc, argv, "m:p:M:ivabsrnVh", longopts, NULL)) != -1) { + switch (c) { + case 'm': + mapFile = optarg; + break; + case 'n': + optNative++; + break; + case 'p': + proFile = optarg; + break; + case 'a': + optAll++; + break; + case 'b': + optBins++; + break; + case 's': + optSub++; + break; + case 'i': + optInfo++; + break; + case 'M': + multiplier = strtol_or_err(optarg, _("failed to parse multiplier")); + has_mult = 1; + break; + case 'r': + optReset++; + break; + case 'v': + optVerbose++; + break; + + case 'V': + print_version(EXIT_SUCCESS); + case 'h': + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (optReset || has_mult) { + int fd, to_write; + + /* When writing the multiplier, if the length of the + * write is not sizeof(int), the multiplier is not + * changed. 
*/ + if (has_mult) { + to_write = sizeof(int); + } else { + multiplier = 0; + /* sth different from sizeof(int) */ + to_write = 1; + } + /* try to become root, just in case */ + ignore_result( setuid(0) ); + fd = open(defaultpro, O_WRONLY); + if (fd < 0) + err(EXIT_FAILURE, "%s", defaultpro); + if (write(fd, &multiplier, to_write) != to_write) + err(EXIT_FAILURE, _("error writing %s"), defaultpro); + close(fd); + exit(EXIT_SUCCESS); + } + + /* Use an fd for the profiling buffer, to skip stdio overhead */ + if (((proFd = open(proFile, O_RDONLY)) < 0) + || ((int)(len = lseek(proFd, 0, SEEK_END)) < 0) + || (lseek(proFd, 0, SEEK_SET) < 0)) + err(EXIT_FAILURE, "%s", proFile); + if (!len) + errx(EXIT_FAILURE, "%s: %s", proFile, _("input file is empty")); + + buf = xmalloc(len); + + rc = read(proFd, buf, len); + if (rc < 0 || (size_t) rc != len) + err(EXIT_FAILURE, "%s", proFile); + close(proFd); + + if (!optNative) { + int entries = len / sizeof(*buf); + int big = 0, small = 0; + unsigned *p; + size_t i; + + for (p = buf + 1; p < buf + entries; p++) { + if (*p & ~0U << ((unsigned) sizeof(*buf) * 4U)) + big++; + if (*p & ((1U << ((unsigned) sizeof(*buf) * 4U)) - 1U)) + small++; + } + if (big > small) { + warnx(_("Assuming reversed byte order. 
" + "Use -n to force native byte order.")); + for (p = buf; p < buf + entries; p++) + for (i = 0; i < sizeof(*buf) / 2; i++) { + unsigned char *b = (unsigned char *)p; + unsigned char tmp; + tmp = b[i]; + b[i] = b[sizeof(*buf) - i - 1]; + b[sizeof(*buf) - i - 1] = tmp; + } + } + } + + step = buf[0]; + if (optInfo) { + printf(_("Sampling_step: %u\n"), step); + exit(EXIT_SUCCESS); + } + + total = 0; + + map = myopen(mapFile, "r", &popenMap); + if (map == NULL && mapFile == defaultmap) { + mapFile = boot_uname_r_str(); + map = myopen(mapFile, "r", &popenMap); + } + if (map == NULL) + err(EXIT_FAILURE, "%s", mapFile); + + while (fgets(mapline, S_LEN, map)) { + if (sscanf(mapline, "%llx %7[^\n ] %127[^\n ]", &fn_add, mode, fn_name) != 3) + errx(EXIT_FAILURE, _("%s(%i): wrong map line"), mapFile, + maplineno); + /* only elf works like this */ + if (!strcmp(fn_name, "_stext") || !strcmp(fn_name, "__stext")) { + add0 = fn_add; + break; + } + maplineno++; + } + + if (!add0) + errx(EXIT_FAILURE, _("can't find \"_stext\" in %s"), mapFile); + + /* + * Main loop. + */ + while (fgets(mapline, S_LEN, map)) { + unsigned int this = 0; + int done = 0; + + if (sscanf(mapline, "%llx %7[^\n ] %127[^\n ]", &next_add, mode, next_name) != 3) + errx(EXIT_FAILURE, _("%s(%i): wrong map line"), mapFile, + maplineno); + header_printed = 0; + + /* the kernel only profiles up to _etext */ + if (!strcmp(next_name, "_etext") || + !strcmp(next_name, "__etext")) + done = 1; + else { + /* ignore any LEADING (before a '[tT]' symbol + * is found) Absolute symbols and __init_end + * because some architectures place it before + * .text section */ + if ((*mode == 'A' || *mode == '?') + && (total == 0 || !strcmp(next_name, "__init_end"))) + continue; + if (*mode != 'T' && *mode != 't' && + *mode != 'W' && *mode != 'w') + break; /* only text is profiled */ + } + + if (indx >= len / sizeof(*buf)) + errx(EXIT_FAILURE, + _("profile address out of range. 
Wrong map file?")); + + while (step > 0 && indx < (next_add - add0) / step) { + if (optBins && (buf[indx] || optAll)) { + if (!header_printed) { + printf("%s:\n", fn_name); + header_printed = 1; + } + printf("\t%llx\t%u\n", (indx - 1) * step + add0, + buf[indx]); + } + this += buf[indx++]; + } + total += this; + + if (optBins) { + if (optVerbose || this > 0) + printf(" total\t\t\t\t%u\n", this); + } else if ((this || optAll) && + (fn_len = next_add - fn_add) != 0) { + if (optVerbose) + printf("%016llx %-40s %6u %8.4f\n", fn_add, + fn_name, this, this / (double)fn_len); + else + printf("%6u %-40s %8.4f\n", + this, fn_name, this / (double)fn_len); + if (optSub && step > 0) { + unsigned long long scan; + + for (scan = (fn_add - add0) / step + 1; + scan < (next_add - add0) / step; + scan++) { + unsigned long long addr; + addr = (scan - 1) * step + add0; + printf("\t%#llx\t%s+%#llx\t%u\n", + addr, fn_name, addr - fn_add, + buf[scan]); + } + } + } + + fn_add = next_add; + strcpy(fn_name, next_name); + + maplineno++; + if (done) + break; + } + + /* clock ticks, out of kernel text - probably modules */ + printf("%6u %s\n", buf[len / sizeof(*buf) - 1], "*unknown*"); + + if (fn_add > add0) + rep = total / (double)(fn_add - add0); + + /* trailer */ + if (optVerbose) + printf("%016x %-40s %6u %8.4f\n", + 0, "total", total, rep); + else + printf("%6u %-40s %8.4f\n", + total, _("total"), rep); + + popenMap ? 
pclose(map) : fclose(map); + exit(EXIT_SUCCESS); +} diff --git a/sys-utils/renice.1 b/sys-utils/renice.1 new file mode 100644 index 0000000..0810846 --- /dev/null +++ b/sys-utils/renice.1 @@ -0,0 +1,103 @@ +'\" t +.\" Title: renice +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "RENICE" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +renice \- alter priority of running processes +.SH "SYNOPSIS" +.sp +\fBrenice\fP [\fB\-n\fP] \fIpriority\fP [\fB\-g\fP|\fB\-p\fP|\fB\-u\fP] \fIidentifier\fP... +.SH "DESCRIPTION" +.sp +\fBrenice\fP alters the scheduling priority of one or more running processes. The first argument is the \fIpriority\fP value to be used. The other arguments are interpreted as process IDs (by default), process group IDs, user IDs, or user names. \fBrenice\fP\(aqing a process group causes all processes in the process group to have their scheduling priority altered. \fBrenice\fP\(aqing a user causes all processes owned by the user to have their scheduling priority altered. +.SH "OPTIONS" +.sp +\fB\-n\fP, \fB\-\-priority\fP \fIpriority\fP +.RS 4 +Specify the scheduling \fIpriority\fP to be used for the process, process group, or user. Use of the option \fB\-n\fP or \fB\-\-priority\fP is optional, but when used it must be the first argument. +.RE +.sp +\fB\-g\fP, \fB\-\-pgrp\fP +.RS 4 +Interpret the succeeding arguments as process group IDs. +.RE +.sp +\fB\-p\fP, \fB\-\-pid\fP +.RS 4 +Interpret the succeeding arguments as process IDs (the default). +.RE +.sp +\fB\-u\fP, \fB\-\-user\fP +.RS 4 +Interpret the succeeding arguments as usernames or UIDs. 
+.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "FILES" +.sp +\fI/etc/passwd\fP +.RS 4 +to map user names to user IDs +.RE +.SH "NOTES" +.sp +Users other than the superuser may only alter the priority of processes they own. Furthermore, an unprivileged user can only \fIincrease\fP the "nice value" (i.e., choose a lower priority) and such changes are irreversible unless (since Linux 2.6.12) the user has a suitable "nice" resource limit (see \fBulimit\fP(1p) and \fBgetrlimit\fP(2)). +.sp +The superuser may alter the priority of any process and set the priority to any value in the range \-20 to 19. Useful priorities are: 19 (the affected processes will run only when nothing else in the system wants to), 0 (the "base" scheduling priority), anything negative (to make things go very fast). +.SH "HISTORY" +.sp +The \fBrenice\fP command appeared in 4.0BSD. +.SH "EXAMPLES" +.sp +The following command would change the priority of the processes with PIDs 987 and 32, plus all processes owned by the users daemon and root: +.sp +\fBrenice +1 987 \-u daemon root \-p 32\fP +.SH "SEE ALSO" +.sp +\fBnice\fP(1), +\fBchrt\fP(1), +\fBgetpriority\fP(2), +\fBsetpriority\fP(2), +\fBcredentials\fP(7), +\fBsched\fP(7) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBrenice\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/renice.1.adoc b/sys-utils/renice.1.adoc new file mode 100644 index 0000000..26392c2 --- /dev/null +++ b/sys-utils/renice.1.adoc @@ -0,0 +1,107 @@ +//po4a: entry man manual +//// +Copyright (c) 1983, 1991, 1993 + The Regents of the University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. +3. All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed by the University of + California, Berkeley and its contributors. +4. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. 
+ + @(#)renice.8 8.1 (Berkeley) 6/9/93 +//// += renice(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: renice + +== NAME + +renice - alter priority of running processes + +== SYNOPSIS + +*renice* [*-n*] _priority_ [*-g*|*-p*|*-u*] _identifier_... + +== DESCRIPTION + +*renice* alters the scheduling priority of one or more running processes. The first argument is the _priority_ value to be used. The other arguments are interpreted as process IDs (by default), process group IDs, user IDs, or user names. *renice*'ing a process group causes all processes in the process group to have their scheduling priority altered. *renice*'ing a user causes all processes owned by the user to have their scheduling priority altered. + +== OPTIONS + +*-n*, *--priority* _priority_:: +Specify the scheduling _priority_ to be used for the process, process group, or user. Use of the option *-n* or *--priority* is optional, but when used it must be the first argument. + +*-g*, *--pgrp*:: +Interpret the succeeding arguments as process group IDs. + +*-p*, *--pid*:: +Interpret the succeeding arguments as process IDs (the default). + +*-u*, *--user*:: +Interpret the succeeding arguments as usernames or UIDs. + +include::man-common/help-version.adoc[] + +== FILES + +_/etc/passwd_:: +to map user names to user IDs + +== NOTES + +Users other than the superuser may only alter the priority of processes they own. Furthermore, an unprivileged user can only _increase_ the "nice value" (i.e., choose a lower priority) and such changes are irreversible unless (since Linux 2.6.12) the user has a suitable "nice" resource limit (see *ulimit*(1p) and *getrlimit*(2)). + +The superuser may alter the priority of any process and set the priority to any value in the range -20 to 19. 
Useful priorities are: 19 (the affected processes will run only when nothing else in the system wants to), 0 (the "base" scheduling priority), anything negative (to make things go very fast). + +== HISTORY + +The *renice* command appeared in 4.0BSD. + +== EXAMPLES + +The following command would change the priority of the processes with PIDs 987 and 32, plus all processes owned by the users daemon and root: + +*renice +1 987 -u daemon root -p 32* + +== SEE ALSO + +*nice*(1), +*chrt*(1), +*getpriority*(2), +*setpriority*(2), +*credentials*(7), +*sched*(7) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/renice.c b/sys-utils/renice.c new file mode 100644 index 0000000..080b86e --- /dev/null +++ b/sys-utils/renice.c @@ -0,0 +1,194 @@ +/* + * Copyright (c) 1983, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + + /* 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL> + * - added Native Language Support + */ + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> + +#include <stdio.h> +#include <pwd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include "nls.h" +#include "c.h" +#include "closestream.h" + +static const char *idtype[] = { + [PRIO_PROCESS] = N_("process ID"), + [PRIO_PGRP] = N_("process group ID"), + [PRIO_USER] = N_("user ID"), +}; + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, + _(" %1$s [-n] <priority> [-p|--pid] <pid>...\n" + " %1$s [-n] <priority> -g|--pgrp <pgid>...\n" + " %1$s [-n] <priority> -u|--user <user>...\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Alter the priority of running processes.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -n, --priority <num> specify the nice value\n"), out); + fputs(_(" -p, --pid interpret arguments as process ID (default)\n"), out); + fputs(_(" -g, --pgrp interpret arguments as process group ID\n"), out); + fputs(_(" -u, --user interpret arguments as username or user ID\n"), 
out); + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(24)); + printf(USAGE_MAN_TAIL("renice(1)")); + exit(EXIT_SUCCESS); +} + +static int getprio(const int which, const int who, int *prio) +{ + errno = 0; + *prio = getpriority(which, who); + if (*prio == -1 && errno) { + warn(_("failed to get priority for %d (%s)"), who, idtype[which]); + return -errno; + } + return 0; +} + +static int donice(const int which, const int who, const int prio) +{ + int oldprio, newprio; + + if (getprio(which, who, &oldprio) != 0) + return 1; + if (setpriority(which, who, prio) < 0) { + warn(_("failed to set priority for %d (%s)"), who, idtype[which]); + return 1; + } + if (getprio(which, who, &newprio) != 0) + return 1; + printf(_("%d (%s) old priority %d, new priority %d\n"), + who, idtype[which], oldprio, newprio); + return 0; +} + +/* + * Change the priority (the nice value) of processes + * or groups of processes which are already running. + */ +int main(int argc, char **argv) +{ + int which = PRIO_PROCESS; + int who = 0, prio, errs = 0; + char *endptr = NULL; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + argc--; + argv++; + + if (argc == 1) { + if (strcmp(*argv, "-h") == 0 || + strcmp(*argv, "--help") == 0) + usage(); + + if (strcmp(*argv, "-v") == 0 || + strcmp(*argv, "-V") == 0 || + strcmp(*argv, "--version") == 0) + print_version(EXIT_SUCCESS); + } + + if (*argv && (strcmp(*argv, "-n") == 0 || strcmp(*argv, "--priority") == 0)) { + argc--; + argv++; + } + + if (argc < 2 || !*argv) { + warnx(_("not enough arguments")); + errtryhelp(EXIT_FAILURE); + } + + prio = strtol(*argv, &endptr, 10); + if (*endptr) { + warnx(_("invalid priority '%s'"), *argv); + errtryhelp(EXIT_FAILURE); + } + argc--; + argv++; + + for (; argc > 0; argc--, argv++) { + if (strcmp(*argv, "-g") == 0 || strcmp(*argv, "--pgrp") == 0) { + which = PRIO_PGRP; + continue; + } + if (strcmp(*argv, "-u") == 0 || strcmp(*argv, "--user") == 
0) { + which = PRIO_USER; + continue; + } + if (strcmp(*argv, "-p") == 0 || strcmp(*argv, "--pid") == 0) { + which = PRIO_PROCESS; + continue; + } + if (which == PRIO_USER) { + struct passwd *pwd = getpwnam(*argv); + + if (pwd != NULL) + who = pwd->pw_uid; + else + who = strtol(*argv, &endptr, 10); + if (who < 0 || *endptr) { + warnx(_("unknown user %s"), *argv); + errs = 1; + continue; + } + } else { + who = strtol(*argv, &endptr, 10); + if (who < 0 || *endptr) { + /* TRANSLATORS: The first %s is one of the above + * three ID names. Read: "bad value for %s: %s" */ + warnx(_("bad %s value: %s"), idtype[which], *argv); + errs = 1; + continue; + } + } + errs |= donice(which, who, prio); + } + return errs != 0 ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/sys-utils/rfkill.8 b/sys-utils/rfkill.8 new file mode 100644 index 0000000..22db13c --- /dev/null +++ b/sys-utils/rfkill.8 @@ -0,0 +1,148 @@ +'\" t +.\" Title: rfkill +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "RFKILL" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +rfkill \- tool for enabling and disabling wireless devices +.SH "SYNOPSIS" +.sp +\fBrfkill\fP [options] [\fIcommand\fP] [\fIID\fP|\fItype\fP ...] +.SH "DESCRIPTION" +.sp +\fBrfkill\fP lists, enabling and disabling wireless devices. +.sp +The command "list" output format is deprecated and maintained for backward compatibility only. The new output format is the default when no command is specified or when the option \fB\-\-output\fP is used. +.sp +The default output is subject to change. 
So whenever possible, you should avoid using default outputs in your scripts. Always explicitly define expected columns by using the \fB\-\-output\fP option together with a columns list in environments where a stable output is required. +.SH "OPTIONS" +.sp +\fB\-J\fP, \fB\-\-json\fP +.RS 4 +Use JSON output format. +.RE +.sp +\fB\-n\fP, \fB\-\-noheadings\fP +.RS 4 +Do not print a header line. +.RE +.sp +\fB\-o\fP, \fB\-\-output\fP +.RS 4 +Specify which output columns to print. Use \fB\-\-help\fP to get a list of available columns. +.RE +.sp +\fB\-\-output\-all\fP +.RS 4 +Output all available columns. +.RE +.sp +\fB\-r\fP, \fB\-\-raw\fP +.RS 4 +Use the raw output format. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "COMMANDS" +.sp +\fBhelp\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fBevent\fP +.RS 4 +Listen for rfkill events and display them on stdout. +.RE +.sp +\fBlist\fP [\fIid\fP|\fItype\fP ...] +.RS 4 +List the current state of all available devices. The command output format is deprecated, see the \fBDESCRIPTION\fP section. It is a good idea to check with \fBlist\fP command \fIid\fP or \fItype\fP scope is appropriate before setting \fBblock\fP or \fBunblock\fP. Special \fIall\fP type string will match everything. Use of multiple \fIID\fP or \fItype\fP arguments is supported. +.RE +.sp +\fBblock\fP \fIid\fP|\fItype\fP [...] +.RS 4 +Disable the corresponding device. +.RE +.sp +\fBunblock\fP \fIid\fP|\fItype\fP [...] +.RS 4 +Enable the corresponding device. If the device is hard\-blocked, for example via a hardware switch, it will remain unavailable though it is now soft\-unblocked. +.RE +.sp +\fBtoggle\fP \fIid\fP|\fItype\fP [...] +.RS 4 +Enable or disable the corresponding device. 
+.RE +.SH "EXAMPLE" +.sp +.if n .RS 4 +.nf +.fam C + rfkill \-\-output ID,TYPE + rfkill block all + rfkill unblock wlan + rfkill block bluetooth uwb wimax wwan gps fm nfc +.fam +.fi +.if n .RE +.SH "AUTHORS" +.sp +\fBrfkill\fP was originally written by \c +.MTO "johannes\(atsipsolutions.net" "Johannes Berg" "" +and +.MTO "marcel\(atholtmann.org" "Marcel Holtmann" "." +The code has been later modified by +.MTO "kerolasa\(atiki.fi" "Sami Kerola" "" +and +.MTO "kzak\(atredhat.com" "Karel Zak" "" +for the util\-linux project. +.sp +This manual page was written by \c +.MTO "linux\(atyoumustbejoking.demon.co.uk" "Darren Salt" "" +for the Debian project (and may be used by others). +.SH "SEE ALSO" +.sp +\fBpowertop\fP(8), +\fBsystemd\-rfkill\fP(8), +.URL "https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/driver\-api/rfkill.rst" "Linux kernel documentation" "" +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBrfkill\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/rfkill.8.adoc b/sys-utils/rfkill.8.adoc new file mode 100644 index 0000000..2086798 --- /dev/null +++ b/sys-utils/rfkill.8.adoc @@ -0,0 +1,90 @@ +//po4a: entry man manual += rfkill(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: rfkill + +== NAME + +rfkill - tool for enabling and disabling wireless devices + +== SYNOPSIS + +*rfkill* [options] [_command_] [_ID_|_type_ ...] + +== DESCRIPTION + +*rfkill* lists, enables and disables wireless devices. + +The command "list" output format is deprecated and maintained for backward compatibility only. The new output format is the default when no command is specified or when the option *--output* is used. + +The default output is subject to change. So whenever possible, you should avoid using default outputs in your scripts. Always explicitly define expected columns by using the *--output* option together with a columns list in environments where a stable output is required. + +== OPTIONS + +*-J*, *--json*:: +Use JSON output format. + +*-n*, *--noheadings*:: +Do not print a header line. + +*-o*, *--output*:: +Specify which output columns to print. Use *--help* to get a list of available columns. + +*--output-all*:: +Output all available columns. + +*-r*, *--raw*:: +Use the raw output format. + +include::man-common/help-version.adoc[] + +== COMMANDS + +*help*:: +Display help text and exit. + +*event*:: +Listen for rfkill events and display them on stdout. + +*list* [__id__|__type__ ...]:: +List the current state of all available devices. The command output format is deprecated, see the *DESCRIPTION* section. It is a good idea to check with the *list* command that the _id_ or _type_ scope is appropriate before setting *block* or *unblock*. The special _all_ type string matches everything. Use of multiple _ID_ or _type_ arguments is supported. 
+ +**block** __id__|__type__ [...]:: +Disable the corresponding device. + +**unblock** __id__|__type__ [...]:: +Enable the corresponding device. If the device is hard-blocked, for example via a hardware switch, it will remain unavailable though it is now soft-unblocked. + +**toggle** __id__|__type__ [...]:: +Enable or disable the corresponding device. + +== EXAMPLE +.... + rfkill --output ID,TYPE + rfkill block all + rfkill unblock wlan + rfkill block bluetooth uwb wimax wwan gps fm nfc +.... + +== AUTHORS + +*rfkill* was originally written by mailto:johannes@sipsolutions.net[Johannes Berg] and mailto:marcel@holtmann.org[Marcel Holtmann]. The code has been later modified by mailto:kerolasa@iki.fi[Sami Kerola] and mailto:kzak@redhat.com[Karel Zak] for the util-linux project. + +This manual page was written by mailto:linux@youmustbejoking.demon.co.uk[Darren Salt] for the Debian project (and may be used by others). + +== SEE ALSO + +*powertop*(8), +*systemd-rfkill*(8), +https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/driver-api/rfkill.rst[Linux kernel documentation] + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/rfkill.c b/sys-utils/rfkill.c new file mode 100644 index 0000000..c4ee137 --- /dev/null +++ b/sys-utils/rfkill.c @@ -0,0 +1,818 @@ +/* + * /dev/rfkill userspace tool + * + * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> + * Copyright 2009 Marcel Holtmann <marcel@holtmann.org> + * Copyright 2009 Tim Gardner <tim.gardner@canonical.com> + * Copyright 2017 Sami Kerola <kerolasa@iki.fi> + * Copyright (C) 2017 Karel Zak <kzak@redhat.com> + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <ctype.h> +#include <getopt.h> +#include <libsmartcols.h> +#include <linux/rfkill.h> +#include <poll.h> +#include <sys/syslog.h> +#include <sys/time.h> + +#include "c.h" +#include "closestream.h" +#include "nls.h" +#include "optutils.h" +#include "pathnames.h" +#include "strutils.h" +#include "timeutils.h" +#include "widechar.h" +#include "xalloc.h" +#include "all-io.h" + + +/* + * NFC has been supported by the kernel since v3.10 (year 2013); FM and the other types are from + * year 2009 (2.6.33) or older. 
/*
 * Fallback definitions used when RFKILL_TYPE_NFC (and possibly
 * RFKILL_TYPE_FM) is not available as a preprocessor symbol.  Each value is
 * derived from the previous type.  The expansions are parenthesized so that
 * the macros behave as single values inside larger expressions
 * (e.g. "RFKILL_TYPE_NFC * 2").
 */
#ifndef RFKILL_TYPE_NFC
# ifndef RFKILL_TYPE_FM
#  define RFKILL_TYPE_FM	(RFKILL_TYPE_GPS + 1)
# endif
# define RFKILL_TYPE_NFC	(RFKILL_TYPE_FM + 1)
# undef NUM_RFKILL_TYPES
# define NUM_RFKILL_TYPES	(RFKILL_TYPE_NFC + 1)
#endif
const char *help; +}; + +/* columns descriptions */ +static const struct colinfo infos[] = { + [COL_DEVICE] = {"DEVICE", 0, 0, N_("kernel device name")}, + [COL_ID] = {"ID", 2, SCOLS_FL_RIGHT, N_("device identifier value")}, + [COL_TYPE] = {"TYPE", 0, 0, N_("device type name that can be used as identifier")}, + [COL_DESC] = {"TYPE-DESC", 0, 0, N_("device type description")}, + [COL_SOFT] = {"SOFT", 0, SCOLS_FL_RIGHT, N_("status of software block")}, + [COL_HARD] = {"HARD", 0, SCOLS_FL_RIGHT, N_("status of hardware block")} +}; + +static int columns[ARRAY_SIZE(infos) * 2]; +static size_t ncolumns; + +struct control { + struct libscols_table *tb; + unsigned int + json:1, + no_headings:1, + raw:1; +}; + +static int column_name_to_id(const char *name, size_t namesz) +{ + size_t i; + + assert(name); + + for (i = 0; i < ARRAY_SIZE(infos); i++) { + const char *cn = infos[i].name; + + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) + return i; + } + warnx(_("unknown column: %s"), name); + return -1; +} + +static int get_column_id(size_t num) +{ + assert(num < ncolumns); + assert(columns[num] < (int)ARRAY_SIZE(infos)); + return columns[num]; +} + +static const struct colinfo *get_column_info(int num) +{ + return &infos[get_column_id(num)]; +} + +static int string_to_action(const char *str) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(rfkill_actions); i++) + if (strcmp(str, rfkill_actions[i]) == 0) + return i; + + return -EINVAL; +} + +static int rfkill_open(int rdonly, int nonblock) +{ + int fd; + + fd = open(_PATH_DEV_RFKILL, rdonly ? 
O_RDONLY : O_RDWR); + if (fd < 0) { + warn(_("cannot open %s"), _PATH_DEV_RFKILL); + return -errno; + } + + if (nonblock && fcntl(fd, F_SETFL, O_NONBLOCK) < 0) { + warn(_("cannot set non-blocking %s"), _PATH_DEV_RFKILL); + close(fd); + return -errno; + } + + return fd; +} + +/* returns: 0 success, 1 read again, < 0 error */ +static int rfkill_read_event(int fd, struct rfkill_event *event) +{ + ssize_t len = read(fd, event, sizeof(*event)); + + if (len < 0) { + if (errno == EAGAIN) + return 1; + warn(_("cannot read %s"), _PATH_DEV_RFKILL); + return -errno; + } + + if ((size_t) len < (size_t) RFKILL_EVENT_SIZE_V1) { + warnx(_("wrong size of rfkill event: %zu < %zu"), + (size_t) len, (size_t) RFKILL_EVENT_SIZE_V1); + return 1; + } + + return 0; +} + + +static int rfkill_event(void) +{ + enum { + POLLFD_RFKILL, + POLLFD_STDOUT, + POLLFD_COUNT + }; + struct rfkill_event event; + struct timeval tv; + char date_buf[ISO_BUFSIZ]; + struct pollfd p[POLLFD_COUNT]; + int fd, n; + + fd = rfkill_open(1, 0); + if (fd < 0) + return fd; + + memset(&p, 0, sizeof(p)); + p[POLLFD_RFKILL].fd = fd; + p[POLLFD_RFKILL].events = POLLIN | POLLHUP; + p[POLLFD_STDOUT].fd = STDOUT_FILENO; + p[POLLFD_STDOUT].events = 0; + + /* interrupted by signal only */ + while (1) { + int rc = 1; /* recover-able error */ + + n = poll(p, ARRAY_SIZE(p), -1); + if (n < 0) { + warn(_("failed to poll %s"), _PATH_DEV_RFKILL); + goto failed; + } + + if (p[POLLFD_STDOUT].revents) + goto failed; /* read end of stdout closed */ + if (p[POLLFD_RFKILL].revents) + rc = rfkill_read_event(fd, &event); + if (rc < 0) + goto failed; + if (rc) + continue; + + gettimeofday(&tv, NULL); + strtimeval_iso(&tv, ISO_TIMESTAMP_COMMA, date_buf, + sizeof(date_buf)); + printf("%s: idx %u type %u op %u soft %u hard %u\n", + date_buf, + event.idx, event.type, event.op, event.soft, event.hard); + fflush(stdout); + } + +failed: + close(fd); + return -1; +} + +static const char *get_sys_attr(uint32_t idx, const char *attr) +{ + static char 
name[128]; + char path[PATH_MAX]; + FILE *f; + char *p; + + snprintf(path, sizeof(path), _PATH_SYS_RFKILL "/rfkill%u/%s", idx, attr); + f = fopen(path, "r"); + if (!f) + goto done; + if (!fgets(name, sizeof(name), f)) + goto done; + p = strchr(name, '\n'); + if (p) + *p = '\0'; +done: + if (f) + fclose(f); + return name; +} + +static struct rfkill_id rfkill_id_to_type(const char *s) +{ + const struct rfkill_type_str *p; + struct rfkill_id ret = { .result = 0 }; + + if (islower(*s)) { + for (p = rfkill_type_strings; p->name != NULL; p++) { + if (!strcmp(s, p->name)) { + ret.type = p->type; + if (!strcmp(s, "all")) + ret.result = RFKILL_IS_ALL; + else + ret.result = RFKILL_IS_TYPE; + return ret; + } + } + } else if (isdigit(*s)) { + /* assume a numeric character implies an index. */ + char filename[64]; + + ret.index = strtou32_or_err(s, _("invalid identifier")); + snprintf(filename, sizeof(filename) - 1, + _PATH_SYS_RFKILL "/rfkill%" PRIu32 "/name", ret.index); + if (access(filename, F_OK) == 0) + ret.result = RFKILL_IS_INDEX; + else + ret.result = RFKILL_IS_INVALID; + return ret; + } + + ret.result = RFKILL_IS_INVALID; + return ret; +} + +static const char *rfkill_type_to_desc(enum rfkill_type type) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(rfkill_type_strings); i++) { + if (type == rfkill_type_strings[i].type) + return rfkill_type_strings[i].desc; + } + + return NULL; +} + + +static int event_match(struct rfkill_event *event, struct rfkill_id *id) +{ + if (event->op != RFKILL_OP_ADD) + return 0; + + /* filter out unwanted results */ + switch (id->result) { + case RFKILL_IS_TYPE: + if (event->type != id->type) + return 0; + break; + case RFKILL_IS_INDEX: + if (event->idx != id->index) + return 0; + break; + case RFKILL_IS_ALL: + break; + default: + abort(); + } + + return 1; +} + +static void fill_table_row(struct libscols_table *tb, struct rfkill_event *event) +{ + static struct libscols_line *ln; + size_t i; + + assert(tb); + + ln = scols_table_new_line(tb, 
NULL); + if (!ln) { + errno = ENOMEM; + errx(EXIT_FAILURE, _("failed to allocate output line")); + } + + for (i = 0; i < (size_t)ncolumns; i++) { + char *str = NULL; + switch (get_column_id(i)) { + case COL_DEVICE: + str = xstrdup(get_sys_attr(event->idx, "name")); + break; + case COL_ID: + xasprintf(&str, "%" PRIu32, event->idx); + break; + case COL_TYPE: + str = xstrdup(get_sys_attr(event->idx, "type")); + break; + case COL_DESC: + str = xstrdup(rfkill_type_to_desc(event->type)); + break; + case COL_SOFT: + str = xstrdup(event->soft ? _("blocked") : _("unblocked")); + break; + case COL_HARD: + str = xstrdup(event->hard ? _("blocked") : _("unblocked")); + break; + default: + abort(); + } + if (str && scols_line_refer_data(ln, i, str)) + errx(EXIT_FAILURE, _("failed to add output data")); + } +} + +static int rfkill_list_old(const char *param) +{ + struct rfkill_id id = { .result = RFKILL_IS_ALL }; + struct rfkill_event event; + int fd, rc = 0; + + if (param) { + id = rfkill_id_to_type(param); + if (id.result == RFKILL_IS_INVALID) { + warnx(_("invalid identifier: %s"), param); + return -EINVAL; + } + } + + fd = rfkill_open(1, 1); + if (fd < 0) + return fd; + + while (1) { + rc = rfkill_read_event(fd, &event); + if (rc < 0) + break; + if (rc == 1 && errno == EAGAIN) { + rc = 0; /* done */ + break; + } + if (rc == 0 && event_match(&event, &id)) { + char *name = xstrdup(get_sys_attr(event.idx, "name")), + *type = xstrdup(rfkill_type_to_desc(event.type)); + + if (!type) + type = xstrdup(get_sys_attr(event.idx, "type")); + + printf("%u: %s: %s\n", event.idx, name, type); + printf("\tSoft blocked: %s\n", event.soft ? "yes" : "no"); + printf("\tHard blocked: %s\n", event.hard ? 
"yes" : "no"); + + free(name); + free(type); + } + } + close(fd); + return rc; +} + +static void rfkill_list_init(struct control *ctrl) +{ + size_t i; + + scols_init_debug(0); + + ctrl->tb = scols_new_table(); + if (!ctrl->tb) + err(EXIT_FAILURE, _("failed to allocate output table")); + + scols_table_enable_json(ctrl->tb, ctrl->json); + scols_table_set_name(ctrl->tb, "rfkilldevices"); + scols_table_enable_noheadings(ctrl->tb, ctrl->no_headings); + scols_table_enable_raw(ctrl->tb, ctrl->raw); + + for (i = 0; i < (size_t) ncolumns; i++) { + const struct colinfo *col = get_column_info(i); + struct libscols_column *cl; + + cl = scols_table_new_column(ctrl->tb, col->name, col->whint, col->flags); + if (!cl) + err(EXIT_FAILURE, _("failed to allocate output column")); + if (ctrl->json) { + int id = get_column_id(i); + if (id == COL_ID) + scols_column_set_json_type(cl, SCOLS_JSON_NUMBER); + } + } +} + +static int rfkill_list_fill(struct control const *ctrl, const char *param) +{ + struct rfkill_id id = { .result = RFKILL_IS_ALL }; + struct rfkill_event event; + int fd, rc = 0; + + if (param) { + id = rfkill_id_to_type(param); + if (id.result == RFKILL_IS_INVALID) { + warnx(_("invalid identifier: %s"), param); + return -EINVAL; + } + } + + fd = rfkill_open(1, 1); + if (fd < 0) + return fd; + + while (1) { + rc = rfkill_read_event(fd, &event); + if (rc < 0) + break; + if (rc == 1 && errno == EAGAIN) { + rc = 0; /* done */ + break; + } + if (rc == 0 && event_match(&event, &id)) + fill_table_row(ctrl->tb, &event); + } + close(fd); + return rc; +} + +static void rfkill_list_output(struct control const *ctrl) +{ + scols_print_table(ctrl->tb); + scols_unref_table(ctrl->tb); +} + +static int __rfkill_block(int fd, struct rfkill_id *id, uint8_t block, const char *param) +{ + struct rfkill_event event = { + .op = RFKILL_OP_CHANGE_ALL, + .soft = block, + 0 + }; + char *message = NULL; + + switch (id->result) { + case RFKILL_IS_INVALID: + warnx(_("invalid identifier: %s"), param); + 
return -1; + case RFKILL_IS_TYPE: + event.type = id->type; + xasprintf(&message, "type %s", param); + break; + case RFKILL_IS_INDEX: + event.op = RFKILL_OP_CHANGE; + event.idx = id->index; + xasprintf(&message, "id %d", id->index); + break; + case RFKILL_IS_ALL: + message = xstrdup("all"); + break; + default: + abort(); + } + + if (write_all(fd, &event, sizeof(event)) != 0) + warn(_("write failed: %s"), _PATH_DEV_RFKILL); + else { + openlog("rfkill", 0, LOG_USER); + syslog(LOG_NOTICE, "%s set for %s", block ? "block" : "unblock", message); + closelog(); + } + free(message); + return 0; +} + +static int rfkill_block(uint8_t block, const char *param) +{ + struct rfkill_id id; + int fd; + + id = rfkill_id_to_type(param); + if (id.result == RFKILL_IS_INVALID) { + warnx(_("invalid identifier: %s"), param); + return -EINVAL; + } + + fd = rfkill_open(0, 0); + if (fd < 0) + return fd; + + __rfkill_block(fd, &id, block, param); + + return close(fd); +} + +static int rfkill_toggle(const char *param) +{ + struct rfkill_id id = { .result = RFKILL_IS_ALL }; + struct rfkill_event event; + int fd, rc = 0; + + id = rfkill_id_to_type(param); + if (id.result == RFKILL_IS_INVALID) { + warnx(_("invalid identifier: %s"), param); + return -EINVAL; + } + + fd = rfkill_open(0, 1); + if (fd < 0) + return fd; + + while (1) { + rc = rfkill_read_event(fd, &event); + if (rc < 0) + break; + if (rc == 1 && errno == EAGAIN) { + rc = 0; /* done */ + break; + } + if (rc == 0 && event_match(&event, &id)) + __rfkill_block(fd, &id, event.soft ? 
0 : 1, param); + } + + close(fd); + return rc; +} + + +static void __attribute__((__noreturn__)) usage(void) +{ + size_t i; + + fputs(USAGE_HEADER, stdout); + fprintf(stdout, _(" %s [options] command [identifier ...]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, stdout); + fputs(_("Tool for enabling and disabling wireless devices.\n"), stdout); + + fputs(USAGE_OPTIONS, stdout); + fputs(_(" -J, --json use JSON output format\n"), stdout); + fputs(_(" -n, --noheadings don't print headings\n"), stdout); + fputs(_(" -o, --output <list> define which output columns to use\n"), stdout); + fputs(_(" --output-all output all columns\n"), stdout); + fputs(_(" -r, --raw use the raw output format\n"), stdout); + + fputs(USAGE_SEPARATOR, stdout); + printf(USAGE_HELP_OPTIONS(24)); + + fputs(USAGE_COLUMNS, stdout); + for (i = 0; i < ARRAY_SIZE(infos); i++) + fprintf(stdout, " %-10s %s\n", infos[i].name, _(infos[i].help)); + + fputs(USAGE_COMMANDS, stdout); + + /* + * TRANSLATORS: command names should not be translated, explaining + * them as additional field after identifier is fine, for example + * + * list [identifier] (lista [tarkenne]) + */ + fputs(_(" help\n"), stdout); + fputs(_(" event\n"), stdout); + fputs(_(" list [identifier]\n"), stdout); + fputs(_(" block identifier\n"), stdout); + fputs(_(" unblock identifier\n"), stdout); + fputs(_(" toggle identifier\n"), stdout); + + fprintf(stdout, USAGE_MAN_TAIL("rfkill(8)")); + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + struct control ctrl = { 0 }; + int c, act = ACT_LIST, list_all = 0; + char *outarg = NULL; + enum { + OPT_LIST_TYPES = CHAR_MAX + 1 + }; + static const struct option longopts[] = { + { "json", no_argument, NULL, 'J' }, + { "noheadings", no_argument, NULL, 'n' }, + { "output", required_argument, NULL, 'o' }, + { "output-all", no_argument, NULL, OPT_LIST_TYPES }, + { "raw", no_argument, NULL, 'r' }, + { "version", no_argument, NULL, 'V' }, + { "help", no_argument, NULL, 'h' }, + 
{ NULL, 0, NULL, 0 } + }; + static const ul_excl_t excl[] = { + {'J', 'r'}, + {0} + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + int ret = 0; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, "Jno:rVh", longopts, NULL)) != -1) { + err_exclusive_options(c, longopts, excl, excl_st); + switch (c) { + case 'J': + ctrl.json = 1; + break; + case 'n': + ctrl.no_headings = 1; + break; + case 'o': + outarg = optarg; + break; + case OPT_LIST_TYPES: + list_all = 1; + break; + case 'r': + ctrl.raw = 1; + break; + + case 'V': + print_version(EXIT_SUCCESS); + case 'h': + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + } + argc -= optind; + argv += optind; + + if (argc > 0) { + act = string_to_action(*argv); + if (act < 0) + errtryhelp(EXIT_FAILURE); + argv++; + argc--; + + /* + * For backward compatibility we use old output format if + * "list" explicitly specified and --output not defined. 
+ */ + if (!outarg && act == ACT_LIST) + act = ACT_LIST_OLD; + } + + switch (act) { + case ACT_LIST_OLD: + /* Deprecated in favour of ACT_LIST */ + if (!argc) + ret |= rfkill_list_old(NULL); /* ALL */ + else while (argc) { + ret |= rfkill_list_old(*argv); + argc--; + argv++; + } + break; + + case ACT_LIST: + columns[ncolumns++] = COL_ID; + columns[ncolumns++] = COL_TYPE; + columns[ncolumns++] = COL_DEVICE; + if (list_all) + columns[ncolumns++] = COL_DESC; + columns[ncolumns++] = COL_SOFT; + columns[ncolumns++] = COL_HARD; + + if (outarg + && string_add_to_idarray(outarg, columns, + ARRAY_SIZE(columns), &ncolumns, + column_name_to_id) < 0) + return EXIT_FAILURE; + + rfkill_list_init(&ctrl); + if (!argc) + ret |= rfkill_list_fill(&ctrl, NULL); /* ALL */ + else while (argc) { + ret |= rfkill_list_fill(&ctrl, *argv); + argc--; + argv++; + } + rfkill_list_output(&ctrl); + break; + + case ACT_EVENT: + ret = rfkill_event(); + break; + + case ACT_HELP: + usage(); + break; + + case ACT_BLOCK: + while (argc) { + ret |= rfkill_block(1, *argv); + argc--; + argv++; + } + break; + + case ACT_UNBLOCK: + while (argc) { + ret |= rfkill_block(0, *argv); + argv++; + argc--; + } + break; + + case ACT_TOGGLE: + while (argc) { + ret |= rfkill_toggle(*argv); + argv++; + argc--; + } + break; + } + + return ret ? EXIT_FAILURE : EXIT_SUCCESS; +} diff --git a/sys-utils/rtcwake.8 b/sys-utils/rtcwake.8 new file mode 100644 index 0000000..d6f996d --- /dev/null +++ b/sys-utils/rtcwake.8 @@ -0,0 +1,257 @@ +'\" t +.\" Title: rtcwake +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "RTCWAKE" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. 
LINKSTYLE blue R < > +.\} +.SH "NAME" +rtcwake \- enter a system sleep state until specified wakeup time +.SH "SYNOPSIS" +.sp +\fBrtcwake\fP [options] [\fB\-d\fP \fIdevice\fP] [\fB\-m\fP \fIstandby_mode\fP] {\fB\-s\fP \fIseconds\fP|\fB\-t\fP \fItime_t\fP} +.SH "DESCRIPTION" +.sp +This program is used to enter a system sleep state and to automatically wake from it at a specified time. +.sp +This uses cross\-platform Linux interfaces to enter a system sleep state, and leave it no later than a specified time. It uses any RTC framework driver that supports standard driver model wakeup flags. +.sp +This is normally used like the old \fBapmsleep\fP utility, to wake from a suspend state like ACPI S1 (standby) or S3 (suspend\-to\-RAM). Most platforms can implement those without analogues of BIOS, APM, or ACPI. +.sp +On some systems, this can also be used like \fBnvram\-wakeup\fP, waking from states like ACPI S4 (suspend to disk). Not all systems have persistent media that are appropriate for such suspend modes. +.sp +Note that alarm functionality depends on hardware; not every RTC is able to set up an alarm up to 24 hours in the future. +.sp +The suspend setup may be interrupted by active hardware; for example wireless USB input devices that continue to send events for some fraction of a second after the return key is pressed. \fBrtcwake\fP tries to avoid this problem and it waits for the terminal to settle down before entering a system sleep state. +.SH "OPTIONS" +.sp +\fB\-A\fP, \fB\-\-adjfile\fP \fIfile\fP +.RS 4 +Specify an alternative path to the adjust file. +.RE +.sp +\fB\-a\fP, \fB\-\-auto\fP +.RS 4 +Read the clock mode (whether the hardware clock is set to UTC or local time) from the \fIadjtime\fP file, where \fBhwclock\fP(8) stores that information. This is the default. +.RE +.sp +\fB\-\-date\fP \fItimestamp\fP +.RS 4 +Set the wakeup time to the value of the timestamp. Format of the timestamp can be any of the following: +.RE +.TS +allbox tab(:); +lt lt. 
+T{ +.sp +YYYYMMDDhhmmss +T}:T{ +.sp + +T} +T{ +.sp +YYYY\-MM\-DD hh:mm:ss +T}:T{ +.sp + +T} +T{ +.sp +YYYY\-MM\-DD hh:mm +T}:T{ +.sp +(seconds will be set to 00) +T} +T{ +.sp +YYYY\-MM\-DD +T}:T{ +.sp +(time will be set to 00:00:00) +T} +T{ +.sp +hh:mm:ss +T}:T{ +.sp +(date will be set to today) +T} +T{ +.sp +hh:mm +T}:T{ +.sp +(date will be set to today, seconds to 00) +T} +T{ +.sp +tomorrow +T}:T{ +.sp +(time is set to 00:00:00) +T} +T{ +.sp ++5min +T}:T{ +.sp + +T} +.TE +.sp +.sp +\fB\-d\fP, \fB\-\-device\fP \fIdevice\fP +.RS 4 +Use the specified \fIdevice\fP instead of \fBrtc0\fP as realtime clock. This option is only relevant if your system has more than one RTC. You may specify \fBrtc1\fP, \fBrtc2\fP, ... here. +.RE +.sp +\fB\-l\fP, \fB\-\-local\fP +.RS 4 +Assume that the hardware clock is set to local time, regardless of the contents of the \fIadjtime\fP file. +.RE +.sp +\fB\-\-list\-modes\fP +.RS 4 +List available \fB\-\-mode\fP option arguments. +.RE +.sp +\fB\-m\fP, \fB\-\-mode\fP \fImode\fP +.RS 4 +Go into the given standby state. Valid values for \fImode\fP are: +.sp +\fBstandby\fP +.RS 4 +ACPI state S1. This state offers minimal, though real, power savings, while providing a very low\-latency transition back to a working system. This is the default mode. +.RE +.sp +\fBfreeze\fP +.RS 4 +The processes are frozen, all the devices are suspended and all the processors idled. This state is a general state that does not need any platform\-specific support, but it saves less power than Suspend\-to\-RAM, because the system is still in a running state. (Available since Linux 3.9.) +.RE +.sp +\fBmem\fP +.RS 4 +ACPI state S3 (Suspend\-to\-RAM). This state offers significant power savings as everything in the system is put into a low\-power state, except for memory, which is placed in self\-refresh mode to retain its contents. +.RE +.sp +\fBdisk\fP +.RS 4 +ACPI state S4 (Suspend\-to\-disk). 
This state offers the greatest power savings, and can be used even in the absence of low\-level platform support for power management. This state operates similarly to Suspend\-to\-RAM, but includes a final step of writing memory contents to disk. +.RE +.sp +\fBoff\fP +.RS 4 +ACPI state S5 (Poweroff). This is done by calling \(aq/sbin/shutdown\(aq. Not officially supported by ACPI, but it usually works. +.RE +.sp +\fBno\fP +.RS 4 +Don\(cqt suspend, only set the RTC wakeup time. +.RE +.sp +\fBon\fP +.RS 4 +Don\(cqt suspend, but read the RTC device until an alarm time appears. This mode is useful for debugging. +.RE +.sp +\fBdisable\fP +.RS 4 +Disable a previously set alarm. +.RE +.sp +\fBshow\fP +.RS 4 +Print alarm information in format: "alarm: off|on <time>". The time is in ctime() output format, e.g., "alarm: on Tue Nov 16 04:48:45 2010". +.RE +.RE +.sp +\fB\-n\fP, \fB\-\-dry\-run\fP +.RS 4 +This option does everything apart from actually setting up the alarm, suspending the system, or waiting for the alarm. +.RE +.sp +\fB\-s\fP, \fB\-\-seconds\fP \fIseconds\fP +.RS 4 +Set the wakeup time to \fIseconds\fP in the future from now. +.RE +.sp +\fB\-t\fP, \fB\-\-time\fP \fItime_t\fP +.RS 4 +Set the wakeup time to the absolute time \fItime_t\fP. \fItime_t\fP is the time in seconds since 1970\-01\-01, 00:00 UTC. Use the \fBdate\fP(1) tool to convert between human\-readable time and \fItime_t\fP. +.RE +.sp +\fB\-u\fP, \fB\-\-utc\fP +.RS 4 +Assume that the hardware clock is set to UTC (Universal Time Coordinated), regardless of the contents of the \fIadjtime\fP file. +.RE +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Be verbose. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "NOTES" +.sp +Some PC systems can\(cqt currently exit sleep states such as \fBmem\fP using only the kernel code accessed by this driver. 
They need help from userspace code to make the framebuffer work again. +.SH "FILES" +.sp +\fI/etc/adjtime\fP +.SH "HISTORY" +.sp +The program was posted several times on LKML and other lists before appearing in kernel commit message for Linux 2.6 in the GIT commit 87ac84f42a7a580d0dd72ae31d6a5eb4bfe04c6d. +.SH "AUTHORS" +.sp +The program was written by \c +.MTO "dbrownell\(atusers.sourceforge.net" "David Brownell" "" +and improved by +.MTO "bwalle\(atsuse.de" "Bernhard Walle" "." +.SH "COPYRIGHT" +.sp +This is free software. You may redistribute copies of it under the terms of the \c +.URL "http://www.gnu.org/licenses/gpl.html" "GNU General Public License" "." +There is NO WARRANTY, to the extent permitted by law. +.SH "SEE ALSO" +.sp +\fBadjtime_config\fP(5), +\fBhwclock\fP(8), +\fBdate\fP(1) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBrtcwake\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/rtcwake.8.adoc b/sys-utils/rtcwake.8.adoc new file mode 100644 index 0000000..481a586 --- /dev/null +++ b/sys-utils/rtcwake.8.adoc @@ -0,0 +1,142 @@ +//po4a: entry man manual += rtcwake(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: rtcwake + +== NAME + +rtcwake - enter a system sleep state until specified wakeup time + +== SYNOPSIS + +*rtcwake* [options] [*-d* _device_] [*-m* _standby_mode_] {*-s* _seconds_|*-t* _time_t_} + +== DESCRIPTION + +This program is used to enter a system sleep state and to automatically wake from it at a specified time. + +This uses cross-platform Linux interfaces to enter a system sleep state, and leave it no later than a specified time. It uses any RTC framework driver that supports standard driver model wakeup flags. + +This is normally used like the old *apmsleep* utility, to wake from a suspend state like ACPI S1 (standby) or S3 (suspend-to-RAM). Most platforms can implement those without analogues of BIOS, APM, or ACPI. + +On some systems, this can also be used like *nvram-wakeup*, waking from states like ACPI S4 (suspend to disk). Not all systems have persistent media that are appropriate for such suspend modes. + +Note that alarm functionality depends on hardware; not every RTC is able to set up an alarm up to 24 hours in the future. + +The suspend setup may be interrupted by active hardware; for example wireless USB input devices that continue to send events for some fraction of a second after the return key is pressed. *rtcwake* tries to avoid this problem and it waits for the terminal to settle down before entering a system sleep state. + +== OPTIONS + +*-A*, *--adjfile* _file_:: +Specify an alternative path to the adjust file. + +*-a*, *--auto*:: +Read the clock mode (whether the hardware clock is set to UTC or local time) from the _adjtime_ file, where *hwclock*(8) stores that information. 
This is the default. + +*--date* _timestamp_:: +Set the wakeup time to the value of the timestamp. Format of the timestamp can be any of the following: + +[cols=",",] +|=== +|YYYYMMDDhhmmss | +|YYYY-MM-DD hh:mm:ss | +|YYYY-MM-DD hh:mm |(seconds will be set to 00) +|YYYY-MM-DD |(time will be set to 00:00:00) +|hh:mm:ss |(date will be set to today) +|hh:mm |(date will be set to today, seconds to 00) +|tomorrow |(time is set to 00:00:00) +|+5min | +|=== + +*-d*, *--device* _device_:: +Use the specified _device_ instead of *rtc0* as realtime clock. This option is only relevant if your system has more than one RTC. You may specify *rtc1*, *rtc2*, ... here. + +*-l*, *--local*:: +Assume that the hardware clock is set to local time, regardless of the contents of the _adjtime_ file. + +*--list-modes*:: +List available *--mode* option arguments. + +*-m*, *--mode* _mode_:: +Go into the given standby state. Valid values for _mode_ are: + +*standby*;; +ACPI state S1. This state offers minimal, though real, power savings, while providing a very low-latency transition back to a working system. This is the default mode. + +*freeze*;; +The processes are frozen, all the devices are suspended and all the processors idled. This state is a general state that does not need any platform-specific support, but it saves less power than Suspend-to-RAM, because the system is still in a running state. (Available since Linux 3.9.) + +*mem*;; +ACPI state S3 (Suspend-to-RAM). This state offers significant power savings as everything in the system is put into a low-power state, except for memory, which is placed in self-refresh mode to retain its contents. + +*disk*;; +ACPI state S4 (Suspend-to-disk). This state offers the greatest power savings, and can be used even in the absence of low-level platform support for power management. This state operates similarly to Suspend-to-RAM, but includes a final step of writing memory contents to disk. + +*off*;; +ACPI state S5 (Poweroff). 
This is done by calling '/sbin/shutdown'. Not officially supported by ACPI, but it usually works. + +*no*;; +Don't suspend, only set the RTC wakeup time. + +*on*;; +Don't suspend, but read the RTC device until an alarm time appears. This mode is useful for debugging. + +*disable*;; +Disable a previously set alarm. + +*show*;; +Print alarm information in format: "alarm: off|on <time>". The time is in ctime() output format, e.g., "alarm: on Tue Nov 16 04:48:45 2010". + +*-n*, *--dry-run*:: +This option does everything apart from actually setting up the alarm, suspending the system, or waiting for the alarm. + +*-s*, *--seconds* _seconds_:: +Set the wakeup time to _seconds_ in the future from now. + +*-t*, *--time* _time_t_:: +Set the wakeup time to the absolute time _time_t_. _time_t_ is the time in seconds since 1970-01-01, 00:00 UTC. Use the *date*(1) tool to convert between human-readable time and _time_t_. + +*-u*, *--utc*:: +Assume that the hardware clock is set to UTC (Universal Time Coordinated), regardless of the contents of the _adjtime_ file. + +*-v*, *--verbose*:: +Be verbose. + +include::man-common/help-version.adoc[] + +== NOTES + +Some PC systems can't currently exit sleep states such as *mem* using only the kernel code accessed by this driver. They need help from userspace code to make the framebuffer work again. + +== FILES + +_{ADJTIME_PATH}_ + +== HISTORY + +The program was posted several times on LKML and other lists before appearing in kernel commit message for Linux 2.6 in the GIT commit 87ac84f42a7a580d0dd72ae31d6a5eb4bfe04c6d. + +== AUTHORS + +The program was written by mailto:dbrownell@users.sourceforge.net[David Brownell] and improved by mailto:bwalle@suse.de[Bernhard Walle]. + +== COPYRIGHT + +This is free software. You may redistribute copies of it under the terms of the link:http://www.gnu.org/licenses/gpl.html[GNU General Public License]. There is NO WARRANTY, to the extent permitted by law. 
+ +== SEE ALSO + +*adjtime_config*(5), +*hwclock*(8), +*date*(1) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/rtcwake.c b/sys-utils/rtcwake.c new file mode 100644 index 0000000..65f916b --- /dev/null +++ b/sys-utils/rtcwake.c @@ -0,0 +1,688 @@ +/* + * rtcwake -- enter a system sleep state until specified wakeup time. + * + * This uses cross-platform Linux interfaces to enter a system sleep state, + * and leave it no later than a specified time. It uses any RTC framework + * driver that supports standard driver model wakeup flags. + * + * This is normally used like the old "apmsleep" utility, to wake from a + * suspend state like ACPI S1 (standby) or S3 (suspend-to-RAM). Most + * platforms can implement those without analogues of BIOS, APM, or ACPI. + * + * On some systems, this can also be used like "nvram-wakeup", waking + * from states like ACPI S4 (suspend to disk). Not all systems have + * persistent media that are appropriate for such suspend modes. + * + * The best way to set the system's RTC is so that it holds the current + * time in UTC. Use the "-l" flag to tell this program that the system + * RTC uses a local timezone instead (maybe you dual-boot MS-Windows). + * That flag should not be needed on systems with adjtime support. 
+ */ + +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <linux/rtc.h> +#include <poll.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <termios.h> +#include <time.h> +#include <unistd.h> + +#include "c.h" +#include "closestream.h" +#include "env.h" +#include "nls.h" +#include "optutils.h" +#include "pathnames.h" +#include "strutils.h" +#include "strv.h" +#include "timeutils.h" +#include "xalloc.h" + +#ifndef RTC_AF +# define RTC_AF 0x20 /* Alarm interrupt */ +#endif + +#define ADJTIME_ZONE_BUFSIZ 8 +#define SYS_WAKEUP_PATH_TEMPLATE "/sys/class/rtc/%s/device/power/wakeup" +#define SYS_POWER_STATE_PATH "/sys/power/state" +#define DEFAULT_RTC_DEVICE "/dev/rtc0" + +enum rtc_modes { /* manual page --mode option explains these. */ + OFF_MODE = 0, + NO_MODE, + ON_MODE, + DISABLE_MODE, + SHOW_MODE, + + SYSFS_MODE /* keep it last */ + +}; + +static const char *rtcwake_mode_string[] = { + [OFF_MODE] = "off", + [NO_MODE] = "no", + [ON_MODE] = "on", + [DISABLE_MODE] = "disable", + [SHOW_MODE] = "show" +}; + +enum clock_modes { + CM_AUTO, + CM_UTC, + CM_LOCAL +}; + +struct rtcwake_control { + char *mode_str; /* name of the requested mode */ + char **possible_modes; /* modes listed in /sys/power/state */ + char *adjfile; /* adjtime file path */ + enum clock_modes clock_mode; /* hwclock timezone */ + time_t sys_time; /* system time */ + time_t rtc_time; /* hardware time */ + unsigned int verbose:1, /* verbose messaging */ + dryrun:1; /* do not set alarm, suspend system, etc */ +}; + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, + _(" %s [options]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Enter a system sleep state until a specified wakeup time.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -a, --auto reads the clock 
mode from adjust file (default)\n"), out); + fprintf(out, + _(" -A, --adjfile <file> specifies the path to the adjust file\n" + " the default is %s\n"), _PATH_ADJTIME); + fputs(_(" --date <timestamp> date time of timestamp to wake\n"), out); + fputs(_(" -d, --device <device> select rtc device (rtc0|rtc1|...)\n"), out); + fputs(_(" -n, --dry-run does everything, but suspend\n"), out); + fputs(_(" -l, --local RTC uses local timezone\n"), out); + fputs(_(" --list-modes list available modes\n"), out); + fputs(_(" -m, --mode <mode> standby|mem|... sleep mode\n"), out); + fputs(_(" -s, --seconds <seconds> seconds to sleep\n"), out); + fputs(_(" -t, --time <time_t> time to wake\n"), out); + fputs(_(" -u, --utc RTC uses UTC\n"), out); + fputs(_(" -v, --verbose verbose messages\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(26)); + printf(USAGE_MAN_TAIL("rtcwake(8)")); + exit(EXIT_SUCCESS); +} + +static int is_wakeup_enabled(const char *devname) +{ + char buf[128], *s; + FILE *f; + size_t skip = 0; + + if (startswith(devname, "/dev/")) + skip = 5; + snprintf(buf, sizeof buf, SYS_WAKEUP_PATH_TEMPLATE, devname + skip); + f = fopen(buf, "r"); + if (!f) { + warn(_("cannot open %s"), buf); + return 0; + } + + s = fgets(buf, sizeof buf, f); + fclose(f); + if (!s) + return 0; + s = strchr(buf, '\n'); + if (!s) + return 0; + *s = 0; + /* wakeup events could be disabled or not supported */ + return strcmp(buf, "enabled") == 0; +} + +static int get_basetimes(struct rtcwake_control *ctl, int fd) +{ + struct tm tm = { 0 }; + struct rtc_time rtc; + + /* This process works in RTC time, except when working + * with the system clock (which always uses UTC). + */ + if (ctl->clock_mode == CM_UTC) + xsetenv("TZ", "UTC", 1); + tzset(); + /* Read rtc and system clocks "at the same time", or as + * precisely (+/- a second) as we can read them. 
+ */ + if (ioctl(fd, RTC_RD_TIME, &rtc) < 0) { + warn(_("read rtc time failed")); + return -1; + } + + ctl->sys_time = time(NULL); + if (ctl->sys_time == (time_t)-1) { + warn(_("read system time failed")); + return -1; + } + /* Convert rtc_time to normal arithmetic-friendly form, + * updating tm.tm_wday as used by asctime(). + */ + tm.tm_sec = rtc.tm_sec; + tm.tm_min = rtc.tm_min; + tm.tm_hour = rtc.tm_hour; + tm.tm_mday = rtc.tm_mday; + tm.tm_mon = rtc.tm_mon; + tm.tm_year = rtc.tm_year; + tm.tm_isdst = -1; /* assume the system knows better than the RTC */ + + ctl->rtc_time = mktime(&tm); + if (ctl->rtc_time == (time_t)-1) { + warn(_("convert rtc time failed")); + return -1; + } + + if (ctl->verbose) { + /* Unless the system uses UTC, either delta or tzone + * reflects a seconds offset from UTC. The value can + * help sort out problems like bugs in your C library. */ + char s[64]; + printf("\tdelta = %"PRId64"\n", (int64_t) ctl->sys_time - ctl->rtc_time); + printf("\ttzone = %ld\n", timezone); + printf("\ttzname = %s\n", tzname[daylight]); + gmtime_r(&ctl->sys_time, &tm); + printf("\tsystime = %"PRId64", (UTC) %s", + (int64_t) ctl->sys_time, asctime_r(&tm, s)); + gmtime_r(&ctl->rtc_time, &tm); + printf("\trtctime = %"PRId64", (UTC) %s", + (int64_t) ctl->rtc_time, asctime_r(&tm, s)); + } + return 0; +} + +static int setup_alarm(struct rtcwake_control *ctl, int fd, time_t *wakeup) +{ + struct tm tm; + struct rtc_wkalrm wake = { 0 }; + + /* The wakeup time is in POSIX time (more or less UTC). Ideally + * RTCs use that same time; but PCs can't do that if they need to + * boot MS-Windows. Messy... + * + * When clock_mode == CM_UTC this process's timezone is UTC, so + * we'll pass a UTC date to the RTC. + * + * Else clock_mode == CM_LOCAL so the time given to the RTC will + * instead use the local time zone. 
*/ + localtime_r(wakeup, &tm); + wake.time.tm_sec = tm.tm_sec; + wake.time.tm_min = tm.tm_min; + wake.time.tm_hour = tm.tm_hour; + wake.time.tm_mday = tm.tm_mday; + wake.time.tm_mon = tm.tm_mon; + wake.time.tm_year = tm.tm_year; + /* wday, yday, and isdst fields are unused */ + wake.time.tm_wday = -1; + wake.time.tm_yday = -1; + wake.time.tm_isdst = -1; + wake.enabled = 1; + + if (!ctl->dryrun && ioctl(fd, RTC_WKALM_SET, &wake) < 0) { + warn(_("set rtc wake alarm failed")); + return -1; + } + return 0; +} + +static char **get_sys_power_states(struct rtcwake_control *ctl) +{ + int fd = -1; + + if (!ctl->possible_modes) { + char buf[256] = { 0 }; + ssize_t ss; + + fd = open(SYS_POWER_STATE_PATH, O_RDONLY); + if (fd < 0) + goto nothing; + ss = read(fd, &buf, sizeof(buf) - 1); + if (ss <= 0) + goto nothing; + buf[ss] = '\0'; + ctl->possible_modes = strv_split(buf, " \n"); + close(fd); + } + return ctl->possible_modes; +nothing: + if (fd >= 0) + close(fd); + return NULL; +} + +static void wait_stdin(struct rtcwake_control *ctl) +{ + struct pollfd fd[] = { + {.fd = STDIN_FILENO, .events = POLLIN} + }; + int tries = 0; + + while (tries < 8 && poll(fd, 1, 10) == 1) { + if (ctl->verbose) + warnx(_("discarding stdin")); + xusleep(250000); + tcflush(STDIN_FILENO, TCIFLUSH); + tries++; + } +} + +static void suspend_system(struct rtcwake_control *ctl) +{ + FILE *f = fopen(SYS_POWER_STATE_PATH, "w"); + + if (!f) { + warn(_("cannot open %s"), SYS_POWER_STATE_PATH); + return; + } + + if (!ctl->dryrun) { + if (isatty(STDIN_FILENO)) + wait_stdin(ctl); + fprintf(f, "%s\n", ctl->mode_str); + fflush(f); + } + /* this executes after wake from suspend */ + if (close_stream(f)) + errx(EXIT_FAILURE, _("write error")); +} + +static int read_clock_mode(struct rtcwake_control *ctl) +{ + FILE *fp; + char linebuf[ADJTIME_ZONE_BUFSIZ]; + + fp = fopen(ctl->adjfile, "r"); + if (!fp) + return -1; + /* skip two lines */ + if (skip_fline(fp) || skip_fline(fp)) { + fclose(fp); + return -1; + } + /* 
read third line */ + if (!fgets(linebuf, sizeof linebuf, fp)) { + fclose(fp); + return -1; + } + + if (strncmp(linebuf, "UTC", 3) == 0) + ctl->clock_mode = CM_UTC; + else if (strncmp(linebuf, "LOCAL", 5) == 0) + ctl->clock_mode = CM_LOCAL; + else if (ctl->verbose) + warnx(_("unexpected third line in: %s: %s"), ctl->adjfile, linebuf); + + fclose(fp); + return 0; +} + +static int print_alarm(struct rtcwake_control *ctl, int fd) +{ + struct rtc_wkalrm wake; + struct tm tm = { 0 }; + time_t alarm; + char s[CTIME_BUFSIZ]; + + if (ioctl(fd, RTC_WKALM_RD, &wake) < 0) { + warn(_("read rtc alarm failed")); + return -1; + } + + if (wake.enabled != 1 || wake.time.tm_year == -1) { + printf(_("alarm: off\n")); + return 0; + } + tm.tm_sec = wake.time.tm_sec; + tm.tm_min = wake.time.tm_min; + tm.tm_hour = wake.time.tm_hour; + tm.tm_mday = wake.time.tm_mday; + tm.tm_mon = wake.time.tm_mon; + tm.tm_year = wake.time.tm_year; + tm.tm_isdst = -1; /* assume the system knows better than the RTC */ + + alarm = mktime(&tm); + if (alarm == (time_t)-1) { + warn(_("convert time failed")); + return -1; + } + /* 0 if both UTC, or expresses diff if RTC in local time */ + alarm += ctl->sys_time - ctl->rtc_time; + ctime_r(&alarm, s); + printf(_("alarm: on %s"), s); + + return 0; +} + +static int get_rtc_mode(struct rtcwake_control *ctl, const char *s) +{ + size_t i; + char **modes = get_sys_power_states(ctl), **m; + + STRV_FOREACH(m, modes) { + if (strcmp(s, *m) == 0) + return SYSFS_MODE; + } + + for (i = 0; i < ARRAY_SIZE(rtcwake_mode_string); i++) + if (!strcmp(s, rtcwake_mode_string[i])) + return i; + + return -EINVAL; +} + +static int open_dev_rtc(const char *devname) +{ + int fd; + char *devpath = NULL; + + if (startswith(devname, "/dev")) + devpath = xstrdup(devname); + else + xasprintf(&devpath, "/dev/%s", devname); + fd = open(devpath, O_RDONLY | O_CLOEXEC); + if (fd < 0) + err(EXIT_FAILURE, _("%s: unable to find device"), devpath); + free(devpath); + return fd; +} + +static void 
list_modes(struct rtcwake_control *ctl) +{ + size_t i; + char **modes = get_sys_power_states(ctl), **m; + + if (!modes) + errx(EXIT_FAILURE, _("could not read: %s"), SYS_POWER_STATE_PATH); + + STRV_FOREACH(m, modes) + printf("%s ", *m); + + for (i = 0; i < ARRAY_SIZE(rtcwake_mode_string); i++) + printf("%s ", rtcwake_mode_string[i]); + putchar('\n'); +} + +int main(int argc, char **argv) +{ + struct rtcwake_control ctl = { + .mode_str = "suspend", /* default mode */ + .adjfile = _PATH_ADJTIME, + .clock_mode = CM_AUTO + }; + char *devname = DEFAULT_RTC_DEVICE; + int suspend = SYSFS_MODE; + int rc = EXIT_SUCCESS; + int t; + int fd; + time_t alarm = 0, seconds = 0; + enum { + OPT_DATE = CHAR_MAX + 1, + OPT_LIST + }; + static const struct option long_options[] = { + { "adjfile", required_argument, NULL, 'A' }, + { "auto", no_argument, NULL, 'a' }, + { "dry-run", no_argument, NULL, 'n' }, + { "local", no_argument, NULL, 'l' }, + { "utc", no_argument, NULL, 'u' }, + { "verbose", no_argument, NULL, 'v' }, + { "version", no_argument, NULL, 'V' }, + { "help", no_argument, NULL, 'h' }, + { "mode", required_argument, NULL, 'm' }, + { "device", required_argument, NULL, 'd' }, + { "seconds", required_argument, NULL, 's' }, + { "time", required_argument, NULL, 't' }, + { "date", required_argument, NULL, OPT_DATE }, + { "list-modes", no_argument, NULL, OPT_LIST }, + { NULL, 0, NULL, 0 } + }; + static const ul_excl_t excl[] = { + { 'a', 'l', 'u' }, + { 's', 't', OPT_DATE }, + { 0 }, + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((t = getopt_long(argc, argv, "A:ahd:lm:ns:t:uVv", + long_options, NULL)) != EOF) { + err_exclusive_options(t, long_options, excl, excl_st); + switch (t) { + case 'A': + /* for better compatibility with hwclock */ + ctl.adjfile = optarg; + break; + case 'a': + ctl.clock_mode = CM_AUTO; + break; + case 'd': + devname = 
optarg; + break; + case 'l': + ctl.clock_mode = CM_LOCAL; + break; + + case OPT_LIST: + list_modes(&ctl); + return EXIT_SUCCESS; + + case 'm': + if ((suspend = get_rtc_mode(&ctl, optarg)) < 0) + errx(EXIT_FAILURE, _("unrecognized suspend state '%s'"), optarg); + ctl.mode_str = optarg; + break; + case 'n': + ctl.dryrun = 1; + break; + case 's': + /* alarm time, seconds-to-sleep (relative) */ + seconds = strtotime_or_err(optarg, _("invalid seconds argument")); + break; + case 't': + /* alarm time, time_t (absolute, seconds since epoch) */ + alarm = strtotime_or_err(optarg, _("invalid time argument")); + break; + case OPT_DATE: + { /* alarm time, see timestamp format from manual */ + usec_t p; + if (parse_timestamp(optarg, &p) < 0) + errx(EXIT_FAILURE, _("invalid time value \"%s\""), optarg); + alarm = (time_t) (p / 1000000); + break; + } + case 'u': + ctl.clock_mode = CM_UTC; + break; + case 'v': + ctl.verbose = 1; + break; + + case 'V': + print_version(EXIT_SUCCESS); + case 'h': + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (ctl.clock_mode == CM_AUTO && read_clock_mode(&ctl) < 0) { + printf(_("%s: assuming RTC uses UTC ...\n"), program_invocation_short_name); + ctl.clock_mode = CM_UTC; + } + + if (ctl.verbose) + printf("%s", ctl.clock_mode == CM_UTC ? 
_("Using UTC time.\n") : + _("Using local time.\n")); + + if (!alarm && !seconds && suspend != DISABLE_MODE && suspend != SHOW_MODE) + errx(EXIT_FAILURE, _("must provide wake time (see --seconds, --time and --date options)")); + + /* device must exist and (if we'll sleep) be wakeup-enabled */ + fd = open_dev_rtc(devname); + + if (suspend != ON_MODE && suspend != NO_MODE && !is_wakeup_enabled(devname)) + errx(EXIT_FAILURE, _("%s not enabled for wakeup events"), devname); + + /* relative or absolute alarm time, normalized to time_t */ + if (get_basetimes(&ctl, fd) < 0) + exit(EXIT_FAILURE); + + if (ctl.verbose) + printf(_("alarm %"PRId64", sys_time %"PRId64", " + "rtc_time %"PRId64", seconds %"PRIu64"\n"), + (int64_t) alarm, (int64_t) ctl.sys_time, + (int64_t) ctl.rtc_time, + (int64_t) seconds); + + if (suspend != DISABLE_MODE && suspend != SHOW_MODE) { + /* perform alarm setup when the show or disable modes are not set */ + if (alarm) { + if (alarm < ctl.sys_time) { + char s[CTIME_BUFSIZ]; + + ctime_r(&alarm, s); + errx(EXIT_FAILURE, _("time doesn't go backward to %s"), s); + } + alarm -= ctl.sys_time - ctl.rtc_time; + } else + alarm = ctl.rtc_time + seconds + 1; + + if (setup_alarm(&ctl, fd, &alarm) < 0) + exit(EXIT_FAILURE); + + if (suspend == NO_MODE || suspend == ON_MODE) { + char s[CTIME_BUFSIZ]; + + ctime_r(&alarm, s); + printf(_("%s: wakeup using %s at %s"), + program_invocation_short_name, devname, s); + } else { + char s[CTIME_BUFSIZ]; + + ctime_r(&alarm, s); + printf(_("%s: wakeup from \"%s\" using %s at %s"), + program_invocation_short_name, ctl.mode_str, devname, s); + } + fflush(stdout); + xusleep(10 * 1000); + } + + switch (suspend) { + case NO_MODE: + if (ctl.verbose) + printf(_("suspend mode: no; leaving\n")); + ctl.dryrun = 1; /* to skip disabling alarm at the end */ + break; + case OFF_MODE: + { + char *arg[5]; + int i = 0; + + if (!access(_PATH_SHUTDOWN, X_OK)) { + arg[i++] = _PATH_SHUTDOWN; + arg[i++] = "-h"; + arg[i++] = "-P"; + arg[i++] = 
"now"; + arg[i] = NULL; + } else if (!access(_PATH_POWEROFF, X_OK)) { + arg[i++] = _PATH_POWEROFF; + arg[i] = NULL; + } else { + arg[i] = NULL; + } + + if (arg[0]) { + if (ctl.verbose) + printf(_("suspend mode: off; executing %s\n"), + arg[0]); + if (!ctl.dryrun) { + execv(arg[0], arg); + warn(_("failed to execute %s"), arg[0]); + rc = EX_EXEC_ENOENT; + } + } else { + /* Failed to find shutdown command */ + warn(_("failed to find shutdown command")); + rc = EX_EXEC_ENOENT; + } + break; + } + case ON_MODE: + { + unsigned long data; + + if (ctl.verbose) + printf(_("suspend mode: on; reading rtc\n")); + if (!ctl.dryrun) { + do { + t = read(fd, &data, sizeof data); + if (t < 0) { + warn(_("rtc read failed")); + break; + } + if (ctl.verbose) + printf("... %s: %03lx\n", devname, data); + } while (!(data & RTC_AF)); + } + break; + } + case DISABLE_MODE: + /* just break, alarm gets disabled in the end */ + if (ctl.verbose) + printf(_("suspend mode: disable; disabling alarm\n")); + break; + case SHOW_MODE: + if (ctl.verbose) + printf(_("suspend mode: show; printing alarm info\n")); + if (print_alarm(&ctl, fd)) + rc = EXIT_FAILURE; + ctl.dryrun = 1; /* don't really disable alarm in the end, just show */ + break; + default: + if (ctl.verbose) + printf(_("suspend mode: %s; suspending system\n"), ctl.mode_str); + sync(); + suspend_system(&ctl); + } + + if (!ctl.dryrun) { + struct rtc_wkalrm wake; + + if (ioctl(fd, RTC_WKALM_RD, &wake) < 0) { + warn(_("read rtc alarm failed")); + rc = EXIT_FAILURE; + } else { + wake.enabled = 0; + if (ioctl(fd, RTC_WKALM_SET, &wake) < 0) { + warn(_("disable rtc alarm interrupt failed")); + rc = EXIT_FAILURE; + } + } + } + + close(fd); + return rc; +} diff --git a/sys-utils/setarch.8 b/sys-utils/setarch.8 new file mode 100644 index 0000000..cf92ce4 --- /dev/null +++ b/sys-utils/setarch.8 @@ -0,0 +1,155 @@ +'\" t +.\" Title: setarch +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: 
System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "SETARCH" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +setarch \- change reported architecture in new program environment and/or set personality flags +.SH "SYNOPSIS" +.sp +\fBsetarch\fP [\fIarch\fP] [options] [\fIprogram\fP [\fIargument\fP...]] +.sp +\fBsetarch\fP \fB\-\-list\fP|\fB\-h\fP|\fB\-V\fP +.sp +\fBarch\fP [options] [\fIprogram\fP [\fIargument\fP...]] +.SH "DESCRIPTION" +.sp +\fBsetarch\fP modifies execution domains and process personality flags. +.sp +The execution domain currently only affects the output of \fBuname \-m\fP. For example, on an AMD64 system, running \fBsetarch i386\fP \fIprogram\fP will cause \fIprogram\fP to see i686 instead of \fIx86_64\fP as the machine type. It can also be used to set various personality options. The default \fIprogram\fP is \fB/bin/sh\fP. +.sp +Since version 2.33 the \fIarch\fP command line argument is optional and \fBsetarch\fP may be used to change personality flags (ADDR_LIMIT_*, SHORT_INODE, etc) without modification of the execution domain. +.SH "OPTIONS" +.sp +\fB\-\-list\fP +.RS 4 +List the architectures that \fBsetarch\fP knows about. Whether \fBsetarch\fP can actually set each of these architectures depends on the running kernel. +.RE +.sp +\fB\-\-uname\-2.6\fP +.RS 4 +Causes the \fIprogram\fP to see a kernel version number beginning with 2.6. Turns on \fBUNAME26\fP. +.RE +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Be verbose. +.RE +.sp +\fB\-3\fP, \fB\-\-3gb\fP +.RS 4 +Specifies \fIprogram\fP should use a maximum of 3GB of address space. Supported on x86. Turns on \fBADDR_LIMIT_3GB\fP. +.RE +.sp +\fB\-\-4gb\fP +.RS 4 +This option has no effect. 
It is retained for backward compatibility only, and may be removed in future releases. +.RE +.sp +\fB\-B\fP, \fB\-\-32bit\fP +.RS 4 +Limit the address space to 32 bits to emulate hardware. Supported on ARM and Alpha. Turns on \fBADDR_LIMIT_32BIT\fP. +.RE +.sp +\fB\-F\fP, \fB\-\-fdpic\-funcptrs\fP +.RS 4 +Treat user\-space function pointers to signal handlers as pointers to address descriptors. This option has no effect on architectures that do not support \fBFDPIC\fP ELF binaries. In kernel v4.14 support is limited to ARM, Blackfin, Fujitsu FR\-V, and SuperH CPU architectures. +.RE +.sp +\fB\-I\fP, \fB\-\-short\-inode\fP +.RS 4 +Obsolete bug emulation flag. Turns on \fBSHORT_INODE\fP. +.RE +.sp +\fB\-L\fP, \fB\-\-addr\-compat\-layout\fP +.RS 4 +Provide legacy virtual address space layout. Use when the \fIprogram\fP binary does not have \fBPT_GNU_STACK\fP ELF header. Turns on \fBADDR_COMPAT_LAYOUT\fP. +.RE +.sp +\fB\-R\fP, \fB\-\-addr\-no\-randomize\fP +.RS 4 +Disables randomization of the virtual address space. Turns on \fBADDR_NO_RANDOMIZE\fP. +.RE +.sp +\fB\-S\fP, \fB\-\-whole\-seconds\fP +.RS 4 +Obsolete bug emulation flag. Turns on \fBWHOLE_SECONDS\fP. +.RE +.sp +\fB\-T\fP, \fB\-\-sticky\-timeouts\fP +.RS 4 +This makes \fBselect\fP(2), \fBpselect\fP(2), and \fBppoll\fP(2) system calls preserve the timeout value instead of modifying it to reflect the amount of time not slept when interrupted by a signal handler. Use when \fIprogram\fP depends on this behavior. For more details see the timeout description in \fBselect\fP(2) manual page. Turns on \fBSTICKY_TIMEOUTS\fP. +.RE +.sp +\fB\-X\fP, \fB\-\-read\-implies\-exec\fP +.RS 4 +If this is set then \fBmmap\fP(3p) \fBPROT_READ\fP will also add the \fBPROT_EXEC\fP bit \- as expected by legacy x86 binaries. Notice that the ELF loader will automatically set this bit when it encounters a legacy binary. Turns on \fBREAD_IMPLIES_EXEC\fP. 
+.RE +.sp +\fB\-Z\fP, \fB\-\-mmap\-page\-zero\fP +.RS 4 +SVr4 bug emulation that will set \fBmmap\fP(3p) page zero as read\-only. Use when \fIprogram\fP depends on this behavior, and the source code is not available to be fixed. Turns on \fBMMAP_PAGE_ZERO\fP. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "EXAMPLE" +.sp +.if n .RS 4 +.nf +.fam C +setarch \-\-addr\-no\-randomize mytestprog +setarch ppc32 rpmbuild \-\-target=ppc \-\-rebuild foo.src.rpm +setarch ppc32 \-v \-vL3 rpmbuild \-\-target=ppc \-\-rebuild bar.src.rpm +setarch ppc32 \-\-32bit rpmbuild \-\-target=ppc \-\-rebuild foo.src.rpm +.fam +.fi +.if n .RE +.SH "AUTHORS" +.sp +.MTO "sopwith\(atredhat.com" "Elliot Lee" "," +.MTO "jnovy\(atredhat.com" "Jindrich Novy" "," +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +\fBpersonality\fP(2), +\fBselect\fP(2) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBsetarch\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/setarch.8.adoc b/sys-utils/setarch.8.adoc new file mode 100644 index 0000000..d204028 --- /dev/null +++ b/sys-utils/setarch.8.adoc @@ -0,0 +1,101 @@ +//po4a: entry man manual += setarch(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: setarch + +== NAME + +setarch - change reported architecture in new program environment and/or set personality flags + +== SYNOPSIS + +*setarch* [_arch_] [options] [_program_ [_argument_...]] + +*setarch* *--list*|*-h*|*-V* + +*arch* [options] [_program_ [_argument_...]] + +== DESCRIPTION + +*setarch* modifies execution domains and process personality flags. + +The execution domain currently only affects the output of *uname -m*. For example, on an AMD64 system, running *setarch i386* _program_ will cause _program_ to see i686 instead of _x86_64_ as the machine type. It can also be used to set various personality options. The default _program_ is */bin/sh*. + +Since version 2.33 the _arch_ command line argument is optional and *setarch* may be used to change personality flags (ADDR_LIMIT_*, SHORT_INODE, etc) without modification of the execution domain. + +== OPTIONS + +*--list*:: +List the architectures that *setarch* knows about. Whether *setarch* can actually set each of these architectures depends on the running kernel. + +*--uname-2.6*:: +Causes the _program_ to see a kernel version number beginning with 2.6. Turns on *UNAME26*. + +*-v*, *--verbose*:: +Be verbose. + +*-3*, *--3gb*:: +Specifies _program_ should use a maximum of 3GB of address space. Supported on x86. Turns on *ADDR_LIMIT_3GB*. + +*--4gb*:: +This option has no effect. It is retained for backward compatibility only, and may be removed in future releases. + +*-B*, *--32bit*:: +Limit the address space to 32 bits to emulate hardware. Supported on ARM and Alpha. Turns on *ADDR_LIMIT_32BIT*. 
+ +*-F*, *--fdpic-funcptrs*:: +Treat user-space function pointers to signal handlers as pointers to address descriptors. This option has no effect on architectures that do not support *FDPIC* ELF binaries. In kernel v4.14 support is limited to ARM, Blackfin, Fujitsu FR-V, and SuperH CPU architectures. + +*-I*, *--short-inode*:: +Obsolete bug emulation flag. Turns on *SHORT_INODE*. + +*-L*, *--addr-compat-layout*:: +Provide legacy virtual address space layout. Use when the _program_ binary does not have *PT_GNU_STACK* ELF header. Turns on *ADDR_COMPAT_LAYOUT*. + +*-R*, *--addr-no-randomize*:: +Disables randomization of the virtual address space. Turns on *ADDR_NO_RANDOMIZE*. + +*-S*, *--whole-seconds*:: +Obsolete bug emulation flag. Turns on *WHOLE_SECONDS*. + +*-T*, *--sticky-timeouts*:: +This makes *select*(2), *pselect*(2), and *ppoll*(2) system calls preserve the timeout value instead of modifying it to reflect the amount of time not slept when interrupted by a signal handler. Use when _program_ depends on this behavior. For more details see the timeout description in *select*(2) manual page. Turns on *STICKY_TIMEOUTS*. + +*-X*, *--read-implies-exec*:: +If this is set then *mmap*(3p) *PROT_READ* will also add the *PROT_EXEC* bit - as expected by legacy x86 binaries. Notice that the ELF loader will automatically set this bit when it encounters a legacy binary. Turns on *READ_IMPLIES_EXEC*. + +*-Z*, *--mmap-page-zero*:: +SVr4 bug emulation that will set *mmap*(3p) page zero as read-only. Use when _program_ depends on this behavior, and the source code is not available to be fixed. Turns on *MMAP_PAGE_ZERO*. + +include::man-common/help-version.adoc[] + +== EXAMPLE + +.... +setarch --addr-no-randomize mytestprog +setarch ppc32 rpmbuild --target=ppc --rebuild foo.src.rpm +setarch ppc32 -v -vL3 rpmbuild --target=ppc --rebuild bar.src.rpm +setarch ppc32 --32bit rpmbuild --target=ppc --rebuild foo.src.rpm +.... 
+ +== AUTHORS + +mailto:sopwith@redhat.com[Elliot Lee], +mailto:jnovy@redhat.com[Jindrich Novy], +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +*personality*(2), +*select*(2) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/setarch.c b/sys-utils/setarch.c new file mode 100644 index 0000000..b86be1e --- /dev/null +++ b/sys-utils/setarch.c @@ -0,0 +1,479 @@ +/* + * Copyright (C) 2003-2007 Red Hat, Inc. + * + * This file is part of util-linux. + * + * This file is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This file is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * + * Written by Elliot Lee <sopwith@redhat.com> + * New personality options & code added by Jindrich Novy <jnovy@redhat.com> + * ADD_NO_RANDOMIZE flag added by Arjan van de Ven <arjanv@redhat.com> + * Help and MIPS support from Mike Frysinger (vapier@gentoo.org) + * Better error handling from Dmitry V. 
Levin (ldv@altlinux.org) + * + * based on ideas from the ppc32 util by Guy Streeter (2002-01), based on the + * sparc32 util by Jakub Jelinek (1998, 1999) + */ + +#include <sys/personality.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <getopt.h> +#include <limits.h> +#include <sys/utsname.h> +#include "nls.h" +#include "c.h" +#include "closestream.h" + +#ifndef HAVE_PERSONALITY +# include <syscall.h> +# define personality(pers) ((long)syscall(SYS_personality, pers)) +#endif + +#define turn_on(_flag, _opts) \ + do { \ + (_opts) |= _flag; \ + if (verbose) \ + printf(_("Switching on %s.\n"), #_flag); \ + } while(0) + +#ifndef UNAME26 +# define UNAME26 0x0020000 +#endif +#ifndef ADDR_NO_RANDOMIZE +# define ADDR_NO_RANDOMIZE 0x0040000 +#endif +#ifndef FDPIC_FUNCPTRS +# define FDPIC_FUNCPTRS 0x0080000 +#endif +#ifndef MMAP_PAGE_ZERO +# define MMAP_PAGE_ZERO 0x0100000 +#endif +#ifndef ADDR_COMPAT_LAYOUT +# define ADDR_COMPAT_LAYOUT 0x0200000 +#endif +#ifndef READ_IMPLIES_EXEC +# define READ_IMPLIES_EXEC 0x0400000 +#endif +#ifndef ADDR_LIMIT_32BIT +# define ADDR_LIMIT_32BIT 0x0800000 +#endif +#ifndef SHORT_INODE +# define SHORT_INODE 0x1000000 +#endif +#ifndef WHOLE_SECONDS +# define WHOLE_SECONDS 0x2000000 +#endif +#ifndef STICKY_TIMEOUTS +# define STICKY_TIMEOUTS 0x4000000 +#endif +#ifndef ADDR_LIMIT_3GB +# define ADDR_LIMIT_3GB 0x8000000 +#endif + + +struct arch_domain { + int perval; /* PER_* */ + const char *target_arch; + const char *result_arch; +}; + + +static void __attribute__((__noreturn__)) usage(int archwrapper) +{ + fputs(USAGE_HEADER, stdout); + if (!archwrapper) + printf(_(" %s [<arch>] [options] [<program> [<argument>...]]\n"), program_invocation_short_name); + else + printf(_(" %s [options] [<program> [<argument>...]]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, stdout); + fputs(_("Change the reported architecture and set personality flags.\n"), stdout); + + fputs(USAGE_OPTIONS, stdout); 
+ fputs(_(" -B, --32bit turns on ADDR_LIMIT_32BIT\n"), stdout); + fputs(_(" -F, --fdpic-funcptrs makes function pointers point to descriptors\n"), stdout); + fputs(_(" -I, --short-inode turns on SHORT_INODE\n"), stdout); + fputs(_(" -L, --addr-compat-layout changes the way virtual memory is allocated\n"), stdout); + fputs(_(" -R, --addr-no-randomize disables randomization of the virtual address space\n"), stdout); + fputs(_(" -S, --whole-seconds turns on WHOLE_SECONDS\n"), stdout); + fputs(_(" -T, --sticky-timeouts turns on STICKY_TIMEOUTS\n"), stdout); + fputs(_(" -X, --read-implies-exec turns on READ_IMPLIES_EXEC\n"), stdout); + fputs(_(" -Z, --mmap-page-zero turns on MMAP_PAGE_ZERO\n"), stdout); + fputs(_(" -3, --3gb limits the used address space to a maximum of 3 GB\n"), stdout); + fputs(_(" --4gb ignored (for backward compatibility only)\n"), stdout); + fputs(_(" --uname-2.6 turns on UNAME26\n"), stdout); + fputs(_(" -v, --verbose say what options are being switched on\n"), stdout); + + if (!archwrapper) + fputs(_(" --list list settable architectures, and exit\n"), stdout); + + fputs(USAGE_SEPARATOR, stdout); + printf(USAGE_HELP_OPTIONS(26)); + printf(USAGE_MAN_TAIL("setarch(8)")); + + exit(EXIT_SUCCESS); +} + +/* + * Returns inilialized list of all available execution domains. 
+ */ +static struct arch_domain *init_arch_domains(void) +{ + static struct utsname un; + size_t i; + + static struct arch_domain transitions[] = + { + {UNAME26, "uname26", NULL}, + {PER_LINUX32, "linux32", NULL}, + {PER_LINUX, "linux64", NULL}, +#if defined(__powerpc__) || defined(__powerpc64__) +# ifdef __BIG_ENDIAN__ + {PER_LINUX32, "ppc32", "ppc"}, + {PER_LINUX32, "ppc", "ppc"}, + {PER_LINUX, "ppc64", "ppc64"}, + {PER_LINUX, "ppc64pseries", "ppc64"}, + {PER_LINUX, "ppc64iseries", "ppc64"}, +# else + {PER_LINUX32, "ppc32", "ppcle"}, + {PER_LINUX32, "ppc", "ppcle"}, + {PER_LINUX32, "ppc32le", "ppcle"}, + {PER_LINUX32, "ppcle", "ppcle"}, + {PER_LINUX, "ppc64le", "ppc64le"}, +# endif +#endif +#if defined(__x86_64__) || defined(__i386__) || defined(__ia64__) + {PER_LINUX32, "i386", "i386"}, + {PER_LINUX32, "i486", "i386"}, + {PER_LINUX32, "i586", "i386"}, + {PER_LINUX32, "i686", "i386"}, + {PER_LINUX32, "athlon", "i386"}, +#endif +#if defined(__x86_64__) || defined(__i386__) + {PER_LINUX, "x86_64", "x86_64"}, +#endif +#if defined(__ia64__) || defined(__i386__) + {PER_LINUX, "ia64", "ia64"}, +#endif +#if defined(__hppa__) + {PER_LINUX32, "parisc32", "parisc"}, + {PER_LINUX32, "parisc", "parisc"}, + {PER_LINUX, "parisc64", "parisc64"}, +#endif +#if defined(__s390x__) || defined(__s390__) + {PER_LINUX32, "s390", "s390"}, + {PER_LINUX, "s390x", "s390x"}, +#endif +#if defined(__sparc64__) || defined(__sparc__) + {PER_LINUX32, "sparc", "sparc"}, + {PER_LINUX32, "sparc32bash", "sparc"}, + {PER_LINUX32, "sparc32", "sparc"}, + {PER_LINUX, "sparc64", "sparc64"}, +#endif +#if defined(__mips64__) || defined(__mips__) + {PER_LINUX32, "mips32", "mips"}, + {PER_LINUX32, "mips", "mips"}, + {PER_LINUX, "mips64", "mips64"}, +#endif +#if defined(__alpha__) + {PER_LINUX, "alpha", "alpha"}, + {PER_LINUX, "alphaev5", "alpha"}, + {PER_LINUX, "alphaev56", "alpha"}, + {PER_LINUX, "alphaev6", "alpha"}, + {PER_LINUX, "alphaev67", "alpha"}, +#endif +#if defined(__e2k__) + {PER_LINUX, "e2k", 
"e2k"}, + {PER_LINUX, "e2kv4", "e2k"}, + {PER_LINUX, "e2kv5", "e2k"}, + {PER_LINUX, "e2kv6", "e2k"}, + {PER_LINUX, "e2k4c", "e2k"}, + {PER_LINUX, "e2k8c", "e2k"}, + {PER_LINUX, "e2k1cp", "e2k"}, + {PER_LINUX, "e2k8c2", "e2k"}, + {PER_LINUX, "e2k12c", "e2k"}, + {PER_LINUX, "e2k16c", "e2k"}, + {PER_LINUX, "e2k2c3", "e2k"}, +#endif +#if defined(__arm__) || defined(__aarch64__) +# ifdef __BIG_ENDIAN__ + {PER_LINUX32, "armv7b", "arm"}, + {PER_LINUX32, "armv8b", "arm"}, +# else + {PER_LINUX32, "armv7l", "arm"}, + {PER_LINUX32, "armv8l", "arm"}, +# endif + {PER_LINUX32, "armh", "arm"}, + {PER_LINUX32, "arm", "arm"}, + {PER_LINUX, "arm64", "aarch64"}, + {PER_LINUX, "aarch64", "aarch64"}, +#endif + /* place holder, will be filled up at runtime */ + {-1, NULL, NULL}, + {-1, NULL, NULL} + }; + + /* Add the trivial transition {PER_LINUX, machine, machine} if no + * such target_arch is hardcoded yet. */ + uname(&un); + for (i = 0; transitions[i].perval >= 0; i++) + if (!strcmp(un.machine, transitions[i].target_arch)) + break; + if (transitions[i].perval < 0) { + unsigned long wrdsz = CHAR_BIT * sizeof(void *); + if (wrdsz == 32 || wrdsz == 64) { + /* fill up the place holder */ + transitions[i].perval = wrdsz == 32 ? PER_LINUX32 : PER_LINUX; + transitions[i].target_arch = un.machine; + transitions[i].result_arch = un.machine; + } + } + + return transitions; +} + +/* + * List all execution domains from transitions + */ +static void list_arch_domains(struct arch_domain *doms) +{ + struct arch_domain *d; + + for (d = doms; d->target_arch != NULL; d++) + printf("%s\n", d->target_arch); +} + +static struct arch_domain *get_arch_domain(struct arch_domain *doms, const char *pers) +{ + struct arch_domain *d; + + for (d = doms; d && d->perval >= 0; d++) { + if (!strcmp(pers, d->target_arch)) + break; + } + + return !d || d->perval < 0 ? 
NULL : d; +} + +static void verify_arch_domain(struct arch_domain *doms, struct arch_domain *target, const char *wanted) +{ + struct utsname un; + + if (!doms || !target || !target->result_arch) + return; + + uname(&un); + + if (!strcmp(un.machine, target->result_arch)) + return; + + if (!strcmp(target->result_arch, "i386") || + !strcmp(target->result_arch, "arm")) { + struct arch_domain *dom; + for (dom = doms; dom->target_arch != NULL; dom++) { + if (!dom->result_arch || strcmp(dom->result_arch, target->result_arch)) + continue; + if (!strcmp(dom->target_arch, un.machine)) + return; + } + } + + errx(EXIT_FAILURE, _("Kernel cannot set architecture to %s"), wanted); +} + +int main(int argc, char *argv[]) +{ + const char *arch = NULL; + unsigned long options = 0; + int verbose = 0; + int archwrapper; + int c; + struct arch_domain *doms = NULL, *target = NULL; + unsigned long pers_value = 0; + char *shell = NULL, *shell_arg = NULL; + + /* Options without equivalent short options */ + enum { + OPT_4GB = CHAR_MAX + 1, + OPT_UNAME26, + OPT_LIST + }; + + /* Options --3gb and --4gb are for compatibility with an old + * Debian setarch implementation. 
*/ + static const struct option longopts[] = { + {"help", no_argument, NULL, 'h'}, + {"version", no_argument, NULL, 'V'}, + {"verbose", no_argument, NULL, 'v'}, + {"addr-no-randomize", no_argument, NULL, 'R'}, + {"fdpic-funcptrs", no_argument, NULL, 'F'}, + {"mmap-page-zero", no_argument, NULL, 'Z'}, + {"addr-compat-layout", no_argument, NULL, 'L'}, + {"read-implies-exec", no_argument, NULL, 'X'}, + {"32bit", no_argument, NULL, 'B'}, + {"short-inode", no_argument, NULL, 'I'}, + {"whole-seconds", no_argument, NULL, 'S'}, + {"sticky-timeouts", no_argument, NULL, 'T'}, + {"3gb", no_argument, NULL, '3'}, + {"4gb", no_argument, NULL, OPT_4GB}, + {"uname-2.6", no_argument, NULL, OPT_UNAME26}, + {"list", no_argument, NULL, OPT_LIST}, + {NULL, 0, NULL, 0} + }; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + if (argc < 1) { + warnx(_("Not enough arguments")); + errtryhelp(EXIT_FAILURE); + } + archwrapper = strcmp(program_invocation_short_name, "setarch") != 0; + if (archwrapper) { + arch = program_invocation_short_name; /* symlinks to setarch */ + + /* Don't use ifdef sparc here, we get "Unrecognized architecture" + * error message later if necessary */ + if (strcmp(arch, "sparc32bash") == 0) { + shell = "/bin/bash"; + shell_arg = ""; + goto set_arch; + } + } else { + if (1 < argc && *argv[1] != '-') { + arch = argv[1]; + argv[1] = argv[0]; /* for getopt_long() to get the program name */ + argv++; + argc--; + } + } + + while ((c = getopt_long(argc, argv, "+hVv3BFILRSTXZ", longopts, NULL)) != -1) { + switch (c) { + case 'v': + verbose = 1; + break; + case 'R': + turn_on(ADDR_NO_RANDOMIZE, options); + break; + case 'F': + turn_on(FDPIC_FUNCPTRS, options); + break; + case 'Z': + turn_on(MMAP_PAGE_ZERO, options); + break; + case 'L': + turn_on(ADDR_COMPAT_LAYOUT, options); + break; + case 'X': + turn_on(READ_IMPLIES_EXEC, options); + break; + case 'B': + turn_on(ADDR_LIMIT_32BIT, options); + break; + case 'I': 
+ turn_on(SHORT_INODE, options); + break; + case 'S': + turn_on(WHOLE_SECONDS, options); + break; + case 'T': + turn_on(STICKY_TIMEOUTS, options); + break; + case '3': + turn_on(ADDR_LIMIT_3GB, options); + break; + case OPT_4GB: /* just ignore this one */ + break; + case OPT_UNAME26: + turn_on(UNAME26, options); + break; + case OPT_LIST: + if (!archwrapper) { + list_arch_domains(init_arch_domains()); + return EXIT_SUCCESS; + } else + warnx(_("unrecognized option '--list'")); + /* fallthrough */ + + default: + errtryhelp(EXIT_FAILURE); + case 'h': + usage(archwrapper); + case 'V': + print_version(EXIT_SUCCESS); + } + } + + if (!arch && !options) + errx(EXIT_FAILURE, _("no architecture argument or personality flags specified")); + + argc -= optind; + argv += optind; + +set_arch: + /* get execution domain (architecture) */ + if (arch) { + doms = init_arch_domains(); + target = get_arch_domain(doms, arch); + + if (!target) + errx(EXIT_FAILURE, _("%s: Unrecognized architecture"), arch); + pers_value = target->perval; + } + + /* add personality flags */ + pers_value |= options; + + /* call kernel */ + if (personality(pers_value) < 0) { + /* + * Depending on architecture and kernel version, personality + * syscall is either capable or incapable of returning an error. + * If the return value is not an error, then it's the previous + * personality value, which can be an arbitrary value + * undistinguishable from an error value. + * To make things clear, a second call is needed. + */ + if (personality(pers_value) < 0) + err(EXIT_FAILURE, _("failed to set personality to %s"), arch); + } + + /* make sure architecture is set as expected */ + if (arch) + verify_arch_domain(doms, target, arch); + + if (!argc) { + shell = "/bin/sh"; + shell_arg = "-sh"; + } + if (verbose) { + printf(_("Execute command `%s'.\n"), shell ? 
shell : argv[0]); + /* flush all output streams before exec */ + fflush(NULL); + } + + /* Execute shell */ + if (shell) { + execl(shell, shell_arg, (char *)NULL); + errexec(shell); + } + + /* Execute on command line specified command */ + execvp(argv[0], argv); + errexec(argv[0]); +} diff --git a/sys-utils/setpriv.1 b/sys-utils/setpriv.1 new file mode 100644 index 0000000..f50edb2 --- /dev/null +++ b/sys-utils/setpriv.1 @@ -0,0 +1,205 @@ +'\" t +.\" Title: setpriv +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "SETPRIV" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +setpriv \- run a program with different Linux privilege settings +.SH "SYNOPSIS" +.sp +\fBsetpriv\fP [options] \fIprogram\fP [\fIarguments\fP] +.SH "DESCRIPTION" +.sp +Sets or queries various Linux privilege settings that are inherited across \fBexecve\fP(2). +.sp +In comparison to \fBsu\fP(1) and \fBrunuser\fP(1), \fBsetpriv\fP neither uses PAM, nor does it prompt for a password. It is a simple, non\-set\-user\-ID wrapper around \fBexecve\fP(2), and can be used to drop privileges in the same way as \fBsetuidgid\fP(8) from \fBdaemontools\fP, \fBchpst\fP(8) from \fBrunit\fP, or similar tools shipped by other service managers. +.SH "OPTIONS" +.sp +\fB\-\-clear\-groups\fP +.RS 4 +Clear supplementary groups. +.RE +.sp +\fB\-d\fP, \fB\-\-dump\fP +.RS 4 +Dump the current privilege state. This option can be specified more than once to show extra, mostly useless, information. Incompatible with all other options. +.RE +.sp +\fB\-\-groups\fP \fIgroup\fP... +.RS 4 +Set supplementary groups. 
The argument is a comma\-separated list of GIDs or names. +.RE +.sp +\fB\-\-inh\-caps\fP (\fB+\fP|\fB\-\fP)\fIcap\fP..., \fB\-\-ambient\-caps\fP (\fB+\fP|\fB\-\fP)\fIcap\fP..., \fB\-\-bounding\-set\fP (\fB+\fP|\fB\-\fP)\fIcap\fP... +.RS 4 +Set the inheritable capabilities, ambient capabilities or the capability bounding set. See \fBcapabilities\fP(7). The argument is a comma\-separated list of \fB+\fP\fIcap\fP and \fB\-\fP\fIcap\fP entries, which add or remove an entry respectively. \fIcap\fP can either be a human\-readable name as seen in \fBcapabilities\fP(7) without the \fIcap_\fP prefix or of the format \fBcap_N\fP, where \fIN\fP is the internal capability index used by Linux. \fB+all\fP and \fB\-all\fP can be used to add or remove all caps. +.sp +The set of capabilities starts out as the current inheritable set for \fB\-\-inh\-caps\fP, the current ambient set for \fB\-\-ambient\-caps\fP and the current bounding set for \fB\-\-bounding\-set\fP. +.sp +Note the following restrictions (detailed in \fBcapabilities\fP(7)) regarding modifications to these capability sets: +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +A capability can be added to the inheritable set only if it is currently present in the bounding set. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +A capability can be added to the ambient set only if it is currently present in both the permitted and inheritable sets. +.RE +.sp +.RS 4 +.ie n \{\ +\h'-04'\(bu\h'+03'\c +.\} +.el \{\ +. sp -1 +. IP \(bu 2.3 +.\} +Notwithstanding the syntax offered by \fBsetpriv\fP, the kernel does not permit capabilities to be added to the bounding set. +.RE +.RE +.sp +If you drop a capability from the bounding set without also dropping it from the inheritable set, you are likely to become confused. Do not do that. +.sp +\fB\-\-keep\-groups\fP +.RS 4 +Preserve supplementary groups. 
Only useful in conjunction with \fB\-\-rgid\fP, \fB\-\-egid\fP, or \fB\-\-regid\fP. +.RE +.sp +\fB\-\-init\-groups\fP +.RS 4 +Initialize supplementary groups using \fBinitgroups\fP(3). Only useful in conjunction with \fB\-\-ruid\fP or \fB\-\-reuid\fP. +.RE +.sp +\fB\-\-list\-caps\fP +.RS 4 +List all known capabilities. This option must be specified alone. +.RE +.sp +\fB\-\-no\-new\-privs\fP +.RS 4 +Set the \fIno_new_privs\fP bit. With this bit set, \fBexecve\fP(2) will not grant new privileges. For example, the set\-user\-ID and set\-group\-ID bits as well as file capabilities will be disabled. (Executing binaries with these bits set will still work, but they will not gain privileges. Certain LSMs, especially AppArmor, may result in failures to execute certain programs.) This bit is inherited by child processes and cannot be unset. See \fBprctl\fP(2) and \fIDocumentation/prctl/no_new_privs.txt\fP in the Linux kernel source. +.sp +The \fIno_new_privs\fP bit is supported since Linux 3.5. +.RE +.sp +\fB\-\-rgid\fP \fIgid\fP, \fB\-\-egid\fP \fIgid\fP, \fB\-\-regid\fP \fIgid\fP +.RS 4 +Set the real, effective, or both GIDs. The \fIgid\fP argument can be given as a textual group name. +.sp +For safety, you must specify one of \fB\-\-clear\-groups\fP, \fB\-\-groups\fP, \fB\-\-keep\-groups\fP, or \fB\-\-init\-groups\fP if you set any primary \fIgid\fP. +.RE +.sp +\fB\-\-ruid\fP \fIuid\fP, \fB\-\-euid\fP \fIuid\fP, \fB\-\-reuid\fP \fIuid\fP +.RS 4 +Set the real, effective, or both UIDs. The \fIuid\fP argument can be given as a textual login name. +.sp +Setting a \fIuid\fP or \fIgid\fP does not change capabilities, although the exec call at the end might change capabilities. This means that, if you are root, you probably want to do something like: +.sp +\fBsetpriv \-\-reuid=1000 \-\-regid=1000 \-\-inh\-caps=\-all\fP +.RE +.sp +\fB\-\-securebits\fP (\fB+\fP|\fB\-\fP)\fIsecurebit\fP... +.RS 4 +Set or clear securebits. The argument is a comma\-separated list. 
The valid securebits are \fInoroot\fP, \fInoroot_locked\fP, \fIno_setuid_fixup\fP, \fIno_setuid_fixup_locked\fP, and \fIkeep_caps_locked\fP. \fIkeep_caps\fP is cleared by \fBexecve\fP(2) and is therefore not allowed. +.RE +.sp +\fB\-\-pdeathsig keep\fP|\fBclear\fP|\fB<signal>\fP +.RS 4 +Keep, clear or set the parent death signal. Some LSMs, most notably SELinux and AppArmor, clear the signal when the process\(aq credentials change. Using \fB\-\-pdeathsig keep\fP will restore the parent death signal after changing credentials to remedy that situation. +.RE +.sp +\fB\-\-selinux\-label\fP \fIlabel\fP +.RS 4 +Request a particular SELinux transition (using a transition on exec, not dyntrans). This will fail and cause \fBsetpriv\fP to abort if SELinux is not in use, and the transition may be ignored or cause \fBexecve\fP(2) to fail at SELinux\(cqs whim. (In particular, this is unlikely to work in conjunction with \fIno_new_privs\fP.) This is similar to \fBruncon\fP(1). +.RE +.sp +\fB\-\-apparmor\-profile\fP \fIprofile\fP +.RS 4 +Request a particular AppArmor profile (using a transition on exec). This will fail and cause \fBsetpriv\fP to abort if AppArmor is not in use, and the transition may be ignored or cause \fBexecve\fP(2) to fail at AppArmor\(cqs whim. +.RE +.sp +\fB\-\-reset\-env\fP +.RS 4 +Clears all the environment variables except \fBTERM\fP; initializes the environment variables \fBHOME\fP, \fBSHELL\fP, \fBUSER\fP, \fBLOGNAME\fP according to the user\(cqs passwd entry; sets \fBPATH\fP to \fI/usr/local/bin:/bin:/usr/bin\fP for a regular user and to \fI/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin\fP for root. +.sp +The environment variable \fBPATH\fP may be different on systems where \fI/bin\fP and \fI/sbin\fP are merged into \fI/usr\fP. The environment variable \fBSHELL\fP defaults to \fB/bin/sh\fP if none is given in the user\(cqs passwd entry. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. 
+.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "NOTES" +.sp +If applying any specified option fails, \fIprogram\fP will not be run and \fBsetpriv\fP will return with exit status 127. +.sp +Be careful with this tool \(em it may have unexpected security consequences. For example, setting \fIno_new_privs\fP and then execing a program that is SELinux\-confined (as this tool would do) may prevent the SELinux restrictions from taking effect. +.SH "EXAMPLES" +.sp +If you\(cqre looking for behavior similar to \fBsu\fP(1)/\fBrunuser\fP(1), or \fBsudo\fP(8) (without the \fB\-g\fP option), try something like: +.sp +\fBsetpriv \-\-reuid=1000 \-\-regid=1000 \-\-init\-groups\fP +.sp +If you want to mimic daemontools\(aq \fBsetuid\fP(8), try: +.sp +\fBsetpriv \-\-reuid=1000 \-\-regid=1000 \-\-clear\-groups\fP +.SH "AUTHORS" +.sp +.MTO "luto\(atamacapital.net" "Andy Lutomirski" "" +.SH "SEE ALSO" +.sp +\fBrunuser\fP(1), +\fBsu\fP(1), +\fBprctl\fP(2), +\fBcapabilities\fP(7) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBsetpriv\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/setpriv.1.adoc b/sys-utils/setpriv.1.adoc new file mode 100644 index 0000000..a0ad6f8 --- /dev/null +++ b/sys-utils/setpriv.1.adoc @@ -0,0 +1,127 @@ +//po4a: entry man manual += setpriv(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: setpriv + +== NAME + +setpriv - run a program with different Linux privilege settings + +== SYNOPSIS + +*setpriv* [options] _program_ [_arguments_] + +== DESCRIPTION + +Sets or queries various Linux privilege settings that are inherited across *execve*(2). + +In comparison to *su*(1) and *runuser*(1), *setpriv* neither uses PAM, nor does it prompt for a password. It is a simple, non-set-user-ID wrapper around *execve*(2), and can be used to drop privileges in the same way as *setuidgid*(8) from *daemontools*, *chpst*(8) from *runit*, or similar tools shipped by other service managers. + +== OPTIONS + +*--clear-groups*:: +Clear supplementary groups. + +*-d*, *--dump*:: +Dump the current privilege state. This option can be specified more than once to show extra, mostly useless, information. Incompatible with all other options. + +*--groups* _group_...:: +Set supplementary groups. The argument is a comma-separated list of GIDs or names. + +*--inh-caps* (*+*|*-*)_cap_...:: +*--ambient-caps* (*+*|*-*)_cap_...:: +*--bounding-set* (*+*|*-*)_cap_...:: +Set the inheritable capabilities, ambient capabilities or the capability bounding set. See *capabilities*(7). The argument is a comma-separated list of **+**__cap__ and **-**__cap__ entries, which add or remove an entry respectively. _cap_ can either be a human-readable name as seen in *capabilities*(7) without the _cap__ prefix or of the format *cap_N*, where _N_ is the internal capability index used by Linux. *+all* and *-all* can be used to add or remove all caps. 
++ +The set of capabilities starts out as the current inheritable set for *--inh-caps*, the current ambient set for *--ambient-caps* and the current bounding set for *--bounding-set*. ++ +Note the following restrictions (detailed in *capabilities*(7)) regarding modifications to these capability sets: + +* A capability can be added to the inheritable set only if it is currently present in the bounding set. +* A capability can be added to the ambient set only if it is currently present in both the permitted and inheritable sets. +* Notwithstanding the syntax offered by *setpriv*, the kernel does not permit capabilities to be added to the bounding set. + +If you drop a capability from the bounding set without also dropping it from the inheritable set, you are likely to become confused. Do not do that. + +*--keep-groups*:: +Preserve supplementary groups. Only useful in conjunction with *--rgid*, *--egid*, or *--regid*. + +*--init-groups*:: +Initialize supplementary groups using *initgroups*(3). Only useful in conjunction with *--ruid* or *--reuid*. + +*--list-caps*:: +List all known capabilities. This option must be specified alone. + +*--no-new-privs*:: +Set the _no_new_privs_ bit. With this bit set, *execve*(2) will not grant new privileges. For example, the set-user-ID and set-group-ID bits as well as file capabilities will be disabled. (Executing binaries with these bits set will still work, but they will not gain privileges. Certain LSMs, especially AppArmor, may result in failures to execute certain programs.) This bit is inherited by child processes and cannot be unset. See *prctl*(2) and _Documentation/prctl/no_new_privs.txt_ in the Linux kernel source. ++ +The _no_new_privs_ bit is supported since Linux 3.5. + +*--rgid* _gid_, *--egid* _gid_, *--regid* _gid_:: +Set the real, effective, or both GIDs. The _gid_ argument can be given as a textual group name.
++ +For safety, you must specify one of *--clear-groups*, *--groups*, *--keep-groups*, or *--init-groups* if you set any primary _gid_. + +*--ruid* _uid_, *--euid* _uid_, *--reuid* _uid_:: +Set the real, effective, or both UIDs. The _uid_ argument can be given as a textual login name. ++ +Setting a _uid_ or _gid_ does not change capabilities, although the exec call at the end might change capabilities. This means that, if you are root, you probably want to do something like: ++ +*setpriv --reuid=1000 --regid=1000 --inh-caps=-all* + +*--securebits* (**+**|*-*)__securebit__...:: +Set or clear securebits. The argument is a comma-separated list. The valid securebits are _noroot_, _noroot_locked_, _no_setuid_fixup_, _no_setuid_fixup_locked_, and _keep_caps_locked_. _keep_caps_ is cleared by *execve*(2) and is therefore not allowed. + +**--pdeathsig keep**|**clear**|*<signal>*:: +Keep, clear or set the parent death signal. Some LSMs, most notably SELinux and AppArmor, clear the signal when the process' credentials change. Using *--pdeathsig keep* will restore the parent death signal after changing credentials to remedy that situation. + +*--selinux-label* _label_:: +Request a particular SELinux transition (using a transition on exec, not dyntrans). This will fail and cause *setpriv* to abort if SELinux is not in use, and the transition may be ignored or cause *execve*(2) to fail at SELinux's whim. (In particular, this is unlikely to work in conjunction with _no_new_privs_.) This is similar to *runcon*(1). + +*--apparmor-profile* _profile_:: +Request a particular AppArmor profile (using a transition on exec). This will fail and cause *setpriv* to abort if AppArmor is not in use, and the transition may be ignored or cause *execve*(2) to fail at AppArmor's whim. 
+ +*--reset-env*:: +Clears all the environment variables except *TERM*; initializes the environment variables *HOME*, *SHELL*, *USER*, *LOGNAME* according to the user's passwd entry; sets *PATH* to _/usr/local/bin:/bin:/usr/bin_ for a regular user and to _/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin_ for root. ++ +The environment variable *PATH* may be different on systems where _/bin_ and _/sbin_ are merged into _/usr_. The environment variable *SHELL* defaults to */bin/sh* if none is given in the user's passwd entry. + +include::man-common/help-version.adoc[] + +== NOTES + +If applying any specified option fails, _program_ will not be run and *setpriv* will return with exit status 127. + +Be careful with this tool -- it may have unexpected security consequences. For example, setting _no_new_privs_ and then execing a program that is SELinux-confined (as this tool would do) may prevent the SELinux restrictions from taking effect. + +== EXAMPLES + +If you're looking for behavior similar to *su*(1)/*runuser*(1), or *sudo*(8) (without the *-g* option), try something like: + +*setpriv --reuid=1000 --regid=1000 --init-groups* + +If you want to mimic daemontools' *setuid*(8), try: + +*setpriv --reuid=1000 --regid=1000 --clear-groups* + +== AUTHORS + +mailto:luto@amacapital.net[Andy Lutomirski] + +== SEE ALSO + +*runuser*(1), +*su*(1), +*prctl*(2), +*capabilities*(7) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/setpriv.c b/sys-utils/setpriv.c new file mode 100644 index 0000000..ddc2cc6 --- /dev/null +++ b/sys-utils/setpriv.c @@ -0,0 +1,1061 @@ +/* + * setpriv(1) - set various kernel privilege bits and run something + * + * Copyright (C) 2012 Andy Lutomirski <luto@amacapital.net> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by 
the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <cap-ng.h> +#include <errno.h> +#include <getopt.h> +#include <grp.h> +#include <linux/securebits.h> +#include <pwd.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/prctl.h> +#include <sys/types.h> +#include <unistd.h> + +#include "c.h" +#include "caputils.h" +#include "closestream.h" +#include "nls.h" +#include "optutils.h" +#include "strutils.h" +#include "xalloc.h" +#include "pathnames.h" +#include "signames.h" +#include "env.h" + +#ifndef PR_SET_NO_NEW_PRIVS +# define PR_SET_NO_NEW_PRIVS 38 +#endif +#ifndef PR_GET_NO_NEW_PRIVS +# define PR_GET_NO_NEW_PRIVS 39 +#endif + +#define SETPRIV_EXIT_PRIVERR 127 /* how we exit when we fail to set privs */ + +/* The shell to set SHELL env.variable if none is given in the user's passwd entry. */ +#define DEFAULT_SHELL "/bin/sh" + +static gid_t get_group(const char *s, const char *err); + +enum cap_type { + CAP_TYPE_EFFECTIVE = CAPNG_EFFECTIVE, + CAP_TYPE_PERMITTED = CAPNG_PERMITTED, + CAP_TYPE_INHERITABLE = CAPNG_INHERITABLE, + CAP_TYPE_BOUNDING = CAPNG_BOUNDING_SET, + CAP_TYPE_AMBIENT = (1 << 4) +}; + +/* + * Note: We are subject to https://bugzilla.redhat.com/show_bug.cgi?id=895105 + * and we will therefore have problems if new capabilities are added. Once + * that bug is fixed, I'll (Andy Lutomirski) submit a corresponding fix to + * setpriv. In the mean time, the code here tries to work reasonably well. 
+ */ + +struct privctx { + unsigned int + nnp:1, /* no_new_privs */ + have_ruid:1, /* real uid */ + have_euid:1, /* effective uid */ + have_rgid:1, /* real gid */ + have_egid:1, /* effective gid */ + have_passwd:1, /* passwd entry */ + have_groups:1, /* add groups */ + keep_groups:1, /* keep groups */ + clear_groups:1, /* remove groups */ + init_groups:1, /* initialize groups */ + reset_env:1, /* reset environment */ + have_securebits:1; /* set securebits */ + + /* uids and gids */ + uid_t ruid, euid; + gid_t rgid, egid; + + /* real user passwd entry */ + struct passwd passwd; + + /* supplementary groups */ + size_t num_groups; + gid_t *groups; + + /* caps */ + const char *caps_to_inherit; + const char *ambient_caps; + const char *bounding_set; + + /* securebits */ + int securebits; + /* parent death signal (<0 clear, 0 nothing, >0 signal) */ + int pdeathsig; + + /* LSMs */ + const char *selinux_label; + const char *apparmor_profile; +}; + +/* Print the help text to stdout and exit with EXIT_SUCCESS. */ +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options] <program> [<argument>...]\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Run a program with different privilege settings.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -d, --dump show current state (and do not exec)\n"), out); + fputs(_(" --nnp, --no-new-privs disallow granting new privileges\n"), out); + fputs(_(" --ambient-caps <caps,...> set ambient capabilities\n"), out); + fputs(_(" --inh-caps <caps,...> set inheritable capabilities\n"), out); + fputs(_(" --bounding-set <caps> set capability bounding set\n"), out); + fputs(_(" --ruid <uid|user> set real uid\n"), out); + fputs(_(" --euid <uid|user> set effective uid\n"), out); + fputs(_(" --rgid <gid|group> set real gid\n"), out); + fputs(_(" --egid <gid|group> set effective gid\n"), out); + fputs(_(" --reuid <uid|user> set real and effective uid\n"), out); + fputs(_(" --regid <gid|group> set real
and effective gid\n"), out); + fputs(_(" --clear-groups clear supplementary groups\n"), out); + fputs(_(" --keep-groups keep supplementary groups\n"), out); + fputs(_(" --init-groups initialize supplementary groups\n"), out); + fputs(_(" --groups <group,...> set supplementary groups by GID or name\n"), out); + fputs(_(" --securebits <bits> set securebits\n"), out); + fputs(_(" --pdeathsig keep|clear|<signame>\n" + " set or clear parent death signal\n"), out); + fputs(_(" --selinux-label <label> set SELinux label\n"), out); + fputs(_(" --apparmor-profile <pr> set AppArmor profile\n"), out); + fputs(_(" --reset-env clear all environment and initialize\n" + " HOME, SHELL, USER, LOGNAME and PATH\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(29)); + fputs(USAGE_SEPARATOR, out); + fputs(_(" This tool can be dangerous. Read the manpage, and be careful.\n"), out); + printf(USAGE_MAN_TAIL("setpriv(1)")); + + exit(EXIT_SUCCESS); +} + +/* + * Query capability i in the set selected by which: libcap-ng for the + * effective, permitted, inheritable and bounding sets, + * prctl(PR_CAP_AMBIENT_IS_SET) for the ambient set. An unknown set type + * produces a warning and -1. + */ +static int has_cap(enum cap_type which, unsigned int i) +{ + switch (which) { + case CAP_TYPE_EFFECTIVE: + case CAP_TYPE_BOUNDING: + case CAP_TYPE_INHERITABLE: + case CAP_TYPE_PERMITTED: + return capng_have_capability((capng_type_t)which, i); + case CAP_TYPE_AMBIENT: + return prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, + (unsigned long) i, 0UL, 0UL); + default: + warnx(_("invalid capability type")); + return -1; + } +} + +/* Returns the number of capabilities printed.
*/ +static int print_caps(FILE *f, enum cap_type which) +{ + int i, n = 0, max = cap_last_cap(); + + for (i = 0; i <= max; i++) { + int ret = has_cap(which, i); + + if (i == 0 && ret < 0) /* the very first probe failed: report -1 instead of a count */ + return -1; + + if (ret == 1) { + const char *name = capng_capability_to_name(i); + if (n) + fputc(',', f); + if (name) + fputs(name, f); + else + warnx(_("cap %d: libcap-ng is broken"), i); + n++; + } + } + + return n; +} +/* If bit is set in *bits: print name (preceded by a comma unless *first is set), then clear bit from *bits. */ +static void dump_one_secbit(int *first, int *bits, int bit, const char *name) +{ + if (*bits & bit) { + if (*first) + *first = 0; + else + printf(","); + fputs(name, stdout); + *bits &= ~bit; + } +} +/* Print the "Securebits:" line from prctl(PR_GET_SECUREBITS); bits without a known name are printed in hex. */ +static void dump_securebits(void) +{ + int first = 1; + int bits = prctl(PR_GET_SECUREBITS, 0, 0, 0, 0); + + if (bits < 0) { + warnx(_("getting process secure bits failed")); + return; + } + + printf(_("Securebits: ")); + + dump_one_secbit(&first, &bits, SECBIT_NOROOT, "noroot"); + dump_one_secbit(&first, &bits, SECBIT_NOROOT_LOCKED, "noroot_locked"); + dump_one_secbit(&first, &bits, SECBIT_NO_SETUID_FIXUP, + "no_setuid_fixup"); + dump_one_secbit(&first, &bits, SECBIT_NO_SETUID_FIXUP_LOCKED, + "no_setuid_fixup_locked"); + bits &= ~SECBIT_KEEP_CAPS; /* keep_caps is masked out here, so it is never reported */ + dump_one_secbit(&first, &bits, SECBIT_KEEP_CAPS_LOCKED, + "keep_caps_locked"); + if (bits) { /* whatever is left has no name in the list above */ + if (first) + first = 0; + else + printf(","); + printf("0x%x", (unsigned)bits); + } + + if (first) + printf(_("[none]\n")); + else + printf("\n"); +} +/* Print "<name>: <contents of _PATH_PROC_ATTR_CURRENT>", dropping one trailing newline; warn and return if the file cannot be opened or read, or fills the buffer. */ +static void dump_label(const char *name) +{ + char buf[4097]; + ssize_t len; + int fd, e; + + fd = open(_PATH_PROC_ATTR_CURRENT, O_RDONLY); + if (fd == -1) { + warn(_("cannot open %s"), _PATH_PROC_ATTR_CURRENT); + return; + } + + len = read(fd, buf, sizeof(buf)); + e = errno; /* keep read()'s errno across close() */ + close(fd); + if (len < 0) { + errno = e; + warn(_("cannot read %s"), name); + return; + } + if (sizeof(buf) - 1 <= (size_t)len) { + warnx(_("%s: too long"), name); + return; + } + + buf[len] = 0; + if (0 < len && buf[len - 1] == '\n') + buf[len - 1] = 0; + printf("%s: %s\n", name, buf); +} +
+/* + * Print the supplementary group IDs returned by getgroups() as a + * comma-separated list, or "[none]" when there are none. + */ +static void dump_groups(void) +{ + int n = getgroups(0, NULL); + gid_t *groups; + + if (n < 0) { + warn("getgroups failed"); + return; + } + + groups = xmalloc(n * sizeof(gid_t)); + n = getgroups(n, groups); + if (n < 0) { + /* report first: free() may change errno, which warn() prints */ + warn("getgroups failed"); + free(groups); + return; + } + + printf(_("Supplementary groups: ")); + if (n == 0) + printf(_("[none]")); + else { + int i; + for (i = 0; i < n; i++) { + if (0 < i) + printf(","); + printf("%ld", (long)groups[i]); + } + } + printf("\n"); + free(groups); +} + +/* + * Print the parent death signal: by name when signum_to_signame() knows it, + * otherwise as a number, or "[none]" when it is 0. + */ +static void dump_pdeathsig(void) +{ + int pdeathsig; + + if (prctl(PR_GET_PDEATHSIG, &pdeathsig) != 0) { + warn(_("get pdeathsig failed")); + return; + } + + printf(_("Parent death signal: ")); + if (pdeathsig && signum_to_signame(pdeathsig) != NULL) + printf("%s\n", signum_to_signame(pdeathsig)); + else if (pdeathsig) + printf("%d\n", pdeathsig); + else + printf(_("[none]\n")); +} + +static void dump(int dumplevel) +{ + int x; + uid_t ru, eu, su; + gid_t rg, eg, sg; + + if (getresuid(&ru, &eu, &su) == 0) { + printf(_("uid: %u\n"), ru); + printf(_("euid: %u\n"), eu); + /* Saved and fs uids always equal euid. */ + if (3 <= dumplevel) + printf(_("suid: %u\n"), su); + } else + warn(_("getresuid failed")); + + if (getresgid(&rg, &eg, &sg) == 0) { + printf("gid: %ld\n", (long)rg); + printf("egid: %ld\n", (long)eg); + /* Saved and fs gids always equal egid.
*/ + if (dumplevel >= 3) + printf("sgid: %ld\n", (long)sg); + } else + warn(_("getresgid failed")); + + dump_groups(); + + x = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + if (0 <= x) + printf("no_new_privs: %d\n", x); + else + warn("getting no_new_privs failed"); + + if (2 <= dumplevel) { + printf(_("Effective capabilities: ")); + if (print_caps(stdout, CAP_TYPE_EFFECTIVE) == 0) + printf(_("[none]")); + printf("\n"); + + printf(_("Permitted capabilities: ")); + if (print_caps(stdout, CAP_TYPE_PERMITTED) == 0) + printf(_("[none]")); + printf("\n"); + } + + printf(_("Inheritable capabilities: ")); + if (print_caps(stdout, CAP_TYPE_INHERITABLE) == 0) + printf(_("[none]")); + printf("\n"); + + printf(_("Ambient capabilities: ")); + x = print_caps(stdout, CAP_TYPE_AMBIENT); + if (x == 0) + printf(_("[none]")); + if (x < 0) + printf(_("[unsupported]")); + printf("\n"); + + printf(_("Capability bounding set: ")); + if (print_caps(stdout, CAP_TYPE_BOUNDING) == 0) + printf(_("[none]")); + printf("\n"); + + dump_securebits(); + dump_pdeathsig(); + + if (access(_PATH_SYS_SELINUX, F_OK) == 0) + dump_label(_("SELinux label")); + + if (access(_PATH_SYS_APPARMOR, F_OK) == 0) { + dump_label(_("AppArmor profile")); + } +} + +/* Print the libcap-ng name of every capability from 0 to cap_last_cap(), one per line. */ +static void list_known_caps(void) +{ + int i, max = cap_last_cap(); + + for (i = 0; i <= max; i++) { + const char *name = capng_capability_to_name(i); + if (name) + printf("%s\n", name); + else + warnx(_("cap %d: libcap-ng is broken"), i); + } +} + +/* + * Parse a comma-separated list of group names or GIDs into opts->groups and + * opts->num_groups, and set opts->have_groups. The list is walked twice: + * once to count the entries, once to resolve them with get_group(). + */ +static void parse_groups(struct privctx *opts, const char *str) +{ + char *groups = xstrdup(str); + char *buf = groups; /* We'll reuse it */ + char *c; + size_t i = 0; + + opts->have_groups = 1; + opts->num_groups = 0; + while ((c = strsep(&groups, ","))) + opts->num_groups++; + + /* Start again */ + strcpy(buf, str); /* It's exactly the right length */ + groups = buf; + + opts->groups = xcalloc(opts->num_groups, sizeof(gid_t)); + while ((c = strsep(&groups, ","))) + opts->groups[i++] = get_group(c, _("Invalid
supplementary group id")); + + /* strsep() has left groups == NULL; buf still points at the allocation */ + free(buf); +} + +/* + * Interpret the --pdeathsig argument: "keep" stores the current parent death + * signal, "clear" stores -1, anything else must be a signal name known to + * signame_to_signum(). + */ +static void parse_pdeathsig(struct privctx *opts, const char *str) +{ + if (!strcmp(str, "keep")) { + if (prctl(PR_GET_PDEATHSIG, &opts->pdeathsig) != 0) + errx(SETPRIV_EXIT_PRIVERR, + _("failed to get parent death signal")); + } else if (!strcmp(str, "clear")) { + opts->pdeathsig = -1; + } else if ((opts->pdeathsig = signame_to_signum(str)) < 0) { + errx(EXIT_FAILURE, _("unknown signal: %s"), str); + } +} + +/* Set real/effective uid from opts (current values where not requested); saved uid follows effective. */ +static void do_setresuid(const struct privctx *opts) +{ + uid_t ruid, euid, suid; + if (getresuid(&ruid, &euid, &suid) != 0) + err(SETPRIV_EXIT_PRIVERR, _("getresuid failed")); + if (opts->have_ruid) + ruid = opts->ruid; + if (opts->have_euid) + euid = opts->euid; + + /* Also copy effective to saved (for paranoia). */ + if (setresuid(ruid, euid, euid) != 0) + err(SETPRIV_EXIT_PRIVERR, _("setresuid failed")); +} + +static void do_setresgid(const struct privctx *opts) +{ + gid_t rgid, egid, sgid; + if (getresgid(&rgid, &egid, &sgid) != 0) + err(SETPRIV_EXIT_PRIVERR, _("getresgid failed")); + if (opts->have_rgid) + rgid = opts->rgid; + if (opts->have_egid) + egid = opts->egid; + + /* Also copy effective to saved (for paranoia).
*/ + if (setresgid(rgid, egid, egid) != 0) + err(SETPRIV_EXIT_PRIVERR, _("setresgid failed")); +} + +/* Raise cap in the effective set when it is present in the permitted set. */ +static void bump_cap(unsigned int cap) +{ + if (capng_have_capability(CAPNG_PERMITTED, cap)) + capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, cap); +} + +/* + * Add or drop cap in the set selected by type: capng_update() for the + * libcap-ng sets, prctl(PR_CAP_AMBIENT_RAISE/LOWER) for the ambient set. + * Returns the result of the underlying call; an unknown type is fatal. + */ +static int cap_update(capng_act_t action, + enum cap_type type, unsigned int cap) +{ + switch (type) { + case CAP_TYPE_EFFECTIVE: + case CAP_TYPE_BOUNDING: + case CAP_TYPE_INHERITABLE: + case CAP_TYPE_PERMITTED: + return capng_update(action, (capng_type_t) type, cap); + case CAP_TYPE_AMBIENT: + { + int ret; + + if (action == CAPNG_ADD) + ret = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, + (unsigned long) cap, 0UL, 0UL); + else + ret = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, + (unsigned long) cap, 0UL, 0UL); + + return ret; + } + default: + errx(EXIT_FAILURE, _("unsupported capability type")); + return -1; + } +} + +/* + * Apply a comma-separated list of "+cap" / "-cap" entries to the capability + * set selected by type. Each cap is a libcap-ng name, "cap_N" with N up to + * cap_last_cap(), or "all". A malformed or unknown entry is fatal. + */ +static void do_caps(enum cap_type type, const char *caps) +{ + char *my_caps = xstrdup(caps); + char *p = my_caps; /* strsep() cursor; my_caps keeps the allocation for free() */ + char *c; + + while ((c = strsep(&p, ","))) { + capng_act_t action; + if (*c == '+') + action = CAPNG_ADD; + else if (*c == '-') + action = CAPNG_DROP; + else + errx(EXIT_FAILURE, _("bad capability string")); + + if (!strcmp(c + 1, "all")) { + int i; + /* We can trust the return value from cap_last_cap(), + * so use that directly.
*/ + for (i = 0; i <= cap_last_cap(); i++) + cap_update(action, type, i); + } else { + int cap = capng_name_to_capability(c + 1); + if (0 <= cap) + cap_update(action, type, cap); + else if (sscanf(c + 1, "cap_%d", &cap) == 1 + && 0 <= cap && cap <= cap_last_cap()) + cap_update(action, type, cap); + else + errx(EXIT_FAILURE, + _("unknown capability \"%s\""), c + 1); + } + } + + free(my_caps); +} + +/* + * Start from the current securebits, apply the comma-separated "+bit" / + * "-bit" entries of arg, and store the result in opts->securebits with + * SECBIT_KEEP_CAPS forced on. Sets opts->have_securebits. + */ +static void parse_securebits(struct privctx *opts, const char *arg) +{ + char *buf = xstrdup(arg); + char *p = buf; /* strsep() cursor; buf keeps the allocation for free() */ + char *c; + + opts->have_securebits = 1; + opts->securebits = prctl(PR_GET_SECUREBITS, 0, 0, 0, 0); + if (opts->securebits < 0) + err(SETPRIV_EXIT_PRIVERR, _("getting process secure bits failed")); + + if (opts->securebits & ~(int)(SECBIT_NOROOT | + SECBIT_NOROOT_LOCKED | + SECBIT_NO_SETUID_FIXUP | + SECBIT_NO_SETUID_FIXUP_LOCKED | + SECBIT_KEEP_CAPS | + SECBIT_KEEP_CAPS_LOCKED)) + errx(SETPRIV_EXIT_PRIVERR, + _("unrecognized securebit set -- refusing to adjust")); + + while ((c = strsep(&p, ","))) { + if (*c != '+' && *c != '-') + errx(EXIT_FAILURE, _("bad securebits string")); + + if (!strcmp(c + 1, "all")) { + if (*c == '-') + opts->securebits = 0; + else + errx(EXIT_FAILURE, + _("+all securebits is not allowed")); + } else { + int bit; + if (!strcmp(c + 1, "noroot")) + bit = SECBIT_NOROOT; + else if (!strcmp(c + 1, "noroot_locked")) + bit = SECBIT_NOROOT_LOCKED; + else if (!strcmp(c + 1, "no_setuid_fixup")) + bit = SECBIT_NO_SETUID_FIXUP; + else if (!strcmp(c + 1, "no_setuid_fixup_locked")) + bit = SECBIT_NO_SETUID_FIXUP_LOCKED; + else if (!strcmp(c + 1, "keep_caps")) + errx(EXIT_FAILURE, + _("adjusting keep_caps does not make sense")); + else if (!strcmp(c + 1, "keep_caps_locked")) + bit = SECBIT_KEEP_CAPS_LOCKED; /* sigh */ + else + errx(EXIT_FAILURE, _("unrecognized securebit")); + + if (*c == '+') + opts->securebits |= bit; + else + opts->securebits &= ~bit; + } + } + + opts->securebits |= SECBIT_KEEP_CAPS; /* We need it, and it's reset on exec */ + + free(buf);
+} +/* Write label to _PATH_PROC_ATTR_EXEC; exits with SETPRIV_EXIT_PRIVERR if _PATH_SYS_SELINUX is absent or the open, write or close fails. */ +static void do_selinux_label(const char *label) +{ + int fd; + size_t len; + + if (access(_PATH_SYS_SELINUX, F_OK) != 0) + errx(SETPRIV_EXIT_PRIVERR, _("SELinux is not running")); + + fd = open(_PATH_PROC_ATTR_EXEC, O_RDWR); + if (fd == -1) + err(SETPRIV_EXIT_PRIVERR, + _("cannot open %s"), _PATH_PROC_ATTR_EXEC); + + len = strlen(label); + errno = 0; /* a short write sets no errno; avoid reporting a stale one */ + if (write(fd, label, len) != (ssize_t) len) + err(SETPRIV_EXIT_PRIVERR, + _("write failed: %s"), _PATH_PROC_ATTR_EXEC); + + if (close(fd) != 0) + err(SETPRIV_EXIT_PRIVERR, + _("close failed: %s"), _PATH_PROC_ATTR_EXEC); +} +/* Write "exec <label>" to _PATH_PROC_ATTR_EXEC; exits with SETPRIV_EXIT_PRIVERR if _PATH_SYS_APPARMOR is absent or the open or write fails. */ +static void do_apparmor_profile(const char *label) +{ + FILE *f; + + if (access(_PATH_SYS_APPARMOR, F_OK) != 0) + errx(SETPRIV_EXIT_PRIVERR, _("AppArmor is not running")); + + f = fopen(_PATH_PROC_ATTR_EXEC, "r+"); + if (!f) + err(SETPRIV_EXIT_PRIVERR, + _("cannot open %s"), _PATH_PROC_ATTR_EXEC); + + fprintf(f, "exec %s", label); + + if (close_stream(f) != 0) /* fprintf() is not checked; the close_stream() result is what gets reported */ + err(SETPRIV_EXIT_PRIVERR, + _("write failed: %s"), _PATH_PROC_ATTR_EXEC); +} + +/* Clear the environment except TERM, then set SHELL, HOME, USER and LOGNAME from pw and PATH according to pw->pw_uid. pw is dereferenced without a NULL check. */ +static void do_reset_environ(struct passwd *pw) +{ + char *term = getenv("TERM"); + + if (term) + term = xstrdup(term); /* copy before the environment is wiped */ +#ifdef HAVE_CLEARENV + clearenv(); +#else + environ = NULL; +#endif + if (term) { + xsetenv("TERM", term, 1); + free(term); + } + + if (pw->pw_shell && *pw->pw_shell) + xsetenv("SHELL", pw->pw_shell, 1); + else + xsetenv("SHELL", DEFAULT_SHELL, 1); + + xsetenv("HOME", pw->pw_dir, 1); + xsetenv("USER", pw->pw_name, 1); + xsetenv("LOGNAME", pw->pw_name, 1); + + if (pw->pw_uid) /* non-zero uid: regular user PATH; uid 0: root PATH */ + xsetenv("PATH", _PATH_DEFPATH, 1); + else + xsetenv("PATH", _PATH_DEFPATH_ROOT, 1); +} +/* Resolve s to a uid: a name known to getpwnam() wins, otherwise s is parsed as a number by strtol_or_err(s, err). */ +static uid_t get_user(const char *s, const char *err) +{ + struct passwd *pw; + long tmp; + pw = getpwnam(s); + if (pw) + return pw->pw_uid; + tmp = strtol_or_err(s, err); + return tmp; +} +/* Resolve s to a gid: a name known to getgrnam() wins, otherwise s is parsed as a number by strtol_or_err(s, err). */ +static gid_t get_group(const char *s, const char *err) +{ + struct group *gr; + long tmp; + gr = getgrnam(s); + if (gr) + return gr->gr_gid; + tmp = strtol_or_err(s, err); + return
tmp; +} + +static struct passwd *get_passwd(const char *s, uid_t *uid, const char *err) +{ + struct passwd *pw; + long tmp; + pw = getpwnam(s); + if (pw) { + *uid = pw->pw_uid; + } else { + tmp = strtol_or_err(s, err); + *uid = tmp; + pw = getpwuid(*uid); + } + return pw; +} + +static struct passwd *passwd_copy(struct passwd *dst, const struct passwd *src) +{ + struct passwd *rv; + rv = memcpy(dst, src, sizeof(*dst)); + rv->pw_name = xstrdup(rv->pw_name); + rv->pw_passwd = xstrdup(rv->pw_passwd); + rv->pw_gecos = xstrdup(rv->pw_gecos); + rv->pw_dir = xstrdup(rv->pw_dir); + rv->pw_shell = xstrdup(rv->pw_shell); + return rv; +} + +int main(int argc, char **argv) +{ + enum { + NNP = CHAR_MAX + 1, + RUID, + EUID, + RGID, + EGID, + REUID, + REGID, + CLEAR_GROUPS, + KEEP_GROUPS, + INIT_GROUPS, + GROUPS, + INHCAPS, + AMBCAPS, + LISTCAPS, + CAPBSET, + SECUREBITS, + PDEATHSIG, + SELINUX_LABEL, + APPARMOR_PROFILE, + RESET_ENV + }; + + static const struct option longopts[] = { + { "dump", no_argument, NULL, 'd' }, + { "nnp", no_argument, NULL, NNP }, + { "no-new-privs", no_argument, NULL, NNP }, + { "inh-caps", required_argument, NULL, INHCAPS }, + { "ambient-caps", required_argument, NULL, AMBCAPS }, + { "list-caps", no_argument, NULL, LISTCAPS }, + { "ruid", required_argument, NULL, RUID }, + { "euid", required_argument, NULL, EUID }, + { "rgid", required_argument, NULL, RGID }, + { "egid", required_argument, NULL, EGID }, + { "reuid", required_argument, NULL, REUID }, + { "regid", required_argument, NULL, REGID }, + { "clear-groups", no_argument, NULL, CLEAR_GROUPS }, + { "keep-groups", no_argument, NULL, KEEP_GROUPS }, + { "init-groups", no_argument, NULL, INIT_GROUPS }, + { "groups", required_argument, NULL, GROUPS }, + { "bounding-set", required_argument, NULL, CAPBSET }, + { "securebits", required_argument, NULL, SECUREBITS }, + { "pdeathsig", required_argument, NULL, PDEATHSIG, }, + { "selinux-label", required_argument, NULL, SELINUX_LABEL }, + { "apparmor-profile", 
required_argument, NULL, APPARMOR_PROFILE }, + { "help", no_argument, NULL, 'h' }, + { "reset-env", no_argument, NULL, RESET_ENV, }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { + /* keep in same order with enum definitions */ + {CLEAR_GROUPS, KEEP_GROUPS, INIT_GROUPS, GROUPS}, + {0} + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + int c; + struct privctx opts; + struct passwd *pw = NULL; + int dumplevel = 0; + int total_opts = 0; + int list_caps = 0; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + memset(&opts, 0, sizeof(opts)); + + while ((c = getopt_long(argc, argv, "+dhV", longopts, NULL)) != -1) { + err_exclusive_options(c, longopts, excl, excl_st); + total_opts++; + switch (c) { + case 'd': + dumplevel++; + break; + case NNP: + if (opts.nnp) + errx(EXIT_FAILURE, + _("duplicate --no-new-privs option")); + opts.nnp = 1; + break; + case RUID: + if (opts.have_ruid) + errx(EXIT_FAILURE, _("duplicate ruid")); + opts.have_ruid = 1; + pw = get_passwd(optarg, &opts.ruid, _("failed to parse ruid")); + if (pw) { + passwd_copy(&opts.passwd, pw); + opts.have_passwd = 1; + } + break; + case EUID: + if (opts.have_euid) + errx(EXIT_FAILURE, _("duplicate euid")); + opts.have_euid = 1; + opts.euid = get_user(optarg, _("failed to parse euid")); + break; + case REUID: + if (opts.have_ruid || opts.have_euid) + errx(EXIT_FAILURE, _("duplicate ruid or euid")); + opts.have_ruid = opts.have_euid = 1; + pw = get_passwd(optarg, &opts.ruid, _("failed to parse reuid")); + opts.euid = opts.ruid; + if (pw) { + passwd_copy(&opts.passwd, pw); + opts.have_passwd = 1; + } + break; + case RGID: + if (opts.have_rgid) + errx(EXIT_FAILURE, _("duplicate rgid")); + opts.have_rgid = 1; + opts.rgid = get_group(optarg, _("failed to parse rgid")); + break; + case EGID: + if (opts.have_egid) + errx(EXIT_FAILURE, _("duplicate egid")); + opts.have_egid = 1; + 
opts.egid = get_group(optarg, _("failed to parse egid")); + break; + case REGID: + if (opts.have_rgid || opts.have_egid) + errx(EXIT_FAILURE, _("duplicate rgid or egid")); + opts.have_rgid = opts.have_egid = 1; + opts.rgid = opts.egid = get_group(optarg, _("failed to parse regid")); + break; + case CLEAR_GROUPS: + if (opts.clear_groups) + errx(EXIT_FAILURE, + _("duplicate --clear-groups option")); + opts.clear_groups = 1; + break; + case KEEP_GROUPS: + if (opts.keep_groups) + errx(EXIT_FAILURE, + _("duplicate --keep-groups option")); + opts.keep_groups = 1; + break; + case INIT_GROUPS: + if (opts.init_groups) + errx(EXIT_FAILURE, + _("duplicate --init-groups option")); + opts.init_groups = 1; + break; + case GROUPS: + if (opts.have_groups) + errx(EXIT_FAILURE, + _("duplicate --groups option")); + parse_groups(&opts, optarg); + break; + case PDEATHSIG: + if (opts.pdeathsig) + errx(EXIT_FAILURE, + _("duplicate --keep-pdeathsig option")); + parse_pdeathsig(&opts, optarg); + break; + case LISTCAPS: + list_caps = 1; + break; + case INHCAPS: + if (opts.caps_to_inherit) + errx(EXIT_FAILURE, + _("duplicate --inh-caps option")); + opts.caps_to_inherit = optarg; + break; + case AMBCAPS: + if (opts.ambient_caps) + errx(EXIT_FAILURE, + _("duplicate --ambient-caps option")); + opts.ambient_caps = optarg; + break; + case CAPBSET: + if (opts.bounding_set) + errx(EXIT_FAILURE, + _("duplicate --bounding-set option")); + opts.bounding_set = optarg; + break; + case SECUREBITS: + if (opts.have_securebits) + errx(EXIT_FAILURE, + _("duplicate --securebits option")); + parse_securebits(&opts, optarg); + break; + case SELINUX_LABEL: + if (opts.selinux_label) + errx(EXIT_FAILURE, + _("duplicate --selinux-label option")); + opts.selinux_label = optarg; + break; + case APPARMOR_PROFILE: + if (opts.apparmor_profile) + errx(EXIT_FAILURE, + _("duplicate --apparmor-profile option")); + opts.apparmor_profile = optarg; + break; + case RESET_ENV: + opts.reset_env = 1; + break; + + case 'h': + 
usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (dumplevel) { + if (total_opts != dumplevel || optind < argc) + errx(EXIT_FAILURE, + _("--dump is incompatible with all other options")); + dump(dumplevel); + return EXIT_SUCCESS; + } + + if (list_caps) { + if (total_opts != 1 || optind < argc) + errx(EXIT_FAILURE, + _("--list-caps must be specified alone")); + list_known_caps(); + return EXIT_SUCCESS; + } + + if (argc <= optind) + errx(EXIT_FAILURE, _("No program specified")); + + if ((opts.have_rgid || opts.have_egid) + && !opts.keep_groups && !opts.clear_groups && !opts.init_groups + && !opts.have_groups) + errx(EXIT_FAILURE, + _("--[re]gid requires --keep-groups, --clear-groups, --init-groups, or --groups")); + + if (opts.init_groups && !opts.have_ruid) + errx(EXIT_FAILURE, + _("--init-groups requires --ruid or --reuid")); + + if (opts.init_groups && !opts.have_passwd) + errx(EXIT_FAILURE, + _("uid %ld not found, --init-groups requires an user that " + "can be found on the system"), + (long) opts.ruid); + + if (opts.reset_env) { + if (opts.have_passwd) + /* pwd according to --ruid or --reuid */ + pw = &opts.passwd; + else + /* pwd for the current user */ + pw = getpwuid(getuid()); + do_reset_environ(pw); + } + + if (opts.nnp && prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) == -1) + err(EXIT_FAILURE, _("disallow granting new privileges failed")); + + if (opts.selinux_label) + do_selinux_label(opts.selinux_label); + if (opts.apparmor_profile) + do_apparmor_profile(opts.apparmor_profile); + + if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) == -1) + err(EXIT_FAILURE, _("keep process capabilities failed")); + + /* We're going to want CAP_SETPCAP, CAP_SETUID, and CAP_SETGID if + * possible. 
*/ + bump_cap(CAP_SETPCAP); + bump_cap(CAP_SETUID); + bump_cap(CAP_SETGID); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(SETPRIV_EXIT_PRIVERR, _("activate capabilities")); + + if (opts.have_ruid || opts.have_euid) { + do_setresuid(&opts); + /* KEEPCAPS doesn't work for the effective mask. */ + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(SETPRIV_EXIT_PRIVERR, _("reactivate capabilities")); + } + + if (opts.have_rgid || opts.have_egid) + do_setresgid(&opts); + + if (opts.have_groups) { + if (setgroups(opts.num_groups, opts.groups) != 0) + err(SETPRIV_EXIT_PRIVERR, _("setgroups failed")); + } else if (opts.init_groups) { + if (initgroups(opts.passwd.pw_name, opts.passwd.pw_gid) != 0) + err(SETPRIV_EXIT_PRIVERR, _("initgroups failed")); + } else if (opts.clear_groups) { + gid_t x = 0; + if (setgroups(0, &x) != 0) + err(SETPRIV_EXIT_PRIVERR, _("setgroups failed")); + } + + if (opts.have_securebits && prctl(PR_SET_SECUREBITS, opts.securebits, 0, 0, 0) != 0) + err(SETPRIV_EXIT_PRIVERR, _("set process securebits failed")); + + if (opts.bounding_set) { + do_caps(CAP_TYPE_BOUNDING, opts.bounding_set); + errno = EPERM; /* capng doesn't set errno if we're missing CAP_SETPCAP */ + if (capng_apply(CAPNG_SELECT_BOUNDS) != 0) + err(SETPRIV_EXIT_PRIVERR, _("apply bounding set")); + } + + if (opts.caps_to_inherit) { + do_caps(CAP_TYPE_INHERITABLE, opts.caps_to_inherit); + if (capng_apply(CAPNG_SELECT_CAPS) != 0) + err(SETPRIV_EXIT_PRIVERR, _("apply capabilities")); + } + + if (opts.ambient_caps) { + do_caps(CAP_TYPE_AMBIENT, opts.ambient_caps); + } + + /* Clear or set parent death signal */ + if (opts.pdeathsig && prctl(PR_SET_PDEATHSIG, opts.pdeathsig < 0 ? 
0 : opts.pdeathsig) != 0) + err(SETPRIV_EXIT_PRIVERR, _("set parent death signal failed")); + + execvp(argv[optind], argv + optind); + errexec(argv[optind]); +} diff --git a/sys-utils/setsid.1 b/sys-utils/setsid.1 new file mode 100644 index 0000000..7b46f04 --- /dev/null +++ b/sys-utils/setsid.1 @@ -0,0 +1,77 @@ +'\" t +.\" Title: setsid +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "SETSID" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +setsid \- run a program in a new session +.SH "SYNOPSIS" +.sp +\fBsetsid\fP [options] \fIprogram\fP [\fIarguments\fP] +.SH "DESCRIPTION" +.sp +\fBsetsid\fP runs a program in a new session. The command calls \fBfork\fP(2) if already a process group leader. Otherwise, it executes a program in the current process. This default behavior is possible to override by the \fB\-\-fork\fP option. +.SH "OPTIONS" +.sp +\fB\-c\fP, \fB\-\-ctty\fP +.RS 4 +Set the controlling terminal to the current one. +.RE +.sp +\fB\-f\fP, \fB\-\-fork\fP +.RS 4 +Always create a new process. +.RE +.sp +\fB\-w\fP, \fB\-\-wait\fP +.RS 4 +Wait for the execution of the program to end, and return the exit status of this program as the exit status of \fBsetsid\fP. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Display version information and exit. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.SH "AUTHORS" +.sp +.MTO "jrs\(atworld.std.com" "Rick Sladkey" "" +.SH "SEE ALSO" +.sp +\fBsetsid\fP(2) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." 
+.SH "AVAILABILITY" +.sp +The \fBsetsid\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/setsid.1.adoc b/sys-utils/setsid.1.adoc new file mode 100644 index 0000000..3eddcab --- /dev/null +++ b/sys-utils/setsid.1.adoc @@ -0,0 +1,54 @@ +//po4a: entry man manual +// Rick Sladkey <jrs@world.std.com> +// In the public domain. += setsid(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: setsid + +== NAME + +setsid - run a program in a new session + +== SYNOPSIS + +*setsid* [options] _program_ [_arguments_] + +== DESCRIPTION + +*setsid* runs a program in a new session. The command calls *fork*(2) if it is already a process group leader. Otherwise, it executes the program in the current process. This default behavior can be overridden with the *--fork* option. + +== OPTIONS + +*-c*, *--ctty*:: +Set the controlling terminal to the current one. + +*-f*, *--fork*:: +Always create a new process. + +*-w*, *--wait*:: +Wait for the execution of the program to end, and return the exit status of this program as the exit status of *setsid*. + +*-V*, *--version*:: +Display version information and exit. + +*-h*, *--help*:: +Display help text and exit. + +== AUTHORS + +mailto:jrs@world.std.com[Rick Sladkey] + +== SEE ALSO + +*setsid*(2) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/setsid.c b/sys-utils/setsid.c new file mode 100644 index 0000000..5725e80 --- /dev/null +++ b/sys-utils/setsid.c @@ -0,0 +1,123 @@ +/* + * setsid.c -- execute a command in a new session + * Rick Sladkey <jrs@world.std.com> + * In the public domain. 
+ * + * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL> + * - added Native Language Support + * + * 2001-01-18 John Fremlin <vii@penguinpowered.com> + * - fork in case we are process group leader + * + * 2008-08-20 Daniel Kahn Gillmor <dkg@fifthhorseman.net> + * - if forked, wait on child process and emit its return code. + */ + +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/types.h> +#include <sys/wait.h> + +#include "c.h" +#include "nls.h" +#include "closestream.h" + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, _( + " %s [options] <program> [arguments ...]\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Run a program in a new session.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -c, --ctty set the controlling terminal to the current one\n"), out); + fputs(_(" -f, --fork always fork\n"), out); + fputs(_(" -w, --wait wait program to exit, and use the same return\n"), out); + + printf(USAGE_HELP_OPTIONS(16)); + + printf(USAGE_MAN_TAIL("setsid(1)")); + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + int ch, forcefork = 0; + int ctty = 0; + pid_t pid; + int status = 0; + + static const struct option longopts[] = { + {"ctty", no_argument, NULL, 'c'}, + {"fork", no_argument, NULL, 'f'}, + {"wait", no_argument, NULL, 'w'}, + {"version", no_argument, NULL, 'V'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((ch = getopt_long(argc, argv, "+Vhcfw", longopts, NULL)) != -1) + switch (ch) { + case 'c': + ctty=1; + break; + case 'f': + forcefork = 1; + break; + case 'w': + status = 1; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + + if (argc - optind < 1) 
{ + warnx(_("no command specified")); + errtryhelp(EXIT_FAILURE); + } + + if (forcefork || getpgrp() == getpid()) { + pid = fork(); + switch (pid) { + case -1: + err(EXIT_FAILURE, _("fork")); + case 0: + /* child */ + break; + default: + /* parent */ + if (!status) + return EXIT_SUCCESS; + if (wait(&status) != pid) + err(EXIT_FAILURE, "wait"); + if (WIFEXITED(status)) + return WEXITSTATUS(status); + err(status, _("child %d did not exit normally"), pid); + } + } + if (setsid() < 0) + /* cannot happen */ + err(EXIT_FAILURE, _("setsid failed")); + + if (ctty && ioctl(STDIN_FILENO, TIOCSCTTY, 1)) + err(EXIT_FAILURE, _("failed to set the controlling terminal")); + execvp(argv[optind], argv + optind); + errexec(argv[optind]); +} diff --git a/sys-utils/swapoff.8 b/sys-utils/swapoff.8 new file mode 100644 index 0000000..cd186f3 --- /dev/null +++ b/sys-utils/swapoff.8 @@ -0,0 +1 @@ +.so swapon.8
\ No newline at end of file diff --git a/sys-utils/swapoff.c b/sys-utils/swapoff.c new file mode 100644 index 0000000..7bfb90a --- /dev/null +++ b/sys-utils/swapoff.c @@ -0,0 +1,281 @@ +#include <stdio.h> +#include <errno.h> +#include <getopt.h> + +#ifdef HAVE_SYS_SWAP_H +# include <sys/swap.h> +#endif + +#include "nls.h" +#include "c.h" +#include "xalloc.h" +#include "closestream.h" + +#include "swapprober.h" +#include "swapon-common.h" + +#if !defined(HAVE_SWAPOFF) && defined(SYS_swapoff) +# include <sys/syscall.h> +# define swapoff(path) syscall(SYS_swapoff, path) +#endif + +static int verbose; +static int all; + +#define QUIET 1 +#define CANONIC 1 + +#define SWAPOFF_EX_OK 0 /* no errors */ +#define SWAPOFF_EX_ENOMEM 2 /* swapoff(2) failed due to OOM */ +#define SWAPOFF_EX_FAILURE 4 /* swapoff(2) failed due to another reason */ +#define SWAPOFF_EX_SYSERR 8 /* non-swapoff() errors */ +#define SWAPOFF_EX_USAGE 16 /* usage, permissions or syntax error */ +#define SWAPOFF_EX_ALLERR 32 /* --all all failed */ +#define SWAPOFF_EX_SOMEOK 64 /* --all some failed some OK */ + +/* + * This function works like mnt_resolve_tag(), but it's able to read UUID/LABEL + * from regular swap files too (according to entries in /proc/swaps). Note that + * mnt_resolve_tag() and mnt_resolve_spec() work with system visible block + * devices only. 
+ */ +static char *swapoff_resolve_tag(const char *name, const char *value, + struct libmnt_cache *cache) +{ + char *path; + struct libmnt_table *tb; + struct libmnt_iter *itr; + struct libmnt_fs *fs; + + /* this is usual case for block devices (and it's really fast as it uses + * udev /dev/disk/by-* symlinks by default */ + path = mnt_resolve_tag(name, value, cache); + if (path) + return path; + + /* try regular files from /proc/swaps */ + tb = get_swaps(); + if (!tb) + return NULL; + + itr = mnt_new_iter(MNT_ITER_BACKWARD); + if (!itr) + err(SWAPOFF_EX_SYSERR, _("failed to initialize libmount iterator")); + + while (tb && mnt_table_next_fs(tb, itr, &fs) == 0) { + blkid_probe pr = NULL; + const char *src = mnt_fs_get_source(fs); + const char *type = mnt_fs_get_swaptype(fs); + const char *data = NULL; + + if (!src || !type || strcmp(type, "file") != 0) + continue; + pr = get_swap_prober(src); + if (!pr) + continue; + blkid_probe_lookup_value(pr, name, &data, NULL); + if (data && strcmp(data, value) == 0) + path = xstrdup(src); + blkid_free_probe(pr); + if (path) + break; + } + + mnt_free_iter(itr); + return path; +} + +static int do_swapoff(const char *orig_special, int quiet, int canonic) +{ + const char *special = orig_special; + int rc = SWAPOFF_EX_OK; + + if (verbose) + printf(_("swapoff %s\n"), orig_special); + + if (!canonic) { + char *n, *v; + + special = mnt_resolve_spec(orig_special, mntcache); + if (!special && blkid_parse_tag_string(orig_special, &n, &v) == 0) { + special = swapoff_resolve_tag(n, v, mntcache); + free(n); + free(v); + } + if (!special) + return cannot_find(orig_special); + } + + if (swapoff(special) == 0) + rc = SWAPOFF_EX_OK; /* success */ + else { + switch (errno) { + case EPERM: + errx(SWAPOFF_EX_USAGE, _("Not superuser.")); + break; + case ENOMEM: + warn(_("%s: swapoff failed"), orig_special); + rc = SWAPOFF_EX_ENOMEM; + break; + default: + if (!quiet) + warn(_("%s: swapoff failed"), orig_special); + rc = SWAPOFF_EX_FAILURE; + break; 
+ } + } + + return rc; +} + +static int swapoff_by(const char *name, const char *value, int quiet) +{ + const char *special = swapoff_resolve_tag(name, value, mntcache); + return special ? do_swapoff(special, quiet, CANONIC) : cannot_find(value); +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options] [<spec>]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Disable devices and files for paging and swapping.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -a, --all disable all swaps from /proc/swaps\n" + " -v, --verbose verbose mode\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(24)); + + fputs(_("\nThe <spec> parameter:\n" \ + " -L <label> LABEL of device to be used\n" \ + " -U <uuid> UUID of device to be used\n" \ + " LABEL=<label> LABEL of device to be used\n" \ + " UUID=<uuid> UUID of device to be used\n" \ + " <device> name of device to be used\n" \ + " <file> name of file to be used\n"), out); + + printf(USAGE_MAN_TAIL("swapoff(8)")); + exit(SWAPOFF_EX_OK); +} + +static int swapoff_all(void) +{ + int nerrs = 0, nsucc = 0; + struct libmnt_table *tb; + struct libmnt_fs *fs; + struct libmnt_iter *itr = mnt_new_iter(MNT_ITER_BACKWARD); + + if (!itr) + err(SWAPOFF_EX_SYSERR, _("failed to initialize libmount iterator")); + + /* + * In case /proc/swaps exists, unswap stuff listed there. We are quiet + * but report errors in status. Errors might mean that /proc/swaps + * exists as ordinary file, not in procfs. do_swapoff() exits + * immediately on EPERM. + */ + tb = get_swaps(); + + while (tb && mnt_table_find_next_fs(tb, itr, match_swap, NULL, &fs) == 0) { + if (do_swapoff(mnt_fs_get_source(fs), QUIET, CANONIC) == SWAPOFF_EX_OK) + nsucc++; + else + nerrs++; + } + + /* + * Unswap stuff mentioned in /etc/fstab. Probably it was unmounted + * already, so errors are not bad. 
Doing swapoff -a twice should not + * give error messages. + */ + tb = get_fstab(); + mnt_reset_iter(itr, MNT_ITER_FORWARD); + + while (tb && mnt_table_find_next_fs(tb, itr, match_swap, NULL, &fs) == 0) { + if (!is_active_swap(mnt_fs_get_source(fs))) + do_swapoff(mnt_fs_get_source(fs), QUIET, !CANONIC); + } + + mnt_free_iter(itr); + + if (nerrs == 0) + return SWAPOFF_EX_OK; /* all success */ + else if (nsucc == 0) + return SWAPOFF_EX_ALLERR; /* all failed */ + + return SWAPOFF_EX_SOMEOK; /* some success, some failed */ +} + +int main(int argc, char *argv[]) +{ + int status = 0, c; + size_t i; + + static const struct option long_opts[] = { + { "all", no_argument, NULL, 'a' }, + { "help", no_argument, NULL, 'h' }, + { "verbose", no_argument, NULL, 'v' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, "ahvVL:U:", + long_opts, NULL)) != -1) { + switch (c) { + case 'a': /* all */ + ++all; + break; + case 'v': /* be chatty */ + ++verbose; + break; + case 'L': + add_label(optarg); + break; + case 'U': + add_uuid(optarg); + break; + + case 'h': /* help */ + usage(); + case 'V': /* version */ + print_version(SWAPOFF_EX_OK); + default: + errtryhelp(SWAPOFF_EX_USAGE); + } + } + argv += optind; + + if (!all && !numof_labels() && !numof_uuids() && *argv == NULL) { + warnx(_("bad usage")); + errtryhelp(SWAPOFF_EX_USAGE); + } + + mnt_init_debug(0); + mntcache = mnt_new_cache(); + + for (i = 0; i < numof_labels(); i++) + status |= swapoff_by("LABEL", get_label(i), !QUIET); + + for (i = 0; i < numof_uuids(); i++) + status |= swapoff_by("UUID", get_uuid(i), !QUIET); + + while (*argv != NULL) + status |= do_swapoff(*argv++, !QUIET, !CANONIC); + + if (all) + status |= swapoff_all(); + + free_tables(); + mnt_unref_cache(mntcache); + + return status; +} diff --git a/sys-utils/swapon-common.c 
b/sys-utils/swapon-common.c new file mode 100644 index 0000000..dd1593d --- /dev/null +++ b/sys-utils/swapon-common.c @@ -0,0 +1,117 @@ + +#include "c.h" +#include "nls.h" +#include "xalloc.h" + +#include "swapon-common.h" + +/* + * content of /proc/swaps and /etc/fstab + */ +static struct libmnt_table *swaps, *fstab; + +struct libmnt_cache *mntcache; + +static int table_parser_errcb(struct libmnt_table *tb __attribute__((__unused__)), + const char *filename, int line) +{ + if (filename) + warnx(_("%s: parse error at line %d -- ignored"), filename, line); + return 1; +} + +struct libmnt_table *get_fstab(void) +{ + if (!fstab) { + fstab = mnt_new_table(); + if (!fstab) + return NULL; + mnt_table_set_parser_errcb(fstab, table_parser_errcb); + mnt_table_set_cache(fstab, mntcache); + if (mnt_table_parse_fstab(fstab, NULL) != 0) + return NULL; + } + + return fstab; +} + +struct libmnt_table *get_swaps(void) +{ + if (!swaps) { + swaps = mnt_new_table(); + if (!swaps) + return NULL; + mnt_table_set_cache(swaps, mntcache); + mnt_table_set_parser_errcb(swaps, table_parser_errcb); + if (mnt_table_parse_swaps(swaps, NULL) != 0) + return NULL; + } + + return swaps; +} + +void free_tables(void) +{ + mnt_unref_table(swaps); + mnt_unref_table(fstab); +} + +int match_swap(struct libmnt_fs *fs, void *data __attribute__((unused))) +{ + return fs && mnt_fs_is_swaparea(fs); +} + +int is_active_swap(const char *filename) +{ + struct libmnt_table *st = get_swaps(); + return st && mnt_table_find_source(st, filename, MNT_ITER_BACKWARD); +} + + +int cannot_find(const char *special) +{ + warnx(_("cannot find the device for %s"), special); + return -1; +} + +/* + * Lists with -L and -U option + */ +static const char **llist; +static size_t llct; +static const char **ulist; +static size_t ulct; + + +void add_label(const char *label) +{ + llist = xrealloc(llist, (++llct) * sizeof(char *)); + llist[llct - 1] = label; +} + +const char *get_label(size_t i) +{ + return i < llct ? 
llist[i] : NULL; +} + +size_t numof_labels(void) +{ + return llct; +} + +void add_uuid(const char *uuid) +{ + ulist = xrealloc(ulist, (++ulct) * sizeof(char *)); + ulist[ulct - 1] = uuid; +} + +const char *get_uuid(size_t i) +{ + return i < ulct ? ulist[i] : NULL; +} + +size_t numof_uuids(void) +{ + return ulct; +} + diff --git a/sys-utils/swapon-common.h b/sys-utils/swapon-common.h new file mode 100644 index 0000000..d1b679f --- /dev/null +++ b/sys-utils/swapon-common.h @@ -0,0 +1,25 @@ +#ifndef UTIL_LINUX_SWAPON_COMMON_H +#define UTIL_LINUX_SWAPON_COMMON_H + +#include <libmount.h> + +extern struct libmnt_cache *mntcache; + +extern struct libmnt_table *get_fstab(void); +extern struct libmnt_table *get_swaps(void); +extern void free_tables(void); + +extern int match_swap(struct libmnt_fs *fs, void *data); +extern int is_active_swap(const char *filename); + +extern int cannot_find(const char *special); + +extern void add_label(const char *label); +extern const char *get_label(size_t i); +extern size_t numof_labels(void); + +extern void add_uuid(const char *uuid); +extern const char *get_uuid(size_t i); +extern size_t numof_uuids(void); + +#endif /* UTIL_LINUX_SWAPON_COMMON_H */ diff --git a/sys-utils/swapon.8 b/sys-utils/swapon.8 new file mode 100644 index 0000000..8d56980 --- /dev/null +++ b/sys-utils/swapon.8 @@ -0,0 +1,252 @@ +'\" t +.\" Title: swapon +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "SWAPON" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. 
LINKSTYLE blue R < > +.\} +.SH "NAME" +swapon, swapoff \- enable/disable devices and files for paging and swapping +.SH "SYNOPSIS" +.sp +\fBswapon\fP [options] [\fIspecialfile\fP...] +.sp +\fBswapoff\fP [\fB\-va\fP] [\fIspecialfile\fP...] +.SH "DESCRIPTION" +.sp +\fBswapon\fP is used to specify devices on which paging and swapping are to take place. +.sp +The device or file used is given by the \fIspecialfile\fP parameter. It may be of the form \fB\-L\fP \fIlabel\fP or \fB\-U\fP \fIuuid\fP to indicate a device by label or uuid. +.sp +Calls to \fBswapon\fP normally occur in the system boot scripts making all swap devices available, so that the paging and swapping activity is interleaved across several devices and files. +.sp +\fBswapoff\fP disables swapping on the specified devices and files. When the \fB\-a\fP flag is given, swapping is disabled on all known swap devices and files (as found in \fI/proc/swaps\fP or \fI/etc/fstab\fP). +.SH "OPTIONS" +.sp +\fB\-a\fP, \fB\-\-all\fP +.RS 4 +All devices marked as "swap" in \fI/etc/fstab\fP are made available, except for those with the "noauto" option. Devices that are already being used as swap are silently skipped. +.RE +.sp +\fB\-d\fP, \fB\-\-discard\fP[\fB=\fP\fIpolicy\fP] +.RS 4 +Enable swap discards, if the swap backing device supports the discard or trim operation. This may improve performance on some Solid State Devices, but often it does not. The option allows one to select between two available swap discard policies: +.sp +\fB\-\-discard=once\fP +.RS 4 +to perform a single\-time discard operation for the whole swap area at swapon; or +.RE +.sp +\fB\-\-discard=pages\fP +.RS 4 +to asynchronously discard freed swap pages before they are available for reuse. +.RE +.sp +If no policy is selected, the default behavior is to enable both discard types. The \fI/etc/fstab\fP mount options \fBdiscard\fP, \fBdiscard=once\fP, or \fBdiscard=pages\fP may also be used to enable discard flags. 
+.RE +.sp +\fB\-e\fP, \fB\-\-ifexists\fP +.RS 4 +Silently skip devices that do not exist. The \fI/etc/fstab\fP mount option \fBnofail\fP may also be used to skip non\-existing devices. +.RE +.sp +\fB\-f\fP, \fB\-\-fixpgsz\fP +.RS 4 +Reinitialize (exec mkswap) the swap space if its page size does not match that of the current running kernel. \fBmkswap\fP(8) initializes the whole device and does not check for bad blocks. +.RE +.sp +\fB\-L\fP \fIlabel\fP +.RS 4 +Use the partition that has the specified \fIlabel\fP. (For this, access to \fI/proc/partitions\fP is needed.) +.RE +.sp +\fB\-o\fP, \fB\-\-options\fP \fIopts\fP +.RS 4 +Specify swap options by an \fIfstab\fP\-compatible comma\-separated string. For example: +.sp +\fBswapon \-o pri=1,discard=pages,nofail /dev/sda2\fP +.sp +The \fIopts\fP string is evaluated last and overrides all other command line options. +.RE +.sp +\fB\-p\fP, \fB\-\-priority\fP \fIpriority\fP +.RS 4 +Specify the priority of the swap device. \fIpriority\fP is a value between \-1 and 32767. Higher numbers indicate higher priority. See \fBswapon\fP(2) for a full description of swap priorities. Add \fBpri=\fP\fIvalue\fP to the option field of \fI/etc/fstab\fP for use with \fBswapon \-a\fP. When no priority is defined, it defaults to \-1. +.RE +.sp +\fB\-s\fP, \fB\-\-summary\fP +.RS 4 +Display swap usage summary by device. Equivalent to \fBcat /proc/swaps\fP. This output format is DEPRECATED in favour of \fB\-\-show\fP, which provides better control over the output data. +.RE +.sp +\fB\-\-show\fP[\fB=\fP\fIcolumn\fP...] +.RS 4 +Display a definable table of swap areas. See the \fB\-\-help\fP output for a list of available columns. +.RE +.sp +\fB\-\-output\-all\fP +.RS 4 +Output all available columns. +.RE +.sp +\fB\-\-noheadings\fP +.RS 4 +Do not print headings when displaying \fB\-\-show\fP output. +.RE +.sp +\fB\-\-raw\fP +.RS 4 +Display \fB\-\-show\fP output without aligning table columns. 
+.RE +.sp +\fB\-\-bytes\fP +.RS 4 +Display swap size in bytes in \fB\-\-show\fP output instead of in user\-friendly units. +.RE +.sp +\fB\-U\fP \fIuuid\fP +.RS 4 +Use the partition that has the specified \fIuuid\fP. +.RE +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Be verbose. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "EXIT STATUS" +.sp +\fBswapoff\fP has the following exit status values since v2.36: +.sp +\fB0\fP +.RS 4 +success +.RE +.sp +\fB2\fP +.RS 4 +system has insufficient memory to stop swapping (OOM) +.RE +.sp +\fB4\fP +.RS 4 +\fBswapoff\fP(2) syscall failed for another reason +.RE +.sp +\fB8\fP +.RS 4 +non\-\fBswapoff\fP(2) syscall system error (out of memory, ...) +.RE +.sp +\fB16\fP +.RS 4 +usage or syntax error +.RE +.sp +\fB32\fP +.RS 4 +all swapoff failed on \fB\-\-all\fP +.RE +.sp +\fB64\fP +.RS 4 +some swapoff succeeded on \fB\-\-all\fP +.RE +.sp +The command \fBswapoff \-\-all\fP returns 0 (all succeeded), 32 (all failed), or 64 (some failed, some succeeded). +.sp ++ +Versions before v2.36 have no documented exit status; 0 means success in all versions. +.SH "ENVIRONMENT" +.sp +\fBLIBMOUNT_DEBUG\fP=all +.RS 4 +enables \fBlibmount\fP debug output. +.RE +.sp +\fBLIBBLKID_DEBUG\fP=all +.RS 4 +enables \fBlibblkid\fP debug output. +.RE +.SH "FILES" +.sp +\fI/dev/sd??\fP +.RS 4 +standard paging devices +.RE +.sp +\fI/etc/fstab\fP +.RS 4 +ascii filesystem description table +.RE +.SH "NOTES" +.SS "Files with holes" +.sp +The swap file implementation in the kernel expects to be able to write to the file directly, without the assistance of the filesystem. This is a problem on files with holes or on copy\-on\-write files on filesystems like Btrfs. +.sp +Commands like \fBcp\fP(1) or \fBtruncate\fP(1) create files with holes. These files will be rejected by \fBswapon\fP. 
+.sp +Preallocated files created by \fBfallocate\fP(1) may be interpreted as files with holes too, depending on the filesystem. Preallocated swap files are supported on XFS since Linux 4.18. +.sp +The most portable solution to create a swap file is to use \fBdd\fP(1) and \fI/dev/zero\fP. +.SS "Btrfs" +.sp +Swap files on Btrfs are supported since Linux 5.0 on files with the \fBnocow\fP attribute. See the \fBbtrfs\fP(5) manual page for more details. +.SS "NFS" +.sp +Swap over \fBNFS\fP may not work. +.SS "Suspend" +.sp +\fBswapon\fP automatically detects and rewrites a swap space signature with old software suspend data (e.g., \fBS1SUSPEND\fP, \fBS2SUSPEND\fP, ...). The problem is that if we don\(cqt do it, then we get data corruption the next time an attempt at unsuspending is made. +.SH "HISTORY" +.sp +The \fBswapon\fP command appeared in 4.0BSD. +.SH "SEE ALSO" +.sp +\fBswapoff\fP(2), +\fBswapon\fP(2), +\fBfstab\fP(5), +\fBinit\fP(8), +\fBfallocate\fP(1), +\fBmkswap\fP(8), +\fBmount\fP(8), +\fBrc\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBswapon\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/swapon.8.adoc b/sys-utils/swapon.8.adoc new file mode 100644 index 0000000..01de11d --- /dev/null +++ b/sys-utils/swapon.8.adoc @@ -0,0 +1,181 @@ +//po4a: entry man manual += swapon(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: swapon + +== NAME + +swapon, swapoff - enable/disable devices and files for paging and swapping + +== SYNOPSIS + +*swapon* [options] [_specialfile_...] + +*swapoff* [*-va*] [_specialfile_...] + +== DESCRIPTION + +*swapon* is used to specify devices on which paging and swapping are to take place. + +The device or file used is given by the _specialfile_ parameter. It may be of the form *-L* _label_ or *-U* _uuid_ to indicate a device by label or uuid. + +Calls to *swapon* normally occur in the system boot scripts making all swap devices available, so that the paging and swapping activity is interleaved across several devices and files. + +*swapoff* disables swapping on the specified devices and files. When the *-a* flag is given, swapping is disabled on all known swap devices and files (as found in _/proc/swaps_ or _/etc/fstab_). + +== OPTIONS + +*-a*, *--all*:: +All devices marked as "swap" in _/etc/fstab_ are made available, except for those with the "noauto" option. Devices that are already being used as swap are silently skipped. + +*-d*, *--discard*[**=**__policy__]:: +Enable swap discards, if the swap backing device supports the discard or trim operation. This may improve performance on some Solid State Devices, but often it does not. The option allows one to select between two available swap discard policies: + +*--discard=once*;; +to perform a single-time discard operation for the whole swap area at swapon; or + +*--discard=pages*;; +to asynchronously discard freed swap pages before they are available for reuse. + ++ +If no policy is selected, the default behavior is to enable both discard types. 
The _/etc/fstab_ mount options *discard*, *discard=once*, or *discard=pages* may also be used to enable discard flags. + +*-e*, *--ifexists*:: +Silently skip devices that do not exist. The _/etc/fstab_ mount option *nofail* may also be used to skip non-existing devices. + +*-f*, *--fixpgsz*:: +Reinitialize (exec mkswap) the swap space if its page size does not match that of the current running kernel. *mkswap*(8) initializes the whole device and does not check for bad blocks. + +*-L* _label_:: +Use the partition that has the specified _label_. (For this, access to _/proc/partitions_ is needed.) + +*-o*, *--options* _opts_:: +Specify swap options by an __fstab__-compatible comma-separated string. For example: ++ +*swapon -o pri=1,discard=pages,nofail /dev/sda2* ++ +The _opts_ string is evaluated last and overrides all other command line options. + +*-p*, *--priority* _priority_:: +Specify the priority of the swap device. _priority_ is a value between -1 and 32767. Higher numbers indicate higher priority. See *swapon*(2) for a full description of swap priorities. Add **pri=**__value__ to the option field of _/etc/fstab_ for use with *swapon -a*. When no priority is defined, it defaults to -1. + +*-s*, *--summary*:: +Display swap usage summary by device. Equivalent to *cat /proc/swaps*. This output format is DEPRECATED in favour of *--show*, which provides better control over output data. + +*--show*[**=**__column__...]:: +Display a definable table of swap areas. See the *--help* output for a list of available columns. + +*--output-all*:: +Output all available columns. + +*--noheadings*:: +Do not print headings when displaying *--show* output. + +*--raw*:: +Display *--show* output without aligning table columns. + +*--bytes*:: +Display swap size in bytes in *--show* output instead of in user-friendly units. + +*-U* _uuid_:: +Use the partition that has the specified _uuid_. + +*-v*, *--verbose*:: +Be verbose.
+ +include::man-common/help-version.adoc[] + +== EXIT STATUS + +*swapoff* has the following exit status values since v2.36: + +*0*:: +success + +*2*:: +system has insufficient memory to stop swapping (OOM) + +*4*:: +*swapoff*(2) syscall failed for another reason + +*8*:: +non-*swapoff*(2) syscall system error (out of memory, ...) + +*16*:: +usage or syntax error + +*32*:: +all swapoff failed on *--all* + +*64*:: +some swapoff succeeded on *--all* + +The command *swapoff --all* returns 0 (all succeeded), 32 (all failed), or 64 (some failed, some succeeded). ++ +Versions before v2.36 have no documented exit status; 0 means success in all versions. + +== ENVIRONMENT + +*LIBMOUNT_DEBUG*=all:: +enables *libmount* debug output. + +*LIBBLKID_DEBUG*=all:: +enables *libblkid* debug output. + +== FILES + +_/dev/sd??_:: +standard paging devices + +_/etc/fstab_:: +ASCII filesystem description table + +== NOTES + +=== Files with holes + +The swap file implementation in the kernel expects to be able to write to the file directly, without the assistance of the filesystem. This is a problem on files with holes or on copy-on-write files on filesystems like Btrfs. + +Commands like *cp*(1) or *truncate*(1) create files with holes. These files will be rejected by *swapon*. + +Preallocated files created by *fallocate*(1) may be interpreted as files with holes too depending on the filesystem. Preallocated swap files are supported on XFS since Linux 4.18. + +The most portable solution to create a swap file is to use *dd*(1) and _/dev/zero_. + +=== Btrfs + +Swap files on Btrfs are supported since Linux 5.0 on files with the *nocow* attribute. See the *btrfs*(5) manual page for more details. + +=== NFS + +Swap over *NFS* may not work. + +=== Suspend + +*swapon* automatically detects and rewrites a swap space signature with old software suspend data (e.g., *S1SUSPEND*, *S2SUSPEND*, ...).
The problem is that if we don't do it, then we get data corruption the next time an attempt at unsuspending is made. + +== HISTORY + +The *swapon* command appeared in 4.0BSD. + +== SEE ALSO + +*swapoff*(2), +*swapon*(2), +*fstab*(5), +*init*(8), +*fallocate*(1), +*mkswap*(8), +*mount*(8), +*rc*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/swapon.c b/sys-utils/swapon.c new file mode 100644 index 0000000..76c5cac --- /dev/null +++ b/sys-utils/swapon.c @@ -0,0 +1,1029 @@ +#include <assert.h> +#include <stdlib.h> +#include <stdio.h> +#include <getopt.h> +#include <string.h> +#include <errno.h> +#include <sys/stat.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <fcntl.h> +#include <stdint.h> +#include <ctype.h> + +#include <libsmartcols.h> + +#include "c.h" +#include "nls.h" +#include "bitops.h" +#include "blkdev.h" +#include "pathnames.h" +#include "xalloc.h" +#include "strutils.h" +#include "optutils.h" +#include "closestream.h" + +#include "swapheader.h" +#include "swapprober.h" +#include "swapon-common.h" + +#ifdef HAVE_SYS_SWAP_H +# include <sys/swap.h> +#endif + +#ifndef SWAP_FLAG_DISCARD +# define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */ +#endif + +#ifndef SWAP_FLAG_DISCARD_ONCE +# define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */ +#endif + +#ifndef SWAP_FLAG_DISCARD_PAGES +# define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */ +#endif + +#define SWAP_FLAGS_DISCARD_VALID (SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \ + SWAP_FLAG_DISCARD_PAGES) + +#ifndef SWAP_FLAG_PREFER +# define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ +#endif + +#ifndef SWAP_FLAG_PRIO_MASK +# define SWAP_FLAG_PRIO_MASK 0x7fff +#endif + +#ifndef SWAP_FLAG_PRIO_SHIFT +# define SWAP_FLAG_PRIO_SHIFT 0 +#endif + +#if !defined(HAVE_SWAPON) && 
defined(SYS_swapon) +# include <sys/syscall.h> +# define swapon(path, flags) syscall(SYS_swapon, path, flags) +#endif + +#define MAX_PAGESIZE (64 * 1024) + +#ifndef UUID_STR_LEN +# define UUID_STR_LEN 37 +#endif + +enum { + SIG_SWAPSPACE = 1, + SIG_SWSUSPEND +}; + +/* column names */ +struct colinfo { + const char *name; /* header */ + double whint; /* width hint (N < 1 is in percent of termwidth) */ + int flags; /* SCOLS_FL_* */ + const char *help; +}; + +enum { + COL_PATH, + COL_TYPE, + COL_SIZE, + COL_USED, + COL_PRIO, + COL_UUID, + COL_LABEL +}; +static struct colinfo infos[] = { + [COL_PATH] = { "NAME", 0.20, 0, N_("device file or partition path") }, + [COL_TYPE] = { "TYPE", 0.20, SCOLS_FL_TRUNC, N_("type of the device")}, + [COL_SIZE] = { "SIZE", 0.20, SCOLS_FL_RIGHT, N_("size of the swap area")}, + [COL_USED] = { "USED", 0.20, SCOLS_FL_RIGHT, N_("bytes in use")}, + [COL_PRIO] = { "PRIO", 0.20, SCOLS_FL_RIGHT, N_("swap priority")}, + [COL_UUID] = { "UUID", 0.20, 0, N_("swap uuid")}, + [COL_LABEL] = { "LABEL", 0.20, 0, N_("swap label")}, +}; + + +/* swap area properties */ +struct swap_prop { + int discard; /* discard policy */ + int priority; /* non-prioritized swap by default */ + int no_fail; /* skip device if not exist */ +}; + +/* device description */ +struct swap_device { + const char *path; /* device or file to be turned on */ + const char *label; /* swap label */ + const char *uuid; /* unique identifier */ + unsigned int pagesize; +}; + +/* control struct */ +struct swapon_ctl { + int columns[ARRAY_SIZE(infos) * 2]; /* --show columns */ + int ncolumns; /* number of columns */ + + struct swap_prop props; /* global settings for all devices */ + + unsigned int + all:1, /* turn on all swap devices */ + bytes:1, /* display --show in bytes */ + fix_page_size:1, /* reinitialize page size */ + no_heading:1, /* toggle --show headers */ + raw:1, /* toggle --show alignment */ + show:1, /* display --show information */ + verbose:1; /* be chatty */ +}; + +static 
int column_name_to_id(const char *name, size_t namesz) +{ + size_t i; + + assert(name); + + for (i = 0; i < ARRAY_SIZE(infos); i++) { + const char *cn = infos[i].name; + + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) + return i; + } + warnx(_("unknown column: %s"), name); + return -1; +} + +static inline int get_column_id(const struct swapon_ctl *ctl, int num) +{ + assert(num < ctl->ncolumns); + assert(ctl->columns[num] < (int) ARRAY_SIZE(infos)); + + return ctl->columns[num]; +} + +static inline struct colinfo *get_column_info(const struct swapon_ctl *ctl, unsigned num) +{ + return &infos[get_column_id(ctl, num)]; +} + +static void add_scols_line(const struct swapon_ctl *ctl, struct libscols_table *table, struct libmnt_fs *fs) +{ + int i; + struct libscols_line *line; + blkid_probe pr = NULL; + const char *data; + + assert(table); + assert(fs); + + line = scols_table_new_line(table, NULL); + if (!line) + err(EXIT_FAILURE, _("failed to allocate output line")); + + data = mnt_fs_get_source(fs); + if (access(data, R_OK) == 0) + pr = get_swap_prober(data); + for (i = 0; i < ctl->ncolumns; i++) { + char *str = NULL; + off_t size; + + switch (get_column_id(ctl, i)) { + case COL_PATH: + xasprintf(&str, "%s", mnt_fs_get_source(fs)); + break; + case COL_TYPE: + xasprintf(&str, "%s", mnt_fs_get_swaptype(fs)); + break; + case COL_SIZE: + size = mnt_fs_get_size(fs); + size *= 1024; /* convert to bytes */ + if (ctl->bytes) + xasprintf(&str, "%jd", size); + else + str = size_to_human_string(SIZE_SUFFIX_1LETTER, size); + break; + case COL_USED: + size = mnt_fs_get_usedsize(fs); + size *= 1024; /* convert to bytes */ + if (ctl->bytes) + xasprintf(&str, "%jd", size); + else + str = size_to_human_string(SIZE_SUFFIX_1LETTER, size); + break; + case COL_PRIO: + xasprintf(&str, "%d", mnt_fs_get_priority(fs)); + break; + case COL_UUID: + if (pr && !blkid_probe_lookup_value(pr, "UUID", &data, NULL)) + xasprintf(&str, "%s", data); + break; + case COL_LABEL: + if (pr && 
!blkid_probe_lookup_value(pr, "LABEL", &data, NULL)) + xasprintf(&str, "%s", data); + break; + default: + break; + } + + if (str && scols_line_refer_data(line, i, str)) + err(EXIT_FAILURE, _("failed to add output data")); + } + if (pr) + blkid_free_probe(pr); +} + +static int display_summary(void) +{ + struct libmnt_table *st = get_swaps(); + struct libmnt_iter *itr; + struct libmnt_fs *fs; + + if (!st) + return -1; + + if (mnt_table_is_empty(st)) + return 0; + + itr = mnt_new_iter(MNT_ITER_FORWARD); + if (!itr) + err(EXIT_FAILURE, _("failed to initialize libmount iterator")); + + /* TRANSLATORS: The tabs make each field a multiple of 8 characters. Keep aligned with each entry below. */ + printf(_("Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n")); + + while (mnt_table_next_fs(st, itr, &fs) == 0) { + const char *src = mnt_fs_get_source(fs); + const char *type = mnt_fs_get_swaptype(fs); + int srclen = strlen(src); + int typelen = strlen(type); + off_t size = mnt_fs_get_size(fs); + off_t used = mnt_fs_get_usedsize(fs); + + /* TRANSLATORS: Keep each field a multiple of 8 characters and aligned with the header above. */ + printf("%s%*s%s%s\t%jd%s\t%jd%s\t%d\n", + src, + srclen < 40 ? 40 - srclen : 1, " ", + type, + typelen < 8 ? "\t" : "", + size, + size < 10000000 ? "\t" : "", + used, + used < 10000000 ? 
"\t" : "", + mnt_fs_get_priority(fs)); + } + + mnt_free_iter(itr); + return 0; +} + +static int show_table(struct swapon_ctl *ctl) +{ + struct libmnt_table *st = get_swaps(); + struct libmnt_iter *itr = NULL; + struct libmnt_fs *fs; + int i; + struct libscols_table *table = NULL; + + if (!st) + return -1; + + itr = mnt_new_iter(MNT_ITER_FORWARD); + if (!itr) + err(EXIT_FAILURE, _("failed to initialize libmount iterator")); + + scols_init_debug(0); + + table = scols_new_table(); + if (!table) + err(EXIT_FAILURE, _("failed to allocate output table")); + + scols_table_enable_raw(table, ctl->raw); + scols_table_enable_noheadings(table, ctl->no_heading); + + for (i = 0; i < ctl->ncolumns; i++) { + struct colinfo *col = get_column_info(ctl, i); + + if (!scols_table_new_column(table, col->name, col->whint, col->flags)) + err(EXIT_FAILURE, _("failed to allocate output column")); + } + + while (mnt_table_next_fs(st, itr, &fs) == 0) + add_scols_line(ctl, table, fs); + + scols_print_table(table); + scols_unref_table(table); + mnt_free_iter(itr); + return 0; +} + +/* calls mkswap */ +static int swap_reinitialize(struct swap_device *dev) +{ + pid_t pid; + int status, ret; + char const *cmd[7]; + int idx=0; + + assert(dev); + assert(dev->path); + + warnx(_("%s: reinitializing the swap."), dev->path); + + switch ((pid=fork())) { + case -1: /* fork error */ + warn(_("fork failed")); + return -1; + + case 0: /* child */ + if (geteuid() != getuid() && drop_permissions() != 0) + exit(EXIT_FAILURE); + + cmd[idx++] = "mkswap"; + if (dev->label) { + cmd[idx++] = "-L"; + cmd[idx++] = dev->label; + } + if (dev->uuid) { + cmd[idx++] = "-U"; + cmd[idx++] = dev->uuid; + } + cmd[idx++] = dev->path; + cmd[idx++] = NULL; + execvp(cmd[0], (char * const *) cmd); + errexec(cmd[0]); + + default: /* parent */ + do { + ret = waitpid(pid, &status, 0); + } while (ret == -1 && errno == EINTR); + + if (ret < 0) { + warn(_("waitpid failed")); + return -1; + } + + /* mkswap returns: 0=suss, >0 error */ + 
if (WIFEXITED(status) && WEXITSTATUS(status)==0) + return 0; /* ok */ + break; + } + return -1; /* error */ +} + +/* Replaces unwanted SWSUSPEND signature with swap signature; returns 0 on success, -1 on error (including a failed close) */ +static int swap_rewrite_signature(const struct swap_device *dev) +{ + int fd, rc = -1; + + assert(dev); + assert(dev->path); + assert(dev->pagesize); + + fd = open(dev->path, O_WRONLY); + if (fd == -1) { + warn(_("cannot open %s"), dev->path); + return -1; + } + + if (lseek(fd, dev->pagesize - SWAP_SIGNATURE_SZ, SEEK_SET) < 0) { + warn(_("%s: lseek failed"), dev->path); + goto err; + } + + if (write(fd, (void *) SWAP_SIGNATURE, + SWAP_SIGNATURE_SZ) != SWAP_SIGNATURE_SZ) { + warn(_("%s: write signature failed"), dev->path); + goto err; + } + + rc = 0; +err: + if (close_fd(fd) != 0) { + warn(_("write failed: %s"), dev->path); + rc = -1; + } + return rc; +} +/* Classifies the signature bytes at buf: stores SIG_SWAPSPACE or SIG_SWSUSPEND in *sig and returns 1, or returns 0 (leaving *sig untouched) when nothing matches */ +static int swap_detect_signature(const char *buf, int *sig) +{ + assert(buf); + assert(sig); + + if (memcmp(buf, SWAP_SIGNATURE, SWAP_SIGNATURE_SZ) == 0) + *sig = SIG_SWAPSPACE; + + else if (memcmp(buf, "S1SUSPEND", 9) == 0 || + memcmp(buf, "S2SUSPEND", 9) == 0 || + memcmp(buf, "ULSUSPEND", 9) == 0 || + memcmp(buf, "\xed\xc3\x02\xe9\x98\x56\xe5\x0c", 8) == 0 || + memcmp(buf, "LINHIB0001", 10) == 0) + *sig = SIG_SWSUSPEND; + else + return 0; + + return 1; +} +/* Reads up to MAX_PAGESIZE bytes from fd and probes each candidate page size for a signature at the end of the page; on a match sets *pagesize and *sig and returns the buffer (caller frees), otherwise returns NULL */ +static char *swap_get_header(int fd, int *sig, unsigned int *pagesize) +{ + char *buf; + ssize_t datasz; + unsigned int page; + + assert(sig); + assert(pagesize); + + *pagesize = 0; + *sig = 0; + + buf = xmalloc(MAX_PAGESIZE); + + datasz = read(fd, buf, MAX_PAGESIZE); + if (datasz == (ssize_t) -1) + goto err; + + for (page = 0x1000; page <= MAX_PAGESIZE; page <<= 1) { + /* skip 32k pagesize since this does not seem to + * be supported */ + if (page == 0x8000) + continue; + /* the smallest swap area is PAGE_SIZE*10 (40k), less than MAX_PAGESIZE, so the read may be short; + * the signature occupies the last SWAP_SIGNATURE_SZ bytes of the page, so the whole page must have been read */ + if (datasz < 0 || (size_t) datasz < page) + break; + if (swap_detect_signature(buf + page -
SWAP_SIGNATURE_SZ, sig)) { + *pagesize = page; + break; + } + } + + if (*pagesize) + return buf; +err: + free(buf); + return NULL; +} + +/* returns real size of swap space */ +static unsigned long long swap_get_size(const struct swap_device *dev, + const char *hdr) +{ + unsigned int last_page = 0; + const unsigned int swap_version = SWAP_VERSION; + const struct swap_header_v1_2 *s; + + assert(dev); + assert(dev->pagesize > 0); + + s = (const struct swap_header_v1_2 *) hdr; + + if (s->version == swap_version) + last_page = s->last_page; + else if (swab32(s->version) == swap_version) + last_page = swab32(s->last_page); + + return ((unsigned long long) last_page + 1) * dev->pagesize; +} + +static void swap_get_info(struct swap_device *dev, const char *hdr) +{ + const struct swap_header_v1_2 *s = (const struct swap_header_v1_2 *) hdr; + + assert(dev); + + if (s && *s->volume_name) + dev->label = xstrdup(s->volume_name); + + if (s && *s->uuid) { + const unsigned char *u = s->uuid; + char str[UUID_STR_LEN]; + + snprintf(str, sizeof(str), + "%02x%02x%02x%02x-" + "%02x%02x-%02x%02x-" + "%02x%02x-%02x%02x%02x%02x%02x%02x", + u[0], u[1], u[2], u[3], + u[4], u[5], u[6], u[7], + u[8], u[9], u[10], u[11], u[12], u[13], u[14], u[15]); + dev->uuid = xstrdup(str); + } +} + +static int swapon_checks(const struct swapon_ctl *ctl, struct swap_device *dev) +{ + struct stat st; + int fd, sig; + char *hdr = NULL; + unsigned long long devsize = 0; + int permMask; + + assert(ctl); + assert(dev); + assert(dev->path); + + fd = open(dev->path, O_RDONLY); + if (fd == -1) { + warn(_("cannot open %s"), dev->path); + goto err; + } + + if (fstat(fd, &st) < 0) { + warn(_("stat of %s failed"), dev->path); + goto err; + } + + permMask = S_ISBLK(st.st_mode) ? 
07007 : 07077; + if ((st.st_mode & permMask) != 0) + warnx(_("%s: insecure permissions %04o, %04o suggested."), + dev->path, st.st_mode & 07777, + ~permMask & 0666); + + if (S_ISREG(st.st_mode) && st.st_uid != 0) + warnx(_("%s: insecure file owner %d, 0 (root) suggested."), + dev->path, st.st_uid); + + /* test for holes by LBT */ + if (S_ISREG(st.st_mode)) { + if (st.st_blocks * 512L < st.st_size) { + warnx(_("%s: skipping - it appears to have holes."), + dev->path); + goto err; + } + devsize = st.st_size; + } + + if (S_ISBLK(st.st_mode) && blkdev_get_size(fd, &devsize)) { + warnx(_("%s: get size failed"), dev->path); + goto err; + } + + hdr = swap_get_header(fd, &sig, &dev->pagesize); + if (!hdr) { + warnx(_("%s: read swap header failed"), dev->path); + goto err; + } + + if (ctl->verbose) + warnx(_("%s: found signature [pagesize=%d, signature=%s]"), + dev->path, + dev->pagesize, + sig == SIG_SWAPSPACE ? "swap" : + sig == SIG_SWSUSPEND ? "suspend" : "unknown"); + + if (sig == SIG_SWAPSPACE && dev->pagesize) { + unsigned long long swapsize = swap_get_size(dev, hdr); + int syspg = getpagesize(); + + if (ctl->verbose) + warnx(_("%s: pagesize=%d, swapsize=%llu, devsize=%llu"), + dev->path, dev->pagesize, swapsize, devsize); + + if (swapsize > devsize) { + if (ctl->verbose) + warnx(_("%s: last_page 0x%08llx is larger" + " than actual size of swapspace"), + dev->path, swapsize); + + } else if (syspg < 0 || (unsigned int) syspg != dev->pagesize) { + if (ctl->fix_page_size) { + int rc; + + swap_get_info(dev, hdr); + + warnx(_("%s: swap format pagesize does not match."), + dev->path); + rc = swap_reinitialize(dev); + if (rc < 0) + goto err; + } else + warnx(_("%s: swap format pagesize does not match. " + "(Use --fixpgsz to reinitialize it.)"), + dev->path); + } + } else if (sig == SIG_SWSUSPEND) { + /* We have to reinitialize swap with old (=useless) software suspend + * data. 
The problem is that if we don't do it, then we get data + * corruption the next time an attempt at unsuspending is made. + */ + warnx(_("%s: software suspend data detected. " + "Rewriting the swap signature."), + dev->path); + if (swap_rewrite_signature(dev) < 0) + goto err; + } + + free(hdr); + close(fd); + return 0; +err: + if (fd != -1) + close(fd); + free(hdr); + return -1; +} + +static int do_swapon(const struct swapon_ctl *ctl, + const struct swap_prop *prop, + const char *spec, + int canonic) +{ + struct swap_device dev = { .path = NULL }; + int status; + int flags = 0; + int priority; + + assert(ctl); + assert(prop); + + if (!canonic) { + dev.path = mnt_resolve_spec(spec, mntcache); + if (!dev.path) + return cannot_find(spec); + } else + dev.path = spec; + + priority = prop->priority; + + if (swapon_checks(ctl, &dev)) + return -1; + +#ifdef SWAP_FLAG_PREFER + if (priority >= 0) { + if (priority > SWAP_FLAG_PRIO_MASK) + priority = SWAP_FLAG_PRIO_MASK; + + flags = SWAP_FLAG_PREFER + | ((priority & SWAP_FLAG_PRIO_MASK) + << SWAP_FLAG_PRIO_SHIFT); + } +#endif + /* + * Validate the discard flags passed and set them + * accordingly before calling sys_swapon. + */ + if (prop->discard && !(prop->discard & ~SWAP_FLAGS_DISCARD_VALID)) { + /* + * If we get here with both discard policy flags set, + * we just need to tell the kernel to enable discards + * and it will do correctly, just as we expect. + */ + if ((prop->discard & SWAP_FLAG_DISCARD_ONCE) && + (prop->discard & SWAP_FLAG_DISCARD_PAGES)) + flags |= SWAP_FLAG_DISCARD; + else + flags |= prop->discard; + } + + if (ctl->verbose) + printf(_("swapon %s\n"), dev.path); + + status = swapon(dev.path, flags); + if (status < 0) + warn(_("%s: swapon failed"), dev.path); + + return status; +} + +static int swapon_by_label(struct swapon_ctl *ctl, const char *label) +{ + char *device = mnt_resolve_tag("LABEL", label, mntcache); + return device ? 
do_swapon(ctl, &ctl->props, device, TRUE) : cannot_find(label); +} + +static int swapon_by_uuid(struct swapon_ctl *ctl, const char *uuid) +{ + char *device = mnt_resolve_tag("UUID", uuid, mntcache); + return device ? do_swapon(ctl, &ctl->props, device, TRUE) : cannot_find(uuid); +} + +/* -o <options> or fstab */ +static int parse_options(struct swap_prop *props, const char *options) +{ + char *arg = NULL; + size_t argsz = 0; + + assert(props); + assert(options); + + if (mnt_optstr_get_option(options, "nofail", NULL, NULL) == 0) + props->no_fail = 1; + + if (mnt_optstr_get_option(options, "discard", &arg, &argsz) == 0) { + props->discard |= SWAP_FLAG_DISCARD; + + if (arg) { + /* only single-time discards are wanted */ + if (strncmp(arg, "once", argsz) == 0) + props->discard |= SWAP_FLAG_DISCARD_ONCE; + + /* do discard for every released swap page */ + if (strncmp(arg, "pages", argsz) == 0) + props->discard |= SWAP_FLAG_DISCARD_PAGES; + } + } + + arg = NULL; + if (mnt_optstr_get_option(options, "pri", &arg, &argsz) == 0 && arg) { + char *end = NULL; + int n; + + errno = 0; + n = (int) strtol(arg, &end, 10); + if (errno == 0 && end && end > arg) + props->priority = n; + } + return 0; +} + + +static int swapon_all(struct swapon_ctl *ctl) +{ + struct libmnt_table *tb = get_fstab(); + struct libmnt_iter *itr; + struct libmnt_fs *fs; + int status = 0; + + if (!tb) + err(EXIT_FAILURE, _("failed to parse %s"), mnt_get_fstab_path()); + + itr = mnt_new_iter(MNT_ITER_FORWARD); + if (!itr) + err(EXIT_FAILURE, _("failed to initialize libmount iterator")); + + while (mnt_table_find_next_fs(tb, itr, match_swap, NULL, &fs) == 0) { + /* defaults */ + const char *opts; + const char *device; + struct swap_prop prop; /* per device setting */ + + if (mnt_fs_get_option(fs, "noauto", NULL, NULL) == 0) { + if (ctl->verbose) + warnx(_("%s: noauto option -- ignored"), mnt_fs_get_source(fs)); + continue; + } + + /* default setting */ + prop = ctl->props; + + /* overwrite default by setting 
from fstab */ + opts = mnt_fs_get_options(fs); + if (opts) + parse_options(&prop, opts); + + /* convert LABEL=, UUID= etc. from fstab to device name */ + device = mnt_resolve_spec(mnt_fs_get_source(fs), mntcache); + if (!device) { + if (!prop.no_fail) + status |= cannot_find(mnt_fs_get_source(fs)); + continue; + } + + if (is_active_swap(device)) { + if (ctl->verbose) + warnx(_("%s: already active -- ignored"), device); + continue; + } + + if (prop.no_fail && access(device, R_OK) != 0) { + if (ctl->verbose) + warnx(_("%s: inaccessible -- ignored"), device); + continue; + } + + /* swapon */ + status |= do_swapon(ctl, &prop, device, TRUE); + } + + mnt_free_iter(itr); + return status; +} + + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + size_t i; + + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options] [<spec>]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Enable devices and files for paging and swapping.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -a, --all enable all swaps from /etc/fstab\n"), out); + fputs(_(" -d, --discard[=<policy>] enable swap discards, if supported by device\n"), out); + fputs(_(" -e, --ifexists silently skip devices that do not exist\n"), out); + fputs(_(" -f, --fixpgsz reinitialize the swap space if necessary\n"), out); + fputs(_(" -o, --options <list> comma-separated list of swap options\n"), out); + fputs(_(" -p, --priority <prio> specify the priority of the swap device\n"), out); + fputs(_(" -s, --summary display summary about used swap devices (DEPRECATED)\n"), out); + fputs(_(" --show[=<columns>] display summary in definable table\n"), out); + fputs(_(" --noheadings don't print table heading (with --show)\n"), out); + fputs(_(" --raw use the raw output format (with --show)\n"), out); + fputs(_(" --bytes display swap size in bytes in --show output\n"), out); + fputs(_(" -v, --verbose verbose mode\n"), out); + + fputs(USAGE_SEPARATOR, out); + 
printf(USAGE_HELP_OPTIONS(26)); + + fputs(_("\nThe <spec> parameter:\n" \ + " -L <label> synonym for LABEL=<label>\n" + " -U <uuid> synonym for UUID=<uuid>\n" + " LABEL=<label> specifies device by swap area label\n" + " UUID=<uuid> specifies device by swap area UUID\n" + " PARTLABEL=<label> specifies device by partition label\n" + " PARTUUID=<uuid> specifies device by partition UUID\n" + " <device> name of device to be used\n" + " <file> name of file to be used\n"), out); + + fputs(_("\nAvailable discard policy types (for --discard):\n" + " once : only single-time area discards are issued\n" + " pages : freed pages are discarded before they are reused\n" + "If no policy is selected, both discard types are enabled (default).\n"), out); + + fputs(USAGE_COLUMNS, out); + for (i = 0; i < ARRAY_SIZE(infos); i++) + fprintf(out, " %-5s %s\n", infos[i].name, _(infos[i].help)); + + printf(USAGE_MAN_TAIL("swapon(8)")); + exit(EXIT_SUCCESS); +} + +int main(int argc, char *argv[]) +{ + int status = 0, c; + size_t i; + char *options = NULL; + + enum { + BYTES_OPTION = CHAR_MAX + 1, + NOHEADINGS_OPTION, + RAW_OPTION, + SHOW_OPTION, + OPT_LIST_TYPES + }; + + static const struct option long_opts[] = { + { "priority", required_argument, NULL, 'p' }, + { "discard", optional_argument, NULL, 'd' }, + { "ifexists", no_argument, NULL, 'e' }, + { "options", required_argument, NULL, 'o' }, /* must agree with "o:" in the short option string, so "--options <list>" takes the next word as its argument */ + { "summary", no_argument, NULL, 's' }, + { "fixpgsz", no_argument, NULL, 'f' }, + { "all", no_argument, NULL, 'a' }, + { "help", no_argument, NULL, 'h' }, + { "verbose", no_argument, NULL, 'v' }, + { "version", no_argument, NULL, 'V' }, + { "show", optional_argument, NULL, SHOW_OPTION }, + { "output-all", no_argument, NULL, OPT_LIST_TYPES }, + { "noheadings", no_argument, NULL, NOHEADINGS_OPTION }, + { "raw", no_argument, NULL, RAW_OPTION }, + { "bytes", no_argument, NULL, BYTES_OPTION }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + {
'a','o','s', SHOW_OPTION }, + { 'a','o', BYTES_OPTION }, + { 'a','o', NOHEADINGS_OPTION }, + { 'a','o', RAW_OPTION }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + struct swapon_ctl ctl; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + memset(&ctl, 0, sizeof(struct swapon_ctl)); + ctl.props.priority = -1; + + mnt_init_debug(0); + mntcache = mnt_new_cache(); + + while ((c = getopt_long(argc, argv, "ahd::efo:p:svVL:U:", + long_opts, NULL)) != -1) { + + err_exclusive_options(c, long_opts, excl, excl_st); + + switch (c) { + case 'a': /* all */ + ctl.all = 1; + break; + case 'o': + options = optarg; + break; + case 'p': /* priority */ + ctl.props.priority = strtos16_or_err(optarg, + _("failed to parse priority")); + break; + case 'L': + add_label(optarg); + break; + case 'U': + add_uuid(optarg); + break; + case 'd': + ctl.props.discard |= SWAP_FLAG_DISCARD; + if (optarg) { + if (*optarg == '=') + optarg++; + + if (strcmp(optarg, "once") == 0) + ctl.props.discard |= SWAP_FLAG_DISCARD_ONCE; + else if (strcmp(optarg, "pages") == 0) + ctl.props.discard |= SWAP_FLAG_DISCARD_PAGES; + else + errx(EXIT_FAILURE, _("unsupported discard policy: %s"), optarg); + } + break; + case 'e': /* ifexists */ + ctl.props.no_fail = 1; + break; + case 'f': + ctl.fix_page_size = 1; + break; + case 's': /* status report */ + status = display_summary(); + return status; + case 'v': /* be chatty */ + ctl.verbose = 1; + break; + case SHOW_OPTION: + if (optarg) { + ctl.ncolumns = string_to_idarray(optarg, + ctl.columns, + ARRAY_SIZE(ctl.columns), + column_name_to_id); + if (ctl.ncolumns < 0) + return EXIT_FAILURE; + } + ctl.show = 1; + break; + case OPT_LIST_TYPES: + for (ctl.ncolumns = 0; (size_t)ctl.ncolumns < ARRAY_SIZE(infos); ctl.ncolumns++) + ctl.columns[ctl.ncolumns] = ctl.ncolumns; + break; + case NOHEADINGS_OPTION: + ctl.no_heading = 1; + break; + case RAW_OPTION: + ctl.raw = 1; + break; + case 
BYTES_OPTION: + ctl.bytes = 1; + break; + case 0: + break; + + case 'h': /* help */ + usage(); + case 'V': /* version */ + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + argv += optind; + + if (ctl.show || (!ctl.all && !numof_labels() && !numof_uuids() && *argv == NULL)) { + if (!ctl.ncolumns) { + /* default columns */ + ctl.columns[ctl.ncolumns++] = COL_PATH; + ctl.columns[ctl.ncolumns++] = COL_TYPE; + ctl.columns[ctl.ncolumns++] = COL_SIZE; + ctl.columns[ctl.ncolumns++] = COL_USED; + ctl.columns[ctl.ncolumns++] = COL_PRIO; + } + status = show_table(&ctl); + return status; + } + + if (ctl.props.no_fail && !ctl.all) { + warnx(_("bad usage")); + errtryhelp(EXIT_FAILURE); + } + + if (ctl.all) + status |= swapon_all(&ctl); + + if (options) + parse_options(&ctl.props, options); + + for (i = 0; i < numof_labels(); i++) + status |= swapon_by_label(&ctl, get_label(i)); + + for (i = 0; i < numof_uuids(); i++) + status |= swapon_by_uuid(&ctl, get_uuid(i)); + + while (*argv != NULL) + status |= do_swapon(&ctl, &ctl.props, *argv++, FALSE); + + free_tables(); + mnt_unref_cache(mntcache); + + return status; +} diff --git a/sys-utils/switch_root.8 b/sys-utils/switch_root.8 new file mode 100644 index 0000000..e295312 --- /dev/null +++ b/sys-utils/switch_root.8 @@ -0,0 +1,85 @@ +'\" t +.\" Title: switch_root +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-08-02 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "SWITCH_ROOT" "8" "2022-08-02" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. 
LINKSTYLE blue R < > +.\} +.SH "NAME" +switch_root \- switch to another filesystem as the root of the mount tree +.SH "SYNOPSIS" +.sp +\fBswitch_root\fP [\fB\-hV\fP] +.sp +\fBswitch_root\fP \fInewroot init\fP [\fIarg\fP...] +.SH "DESCRIPTION" +.sp +\fBswitch_root\fP moves already mounted \fI/proc\fP, \fI/dev\fP, \fI/sys\fP and \fI/run\fP to \fInewroot\fP and makes \fInewroot\fP the new root filesystem and starts \fIinit\fP process. +.sp +\fBWARNING: switch_root removes recursively all files and directories on the current root filesystem.\fP +.SH "OPTIONS" +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "EXIT STATUS" +.sp +\fBswitch_root\fP returns 1 on failure, it never returns on success. +.SH "NOTES" +.sp +\fBswitch_root\fP will fail to function if \fInewroot\fP is not the root of a mount. If you want to switch root into a directory that does not meet this requirement then you can first use a bind\-mounting trick to turn any directory into a mount point: +.sp +.if n .RS 4 +.nf +.fam C +mount \-\-bind $DIR $DIR +.fam +.fi +.if n .RE +.SH "AUTHORS" +.sp +.MTO "pjones\(atredhat.com" "Peter Jones" "," +.MTO "katzj\(atredhat.com" "Jeremy Katz" "," +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +\fBchroot\fP(2), +\fBinit\fP(8), +\fBmkinitrd\fP(8), +\fBmount\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBswitch_root\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/switch_root.8.adoc b/sys-utils/switch_root.8.adoc new file mode 100644 index 0000000..fdeedf3 --- /dev/null +++ b/sys-utils/switch_root.8.adoc @@ -0,0 +1,60 @@ +//po4a: entry man manual += switch_root(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: switch_root + +== NAME + +switch_root - switch to another filesystem as the root of the mount tree + +== SYNOPSIS + +*switch_root* [*-hV*] + +*switch_root* _newroot init_ [_arg_...] + +== DESCRIPTION + +*switch_root* moves already mounted _/proc_, _/dev_, _/sys_ and _/run_ to _newroot_ and makes _newroot_ the new root filesystem and starts _init_ process. + +*WARNING: switch_root removes recursively all files and directories on the current root filesystem.* + +== OPTIONS + +include::man-common/help-version.adoc[] + +== EXIT STATUS + +*switch_root* returns 1 on failure, it never returns on success. + +== NOTES + +*switch_root* will fail to function if _newroot_ is not the root of a mount. If you want to switch root into a directory that does not meet this requirement then you can first use a bind-mounting trick to turn any directory into a mount point: + +.... +mount --bind $DIR $DIR +.... + +== AUTHORS + +mailto:pjones@redhat.com[Peter Jones], +mailto:katzj@redhat.com[Jeremy Katz], +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +*chroot*(2), +*init*(8), +*mkinitrd*(8), +*mount*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/switch_root.c b/sys-utils/switch_root.c new file mode 100644 index 0000000..174eec6 --- /dev/null +++ b/sys-utils/switch_root.c @@ -0,0 +1,283 @@ +/* + * switchroot.c - switch to new root directory and start init. + * + * Copyright 2002-2009 Red Hat, Inc. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * Authors: + * Peter Jones <pjones@redhat.com> + * Jeremy Katz <katzj@redhat.com> + */ +#include <sys/mount.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/statfs.h> +#include <sys/param.h> +#include <fcntl.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <ctype.h> +#include <dirent.h> +#include <getopt.h> + +#include "c.h" +#include "nls.h" +#include "closestream.h" +#include "statfs_magic.h" + +#ifndef MS_MOVE +#define MS_MOVE 8192 +#endif + +#ifndef MNT_DETACH +#define MNT_DETACH 0x00000002 /* Just detach from the tree */ +#endif + +/* remove all files/directories below dirName -- don't cross mountpoints */ +static int recursiveRemove(int fd) +{ + struct stat rb; + DIR *dir; + int rc = -1; + int dfd; + + if (!(dir = fdopendir(fd))) { + warn(_("failed to open directory")); + goto done; + } + + /* fdopendir() precludes us from continuing to use the input fd */ + dfd = dirfd(dir); + if (fstat(dfd, &rb)) { + warn(_("stat failed")); + goto done; + } + + while(1) { + struct dirent *d; + int isdir = 0; + + errno = 0; + if (!(d = readdir(dir))) { + if (errno) { + warn(_("failed to read directory")); + goto done; + } + break; /* end of directory */ + } + + if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) + continue; +#ifdef 
_DIRENT_HAVE_D_TYPE + if (d->d_type == DT_DIR || d->d_type == DT_UNKNOWN) +#endif + { + struct stat sb; + + if (fstatat(dfd, d->d_name, &sb, AT_SYMLINK_NOFOLLOW)) { + warn(_("stat of %s failed"), d->d_name); + continue; + } + + /* skip if device is not the same */ + if (sb.st_dev != rb.st_dev) + continue; + + /* remove subdirectories */ + if (S_ISDIR(sb.st_mode)) { + int cfd; + + cfd = openat(dfd, d->d_name, O_RDONLY); + if (cfd >= 0) + recursiveRemove(cfd); /* it closes cfd too */ + isdir = 1; + } + } + + if (unlinkat(dfd, d->d_name, isdir ? AT_REMOVEDIR : 0)) + warn(_("failed to unlink %s"), d->d_name); + } + + rc = 0; /* success */ +done: + if (dir) + closedir(dir); + else + close(fd); + return rc; +} + +static int switchroot(const char *newroot) +{ + /* Don't try to unmount the old "/", there's no way to do it. */ + const char *umounts[] = { "/dev", "/proc", "/sys", "/run", NULL }; + int i; + int cfd = -1; + struct stat newroot_stat, oldroot_stat, sb; + + if (stat("/", &oldroot_stat) != 0) { + warn(_("stat of %s failed"), "/"); + return -1; + } + + if (stat(newroot, &newroot_stat) != 0) { + warn(_("stat of %s failed"), newroot); + return -1; + } + + for (i = 0; umounts[i] != NULL; i++) { + char newmount[PATH_MAX]; + + snprintf(newmount, sizeof(newmount), "%s%s", newroot, umounts[i]); + + if ((stat(umounts[i], &sb) == 0) && sb.st_dev == oldroot_stat.st_dev) { + /* mount point to move seems to be a normal directory or stat failed */ + continue; + } + + if ((stat(newmount, &sb) != 0) || (sb.st_dev != newroot_stat.st_dev)) { + /* mount point seems to be mounted already or stat failed */ + umount2(umounts[i], MNT_DETACH); + continue; + } + + if (mount(umounts[i], newmount, NULL, MS_MOVE, NULL) < 0) { + warn(_("failed to mount moving %s to %s"), + umounts[i], newmount); + warnx(_("forcing unmount of %s"), umounts[i]); + umount2(umounts[i], MNT_FORCE); + } + } + + if (chdir(newroot)) { + warn(_("failed to change directory to %s"), newroot); + return -1; + } + + cfd = 
open("/", O_RDONLY); + if (cfd < 0) { + warn(_("cannot open %s"), "/"); + goto fail; + } + + if (mount(newroot, "/", NULL, MS_MOVE, NULL) < 0) { + warn(_("failed to mount moving %s to /"), newroot); + goto fail; + } + + if (chroot(".")) { + warn(_("failed to change root")); + goto fail; + } + + if (chdir("/")) { + warn(_("cannot change directory to %s"), "/"); + goto fail; + } + + switch (fork()) { + case 0: /* child */ + { + struct statfs stfs; + + if (fstatfs(cfd, &stfs) == 0 && + (F_TYPE_EQUAL(stfs.f_type, STATFS_RAMFS_MAGIC) || + F_TYPE_EQUAL(stfs.f_type, STATFS_TMPFS_MAGIC))) + recursiveRemove(cfd); + else { + warn(_("old root filesystem is not an initramfs")); + close(cfd); + } + exit(EXIT_SUCCESS); + } + case -1: /* error */ + break; + + default: /* parent */ + close(cfd); + return 0; + } + +fail: + if (cfd >= 0) + close(cfd); + return -1; +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *output = stdout; + fputs(USAGE_HEADER, output); + fprintf(output, _(" %s [options] <newrootdir> <init> <args to init>\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, output); + fputs(_("Switch to another filesystem as the root of the mount tree.\n"), output); + + fputs(USAGE_OPTIONS, output); + printf(USAGE_HELP_OPTIONS(16)); + printf(USAGE_MAN_TAIL("switch_root(8)")); + + exit(EXIT_SUCCESS); +} + +int main(int argc, char *argv[]) +{ + char *newroot, *init, **initargs; + int c; + static const struct option longopts[] = { + {"version", no_argument, NULL, 'V'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, "+Vh", longopts, NULL)) != -1) + switch (c) { + case 'V': + print_version(EXIT_SUCCESS); + case 'h': + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + if (argc < 3) { + warnx(_("not enough arguments")); + errtryhelp(EXIT_FAILURE); + } + + newroot = argv[1]; + init = argv[2]; + initargs = &argv[2]; + + if (!*newroot || !*init) { + warnx(_("bad 
usage")); + errtryhelp(EXIT_FAILURE); + } + + if (switchroot(newroot)) + errx(EXIT_FAILURE, _("failed. Sorry.")); + + if (access(init, X_OK)) + warn(_("cannot access %s"), init); + + execv(init, initargs); + errexec(init); +} + diff --git a/sys-utils/tunelp.8 b/sys-utils/tunelp.8 new file mode 100644 index 0000000..48f106b --- /dev/null +++ b/sys-utils/tunelp.8 @@ -0,0 +1,111 @@ +'\" t +.\" Title: tunelp +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "TUNELP" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +tunelp \- set various parameters for the lp device +.SH "SYNOPSIS" +.sp +\fBtunelp\fP [options] \fIdevice\fP +.SH "DESCRIPTION" +.sp +\fBtunelp\fP sets several parameters for the /dev/lp\fI?\fP devices, for better performance (or for any performance at all, if your printer won\(cqt work without it...) Without parameters, it tells whether the device is using interrupts, and if so, which one. With parameters, it sets the device characteristics accordingly. +.SH "OPTIONS" +.sp +\fB\-i\fP, \fB\-\-irq\fP \fIargument\fP +.RS 4 +specifies the IRQ to use for the parallel port in question. If this is set to something non\-zero, \fB\-t\fP and \fB\-c\fP have no effect. If your port does not use interrupts, this option will make printing stop. The command \fBtunelp \-i 0\fP restores non\-interrupt driven (polling) action, and your printer should work again. If your parallel port does support interrupts, interrupt\-driven printing should be somewhat faster and efficient, and will probably be desirable. 
+.sp +\fBNOTE\fP: This option will have no effect with kernel 2.1.131 or later since the irq is handled by the parport driver. You can change the parport irq for example via \fI/proc/parport/*/irq\fP. Read \fI/usr/src/linux/Documentation/admin\-guide/parport.rst\fP for more details on parport. +.RE +.sp +\fB\-t\fP, \fB\-\-time\fP \fImilliseconds\fP +.RS 4 +is the amount of time in jiffies that the driver waits if the printer doesn\(cqt take a character for the number of tries dictated by the \fB\-c\fP parameter. 10 is the default value. If you want fastest possible printing, and don\(cqt care about system load, you may set this to 0. If you don\(cqt care how fast your printer goes, or are printing text on a slow printer with a buffer, then 500 (5 seconds) should be fine, and will give you very low system load. This value generally should be lower for printing graphics than text, by a factor of approximately 10, for best performance. +.RE +.sp +\fB\-c\fP, \fB\-\-chars\fP \fIcharacters\fP +.RS 4 +is the number of times to try to output a character to the printer before sleeping for \fB\-t\fP \fITIME\fP. It is the number of times around a loop that tries to send a character to the printer. 120 appears to be a good value for most printers in polling mode. 1000 is the default, because there are some printers that become jerky otherwise, but you \fImust\fP set this to \(aq1\(aq to handle the maximal CPU efficiency if you are using interrupts. If you have a very fast printer, a value of 10 might make more sense even if in polling mode. If you have a \fIreally\fP old printer, you can increase this further. +.sp +Setting \fB\-t\fP \fITIME\fP to 0 is equivalent to setting \fB\-c\fP \fICHARS\fP to infinity. +.RE +.sp +\fB\-w\fP, \fB\-\-wait\fP \fImilliseconds\fP +.RS 4 +is the number of usec we wait while playing with the strobe signal. While most printers appear to be able to deal with an extremely short strobe, some printers demand a longer one. 
Increasing this from the default 1 may make it possible to print with those printers. This may also make it possible to use longer cables. It\(cqs also possible to decrease this value to 0 if your printer is fast enough or your machine is slow enough. +.RE +.sp +\fB\-a\fP, \fB\-\-abort\fP \fI<on|off>\fP +.RS 4 +This is whether to abort on printer error \- the default is not to. If you are sitting at your computer, you probably want to be able to see an error and fix it, and have the printer go on printing. On the other hand, if you aren\(cqt, you might rather that your printer spooler find out that the printer isn\(cqt ready, quit trying, and send you mail about it. The choice is yours. +.RE +.sp +\fB\-o\fP, \fB\-\-check\-status\fP \fI<on|off>\fP +.RS 4 +This option is much like \fB\-a\fP. It makes any \fBopen\fP(2) of this device check to see that the device is on\-line and not reporting any out of paper or other errors. This is the correct setting for most versions of \fBlpd\fP. +.RE +.sp +\fB\-C\fP, \fB\-\-careful\fP \fI<on|off>\fP +.RS 4 +This option adds extra ("careful") error checking. When this option is on, the printer driver will ensure that the printer is on\-line and not reporting any out of paper or other errors before sending data. This is particularly useful for printers that normally appear to accept data when turned off. +.sp +\fBNOTE\fP: This option is obsolete because it\(cqs the default in 2.1.131 kernel or later. +.RE +.sp +\fB\-s\fP, \fB\-\-status\fP +.RS 4 +This option returns the current printer status, both as a decimal number from 0..255, and as a list of active flags. When this option is specified, \fB\-q\fP off, turning off the display of the current IRQ, is implied. +.RE +.sp +\fB\-r\fP, \fB\-\-reset\fP +.RS 4 +This option resets the port. It requires a Linux kernel version of 1.1.80 or later. +.RE +.sp +\fB\-q\fP, \fB\-\-print\-irq\fP \fI<on|off>\fP +.RS 4 +This option sets printing the display of the current IRQ setting. 
+.RE +.SH "FILES" +.sp +\fI/dev/lp?\fP, +\fI/proc/parport/*/*\fP +.SH "NOTES" +.sp +\fB\-o\fP, \fB\-C\fP, and \fB\-s\fP all require a Linux kernel version of 1.1.76 or later. +.sp +\fB\-C\fP requires a Linux version prior to 2.1.131. +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBtunelp\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/tunelp.8.adoc b/sys-utils/tunelp.8.adoc new file mode 100644 index 0000000..37140b3 --- /dev/null +++ b/sys-utils/tunelp.8.adoc @@ -0,0 +1,76 @@ +//po4a: entry man manual += tunelp(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: tunelp + +== NAME + +tunelp - set various parameters for the lp device + +== SYNOPSIS + +*tunelp* [options] _device_ + +== DESCRIPTION + +*tunelp* sets several parameters for the /dev/lp__?__ devices, for better performance (or for any performance at all, if your printer won't work without it...) Without parameters, it tells whether the device is using interrupts, and if so, which one. With parameters, it sets the device characteristics accordingly. + +== OPTIONS + +*-i*, *--irq* _argument_:: +specifies the IRQ to use for the parallel port in question. If this is set to something non-zero, *-t* and *-c* have no effect. If your port does not use interrupts, this option will make printing stop. The command *tunelp -i 0* restores non-interrupt driven (polling) action, and your printer should work again. If your parallel port does support interrupts, interrupt-driven printing should be somewhat faster and efficient, and will probably be desirable. ++ +*NOTE*: This option will have no effect with kernel 2.1.131 or later since the irq is handled by the parport driver. You can change the parport irq for example via _/proc/parport/*/irq_. Read _/usr/src/linux/Documentation/admin-guide/parport.rst_ for more details on parport. + +*-t*, *--time* _milliseconds_:: +is the amount of time in jiffies that the driver waits if the printer doesn't take a character for the number of tries dictated by the *-c* parameter. 10 is the default value. If you want fastest possible printing, and don't care about system load, you may set this to 0. 
If you don't care how fast your printer goes, or are printing text on a slow printer with a buffer, then 500 (5 seconds) should be fine, and will give you very low system load. This value generally should be lower for printing graphics than text, by a factor of approximately 10, for best performance. + +*-c*, *--chars* _characters_:: +is the number of times to try to output a character to the printer before sleeping for *-t* _TIME_. It is the number of times around a loop that tries to send a character to the printer. 120 appears to be a good value for most printers in polling mode. 1000 is the default, because there are some printers that become jerky otherwise, but you _must_ set this to '1' to handle the maximal CPU efficiency if you are using interrupts. If you have a very fast printer, a value of 10 might make more sense even if in polling mode. If you have a _really_ old printer, you can increase this further. ++ +Setting *-t* _TIME_ to 0 is equivalent to setting *-c* _CHARS_ to infinity. + +*-w*, *--wait* _milliseconds_:: +is the number of usec we wait while playing with the strobe signal. While most printers appear to be able to deal with an extremely short strobe, some printers demand a longer one. Increasing this from the default 1 may make it possible to print with those printers. This may also make it possible to use longer cables. It's also possible to decrease this value to 0 if your printer is fast enough or your machine is slow enough. + +*-a*, *--abort* _<on|off>_:: +This is whether to abort on printer error - the default is not to. If you are sitting at your computer, you probably want to be able to see an error and fix it, and have the printer go on printing. On the other hand, if you aren't, you might rather that your printer spooler find out that the printer isn't ready, quit trying, and send you mail about it. The choice is yours. + +*-o*, *--check-status* _<on|off>_:: +This option is much like *-a*. 
It makes any *open*(2) of this device check to see that the device is on-line and not reporting any out of paper or other errors. This is the correct setting for most versions of *lpd*. + +*-C*, *--careful* _<on|off>_:: +This option adds extra ("careful") error checking. When this option is on, the printer driver will ensure that the printer is on-line and not reporting any out of paper or other errors before sending data. This is particularly useful for printers that normally appear to accept data when turned off. ++ +*NOTE*: This option is obsolete because it's the default in 2.1.131 kernel or later. + +*-s*, *--status*:: +This option returns the current printer status, both as a decimal number from 0..255, and as a list of active flags. When this option is specified, *-q* off, turning off the display of the current IRQ, is implied. + +*-r*, *--reset*:: +This option resets the port. It requires a Linux kernel version of 1.1.80 or later. + +*-q*, *--print-irq* _<on|off>_:: +This option sets printing the display of the current IRQ setting. + +== FILES + +_/dev/lp?_, +_/proc/parport/*/*_ + +== NOTES + +*-o*, *-C*, and *-s* all require a Linux kernel version of 1.1.76 or later. + +*-C* requires a Linux version prior to 2.1.131. + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/tunelp.c b/sys-utils/tunelp.c new file mode 100644 index 0000000..731acd1 --- /dev/null +++ b/sys-utils/tunelp.c @@ -0,0 +1,319 @@ +/* + * Copyright (C) 1992-1997 Michael K. Johnson, johnsonm@redhat.com + * + * This file is licensed under the terms of the GNU General Public + * License, version 2, or any later version. See file COPYING for + * information on distribution conditions. + */ + +/* + * This command is deprecated. The utility is in maintenance mode, + * meaning we keep them in source tree for backward compatibility + * only. 
Do not waste time making this command better, unless the + * fix is about security or other very critical issue. + * + * See Documentation/deprecated.txt for more information. + */ + +/* + * $Log: tunelp.c,v $ + * Revision 1.9 1998/06/08 19:37:11 janl + * Thus compiles tunelp with 2.1.103 kernels + * + * Revision 1.8 1997/07/06 00:14:06 aebr + * Fixes to silence -Wall. + * + * Revision 1.7 1997/06/20 16:10:38 janl + * tunelp refreshed from authors archive. + * + * Revision 1.9 1997/06/20 12:56:43 johnsonm + * Finished fixing license terms. + * + * Revision 1.8 1997/06/20 12:34:59 johnsonm + * Fixed copyright and license. + * + * Revision 1.7 1995/03/29 11:16:23 johnsonm + * TYPO fixed... + * + * Revision 1.6 1995/03/29 11:12:15 johnsonm + * Added third argument to ioctl needed with new kernels + * + * Revision 1.5 1995/01/13 10:33:43 johnsonm + * Chris's changes for new ioctl numbers and backwards compatibility + * and the reset ioctl. + * + * Revision 1.4 1995/01/03 17:42:14 johnsonm + * -s isn't supposed to take an argument; removed : after s in getopt... 
+ * + * Revision 1.3 1995/01/03 07:36:49 johnsonm + * Fixed typo + * + * Revision 1.2 1995/01/03 07:33:44 johnsonm + * revisions for lp driver updates in Linux 1.1.76 + * + * 1999-02-22 Arkadiusz Miśkiewicz <misiek@pld.ORG.PL> + * - added Native Language Support + * + * 1999-05-07 Merged LPTRUSTIRQ patch by Andrea Arcangeli (1998/11/29), aeb + * + */ + +#include <errno.h> +#include <fcntl.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <unistd.h> + +#include <linux/lp.h> + +#include "nls.h" +#include "closestream.h" +#include "strutils.h" + +#define EXIT_LP_MALLOC 2 +#define EXIT_LP_BADVAL 3 +#define EXIT_LP_IO_ERR 4 + +#define XALLOC_EXIT_CODE EXIT_LP_MALLOC +#include "xalloc.h" + +struct command { + long op; + long val; + struct command *next; +}; + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options] <device>\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Set various parameters for the line printer.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -i, --irq <num> specify parallel port irq\n"), out); + fputs(_(" -t, --time <ms> driver wait time in milliseconds\n"), out); + fputs(_(" -c, --chars <num> number of output characters before sleep\n"), out); + fputs(_(" -w, --wait <us> strobe wait in micro seconds\n"), out); + /* TRANSLATORS: do not translate <on|off> arguments. The + argument reader does not recognize locale, unless `on' is + exactly that very same string. 
*/ + fputs(_(" -a, --abort <on|off> abort on error\n"), out); + fputs(_(" -o, --check-status <on|off> check printer status before printing\n"), out); + fputs(_(" -C, --careful <on|off> extra checking to status check\n"), out); + fputs(_(" -s, --status query printer status\n"), out); + fputs(_(" -r, --reset reset the port\n"), out); + fputs(_(" -q, --print-irq <on|off> display current irq setting\n"), out); + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(30)); + printf(USAGE_MAN_TAIL("tunelp(8)")); + + exit(EXIT_SUCCESS); +} + +int main(int argc, char **argv) +{ + int c, fd, irq, status, show_irq, offset = 0, retval; + char *filename; + struct stat statbuf; + struct command *cmds, *cmdst; + static const struct option longopts[] = { + {"irq", required_argument, NULL, 'i'}, + {"time", required_argument, NULL, 't'}, + {"chars", required_argument, NULL, 'c'}, + {"wait", required_argument, NULL, 'w'}, + {"abort", required_argument, NULL, 'a'}, + {"check-status", required_argument, NULL, 'o'}, + {"careful", required_argument, NULL, 'C'}, + {"status", no_argument, NULL, 's'}, + {"trust-irq", required_argument, NULL, 'T'}, + {"reset", no_argument, NULL, 'r'}, + {"print-irq", required_argument, NULL, 'q'}, + {"version", no_argument, NULL, 'V'}, + {"help", no_argument, NULL, 'h'}, + {NULL, 0, NULL, 0} + }; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + strutils_set_exitcode(EXIT_LP_BADVAL); + + if (argc < 2) { + warnx(_("not enough arguments")); + errtryhelp(EXIT_FAILURE); + } + + cmdst = cmds = xmalloc(sizeof(struct command)); + cmds->next = NULL; + + show_irq = 1; + while ((c = getopt_long(argc, argv, "t:c:w:a:i:ho:C:sq:rT:vV", longopts, NULL)) != -1) { + switch (c) { + case 'i': + cmds->op = LPSETIRQ; + cmds->val = strtol_or_err(optarg, _("argument error")); + cmds->next = xmalloc(sizeof(struct command)); + cmds = cmds->next; + cmds->next = NULL; + break; + case 't': + cmds->op = LPTIME; + 
cmds->val = strtol_or_err(optarg, _("argument error")); + cmds->next = xmalloc(sizeof(struct command)); + cmds = cmds->next; + cmds->next = NULL; + break; + case 'c': + cmds->op = LPCHAR; + cmds->val = strtol_or_err(optarg, _("argument error")); + cmds->next = xmalloc(sizeof(struct command)); + cmds = cmds->next; + cmds->next = NULL; + break; + case 'w': + cmds->op = LPWAIT; + cmds->val = strtol_or_err(optarg, _("argument error")); + cmds->next = xmalloc(sizeof(struct command)); + cmds = cmds->next; + cmds->next = NULL; + break; + case 'a': + cmds->op = LPABORT; + cmds->val = parse_switch(optarg, _("argument error"), "on", "off", NULL); + cmds->next = xmalloc(sizeof(struct command)); + cmds = cmds->next; + cmds->next = NULL; + break; + case 'q': + show_irq = parse_switch(optarg, _("argument error"), "on", "off", NULL); + break; + case 'o': + cmds->op = LPABORTOPEN; + cmds->val = parse_switch(optarg, _("argument error"), "on", "off", NULL); + cmds->next = xmalloc(sizeof(struct command)); + cmds = cmds->next; + cmds->next = NULL; + break; + case 'C': + cmds->op = LPCAREFUL; + cmds->val = parse_switch(optarg, _("argument error"), "on", "off", NULL); + cmds->next = xmalloc(sizeof(struct command)); + cmds = cmds->next; + cmds->next = NULL; + break; + case 's': + show_irq = 0; + cmds->op = LPGETSTATUS; + cmds->val = 0; + cmds->next = xmalloc(sizeof(struct command)); + cmds = cmds->next; + cmds->next = NULL; + break; + case 'r': + cmds->op = LPRESET; + cmds->val = 0; + cmds->next = xmalloc(sizeof(struct command)); + cmds = cmds->next; + cmds->next = NULL; + break; + + case 'h': + usage(); + case 'v': + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (optind != argc - 1) { + warnx(_("no device specified")); + errtryhelp(EXIT_FAILURE); + } + + filename = xstrdup(argv[optind]); + fd = open(filename, O_WRONLY | O_NONBLOCK, 0); + /* Need to open O_NONBLOCK in case ABORTOPEN is already set + * and printer is off or off-line or in 
an error condition. + * Otherwise we would abort... + */ + if (fd < 0) + err(EXIT_FAILURE, "%s", filename); + + if (fstat(fd, &statbuf)) + err(EXIT_FAILURE, "%s: stat() failed", filename); + + if (!S_ISCHR(statbuf.st_mode)) { + warnx(_("%s not an lp device"), filename); + errtryhelp(EXIT_FAILURE); + } + /* Allow for binaries compiled under a new kernel to work on + * the old ones The irq argument to ioctl isn't touched by + * the old kernels, but we don't want to cause the kernel to + * complain if we are using a new kernel + */ + if (LPGETIRQ >= 0x0600 && ioctl(fd, LPGETIRQ, &irq) < 0 + && errno == EINVAL) + /* We don't understand the new ioctls */ + offset = 0x0600; + + cmds = cmdst; + while (cmds->next) { + if (cmds->op == LPGETSTATUS) { + status = 0xdeadbeef; + retval = ioctl(fd, LPGETSTATUS - offset, &status); + if (retval < 0) + warnx(_("LPGETSTATUS error")); + else { + if (status == (int)0xdeadbeef) + /* a few 1.1.7x kernels will do this */ + status = retval; + printf(_("%s status is %d"), filename, status); + if (!(status & LP_PBUSY)) + printf(_(", busy")); + if (!(status & LP_PACK)) + printf(_(", ready")); + if ((status & LP_POUTPA)) + printf(_(", out of paper")); + if ((status & LP_PSELECD)) + printf(_(", on-line")); + if (!(status & LP_PERRORP)) + printf(_(", error")); + printf("\n"); + } + } else if (ioctl(fd, cmds->op - offset, cmds->val) < 0) + warn(_("ioctl failed")); + cmdst = cmds; + cmds = cmds->next; + free(cmdst); + } + + if (show_irq) { + irq = 0xdeadbeef; + retval = ioctl(fd, LPGETIRQ - offset, &irq); + if (retval == -1) + err(EXIT_LP_IO_ERR, _("LPGETIRQ error")); + if (irq == (int)0xdeadbeef) + /* up to 1.1.77 will do this */ + irq = retval; + if (irq) + printf(_("%s using IRQ %d\n"), filename, irq); + else + printf(_("%s using polling\n"), filename); + } + free(filename); + close(fd); + + return EXIT_SUCCESS; +} diff --git a/sys-utils/umount.8 b/sys-utils/umount.8 new file mode 100644 index 0000000..9bdb603 --- /dev/null +++ 
b/sys-utils/umount.8 @@ -0,0 +1,232 @@ +'\" t +.\" Title: umount +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-08-04 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "UMOUNT" "8" "2022-08-04" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +umount \- unmount filesystems +.SH "SYNOPSIS" +.sp +\fBumount\fP \fB\-a\fP [\fB\-dflnrv\fP] [\fB\-t\fP \fIfstype\fP] [\fB\-O\fP \fIoption\fP...] +.sp +\fBumount\fP [\fB\-dflnrv\fP] {\fIdirectory\fP|\fIdevice\fP} +.sp +\fBumount\fP \fB\-h\fP|\fB\-V\fP +.SH "DESCRIPTION" +.sp +The \fBumount\fP command detaches the mentioned filesystem(s) from the file hierarchy. A filesystem is specified by giving the directory where it has been mounted. Giving the special device on which the filesystem lives may also work, but is obsolete, mainly because it will fail in case this device was mounted on more than one directory. +.sp +Note that a filesystem cannot be unmounted when it is \(aqbusy\(aq \- for example, when there are open files on it, or when some process has its working directory there, or when a swap file on it is in use. The offending process could even be \fBumount\fP itself \- it opens libc, and libc in its turn may open for example locale files. A lazy unmount avoids this problem, but it may introduce other issues. See \fB\-\-lazy\fP description below. +.SH "OPTIONS" +.sp +\fB\-a\fP, \fB\-\-all\fP +.RS 4 +All of the filesystems described in \fI/proc/self/mountinfo\fP (or in deprecated \fI/etc/mtab\fP) are unmounted, except the proc, devfs, devpts, sysfs, rpc_pipefs and nfsd filesystems. This list of the filesystems may be replaced by \fB\-\-types\fP umount option. 
+.RE +.sp +\fB\-A\fP, \fB\-\-all\-targets\fP +.RS 4 +Unmount all mountpoints in the current mount namespace for the specified filesystem. The filesystem can be specified by one of the mountpoints or the device name (or UUID, etc.). When this option is used together with \fB\-\-recursive\fP, then all nested mounts within the filesystem are recursively unmounted. This option is only supported on systems where \fI/etc/mtab\fP is a symlink to \fI/proc/mounts\fP. +.RE +.sp +\fB\-c\fP, \fB\-\-no\-canonicalize\fP +.RS 4 +Do not canonicalize paths. The paths canonicalization is based on \fBstat\fP(2) and \fBreadlink\fP(2) system calls. These system calls may hang in some cases (for example on NFS if server is not available). The option has to be used with canonical path to the mount point. +.sp +This option is silently ignored by \fBumount\fP for non\-root users. +.sp +For more details about this option see the \fBmount\fP(8) man page. Note that \fBumount\fP does not pass this option to the \fB/sbin/umount.\fP\fItype\fP helpers. +.RE +.sp +\fB\-d\fP, \fB\-\-detach\-loop\fP +.RS 4 +When the unmounted device was a loop device, also free this loop device. This option is unnecessary for devices initialized by \fBmount\fP(8), in this case "autoclear" functionality is enabled by default. +.RE +.sp +\fB\-\-fake\fP +.RS 4 +Causes everything to be done except for the actual system call or umount helper execution; this \(aqfakes\(aq unmounting the filesystem. It can be used to remove entries from the deprecated \fI/etc/mtab\fP that were unmounted earlier with the \fB\-n\fP option. +.RE +.sp +\fB\-f\fP, \fB\-\-force\fP +.RS 4 +Force an unmount (in case of an unreachable NFS system). +.sp +Note that this option does not guarantee that umount command does not hang. It\(cqs strongly recommended to use absolute paths without symlinks to avoid unwanted \fBreadlink\fP(2) and \fBstat\fP(2) system calls on unreachable NFS in \fBumount\fP. 
+.RE +.sp +\fB\-i\fP, \fB\-\-internal\-only\fP +.RS 4 +Do not call the \fB/sbin/umount.\fP\fIfilesystem\fP helper even if it exists. By default such a helper program is called if it exists. +.RE +.sp +\fB\-l\fP, \fB\-\-lazy\fP +.RS 4 +Lazy unmount. Detach the filesystem from the file hierarchy now, and clean up all references to this filesystem as soon as it is not busy anymore. +.sp +A system reboot would be expected in near future if you\(cqre going to use this option for network filesystem or local filesystem with submounts. The recommended use\-case for \fBumount \-l\fP is to prevent hangs on shutdown due to an unreachable network share where a normal \fBumount\fP will hang due to a downed server or a network partition. Remounts of the share will not be possible. +.RE +.sp +\fB\-N\fP, \fB\-\-namespace\fP \fIns\fP +.RS 4 +Perform \fBumount\fP in the mount namespace specified by \fIns\fP. \fIns\fP is either PID of process running in that namespace or special file representing that namespace. +.sp +\fBumount\fP switches to the namespace when it reads \fI/etc/fstab\fP, writes \fI/etc/mtab\fP (or writes to \fI/run/mount\fP) and calls \fBumount\fP(2) system call, otherwise it runs in the original namespace. It means that the target mount namespace does not have to contain any libraries or other requirements necessary to execute \fBumount\fP(2) command. +.sp +See \fBmount_namespaces\fP(7) for more information. +.RE +.sp +\fB\-n\fP, \fB\-\-no\-mtab\fP +.RS 4 +Unmount without writing in \fI/etc/mtab\fP. +.RE +.sp +\fB\-O\fP, \fB\-\-test\-opts\fP \fIoption\fP... +.RS 4 +Unmount only the filesystems that have the specified option set in \fI/etc/fstab\fP. More than one option may be specified in a comma\-separated list. Each option can be prefixed with \fBno\fP to indicate that no action should be taken for this option. +.RE +.sp +\fB\-q\fP, \fB\-\-quiet\fP +.RS 4 +Suppress "not mounted" error messages. 
+.RE +.sp +\fB\-R\fP, \fB\-\-recursive\fP +.RS 4 +Recursively unmount each specified directory. Recursion for each directory will stop if any unmount operation in the chain fails for any reason. The relationship between mountpoints is determined by \fI/proc/self/mountinfo\fP entries. The filesystem must be specified by mountpoint path; a recursive unmount by device name (or UUID) is unsupported. Since version 2.37 it umounts also all over\-mounted filesystems (more filesystems on the same mountpoint). +.RE +.sp +\fB\-r\fP, \fB\-\-read\-only\fP +.RS 4 +When an unmount fails, try to remount the filesystem read\-only. +.RE +.sp +\fB\-t\fP, \fB\-\-types\fP \fItype\fP... +.RS 4 +Indicate that the actions should only be taken on filesystems of the specified \fItype\fP. More than one type may be specified in a comma\-separated list. The list of filesystem types can be prefixed with \fBno\fP to indicate that no action should be taken for all of the mentioned types. Note that \fBumount\fP reads information about mounted filesystems from kernel (\fI/proc/mounts\fP) and filesystem names may be different than filesystem names used in the \fI/etc/fstab\fP (e.g., "nfs4" vs. "nfs"). +.RE +.sp +\fB\-v\fP, \fB\-\-verbose\fP +.RS 4 +Verbose mode. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "NON\-SUPERUSER UMOUNTS" +.sp +Normally, only the superuser can umount filesystems. However, when \fIfstab\fP contains the \fBuser\fP option on a line, anybody can umount the corresponding filesystem. For more details see \fBmount\fP(8) man page. +.sp +Since version 2.34 the \fBumount\fP command can be used to perform umount operation also for fuse filesystems if kernel mount table contains user\(cqs ID. In this case \fIfstab\fP \fBuser=\fP mount option is not required. 
+.sp +Since version 2.35 \fBumount\fP command does not exit when user permissions are inadequate by internal \fBlibmount\fP security rules. It drops suid permissions and continue as regular non\-root user. This can be used to support use\-cases where root permissions are not necessary (e.g., fuse filesystems, user namespaces, etc). +.SH "LOOP DEVICE" +.sp +The \fBumount\fP command will automatically detach loop device previously initialized by \fBmount\fP(8) command independently of \fI/etc/mtab\fP. +.sp +In this case the device is initialized with "autoclear" flag (see \fBlosetup\fP(8) output for more details), otherwise it\(cqs necessary to use the option \fB\-\-detach\-loop\fP or call \fBlosetup \-d\fP \fIdevice\fP. The autoclear feature is supported since Linux 2.6.25. +.SH "EXTERNAL HELPERS" +.sp +The syntax of external unmount helpers is: +.RS 3 +.ll -.6i +.sp +\fBumount.\fP\fIsuffix\fP {\fIdirectory\fP|\fIdevice\fP} [\fB\-flnrv\fP] [\fB\-N\fP \fInamespace\fP] [\fB\-t\fP \fItype\fP.\fIsubtype\fP] +.br +.RE +.ll +.sp +where \fIsuffix\fP is the filesystem type (or the value from a \fBuhelper=\fP or \fBhelper=\fP marker in the mtab file). The \fB\-t\fP option can be used for filesystems that have subtype support. For example: +.RS 3 +.ll -.6i +.sp +\fBumount.fuse \-t fuse.sshfs\fP +.br +.RE +.ll +.sp +A \fBuhelper=\fP\fIsomething\fP marker (unprivileged helper) can appear in the \fI/etc/mtab\fP file when ordinary users need to be able to unmount a mountpoint that is not defined in \fI/etc/fstab\fP (for example for a device that was mounted by \fBudisks\fP(1)). +.sp +A \fBhelper=\fP\fItype\fP marker in the \fImtab\fP file will redirect all unmount requests to the \fB/sbin/umount.\fP\fItype\fP helper independently of UID. +.sp +Note that \fI/etc/mtab\fP is currently deprecated and \fBhelper=\fP and other userspace mount options are maintained by \fBlibmount\fP. 
+.SH "ENVIRONMENT" +.sp +\fBLIBMOUNT_FSTAB\fP=<path> +.RS 4 +overrides the default location of the \fIfstab\fP file (ignored for \fBsuid\fP) +.RE +.sp +\fBLIBMOUNT_MTAB\fP=<path> +.RS 4 +overrides the default location of the \fImtab\fP file (ignored for \fBsuid\fP) +.RE +.sp +\fBLIBMOUNT_DEBUG\fP=all +.RS 4 +enables \fBlibmount\fP debug output +.RE +.SH "FILES" +.sp +\fI/etc/mtab\fP +.RS 4 +table of mounted filesystems (deprecated and usually replaced by symlink to \fI/proc/mounts\fP) +.RE +.sp +\fI/etc/fstab\fP +.RS 4 +table of known filesystems +.RE +.sp +\fI/proc/self/mountinfo\fP +.RS 4 +table of mounted filesystems generated by kernel. +.RE +.SH "HISTORY" +.sp +A \fBumount\fP command appeared in Version 6 AT&T UNIX. +.SH "SEE ALSO" +.sp +\fBumount\fP(2), +\fBlosetup\fP(8), +\fBmount_namespaces\fP(7), +\fBmount\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBumount\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/umount.8.adoc b/sys-utils/umount.8.adoc new file mode 100644 index 0000000..9101ed8 --- /dev/null +++ b/sys-utils/umount.8.adoc @@ -0,0 +1,189 @@ +//po4a: entry man manual +//// +Copyright (c) 1996 Andries Brouwer +This page is somewhat derived from a page that was +(c) 1980, 1989, 1991 The Regents of the University of California +and had been heavily modified by Rik Faith and myself. + +This is free documentation; you can redistribute it and/or +modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 2 of +the License, or (at your option) any later version. + +The GNU General Public License's references to "object code" +and "executables" are to be interpreted as the output of any +document formatting or typesetting system, including +intermediate and printed output. + +This manual is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along +with this program; if not, write to the Free Software Foundation, Inc., +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +//// += umount(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: umount + +== NAME + +umount - unmount filesystems + +== SYNOPSIS + +*umount* *-a* [*-dflnrv*] [*-t* _fstype_] [*-O* _option_...] + +*umount* [*-dflnrv*] {_directory_|_device_} + +*umount* *-h*|*-V* + +== DESCRIPTION + +The *umount* command detaches the mentioned filesystem(s) from the file hierarchy. A filesystem is specified by giving the directory where it has been mounted. 
Giving the special device on which the filesystem lives may also work, but is obsolete, mainly because it will fail in case this device was mounted on more than one directory. + +Note that a filesystem cannot be unmounted when it is 'busy' - for example, when there are open files on it, or when some process has its working directory there, or when a swap file on it is in use. The offending process could even be *umount* itself - it opens libc, and libc in its turn may open for example locale files. A lazy unmount avoids this problem, but it may introduce other issues. See *--lazy* description below. + +== OPTIONS + +*-a*, *--all*:: +All of the filesystems described in _/proc/self/mountinfo_ (or in deprecated _/etc/mtab_) are unmounted, except the proc, devfs, devpts, sysfs, rpc_pipefs and nfsd filesystems. This list of the filesystems may be replaced by *--types* umount option. + +*-A*, *--all-targets*:: +Unmount all mountpoints in the current mount namespace for the specified filesystem. The filesystem can be specified by one of the mountpoints or the device name (or UUID, etc.). When this option is used together with *--recursive*, then all nested mounts within the filesystem are recursively unmounted. This option is only supported on systems where _/etc/mtab_ is a symlink to _/proc/mounts_. + +*-c*, *--no-canonicalize*:: +Do not canonicalize paths. The paths canonicalization is based on *stat*(2) and *readlink*(2) system calls. These system calls may hang in some cases (for example on NFS if server is not available). The option has to be used with canonical path to the mount point. ++ +This option is silently ignored by *umount* for non-root users. ++ +For more details about this option see the *mount*(8) man page. Note that *umount* does not pass this option to the **/sbin/umount.**__type__ helpers. + +*-d*, *--detach-loop*:: +When the unmounted device was a loop device, also free this loop device. 
This option is unnecessary for devices initialized by *mount*(8), in this case "autoclear" functionality is enabled by default. + +*--fake*:: +Causes everything to be done except for the actual system call or umount helper execution; this 'fakes' unmounting the filesystem. It can be used to remove entries from the deprecated _/etc/mtab_ that were unmounted earlier with the *-n* option. + +*-f*, *--force*:: +Force an unmount (in case of an unreachable NFS system). ++ +Note that this option does not guarantee that umount command does not hang. It's strongly recommended to use absolute paths without symlinks to avoid unwanted *readlink*(2) and *stat*(2) system calls on unreachable NFS in *umount*. + +*-i*, *--internal-only*:: +Do not call the **/sbin/umount.**__filesystem__ helper even if it exists. By default such a helper program is called if it exists. + +*-l*, *--lazy*:: +Lazy unmount. Detach the filesystem from the file hierarchy now, and clean up all references to this filesystem as soon as it is not busy anymore. ++ +A system reboot would be expected in near future if you're going to use this option for network filesystem or local filesystem with submounts. The recommended use-case for *umount -l* is to prevent hangs on shutdown due to an unreachable network share where a normal *umount* will hang due to a downed server or a network partition. Remounts of the share will not be possible. + +*-N*, *--namespace* _ns_:: +Perform *umount* in the mount namespace specified by _ns_. _ns_ is either PID of process running in that namespace or special file representing that namespace. ++ +*umount* switches to the namespace when it reads _/etc/fstab_, writes _/etc/mtab_ (or writes to _/run/mount_) and calls *umount*(2) system call, otherwise it runs in the original namespace. It means that the target mount namespace does not have to contain any libraries or other requirements necessary to execute *umount*(2) command. ++ +See *mount_namespaces*(7) for more information. 
+ +*-n*, *--no-mtab*:: +Unmount without writing in _/etc/mtab_. + +*-O*, *--test-opts* _option_...:: +Unmount only the filesystems that have the specified option set in _/etc/fstab_. More than one option may be specified in a comma-separated list. Each option can be prefixed with *no* to indicate that no action should be taken for this option. + +*-q*, *--quiet*:: +Suppress "not mounted" error messages. + +*-R*, *--recursive*:: +Recursively unmount each specified directory. Recursion for each directory will stop if any unmount operation in the chain fails for any reason. The relationship between mountpoints is determined by _/proc/self/mountinfo_ entries. The filesystem must be specified by mountpoint path; a recursive unmount by device name (or UUID) is unsupported. Since version 2.37 it umounts also all over-mounted filesystems (more filesystems on the same mountpoint). + +*-r*, *--read-only*:: +When an unmount fails, try to remount the filesystem read-only. + +*-t*, *--types* _type_...:: +Indicate that the actions should only be taken on filesystems of the specified _type_. More than one type may be specified in a comma-separated list. The list of filesystem types can be prefixed with *no* to indicate that no action should be taken for all of the mentioned types. Note that *umount* reads information about mounted filesystems from kernel (_/proc/mounts_) and filesystem names may be different than filesystem names used in the _/etc/fstab_ (e.g., "nfs4" vs. "nfs"). + +*-v*, *--verbose*:: +Verbose mode. + +include::man-common/help-version.adoc[] + +== NON-SUPERUSER UMOUNTS + +Normally, only the superuser can umount filesystems. However, when _fstab_ contains the *user* option on a line, anybody can umount the corresponding filesystem. For more details see *mount*(8) man page. + +Since version 2.34 the *umount* command can be used to perform umount operation also for fuse filesystems if kernel mount table contains user's ID. 
In this case _fstab_ *user=* mount option is not required. + +Since version 2.35 *umount* command does not exit when user permissions are inadequate by internal *libmount* security rules. It drops suid permissions and continues as a regular non-root user. This can be used to support use-cases where root permissions are not necessary (e.g., fuse filesystems, user namespaces, etc). + +== LOOP DEVICE + +The *umount* command will automatically detach loop device previously initialized by *mount*(8) command independently of _/etc/mtab_. + +In this case the device is initialized with "autoclear" flag (see *losetup*(8) output for more details), otherwise it's necessary to use the option *--detach-loop* or call *losetup -d* _device_. The autoclear feature is supported since Linux 2.6.25. + +== EXTERNAL HELPERS + +The syntax of external unmount helpers is: + +____ +**umount.**__suffix__ {__directory__|_device_} [*-flnrv*] [*-N* _namespace_] [*-t* _type_._subtype_] +____ + +where _suffix_ is the filesystem type (or the value from a *uhelper=* or *helper=* marker in the mtab file). The *-t* option can be used for filesystems that have subtype support. For example: + +____ +*umount.fuse -t fuse.sshfs* +____ + +A **uhelper=**__something__ marker (unprivileged helper) can appear in the _/etc/mtab_ file when ordinary users need to be able to unmount a mountpoint that is not defined in _/etc/fstab_ (for example for a device that was mounted by *udisks*(1)). + +A **helper=**__type__ marker in the _mtab_ file will redirect all unmount requests to the **/sbin/umount.**__type__ helper independently of UID. + +Note that _/etc/mtab_ is currently deprecated and *helper=* and other userspace mount options are maintained by *libmount*. 
+ +== ENVIRONMENT + +*LIBMOUNT_FSTAB*=<path>:: +overrides the default location of the _fstab_ file (ignored for *suid*) + +*LIBMOUNT_MTAB*=<path>:: +overrides the default location of the _mtab_ file (ignored for *suid*) + +*LIBMOUNT_DEBUG*=all:: +enables *libmount* debug output + +== FILES + +_/etc/mtab_:: +table of mounted filesystems (deprecated and usually replaced by symlink to _/proc/mounts_) + +_/etc/fstab_:: +table of known filesystems + +_/proc/self/mountinfo_:: +table of mounted filesystems generated by kernel. + +== HISTORY + +A *umount* command appeared in Version 6 AT&T UNIX. + +== SEE ALSO + +*umount*(2), +*losetup*(8), +*mount_namespaces*(7), +*mount*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/umount.c b/sys-utils/umount.c new file mode 100644 index 0000000..f593176 --- /dev/null +++ b/sys-utils/umount.c @@ -0,0 +1,650 @@ +/* + * umount(8) -- unmount a filesystem + * + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Written by Karel Zak <kzak@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <sys/types.h> + +#include <libmount.h> + +#include "nls.h" +#include "c.h" +#include "env.h" +#include "closestream.h" +#include "pathnames.h" +#include "canonicalize.h" + +#define XALLOC_EXIT_CODE MNT_EX_SYSERR +#include "xalloc.h" + +#define OPTUTILS_EXIT_CODE MNT_EX_USAGE +#include "optutils.h" + +static int quiet; +static struct ul_env_list *envs_removed; + +static int table_parser_errcb(struct libmnt_table *tb __attribute__((__unused__)), + const char *filename, int line) +{ + if (filename) + warnx(_("%s: parse error at line %d -- ignored"), filename, line); + return 1; +} + + +static void __attribute__((__noreturn__)) umount_print_version(void) +{ + const char *ver = NULL; + const char **features = NULL, **p; + + mnt_get_library_version(&ver); + mnt_get_library_features(&features); + + printf(_("%s from %s (libmount %s"), + program_invocation_short_name, + PACKAGE_STRING, + ver); + p = features; + while (p && *p) { + fputs(p == features ? 
": " : ", ", stdout); + fputs(*p++, stdout); + } + fputs(")\n", stdout); + exit(MNT_EX_SUCCESS); +} +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + fputs(USAGE_HEADER, out); + fprintf(out, _( + " %1$s [-hV]\n" + " %1$s -a [options]\n" + " %1$s [options] <source> | <directory>\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Unmount filesystems.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -a, --all unmount all filesystems\n"), out); + fputs(_(" -A, --all-targets unmount all mountpoints for the given device in the\n" + " current namespace\n"), out); + fputs(_(" -c, --no-canonicalize don't canonicalize paths\n"), out); + fputs(_(" -d, --detach-loop if mounted loop device, also free this loop device\n"), out); + fputs(_(" --fake dry run; skip the umount(2) syscall\n"), out); + fputs(_(" -f, --force force unmount (in case of an unreachable NFS system)\n"), out); + fputs(_(" -i, --internal-only don't call the umount.<type> helpers\n"), out); + fputs(_(" -n, --no-mtab don't write to /etc/mtab\n"), out); + fputs(_(" -l, --lazy detach the filesystem now, clean up things later\n"), out); + fputs(_(" -O, --test-opts <list> limit the set of filesystems (use with -a)\n"), out); + fputs(_(" -R, --recursive recursively unmount a target with all its children\n"), out); + fputs(_(" -r, --read-only in case unmounting fails, try to remount read-only\n"), out); + fputs(_(" -t, --types <list> limit the set of filesystem types\n"), out); + fputs(_(" -v, --verbose say what is being done\n"), out); + fputs(_(" -q, --quiet suppress 'not mounted' error messages\n"), out); + fputs(_(" -N, --namespace <ns> perform umount in another namespace\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(25)); + printf(USAGE_MAN_TAIL("umount(8)")); + + exit(MNT_EX_SUCCESS); +} + +static void suid_drop(struct libmnt_context *cxt) +{ + const uid_t ruid = getuid(); + const uid_t euid = geteuid(); + + if (ruid 
!= 0 && euid == 0 && drop_permissions() != 0) + err(MNT_EX_FAIL, _("drop permissions failed")); + + /* be paranoid and check it, setuid(0) has to fail */ + if (ruid != 0 && setuid(0) == 0) + errx(MNT_EX_FAIL, _("drop permissions failed.")); + + mnt_context_force_unrestricted(cxt); + + /* restore "bad" environment variables */ + if (envs_removed) { + env_list_setenv(envs_removed); + env_list_free(envs_removed); + envs_removed = NULL; + } +} + +static void success_message(struct libmnt_context *cxt) +{ + const char *tgt, *src; + + if (mnt_context_helper_executed(cxt) + || mnt_context_get_status(cxt) != 1) + return; + + tgt = mnt_context_get_target(cxt); + if (!tgt) + return; + + src = mnt_context_get_source(cxt); + if (src) + warnx(_("%s (%s) unmounted"), tgt, src); + else + warnx(_("%s unmounted"), tgt); +} + +static int mk_exit_code(struct libmnt_context *cxt, int rc) +{ + char buf[BUFSIZ] = { 0 }; + + rc = mnt_context_get_excode(cxt, rc, buf, sizeof(buf)); + + /* suppress "not mounted" error message */ + if (quiet && + rc == MNT_EX_FAIL && + mnt_context_syscall_called(cxt) && + mnt_context_get_syscall_errno(cxt) == EINVAL) + return rc; + + /* print errors/warnings */ + if (*buf) { + const char *spec = mnt_context_get_target(cxt); + if (!spec) + spec = mnt_context_get_source(cxt); + if (!spec) + spec = "???"; + warnx("%s: %s.", spec, buf); + } + return rc; +} + +static int umount_all(struct libmnt_context *cxt) +{ + struct libmnt_iter *itr; + struct libmnt_fs *fs; + int mntrc, ignored, rc = 0; + + itr = mnt_new_iter(MNT_ITER_BACKWARD); + if (!itr) { + warn(_("failed to initialize libmount iterator")); + return MNT_EX_SYSERR; + } + + while (mnt_context_next_umount(cxt, itr, &fs, &mntrc, &ignored) == 0) { + + const char *tgt = mnt_fs_get_target(fs); + + if (ignored) { + if (mnt_context_is_verbose(cxt)) + printf(_("%-25s: ignored\n"), tgt); + } else { + int xrc = mk_exit_code(cxt, mntrc); + + if (xrc == MNT_EX_SUCCESS + && mnt_context_is_verbose(cxt)) + printf("%-25s: 
successfully unmounted\n", tgt); + rc |= xrc; + } + } + + mnt_free_iter(itr); + return rc; +} + +static int umount_one(struct libmnt_context *cxt, const char *spec) +{ + int rc; + + if (!spec) + return MNT_EX_SOFTWARE; + + if (mnt_context_set_target(cxt, spec)) + err(MNT_EX_SYSERR, _("failed to set umount target")); + + rc = mnt_context_umount(cxt); + + if (rc == -EPERM + && mnt_context_is_restricted(cxt) + && mnt_context_tab_applied(cxt) + && !mnt_context_syscall_called(cxt)) { + /* Mountpoint exists, but failed something else in libmount, + * drop perms and try it again */ + suid_drop(cxt); + rc = mnt_context_umount(cxt); + } + + rc = mk_exit_code(cxt, rc); + + if (rc == MNT_EX_SUCCESS && mnt_context_is_verbose(cxt)) + success_message(cxt); + + mnt_reset_context(cxt); + return rc; +} + +static struct libmnt_table *new_mountinfo(struct libmnt_context *cxt) +{ + struct libmnt_table *tb; + struct libmnt_ns *ns_old = mnt_context_switch_target_ns(cxt); + + if (!ns_old) + err(MNT_EX_SYSERR, _("failed to switch namespace")); + + tb = mnt_new_table(); + if (!tb) + err(MNT_EX_SYSERR, _("libmount table allocation failed")); + + mnt_table_set_parser_errcb(tb, table_parser_errcb); + mnt_table_set_cache(tb, mnt_context_get_cache(cxt)); + + if (mnt_table_parse_file(tb, _PATH_PROC_MOUNTINFO)) { + warn(_("failed to parse %s"), _PATH_PROC_MOUNTINFO); + mnt_unref_table(tb); + tb = NULL; + } + + if (!mnt_context_switch_ns(cxt, ns_old)) + err(MNT_EX_SYSERR, _("failed to switch namespace")); + + return tb; +} + +/* + * like umount_one() but does not return error is @spec not mounted + */ +static int umount_one_if_mounted(struct libmnt_context *cxt, const char *spec) +{ + int rc; + struct libmnt_fs *fs; + + rc = mnt_context_find_umount_fs(cxt, spec, &fs); + if (rc == 1) { + rc = MNT_EX_SUCCESS; /* already unmounted */ + mnt_reset_context(cxt); + } else if (rc < 0) { + rc = mk_exit_code(cxt, rc); /* error */ + mnt_reset_context(cxt); + } else + rc = umount_one(cxt, 
mnt_fs_get_target(fs)); + + return rc; +} + +static int umount_do_recurse(struct libmnt_context *cxt, + struct libmnt_table *tb, struct libmnt_fs *fs) +{ + struct libmnt_fs *child, *over = NULL; + struct libmnt_iter *itr = mnt_new_iter(MNT_ITER_BACKWARD); + int rc; + + if (!itr) + err(MNT_EX_SYSERR, _("libmount iterator allocation failed")); + + /* first try overmount */ + if (mnt_table_over_fs(tb, fs, &over) == 0 && over) { + rc = umount_do_recurse(cxt, tb, over); + if (rc != MNT_EX_SUCCESS) + goto done; + } + + /* umount all children */ + for (;;) { + rc = mnt_table_next_child_fs(tb, itr, fs, &child); + if (rc < 0) { + warnx(_("failed to get child fs of %s"), + mnt_fs_get_target(fs)); + rc = MNT_EX_SOFTWARE; + goto done; + } else if (rc == 1) + break; /* no more children */ + + if (over && child == over) + continue; + + rc = umount_do_recurse(cxt, tb, child); + if (rc != MNT_EX_SUCCESS) + goto done; + } + + rc = umount_one_if_mounted(cxt, mnt_fs_get_target(fs)); +done: + mnt_free_iter(itr); + return rc; +} + +static int umount_recursive(struct libmnt_context *cxt, const char *spec) +{ + struct libmnt_table *tb; + struct libmnt_fs *fs; + int rc; + + tb = new_mountinfo(cxt); + if (!tb) + return MNT_EX_SOFTWARE; + + /* it's always real mountpoint, don't assume that the target maybe a device */ + mnt_context_disable_swapmatch(cxt, 1); + + fs = mnt_table_find_target(tb, spec, MNT_ITER_BACKWARD); + if (fs) + rc = umount_do_recurse(cxt, tb, fs); + else { + rc = MNT_EX_USAGE; + if (!quiet) + warnx(access(spec, F_OK) == 0 ? + _("%s: not mounted") : + _("%s: not found"), spec); + } + + mnt_unref_table(tb); + return rc; +} + +static int umount_alltargets(struct libmnt_context *cxt, const char *spec, int rec) +{ + struct libmnt_fs *fs; + struct libmnt_table *tb; + struct libmnt_iter *itr = NULL; + dev_t devno = 0; + int rc; + + /* Convert @spec to device name, Use the same logic like regular + * "umount <spec>". 
+ */ + rc = mnt_context_find_umount_fs(cxt, spec, &fs); + if (rc == 1) { + rc = MNT_EX_USAGE; + if (!quiet) + warnx(access(spec, F_OK) == 0 ? + _("%s: not mounted") : + _("%s: not found"), spec); + return rc; + } + if (rc < 0) + return mk_exit_code(cxt, rc); /* error */ + + if (!mnt_fs_get_srcpath(fs) || !mnt_fs_get_devno(fs)) + errx(MNT_EX_USAGE, _("%s: failed to determine source " + "(--all-targets is unsupported on systems with " + "regular mtab file)."), spec); + + itr = mnt_new_iter(MNT_ITER_BACKWARD); + if (!itr) + err(MNT_EX_SYSERR, _("libmount iterator allocation failed")); + + /* get on @cxt independent mountinfo */ + tb = new_mountinfo(cxt); + if (!tb) { + rc = MNT_EX_SOFTWARE; + goto done; + } + + /* Note that @fs is from mount context and the context will be reset + * after each umount() call */ + devno = mnt_fs_get_devno(fs); + fs = NULL; + + mnt_reset_context(cxt); + + while (mnt_table_next_fs(tb, itr, &fs) == 0) { + if (mnt_fs_get_devno(fs) != devno) + continue; + mnt_context_disable_swapmatch(cxt, 1); + if (rec) + rc = umount_do_recurse(cxt, tb, fs); + else + rc = umount_one_if_mounted(cxt, mnt_fs_get_target(fs)); + + if (rc != MNT_EX_SUCCESS) + break; + } + +done: + mnt_free_iter(itr); + mnt_unref_table(tb); + + return rc; +} + +/* + * Check path -- non-root user should not be able to resolve path which is + * unreadable for them. 
/*
 * Parse @str as a decimal process ID.
 *
 * Returns the PID on success, or 0 when @str is NULL, empty, not entirely
 * numeric, non-positive, or does not fit into a pid_t.  The caller uses the
 * 0 result to fall back to treating the argument as a namespace file path,
 * so anything that is not an unambiguous PID must yield 0.
 *
 * The value is parsed into a long and range-checked *before* it is narrowed
 * to pid_t.  Narrowing first would silently wrap oversized input: on LP64
 * "4294967297" becomes 1 and would be accepted as PID 1.
 */
static pid_t parse_pid(const char *str)
{
	char *end = NULL;
	long val;

	if (!str)
		return 0;

	errno = 0;
	val = strtol(str, &end, 10);

	/* conversion error, no digits at all, or trailing garbage */
	if (errno || end == str || *end != '\0')
		return 0;

	/* PIDs are strictly positive and must survive the cast to pid_t */
	if (val <= 0 || val > INT_MAX)
		return 0;

	return (pid_t) val;
}
context allocation failed")); + + mnt_context_set_tables_errcb(cxt, table_parser_errcb); + + while ((c = getopt_long(argc, argv, "aAcdfhilnqRrO:t:vVN:", + longopts, NULL)) != -1) { + + + /* only few options are allowed for non-root users */ + if (mnt_context_is_restricted(cxt) && !strchr("hdilqVv", c)) { + + /* Silently ignore options without direct impact to the + * umount operation, but with security sensitive + * side-effects */ + if (strchr("c", c)) + continue; /* ignore */ + + /* drop permissions, continue as regular user */ + suid_drop(cxt); + } + + err_exclusive_options(c, longopts, excl, excl_st); + + switch(c) { + case 'a': + all = 1; + break; + case 'A': + alltargets = 1; + break; + case 'c': + mnt_context_disable_canonicalize(cxt, TRUE); + break; + case 'd': + mnt_context_enable_loopdel(cxt, TRUE); + break; + case UMOUNT_OPT_FAKE: + mnt_context_enable_fake(cxt, TRUE); + break; + case 'f': + mnt_context_enable_force(cxt, TRUE); + break; + case 'i': + mnt_context_disable_helpers(cxt, TRUE); + break; + case 'l': + mnt_context_enable_lazy(cxt, TRUE); + break; + case 'n': + mnt_context_disable_mtab(cxt, TRUE); + break; + case 'q': + quiet = 1; + break; + case 'r': + mnt_context_enable_rdonly_umount(cxt, TRUE); + break; + case 'R': + recursive = TRUE; + break; + case 'O': + if (mnt_context_set_options_pattern(cxt, optarg)) + err(MNT_EX_SYSERR, _("failed to set options pattern")); + break; + case 't': + types = optarg; + break; + case 'v': + mnt_context_enable_verbose(cxt, TRUE); + break; + case 'N': + { + char path[PATH_MAX]; + pid_t pid = parse_pid(optarg); + + if (pid) + snprintf(path, sizeof(path), "/proc/%i/ns/mnt", pid); + + if (mnt_context_set_target_ns(cxt, pid ? path : optarg)) + err(MNT_EX_SYSERR, _("failed to set target namespace to %s"), pid ? 
path : optarg); + break; + } + + case 'h': + mnt_free_context(cxt); + usage(); + case 'V': + mnt_free_context(cxt); + umount_print_version(); + default: + errtryhelp(MNT_EX_USAGE); + } + } + + argc -= optind; + argv += optind; + + if (all) { + if (argc) { + warnx(_("unexpected number of arguments")); + errtryhelp(MNT_EX_USAGE); + } + if (!types) + types = "noproc,nodevfs,nodevpts,nosysfs,norpc_pipefs,nonfsd,noselinuxfs"; + + mnt_context_set_fstype_pattern(cxt, types); + rc = umount_all(cxt); + + } else if (argc < 1) { + warnx(_("bad usage")); + errtryhelp(MNT_EX_USAGE); + + } else if (alltargets) { + while (argc--) + rc += umount_alltargets(cxt, *argv++, recursive); + } else if (recursive) { + while (argc--) + rc += umount_recursive(cxt, *argv++); + } else { + while (argc--) { + char *path = *argv; + + if (mnt_context_is_restricted(cxt) + && !mnt_tag_is_valid(path)) + path = sanitize_path(path); + + rc += umount_one(cxt, path); + + if (path != *argv) + free(path); + argv++; + } + } + + mnt_free_context(cxt); + env_list_free(envs_removed); + + return (rc < 256) ? rc : 255; +} + diff --git a/sys-utils/unshare.1 b/sys-utils/unshare.1 new file mode 100644 index 0000000..bc31889 --- /dev/null +++ b/sys-utils/unshare.1 @@ -0,0 +1,400 @@ +'\" t +.\" Title: unshare +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: User Commands +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "UNSHARE" "1" "2022-05-11" "util\-linux 2.38.1" "User Commands" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. 
LINKSTYLE blue R < > +.\} +.SH "NAME" +unshare \- run program in new namespaces +.SH "SYNOPSIS" +.sp +\fBunshare\fP [options] [\fIprogram\fP [\fIarguments\fP]] +.SH "DESCRIPTION" +.sp +The \fBunshare\fP command creates new namespaces (as specified by the command\-line options described below) and then executes the specified \fIprogram\fP. If \fIprogram\fP is not given, then "${SHELL}" is run (default: \fI/bin/sh\fP). +.sp +By default, a new namespace persists only as long as it has member processes. A new namespace can be made persistent even when it has no member processes by bind mounting /proc/\fIpid\fP/ns/\fItype\fP files to a filesystem path. A namespace that has been made persistent in this way can subsequently be entered with \fBnsenter\fP(1) even after the \fIprogram\fP terminates (except PID namespaces where a permanently running init process is required). Once a persistent namespace is no longer needed, it can be unpersisted by using \fBumount\fP(8) to remove the bind mount. See the \fBEXAMPLES\fP section for more details. +.sp +\fBunshare\fP since util\-linux version 2.36 uses \fI/proc/[pid]/ns/pid_for_children\fP and \fI/proc/[pid]/ns/time_for_children\fP files for persistent PID and TIME namespaces. This change requires Linux kernel 4.17 or newer. +.sp +The following types of namespaces can be created with \fBunshare\fP: +.sp +\fBmount namespace\fP +.RS 4 +Mounting and unmounting filesystems will not affect the rest of the system, except for filesystems which are explicitly marked as shared (with \fBmount \-\-make\-shared\fP; see \fI/proc/self/mountinfo\fP or \fBfindmnt \-o+PROPAGATION\fP for the \fBshared\fP flags). For further details, see \fBmount_namespaces\fP(7). +.sp +\fBunshare\fP since util\-linux version 2.27 automatically sets propagation to \fBprivate\fP in a new mount namespace to make sure that the new namespace is really unshared. It\(cqs possible to disable this feature with option \fB\-\-propagation unchanged\fP. 
Note that \fBprivate\fP is the kernel default. +.RE +.sp +\fBUTS namespace\fP +.RS 4 +Setting hostname or domainname will not affect the rest of the system. For further details, see \fButs_namespaces\fP(7). +.RE +.sp +\fBIPC namespace\fP +.RS 4 +The process will have an independent namespace for POSIX message queues as well as System V message queues, semaphore sets and shared memory segments. For further details, see \fBipc_namespaces\fP(7). +.RE +.sp +\fBnetwork namespace\fP +.RS 4 +The process will have independent IPv4 and IPv6 stacks, IP routing tables, firewall rules, the \fI/proc/net\fP and \fI/sys/class/net\fP directory trees, sockets, etc. For further details, see \fBnetwork_namespaces\fP(7). +.RE +.sp +\fBPID namespace\fP +.RS 4 +Children will have a distinct set of PID\-to\-process mappings from their parent. For further details, see \fBpid_namespaces\fP(7). +.RE +.sp +\fBcgroup namespace\fP +.RS 4 +The process will have a virtualized view of \fI/proc/self/cgroup\fP, and new cgroup mounts will be rooted at the namespace cgroup root. For further details, see \fBcgroup_namespaces\fP(7). +.RE +.sp +\fBuser namespace\fP +.RS 4 +The process will have a distinct set of UIDs, GIDs and capabilities. For further details, see \fBuser_namespaces\fP(7). +.RE +.sp +\fBtime namespace\fP +.RS 4 +The process can have a distinct view of \fBCLOCK_MONOTONIC\fP and/or \fBCLOCK_BOOTTIME\fP which can be changed using \fI/proc/self/timens_offsets\fP. For further details, see \fBtime_namespaces\fP(7). +.RE +.SH "OPTIONS" +.sp +\fB\-i\fP, \fB\-\-ipc\fP[\fB=\fP\fIfile\fP] +.RS 4 +Create a new IPC namespace. If \fIfile\fP is specified, then the namespace is made persistent by creating a bind mount at \fIfile\fP. +.RE +.sp +\fB\-m\fP, \fB\-\-mount\fP[\fB=\fP\fIfile\fP] +.RS 4 +Create a new mount namespace. If \fIfile\fP is specified, then the namespace is made persistent by creating a bind mount at \fIfile\fP. 
Note that \fIfile\fP must be located on a mount whose propagation type is not \fBshared\fP (or an error results). Use the command \fBfindmnt \-o+PROPAGATION\fP when not sure about the current setting. See also the examples below. +.RE +.sp +\fB\-n\fP, \fB\-\-net\fP[\fB=\fP\fIfile\fP] +.RS 4 +Create a new network namespace. If \fIfile\fP is specified, then the namespace is made persistent by creating a bind mount at \fIfile\fP. +.RE +.sp +\fB\-p\fP, \fB\-\-pid\fP[\fB=\fP\fIfile\fP] +.RS 4 +Create a new PID namespace. If \fIfile\fP is specified, then the namespace is made persistent by creating a bind mount at \fIfile\fP. (Creation of a persistent PID namespace will fail if the \fB\-\-fork\fP option is not also specified.) +.sp +See also the \fB\-\-fork\fP and \fB\-\-mount\-proc\fP options. +.RE +.sp +\fB\-u\fP, \fB\-\-uts\fP[\fB=\fP\fIfile\fP] +.RS 4 +Create a new UTS namespace. If \fIfile\fP is specified, then the namespace is made persistent by creating a bind mount at \fIfile\fP. +.RE +.sp +\fB\-U\fP, \fB\-\-user\fP[\fB=\fP\fIfile\fP] +.RS 4 +Create a new user namespace. If \fIfile\fP is specified, then the namespace is made persistent by creating a bind mount at \fIfile\fP. +.RE +.sp +\fB\-C\fP, \fB\-\-cgroup\fP[\fB=\fP\fIfile\fP] +.RS 4 +Create a new cgroup namespace. If \fIfile\fP is specified, then the namespace is made persistent by creating a bind mount at \fIfile\fP. +.RE +.sp +\fB\-T\fP, \fB\-\-time\fP[\fB=\fP\fIfile\fP] +.RS 4 +Create a new time namespace. If \fIfile\fP is specified, then the namespace is made persistent by creating a bind mount at \fIfile\fP. The \fB\-\-monotonic\fP and \fB\-\-boottime\fP options can be used to specify the corresponding offset in the time namespace. +.RE +.sp +\fB\-f\fP, \fB\-\-fork\fP +.RS 4 +Fork the specified \fIprogram\fP as a child process of \fBunshare\fP rather than running it directly. This is useful when creating a new PID namespace. 
Note that when \fBunshare\fP is waiting for the child process, then it ignores \fBSIGINT\fP and \fBSIGTERM\fP and does not forward any signals to the child. It is necessary to send signals to the child process. +.RE +.sp +\fB\-\-keep\-caps\fP +.RS 4 +When the \fB\-\-user\fP option is given, ensure that capabilities granted in the user namespace are preserved in the child process. +.RE +.sp +\fB\-\-kill\-child\fP[\fB=\fP\fIsigname\fP] +.RS 4 +When \fBunshare\fP terminates, have \fIsigname\fP be sent to the forked child process. Combined with \fB\-\-pid\fP this allows for an easy and reliable killing of the entire process tree below \fBunshare\fP. If not given, \fIsigname\fP defaults to \fBSIGKILL\fP. This option implies \fB\-\-fork\fP. +.RE +.sp +\fB\-\-mount\-proc\fP[\fB=\fP\fImountpoint\fP] +.RS 4 +Just before running the program, mount the proc filesystem at \fImountpoint\fP (default is \fI/proc\fP). This is useful when creating a new PID namespace. It also implies creating a new mount namespace since the \fI/proc\fP mount would otherwise mess up existing programs on the system. The new proc filesystem is explicitly mounted as private (with \fBMS_PRIVATE\fP|\fBMS_REC\fP). +.RE +.sp +\fB\-\-map\-user=\fP\fIuid|name\fP +.RS 4 +Run the program only after the current effective user ID has been mapped to \fIuid\fP. If this option is specified multiple times, the last occurrence takes precedence. This option implies \fB\-\-user\fP. +.RE +.sp +\fB\-\-map\-users=\fP\fIouteruid,inneruid,count\fP|\fBauto\fP +.RS 4 +Run the program only after the block of user IDs of size \fIcount\fP beginning at \fIouteruid\fP has been mapped to the block of user IDs beginning at \fIinneruid\fP. This mapping is created with \fBnewuidmap\fP(1). If the range of user IDs overlaps with the mapping specified by \fB\-\-map\-user\fP, then a "hole" will be removed from the mapping. This may result in the highest user ID of the mapping not being mapped. 
The special value \fBauto\fP will map the first block of user IDs owned by the effective user from \fI/etc/subuid\fP to a block starting at user ID 0. If this option is specified multiple times, the last occurrence takes precedence. This option implies \fB\-\-user\fP. +.RE +.sp +\fB\-\-map\-group=\fP\fIgid|name\fP +.RS 4 +Run the program only after the current effective group ID has been mapped to \fIgid\fP. If this option is specified multiple times, the last occurrence takes precedence. This option implies \fB\-\-setgroups=deny\fP and \fB\-\-user\fP. +.RE +.sp +\fB\-\-map\-groups=\fP\fIoutergid,innergid,count\fP|\fBauto\fP +.RS 4 +Run the program only after the block of group IDs of size \fIcount\fP beginning at \fIoutergid\fP has been mapped to the block of group IDs beginning at \fIinnergid\fP. This mapping is created with \fBnewgidmap\fP(1). If the range of group IDs overlaps with the mapping specified by \fB\-\-map\-group\fP, then a "hole" will be removed from the mapping. This may result in the highest group ID of the mapping not being mapped. The special value \fBauto\fP will map the first block of group IDs owned by the effective user from \fI/etc/subgid\fP to a block starting at group ID 0. If this option is specified multiple times, the last occurrence takes precedence. This option implies \fB\-\-user\fP. +.RE +.sp +\fB\-\-map\-auto\fP +.RS 4 +Map the first block of user IDs owned by the effective user from \fI/etc/subuid\fP to a block starting at user ID 0. In the same manner, also map the first block of group IDs owned by the effective group from \fI/etc/subgid\fP to a block starting at group ID 0. This option is intended to handle the common case where the first block of subordinate user and group IDs can map the whole user and group ID space. This option is equivalent to specifying \fB\-\-map\-users=auto\fP and \fB\-\-map\-groups=auto\fP. 
+.RE +.sp +\fB\-r\fP, \fB\-\-map\-root\-user\fP +.RS 4 +Run the program only after the current effective user and group IDs have been mapped to the superuser UID and GID in the newly created user namespace. This makes it possible to conveniently gain capabilities needed to manage various aspects of the newly created namespaces (such as configuring interfaces in the network namespace or mounting filesystems in the mount namespace) even when run unprivileged. As a mere convenience feature, it does not support more sophisticated use cases, such as mapping multiple ranges of UIDs and GIDs. This option implies \fB\-\-setgroups=deny\fP and \fB\-\-user\fP. This option is equivalent to \fB\-\-map\-user=0 \-\-map\-group=0\fP. +.RE +.sp +\fB\-c\fP, \fB\-\-map\-current\-user\fP +.RS 4 +Run the program only after the current effective user and group IDs have been mapped to the same UID and GID in the newly created user namespace. This option implies \fB\-\-setgroups=deny\fP and \fB\-\-user\fP. This option is equivalent to \fB\-\-map\-user=$(id \-ru) \-\-map\-group=$(id \-rg)\fP. +.RE +.sp +\fB\-\-propagation private\fP|\fBshared\fP|\fBslave\fP|\fBunchanged\fP +.RS 4 +Recursively set the mount propagation flag in the new mount namespace. The default is to set the propagation to \fIprivate\fP. It is possible to disable this feature with the argument \fBunchanged\fP. The option is silently ignored when the mount namespace (\fB\-\-mount\fP) is not requested. +.RE +.sp +\fB\-\-setgroups allow\fP|\fBdeny\fP +.RS 4 +Allow or deny the \fBsetgroups\fP(2) system call in a user namespace. +.sp +To be able to call \fBsetgroups\fP(2), the calling process must at least have \fBCAP_SETGID\fP. But since Linux 3.19 a further restriction applies: the kernel gives permission to call \fBsetgroups\fP(2) only after the GID map (\fB/proc/\fP\fIpid\fP*/gid_map*) has been set. 
The GID map is writable by root when \fBsetgroups\fP(2) is enabled (i.e., \fBallow\fP, the default), and the GID map becomes writable by unprivileged processes when \fBsetgroups\fP(2) is permanently disabled (with \fBdeny\fP). +.RE +.sp +\fB\-R\fP, \fB\-\-root=\fP\fIdir\fP +.RS 4 +run the command with root directory set to \fIdir\fP. +.RE +.sp +\fB\-w\fP, \fB\-\-wd=\fP\fIdir\fP +.RS 4 +change working directory to \fIdir\fP. +.RE +.sp +\fB\-S\fP, \fB\-\-setuid\fP \fIuid\fP +.RS 4 +Set the user ID which will be used in the entered namespace. +.RE +.sp +\fB\-G\fP, \fB\-\-setgid\fP \fIgid\fP +.RS 4 +Set the group ID which will be used in the entered namespace and drop supplementary groups. +.RE +.sp +\fB\-\-monotonic\fP \fIoffset\fP +.RS 4 +Set the offset of \fBCLOCK_MONOTONIC\fP which will be used in the entered time namespace. This option requires unsharing a time namespace with \fB\-\-time\fP. +.RE +.sp +\fB\-\-boottime\fP \fIoffset\fP +.RS 4 +Set the offset of \fBCLOCK_BOOTTIME\fP which will be used in the entered time namespace. This option requires unsharing a time namespace with \fB\-\-time\fP. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "NOTES" +.sp +The proc and sysfs filesystems mounting as root in a user namespace have to be restricted so that a less privileged user cannot get more access to sensitive files that a more privileged user made unavailable. In short the rule for proc and sysfs is as close to a bind mount as possible. +.SH "EXAMPLES" +.sp +The following command creates a PID namespace, using \fB\-\-fork\fP to ensure that the executed command is performed in a child process that (being the first process in the namespace) has PID 1. 
The \fB\-\-mount\-proc\fP option ensures that a new mount namespace is also simultaneously created and that a new \fBproc\fP(5) filesystem is mounted that contains information corresponding to the new PID namespace. When the \fBreadlink\fP(1) command terminates, the new namespaces are automatically torn down. +.sp +.if n .RS 4 +.nf +.fam C +# unshare \-\-fork \-\-pid \-\-mount\-proc readlink /proc/self +1 +.fam +.fi +.if n .RE +.sp +As an unprivileged user, create a new user namespace where the user\(cqs credentials are mapped to the root IDs inside the namespace: +.sp +.if n .RS 4 +.nf +.fam C +$ id \-u; id \-g +1000 +1000 +$ unshare \-\-user \-\-map\-root\-user \(rs + sh \-c \(aqwhoami; cat /proc/self/uid_map /proc/self/gid_map\(aq +root + 0 1000 1 + 0 1000 1 +.fam +.fi +.if n .RE +.sp +As an unprivileged user, create a user namespace where the first 65536 IDs are all mapped, and the user\(cqs credentials are mapped to the root IDs inside the namespace. The map is determined by the subordinate IDs assigned in \fBsubuid\fP(5) and \fBsubgid\fP(5). Demonstrate this mapping by creating a file with user ID 1 and group ID 1. For brevity, only the user ID mappings are shown: +.sp +.if n .RS 4 +.nf +.fam C +$ id \-u +1000 +$ cat /etc/subuid +1000:100000:65536 +$ unshare \-\-user \-\-map\-auto \-\-map\-root\-user +# id \-u +0 +# cat /proc/self/uid_map + 0 1000 1 + 1 100000 65535 +# touch file; chown 1:1 file +# ls \-ln \-\-time\-style=+ file +\-rw\-r\-\-r\-\- 1 1 1 0 file +# exit +$ ls \-ln \-\-time\-style=+ file +\-rw\-r\-\-r\-\- 1 100000 100000 0 file +.fam +.fi +.if n .RE +.sp +The first of the following commands creates a new persistent UTS namespace and modifies the hostname as seen in that namespace. The namespace is then entered with \fBnsenter\fP(1) in order to display the modified hostname; this step demonstrates that the UTS namespace continues to exist even though the namespace had no member processes after the \fBunshare\fP command terminated. 
The namespace is then destroyed by removing the bind mount. +.sp +.if n .RS 4 +.nf +.fam C +# touch /root/uts\-ns +# unshare \-\-uts=/root/uts\-ns hostname FOO +# nsenter \-\-uts=/root/uts\-ns hostname +FOO +# umount /root/uts\-ns +.fam +.fi +.if n .RE +.sp +The following commands establish a persistent mount namespace referenced by the bind mount \fI/root/namespaces/mnt\fP. In order to ensure that the creation of that bind mount succeeds, the parent directory (\fI/root/namespaces\fP) is made a bind mount whose propagation type is not \fBshared\fP. +.sp +.if n .RS 4 +.nf +.fam C +# mount \-\-bind /root/namespaces /root/namespaces +# mount \-\-make\-private /root/namespaces +# touch /root/namespaces/mnt +# unshare \-\-mount=/root/namespaces/mnt +.fam +.fi +.if n .RE +.sp +The following commands demonstrate the use of the \fB\-\-kill\-child\fP option when creating a PID namespace, in order to ensure that when \fBunshare\fP is killed, all of the processes within the PID namespace are killed. +.sp +.if n .RS 4 +.nf +.fam C +# set +m # Don\(aqt print job status messages + + +# unshare \-\-pid \-\-fork \-\-mount\-proc \-\-kill\-child \-\- \(rs + bash \-\-norc \-c \(aq(sleep 555 &) && (ps a &) && sleep 999\(aq & +[1] 53456 +# PID TTY STAT TIME COMMAND + 1 pts/3 S+ 0:00 sleep 999 + 3 pts/3 S+ 0:00 sleep 555 + 5 pts/3 R+ 0:00 ps a + +# ps h \-o \(aqcomm\(aq $! # Show that background job is unshare(1) +unshare +# kill $! # Kill unshare(1) +# pidof sleep +.fam +.fi +.if n .RE +.sp +The \fBpidof\fP(1) command prints no output, because the \fBsleep\fP processes have been killed. More precisely, when the \fBsleep\fP process that has PID 1 in the namespace (i.e., the namespace\(cqs init process) was killed, this caused all other processes in the namespace to be killed. 
By contrast, a similar series of commands where the \fB\-\-kill\-child\fP option is not used shows that when \fBunshare\fP terminates, the processes in the PID namespace are not killed: +.sp +.if n .RS 4 +.nf +.fam C +# unshare \-\-pid \-\-fork \-\-mount\-proc \-\- \(rs + bash \-\-norc \-c \(aq(sleep 555 &) && (ps a &) && sleep 999\(aq & +[1] 53479 +# PID TTY STAT TIME COMMAND + 1 pts/3 S+ 0:00 sleep 999 + 3 pts/3 S+ 0:00 sleep 555 + 5 pts/3 R+ 0:00 ps a + +# kill $! +# pidof sleep +53482 53480 +.fam +.fi +.if n .RE +.sp +The following example demonstrates the creation of a time namespace where the boottime clock is set to a point several years in the past: +.sp +.if n .RS 4 +.nf +.fam C +# uptime \-p # Show uptime in initial time namespace +up 21 hours, 30 minutes +# unshare \-\-time \-\-fork \-\-boottime 300000000 uptime \-p +up 9 years, 28 weeks, 1 day, 2 hours, 50 minutes +.fam +.fi +.if n .RE +.SH "AUTHORS" +.sp +.MTO "dottedmag\(atdottedmag.net" "Mikhail Gusarov" "," +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +\fBnewuidmap\fP(1), +\fBnewgidmap\fP(1), +\fBclone\fP(2), +\fBunshare\fP(2), +\fBnamespaces\fP(7), +\fBmount\fP(8) +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBunshare\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/unshare.1.adoc b/sys-utils/unshare.1.adoc new file mode 100644 index 0000000..030e9a4 --- /dev/null +++ b/sys-utils/unshare.1.adoc @@ -0,0 +1,277 @@ +//po4a: entry man manual += unshare(1) +:doctype: manpage +:man manual: User Commands +:man source: util-linux {release-version} +:page-layout: base +:command: unshare + +== NAME + +unshare - run program in new namespaces + +== SYNOPSIS + +*unshare* [options] [_program_ [_arguments_]] + +== DESCRIPTION + +The *unshare* command creates new namespaces (as specified by the command-line options described below) and then executes the specified _program_. If _program_ is not given, then "${SHELL}" is run (default: _/bin/sh_). + +By default, a new namespace persists only as long as it has member processes. A new namespace can be made persistent even when it has no member processes by bind mounting /proc/_pid_/ns/_type_ files to a filesystem path. A namespace that has been made persistent in this way can subsequently be entered with *nsenter*(1) even after the _program_ terminates (except PID namespaces where a permanently running init process is required). Once a persistent namespace is no longer needed, it can be unpersisted by using *umount*(8) to remove the bind mount. See the *EXAMPLES* section for more details. + +*unshare* since util-linux version 2.36 uses _/proc/[pid]/ns/pid_for_children_ and _/proc/[pid]/ns/time_for_children_ files for persistent PID and TIME namespaces. This change requires Linux kernel 4.17 or newer. + +The following types of namespaces can be created with *unshare*: + +*mount namespace*:: +Mounting and unmounting filesystems will not affect the rest of the system, except for filesystems which are explicitly marked as shared (with *mount --make-shared*; see _/proc/self/mountinfo_ or *findmnt -o+PROPAGATION* for the *shared* flags). For further details, see *mount_namespaces*(7). 
++ +*unshare* since util-linux version 2.27 automatically sets propagation to *private* in a new mount namespace to make sure that the new namespace is really unshared. It's possible to disable this feature with option *--propagation unchanged*. Note that *private* is the kernel default. + +*UTS namespace*:: +Setting hostname or domainname will not affect the rest of the system. For further details, see *uts_namespaces*(7). + +*IPC namespace*:: +The process will have an independent namespace for POSIX message queues as well as System V message queues, semaphore sets and shared memory segments. For further details, see *ipc_namespaces*(7). + +*network namespace*:: +The process will have independent IPv4 and IPv6 stacks, IP routing tables, firewall rules, the _/proc/net_ and _/sys/class/net_ directory trees, sockets, etc. For further details, see *network_namespaces*(7). + +*PID namespace*:: +Children will have a distinct set of PID-to-process mappings from their parent. For further details, see *pid_namespaces*(7). + +*cgroup namespace*:: +The process will have a virtualized view of _/proc/self/cgroup_, and new cgroup mounts will be rooted at the namespace cgroup root. For further details, see *cgroup_namespaces*(7). + +*user namespace*:: +The process will have a distinct set of UIDs, GIDs and capabilities. For further details, see *user_namespaces*(7). + +*time namespace*:: +The process can have a distinct view of *CLOCK_MONOTONIC* and/or *CLOCK_BOOTTIME* which can be changed using _/proc/self/timens_offsets_. For further details, see *time_namespaces*(7). + +== OPTIONS + +*-i*, *--ipc*[**=**__file__]:: +Create a new IPC namespace. If _file_ is specified, then the namespace is made persistent by creating a bind mount at _file_. + +*-m*, *--mount*[**=**__file__]:: +Create a new mount namespace. If _file_ is specified, then the namespace is made persistent by creating a bind mount at _file_. 
Note that _file_ must be located on a mount whose propagation type is not *shared* (or an error results). Use the command *findmnt -o+PROPAGATION* when not sure about the current setting. See also the examples below. + +*-n*, *--net*[**=**__file__]:: +Create a new network namespace. If _file_ is specified, then the namespace is made persistent by creating a bind mount at _file_. + +*-p*, *--pid*[**=**__file__]:: +Create a new PID namespace. If _file_ is specified, then the namespace is made persistent by creating a bind mount at _file_. (Creation of a persistent PID namespace will fail if the *--fork* option is not also specified.) ++ +See also the *--fork* and *--mount-proc* options. + +*-u*, *--uts*[**=**__file__]:: +Create a new UTS namespace. If _file_ is specified, then the namespace is made persistent by creating a bind mount at _file_. + +*-U*, *--user*[**=**__file__]:: +Create a new user namespace. If _file_ is specified, then the namespace is made persistent by creating a bind mount at _file_. + +*-C*, *--cgroup*[**=**__file__]:: +Create a new cgroup namespace. If _file_ is specified, then the namespace is made persistent by creating a bind mount at _file_. + +*-T*, *--time*[**=**__file__]:: +Create a new time namespace. If _file_ is specified, then the namespace is made persistent by creating a bind mount at _file_. The *--monotonic* and *--boottime* options can be used to specify the corresponding offset in the time namespace. + +*-f*, *--fork*:: +Fork the specified _program_ as a child process of *unshare* rather than running it directly. This is useful when creating a new PID namespace. Note that when *unshare* is waiting for the child process, then it ignores *SIGINT* and *SIGTERM* and does not forward any signals to the child. It is necessary to send signals to the child process. + +*--keep-caps*:: +When the *--user* option is given, ensure that capabilities granted in the user namespace are preserved in the child process. 
+ +*--kill-child*[**=**__signame__]:: +When *unshare* terminates, have _signame_ be sent to the forked child process. Combined with *--pid* this allows for an easy and reliable killing of the entire process tree below *unshare*. If not given, _signame_ defaults to *SIGKILL*. This option implies *--fork*. + +*--mount-proc*[**=**__mountpoint__]:: +Just before running the program, mount the proc filesystem at _mountpoint_ (default is _/proc_). This is useful when creating a new PID namespace. It also implies creating a new mount namespace since the _/proc_ mount would otherwise mess up existing programs on the system. The new proc filesystem is explicitly mounted as private (with *MS_PRIVATE*|*MS_REC*). + +**--map-user=**__uid|name__:: +Run the program only after the current effective user ID has been mapped to _uid_. If this option is specified multiple times, the last occurrence takes precedence. This option implies *--user*. + +**--map-users=**__outeruid,inneruid,count__|**auto**:: +Run the program only after the block of user IDs of size _count_ beginning at _outeruid_ has been mapped to the block of user IDs beginning at _inneruid_. This mapping is created with **newuidmap**(1). If the range of user IDs overlaps with the mapping specified by *--map-user*, then a "hole" will be removed from the mapping. This may result in the highest user ID of the mapping not being mapped. The special value *auto* will map the first block of user IDs owned by the effective user from _/etc/subuid_ to a block starting at user ID 0. If this option is specified multiple times, the last occurrence takes precedence. This option implies *--user*. + +**--map-group=**__gid|name__:: +Run the program only after the current effective group ID has been mapped to _gid_. If this option is specified multiple times, the last occurrence takes precedence. This option implies *--setgroups=deny* and *--user*. 
+ +**--map-groups=**__outergid,innergid,count__|**auto**:: +Run the program only after the block of group IDs of size _count_ beginning at _outergid_ has been mapped to the block of group IDs beginning at _innergid_. This mapping is created with **newgidmap**(1). If the range of group IDs overlaps with the mapping specified by *--map-group*, then a "hole" will be removed from the mapping. This may result in the highest group ID of the mapping not being mapped. The special value *auto* will map the first block of group IDs owned by the effective user from _/etc/subgid_ to a block starting at group ID 0. If this option is specified multiple times, the last occurrence takes precedence. This option implies *--user*. + +**--map-auto**:: +Map the first block of user IDs owned by the effective user from _/etc/subuid_ to a block starting at user ID 0. In the same manner, also map the first block of group IDs owned by the effective group from _/etc/subgid_ to a block starting at group ID 0. This option is intended to handle the common case where the first block of subordinate user and group IDs can map the whole user and group ID space. This option is equivalent to specifying *--map-users=auto* and *--map-groups=auto*. + +*-r*, *--map-root-user*:: +Run the program only after the current effective user and group IDs have been mapped to the superuser UID and GID in the newly created user namespace. This makes it possible to conveniently gain capabilities needed to manage various aspects of the newly created namespaces (such as configuring interfaces in the network namespace or mounting filesystems in the mount namespace) even when run unprivileged. As a mere convenience feature, it does not support more sophisticated use cases, such as mapping multiple ranges of UIDs and GIDs. This option implies *--setgroups=deny* and *--user*. This option is equivalent to *--map-user=0 --map-group=0*. 
+ +*-c*, *--map-current-user*:: +Run the program only after the current effective user and group IDs have been mapped to the same UID and GID in the newly created user namespace. This option implies *--setgroups=deny* and *--user*. This option is equivalent to *--map-user=$(id -ru) --map-group=$(id -rg)*. + +**--propagation private**|**shared**|**slave**|*unchanged*:: +Recursively set the mount propagation flag in the new mount namespace. The default is to set the propagation to _private_. It is possible to disable this feature with the argument *unchanged*. The option is silently ignored when the mount namespace (*--mount*) is not requested. + +**--setgroups allow**|*deny*:: +Allow or deny the *setgroups*(2) system call in a user namespace. ++ +To be able to call *setgroups*(2), the calling process must at least have *CAP_SETGID*. But since Linux 3.19 a further restriction applies: the kernel gives permission to call *setgroups*(2) only after the GID map (**/proc/**__pid__*/gid_map*) has been set. The GID map is writable by root when *setgroups*(2) is enabled (i.e., *allow*, the default), and the GID map becomes writable by unprivileged processes when *setgroups*(2) is permanently disabled (with *deny*). + +*-R*, **--root=**__dir__:: +run the command with root directory set to _dir_. + +*-w*, **--wd=**__dir__:: +change working directory to _dir_. + +*-S*, *--setuid* _uid_:: +Set the user ID which will be used in the entered namespace. + +*-G*, *--setgid* _gid_:: +Set the group ID which will be used in the entered namespace and drop supplementary groups. + +*--monotonic* _offset_:: +Set the offset of *CLOCK_MONOTONIC* which will be used in the entered time namespace. This option requires unsharing a time namespace with *--time*. + +*--boottime* _offset_:: +Set the offset of *CLOCK_BOOTTIME* which will be used in the entered time namespace. This option requires unsharing a time namespace with *--time*. 
+ +include::man-common/help-version.adoc[] + +== NOTES + +The proc and sysfs filesystems mounting as root in a user namespace have to be restricted so that a less privileged user cannot get more access to sensitive files that a more privileged user made unavailable. In short the rule for proc and sysfs is as close to a bind mount as possible. + +== EXAMPLES + +The following command creates a PID namespace, using *--fork* to ensure that the executed command is performed in a child process that (being the first process in the namespace) has PID 1. The *--mount-proc* option ensures that a new mount namespace is also simultaneously created and that a new *proc*(5) filesystem is mounted that contains information corresponding to the new PID namespace. When the *readlink*(1) command terminates, the new namespaces are automatically torn down. + +.... +# unshare --fork --pid --mount-proc readlink /proc/self +1 +.... + +As an unprivileged user, create a new user namespace where the user's credentials are mapped to the root IDs inside the namespace: + +.... +$ id -u; id -g +1000 +1000 +$ unshare --user --map-root-user \ + sh -c 'whoami; cat /proc/self/uid_map /proc/self/gid_map' +root + 0 1000 1 + 0 1000 1 +.... + +As an unprivileged user, create a user namespace where the first 65536 IDs are all mapped, and the user's credentials are mapped to the root IDs inside the namespace. The map is determined by the subordinate IDs assigned in *subuid*(5) and *subgid*(5). Demonstrate this mapping by creating a file with user ID 1 and group ID 1. For brevity, only the user ID mappings are shown: + +.... +$ id -u +1000 +$ cat /etc/subuid +1000:100000:65536 +$ unshare --user --map-auto --map-root-user +# id -u +0 +# cat /proc/self/uid_map + 0 1000 1 + 1 100000 65535 +# touch file; chown 1:1 file +# ls -ln --time-style=+ file +-rw-r--r-- 1 1 1 0 file +# exit +$ ls -ln --time-style=+ file +-rw-r--r-- 1 100000 100000 0 file +.... 
+ +The first of the following commands creates a new persistent UTS namespace and modifies the hostname as seen in that namespace. The namespace is then entered with *nsenter*(1) in order to display the modified hostname; this step demonstrates that the UTS namespace continues to exist even though the namespace had no member processes after the *unshare* command terminated. The namespace is then destroyed by removing the bind mount. + +.... +# touch /root/uts-ns +# unshare --uts=/root/uts-ns hostname FOO +# nsenter --uts=/root/uts-ns hostname +FOO +# umount /root/uts-ns +.... + +The following commands establish a persistent mount namespace referenced by the bind mount _/root/namespaces/mnt_. In order to ensure that the creation of that bind mount succeeds, the parent directory (_/root/namespaces_) is made a bind mount whose propagation type is not *shared*. + +.... +# mount --bind /root/namespaces /root/namespaces +# mount --make-private /root/namespaces +# touch /root/namespaces/mnt +# unshare --mount=/root/namespaces/mnt +.... + +The following commands demonstrate the use of the *--kill-child* option when creating a PID namespace, in order to ensure that when *unshare* is killed, all of the processes within the PID namespace are killed. + +.... +# set +m # Don't print job status messages + + +# unshare --pid --fork --mount-proc --kill-child -- \ + + + bash --norc -c ''(sleep 555 &) && (ps a &) && sleep 999'' & +[1] 53456 +# PID TTY STAT TIME COMMAND + 1 pts/3 S+ 0:00 sleep 999 + 3 pts/3 S+ 0:00 sleep 555 + 5 pts/3 R+ 0:00 ps a + +# ps h -o 'comm' $! # Show that background job is unshare(1) +unshare +# kill $! # Kill unshare(1) +# pidof sleep +.... + +The *pidof*(1) command prints no output, because the *sleep* processes have been killed. More precisely, when the *sleep* process that has PID 1 in the namespace (i.e., the namespace's init process) was killed, this caused all other processes in the namespace to be killed. 
By contrast, a similar series of commands where the *--kill-child* option is not used shows that when *unshare* terminates, the processes in the PID namespace are not killed: + +.... +# unshare --pid --fork --mount-proc -- \ + + + bash --norc -c ''(sleep 555 &) && (ps a &) && sleep 999'' & +[1] 53479 +# PID TTY STAT TIME COMMAND + 1 pts/3 S+ 0:00 sleep 999 + 3 pts/3 S+ 0:00 sleep 555 + 5 pts/3 R+ 0:00 ps a + +# kill $! +# pidof sleep +53482 53480 +.... + +The following example demonstrates the creation of a time namespace where the boottime clock is set to a point several years in the past: + +.... +# uptime -p # Show uptime in initial time namespace +up 21 hours, 30 minutes +# unshare --time --fork --boottime 300000000 uptime -p +up 9 years, 28 weeks, 1 day, 2 hours, 50 minutes +.... + +== AUTHORS + +mailto:dottedmag@dottedmag.net[Mikhail Gusarov], +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +*newuidmap*(1), +*newgidmap*(1), +*clone*(2), +*unshare*(2), +*namespaces*(7), +*mount*(8) + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/unshare.c b/sys-utils/unshare.c new file mode 100644 index 0000000..21916ad --- /dev/null +++ b/sys-utils/unshare.c @@ -0,0 +1,1111 @@ +/* + * unshare(1) - command-line interface for unshare(2) + * + * Copyright (C) 2009 Mikhail Gusarov <dottedmag@dottedmag.net> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <errno.h> +#include <getopt.h> +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/eventfd.h> +#include <sys/wait.h> +#include <sys/mount.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/prctl.h> +#include <grp.h> + +/* we only need some defines missing in sys/mount.h, no libmount linkage */ +#include <libmount.h> + +#include "nls.h" +#include "c.h" +#include "caputils.h" +#include "closestream.h" +#include "namespace.h" +#include "exec_shell.h" +#include "xalloc.h" +#include "pathnames.h" +#include "all-io.h" +#include "signames.h" +#include "strutils.h" +#include "pwdutils.h" + +/* synchronize parent and child by pipe */ +#define PIPE_SYNC_BYTE 0x06 + +/* 'private' is kernel default */ +#define UNSHARE_PROPAGATION_DEFAULT (MS_REC | MS_PRIVATE) + +/* /proc namespace files and mountpoints for binds */ +static struct namespace_file { + int type; /* CLONE_NEW* */ + const char *name; /* ns/<type> */ + const char *target; /* user specified target for bind mount */ +} namespace_files[] = { + { .type = CLONE_NEWUSER, .name = "ns/user" }, + { .type = CLONE_NEWCGROUP,.name = "ns/cgroup" }, + { .type = CLONE_NEWIPC, .name = "ns/ipc" }, + { .type = CLONE_NEWUTS, .name = "ns/uts" }, + { .type = CLONE_NEWNET, .name = "ns/net" }, + { .type = CLONE_NEWPID, .name = "ns/pid_for_children" }, + { .type = CLONE_NEWNS, .name = "ns/mnt" }, + { .type = CLONE_NEWTIME, .name = "ns/time_for_children" }, + { .name = NULL } +}; + +static int npersists; /* number of persistent namespaces */ + +enum { + SETGROUPS_NONE = -1, + SETGROUPS_DENY = 0, + SETGROUPS_ALLOW = 1, +}; + +static const char *setgroups_strings[] = +{ + [SETGROUPS_DENY] = "deny", + [SETGROUPS_ALLOW] = "allow" +}; + +static int 
setgroups_str2id(const char *str) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(setgroups_strings); i++) + if (strcmp(str, setgroups_strings[i]) == 0) + return i; + + errx(EXIT_FAILURE, _("unsupported --setgroups argument '%s'"), str); +} + +static void setgroups_control(int action) +{ + const char *file = _PATH_PROC_SETGROUPS; + const char *cmd; + int fd; + + if (action < 0 || (size_t) action >= ARRAY_SIZE(setgroups_strings)) + return; + cmd = setgroups_strings[action]; + + fd = open(file, O_WRONLY); + if (fd < 0) { + if (errno == ENOENT) + return; + err(EXIT_FAILURE, _("cannot open %s"), file); + } + + if (write_all(fd, cmd, strlen(cmd))) + err(EXIT_FAILURE, _("write failed %s"), file); + close(fd); +} + +static void map_id(const char *file, uint32_t from, uint32_t to) +{ + char *buf; + int fd; + + fd = open(file, O_WRONLY); + if (fd < 0) + err(EXIT_FAILURE, _("cannot open %s"), file); + + xasprintf(&buf, "%u %u 1", from, to); + if (write_all(fd, buf, strlen(buf))) + err(EXIT_FAILURE, _("write failed %s"), file); + free(buf); + close(fd); +} + +static unsigned long parse_propagation(const char *str) +{ + size_t i; + static const struct prop_opts { + const char *name; + unsigned long flag; + } opts[] = { + { "slave", MS_REC | MS_SLAVE }, + { "private", MS_REC | MS_PRIVATE }, + { "shared", MS_REC | MS_SHARED }, + { "unchanged", 0 } + }; + + for (i = 0; i < ARRAY_SIZE(opts); i++) { + if (strcmp(opts[i].name, str) == 0) + return opts[i].flag; + } + + errx(EXIT_FAILURE, _("unsupported propagation mode: %s"), str); +} + +static void set_propagation(unsigned long flags) +{ + if (flags == 0) + return; + + if (mount("none", "/", NULL, flags, NULL) != 0) + err(EXIT_FAILURE, _("cannot change root filesystem propagation")); +} + + +static int set_ns_target(int type, const char *path) +{ + struct namespace_file *ns; + + for (ns = namespace_files; ns->name; ns++) { + if (ns->type != type) + continue; + ns->target = path; + npersists++; + return 0; + } + + return -EINVAL; +} + 
+static int bind_ns_files(pid_t pid) +{ + struct namespace_file *ns; + char src[PATH_MAX]; + + for (ns = namespace_files; ns->name; ns++) { + if (!ns->target) + continue; + + snprintf(src, sizeof(src), "/proc/%u/%s", (unsigned) pid, ns->name); + + if (mount(src, ns->target, NULL, MS_BIND, NULL) != 0) + err(EXIT_FAILURE, _("mount %s on %s failed"), src, ns->target); + } + + return 0; +} + +static ino_t get_mnt_ino(pid_t pid) +{ + struct stat st; + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "/proc/%u/ns/mnt", (unsigned) pid); + + if (stat(path, &st) != 0) + err(EXIT_FAILURE, _("stat of %s failed"), path); + return st.st_ino; +} + +static void settime(time_t offset, clockid_t clk_id) +{ + char buf[sizeof(stringify_value(ULONG_MAX)) * 3]; + int fd, len; + + len = snprintf(buf, sizeof(buf), "%d %" PRId64 " 0", clk_id, (int64_t) offset); + + fd = open("/proc/self/timens_offsets", O_WRONLY); + if (fd < 0) + err(EXIT_FAILURE, _("failed to open /proc/self/timens_offsets")); + + if (write(fd, buf, len) != len) + err(EXIT_FAILURE, _("failed to write to /proc/self/timens_offsets")); + + close(fd); +} + +/** + * waitchild() - Wait for a process to exit successfully + * @pid: PID of the process to wait for + * + * Wait for a process to exit successfully. If it exits with a non-zero return + * code, then exit() with the same status. + */ +static void waitchild(int pid) +{ + int rc, status; + + do { + rc = waitpid(pid, &status, 0); + if (rc < 0) { + if (errno == EINTR) + continue; + err(EXIT_FAILURE, _("waitpid failed")); + } + if (WIFEXITED(status) && + WEXITSTATUS(status) != EXIT_SUCCESS) + exit(WEXITSTATUS(status)); + } while (rc < 0); +} + +/** + * sync_with_child() - Tell our child we're ready and wait for it to exit + * @pid: The pid of our child + * @fd: A file descriptor created with eventfd() + * + * This tells a child created with fork_and_wait() that we are ready for it to + * continue. Once we have done that, wait for our child to exit. 
+ */ +static void sync_with_child(pid_t pid, int fd) +{ + uint64_t ch = PIPE_SYNC_BYTE; + + write_all(fd, &ch, sizeof(ch)); /* NOTE(review): the result of write_all() is not checked */ + close(fd); + + waitchild(pid); +} + +/** + * fork_and_wait() - Fork and wait to be sync'd with + * @fd: Receives the file descriptor created with eventfd(); it should be + * passed to sync_with_child() + * + * This creates an eventfd and forks. The parent process returns immediately, + * but the child waits for a %PIPE_SYNC_BYTE on the eventfd before returning. + * This allows the parent to perform some tasks before the child starts its + * work. The parent should call sync_with_child() once it is ready for the + * child to continue. + * + * Return: The pid from fork() + */ +static pid_t fork_and_wait(int *fd) +{ + pid_t pid; + uint64_t ch; + + *fd = eventfd(0, 0); + if (*fd < 0) + err(EXIT_FAILURE, _("eventfd failed")); + + pid = fork(); + if (pid < 0) + err(EXIT_FAILURE, _("fork failed")); + + if (!pid) { + /* wait for our parent to tell us to continue; anything other + * than a full read of PIPE_SYNC_BYTE is treated as fatal */ + if (read_all(*fd, (char *)&ch, sizeof(ch)) != sizeof(ch) || + ch != PIPE_SYNC_BYTE) + err(EXIT_FAILURE, _("failed to read eventfd")); + close(*fd); + } + + return pid; +} + +static pid_t bind_ns_files_from_child(int *fd) +{ + pid_t child, ppid = getpid(); + ino_t ino = get_mnt_ino(ppid); /* inode of our ns/mnt link, sampled before forking */ + + child = fork_and_wait(fd); + if (child) + return child; /* parent: hand back the helper's pid */ + + if (get_mnt_ino(ppid) == ino) /* the parent's ns/mnt inode is unchanged: give up */ + exit(EXIT_FAILURE); + bind_ns_files(ppid); + exit(EXIT_SUCCESS); +} + +static uid_t get_user(const char *s, const char *err) +{ + struct passwd *pw; + char *buf = NULL; + uid_t ret; + + pw = xgetpwnam(s, &buf); + if (pw) { + ret = pw->pw_uid; + free(pw); + free(buf); + } else { /* no entry returned for @s: parse it as a number, @err is the failure message */ + ret = strtoul_or_err(s, err); + } + + return ret; +} + +static gid_t get_group(const char *s, const char *err) +{ + struct group *gr; + char *buf = NULL; + gid_t ret; + + gr = xgetgrnam(s, &buf); + if (gr) { + ret = gr->gr_gid; + free(gr); + free(buf); + } else { /* no entry returned for @s: parse it as a number, @err is the failure message */ + ret = strtoul_or_err(s, err); + } + + return ret; +} + +/** + * struct 
map_range - A range of IDs to map + * @outer: First ID mapped on the outside of the namespace + * @inner: First ID mapped on the inside of the namespace + * @count: Length of the inside and outside ranges + * + * A range of uids/gids to map using new[gu]idmap. + */ +struct map_range { + unsigned int outer; + unsigned int inner; + unsigned int count; +}; + +#define UID_BUFSIZ sizeof(stringify_value(ULONG_MAX)) + +/** + * uint_to_id() - Convert a string into a user/group ID + * @name: The string representation of the ID + * @sz: The length of @name, without an (optional) nul-terminator + * + * This converts a (possibly not nul-terminated) string into a user or group + * ID. The text is first copied into a local buffer of UID_BUFSIZ bytes with + * mem2strcpy() and then parsed with strtoul_or_err(). + * No name lookup is performed. + * + * Return: @name as a numeric ID, narrowed to int + */ +static int uint_to_id(const char *name, size_t sz) +{ + char buf[UID_BUFSIZ]; + + mem2strcpy(buf, name, sz, sizeof(buf)); + return strtoul_or_err(buf, _("could not parse ID")); +} + +/** + * get_map_range() - Parse a mapping range from a string + * @s: A string of the format outer,inner,count + * + * Parse a string of the form outer,inner,count into a new mapping range. + * Exits with an error message unless exactly three elements are parsed. + * + * Return: A new &struct map_range allocated with xmalloc() + */ +static struct map_range *get_map_range(const char *s) +{ + int n, map[3]; + struct map_range *ret; + + n = string_to_idarray(s, map, ARRAY_SIZE(map), uint_to_id); + if (n < 0) + errx(EXIT_FAILURE, _("too many elements for mapping '%s'"), s); + if (n != ARRAY_SIZE(map)) + errx(EXIT_FAILURE, _("mapping '%s' contains only %d elements"), + s, n); + + ret = xmalloc(sizeof(*ret)); + ret->outer = map[0]; + ret->inner = map[1]; + ret->count = map[2]; + return ret; +} + +/** + * read_subid_range() - Look up a user's sub[gu]id range + * @filename: The file to look up the range from. This should be either + * ``/etc/subuid`` or ``/etc/subgid``. + * @uid: The uid of the user whose range we should look up. + * + * This finds the first subid range matching @uid in @filename. 
+ */ +static struct map_range *read_subid_range(char *filename, uid_t uid) +{ + char *line = NULL, *pwbuf; + FILE *idmap; + size_t n = 0; + struct passwd *pw; + struct map_range *map; + + map = xmalloc(sizeof(*map)); + map->inner = 0; + + pw = xgetpwuid(uid, &pwbuf); + if (!pw) + errx(EXIT_FAILURE, _("you (user %d) don't exist."), uid); + + idmap = fopen(filename, "r"); + if (!idmap) + err(EXIT_FAILURE, _("could not open '%s'"), filename); + + /* + * Each line in sub[ug]idmap looks like + * username:subuid:count + * OR + * uid:subuid:count + */ + while (getline(&line, &n, idmap) != -1) { + char *rest, *s; + + rest = strchr(line, ':'); + if (!rest) + continue; + *rest = '\0'; + + if (strcmp(line, pw->pw_name) && + strtoul(line, NULL, 10) != pw->pw_uid) + continue; + + s = rest + 1; + rest = strchr(s, ':'); + if (!rest) + continue; + *rest = '\0'; + map->outer = strtoul_or_err(s, _("failed to parse subid map")); + + s = rest + 1; + rest = strchr(s, '\n'); + if (rest) + *rest = '\0'; + map->count = strtoul_or_err(s, _("failed to parse subid map")); + + fclose(idmap); + free(pw); + free(pwbuf); + + return map; + } + + err(EXIT_FAILURE, _("no line matching user \"%s\" in %s"), + pw->pw_name, filename); +} + +/** + * map_ids() - Create a new uid/gid map + * @idmapper: Either newuidmap or newgidmap + * @ppid: Pid to set the map for + * @outer: ID outside the namespace for a single map. + * @inner: ID inside the namespace for a single map. May be -1 to only use @map. + * @map: A range of IDs to map + * + * This creates a new uid/gid map for @ppid using @idmapper. The ID @outer in + * the parent (our) namespace is mapped to the ID @inner in the child (@ppid's) + * namespace. In addition, the range of IDs beginning at @map->outer is mapped + * to the range of IDs beginning at @map->inner. The tricky bit is that we + * cannot let these mappings overlap. We accomplish this by removing a "hole" + * from @map, if @outer or @inner overlap it. 
This may result in one less than + * @map->count IDs being mapped from @map. The unmapped IDs are always the + * topmost IDs of the mapping (either in the parent or the child namespace). + * + * Most of the time, this function will be called with @map->outer as some + * large ID, @map->inner as 0, and @map->count as a large number (at least + * 1000, but less than @map->outer). Typically, there will be no conflict with + * @outer. However, @inner may split the mapping for e.g. --map-current-user. + * + * This function always exec()s or errors out and does not return. + */ +static void __attribute__((__noreturn__)) +map_ids(const char *idmapper, int ppid, unsigned int outer, unsigned int inner, + struct map_range *map) +{ + /* idmapper + pid + 4 * map + NULL */ + char *argv[15]; + /* argv - idmapper - "1" - NULL */ + char args[12][UID_BUFSIZ]; + int i = 0, j = 0; + struct map_range lo, mid, hi; + unsigned int inner_offset, outer_offset; + + /* Some helper macros to reduce bookkeeping */ +#define push_str(s) do { \ + argv[i++] = s; \ +} while (0) +#define push_ul(x) do { \ + snprintf(args[j], sizeof(args[j]), "%u", x); \ + push_str(args[j++]); \ +} while (0) + + push_str(xstrdup(idmapper)); + push_ul(ppid); + if ((int)inner == -1) { + /* + * If we don't have a "single" mapping, then we can just use + * map directly + */ + push_ul(map->inner); + push_ul(map->outer); + push_ul(map->count); + push_str(NULL); + + execvp(idmapper, argv); + errexec(idmapper); + } + + /* + * If the mappings overlap, remove an ID from map. The range covers + * [start, start + count), so the upper bound is exclusive: an ID equal + * to start + count lies just past the range and must not shrink it + * (and an empty range can never overlap, so count cannot underflow). + */ + if ((outer >= map->outer && outer < map->outer + map->count) || + (inner >= map->inner && inner < map->inner + map->count)) + map->count--; + + /* Determine where the splits between lo, mid, and hi will be */ + outer_offset = min(outer > map->outer ? outer - map->outer : 0, + map->count); + inner_offset = min(inner > map->inner ? 
inner - map->inner : 0, + map->count); + + /* + * In the worst case, we need three mappings: + * From the bottom of map to either inner or outer + */ + lo.outer = map->outer; + lo.inner = map->inner; + lo.count = min(inner_offset, outer_offset); + + /* From the lower of inner or outer to the higher */ + mid.outer = lo.outer + lo.count; + mid.outer += mid.outer == outer; + mid.inner = lo.inner + lo.count; + mid.inner += mid.inner == inner; + mid.count = abs_diff(outer_offset, inner_offset); + + /* And from the higher of inner or outer to the end of the map */ + hi.outer = mid.outer + mid.count; + hi.outer += hi.outer == outer; + hi.inner = mid.inner + mid.count; + hi.inner += hi.inner == inner; + hi.count = map->count - lo.count - mid.count; + + push_ul(inner); + push_ul(outer); + push_str("1"); + /* new[gu]idmap doesn't like zero-length mappings, so skip them */ + if (lo.count) { + push_ul(lo.inner); + push_ul(lo.outer); + push_ul(lo.count); + } + if (mid.count) { + push_ul(mid.inner); + push_ul(mid.outer); + push_ul(mid.count); + } + if (hi.count) { + push_ul(hi.inner); + push_ul(hi.outer); + push_ul(hi.count); + } + push_str(NULL); + execvp(idmapper, argv); + errexec(idmapper); +} + +/** + * map_ids_from_child() - Set up a new uid/gid map + * @fd: The eventfd to wait on + * @mapuser: The user to map the current user to (or -1) + * @usermap: The range of UIDs to map (or %NULL) + * @mapgroup: The group to map the current group to (or -1) + * @groupmap: The range of GIDs to map (or %NULL) + * + * fork_and_wait() for our parent to call sync_with_child() on @fd. Upon + * receiving the go-ahead, use newuidmap and newgidmap to set the uid/gid map + * for our parent's PID. + * + * Return: The pid of the child. 
+ */ +static pid_t map_ids_from_child(int *fd, uid_t mapuser, + struct map_range *usermap, gid_t mapgroup, + struct map_range *groupmap) +{ + pid_t child, pid = 0; + pid_t ppid = getpid(); + + child = fork_and_wait(fd); + if (child) + return child; + + /* Avoid forking more than we need to */ + if (usermap && groupmap) { + pid = fork(); + if (pid < 0) + err(EXIT_FAILURE, _("fork failed")); + if (pid) + waitchild(pid); + } + + if (!pid && usermap) + map_ids("newuidmap", ppid, geteuid(), mapuser, usermap); + if (groupmap) + map_ids("newgidmap", ppid, getegid(), mapgroup, groupmap); + exit(EXIT_SUCCESS); +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + + fputs(USAGE_HEADER, out); + fprintf(out, _(" %s [options] [<program> [<argument>...]]\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Run a program with some namespaces unshared from the parent.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -m, --mount[=<file>] unshare mounts namespace\n"), out); + fputs(_(" -u, --uts[=<file>] unshare UTS namespace (hostname etc)\n"), out); + fputs(_(" -i, --ipc[=<file>] unshare System V IPC namespace\n"), out); + fputs(_(" -n, --net[=<file>] unshare network namespace\n"), out); + fputs(_(" -p, --pid[=<file>] unshare pid namespace\n"), out); + fputs(_(" -U, --user[=<file>] unshare user namespace\n"), out); + fputs(_(" -C, --cgroup[=<file>] unshare cgroup namespace\n"), out); + fputs(_(" -T, --time[=<file>] unshare time namespace\n"), out); + fputs(USAGE_SEPARATOR, out); + fputs(_(" -f, --fork fork before launching <program>\n"), out); + fputs(_(" --map-user=<uid>|<name> map current user to uid (implies --user)\n"), out); + fputs(_(" --map-group=<gid>|<name> map current group to gid (implies --user)\n"), out); + fputs(_(" -r, --map-root-user map current user to root (implies --user)\n"), out); + fputs(_(" -c, --map-current-user map current user to itself (implies --user)\n"), out); + fputs(_(" --map-auto 
map users and groups automatically (implies --user)\n"), out); + fputs(_(" --map-users=<outeruid>,<inneruid>,<count>\n" + " map count users from outeruid to inneruid (implies --user)\n"), out); + fputs(_(" --map-groups=<outergid>,<innergid>,<count>\n" + " map count groups from outergid to innergid (implies --user)\n"), out); + fputs(USAGE_SEPARATOR, out); + fputs(_(" --kill-child[=<signame>] when dying, kill the forked child (implies --fork)\n" + " defaults to SIGKILL\n"), out); + fputs(_(" --mount-proc[=<dir>] mount proc filesystem first (implies --mount)\n"), out); + fputs(_(" --propagation slave|shared|private|unchanged\n" + " modify mount propagation in mount namespace\n"), out); + fputs(_(" --setgroups allow|deny control the setgroups syscall in user namespaces\n"), out); + fputs(_(" --keep-caps retain capabilities granted in user namespaces\n"), out); + fputs(USAGE_SEPARATOR, out); + fputs(_(" -R, --root=<dir> run the command with root directory set to <dir>\n"), out); + fputs(_(" -w, --wd=<dir> change working directory to <dir>\n"), out); + fputs(_(" -S, --setuid <uid> set uid in entered namespace\n"), out); + fputs(_(" -G, --setgid <gid> set gid in entered namespace\n"), out); + fputs(_(" --monotonic <offset> set clock monotonic offset (seconds) in time namespaces\n"), out); + fputs(_(" --boottime <offset> set clock boottime offset (seconds) in time namespaces\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(27)); + printf(USAGE_MAN_TAIL("unshare(1)")); + + exit(EXIT_SUCCESS); +} + +int main(int argc, char *argv[]) +{ + enum { + OPT_MOUNTPROC = CHAR_MAX + 1, + OPT_PROPAGATION, + OPT_SETGROUPS, + OPT_KILLCHILD, + OPT_KEEPCAPS, + OPT_MONOTONIC, + OPT_BOOTTIME, + OPT_MAPUSER, + OPT_MAPUSERS, + OPT_MAPGROUP, + OPT_MAPGROUPS, + OPT_MAPAUTO, + }; + static const struct option longopts[] = { + { "help", no_argument, NULL, 'h' }, + { "version", no_argument, NULL, 'V' }, + + { "mount", optional_argument, NULL, 'm' }, + { "uts", 
optional_argument, NULL, 'u' }, + { "ipc", optional_argument, NULL, 'i' }, + { "net", optional_argument, NULL, 'n' }, + { "pid", optional_argument, NULL, 'p' }, + { "user", optional_argument, NULL, 'U' }, + { "cgroup", optional_argument, NULL, 'C' }, + { "time", optional_argument, NULL, 'T' }, + + { "fork", no_argument, NULL, 'f' }, + { "kill-child", optional_argument, NULL, OPT_KILLCHILD }, + { "mount-proc", optional_argument, NULL, OPT_MOUNTPROC }, + { "map-user", required_argument, NULL, OPT_MAPUSER }, + { "map-users", required_argument, NULL, OPT_MAPUSERS }, + { "map-group", required_argument, NULL, OPT_MAPGROUP }, + { "map-groups", required_argument, NULL, OPT_MAPGROUPS }, + { "map-root-user", no_argument, NULL, 'r' }, + { "map-current-user", no_argument, NULL, 'c' }, + { "map-auto", no_argument, NULL, OPT_MAPAUTO }, + { "propagation", required_argument, NULL, OPT_PROPAGATION }, + { "setgroups", required_argument, NULL, OPT_SETGROUPS }, + { "keep-caps", no_argument, NULL, OPT_KEEPCAPS }, + { "setuid", required_argument, NULL, 'S' }, + { "setgid", required_argument, NULL, 'G' }, + { "root", required_argument, NULL, 'R' }, + { "wd", required_argument, NULL, 'w' }, + { "monotonic", required_argument, NULL, OPT_MONOTONIC }, + { "boottime", required_argument, NULL, OPT_BOOTTIME }, + { NULL, 0, NULL, 0 } + }; + + int setgrpcmd = SETGROUPS_NONE; + int unshare_flags = 0; + int c, forkit = 0; + uid_t mapuser = -1; + gid_t mapgroup = -1; + struct map_range *usermap = NULL; + struct map_range *groupmap = NULL; + int kill_child_signo = 0; /* 0 means --kill-child was not used */ + const char *procmnt = NULL; + const char *newroot = NULL; + const char *newdir = NULL; + pid_t pid_bind = 0, pid_idmap = 0; + pid_t pid = 0; + int fd_idmap, fd_bind = -1; + sigset_t sigset, oldsigset; + int status; + unsigned long propagation = UNSHARE_PROPAGATION_DEFAULT; + int force_uid = 0, force_gid = 0; + uid_t uid = 0, real_euid = geteuid(); + gid_t gid = 0, real_egid = getegid(); + int 
keepcaps = 0; + time_t monotonic = 0; + time_t boottime = 0; + int force_monotonic = 0; + int force_boottime = 0; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, "+fhVmuinpCTUrR:w:S:G:c", longopts, NULL)) != -1) { + switch (c) { + case 'f': + forkit = 1; + break; + case 'm': + unshare_flags |= CLONE_NEWNS; + if (optarg) + set_ns_target(CLONE_NEWNS, optarg); + break; + case 'u': + unshare_flags |= CLONE_NEWUTS; + if (optarg) + set_ns_target(CLONE_NEWUTS, optarg); + break; + case 'i': + unshare_flags |= CLONE_NEWIPC; + if (optarg) + set_ns_target(CLONE_NEWIPC, optarg); + break; + case 'n': + unshare_flags |= CLONE_NEWNET; + if (optarg) + set_ns_target(CLONE_NEWNET, optarg); + break; + case 'p': + unshare_flags |= CLONE_NEWPID; + if (optarg) + set_ns_target(CLONE_NEWPID, optarg); + break; + case 'U': + unshare_flags |= CLONE_NEWUSER; + if (optarg) + set_ns_target(CLONE_NEWUSER, optarg); + break; + case 'C': + unshare_flags |= CLONE_NEWCGROUP; + if (optarg) + set_ns_target(CLONE_NEWCGROUP, optarg); + break; + case 'T': + unshare_flags |= CLONE_NEWTIME; + if (optarg) + set_ns_target(CLONE_NEWTIME, optarg); + break; + case OPT_MOUNTPROC: + unshare_flags |= CLONE_NEWNS; + procmnt = optarg ? 
optarg : "/proc"; + break; + case OPT_MAPUSER: + unshare_flags |= CLONE_NEWUSER; + mapuser = get_user(optarg, _("failed to parse uid")); + break; + case OPT_MAPGROUP: + unshare_flags |= CLONE_NEWUSER; + mapgroup = get_group(optarg, _("failed to parse gid")); + break; + case 'r': + unshare_flags |= CLONE_NEWUSER; + mapuser = 0; + mapgroup = 0; + break; + case 'c': + unshare_flags |= CLONE_NEWUSER; + mapuser = real_euid; + mapgroup = real_egid; + break; + case OPT_MAPUSERS: + unshare_flags |= CLONE_NEWUSER; + if (!strcmp(optarg, "auto")) + usermap = read_subid_range(_PATH_SUBUID, real_euid); + else + usermap = get_map_range(optarg); + break; + case OPT_MAPGROUPS: + unshare_flags |= CLONE_NEWUSER; + if (!strcmp(optarg, "auto")) + groupmap = read_subid_range(_PATH_SUBGID, real_euid); + else + groupmap = get_map_range(optarg); + break; + case OPT_MAPAUTO: + unshare_flags |= CLONE_NEWUSER; + usermap = read_subid_range(_PATH_SUBUID, real_euid); + groupmap = read_subid_range(_PATH_SUBGID, real_euid); + break; + case OPT_SETGROUPS: + setgrpcmd = setgroups_str2id(optarg); + break; + case OPT_PROPAGATION: + propagation = parse_propagation(optarg); + break; + case OPT_KILLCHILD: + forkit = 1; + if (optarg) { + if ((kill_child_signo = signame_to_signum(optarg)) < 0) + errx(EXIT_FAILURE, _("unknown signal: %s"), + optarg); + } else { + kill_child_signo = SIGKILL; + } + break; + case OPT_KEEPCAPS: + keepcaps = 1; + cap_last_cap(); /* Force last cap to be cached before we fork. 
*/ + break; + case 'S': + uid = strtoul_or_err(optarg, _("failed to parse uid")); + force_uid = 1; + break; + case 'G': + gid = strtoul_or_err(optarg, _("failed to parse gid")); + force_gid = 1; + break; + case 'R': + newroot = optarg; + break; + case 'w': + newdir = optarg; + break; + case OPT_MONOTONIC: + monotonic = strtoul_or_err(optarg, _("failed to parse monotonic offset")); + force_monotonic = 1; + break; + case OPT_BOOTTIME: + boottime = strtoul_or_err(optarg, _("failed to parse boottime offset")); + force_boottime = 1; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + /* The clock offsets are written to the new time namespace, so one has + * to be requested; its short option is -T (there is no -t) */ + if ((force_monotonic || force_boottime) && !(unshare_flags & CLONE_NEWTIME)) + errx(EXIT_FAILURE, _("options --monotonic and --boottime require " + "unsharing of a time namespace (-T)")); + + /* clear any inherited settings */ + signal(SIGCHLD, SIG_DFL); + + if (npersists && (unshare_flags & CLONE_NEWNS)) + pid_bind = bind_ns_files_from_child(&fd_bind); + + if (usermap || groupmap) + pid_idmap = map_ids_from_child(&fd_idmap, mapuser, usermap, + mapgroup, groupmap); + + if (-1 == unshare(unshare_flags)) + err(EXIT_FAILURE, _("unshare failed")); + + /* Tell child we've called unshare() */ + if (usermap || groupmap) + sync_with_child(pid_idmap, fd_idmap); + + if (force_boottime) + settime(boottime, CLOCK_BOOTTIME); + + if (force_monotonic) + settime(monotonic, CLOCK_MONOTONIC); + + if (forkit) { + if (sigemptyset(&sigset) != 0 || + sigaddset(&sigset, SIGINT) != 0 || + sigaddset(&sigset, SIGTERM) != 0 || + sigprocmask(SIG_BLOCK, &sigset, &oldsigset) != 0) + err(EXIT_FAILURE, _("sigprocmask block failed")); + + /* force child forking before mountspace binding + * so pid_for_children is populated */ + pid = fork(); + + switch(pid) { + case -1: + err(EXIT_FAILURE, _("fork failed")); + case 0: /* child */ + if (sigprocmask(SIG_SETMASK, &oldsigset, NULL)) + err(EXIT_FAILURE, + _("sigprocmask restore failed")); + if 
(npersists && (unshare_flags & CLONE_NEWNS)) + close(fd_bind); + break; + default: /* parent */ + break; + } + } + + if (npersists && (pid || !forkit)) { + /* run in parent */ + if (pid_bind && (unshare_flags & CLONE_NEWNS)) + sync_with_child(pid_bind, fd_bind); + else + /* simple way, just bind */ + bind_ns_files(getpid()); + } + + if (pid) { + if (waitpid(pid, &status, 0) == -1) + err(EXIT_FAILURE, _("waitpid failed")); + + if (WIFEXITED(status)) + return WEXITSTATUS(status); + if (WIFSIGNALED(status)) { + + /* Ensure the signal that terminated the child will + * also terminate the parent. */ + + int termsig = WTERMSIG(status); + + if (signal(termsig, SIG_DFL) == SIG_ERR || + sigemptyset(&sigset) != 0 || + sigaddset(&sigset, termsig) != 0 || + sigprocmask(SIG_UNBLOCK, &sigset, NULL) != 0) + err(EXIT_FAILURE, + _("sigprocmask unblock failed")); + + kill(getpid(), termsig); + } + err(EXIT_FAILURE, _("child exit failed")); + } + + if (kill_child_signo != 0 && prctl(PR_SET_PDEATHSIG, kill_child_signo) < 0) + err(EXIT_FAILURE, "prctl failed"); + + if (mapuser != (uid_t) -1 && !usermap) + map_id(_PATH_PROC_UIDMAP, mapuser, real_euid); + + /* Since Linux 3.19 unprivileged writing of /proc/self/gid_map + * has been disabled unless /proc/self/setgroups is written + * first to permanently disable the ability to call setgroups + * in that user namespace. 
*/ + if (mapgroup != (gid_t) -1 && !groupmap) { + if (setgrpcmd == SETGROUPS_ALLOW) + errx(EXIT_FAILURE, _("options --setgroups=allow and " + "--map-group are mutually exclusive")); + setgroups_control(SETGROUPS_DENY); + map_id(_PATH_PROC_GIDMAP, mapgroup, real_egid); + } + + if (setgrpcmd != SETGROUPS_NONE) + setgroups_control(setgrpcmd); + + if ((unshare_flags & CLONE_NEWNS) && propagation) + set_propagation(propagation); + + if (newroot) { + if (chroot(newroot) != 0) + err(EXIT_FAILURE, + _("cannot change root directory to '%s'"), newroot); + newdir = newdir ?: "/"; + } + if (newdir && chdir(newdir)) + err(EXIT_FAILURE, _("cannot chdir to '%s'"), newdir); + + if (procmnt) { + /* When not changing root and using the default propagation flags + then the recursive propagation change of root will + automatically change that of an existing proc mount. */ + if (!newroot && propagation != (MS_PRIVATE|MS_REC)) { + int rc = mount("none", procmnt, NULL, MS_PRIVATE|MS_REC, NULL); + + /* Custom procmnt means that proc is very likely not mounted, causing EINVAL. + Ignoring the error in this specific instance is considered safe. */ + if(rc != 0 && errno != EINVAL) + err(EXIT_FAILURE, _("cannot change %s filesystem propagation"), procmnt); + } + + if (mount("proc", procmnt, "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL) != 0) + err(EXIT_FAILURE, _("mount %s failed"), procmnt); + } + + if (force_gid) { + if (setgroups(0, NULL) != 0) /* drop supplementary groups */ + err(EXIT_FAILURE, _("setgroups failed")); + if (setgid(gid) < 0) /* change GID */ + err(EXIT_FAILURE, _("setgid failed")); + } + if (force_uid && setuid(uid) < 0) /* change UID */ + err(EXIT_FAILURE, _("setuid failed")); + + /* We use capabilities system calls to propagate the permitted + * capabilities into the ambient set because we have already + * forked so are in async-signal-safe context. 
*/ + if (keepcaps && (unshare_flags & CLONE_NEWUSER)) { + struct __user_cap_header_struct header = { + .version = _LINUX_CAPABILITY_VERSION_3, + .pid = 0, + }; + + struct __user_cap_data_struct payload[_LINUX_CAPABILITY_U32S_3] = {{ 0 }}; + uint64_t effective, cap; + + if (capget(&header, payload) < 0) + err(EXIT_FAILURE, _("capget failed")); + + /* In order the make capabilities ambient, we first need to ensure + * that they are all inheritable. */ + payload[0].inheritable = payload[0].permitted; + payload[1].inheritable = payload[1].permitted; + + if (capset(&header, payload) < 0) + err(EXIT_FAILURE, _("capset failed")); + + effective = ((uint64_t)payload[1].effective << 32) | (uint64_t)payload[0].effective; + + for (cap = 0; cap < (sizeof(effective) * 8); cap++) { + /* This is the same check as cap_valid(), but using + * the runtime value for the last valid cap. */ + if (cap > (uint64_t) cap_last_cap()) + continue; + + if ((effective & (1 << cap)) + && prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, cap, 0, 0) < 0) + err(EXIT_FAILURE, _("prctl(PR_CAP_AMBIENT) failed")); + } + } + + if (optind < argc) { + execvp(argv[optind], argv + optind); + errexec(argv[optind]); + } + exec_shell(); +} diff --git a/sys-utils/wdctl.8 b/sys-utils/wdctl.8 new file mode 100644 index 0000000..8566907 --- /dev/null +++ b/sys-utils/wdctl.8 @@ -0,0 +1,127 @@ +'\" t +.\" Title: wdctl +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "WDCTL" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +wdctl \- show hardware watchdog status +.SH "SYNOPSIS" +.sp +\fBwdctl\fP [options] [\fIdevice\fP...] 
+.SH "DESCRIPTION" +.sp +Show hardware watchdog status. The default device is \fI/dev/watchdog\fP. If more than one device is specified then the output is separated by one blank line. +.sp +If the device is already used or user has no permissions to read from the device, then \fBwdctl\fP reads data from sysfs. In this case information about supported features (flags) might be missing. +.sp +Note that the number of supported watchdog features is hardware specific. +.SH "OPTIONS" +.sp +\fB\-f\fP, \fB\-\-flags\fP \fIlist\fP +.RS 4 +Print only the specified flags. +.RE +.sp +\fB\-F\fP, \fB\-\-noflags\fP +.RS 4 +Do not print information about flags. +.RE +.sp +\fB\-I\fP, \fB\-\-noident\fP +.RS 4 +Do not print watchdog identity information. +.RE +.sp +\fB\-n\fP, \fB\-\-noheadings\fP +.RS 4 +Do not print a header line for flags table. +.RE +.sp +\fB\-o\fP, \fB\-\-output\fP \fIlist\fP +.RS 4 +Define the output columns to use in table of watchdog flags. If no output arrangement is specified, then a default set is used. Use \fB\-\-help\fP to get list of all supported columns. +.RE +.sp +\fB\-O\fP, \fB\-\-oneline\fP +.RS 4 +Print all wanted information on one line in key="value" output format. +.RE +.sp +\fB\-p\fP, \fB\-\-setpretimeout\fP \fIseconds\fP +.RS 4 +Set the watchdog pre\-timeout in seconds. A watchdog pre\-timeout is a +notification generated by the watchdog before the watchdog reset might occur in +the event the watchdog has not been serviced. This notification is handled by +the kernel and can be configured to take an action using sysfs or by \fB\-\-setpregovernor\fP. +.RE +.sp +\fB\-g\fP, \fB\-\-setpregovernor\fP \fIgovernor\fP +.RS 4 +Set pre\-timeout governor name. For available governors see default \fBwdctl\fP output. +.RE +.sp +\fB\-r\fP, \fB\-\-raw\fP +.RS 4 +Use the raw output format. +.RE +.sp +\fB\-s\fP, \fB\-\-settimeout\fP \fIseconds\fP +.RS 4 +Set the watchdog timeout in seconds. 
+.RE +.sp +\fB\-T\fP, \fB\-\-notimeouts\fP +.RS 4 +Do not print watchdog timeouts. +.RE +.sp +\fB\-x\fP, \fB\-\-flags\-only\fP +.RS 4 +Same as \fB\-I \-T\fP. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. +.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "AUTHORS" +.sp +.MTO "kzak\(atredhat.com" "Karel Zak" "," +.MTO "lennart\(atpoettering.net" "Lennart Poettering" "" +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBwdctl\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/wdctl.8.adoc b/sys-utils/wdctl.8.adoc new file mode 100644 index 0000000..6eb44e5 --- /dev/null +++ b/sys-utils/wdctl.8.adoc @@ -0,0 +1,79 @@ +//po4a: entry man manual += wdctl(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: wdctl + +== NAME + +wdctl - show hardware watchdog status + +== SYNOPSIS + +*wdctl* [options] [_device_...] + +== DESCRIPTION + +Show hardware watchdog status. The default device is _/dev/watchdog_. If more than one device is specified then the output is separated by one blank line. + +If the device is already used or user has no permissions to read from the device, then *wdctl* reads data from sysfs. In this case information about supported features (flags) might be missing. + +Note that the number of supported watchdog features is hardware specific. + +== OPTIONS + +*-f*, *--flags* _list_:: +Print only the specified flags. + +*-F*, *--noflags*:: +Do not print information about flags. + +*-I*, *--noident*:: +Do not print watchdog identity information. + +*-n*, *--noheadings*:: +Do not print a header line for flags table. + +*-o*, *--output* _list_:: +Define the output columns to use in table of watchdog flags. If no output arrangement is specified, then a default set is used. Use *--help* to get list of all supported columns. + +*-O*, *--oneline*:: +Print all wanted information on one line in key="value" output format. + +*-p*, *--setpretimeout* _seconds_:: +Set the watchdog pre-timeout in seconds. A watchdog pre-timeout is a +notification generated by the watchdog before the watchdog reset might occur in +the event the watchdog has not been serviced. This notification is handled by +the kernel and can be configured to take an action using sysfs or by **--setpregovernor**. + +*-g*, *--setpregovernor* _governor_:: +Set pre-timeout governor name. For available governors see default **wdctl** output. 
+ +*-r*, *--raw*:: +Use the raw output format. + +*-s*, *--settimeout* _seconds_:: +Set the watchdog timeout in seconds. + +*-T*, *--notimeouts*:: +Do not print watchdog timeouts. + +*-x*, *--flags-only*:: +Same as *-I -T*. + +include::man-common/help-version.adoc[] + +== AUTHORS + +mailto:kzak@redhat.com[Karel Zak], +mailto:lennart@poettering.net[Lennart Poettering] + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/wdctl.c b/sys-utils/wdctl.c new file mode 100644 index 0000000..a22a59a --- /dev/null +++ b/sys-utils/wdctl.c @@ -0,0 +1,851 @@ +/* + * wdctl(8) - show hardware watchdog status + * + * Copyright (C) 2012 Lennart Poettering + * Copyright (C) 2012 Karel Zak <kzak@redhat.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2, or (at your option) any + * later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ +#include <sys/ioctl.h> +#include <getopt.h> +#include <stdio.h> +#include <signal.h> +#include <assert.h> +#include <linux/watchdog.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> + +#include <libsmartcols.h> + +#include "nls.h" +#include "c.h" +#include "xalloc.h" +#include "closestream.h" +#include "optutils.h" +#include "pathnames.h" +#include "strutils.h" +#include "carefulputc.h" +#include "path.h" +#include "strv.h" + +/* + * since 2.6.18 + */ +#ifndef WDIOC_SETPRETIMEOUT +# define WDIOC_SETPRETIMEOUT _IOWR(WATCHDOG_IOCTL_BASE, 8, int) +# define WDIOC_GETPRETIMEOUT _IOR(WATCHDOG_IOCTL_BASE, 9, int) +# define WDIOC_GETTIMELEFT _IOR(WATCHDOG_IOCTL_BASE, 10, int) +# define WDIOF_POWEROVER 0x0040 /* Power over voltage */ +# define WDIOF_SETTIMEOUT 0x0080 /* Set timeout (in seconds) */ +# define WDIOF_MAGICCLOSE 0x0100 /* Supports magic close char */ +# define WDIOF_PRETIMEOUT 0x0200 /* Pretimeout (in seconds), get/set */ +# define WDIOF_KEEPALIVEPING 0x8000 /* Keep alive ping reply */ +#endif + +/* + * since 3.5 + */ +#ifndef WDIOF_ALARMONLY +# define WDIOF_ALARMONLY 0x0400 /* Watchdog triggers a management or + other external alarm not a reboot */ +#endif + +struct wdflag { + uint32_t flag; + const char *name; + const char *description; +}; + +static const struct wdflag wdflags[] = { + { WDIOF_CARDRESET, "CARDRESET", N_("Card previously reset the CPU") }, + { WDIOF_EXTERN1, "EXTERN1", N_("External relay 1") }, + { WDIOF_EXTERN2, "EXTERN2", N_("External relay 2") }, + { WDIOF_FANFAULT, "FANFAULT", N_("Fan failed") }, + { WDIOF_KEEPALIVEPING, "KEEPALIVEPING", N_("Keep alive ping reply") }, + { WDIOF_MAGICCLOSE, "MAGICCLOSE", N_("Supports magic close char") }, + { WDIOF_OVERHEAT, "OVERHEAT", N_("Reset due to CPU overheat") }, + { WDIOF_POWEROVER, "POWEROVER", N_("Power over voltage") }, + { WDIOF_POWERUNDER, "POWERUNDER", N_("Power bad/power fault") }, + { WDIOF_PRETIMEOUT, "PRETIMEOUT", N_("Pretimeout (in seconds)") }, + { 
WDIOF_SETTIMEOUT, "SETTIMEOUT", N_("Set timeout (in seconds)") }, + { WDIOF_ALARMONLY, "ALARMONLY", N_("Not trigger reboot") } +}; + + +/* column names */ +struct colinfo { + const char *name; /* header */ + double whint; /* width hint (N < 1 is in percent of termwidth) */ + int flags; /* SCOLS_FL_* */ + const char *help; +}; + +enum { COL_FLAG, COL_DESC, COL_STATUS, COL_BSTATUS, COL_DEVICE }; + +/* columns descriptions */ +static struct colinfo infos[] = { + [COL_FLAG] = { "FLAG", 14, 0, N_("flag name") }, + [COL_DESC] = { "DESCRIPTION", 0.1, SCOLS_FL_TRUNC, N_("flag description") }, + [COL_STATUS] = { "STATUS", 1, SCOLS_FL_RIGHT, N_("flag status") }, + [COL_BSTATUS] = { "BOOT-STATUS", 1, SCOLS_FL_RIGHT, N_("flag boot status") }, + [COL_DEVICE] = { "DEVICE", 0.1, 0, N_("watchdog device name") } + +}; + +static int columns[ARRAY_SIZE(infos) * 2]; +static int ncolumns; + +struct wd_device { + const char *devpath; + struct path_cxt *sysfs; + + char *governor; + char **available_governors; + + int timeout; + int timeleft; + int pretimeout; + + uint32_t status; + uint32_t bstatus; + int nowayout; + + struct watchdog_info ident; + + unsigned int has_timeout : 1, + has_timeleft : 1, + has_pretimeout : 1, + has_nowayout : 1, + no_sysfs : 1; +}; + +struct wd_control { + /* set */ + int timeout; /* --settimeout */ + int pretimeout; /* --setpretimeout */ + const char *governor; /* --setpregovernor */ + unsigned int set_timeout : 1, + set_pretimeout : 1; + + /* output */ + unsigned int show_oneline : 1, + show_raw : 1, + hide_headings : 1, + hide_flags : 1, + hide_ident : 1, + hide_timeouts : 1; +}; + +#define want_set(_ctl) ((_ctl)->set_timeout \ + || (_ctl)->set_pretimeout \ + || (_ctl)->governor) + +/* converts flag name to flag bit */ +static long name2bit(const char *name, size_t namesz) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(wdflags); i++) { + const char *cn = wdflags[i].name; + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) + return wdflags[i].flag; + } 
+ warnx(_("unknown flag: %s"), name); + return -1; +} + +static int column2id(const char *name, size_t namesz) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(infos); i++) { + const char *cn = infos[i].name; + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) + return i; + } + warnx(_("unknown column: %s"), name); + return -1; +} + +static int get_column_id(int num) +{ + assert(num < ncolumns); + assert(columns[num] < (int) ARRAY_SIZE(infos)); + + return columns[num]; +} + +static struct colinfo *get_column_info(unsigned num) +{ + return &infos[ get_column_id(num) ]; +} + +/* We preffer cdev /dev/watchdog0 as this device has node in + * /sys/class/watchdog/. The old miscdev /dev/watchdog is fallback for old + * systemds only. + */ +static const char *get_default_device(void) +{ + const char **p; + static const char *devs[] = { + "/dev/watchdog0", + "/dev/watchdog", + NULL + }; + + for (p = devs; *p; p++) { + if (access(*p, F_OK) == 0) + return *p; + } + + return NULL; +} + +static void __attribute__((__noreturn__)) usage(void) +{ + FILE *out = stdout; + size_t i; + const char *dflt = get_default_device(); + + fputs(USAGE_HEADER, out); + fprintf(out, + _(" %s [options] [<device> ...]\n"), program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Show the status of the hardware watchdog.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -f, --flags <list> print selected flags only\n" + " -F, --noflags don't print information about flags\n" + " -I, --noident don't print watchdog identity information\n" + " -n, --noheadings don't print headings for flags table\n" + " -O, --oneline print all information on one line\n" + " -o, --output <list> output columns of the flags\n" + " -p, --setpretimeout <sec> set watchdog pre-timeout\n" + " -g, --setpregovernor <name> set pre-timeout governor\n" + " -r, --raw use raw output format for flags table\n" + " -T, --notimeouts don't print watchdog timeouts\n" + " -s, --settimeout <sec> set watchdog timeout\n" 
+ " -x, --flags-only print only flags table (same as -I -T)\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(24)); + fputs(USAGE_SEPARATOR, out); + + if (dflt) + fprintf(out, _("The default device is %s.\n"), dflt); + else + fprintf(out, _("No default device is available.\n")); + + fputs(USAGE_COLUMNS, out); + for (i = 0; i < ARRAY_SIZE(infos); i++) + fprintf(out, " %13s %s\n", infos[i].name, _(infos[i].help)); + + printf(USAGE_MAN_TAIL("wdctl(8)")); + + exit(EXIT_SUCCESS); +} + +static struct path_cxt *get_sysfs(struct wd_device *wd) +{ + struct path_cxt *sys; + struct stat st; + + if (wd->no_sysfs) + return NULL; + if (wd->sysfs) + return wd->sysfs; + if (stat(wd->devpath, &st) != 0) + goto nosysfs; + + sys = ul_new_path(_PATH_SYS_DEVCHAR "/%u:%u", + major(st.st_rdev), minor(st.st_rdev)); + if (!sys) + return NULL; + + if (ul_path_get_dirfd(sys) < 0) + goto nosysfs; /* device not in /sys */ + + if (ul_path_access(sys, F_OK, "identity") != 0) + goto nosysfs; /* no info in /sys (old miscdev?) */ + + wd->sysfs = sys; + return sys; +nosysfs: + wd->no_sysfs = 1; + return NULL; +} + +static void add_flag_line(struct libscols_table *table, struct wd_device *wd, const struct wdflag *fl) +{ + int i; + struct libscols_line *line; + + line = scols_table_new_line(table, NULL); + if (!line) { + warn(_("failed to allocate output line")); + return; + } + + for (i = 0; i < ncolumns; i++) { + const char *str = NULL; + + switch (get_column_id(i)) { + case COL_FLAG: + str = fl->name; + break; + case COL_DESC: + str = fl->description; + break; + case COL_STATUS: + str = wd->status & fl->flag ? "1" : "0"; + break; + case COL_BSTATUS: + str = wd->bstatus & fl->flag ? 
"1" : "0"; + break; + case COL_DEVICE: + str = wd->devpath; + break; + default: + break; + } + + if (str && scols_line_set_data(line, i, str)) { + warn(_("failed to add output data")); + break; + } + } +} + +static int show_flags(struct wd_control *ctl, struct wd_device *wd, uint32_t wanted) +{ + size_t i; + int rc = -1; + struct libscols_table *table; + uint32_t flags; + + /* information about supported bits is probably missing in /sys */ + if (!wd->ident.options) + return 0; + + scols_init_debug(0); + + /* create output table */ + table = scols_new_table(); + if (!table) { + warn(_("failed to allocate output table")); + return -1; + } + scols_table_enable_raw(table, ctl->show_raw); + scols_table_enable_noheadings(table, ctl->hide_headings); + + /* define columns */ + for (i = 0; i < (size_t) ncolumns; i++) { + struct colinfo *col = get_column_info(i); + + if (!scols_table_new_column(table, col->name, col->whint, col->flags)) { + warnx(_("failed to allocate output column")); + goto done; + } + } + + /* fill-in table with data + * -- one line for each supported flag (option) */ + flags = wd->ident.options; + + for (i = 0; i < ARRAY_SIZE(wdflags); i++) { + if (wanted && !(wanted & wdflags[i].flag)) + ; /* ignore */ + else if (flags & wdflags[i].flag) + add_flag_line(table, wd, &wdflags[i]); + + flags &= ~wdflags[i].flag; + } + + if (flags) + warnx(_("%s: unknown flags 0x%x\n"), wd->devpath, flags); + + scols_print_table(table); + rc = 0; +done: + scols_unref_table(table); + return rc; +} + +/* + * Warning: successfully opened watchdog has to be properly closed with magic + * close character otherwise the machine will be rebooted! + * + * Don't use err() or exit() here! 
+ */ +static int set_watchdog(struct wd_control *ctl, struct wd_device *wd) +{ + int fd; + sigset_t sigs, oldsigs; + int rc = 0; + + assert(wd); + assert(wd->devpath); + assert(ctl); + + if (!ctl->set_timeout && !ctl->set_timeout) + goto sysfs_only; + + sigemptyset(&oldsigs); + sigfillset(&sigs); + sigprocmask(SIG_BLOCK, &sigs, &oldsigs); + + fd = open(wd->devpath, O_WRONLY|O_CLOEXEC); + + if (fd < 0) { + if (errno == EBUSY) + warnx(_("%s: watchdog already in use, terminating."), + wd->devpath); + warn(_("cannot open %s"), wd->devpath); + return -1; + } + + for (;;) { + /* We just opened this to query the state, not to arm + * it hence use the magic close character */ + static const char v = 'V'; + + if (write(fd, &v, 1) >= 0) + break; + if (errno != EINTR) { + warn(_("%s: failed to disarm watchdog"), wd->devpath); + break; + } + /* Let's try hard, since if we don't get this right + * the machine might end up rebooting. */ + } + + if (ctl->set_timeout) { + if (ioctl(fd, WDIOC_SETTIMEOUT, &ctl->timeout) != 0) { + rc += errno; + warn(_("cannot set timeout for %s"), wd->devpath); + } else + printf(P_("Timeout has been set to %d second.\n", + "Timeout has been set to %d seconds.\n", + ctl->timeout), ctl->timeout); + } + + if (ctl->set_pretimeout) { + if (ioctl(fd, WDIOC_SETPRETIMEOUT, &ctl->pretimeout) != 0) { + rc += errno; + warn(_("cannot set pretimeout for %s"), wd->devpath); + } else + printf(P_("Pre-timeout has been set to %d second.\n", + "Pre-timeout has been set to %d seconds.\n", + ctl->pretimeout), ctl->pretimeout); + } + + if (close(fd)) + warn(_("write failed")); + + sigprocmask(SIG_SETMASK, &oldsigs, NULL); + +sysfs_only: + if (ctl->governor) { + struct path_cxt *sys = get_sysfs(wd); + int xrc; + + xrc = !sys ? 
errno : + ul_path_write_string(sys, ctl->governor, + "pretimeout_governor"); + if (xrc) + warn(_("cannot set pre-timeout governor")); + rc += xrc; + } + + return rc; +} + +/* + * Warning: successfully opened watchdog has to be properly closed with magic + * close character otherwise the machine will be rebooted! + * + * Don't use err() or exit() here! + */ +static int read_watchdog_from_device(struct wd_device *wd) +{ + int fd; + sigset_t sigs, oldsigs; + + assert(wd->devpath); + + sigemptyset(&oldsigs); + sigfillset(&sigs); + sigprocmask(SIG_BLOCK, &sigs, &oldsigs); + + fd = open(wd->devpath, O_WRONLY|O_CLOEXEC); + + if (fd < 0) + return -errno; + + if (ioctl(fd, WDIOC_GETSUPPORT, &wd->ident) < 0) + warn(_("%s: failed to get information about watchdog"), wd->devpath); + else { + ioctl(fd, WDIOC_GETSTATUS, &wd->status); + ioctl(fd, WDIOC_GETBOOTSTATUS, &wd->bstatus); + + /* + * Sometimes supported options like WDIOF_CARDRESET are missing from + * ident.options, add anything set in status/bstatus to ident.options. + */ + wd->ident.options |= wd->status; + wd->ident.options |= wd->bstatus; + + if (ioctl(fd, WDIOC_GETTIMEOUT, &wd->timeout) >= 0) + wd->has_timeout = 1; + if (ioctl(fd, WDIOC_GETPRETIMEOUT, &wd->pretimeout) >= 0) + wd->has_pretimeout = 1; + if (ioctl(fd, WDIOC_GETTIMELEFT, &wd->timeleft) >= 0) + wd->has_timeleft = 1; + } + + for (;;) { + /* We just opened this to query the state, not to arm + * it hence use the magic close character */ + static const char v = 'V'; + + if (write(fd, &v, 1) >= 0) + break; + if (errno != EINTR) { + warn(_("%s: failed to disarm watchdog"), wd->devpath); + break; + } + /* Let's try hard, since if we don't get this right + * the machine might end up rebooting. 
*/ + } + + if (close(fd)) + warn(_("write failed")); + sigprocmask(SIG_SETMASK, &oldsigs, NULL); + + return 0; +} + + +/* Returns: <0 error, 0 success, 1 unssuported */ +static int read_watchdog_from_sysfs(struct wd_device *wd) +{ + struct path_cxt *sys; + + sys = get_sysfs(wd); + if (!sys) + return 1; + + ul_path_read_buffer(sys, (char *) wd->ident.identity, sizeof(wd->ident.identity), "identity"); + + ul_path_scanf(sys, "status", "%x", &wd->status); + ul_path_read_u32(sys, &wd->bstatus, "bootstatus"); + + if (ul_path_read_s32(sys, &wd->nowayout, "nowayout") == 0) + wd->has_nowayout = 1; + if (ul_path_read_s32(sys, &wd->timeout, "timeout") == 0) + wd->has_timeout = 1; + if (ul_path_read_s32(sys, &wd->pretimeout, "pretimeout") == 0) + wd->has_pretimeout = 1; + if (ul_path_read_s32(sys, &wd->timeleft, "timeleft") == 0) + wd->has_timeleft = 1; + + return 0; +} + +static int read_governors(struct wd_device *wd) +{ + struct path_cxt *sys; + FILE *f; + + sys = get_sysfs(wd); + if (!sys) + return 1; + + f = ul_path_fopen(sys, "r", "pretimeout_available_governors"); + if (f) { + char *line = NULL; + size_t dummy = 0; + ssize_t sz; + + while ((sz = getline(&line, &dummy, f)) >= 0) { + if (rtrim_whitespace((unsigned char *) line) == 0) + continue; + strv_consume(&wd->available_governors, line); + dummy = 0; + line = NULL; + } + free(line); + fclose(f); + } + + ul_path_read_string(sys, &wd->governor, "pretimeout_governor"); + return 0; +} + +static int read_watchdog(struct wd_device *wd) +{ + int rc = read_watchdog_from_device(wd); + + if (rc == -EBUSY || rc == -EACCES || rc == -EPERM) + rc = read_watchdog_from_sysfs(wd); + + if (rc) { + warn(_("cannot read information about %s"), wd->devpath); + return -1; + } + + read_governors(wd); + return 0; +} + +static void show_timeouts(struct wd_device *wd) +{ + if (wd->has_timeout) + printf(P_("%-14s %2i second\n", "%-14s %2i seconds\n", wd->timeout), + _("Timeout:"), wd->timeout); + if (wd->has_timeleft) + printf(P_("%-14s %2i 
second\n", "%-14s %2i seconds\n", wd->timeleft), + _("Timeleft:"), wd->timeleft); + if (wd->has_pretimeout) + printf(P_("%-14s %2i second\n", "%-14s %2i seconds\n", wd->pretimeout), + _("Pre-timeout:"), wd->pretimeout); +} + +static void show_governors(struct wd_device *wd) +{ + if (wd->governor) + printf(_("%-14s %s\n"), _("Pre-timeout governor:"), wd->governor); + if (wd->available_governors) { + char *tmp = strv_join(wd->available_governors, " "); + + if (tmp) + printf(_("%-14s %s\n"), + _("Available pre-timeout governors:"), tmp); + free(tmp); + } +} + +static void print_oneline(struct wd_control *ctl, struct wd_device *wd, uint32_t wanted) +{ + printf("%s:", wd->devpath); + + if (!ctl->hide_ident) { + printf(" VERSION=\"%x\"", wd->ident.firmware_version); + + printf(" IDENTITY="); + fputs_quoted((char *) wd->ident.identity, stdout); + } + if (!ctl->hide_timeouts) { + if (wd->has_timeout) + printf(" TIMEOUT=\"%i\"", wd->timeout); + if (wd->has_pretimeout) + printf(" PRETIMEOUT=\"%i\"", wd->pretimeout); + if (wd->has_timeleft) + printf(" TIMELEFT=\"%i\"", wd->timeleft); + } + + if (!ctl->hide_flags) { + size_t i; + uint32_t flags = wd->ident.options; + + for (i = 0; i < ARRAY_SIZE(wdflags); i++) { + const struct wdflag *fl; + + if ((wanted && !(wanted & wdflags[i].flag)) || + !(flags & wdflags[i].flag)) + continue; + + fl= &wdflags[i]; + + printf(" %s=\"%s\"", fl->name, + wd->status & fl->flag ? "1" : "0"); + printf(" %s_BOOT=\"%s\"", fl->name, + wd->bstatus & fl->flag ? 
"1" : "0"); + + } + } + + fputc('\n', stdout); +} + +static void print_device(struct wd_control *ctl, struct wd_device *wd, uint32_t wanted) +{ + /* NAME=value one line output */ + if (ctl->show_oneline) { + print_oneline(ctl, wd, wanted); + return; + } + + /* pretty output */ + if (!ctl->hide_ident) { + printf("%-15s%s\n", _("Device:"), wd->devpath); + printf("%-15s%s [%s %x]\n", + _("Identity:"), + wd->ident.identity, + _("version"), + wd->ident.firmware_version); + } + if (!ctl->hide_timeouts) + show_timeouts(wd); + + show_governors(wd); + + if (!ctl->hide_flags) + show_flags(ctl, wd, wanted); +} + +int main(int argc, char *argv[]) +{ + struct wd_device wd; + struct wd_control ctl = { .hide_headings = 0 }; + int c, res = EXIT_SUCCESS, count = 0; + uint32_t wanted = 0; + const char *dflt_device = NULL; + + static const struct option long_opts[] = { + { "flags", required_argument, NULL, 'f' }, + { "flags-only", no_argument, NULL, 'x' }, + { "help", no_argument, NULL, 'h' }, + { "noflags", no_argument, NULL, 'F' }, + { "noheadings", no_argument, NULL, 'n' }, + { "noident", no_argument, NULL, 'I' }, + { "notimeouts", no_argument, NULL, 'T' }, + { "settimeout", required_argument, NULL, 's' }, + { "setpretimeout", required_argument, NULL, 'p' }, + { "setpregovernor", required_argument, NULL, 'g' }, + { "output", required_argument, NULL, 'o' }, + { "oneline", no_argument, NULL, 'O' }, + { "raw", no_argument, NULL, 'r' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { /* rows and cols in ASCII order */ + { 'F','f' }, /* noflags,flags*/ + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, + "d:f:g:hFnITp:o:s:OrVx", long_opts, NULL)) != -1) { + + err_exclusive_options(c, long_opts, excl, excl_st); + + switch(c) { + case 'o': + ncolumns = 
string_to_idarray(optarg, + columns, ARRAY_SIZE(columns), + column2id); + if (ncolumns < 0) + return EXIT_FAILURE; + break; + case 's': + ctl.timeout = strtos32_or_err(optarg, _("invalid timeout argument")); + ctl.set_timeout = 1; + break; + case 'p': + ctl.pretimeout = strtos32_or_err(optarg, _("invalid pretimeout argument")); + ctl.set_pretimeout = 1; + break; + case 'f': + if (string_to_bitmask(optarg, (unsigned long *) &wanted, name2bit) != 0) + return EXIT_FAILURE; + break; + case 'F': + ctl.hide_flags = 1; + break; + case 'g': + ctl.governor = optarg; + break; + case 'I': + ctl.hide_ident = 1; + break; + case 'T': + ctl.hide_timeouts = 1; + break; + case 'n': + ctl.hide_headings = 1; + break; + case 'r': + ctl.show_raw = 1; + break; + case 'O': + ctl.show_oneline = 1; + break; + case 'x': + ctl.hide_ident = 1; + ctl.hide_timeouts = 1; + break; + + case 'h': + usage(); + case 'V': + print_version(EXIT_SUCCESS); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (!ncolumns) { + /* default columns */ + columns[ncolumns++] = COL_FLAG; + columns[ncolumns++] = COL_DESC; + columns[ncolumns++] = COL_STATUS; + columns[ncolumns++] = COL_BSTATUS; + } + + /* Device no specified, use default. */ + if (optind == argc) { + dflt_device = get_default_device(); + if (!dflt_device) + err(EXIT_FAILURE, _("No default device is available.")); + } + + do { + int rc; + + memset(&wd, 0, sizeof(wd)); + wd.devpath = dflt_device ? 
dflt_device : argv[optind++]; + + if (count) + fputc('\n', stdout); + count++; + + if (want_set(&ctl)) { + rc = set_watchdog(&ctl, &wd); + if (rc) { + res = EXIT_FAILURE; + } + } + + rc = read_watchdog(&wd); + if (rc) { + res = EXIT_FAILURE; + continue; + } + + print_device(&ctl, &wd, wanted); + ul_unref_path(wd.sysfs); + } while (optind < argc); + + return res; +} diff --git a/sys-utils/zramctl.8 b/sys-utils/zramctl.8 new file mode 100644 index 0000000..1d7e53d --- /dev/null +++ b/sys-utils/zramctl.8 @@ -0,0 +1,158 @@ +'\" t +.\" Title: zramctl +.\" Author: [see the "AUTHOR(S)" section] +.\" Generator: Asciidoctor 2.0.15 +.\" Date: 2022-05-11 +.\" Manual: System Administration +.\" Source: util-linux 2.38.1 +.\" Language: English +.\" +.TH "ZRAMCTL" "8" "2022-05-11" "util\-linux 2.38.1" "System Administration" +.ie \n(.g .ds Aq \(aq +.el .ds Aq ' +.ss \n[.ss] 0 +.nh +.ad l +.de URL +\fI\\$2\fP <\\$1>\\$3 +.. +.als MTO URL +.if \n[.g] \{\ +. mso www.tmac +. am URL +. ad l +. . +. am MTO +. ad l +. . +. LINKSTYLE blue R < > +.\} +.SH "NAME" +zramctl \- set up and control zram devices +.SH "SYNOPSIS" +.sp +Get info: +.RS 4 +\fBzramctl\fP [options] +.RE +.sp +Reset zram: +.RS 4 +\fBzramctl\fP \fB\-r\fP \fIzramdev\fP... +.RE +.sp +Print name of first unused zram device: +.RS 4 +\fBzramctl\fP \fB\-f\fP +.RE +.sp +Set up a zram device: +.RS 4 +\fBzramctl\fP [\fB\-f\fP | \fIzramdev\fP] [\fB\-s\fP \fIsize\fP] [\fB\-t\fP \fInumber\fP] [\fB\-a\fP \fIalgorithm\fP] +.RE +.SH "DESCRIPTION" +.sp +\fBzramctl\fP is used to quickly set up zram device parameters, to reset zram devices, and to query the status of used zram devices. +.sp +If no option is given, all non\-zero size zram devices are shown. +.sp +Note that \fIzramdev\fP node specified on command line has to already exist. The command \fBzramctl\fP creates a new \fI/dev/zram<N>\fP nodes only when \fB\-\-find\fP option specified. 
It\(cqs possible (and common) that after system boot \fI/dev/zram<N>\fP nodes are not created yet. +.SH "OPTIONS" +.sp +\fB\-a\fP, \fB\-\-algorithm lzo\fP|\fBlz4\fP|\fBlz4hc\fP|\fBdeflate\fP|\fB842\fP|\fBzstd\fP +.RS 4 +Set the compression algorithm to be used for compressing data in the zram device. +.RE +.sp +\fB\-f\fP, \fB\-\-find\fP +.RS 4 +Find the first unused zram device. If a \fB\-\-size\fP argument is present, then initialize the device. +.RE +.sp +\fB\-n\fP, \fB\-\-noheadings\fP +.RS 4 +Do not print a header line in status output. +.RE +.sp +\fB\-o\fP, \fB\-\-output\fP \fIlist\fP +.RS 4 +Define the status output columns to be used. If no output arrangement is specified, then a default set is used. Use \fB\-\-help\fP to get a list of all supported columns. +.RE +.sp +\fB\-\-output\-all\fP +.RS 4 +Output all available columns. +.RE +.sp +\fB\-\-raw\fP +.RS 4 +Use the raw format for status output. +.RE +.sp +\fB\-r\fP, \fB\-\-reset\fP +.RS 4 +Reset the options of the specified zram device(s). Zram device settings can be changed only after a reset. +.RE +.sp +\fB\-s\fP, \fB\-\-size\fP \fIsize\fP +.RS 4 +Create a zram device of the specified \fIsize\fP. Zram devices are aligned to memory pages; when the requested \fIsize\fP is not a multiple of the page size, it will be rounded up to the next multiple. When not otherwise specified, the unit of the \fIsize\fP parameter is bytes. +.sp +The \fIsize\fP argument may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB. +.RE +.sp +\fB\-t\fP, \fB\-\-streams\fP \fInumber\fP +.RS 4 +Set the maximum number of compression streams that can be used for the device. The default is use all CPUs and one stream for kernels older than 4.6. +.RE +.sp +\fB\-h\fP, \fB\-\-help\fP +.RS 4 +Display help text and exit. 
+.RE +.sp +\fB\-V\fP, \fB\-\-version\fP +.RS 4 +Print version and exit. +.RE +.SH "EXIT STATUS" +.sp +\fBzramctl\fP returns 0 on success, nonzero on failure. +.SH "FILES" +.sp +\fI/dev/zram[0..N]\fP +.RS 4 +zram block devices +.RE +.SH "EXAMPLE" +.sp +The following commands set up a zram device with a size of one gigabyte and use it as swap device. +.sp +.if n .RS 4 +.nf +.fam C + # zramctl \-\-find \-\-size 1024M + /dev/zram0 + # mkswap /dev/zram0 + # swapon /dev/zram0 + ... + # swapoff /dev/zram0 + # zramctl \-\-reset /dev/zram0 +.fam +.fi +.if n .RE +.SH "AUTHORS" +.sp +.MTO "nefelim4ag\(atgmail.com" "Timofey Titovets" "," +.MTO "kzak\(atredhat.com" "Karel Zak" "" +.SH "SEE ALSO" +.sp +.URL "http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/admin\-guide/blockdev/zram.rst" "Linux kernel documentation" "" +.SH "REPORTING BUGS" +.sp +For bug reports, use the issue tracker at \c +.URL "https://github.com/util\-linux/util\-linux/issues" "" "." +.SH "AVAILABILITY" +.sp +The \fBzramctl\fP command is part of the util\-linux package which can be downloaded from \c +.URL "https://www.kernel.org/pub/linux/utils/util\-linux/" "Linux Kernel Archive" "."
\ No newline at end of file diff --git a/sys-utils/zramctl.8.adoc b/sys-utils/zramctl.8.adoc new file mode 100644 index 0000000..cdfb13d --- /dev/null +++ b/sys-utils/zramctl.8.adoc @@ -0,0 +1,110 @@ +//po4a: entry man manual += zramctl(8) +:doctype: manpage +:man manual: System Administration +:man source: util-linux {release-version} +:page-layout: base +:command: zramctl + +== NAME + +zramctl - set up and control zram devices + +== SYNOPSIS + +Get info: :: + +*zramctl* [options] + +Reset zram: :: + +*zramctl* *-r* _zramdev_... + +Print name of first unused zram device: :: + +*zramctl* *-f* + +Set up a zram device: :: + +*zramctl* [*-f* | _zramdev_] [*-s* _size_] [*-t* _number_] [*-a* _algorithm_] + +== DESCRIPTION + +*zramctl* is used to quickly set up zram device parameters, to reset zram devices, and to query the status of used zram devices. + +If no option is given, all non-zero size zram devices are shown. + +Note that the _zramdev_ node specified on the command line has to already exist. The command *zramctl* creates new _/dev/zram<N>_ nodes only when the *--find* option is specified. It's possible (and common) that after system boot _/dev/zram<N>_ nodes are not created yet. + +== OPTIONS + +*-a*, **--algorithm lzo**|**lz4**|**lz4hc**|**deflate**|**842**|**zstd**:: +Set the compression algorithm to be used for compressing data in the zram device. + +*-b*, *--bytes*:: +Print sizes in bytes rather than in human-readable format. + +*-f*, *--find*:: +Find the first unused zram device. If a *--size* argument is present, then initialize the device. + +*-n*, *--noheadings*:: +Do not print a header line in status output. + +*-o*, *--output* _list_:: +Define the status output columns to be used. If no output arrangement is specified, then a default set is used. Use *--help* to get a list of all supported columns. + +*--output-all*:: +Output all available columns. + +*--raw*:: +Use the raw format for status output. + +*-r*, *--reset*:: +Reset the options of the specified zram device(s). Zram device settings can be changed only after a reset. 
+ +*-s*, *--size* _size_:: +Create a zram device of the specified _size_. Zram devices are aligned to memory pages; when the requested _size_ is not a multiple of the page size, it will be rounded up to the next multiple. When not otherwise specified, the unit of the _size_ parameter is bytes. ++ +The _size_ argument may be followed by the multiplicative suffixes KiB (=1024), MiB (=1024*1024), and so on for GiB, TiB, PiB, EiB, ZiB and YiB (the "iB" is optional, e.g., "K" has the same meaning as "KiB") or the suffixes KB (=1000), MB (=1000*1000), and so on for GB, TB, PB, EB, ZB and YB. + +*-t*, *--streams* _number_:: +Set the maximum number of compression streams that can be used for the device. The default is to use all CPUs, or a single stream on kernels older than 4.6. + +include::man-common/help-version.adoc[] + +== EXIT STATUS + +*zramctl* returns 0 on success, nonzero on failure. + +== FILES + +_/dev/zram[0..N]_:: +zram block devices + +== EXAMPLE + +The following commands set up a zram device with a size of one gigabyte and use it as a swap device. + +.... + # zramctl --find --size 1024M + /dev/zram0 + # mkswap /dev/zram0 + # swapon /dev/zram0 + ... + # swapoff /dev/zram0 + # zramctl --reset /dev/zram0 +.... 
+ +== AUTHORS + +mailto:nefelim4ag@gmail.com[Timofey Titovets], +mailto:kzak@redhat.com[Karel Zak] + +== SEE ALSO + +link:http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/Documentation/admin-guide/blockdev/zram.rst[Linux kernel documentation] + +include::man-common/bugreports.adoc[] + +include::man-common/footer.adoc[] + +ifdef::translation[] +include::man-common/translation.adoc[] +endif::[] diff --git a/sys-utils/zramctl.c b/sys-utils/zramctl.c new file mode 100644 index 0000000..0c321b8 --- /dev/null +++ b/sys-utils/zramctl.c @@ -0,0 +1,771 @@ +/* + * zramctl - control compressed block devices in RAM + * + * Copyright (c) 2014 Timofey Titovets <Nefelim4ag@gmail.com> + * Copyright (C) 2014 Karel Zak <kzak@redhat.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <getopt.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <assert.h> +#include <sys/types.h> +#include <dirent.h> + +#include <libsmartcols.h> + +#include "c.h" +#include "nls.h" +#include "closestream.h" +#include "strutils.h" +#include "xalloc.h" +#include "sysfs.h" +#include "optutils.h" +#include "ismounted.h" +#include "strv.h" +#include "path.h" +#include "pathnames.h" + +/*#define CONFIG_ZRAM_DEBUG*/ + +#ifdef CONFIG_ZRAM_DEBUG +# define DBG(x) do { fputs("zram: ", stderr); x; fputc('\n', stderr); } while(0) +#else +# define DBG(x) +#endif + +/* status output columns */ +struct colinfo { + const char *name; + double whint; + int flags; + const char *help; +}; + +enum { + COL_NAME = 0, + COL_DISKSIZE, + COL_ORIG_SIZE, + COL_COMP_SIZE, + COL_ALGORITHM, + COL_STREAMS, + COL_ZEROPAGES, + COL_MEMTOTAL, + COL_MEMLIMIT, + COL_MEMUSED, + COL_MIGRATED, + COL_MOUNTPOINT +}; + +static const struct colinfo infos[] = { + [COL_NAME] = { "NAME", 0.25, 0, N_("zram device name") }, + [COL_DISKSIZE] = { "DISKSIZE", 5, SCOLS_FL_RIGHT, N_("limit on the uncompressed amount of data") }, + [COL_ORIG_SIZE] = { "DATA", 5, SCOLS_FL_RIGHT, N_("uncompressed size of stored data") }, + [COL_COMP_SIZE] = { "COMPR", 5, SCOLS_FL_RIGHT, N_("compressed size of stored data") }, + [COL_ALGORITHM] = { "ALGORITHM", 3, 0, N_("the selected compression algorithm") }, + [COL_STREAMS] = { "STREAMS", 3, SCOLS_FL_RIGHT, N_("number of concurrent compress operations") }, + [COL_ZEROPAGES] = { "ZERO-PAGES", 3, SCOLS_FL_RIGHT, N_("empty pages with no allocated memory") }, + [COL_MEMTOTAL] = { "TOTAL", 5, SCOLS_FL_RIGHT, N_("all memory including allocator fragmentation and metadata overhead") }, + [COL_MEMLIMIT] = { "MEM-LIMIT", 5, SCOLS_FL_RIGHT, N_("memory limit used to store compressed data") }, + [COL_MEMUSED] = { "MEM-USED", 5, SCOLS_FL_RIGHT, N_("memory zram have been consumed to store compressed data") }, + [COL_MIGRATED] = { "MIGRATED", 5, SCOLS_FL_RIGHT, 
N_("number of objects migrated by compaction") }, + [COL_MOUNTPOINT]= { "MOUNTPOINT",0.10, SCOLS_FL_TRUNC, N_("where the device is mounted") }, +}; + +static int columns[ARRAY_SIZE(infos) * 2] = {-1}; +static int ncolumns; + +enum { + MM_ORIG_DATA_SIZE = 0, + MM_COMPR_DATA_SIZE, + MM_MEM_USED_TOTAL, + MM_MEM_LIMIT, + MM_MEM_USED_MAX, + MM_ZERO_PAGES, + MM_NUM_MIGRATED +}; + +static const char *mm_stat_names[] = { + [MM_ORIG_DATA_SIZE] = "orig_data_size", + [MM_COMPR_DATA_SIZE] = "compr_data_size", + [MM_MEM_USED_TOTAL] = "mem_used_total", + [MM_MEM_LIMIT] = "mem_limit", + [MM_MEM_USED_MAX] = "mem_used_max", + [MM_ZERO_PAGES] = "zero_pages", + [MM_NUM_MIGRATED] = "num_migrated" +}; + +struct zram { + char devname[32]; + struct path_cxt *sysfs; /* device specific sysfs directory */ + char **mm_stat; + + unsigned int mm_stat_probed : 1, + control_probed : 1, + has_control : 1; /* has /sys/class/zram-control/ */ +}; + +static unsigned int raw, no_headings, inbytes; +static struct path_cxt *__control; + +static int get_column_id(int num) +{ + assert(num < ncolumns); + assert(columns[num] < (int) ARRAY_SIZE(infos)); + return columns[num]; +} + +static const struct colinfo *get_column_info(int num) +{ + return &infos[ get_column_id(num) ]; +} + +static int column_name_to_id(const char *name, size_t namesz) +{ + size_t i; + + for (i = 0; i < ARRAY_SIZE(infos); i++) { + const char *cn = infos[i].name; + + if (!strncasecmp(name, cn, namesz) && !*(cn + namesz)) + return i; + } + warnx(_("unknown column: %s"), name); + return -1; +} + +static void zram_reset_stat(struct zram *z) +{ + if (z) { + strv_free(z->mm_stat); + z->mm_stat = NULL; + z->mm_stat_probed = 0; + } +} + +static void zram_set_devname(struct zram *z, const char *devname, size_t n) +{ + assert(z); + + if (!devname) + snprintf(z->devname, sizeof(z->devname), "/dev/zram%zu", n); + else + xstrncpy(z->devname, devname, sizeof(z->devname)); + + DBG(fprintf(stderr, "set devname: %s", z->devname)); + 
ul_unref_path(z->sysfs); + z->sysfs = NULL; + zram_reset_stat(z); +} + +static int zram_get_devnum(struct zram *z) +{ + int n; + + assert(z); + + if (sscanf(z->devname, "/dev/zram%d", &n) == 1) + return n; + return -EINVAL; +} + +static struct zram *new_zram(const char *devname) +{ + struct zram *z = xcalloc(1, sizeof(struct zram)); + + DBG(fprintf(stderr, "new: %p", z)); + if (devname) + zram_set_devname(z, devname, 0); + return z; +} + +static void free_zram(struct zram *z) +{ + if (!z) + return; + DBG(fprintf(stderr, "free: %p", z)); + ul_unref_path(z->sysfs); + zram_reset_stat(z); + free(z); +} + +static struct path_cxt *zram_get_sysfs(struct zram *z) +{ + assert(z); + + if (!z->sysfs) { + dev_t devno = sysfs_devname_to_devno(z->devname); + if (!devno) + return NULL; + z->sysfs = ul_new_sysfs_path(devno, NULL, NULL); + if (!z->sysfs) + return NULL; + if (*z->devname != '/') + /* canonicalize the device name according to /sys */ + sysfs_blkdev_get_path(z->sysfs, z->devname, sizeof(z->devname)); + } + + return z->sysfs; +} + +static inline int zram_exist(struct zram *z) +{ + assert(z); + + errno = 0; + if (zram_get_sysfs(z) == NULL) { + errno = ENODEV; + return 0; + } + + DBG(fprintf(stderr, "%s exists", z->devname)); + return 1; +} + +static int zram_set_u64parm(struct zram *z, const char *attr, uint64_t num) +{ + struct path_cxt *sysfs = zram_get_sysfs(z); + if (!sysfs) + return -EINVAL; + DBG(fprintf(stderr, "%s writing %ju to %s", z->devname, num, attr)); + return ul_path_write_u64(sysfs, num, attr); +} + +static int zram_set_strparm(struct zram *z, const char *attr, const char *str) +{ + struct path_cxt *sysfs = zram_get_sysfs(z); + if (!sysfs) + return -EINVAL; + DBG(fprintf(stderr, "%s writing %s to %s", z->devname, str, attr)); + return ul_path_write_string(sysfs, str, attr); +} + + +static int zram_used(struct zram *z) +{ + uint64_t size; + struct path_cxt *sysfs = zram_get_sysfs(z); + + if (sysfs && + ul_path_read_u64(sysfs, &size, "disksize") == 0 && + 
size > 0) { + + DBG(fprintf(stderr, "%s used", z->devname)); + return 1; + } + DBG(fprintf(stderr, "%s unused", z->devname)); + return 0; +} + +static int zram_has_control(struct zram *z) +{ + if (!z->control_probed) { + z->has_control = access(_PATH_SYS_CLASS "/zram-control/", F_OK) == 0 ? 1 : 0; + z->control_probed = 1; + DBG(fprintf(stderr, "zram-control: %s", z->has_control ? "yes" : "no")); + } + + return z->has_control; +} + +static struct path_cxt *zram_get_control(void) +{ + if (!__control) + __control = ul_new_path(_PATH_SYS_CLASS "/zram-control"); + return __control; +} + +/* + * Read a device number from zram-control/hot_add and point @z at + * /dev/zram<number>. (The kernel is expected to hot-add a device on that + * read -- see the kernel zram documentation.) + * + * Returns 0 on success, -ENOSYS when zram-control is not available, -EIO when + * hot_add cannot be read, or the negative number read from hot_add. + */ +static int zram_control_add(struct zram *z) +{ + int n = 0; + struct path_cxt *ctl; + + if (!zram_has_control(z) || !(ctl = zram_get_control())) + return -ENOSYS; + + /* a failed read leaves n == 0; never report that as success, otherwise + * find_free_zram() would hand out a device that does not exist */ + if (ul_path_read_s32(ctl, &n, "hot_add") != 0) + return -EIO; + if (n < 0) + return n; + + DBG(fprintf(stderr, "hot-add: %d", n)); + zram_set_devname(z, NULL, n); + return 0; +} + +static int zram_control_remove(struct zram *z) +{ + struct path_cxt *ctl; + int n; + + if (!zram_has_control(z) || !(ctl = zram_get_control())) + return -ENOSYS; + + n = zram_get_devnum(z); + if (n < 0) + return n; + + DBG(fprintf(stderr, "hot-remove: %d", n)); + return ul_path_write_u64(ctl, n, "hot_remove"); +} + +static struct zram *find_free_zram(void) +{ + struct zram *z = new_zram(NULL); + size_t i; + int isfree = 0; + + for (i = 0; isfree == 0; i++) { + DBG(fprintf(stderr, "find free: checking zram%zu", i)); + zram_set_devname(z, NULL, i); + if (!zram_exist(z) && zram_control_add(z) != 0) + break; + isfree = !zram_used(z); + } + if (!isfree) { + free_zram(z); + z = NULL; + } + return z; +} + +static char *get_mm_stat(struct zram *z, size_t idx, int bytes) +{ + struct path_cxt *sysfs; + const char *name; + char *str = NULL; + uint64_t num; + + assert(idx < ARRAY_SIZE(mm_stat_names)); + assert(z); + + sysfs = zram_get_sysfs(z); + if (!sysfs) + return NULL; + + /* Linux >= 4.1 uses /sys/block/zram<id>/mm_stat */ + if (!z->mm_stat && !z->mm_stat_probed) { + if 
(ul_path_read_string(sysfs, &str, "mm_stat") > 0 && str) { + z->mm_stat = strv_split(str, " "); + + /* make sure kernel provides mm_stat as expected */ + if (strv_length(z->mm_stat) < ARRAY_SIZE(mm_stat_names)) { + strv_free(z->mm_stat); + z->mm_stat = NULL; + } + } + z->mm_stat_probed = 1; + free(str); + str = NULL; + } + + if (z->mm_stat) { + if (bytes) + return xstrdup(z->mm_stat[idx]); + + num = strtou64_or_err(z->mm_stat[idx], _("Failed to parse mm_stat")); + return size_to_human_string(SIZE_SUFFIX_1LETTER, num); + } + + /* Linux < 4.1 uses /sys/block/zram<id>/<attrname> */ + name = mm_stat_names[idx]; + if (bytes) { + ul_path_read_string(sysfs, &str, name); + return str; + + } + + if (ul_path_read_u64(sysfs, &num, name) == 0) + return size_to_human_string(SIZE_SUFFIX_1LETTER, num); + + return NULL; +} + +static void fill_table_row(struct libscols_table *tb, struct zram *z) +{ + static struct libscols_line *ln; + struct path_cxt *sysfs; + size_t i; + uint64_t num; + + assert(tb); + assert(z); + + DBG(fprintf(stderr, "%s: filling status table", z->devname)); + + sysfs = zram_get_sysfs(z); + if (!sysfs) + return; + + ln = scols_table_new_line(tb, NULL); + if (!ln) + err(EXIT_FAILURE, _("failed to allocate output line")); + + for (i = 0; i < (size_t) ncolumns; i++) { + char *str = NULL; + + switch (get_column_id(i)) { + case COL_NAME: + str = xstrdup(z->devname); + break; + case COL_DISKSIZE: + if (inbytes) + ul_path_read_string(sysfs, &str, "disksize"); + + else if (ul_path_read_u64(sysfs, &num, "disksize") == 0) + str = size_to_human_string(SIZE_SUFFIX_1LETTER, num); + break; + case COL_ALGORITHM: + { + char *alg = NULL; + + ul_path_read_string(sysfs, &alg, "comp_algorithm"); + if (alg) { + char* lbr = strrchr(alg, '['); + char* rbr = strrchr(alg, ']'); + + if (lbr != NULL && rbr != NULL && rbr - lbr > 1) + str = xstrndup(lbr + 1, rbr - lbr - 1); + free(alg); + } + break; + } + case COL_MOUNTPOINT: + { + char path[PATH_MAX] = { '\0' }; + int fl; + + 
check_mount_point(z->devname, &fl, path, sizeof(path)); + if (*path) + str = xstrdup(path); + break; + } + case COL_STREAMS: + ul_path_read_string(sysfs, &str, "max_comp_streams"); + break; + case COL_ZEROPAGES: + str = get_mm_stat(z, MM_ZERO_PAGES, 1); + break; + case COL_ORIG_SIZE: + str = get_mm_stat(z, MM_ORIG_DATA_SIZE, inbytes); + break; + case COL_COMP_SIZE: + str = get_mm_stat(z, MM_COMPR_DATA_SIZE, inbytes); + break; + case COL_MEMTOTAL: + str = get_mm_stat(z, MM_MEM_USED_TOTAL, inbytes); + break; + case COL_MEMLIMIT: + str = get_mm_stat(z, MM_MEM_LIMIT, inbytes); + break; + case COL_MEMUSED: + str = get_mm_stat(z, MM_MEM_USED_MAX, inbytes); + break; + case COL_MIGRATED: + str = get_mm_stat(z, MM_NUM_MIGRATED, inbytes); + break; + } + if (str && scols_line_refer_data(ln, i, str)) + err(EXIT_FAILURE, _("failed to add output data")); + } +} + +static void status(struct zram *z) +{ + struct libscols_table *tb; + size_t i; + DIR *dir; + struct dirent *d; + + scols_init_debug(0); + + tb = scols_new_table(); + if (!tb) + err(EXIT_FAILURE, _("failed to allocate output table")); + + scols_table_enable_raw(tb, raw); + scols_table_enable_noheadings(tb, no_headings); + + for (i = 0; i < (size_t) ncolumns; i++) { + const struct colinfo *col = get_column_info(i); + + if (!scols_table_new_column(tb, col->name, col->whint, col->flags)) + err(EXIT_FAILURE, _("failed to initialize output column")); + } + + if (z) { + /* just one device specified */ + fill_table_row(tb, z); + goto print_table; + } + + /* list all used devices */ + z = new_zram(NULL); + if (!(dir = opendir(_PATH_DEV))) + err(EXIT_FAILURE, _("cannot open %s"), _PATH_DEV); + + while ((d = readdir(dir))) { + int n; + if (sscanf(d->d_name, "zram%d", &n) != 1) + continue; + zram_set_devname(z, NULL, n); + if (zram_exist(z) && zram_used(z)) + fill_table_row(tb, z); + } + closedir(dir); + free_zram(z); + +print_table: + scols_print_table(tb); + scols_unref_table(tb); +} + +static void __attribute__((__noreturn__)) 
usage(void) +{ + FILE *out = stdout; + size_t i; + + fputs(USAGE_HEADER, out); + fprintf(out, _( " %1$s [options] <device>\n" + " %1$s -r <device> [...]\n" + " %1$s [options] -f | <device> -s <size>\n"), + program_invocation_short_name); + + fputs(USAGE_SEPARATOR, out); + fputs(_("Set up and control zram devices.\n"), out); + + fputs(USAGE_OPTIONS, out); + fputs(_(" -a, --algorithm <alg> compression algorithm to use\n"), out); + fputs(_(" -b, --bytes print sizes in bytes rather than in human readable format\n"), out); + fputs(_(" -f, --find find a free device\n"), out); + fputs(_(" -n, --noheadings don't print headings\n"), out); + fputs(_(" -o, --output <list> columns to use for status output\n"), out); + fputs(_(" --output-all output all columns\n"), out); + fputs(_(" --raw use raw status output format\n"), out); + fputs(_(" -r, --reset reset all specified devices\n"), out); + fputs(_(" -s, --size <size> device size\n"), out); + fputs(_(" -t, --streams <number> number of compression streams\n"), out); + + fputs(USAGE_SEPARATOR, out); + printf(USAGE_HELP_OPTIONS(27)); + + fputs(USAGE_ARGUMENTS, out); + printf(USAGE_ARG_SIZE(_("<size>"))); + + fputs(_(" <alg> specify algorithm, supported are:\n"), out); + fputs(_(" lzo, lz4, lz4hc, deflate, 842 and zstd\n"), out); + + fputs(USAGE_COLUMNS, out); + for (i = 0; i < ARRAY_SIZE(infos); i++) + fprintf(out, " %11s %s\n", infos[i].name, _(infos[i].help)); + + printf(USAGE_MAN_TAIL("zramctl(8)")); + exit(EXIT_SUCCESS); +} + +/* actions */ +enum { + A_NONE = 0, + A_STATUS, + A_CREATE, + A_FINDONLY, + A_RESET +}; + +int main(int argc, char **argv) +{ + uintmax_t size = 0, nstreams = 0; + char *algorithm = NULL; + int rc = 0, c, find = 0, act = A_NONE; + struct zram *zram = NULL; + + enum { + OPT_RAW = CHAR_MAX + 1, + OPT_LIST_TYPES + }; + + static const struct option longopts[] = { + { "algorithm", required_argument, NULL, 'a' }, + { "bytes", no_argument, NULL, 'b' }, + { "find", no_argument, NULL, 'f' }, + { "help", 
no_argument, NULL, 'h' }, + { "output", required_argument, NULL, 'o' }, + { "output-all",no_argument, NULL, OPT_LIST_TYPES }, + { "noheadings",no_argument, NULL, 'n' }, + { "reset", no_argument, NULL, 'r' }, + { "raw", no_argument, NULL, OPT_RAW }, + { "size", required_argument, NULL, 's' }, + { "streams", required_argument, NULL, 't' }, + { "version", no_argument, NULL, 'V' }, + { NULL, 0, NULL, 0 } + }; + + static const ul_excl_t excl[] = { + { 'f', 'o', 'r' }, + { 'o', 'r', 's' }, + { 0 } + }; + int excl_st[ARRAY_SIZE(excl)] = UL_EXCL_STATUS_INIT; + + setlocale(LC_ALL, ""); + bindtextdomain(PACKAGE, LOCALEDIR); + textdomain(PACKAGE); + close_stdout_atexit(); + + while ((c = getopt_long(argc, argv, "a:bfho:nrs:t:V", longopts, NULL)) != -1) { + + err_exclusive_options(c, longopts, excl, excl_st); + + switch (c) { + case 'a': + algorithm = optarg; + break; + case 'b': + inbytes = 1; + break; + case 'f': + find = 1; + break; + case 'o': + ncolumns = string_to_idarray(optarg, + columns, ARRAY_SIZE(columns), + column_name_to_id); + if (ncolumns < 0) + return EXIT_FAILURE; + break; + case OPT_LIST_TYPES: + for (ncolumns = 0; (size_t)ncolumns < ARRAY_SIZE(infos); ncolumns++) + columns[ncolumns] = ncolumns; + break; + case 's': + size = strtosize_or_err(optarg, _("failed to parse size")); + act = A_CREATE; + break; + case 't': + nstreams = strtou64_or_err(optarg, _("failed to parse streams")); + break; + case 'r': + act = A_RESET; + break; + case OPT_RAW: + raw = 1; + break; + case 'n': + no_headings = 1; + break; + + case 'V': + print_version(EXIT_SUCCESS); + case 'h': + usage(); + default: + errtryhelp(EXIT_FAILURE); + } + } + + if (find && optind < argc) + errx(EXIT_FAILURE, _("option --find is mutually exclusive " + "with <device>")); + if (act == A_NONE) + act = find ? 
A_FINDONLY : A_STATUS; + + if (act != A_RESET && optind + 1 < argc) + errx(EXIT_FAILURE, _("only one <device> at a time is allowed")); + + if ((act == A_STATUS || act == A_FINDONLY) && (algorithm || nstreams)) + errx(EXIT_FAILURE, _("options --algorithm and --streams " + "must be combined with --size")); + + ul_path_init_debug(); + ul_sysfs_init_debug(); + + switch (act) { + case A_STATUS: + if (!ncolumns) { /* default columns */ + columns[ncolumns++] = COL_NAME; + columns[ncolumns++] = COL_ALGORITHM; + columns[ncolumns++] = COL_DISKSIZE; + columns[ncolumns++] = COL_ORIG_SIZE; + columns[ncolumns++] = COL_COMP_SIZE; + columns[ncolumns++] = COL_MEMTOTAL; + columns[ncolumns++] = COL_STREAMS; + columns[ncolumns++] = COL_MOUNTPOINT; + } + if (optind < argc) { + zram = new_zram(argv[optind++]); + if (!zram_exist(zram)) + err(EXIT_FAILURE, "%s", zram->devname); + } + status(zram); + free_zram(zram); + break; + case A_RESET: + if (optind == argc) + errx(EXIT_FAILURE, _("no device specified")); + while (optind < argc) { + zram = new_zram(argv[optind]); + if (!zram_exist(zram) + || zram_set_u64parm(zram, "reset", 1)) { + warn(_("%s: failed to reset"), zram->devname); + rc = 1; + } + zram_control_remove(zram); + free_zram(zram); + optind++; + } + break; + case A_FINDONLY: + zram = find_free_zram(); + if (!zram) + errx(EXIT_FAILURE, _("no free zram device found")); + printf("%s\n", zram->devname); + free_zram(zram); + break; + case A_CREATE: + if (find) { + zram = find_free_zram(); + if (!zram) + errx(EXIT_FAILURE, _("no free zram device found")); + } else if (optind == argc) + errx(EXIT_FAILURE, _("no device specified")); + else { + zram = new_zram(argv[optind]); + if (!zram_exist(zram)) + err(EXIT_FAILURE, "%s", zram->devname); + } + + if (zram_set_u64parm(zram, "reset", 1)) + err(EXIT_FAILURE, _("%s: failed to reset"), zram->devname); + + if (nstreams && + zram_set_u64parm(zram, "max_comp_streams", nstreams)) + err(EXIT_FAILURE, _("%s: failed to set number of streams"), 
zram->devname); + + if (algorithm && + zram_set_strparm(zram, "comp_algorithm", algorithm)) + err(EXIT_FAILURE, _("%s: failed to set algorithm"), zram->devname); + + if (zram_set_u64parm(zram, "disksize", size)) + err(EXIT_FAILURE, _("%s: failed to set disksize (%ju bytes)"), + zram->devname, size); + if (find) + printf("%s\n", zram->devname); + free_zram(zram); + break; + } + + ul_unref_path(__control); + return rc ? EXIT_FAILURE : EXIT_SUCCESS; +} |