diff options
Diffstat (limited to 'doc')
-rw-r--r-- | doc/Makefile.am | 32 | ||||
-rw-r--r-- | doc/Makefile.in | 688 | ||||
-rw-r--r-- | doc/file.man | 743 | ||||
-rw-r--r-- | doc/libmagic.man | 435 | ||||
-rw-r--r-- | doc/magic.man | 830 |
5 files changed, 2728 insertions, 0 deletions
diff --git a/doc/Makefile.am b/doc/Makefile.am new file mode 100644 index 0000000..4a78589 --- /dev/null +++ b/doc/Makefile.am @@ -0,0 +1,32 @@ +MAGIC = $(pkgdatadir)/magic +if FSECT5 +man_MAGIC = magic.5 +else +man_MAGIC = magic.4 +endif +fsect = @fsect@ +man_MANS = file.1 $(man_MAGIC) libmagic.3 + +EXTRA_DIST = file.man magic.man libmagic.man +CLEANFILES = $(man_MANS) + +file.1: Makefile file.man + @rm -f $@ + sed -e s@__CSECTION__@1@g \ + -e s@__FSECTION__@${fsect}@g \ + -e s@__VERSION__@${VERSION}@g \ + -e s@__MAGIC__@${MAGIC}@g $(srcdir)/file.man > $@ + +magic.${fsect}: Makefile magic.man + @rm -f $@ + sed -e s@__CSECTION__@1@g \ + -e s@__FSECTION__@${fsect}@g \ + -e s@__VERSION__@${VERSION}@g \ + -e s@__MAGIC__@${MAGIC}@g $(srcdir)/magic.man > $@ + +libmagic.3: Makefile libmagic.man + @rm -f $@ + sed -e s@__CSECTION__@1@g \ + -e s@__FSECTION__@${fsect}@g \ + -e s@__VERSION__@${VERSION}@g \ + -e s@__MAGIC__@${MAGIC}@g $(srcdir)/libmagic.man > $@ diff --git a/doc/Makefile.in b/doc/Makefile.in new file mode 100644 index 0000000..08b71aa --- /dev/null +++ b/doc/Makefile.in @@ -0,0 +1,688 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = doc +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/visibility.m4 $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +SOURCES = +DIST_SOURCES = +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +man1dir = $(mandir)/man1 +am__installdirs = "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(man3dir)" \ + "$(DESTDIR)$(man4dir)" "$(DESTDIR)$(man5dir)" +man3dir = $(mandir)/man3 +man4dir = $(mandir)/man4 +man5dir = $(mandir)/man5 +NROFF = nroff +MANS = $(man_MANS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +am__DIST_COMMON = $(srcdir)/Makefile.in +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +pkgdatadir = @pkgdatadir@ +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CFLAG_VISIBILITY = @CFLAG_VISIBILITY@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +GREP = @GREP@ +HAVE_VISIBILITY = @HAVE_VISIBILITY@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MINGW = @MINGW@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +RANLIB = @RANLIB@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +VERSION = @VERSION@ +WARNINGS = @WARNINGS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +fsect = @fsect@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +MAGIC = $(pkgdatadir)/magic +@FSECT5_FALSE@man_MAGIC = magic.4 +@FSECT5_TRUE@man_MAGIC = magic.5 +man_MANS = file.1 $(man_MAGIC) libmagic.3 +EXTRA_DIST = file.man magic.man libmagic.man +CLEANFILES = $(man_MANS) +all: all-am + +.SUFFIXES: +$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign doc/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --foreign doc/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-man1: $(man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(man_MANS)'; \ + test -n "$(man1dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man1dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man1dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.1[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man1dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man1dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man1dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man1dir)" || exit $$?; }; \ + done; } + +uninstall-man1: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man1dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.1[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^1][0-9a-z]*$$,1,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man1dir)'; $(am__uninstall_files_from_dir) +install-man3: $(man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(man_MANS)'; \ + test -n "$(man3dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man3dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man3dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.3[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^3][0-9a-z]*$$,3,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man3dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man3dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man3dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man3dir)" || exit $$?; }; \ + done; } + +uninstall-man3: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man3dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.3[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^3][0-9a-z]*$$,3,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man3dir)'; $(am__uninstall_files_from_dir) +install-man4: $(man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(man_MANS)'; \ + test -n "$(man4dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man4dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man4dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.4[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^4][0-9a-z]*$$,4,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man4dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man4dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man4dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man4dir)" || exit $$?; }; \ + done; } + +uninstall-man4: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man4dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.4[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^4][0-9a-z]*$$,4,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man4dir)'; $(am__uninstall_files_from_dir) +install-man5: $(man_MANS) + @$(NORMAL_INSTALL) + @list1=''; \ + list2='$(man_MANS)'; \ + test -n "$(man5dir)" \ + && test -n "`echo $$list1$$list2`" \ + || exit 0; \ + echo " $(MKDIR_P) '$(DESTDIR)$(man5dir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(man5dir)" || exit 1; \ + { for i in $$list1; do echo "$$i"; done; \ + if test -n "$$list2"; then \ + for i in $$list2; do echo "$$i"; done \ + | sed -n '/\.5[a-z]*$$/p'; \ + fi; \ + } | while read p; do \ + if test -f $$p; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; echo "$$p"; \ + done | \ + sed -e 'n;s,.*/,,;p;h;s,.*\.,,;s,^[^5][0-9a-z]*$$,5,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,' | \ + sed 'N;N;s,\n, ,g' | { \ + list=; while read file base inst; do \ + if test "$$base" = "$$inst"; then list="$$list $$file"; else \ + echo " $(INSTALL_DATA) '$$file' '$(DESTDIR)$(man5dir)/$$inst'"; \ + $(INSTALL_DATA) "$$file" "$(DESTDIR)$(man5dir)/$$inst" || exit $$?; \ + fi; \ + done; \ + for i in $$list; do echo "$$i"; done | $(am__base_list) | \ + while read files; do \ + test -z "$$files" || { \ + echo " $(INSTALL_DATA) $$files '$(DESTDIR)$(man5dir)'"; \ + $(INSTALL_DATA) $$files "$(DESTDIR)$(man5dir)" || exit $$?; }; \ + done; } + +uninstall-man5: + @$(NORMAL_UNINSTALL) + @list=''; test -n "$(man5dir)" || exit 0; \ + files=`{ for i in $$list; do echo "$$i"; done; \ + l2='$(man_MANS)'; for i in $$l2; do echo "$$i"; done | \ + sed -n '/\.5[a-z]*$$/p'; \ + } | sed -e 's,.*/,,;h;s,.*\.,,;s,^[^5][0-9a-z]*$$,5,;x' \ + -e 's,\.[0-9a-z]*$$,,;$(transform);G;s,\n,.,'`; \ + dir='$(DESTDIR)$(man5dir)'; $(am__uninstall_files_from_dir) +tags TAGS: + +ctags CTAGS: + +cscope cscopelist: + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(MANS) +installdirs: + for dir in "$(DESTDIR)$(man1dir)" "$(DESTDIR)$(man3dir)" "$(DESTDIR)$(man4dir)" "$(DESTDIR)$(man5dir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool mostlyclean-am + +distclean: distclean-am + -rm -f Makefile +distclean-am: clean-am distclean-generic + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-man + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: install-man1 install-man3 install-man4 install-man5 + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-generic mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-man + +uninstall-man: uninstall-man1 uninstall-man3 uninstall-man4 \ + uninstall-man5 + +.MAKE: install-am install-strip + +.PHONY: all all-am check check-am clean clean-generic clean-libtool \ + cscopelist-am ctags-am distclean distclean-generic \ + distclean-libtool distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am install-dvi \ + install-dvi-am install-exec install-exec-am install-html \ + install-html-am install-info install-info-am install-man \ + install-man1 install-man3 install-man4 install-man5 \ + install-pdf install-pdf-am install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags-am uninstall uninstall-am uninstall-man uninstall-man1 \ + uninstall-man3 uninstall-man4 uninstall-man5 + +.PRECIOUS: Makefile + + +file.1: Makefile file.man + @rm -f $@ + sed -e s@__CSECTION__@1@g \ + -e s@__FSECTION__@${fsect}@g \ + -e s@__VERSION__@${VERSION}@g \ + -e s@__MAGIC__@${MAGIC}@g $(srcdir)/file.man > $@ + +magic.${fsect}: Makefile magic.man + @rm -f $@ + sed -e s@__CSECTION__@1@g \ + -e s@__FSECTION__@${fsect}@g \ + -e s@__VERSION__@${VERSION}@g \ + -e s@__MAGIC__@${MAGIC}@g $(srcdir)/magic.man > $@ + +libmagic.3: Makefile libmagic.man + @rm -f $@ + sed -e s@__CSECTION__@1@g \ + -e s@__FSECTION__@${fsect}@g \ + -e s@__VERSION__@${VERSION}@g \ + -e s@__MAGIC__@${MAGIC}@g $(srcdir)/libmagic.man > $@ + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/doc/file.man b/doc/file.man new file mode 100644 index 0000000..bf78c0c --- /dev/null +++ b/doc/file.man @@ -0,0 +1,743 @@ +.\" $File: file.man,v 1.150 2023/05/21 17:08:34 christos Exp $ +.Dd May 21, 2023 +.Dt FILE __CSECTION__ +.Os +.Sh NAME +.Nm file +.Nd determine file type +.Sh SYNOPSIS +.Nm +.Bk -words +.Op Fl bcdEhiklLNnprsSvzZ0 +.Op Fl Fl apple +.Op Fl Fl exclude-quiet +.Op Fl Fl extension +.Op Fl Fl mime-encoding +.Op Fl Fl mime-type +.Op Fl e Ar testname +.Op Fl F Ar separator +.Op Fl f Ar namefile +.Op Fl m Ar magicfiles +.Op Fl P Ar name=value +.Ar +.Ek +.Nm +.Fl C +.Op Fl m Ar magicfiles +.Nm +.Op Fl Fl help +.Sh DESCRIPTION +This manual page documents version __VERSION__ of the +.Nm +command. +.Pp +.Nm +tests each argument in an attempt to classify it. +There are three sets of tests, performed in this order: +filesystem tests, magic tests, and language tests. +The +.Em first +test that succeeds causes the file type to be printed. +.Pp +The type printed will usually contain one of the words +.Em text +(the file contains only +printing characters and a few common control +characters and is probably safe to read on an +.Dv ASCII +terminal), +.Em executable +(the file contains the result of compiling a program +in a form understandable to some +.Tn UNIX +kernel or another), +or +.Em data +meaning anything else (data is usually +.Dq binary +or non-printable). +Exceptions are well-known file formats (core files, tar archives) +that are known to contain binary data. +When modifying magic files or the program itself, make sure to +.Em preserve these keywords . +Users depend on knowing that all the readable files in a directory +have the word +.Dq text +printed. +Don't do as Berkeley did and change +.Dq shell commands text +to +.Dq shell script . +.Pp +The filesystem tests are based on examining the return from a +.Xr stat 2 +system call. +The program checks to see if the file is empty, +or if it's some sort of special file. +Any known file types appropriate to the system you are running on +(sockets, symbolic links, or named pipes (FIFOs) on those systems that +implement them) +are intuited if they are defined in the system header file +.In sys/stat.h . +.Pp +The magic tests are used to check for files with data in +particular fixed formats. +The canonical example of this is a binary executable (compiled program) +.Dv a.out +file, whose format is defined in +.In elf.h , +.In a.out.h +and possibly +.In exec.h +in the standard include directory. +These files have a +.Dq magic number +stored in a particular place +near the beginning of the file that tells the +.Tn UNIX +operating system +that the file is a binary executable, and which of several types thereof. +The concept of a +.Dq magic number +has been applied by extension to data files. +Any file with some invariant identifier at a small fixed +offset into the file can usually be described in this way. +The information identifying these files is read from the compiled +magic file +.Pa __MAGIC__.mgc , +or the files in the directory +.Pa __MAGIC__ +if the compiled file does not exist. +In addition, if +.Pa $HOME/.magic.mgc +or +.Pa $HOME/.magic +exists, it will be used in preference to the system magic files. +.Pp +If a file does not match any of the entries in the magic file, +it is examined to see if it seems to be a text file. +ASCII, ISO-8859-x, non-ISO 8-bit extended-ASCII character sets +(such as those used on Macintosh and IBM PC systems), +UTF-8-encoded Unicode, UTF-16-encoded Unicode, and EBCDIC +character sets can be distinguished by the different +ranges and sequences of bytes that constitute printable text +in each set. +If a file passes any of these tests, its character set is reported. +ASCII, ISO-8859-x, UTF-8, and extended-ASCII files are identified +as +.Dq text +because they will be mostly readable on nearly any terminal; +UTF-16 and EBCDIC are only +.Dq character data +because, while +they contain text, it is text that will require translation +before it can be read. +In addition, +.Nm +will attempt to determine other characteristics of text-type files. +If the lines of a file are terminated by CR, CRLF, or NEL, instead +of the Unix-standard LF, this will be reported. +Files that contain embedded escape sequences or overstriking +will also be identified. +.Pp +Once +.Nm +has determined the character set used in a text-type file, +it will +attempt to determine in what language the file is written. +The language tests look for particular strings (cf. +.In names.h ) +that can appear anywhere in the first few blocks of a file. +For example, the keyword +.Em .br +indicates that the file is most likely a +.Xr troff 1 +input file, just as the keyword +.Em struct +indicates a C program. +These tests are less reliable than the previous +two groups, so they are performed last. +The language test routines also test for some miscellany +(such as +.Xr tar 1 +archives, JSON files). +.Pp +Any file that cannot be identified as having been written +in any of the character sets listed above is simply said to be +.Dq data . +.Sh OPTIONS +.Bl -tag -width indent +.It Fl Fl apple +Causes the +.Nm +command to output the file type and creator code as +used by older MacOS versions. +The code consists of eight letters, +the first describing the file type, the latter the creator. +This option works properly only for file formats that have the +apple-style output defined. +.It Fl b , Fl Fl brief +Do not prepend filenames to output lines (brief mode). +.It Fl C , Fl Fl compile +Write a +.Pa magic.mgc +output file that contains a pre-parsed version of the magic file or directory. +.It Fl c , Fl Fl checking-printout +Cause a checking printout of the parsed form of the magic file. +This is usually used in conjunction with the +.Fl m +option to debug a new magic file before installing it. +.It Fl d +Prints internal debugging information to stderr. +.It Fl E +On filesystem errors (file not found etc), instead of handling the error +as regular output as POSIX mandates and keep going, issue an error message +and exit. +.It Fl e , Fl Fl exclude Ar testname +Exclude the test named in +.Ar testname +from the list of tests made to determine the file type. +Valid test names are: +.Bl -tag -width compress +.It apptype +.Dv EMX +application type (only on EMX). +.It ascii +Various types of text files (this test will try to guess the text +encoding, irrespective of the setting of the +.Sq encoding +option). +.It encoding +Different text encodings for soft magic tests. +.It tokens +Ignored for backwards compatibility. +.It cdf +Prints details of Compound Document Files. +.It compress +Checks for, and looks inside, compressed files. +.It csv +Checks Comma Separated Value files. +.It elf +Prints ELF file details, provided soft magic tests are enabled and the +elf magic is found. +.It json +Examines JSON (RFC-7159) files by parsing them for compliance. +.It soft +Consults magic files. +.It simh +Examines SIMH tape files. +.It tar +Examines tar files by verifying the checksum of the 512 byte tar header. +Excluding this test can provide more detailed content description by using +the soft magic method. +.It text +A synonym for +.Sq ascii . +.El +.It Fl Fl exclude-quiet +Like +.Fl Fl exclude +but ignore tests that +.Nm +does not know about. +This is intended for compatibility with older versions of +.Nm . +.It Fl Fl extension +Print a slash-separated list of valid extensions for the file type found. +.It Fl F , Fl Fl separator Ar separator +Use the specified string as the separator between the filename and the +file result returned. +Defaults to +.Sq \&: . +.It Fl f , Fl Fl files-from Ar namefile +Read the names of the files to be examined from +.Ar namefile +(one per line) +before the argument list. +Either +.Ar namefile +or at least one filename argument must be present; +to test the standard input, use +.Sq - +as a filename argument. +Please note that +.Ar namefile +is unwrapped and the enclosed filenames are processed when this option is +encountered and before any further options processing is done. +This allows one to process multiple lists of files with different command line +arguments on the same +.Nm +invocation. +Thus if you want to set the delimiter, you need to do it before you specify +the list of files, like: +.Dq Fl F Ar @ Fl f Ar namefile , +instead of: +.Dq Fl f Ar namefile Fl F Ar @ . +.It Fl h , Fl Fl no-dereference +This option causes symlinks not to be followed +(on systems that support symbolic links). +This is the default if the environment variable +.Dv POSIXLY_CORRECT +is not defined. +.It Fl i , Fl Fl mime +Causes the +.Nm +command to output mime type strings rather than the more +traditional human readable ones. +Thus it may say +.Sq text/plain; charset=us-ascii +rather than +.Dq ASCII text . +.It Fl Fl mime-type , Fl Fl mime-encoding +Like +.Fl i , +but print only the specified element(s). +.It Fl k , Fl Fl keep-going +Don't stop at the first match, keep going. +Subsequent matches will be +have the string +.Sq "\[rs]012\- " +prepended. +(If you want a newline, see the +.Fl r +option.) +The magic pattern with the highest strength (see the +.Fl l +option) comes first. +.It Fl l , Fl Fl list +Shows a list of patterns and their strength sorted descending by +.Xr magic __FSECTION__ +strength +which is used for the matching (see also the +.Fl k +option). +.It Fl L , Fl Fl dereference +This option causes symlinks to be followed, as the like-named option in +.Xr ls 1 +(on systems that support symbolic links). +This is the default if the environment variable +.Ev POSIXLY_CORRECT +is defined. +.It Fl m , Fl Fl magic-file Ar magicfiles +Specify an alternate list of files and directories containing magic. +This can be a single item, or a colon-separated list. +If a compiled magic file is found alongside a file or directory, +it will be used instead. +.It Fl N , Fl Fl no-pad +Don't pad filenames so that they align in the output. +.It Fl n , Fl Fl no-buffer +Force stdout to be flushed after checking each file. +This is only useful if checking a list of files. +It is intended to be used by programs that want filetype output from a pipe. +.It Fl p , Fl Fl preserve-date +On systems that support +.Xr utime 3 +or +.Xr utimes 2 , +attempt to preserve the access time of files analyzed, to pretend that +.Nm +never read them. +.It Fl P , Fl Fl parameter Ar name=value +Set various parameter limits. +.Bl -column "elf_phnum" "Default" "XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX" +.It Sy "Name" Ta Sy "Default" Ta Sy "Explanation" +.It Li bytes Ta 1M Ta max number of bytes to read from file +.It Li elf_notes Ta 256 Ta max ELF notes processed +.It Li elf_phnum Ta 2K Ta max ELF program sections processed +.It Li elf_shnum Ta 32K Ta max ELF sections processed +.It Li elf_shsize Ta 128MB Ta max ELF section size processed +.It Li encoding Ta 65K Ta max number of bytes to determine encoding +.It Li indir Ta 50 Ta recursion limit for indirect magic +.It Li name Ta 50 Ta use count limit for name/use magic +.It Li regex Ta 8K Ta length limit for regex searches +.El +.It Fl r , Fl Fl raw +Don't translate unprintable characters to \eooo. +Normally +.Nm +translates unprintable characters to their octal representation. +.It Fl s , Fl Fl special-files +Normally, +.Nm +only attempts to read and determine the type of argument files which +.Xr stat 2 +reports are ordinary files. +This prevents problems, because reading special files may have peculiar +consequences. +Specifying the +.Fl s +option causes +.Nm +to also read argument files which are block or character special files. +This is useful for determining the filesystem types of the data in raw +disk partitions, which are block special files. +This option also causes +.Nm +to disregard the file size as reported by +.Xr stat 2 +since on some systems it reports a zero size for raw disk partitions. +.It Fl S , Fl Fl no-sandbox +On systems where libseccomp +.Pa ( https://github.com/seccomp/libseccomp ) +is available, the +.Fl S +option disables sandboxing which is enabled by default. +This option is needed for +.Nm +to execute external decompressing programs, +i.e. when the +.Fl z +option is specified and the built-in decompressors are not available. +On systems where sandboxing is not available, this option has no effect. +.It Fl v , Fl Fl version +Print the version of the program and exit. +.It Fl z , Fl Fl uncompress +Try to look inside compressed files. +.It Fl Z , Fl Fl uncompress-noreport +Try to look inside compressed files, but report information about the contents +only not the compression. +.It Fl 0 , Fl Fl print0 +Output a null character +.Sq \e0 +after the end of the filename. +Nice to +.Xr cut 1 +the output. +This does not affect the separator, which is still printed. +.Pp +If this option is repeated more than once, then +.Nm +prints just the filename followed by a NUL followed by the description +(or ERROR: text) followed by a second NUL for each entry. +.It Fl -help +Print a help message and exit. +.El +.Sh ENVIRONMENT +The environment variable +.Ev MAGIC +can be used to set the default magic file name. +If that variable is set, then +.Nm +will not attempt to open +.Pa $HOME/.magic . +.Nm +adds +.Dq Pa .mgc +to the value of this variable as appropriate. +The environment variable +.Ev POSIXLY_CORRECT +controls (on systems that support symbolic links), whether +.Nm +will attempt to follow symlinks or not. +If set, then +.Nm +follows symlink, otherwise it does not. +This is also controlled by the +.Fl L +and +.Fl h +options. +.Sh FILES +.Bl -tag -width __MAGIC__.mgc -compact +.It Pa __MAGIC__.mgc +Default compiled list of magic. +.It Pa __MAGIC__ +Directory containing default magic files. +.El +.Sh EXIT STATUS +.Nm +will exit with +.Dv 0 +if the operation was successful or +.Dv >0 +if an error was encountered. +The following errors cause diagnostic messages, but don't affect the program +exit code (as POSIX requires), unless +.Fl E +is specified: +.Bl -bullet -compact -offset indent +.It +A file cannot be found +.It +There is no permission to read a file +.It +The file type cannot be determined +.El +.Sh EXAMPLES +.Bd -literal -offset indent +$ file file.c file /dev/{wd0a,hda} +file.c: C program text +file: ELF 32-bit LSB executable, Intel 80386, version 1 (SYSV), + dynamically linked (uses shared libs), stripped +/dev/wd0a: block special (0/0) +/dev/hda: block special (3/0) + +$ file -s /dev/wd0{b,d} +/dev/wd0b: data +/dev/wd0d: x86 boot sector + +$ file -s /dev/hda{,1,2,3,4,5,6,7,8,9,10} +/dev/hda: x86 boot sector +/dev/hda1: Linux/i386 ext2 filesystem +/dev/hda2: x86 boot sector +/dev/hda3: x86 boot sector, extended partition table +/dev/hda4: Linux/i386 ext2 filesystem +/dev/hda5: Linux/i386 swap file +/dev/hda6: Linux/i386 swap file +/dev/hda7: Linux/i386 swap file +/dev/hda8: Linux/i386 swap file +/dev/hda9: empty +/dev/hda10: empty + +$ file -i file.c file /dev/{wd0a,hda} +file.c: text/x-c +file: application/x-executable +/dev/hda: application/x-not-regular-file +/dev/wd0a: application/x-not-regular-file + +.Ed +.Sh SEE ALSO +.Xr hexdump 1 , +.Xr od 1 , +.Xr strings 1 , +.Xr magic __FSECTION__ +.Sh STANDARDS CONFORMANCE +This program is believed to exceed the System V Interface Definition +of FILE(CMD), as near as one can determine from the vague language +contained therein. +Its behavior is mostly compatible with the System V program of the same name. +This version knows more magic, however, so it will produce +different (albeit more accurate) output in many cases. +.\" URL: http://www.opengroup.org/onlinepubs/009695399/utilities/file.html +.Pp +The one significant difference +between this version and System V +is that this version treats any white space +as a delimiter, so that spaces in pattern strings must be escaped. +For example, +.Bd -literal -offset indent +\*[Gt]10 string language impress\ (imPRESS data) +.Ed +.Pp +in an existing magic file would have to be changed to +.Bd -literal -offset indent +\*[Gt]10 string language\e impress (imPRESS data) +.Ed +.Pp +In addition, in this version, if a pattern string contains a backslash, +it must be escaped. +For example +.Bd -literal -offset indent +0 string \ebegindata Andrew Toolkit document +.Ed +.Pp +in an existing magic file would have to be changed to +.Bd -literal -offset indent +0 string \e\ebegindata Andrew Toolkit document +.Ed +.Pp +SunOS releases 3.2 and later from Sun Microsystems include a +.Nm +command derived from the System V one, but with some extensions. +This version differs from Sun's only in minor ways. +It includes the extension of the +.Sq \*[Am] +operator, used as, +for example, +.Bd -literal -offset indent +\*[Gt]16 long\*[Am]0x7fffffff \*[Gt]0 not stripped +.Ed +.Sh SECURITY +On systems where libseccomp +.Pa ( https://github.com/seccomp/libseccomp ) +is available, +.Nm +is enforces limiting system calls to only the ones necessary for the +operation of the program. +This enforcement does not provide any security benefit when +.Nm +is asked to decompress input files running external programs with +the +.Fl z +option. +To enable execution of external decompressors, one needs to disable +sandboxing using the +.Fl S +option. +.Sh MAGIC DIRECTORY +The magic file entries have been collected from various sources, +mainly USENET, and contributed by various authors. +Christos Zoulas (address below) will collect additional +or corrected magic file entries. +A consolidation of magic file entries +will be distributed periodically. +.Pp +The order of entries in the magic file is significant. +Depending on what system you are using, the order that +they are put together may be incorrect. +If your old +.Nm +command uses a magic file, +keep the old magic file around for comparison purposes +(rename it to +.Pa __MAGIC__.orig ) . +.Sh HISTORY +There has been a +.Nm +command in every +.Dv UNIX since at least Research Version 4 +(man page dated November, 1973). +The System V version introduced one significant major change: +the external list of magic types. +This slowed the program down slightly but made it a lot more flexible. +.Pp +This program, based on the System V version, +was written by Ian Darwin +.Aq ian@darwinsys.com +without looking at anybody else's source code. +.Pp +John Gilmore revised the code extensively, making it better than +the first version. +Geoff Collyer found several inadequacies +and provided some magic file entries. +Contributions of the +.Sq \*[Am] +operator by Rob McMahon, +.Aq cudcv@warwick.ac.uk , +1989. +.Pp +Guy Harris, +.Aq guy@netapp.com , +made many changes from 1993 to the present. +.Pp +Primary development and maintenance from 1990 to the present by +Christos Zoulas +.Aq christos@astron.com . +.Pp +Altered by Chris Lowth +.Aq chris@lowth.com , +2000: handle the +.Fl i +option to output mime type strings, using an alternative +magic file and internal logic. +.Pp +Altered by Eric Fischer +.Aq enf@pobox.com , +July, 2000, +to identify character codes and attempt to identify the languages +of non-ASCII files. +.Pp +Altered by Reuben Thomas +.Aq rrt@sc3d.org , +2007-2011, to improve MIME support, merge MIME and non-MIME magic, +support directories as well as files of magic, apply many bug fixes, +update and fix a lot of magic, improve the build system, improve the +documentation, and rewrite the Python bindings in pure Python. +.Pp +The list of contributors to the +.Sq magic +directory (magic files) +is too long to include here. +You know who you are; thank you. +Many contributors are listed in the source files. +.Sh LEGAL NOTICE +Copyright (c) Ian F. Darwin, Toronto, Canada, 1986-1999. +Covered by the standard Berkeley Software Distribution copyright; see the file +COPYING in the source distribution. +.Pp +The files +.Pa tar.h +and +.Pa is_tar.c +were written by John Gilmore from his public-domain +.Xr tar 1 +program, and are not covered by the above license. +.Sh BUGS +Please report bugs and send patches to the bug tracker at +.Pa https://bugs.astron.com/ +or the mailing list at +.Aq file@astron.com +(visit +.Pa https://mailman.astron.com/mailman/listinfo/file +first to subscribe). +.Sh TODO +Fix output so that tests for MIME and APPLE flags are not needed all +over the place, and actual output is only done in one place. +This needs a design. +Suggestion: push possible outputs on to a list, then pick the +last-pushed (most specific, one hopes) value at the end, or +use a default if the list is empty. +This should not slow down evaluation. +.Pp +The handling of +.Dv MAGIC_CONTINUE +and printing \e012- between entries is clumsy and complicated; refactor +and centralize. +.Pp +Some of the encoding logic is hard-coded in encoding.c and can be moved +to the magic files if we had a !:charset annotation. +.Pp +Continue to squash all magic bugs. +See Debian BTS for a good source. +.Pp +Store arbitrarily long strings, for example for %s patterns, so that +they can be printed out. +Fixes Debian bug #271672. +This can be done by allocating strings in a string pool, storing the +string pool at the end of the magic file and converting all the string +pointers to relative offsets from the string pool. +.Pp +Add syntax for relative offsets after current level (Debian bug #466037). +.Pp +Make file -ki work, i.e. give multiple MIME types. +.Pp +Add a zip library so we can peek inside Office2007 documents to +print more details about their contents. +.Pp +Add an option to print URLs for the sources of the file descriptions. +.Pp +Combine script searches and add a way to map executable names to MIME +types (e.g. have a magic value for !:mime which causes the resulting +string to be looked up in a table). +This would avoid adding the same magic repeatedly for each new +hash-bang interpreter. +.Pp +When a file descriptor is available, we can skip and adjust the buffer +instead of the hacky buffer management we do now. +.Pp +Fix +.Dq name +and +.Dq use +to check for consistency at compile time (duplicate +.Dq name , +.Dq use +pointing to undefined +.Dq name +). +Make +.Dq name +/ +.Dq use +more efficient by keeping a sorted list of names. +Special-case ^ to flip endianness in the parser so that it does not +have to be escaped, and document it. +.Pp +If the offsets specified internally in the file exceed the buffer size +( +.Dv HOWMANY +variable in file.h), then we don't seek to that offset, but we give up. +It would be better if buffer managements was done when the file descriptor +is available so we can seek around the file. +One must be careful though because this has performance and thus security +considerations, because one can slow down things by repeatedly seeking. +.Pp +There is support now for keeping separate buffers and having offsets from +the end of the file, but the internal buffer management still needs an +overhaul. +.Sh AVAILABILITY +You can obtain the original author's latest version by anonymous FTP +on +.Pa ftp.astron.com +in the directory +.Pa /pub/file/file-X.YZ.tar.gz . diff --git a/doc/libmagic.man b/doc/libmagic.man new file mode 100644 index 0000000..e89c6ee --- /dev/null +++ b/doc/libmagic.man @@ -0,0 +1,435 @@ +.\" $File: libmagic.man,v 1.49 2023/07/20 14:32:07 christos Exp $ +.\" +.\" Copyright (c) Christos Zoulas 2003, 2018, 2022 +.\" All Rights Reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice immediately at the beginning of the file, without modification, +.\" this list of conditions, and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR +.\" ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.Dd June 16, 2023 +.Dt LIBMAGIC 3 +.Os +.Sh NAME +.Nm magic_open , +.Nm magic_close , +.Nm magic_error , +.Nm magic_errno , +.Nm magic_descriptor , +.Nm magic_buffer , +.Nm magic_getflags , +.Nm magic_setflags , +.Nm magic_check , +.Nm magic_compile , +.Nm magic_list , +.Nm magic_load , +.Nm magic_load_buffers , +.Nm magic_setparam , +.Nm magic_getparam , +.Nm magic_version +.Nd Magic number recognition library +.Sh LIBRARY +.Lb libmagic +.Sh SYNOPSIS +.In magic.h +.Ft magic_t +.Fn magic_open "int flags" +.Ft void +.Fn magic_close "magic_t cookie" +.Ft const char * +.Fn magic_error "magic_t cookie" +.Ft int +.Fn magic_errno "magic_t cookie" +.Ft const char * +.Fn magic_descriptor "magic_t cookie" "int fd" +.Ft const char * +.Fn magic_file "magic_t cookie" "const char *filename" +.Ft const char * +.Fn magic_buffer "magic_t cookie" "const void *buffer" "size_t length" +.Ft int +.Fn magic_getflags "magic_t cookie" +.Ft int +.Fn magic_setflags "magic_t cookie" "int flags" +.Ft int +.Fn magic_check "magic_t cookie" "const char *filename" +.Ft int +.Fn magic_compile "magic_t cookie" "const char *filename" +.Ft int +.Fn magic_list "magic_t cookie" "const char *filename" +.Ft int +.Fn magic_load "magic_t cookie" "const char *filename" +.Ft int +.Fn magic_load_buffers "magic_t cookie" "void **buffers" "size_t *sizes" "size_t nbuffers" +.Ft int +.Fn magic_getparam "magic_t cookie" "int param" "void *value" +.Ft int +.Fn magic_setparam "magic_t cookie" "int param" "const void *value" +.Ft int +.Fn magic_version "void" +.Ft const char * +.Fn magic_getpath "const char *magicfile" "int action" +.Sh DESCRIPTION +These functions +operate on the magic database file +which is described +in +.Xr magic __FSECTION__ . +.Pp +The function +.Fn magic_open +creates a magic cookie pointer and returns it. +It returns +.Dv NULL +if there was an error allocating the magic cookie. +The +.Ar flags +argument specifies how the other magic functions should behave: +.Bl -tag -width MAGIC_COMPRESS +.It Dv MAGIC_NONE +No special handling. +.It Dv MAGIC_DEBUG +Print debugging messages to stderr. +.It Dv MAGIC_SYMLINK +If the file queried is a symlink, follow it. +.It Dv MAGIC_COMPRESS +If the file is compressed, unpack it and look at the contents. +.It Dv MAGIC_DEVICES +If the file is a block or character special device, then open the device +and try to look in its contents. +.It Dv MAGIC_MIME_TYPE +Return a MIME type string, instead of a textual description. +.It Dv MAGIC_MIME_ENCODING +Return a MIME encoding, instead of a textual description. +.It Dv MAGIC_MIME +A shorthand for MAGIC_MIME_TYPE | MAGIC_MIME_ENCODING. +.It Dv MAGIC_CONTINUE +Return all matches, not just the first. +.It Dv MAGIC_CHECK +Check the magic database for consistency and print warnings to stderr. +.It Dv MAGIC_PRESERVE_ATIME +On systems that support +.Xr utime 3 +or +.Xr utimes 2 , +attempt to preserve the access time of files analysed. +.It Dv MAGIC_RAW +Don't translate unprintable characters to a \eooo octal representation. +.It Dv MAGIC_ERROR +Treat operating system errors while trying to open files and follow symlinks +as real errors, instead of printing them in the magic buffer. +.It Dv MAGIC_APPLE +Return the Apple creator and type. +.It Dv MAGIC_EXTENSION +Return a slash-separated list of extensions for this file type. +.It Dv MAGIC_COMPRESS_TRANSP +Don't report on compression, only report about the uncompressed data. +.It Dv MAGIC_NO_CHECK_APPTYPE +Don't check for +.Dv EMX +application type (only on EMX). +.It Dv MAGIC_NO_COMPRESS_FORK +Don't allow decompressors that use fork. +.It Dv MAGIC_NO_CHECK_CDF +Don't get extra information on MS Composite Document Files. +.It Dv MAGIC_NO_CHECK_COMPRESS +Don't look inside compressed files. +.It Dv MAGIC_NO_CHECK_ELF +Don't print ELF details. +.It Dv MAGIC_NO_CHECK_ENCODING +Don't check text encodings. +.It Dv MAGIC_NO_CHECK_SOFT +Don't consult magic files. +.It Dv MAGIC_NO_CHECK_TAR +Don't examine tar files. +.It Dv MAGIC_NO_CHECK_TEXT +Don't check for various types of text files. +.It Dv MAGIC_NO_CHECK_TOKENS +Don't look for known tokens inside ascii files. +.It Dv MAGIC_NO_CHECK_JSON +Don't examine JSON files. +.It Dv MAGIC_NO_CHECK_CSV +Don't examine CSV files. +.It Dv MAGIC_NO_CHECK_SIMH +Don't examine SIMH tape files. +.El +.Pp +The +.Fn magic_close +function closes the +.Xr magic __FSECTION__ +database and deallocates any resources used. +.Pp +The +.Fn magic_error +function returns a textual explanation of the last error, or +.Dv NULL +if there was no error. +.Pp +The +.Fn magic_errno +function returns the last operating system error number +.Pq Xr errno 2 +that was encountered by a system call. +.Pp +The +.Fn magic_file +function returns a textual description of the contents of the +.Ar filename +argument, or +.Dv NULL +if an error occurred. +If the +.Ar filename +is +.Dv NULL , +then stdin is used. +.Pp +The +.Fn magic_descriptor +function returns a textual description of the contents of the +.Ar fd +argument, or +.Dv NULL +if an error occurred. +.Pp +The +.Fn magic_buffer +function returns a textual description of the contents of the +.Ar buffer +argument with +.Ar length +bytes size. +.Pp +The +.Fn magic_getflags +functions returns a value representing current +.Ar flags +set. +.Pp +The +.Fn magic_setflags +function sets the +.Ar flags +described above. +Note that using both MIME flags together can also +return extra information on the charset. +.Pp +The +.Fn magic_check +function can be used to check the validity of entries in the colon +separated database files passed in as +.Ar filename , +or +.Dv NULL +for the default database. +It returns 0 on success and \-1 on failure. +.Pp +The +.Fn magic_compile +function can be used to compile the colon +separated list of database files passed in as +.Ar filename , +or +.Dv NULL +for the default database. +It returns 0 on success and \-1 on failure. +The compiled files created are named from the +.Xr basename 1 +of each file argument with +.Dq .mgc +appended to it. +.Pp +The +.Fn magic_list +function dumps all magic entries in a human readable format, +dumping first the entries that are matched against binary files and then the +ones that match text files. +It takes and optional +.Fa filename +argument which is a colon separated list of database files, or +.Dv NULL +for the default database. +.Pp +The +.Fn magic_load +function must be used to load the colon +separated list of database files passed in as +.Ar filename , +or +.Dv NULL +for the default database file before any magic queries can performed. +.Pp +The default database file is named by the MAGIC environment variable. +If that variable is not set, the default database file name is __MAGIC__. +.Fn magic_load +adds +.Dq .mgc +to the database filename as appropriate. +.Pp +The +.Fn magic_load_buffers +function takes an array of size +.Fa nbuffers +of +.Fa buffers +with a respective size for each in the array of +.Fa sizes +loaded with the contents of the magic databases from the filesystem. +This function can be used in environment where the magic library does +not have direct access to the filesystem, but can access the magic +database via shared memory or other IPC means. +.Pp +The +.Fn magic_getparam +and +.Fn magic_setparam +allow getting and setting various limits related to the magic +library. +.Bl -column "MAGIC_PARAM_ELF_PHNUM_MAX" "size_t" "Default" -offset indent +.It Sy "Parameter" Ta Sy "Type" Ta Sy "Default" +.It Li MAGIC_PARAM_INDIR_MAX Ta size_t Ta 15 +.It Li MAGIC_PARAM_NAME_MAX Ta size_t Ta 30 +.It Li MAGIC_PARAM_ELF_NOTES_MAX Ta size_t Ta 256 +.It Li MAGIC_PARAM_ELF_PHNUM_MAX Ta size_t Ta 128 +.It Li MAGIC_PARAM_ELF_SHNUM_MAX Ta size_t Ta 32768 +.It Li MAGIC_PARAM_REGEX_MAX Ta size_t Ta 8192 +.It Li MAGIC_PARAM_BYTES_MAX Ta size_t Ta 1048576 +.El +.Pp +The +.Dv MAGIC_PARAM_INDIR_RECURSION +parameter controls how many levels of recursion will be followed for +indirect magic entries. +.Pp +The +.Dv MAGIC_PARAM_NAME_RECURSION +parameter controls how many levels of recursion will be followed for +for name/use calls. +.Pp +The +.Dv MAGIC_PARAM_NAME_MAX +parameter controls the maximum number of calls for name/use. +.Pp +The +.Dv MAGIC_PARAM_NOTES_MAX +parameter controls how many ELF notes will be processed. +.Pp +The +.Dv MAGIC_PARAM_PHNUM_MAX +parameter controls how many ELF program sections will be processed. +.Pp +The +.Dv MAGIC_PARAM_SHNUM_MAX +parameter controls how many ELF sections will be processed. +.Pp +The +.Fn magic_version +command returns the version number of this library which is compiled into +the shared library using the constant +.Dv MAGIC_VERSION +from +.In magic.h . +This can be used by client programs to verify that the version they compile +against is the same as the version that they run against. +.Pp +The +.Fn magic_getpath +command returns the colon separated list of magic database locations. +If the +.Fa filename +is non-NULL, then it is returned. +Otherwise, if the +.Dv MAGIC +environment variable is defined, then it is returned. +Otherwise, if +.Fa action +is 0 (meaning "file load"), then any user-specific magic database file is included. +Otherwise, only the system default magic database path is included. +.Sh RETURN VALUES +The function +.Fn magic_open +returns a magic cookie on success and +.Dv NULL +on failure setting errno to an appropriate value. +It will set errno to +.Er EINVAL +if an unsupported value for flags was given. +The +.Fn magic_list , +.Fn magic_load , +.Fn magic_compile , +and +.Fn magic_check +functions return 0 on success and \-1 on failure. +The +.Fn magic_buffer , +.Fn magic_getpath , +and +.Fn magic_file , +functions return a string on success and +.Dv NULL +on failure. +The +.Fn magic_error +function returns a textual description of the errors of the above +functions, or +.Dv NULL +if there was no error. +The +.Fn magic_version +always returns the version number of the library. +Finally, +.Fn magic_setflags +returns \-1 on systems that don't support +.Xr utime 3 , +or +.Xr utimes 2 +when +.Dv MAGIC_PRESERVE_ATIME +is set. +.Sh FILES +.Bl -tag -width __MAGIC__.mgc -compact +.It Pa __MAGIC__ +The non-compiled default magic database. +.It Pa __MAGIC__.mgc +The compiled default magic database. +.El +.Sh SEE ALSO +.Xr file __CSECTION__ , +.Xr magic __FSECTION__ +.Sh BUGS +The results from +.Fn magic_buffer +and +.Fn magic_file +where the buffer and the file contain the same data +can produce different results, because in the +.Fn magic_file +case, the program can +.Xr lseek 2 +and +.Xr stat 2 +the file descriptor. +.Sh AUTHORS +.An M\(oans Rullg\(oard +Initial libmagic implementation, and configuration. +.An Christos Zoulas +API cleanup, error code and allocation handling. diff --git a/doc/magic.man b/doc/magic.man new file mode 100644 index 0000000..af4bfa8 --- /dev/null +++ b/doc/magic.man @@ -0,0 +1,830 @@ +.\" $File: magic.man,v 1.103 2023/07/20 14:32:07 christos Exp $ +.Dd Arpil 18, 2023 +.Dt MAGIC __FSECTION__ +.Os +.\" install as magic.4 on USG, magic.5 on V7, Berkeley and Linux systems. +.Sh NAME +.Nm magic +.Nd file command's magic pattern file +.Sh DESCRIPTION +This manual page documents the format of magic files as +used by the +.Xr file __CSECTION__ +command, version __VERSION__. +The +.Xr file __CSECTION__ +command identifies the type of a file using, +among other tests, +a test for whether the file contains certain +.Dq "magic patterns" . +The database of these +.Dq "magic patterns" +is usually located in a binary file in +.Pa __MAGIC__.mgc +or a directory of source text magic pattern fragment files in +.Pa __MAGIC__ . +The database specifies what patterns are to be tested for, what message or +MIME type to print if a particular pattern is found, +and additional information to extract from the file. +.Pp +The format of the source fragment files that are used to build this database +is as follows: +Each line of a fragment file specifies a test to be performed. +A test compares the data starting at a particular offset +in the file with a byte value, a string or a numeric value. +If the test succeeds, a message is printed. +The line consists of the following fields: +.Bl -tag -width ".Dv message" +.It Dv offset +A number specifying the offset (in bytes) into the file of the data +which is to be tested. +This offset can be a negative number if it is: +.Bl -bullet -compact +.It +The first direct offset of the magic entry (at continuation level 0), +in which case it is interpreted an offset from end end of the file +going backwards. +This works only when a file descriptor to the file is available and it +is a regular file. +.It +A continuation offset relative to the end of the last up-level field +.Dv ( \*[Am] ) . +.El +.It Dv type +The type of the data to be tested. +The possible values are: +.Bl -tag -width ".Dv lestring16" +.It Dv byte +A one-byte value. +.It Dv short +A two-byte value in this machine's native byte order. +.It Dv long +A four-byte value in this machine's native byte order. +.It Dv quad +An eight-byte value in this machine's native byte order. +.It Dv float +A 32-bit single precision IEEE floating point number in this machine's native byte order. +.It Dv double +A 64-bit double precision IEEE floating point number in this machine's native byte order. +.It Dv string +A string of bytes. +The string type specification can be optionally followed by a /<width> +option and optionally followed by a set of flags /[bCcftTtWw]*. +The width limits the number of characters to be copied. +Zero means all characters. +The following flags are supported: +.Bl -tag -width B -compact -offset XXXX +.It b +Force binary file test. +.It C +Use upper case insensitive matching: upper case +characters in the magic match both lower and upper case characters in the +target, whereas lower case characters in the magic only match upper case +characters in the target. +.It c +Use lower case insensitive matching: lower case +characters in the magic match both lower and upper case characters in the +target, whereas upper case characters in the magic only match upper case +characters in the target. +To do a complete case insensitive match, specify both +.Dq c +and +.Dq C . +.It f +Require that the matched string is a full word, not a partial word match. +.It T +Trim the string, i.e. leading and trailing whitespace +.It t +Force text file test. +.It W +Compact whitespace in the target, which must +contain at least one whitespace character. +If the magic has +.Dv n +consecutive blanks, the target needs at least +.Dv n +consecutive blanks to match. +.It w +Treat every blank in the magic as an optional blank. +is deleted before the string is printed. +.El +.It Dv pstring +A Pascal-style string where the first byte/short/int is interpreted as the +unsigned length. +The length defaults to byte and can be specified as a modifier. +The following modifiers are supported: +.Bl -tag -width B -compact -offset XXXX +.It B +A byte length (default). +.It H +A 2 byte big endian length. +.It h +A 2 byte little endian length. +.It L +A 4 byte big endian length. +.It l +A 4 byte little endian length. +.It J +The length includes itself in its count. +.El +The string is not NUL terminated. +.Dq J +is used rather than the more +valuable +.Dq I +because this type of length is a feature of the JPEG +format. +.It Dv date +A four-byte value interpreted as a UNIX date. +.It Dv qdate +An eight-byte value interpreted as a UNIX date. +.It Dv ldate +A four-byte value interpreted as a UNIX-style date, but interpreted as +local time rather than UTC. +.It Dv qldate +An eight-byte value interpreted as a UNIX-style date, but interpreted as +local time rather than UTC. +.It Dv qwdate +An eight-byte value interpreted as a Windows-style date. +.It Dv beid3 +A 32-bit ID3 length in big-endian byte order. +.It Dv beshort +A two-byte value in big-endian byte order. +.It Dv belong +A four-byte value in big-endian byte order. +.It Dv bequad +An eight-byte value in big-endian byte order. +.It Dv befloat +A 32-bit single precision IEEE floating point number in big-endian byte order. +.It Dv bedouble +A 64-bit double precision IEEE floating point number in big-endian byte order. +.It Dv bedate +A four-byte value in big-endian byte order, +interpreted as a Unix date. +.It Dv beqdate +An eight-byte value in big-endian byte order, +interpreted as a Unix date. +.It Dv beldate +A four-byte value in big-endian byte order, +interpreted as a UNIX-style date, but interpreted as local time rather +than UTC. +.It Dv beqldate +An eight-byte value in big-endian byte order, +interpreted as a UNIX-style date, but interpreted as local time rather +than UTC. +.It Dv beqwdate +An eight-byte value in big-endian byte order, +interpreted as a Windows-style date. +.It Dv bestring16 +A two-byte unicode (UCS16) string in big-endian byte order. +.It Dv leid3 +A 32-bit ID3 length in little-endian byte order. +.It Dv leshort +A two-byte value in little-endian byte order. +.It Dv lelong +A four-byte value in little-endian byte order. +.It Dv lequad +An eight-byte value in little-endian byte order. +.It Dv lefloat +A 32-bit single precision IEEE floating point number in little-endian byte order. +.It Dv ledouble +A 64-bit double precision IEEE floating point number in little-endian byte order. +.It Dv ledate +A four-byte value in little-endian byte order, +interpreted as a UNIX date. +.It Dv leqdate +An eight-byte value in little-endian byte order, +interpreted as a UNIX date. +.It Dv leldate +A four-byte value in little-endian byte order, +interpreted as a UNIX-style date, but interpreted as local time rather +than UTC. +.It Dv leqldate +An eight-byte value in little-endian byte order, +interpreted as a UNIX-style date, but interpreted as local time rather +than UTC. +.It Dv leqwdate +An eight-byte value in little-endian byte order, +interpreted as a Windows-style date. +.It Dv lestring16 +A two-byte unicode (UCS16) string in little-endian byte order. +.It Dv melong +A four-byte value in middle-endian (PDP-11) byte order. +.It Dv medate +A four-byte value in middle-endian (PDP-11) byte order, +interpreted as a UNIX date. +.It Dv meldate +A four-byte value in middle-endian (PDP-11) byte order, +interpreted as a UNIX-style date, but interpreted as local time rather +than UTC. +.It Dv indirect +Starting at the given offset, consult the magic database again. +The offset of the +.Dv indirect +magic is by default absolute in the file, but one can specify +.Dv /r +to indicate that the offset is relative from the beginning of the entry. +.It Dv name +Define a +.Dq named +magic instance that can be called from another +.Dv use +magic entry, like a subroutine call. +Named instance direct magic offsets are relative to the offset of the +previous matched entry, but indirect offsets are relative to the beginning +of the file as usual. +Named magic entries always match. +.It Dv use +Recursively call the named magic starting from the current offset. +If the name of the referenced begins with a +.Dv ^ +then the endianness of the magic is switched; if the magic mentioned +.Dv leshort +for example, +it is treated as +.Dv beshort +and vice versa. +This is useful to avoid duplicating the rules for different endianness. +.It Dv regex +A regular expression match in extended POSIX regular expression syntax +(like egrep). +Regular expressions can take exponential time to process, and their +performance is hard to predict, so their use is discouraged. +When used in production environments, their performance +should be carefully checked. +The size of the string to search should also be limited by specifying +.Dv /<length> , +to avoid performance issues scanning long files. +The type specification can also be optionally followed by +.Dv /[c][s][l] . +The +.Dq c +flag makes the match case insensitive, while the +.Dq s +flag update the offset to the start offset of the match, rather than the end. +The +.Dq l +modifier, changes the limit of length to mean number of lines instead of a +byte count. +Lines are delimited by the platforms native line delimiter. +When a line count is specified, an implicit byte count also computed assuming +each line is 80 characters long. +If neither a byte or line count is specified, the search is limited automatically +to 8KiB. +.Dv ^ +and +.Dv $ +match the beginning and end of individual lines, respectively, +not beginning and end of file. +.It Dv search +A literal string search starting at the given offset. +The same modifier flags can be used as for string patterns. +The search expression must contain the range in the form +.Dv /number, +that is the number of positions at which the match will be +attempted, starting from the start offset. +This is suitable for +searching larger binary expressions with variable offsets, using +.Dv \e +escapes for special characters. +The order of modifier and number is not relevant. +.It Dv default +This is intended to be used with the test +.Em x +(which is always true) and it has no type. +It matches when no other test at that continuation level has matched before. +Clearing that matched tests for a continuation level, can be done using the +.Dv clear +test. +.It Dv clear +This test is always true and clears the match flag for that continuation level. +It is intended to be used with the +.Dv default +test. +.It Dv der +Parse the file as a DER Certificate file. +The test field is used as a der type that needs to be matched. +The DER types are: +.Dv eoc , +.Dv bool , +.Dv int , +.Dv bit_str , +.Dv octet_str , +.Dv null , +.Dv obj_id , +.Dv obj_desc , +.Dv ext , +.Dv real , +.Dv enum , +.Dv embed , +.Dv utf8_str , +.Dv rel_oid , +.Dv time , +.Dv res2 , +.Dv seq , +.Dv set , +.Dv num_str , +.Dv prt_str , +.Dv t61_str , +.Dv vid_str , +.Dv ia5_str , +.Dv utc_time , +.Dv gen_time , +.Dv gr_str , +.Dv vis_str , +.Dv gen_str , +.Dv univ_str , +.Dv char_str , +.Dv bmp_str , +.Dv date , +.Dv tod , +.Dv datetime , +.Dv duration , +.Dv oid-iri , +.Dv rel-oid-iri . +These types can be followed by an optional numeric size, which indicates +the field width in bytes. +.It Dv guid +A Globally Unique Identifier, parsed and printed as +XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX. +It's format is a string. +.It Dv offset +This is a quad value indicating the current offset of the file. +It can be used to determine the size of the file or the magic buffer. +For example the magic entries: +.Bd -literal -offset indent +-0 offset x this file is %lld bytes +-0 offset <=100 must be more than 100 \e + bytes and is only %lld +.Ed +.It Dv octal +A string representing an octal number. +.El +.El +.Pp +For compatibility with the Single +.Ux +Standard, the type specifiers +.Dv dC +and +.Dv d1 +are equivalent to +.Dv byte , +the type specifiers +.Dv uC +and +.Dv u1 +are equivalent to +.Dv ubyte , +the type specifiers +.Dv dS +and +.Dv d2 +are equivalent to +.Dv short , +the type specifiers +.Dv uS +and +.Dv u2 +are equivalent to +.Dv ushort , +the type specifiers +.Dv dI , +.Dv dL , +and +.Dv d4 +are equivalent to +.Dv long , +the type specifiers +.Dv uI , +.Dv uL , +and +.Dv u4 +are equivalent to +.Dv ulong , +the type specifier +.Dv d8 +is equivalent to +.Dv quad , +the type specifier +.Dv u8 +is equivalent to +.Dv uquad , +and the type specifier +.Dv s +is equivalent to +.Dv string . +In addition, the type specifier +.Dv dQ +is equivalent to +.Dv quad +and the type specifier +.Dv uQ +is equivalent to +.Dv uquad . +.Pp +Each top-level magic pattern (see below for an explanation of levels) +is classified as text or binary according to the types used. +Types +.Dq regex +and +.Dq search +are classified as text tests, unless non-printable characters are used +in the pattern. +All other tests are classified as binary. +A top-level +pattern is considered to be a test text when all its patterns are text +patterns; otherwise, it is considered to be a binary pattern. +When +matching a file, binary patterns are tried first; if no match is +found, and the file looks like text, then its encoding is determined +and the text patterns are tried. +.Pp +The numeric types may optionally be followed by +.Dv \*[Am] +and a numeric value, +to specify that the value is to be AND'ed with the +numeric value before any comparisons are done. +Prepending a +.Dv u +to the type indicates that ordered comparisons should be unsigned. +.It Dv test +The value to be compared with the value from the file. +If the type is +numeric, this value +is specified in C form; if it is a string, it is specified as a C string +with the usual escapes permitted (e.g. \en for new-line). +.Pp +Numeric values +may be preceded by a character indicating the operation to be performed. +It may be +.Dv = , +to specify that the value from the file must equal the specified value, +.Dv \*[Lt] , +to specify that the value from the file must be less than the specified +value, +.Dv \*[Gt] , +to specify that the value from the file must be greater than the specified +value, +.Dv \*[Am] , +to specify that the value from the file must have set all of the bits +that are set in the specified value, +.Dv ^ , +to specify that the value from the file must have clear any of the bits +that are set in the specified value, or +.Dv ~ , +the value specified after is negated before tested. +.Dv x , +to specify that any value will match. +If the character is omitted, it is assumed to be +.Dv = . +Operators +.Dv \*[Am] , +.Dv ^ , +and +.Dv ~ +don't work with floats and doubles. +The operator +.Dv !\& +specifies that the line matches if the test does +.Em not +succeed. +.Pp +Numeric values are specified in C form; e.g. +.Dv 13 +is decimal, +.Dv 013 +is octal, and +.Dv 0x13 +is hexadecimal. +.Pp +Numeric operations are not performed on date types, instead the numeric +value is interpreted as an offset. +.Pp +For string values, the string from the +file must match the specified string. +The operators +.Dv = , +.Dv \*[Lt] +and +.Dv \*[Gt] +(but not +.Dv \*[Am] ) +can be applied to strings. +The length used for matching is that of the string argument +in the magic file. +This means that a line can match any non-empty string (usually used to +then print the string), with +.Em \*[Gt]\e0 +(because all non-empty strings are greater than the empty string). +.Pp +Dates are treated as numerical values in the respective internal +representation. +.Pp +The special test +.Em x +always evaluates to true. +.It Dv message +The message to be printed if the comparison succeeds. +If the string contains a +.Xr printf 3 +format specification, the value from the file (with any specified masking +performed) is printed using the message as the format string. +If the string begins with +.Dq \eb , +the message printed is the remainder of the string with no whitespace +added before it: multiple matches are normally separated by a single +space. +.El +.Pp +An APPLE 4+4 character APPLE creator and type can be specified as: +.Bd -literal -offset indent +!:apple CREATYPE +.Ed +.Pp +A slash-separated list of commonly found filename extensions can be specified +as: +.Bd -literal -offset indent +!:ext ext[/ext...] +.Ed +.Pp +i.e. the literal string +.Dq !:ext +followed by a slash-separated list of commonly found extensions; for example +for JPEG images: +.Bd -literal -offset indent +!:ext jpeg/jpg/jpe/jfif +.Ed +.Pp +A MIME type is given on a separate line, which must be the next +non-blank or comment line after the magic line that identifies the +file type, and has the following format: +.Bd -literal -offset indent +!:mime MIMETYPE +.Ed +.Pp +i.e. the literal string +.Dq !:mime +followed by the MIME type. +.Pp +An optional strength can be supplied on a separate line which refers to +the current magic description using the following format: +.Bd -literal -offset indent +!:strength OP VALUE +.Ed +.Pp +The operand +.Dv OP +can be: +.Dv + , +.Dv - , +.Dv * , +or +.Dv / +and +.Dv VALUE +is a constant between 0 and 255. +This constant is applied using the specified operand +to the currently computed default magic strength. +.Pp +Some file formats contain additional information which is to be printed +along with the file type or need additional tests to determine the true +file type. +These additional tests are introduced by one or more +.Em \*[Gt] +characters preceding the offset. +The number of +.Em \*[Gt] +on the line indicates the level of the test; a line with no +.Em \*[Gt] +at the beginning is considered to be at level 0. +Tests are arranged in a tree-like hierarchy: +if the test on a line at level +.Em n +succeeds, all following tests at level +.Em n+1 +are performed, and the messages printed if the tests succeed, until a line +with level +.Em n +(or less) appears. +For more complex files, one can use empty messages to get just the +"if/then" effect, in the following way: +.Bd -literal -offset indent +0 string MZ +\*[Gt]0x18 leshort \*[Lt]0x40 MS-DOS executable +\*[Gt]0x18 leshort \*[Gt]0x3f extended PC executable (e.g., MS Windows) +.Ed +.Pp +Offsets do not need to be constant, but can also be read from the file +being examined. +If the first character following the last +.Em \*[Gt] +is a +.Em \&( +then the string after the parenthesis is interpreted as an indirect offset. +That means that the number after the parenthesis is used as an offset in +the file. +The value at that offset is read, and is used again as an offset +in the file. +Indirect offsets are of the form: +.Em (( x [[.,][bBcCeEfFgGhHiIlmsSqQ]][+\-][ y ]) . +The value of +.Em x +is used as an offset in the file. +A byte, id3 length, short or long is read at that offset depending on the +.Em [bBcCeEfFgGhHiIlmsSqQ] +type specifier. +The value is treated as signed if +.Dq , +is specified or unsigned if +.Dq . +is specified. +The capitalized types interpret the number as a big endian +value, whereas the small letter versions interpret the number as a little +endian value; +the +.Em m +type interprets the number as a middle endian (PDP-11) value. +To that number the value of +.Em y +is added and the result is used as an offset in the file. +The default type if one is not specified is long. +The following types are recognized: +.Bl -column -offset indent "Type" "Half/Short" "Little" "Size" +.It Sy Type Sy Mnemonic Sy Endian Sy Size +.It bcBc Byte/Char N/A 1 +.It efg Double Little 8 +.It EFG Double Big 8 +.It hs Half/Short Little 2 +.It HS Half/Short Big 2 +.It i ID3 Little 4 +.It I ID3 Big 4 +.It m Middle Middle 4 +.It o Octal Textual Variable +.It q Quad Little 8 +.It Q Quad Big 8 +.El +.Pp +That way variable length structures can be examined: +.Bd -literal -offset indent +# MS Windows executables are also valid MS-DOS executables +0 string MZ +\*[Gt]0x18 leshort \*[Lt]0x40 MZ executable (MS-DOS) +# skip the whole block below if it is not an extended executable +\*[Gt]0x18 leshort \*[Gt]0x3f +\*[Gt]\*[Gt](0x3c.l) string PE\e0\e0 PE executable (MS-Windows) +\*[Gt]\*[Gt](0x3c.l) string LX\e0\e0 LX executable (OS/2) +.Ed +.Pp +This strategy of examining has a drawback: you must make sure that you +eventually print something, or users may get empty output (such as when +there is neither PE\e0\e0 nor LE\e0\e0 in the above example). +.Pp +If this indirect offset cannot be used directly, simple calculations are +possible: appending +.Em [+-*/%\*[Am]|^]number +inside parentheses allows one to modify +the value read from the file before it is used as an offset: +.Bd -literal -offset indent +# MS Windows executables are also valid MS-DOS executables +0 string MZ +# sometimes, the value at 0x18 is less that 0x40 but there's still an +# extended executable, simply appended to the file +\*[Gt]0x18 leshort \*[Lt]0x40 +\*[Gt]\*[Gt](4.s*512) leshort 0x014c COFF executable (MS-DOS, DJGPP) +\*[Gt]\*[Gt](4.s*512) leshort !0x014c MZ executable (MS-DOS) +.Ed +.Pp +Sometimes you do not know the exact offset as this depends on the length or +position (when indirection was used before) of preceding fields. +You can specify an offset relative to the end of the last up-level +field using +.Sq \*[Am] +as a prefix to the offset: +.Bd -literal -offset indent +0 string MZ +\*[Gt]0x18 leshort \*[Gt]0x3f +\*[Gt]\*[Gt](0x3c.l) string PE\e0\e0 PE executable (MS-Windows) +# immediately following the PE signature is the CPU type +\*[Gt]\*[Gt]\*[Gt]\*[Am]0 leshort 0x14c for Intel 80386 +\*[Gt]\*[Gt]\*[Gt]\*[Am]0 leshort 0x184 for DEC Alpha +.Ed +.Pp +Indirect and relative offsets can be combined: +.Bd -literal -offset indent +0 string MZ +\*[Gt]0x18 leshort \*[Lt]0x40 +\*[Gt]\*[Gt](4.s*512) leshort !0x014c MZ executable (MS-DOS) +# if it's not COFF, go back 512 bytes and add the offset taken +# from byte 2/3, which is yet another way of finding the start +# of the extended executable +\*[Gt]\*[Gt]\*[Gt]\*[Am](2.s-514) string LE LE executable (MS Windows VxD driver) +.Ed +.Pp +Or the other way around: +.Bd -literal -offset indent +0 string MZ +\*[Gt]0x18 leshort \*[Gt]0x3f +\*[Gt]\*[Gt](0x3c.l) string LE\e0\e0 LE executable (MS-Windows) +# at offset 0x80 (-4, since relative offsets start at the end +# of the up-level match) inside the LE header, we find the absolute +# offset to the code area, where we look for a specific signature +\*[Gt]\*[Gt]\*[Gt](\*[Am]0x7c.l+0x26) string UPX \eb, UPX compressed +.Ed +.Pp +Or even both! +.Bd -literal -offset indent +0 string MZ +\*[Gt]0x18 leshort \*[Gt]0x3f +\*[Gt]\*[Gt](0x3c.l) string LE\e0\e0 LE executable (MS-Windows) +# at offset 0x58 inside the LE header, we find the relative offset +# to a data area where we look for a specific signature +\*[Gt]\*[Gt]\*[Gt]\*[Am](\*[Am]0x54.l-3) string UNACE \eb, ACE self-extracting archive +.Ed +.Pp +If you have to deal with offset/length pairs in your file, even the +second value in a parenthesized expression can be taken from the file itself, +using another set of parentheses. +Note that this additional indirect offset is always relative to the +start of the main indirect offset. +.Bd -literal -offset indent +0 string MZ +\*[Gt]0x18 leshort \*[Gt]0x3f +\*[Gt]\*[Gt](0x3c.l) string PE\e0\e0 PE executable (MS-Windows) +# search for the PE section called ".idata"... +\*[Gt]\*[Gt]\*[Gt]\*[Am]0xf4 search/0x140 .idata +# ...and go to the end of it, calculated from start+length; +# these are located 14 and 10 bytes after the section name +\*[Gt]\*[Gt]\*[Gt]\*[Gt](\*[Am]0xe.l+(-4)) string PK\e3\e4 \eb, ZIP self-extracting archive +.Ed +.Pp +If you have a list of known values at a particular continuation level, +and you want to provide a switch-like default case: +.Bd -literal -offset indent +# clear that continuation level match +\*[Gt]18 clear +\*[Gt]18 lelong 1 one +\*[Gt]18 lelong 2 two +\*[Gt]18 default x +# print default match +\*[Gt]\*[Gt]18 lelong x unmatched 0x%x +.Ed +.Sh SEE ALSO +.Xr file __CSECTION__ +\- the command that reads this file. +.Sh BUGS +The formats +.Dv long , +.Dv belong , +.Dv lelong , +.Dv melong , +.Dv short , +.Dv beshort , +and +.Dv leshort +do not depend on the length of the C data types +.Dv short +and +.Dv long +on the platform, even though the Single +.Ux +Specification implies that they do. +However, as OS X Mountain Lion has passed the Single +.Ux +Specification validation suite, and supplies a version of +.Xr file __CSECTION__ +in which they do not depend on the sizes of the C data types and that is +built for a 64-bit environment in which +.Dv long +is 8 bytes rather than 4 bytes, presumably the validation suite does not +test whether, for example +.Dv long +refers to an item with the same size as the C data type +.Dv long . +There should probably be +.Dv type +names +.Dv int8 , +.Dv uint8 , +.Dv int16 , +.Dv uint16 , +.Dv int32 , +.Dv uint32 , +.Dv int64 , +and +.Dv uint64 , +and specified-byte-order variants of them, +to make it clearer that those types have specified widths. +.\" +.\" From: guy@sun.uucp (Guy Harris) +.\" Newsgroups: net.bugs.usg +.\" Subject: /etc/magic's format isn't well documented +.\" Message-ID: <2752@sun.uucp> +.\" Date: 3 Sep 85 08:19:07 GMT +.\" Organization: Sun Microsystems, Inc. +.\" Lines: 136 +.\" +.\" Here's a manual page for the format accepted by the "file" made by adding +.\" the changes I posted to the S5R2 version. +.\" +.\" Modified for Ian Darwin's version of the file command. |