diff options
Diffstat (limited to 'lib/strutil')
-rw-r--r-- | lib/strutil/Makefile.am | 14 | ||||
-rw-r--r-- | lib/strutil/Makefile.in | 772 | ||||
-rw-r--r-- | lib/strutil/filevercmp.c | 265 | ||||
-rw-r--r-- | lib/strutil/replace.c | 117 | ||||
-rw-r--r-- | lib/strutil/strescape.c | 259 | ||||
-rw-r--r-- | lib/strutil/strutil.c | 1024 | ||||
-rw-r--r-- | lib/strutil/strutil8bit.c | 860 | ||||
-rw-r--r-- | lib/strutil/strutilascii.c | 783 | ||||
-rw-r--r-- | lib/strutil/strutilutf8.c | 1519 | ||||
-rw-r--r-- | lib/strutil/strverscmp.c | 158 | ||||
-rw-r--r-- | lib/strutil/xstrtol.c | 256 |
11 files changed, 6027 insertions, 0 deletions
diff --git a/lib/strutil/Makefile.am b/lib/strutil/Makefile.am new file mode 100644 index 0000000..5936a36 --- /dev/null +++ b/lib/strutil/Makefile.am @@ -0,0 +1,14 @@ +noinst_LTLIBRARIES = libmcstrutil.la + +libmcstrutil_la_SOURCES = \ + filevercmp.c \ + replace.c \ + strescape.c \ + strutil8bit.c \ + strutilascii.c \ + strutil.c \ + strutilutf8.c \ + strverscmp.c \ + xstrtol.c + +AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir) diff --git a/lib/strutil/Makefile.in b/lib/strutil/Makefile.in new file mode 100644 index 0000000..966e17e --- /dev/null +++ b/lib/strutil/Makefile.in @@ -0,0 +1,772 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = lib/strutil +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/gettext.m4 \ + $(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/intlmacosx.m4 \ + $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \ + $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/longlong.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/nls.m4 \ + $(top_srcdir)/m4/po.m4 $(top_srcdir)/m4/progtest.m4 \ + $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/m4.include/gnulib/mode_t.m4 \ + $(top_srcdir)/m4.include/gnulib/stat-size.m4 \ + $(top_srcdir)/m4.include/gnulib/fstypename.m4 \ + $(top_srcdir)/m4.include/gnulib/fsusage.m4 \ + $(top_srcdir)/m4.include/gnulib/mountlist.m4 \ + $(top_srcdir)/m4.include/gnulib/windows-stat-inodes.m4 \ + $(top_srcdir)/m4.include/gnulib/sys_types_h.m4 \ + $(top_srcdir)/m4.include/ax_path_lib_pcre.m4 \ + $(top_srcdir)/m4.include/dx_doxygen.m4 \ + $(top_srcdir)/m4.include/ax_require_defined.m4 \ + $(top_srcdir)/m4.include/ax_check_compile_flag.m4 \ + $(top_srcdir)/m4.include/ax_append_flag.m4 \ + $(top_srcdir)/m4.include/ax_append_compile_flags.m4 \ + $(top_srcdir)/m4.include/mc-cflags.m4 \ + $(top_srcdir)/m4.include/ax_gcc_func_attribute.m4 \ + $(top_srcdir)/m4.include/mc-check-search-type.m4 \ + $(top_srcdir)/m4.include/mc-get-fs-info.m4 \ + $(top_srcdir)/m4.include/mc-with-x.m4 \ + $(top_srcdir)/m4.include/mc-use-termcap.m4 \ + $(top_srcdir)/m4.include/mc-with-screen.m4 \ + $(top_srcdir)/m4.include/mc-with-screen-ncurses.m4 \ + $(top_srcdir)/m4.include/mc-with-screen-slang.m4 \ + $(top_srcdir)/m4.include/mc-with-internal-edit.m4 \ + $(top_srcdir)/m4.include/mc-subshell.m4 \ + $(top_srcdir)/m4.include/mc-background.m4 \ + $(top_srcdir)/m4.include/mc-ext2fs-attr.m4 \ + $(top_srcdir)/m4.include/mc-glib.m4 \ + $(top_srcdir)/m4.include/mc-vfs.m4 \ + $(top_srcdir)/m4.include/vfs/rpc.m4 \ + $(top_srcdir)/m4.include/vfs/socket.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-extfs.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-sfs.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-ftp.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-sftp.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-fish.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-undelfs.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-tarfs.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-cpiofs.m4 \ + $(top_srcdir)/m4.include/mc-version.m4 \ + $(top_srcdir)/m4.include/mc-tests.m4 \ + $(top_srcdir)/m4.include/mc-i18n.m4 \ + $(top_srcdir)/m4.include/mc-assert.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libmcstrutil_la_LIBADD = +am_libmcstrutil_la_OBJECTS = filevercmp.lo replace.lo strescape.lo \ + strutil8bit.lo strutilascii.lo strutil.lo strutilutf8.lo \ + strverscmp.lo xstrtol.lo +libmcstrutil_la_OBJECTS = $(am_libmcstrutil_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/config/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/filevercmp.Plo \ + ./$(DEPDIR)/replace.Plo ./$(DEPDIR)/strescape.Plo \ + ./$(DEPDIR)/strutil.Plo ./$(DEPDIR)/strutil8bit.Plo \ + ./$(DEPDIR)/strutilascii.Plo ./$(DEPDIR)/strutilutf8.Plo \ + ./$(DEPDIR)/strverscmp.Plo ./$(DEPDIR)/xstrtol.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libmcstrutil_la_SOURCES) +DIST_SOURCES = $(libmcstrutil_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +COM_ERR_CFLAGS = @COM_ERR_CFLAGS@ +COM_ERR_LIBS = @COM_ERR_LIBS@ +CP1251 = @CP1251@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOC_LINGUAS = @DOC_LINGUAS@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +E2P_CFLAGS = @E2P_CFLAGS@ +E2P_LIBS = @E2P_LIBS@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +EXT2FS_CFLAGS = @EXT2FS_CFLAGS@ +EXT2FS_LIBS = @EXT2FS_LIBS@ +EXTHELPERSDIR = @EXTHELPERSDIR@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_LIBS = @GLIB_LIBS@ +GMODULE_CFLAGS = @GMODULE_CFLAGS@ +GMODULE_LIBS = @GMODULE_LIBS@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ +GREP = @GREP@ +HAVE_FILECMD = @HAVE_FILECMD@ +HAVE_ZIPINFO = @HAVE_ZIPINFO@ +HAVE_nroff = @HAVE_nroff@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTLLIBS = @INTLLIBS@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBMC_RELEASE = @LIBMC_RELEASE@ +LIBMC_VERSION = @LIBMC_VERSION@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSSH_CFLAGS = @LIBSSH_CFLAGS@ +LIBSSH_LIBS = @LIBSSH_LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANDOC = @MANDOC@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MAN_DATE = @MAN_DATE@ +MAN_FLAGS = @MAN_FLAGS@ +MAN_VERSION = @MAN_VERSION@ +MCLIBS = @MCLIBS@ +MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PCRE_CPPFLAGS = @PCRE_CPPFLAGS@ +PCRE_LIBS = @PCRE_LIBS@ +PERL = @PERL@ +PERL_FOR_BUILD = @PERL_FOR_BUILD@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +POSUB = @POSUB@ +PYTHON = @PYTHON@ +RANLIB = @RANLIB@ +RUBY = @RUBY@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLANG_CFLAGS = @SLANG_CFLAGS@ +SLANG_LIBS = @SLANG_LIBS@ +STRIP = @STRIP@ +TESTS_LDFLAGS = @TESTS_LDFLAGS@ +UNZIP = @UNZIP@ +USE_NLS = @USE_NLS@ +VERSION = @VERSION@ +X11_WWW = @X11_WWW@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +ZIP = @ZIP@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +noinst_LTLIBRARIES = libmcstrutil.la +libmcstrutil_la_SOURCES = \ + filevercmp.c \ + replace.c \ + strescape.c \ + strutil8bit.c \ + strutilascii.c \ + strutil.c \ + strutilutf8.c \ + strverscmp.c \ + xstrtol.c + +AM_CPPFLAGS = $(GLIB_CFLAGS) -I$(top_srcdir) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu lib/strutil/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu lib/strutil/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libmcstrutil.la: $(libmcstrutil_la_OBJECTS) $(libmcstrutil_la_DEPENDENCIES) $(EXTRA_libmcstrutil_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libmcstrutil_la_OBJECTS) $(libmcstrutil_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/filevercmp.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/replace.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strescape.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strutil.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strutil8bit.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strutilascii.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strutilutf8.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strverscmp.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/xstrtol.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/filevercmp.Plo + -rm -f ./$(DEPDIR)/replace.Plo + -rm -f ./$(DEPDIR)/strescape.Plo + -rm -f ./$(DEPDIR)/strutil.Plo + -rm -f ./$(DEPDIR)/strutil8bit.Plo + -rm -f ./$(DEPDIR)/strutilascii.Plo + -rm -f ./$(DEPDIR)/strutilutf8.Plo + -rm -f ./$(DEPDIR)/strverscmp.Plo + -rm -f ./$(DEPDIR)/xstrtol.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/filevercmp.Plo + -rm -f ./$(DEPDIR)/replace.Plo + -rm -f ./$(DEPDIR)/strescape.Plo + -rm -f ./$(DEPDIR)/strutil.Plo + -rm -f ./$(DEPDIR)/strutil8bit.Plo + -rm -f ./$(DEPDIR)/strutilascii.Plo + -rm -f ./$(DEPDIR)/strutilutf8.Plo + -rm -f ./$(DEPDIR)/strverscmp.Plo + -rm -f ./$(DEPDIR)/xstrtol.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-noinstLTLIBRARIES \ + cscopelist-am ctags ctags-am distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/lib/strutil/filevercmp.c b/lib/strutil/filevercmp.c new file mode 100644 index 0000000..a0e55fc --- /dev/null +++ b/lib/strutil/filevercmp.c @@ -0,0 +1,265 @@ +/* + Copyright (C) 1995 Ian Jackson <iwj10@cus.cam.ac.uk> + Copyright (C) 2001 Anthony Towns <aj@azure.humbug.org.au> + Copyright (C) 2008-2022 Free Software Foundation, Inc. + + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation, either version 3 of the + License, or (at your option) any later version. + + This file is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include <stdlib.h> +#include <limits.h> + +#include "lib/strutil.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +/*** file scope type declarations ****************************************************************/ + +/*** file scope variables ************************************************************************/ + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +/* Return the length of a prefix of @s that corresponds to the suffix defined by this extended + * regular expression in the C locale: (\.[A-Za-z~][A-Za-z0-9~]*)*$ + * + * Use the longest suffix matching this regular expression, except do not use all of s as a suffix + * if s is nonempty. + * + * If *len is -1, s is a string; set *lem to s's length. + * Otherwise, *len should be nonnegative, s is a char array, and *len does not change. + */ +static ssize_t +file_prefixlen (const char *s, ssize_t * len) +{ + size_t n = (size_t) (*len); /* SIZE_MAX if N == -1 */ + size_t i = 0; + size_t prefixlen = 0; + + while (TRUE) + { + gboolean done; + + if (*len < 0) + done = s[i] == '\0'; + else + done = i == n; + + if (done) + { + *len = (ssize_t) i; + return (ssize_t) prefixlen; + } + + i++; + prefixlen = i; + + while (i + 1 < n && s[i] == '.' && (g_ascii_isalpha (s[i + 1]) || s[i + 1] == '~')) + for (i += 2; i < n && (g_ascii_isalnum (s[i]) || s[i] == '~'); i++) + ; + } +} + +/* --------------------------------------------------------------------------------------------- */ + +/* Return a version sort comparison value for @s's byte at position @pos. + * + * @param s a string + * @param pos a position in @s + * @param len a length of @s. If @pos == @len, sort before all non-'~' bytes. + */ + +static int +order (const char *s, size_t pos, size_t len) +{ + unsigned char c; + + if (pos == len) + return (-1); + + c = s[pos]; + + if (g_ascii_isdigit (c)) + return 0; + if (g_ascii_isalpha (c)) + return c; + if (c == '~') + return (-2); + + g_assert (UCHAR_MAX <= (INT_MAX - 1 - 2) / 2); + + return (int) c + UCHAR_MAX + 1; +} + +/* --------------------------------------------------------------------------------------------- */ + +/* Slightly modified verrevcmp function from dpkg + * + * This implements the algorithm for comparison of version strings + * specified by Debian and now widely adopted. The detailed + * specification can be found in the Debian Policy Manual in the + * section on the 'Version' control field. This version of the code + * implements that from s5.6.12 of Debian Policy v3.8.0.1 + * https://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version + * + * @param s1 first char array to compare + * @param s1_len length of @s1 + * @param s2 second char array to compare + * @param s2_len length of @s2 + * + * @return an integer less than, equal to, or greater than zero, if @s1 is <, == or > than @s2. + */ +static int +verrevcmp (const char *s1, ssize_t s1_len, const char *s2, ssize_t s2_len) +{ + ssize_t s1_pos = 0; + ssize_t s2_pos = 0; + + while (s1_pos < s1_len || s2_pos < s2_len) + { + int first_diff = 0; + + while ((s1_pos < s1_len && !g_ascii_isdigit (s1[s1_pos])) + || (s2_pos < s2_len && !g_ascii_isdigit (s2[s2_pos]))) + { + int s1_c, s2_c; + + s1_c = order (s1, s1_pos, s1_len); + s2_c = order (s2, s2_pos, s2_len); + + if (s1_c != s2_c) + return (s1_c - s2_c); + + s1_pos++; + s2_pos++; + } + + while (s1_pos < s1_len && s1[s1_pos] == '0') + s1_pos++; + while (s2_pos < s2_len && s2[s2_pos] == '0') + s2_pos++; + + while (s1_pos < s1_len && s2_pos < s2_len + && g_ascii_isdigit (s1[s1_pos]) && g_ascii_isdigit (s2[s2_pos])) + { + if (first_diff == 0) + first_diff = s1[s1_pos] - s2[s2_pos]; + + s1_pos++; + s2_pos++; + } + + if (s1_pos < s1_len && g_ascii_isdigit (s1[s1_pos])) + return 1; + if (s2_pos < s2_len && g_ascii_isdigit (s2[s2_pos])) + return (-1); + if (first_diff != 0) + return first_diff; + } + + return 0; +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +/* Compare version strings. + * + * @param s1 first string to compare + * @param s2 second string to compare + * + * @return an integer less than, equal to, or greater than zero, if @s1 is <, == or > than @s2. + */ +int +filevercmp (const char *s1, const char *s2) +{ + return filenvercmp (s1, -1, s2, -1); +} + +/* --------------------------------------------------------------------------------------------- */ +/* Compare version strings. + * + * @param a first string to compare + * @param alen length of @a or (-1) + * @param b second string to compare + * @param blen length of @b or (-1) + * + * @return an integer less than, equal to, or greater than zero, if @s1 is <, == or > than @s2. + */ +int +filenvercmp (const char *a, ssize_t alen, const char *b, ssize_t blen) +{ + gboolean aempty, bempty; + ssize_t aprefixlen, bprefixlen; + gboolean one_pass_only; + int result; + + /* Special case for empty versions. */ + aempty = alen < 0 ? a[0] == '\0' : alen == 0; + bempty = blen < 0 ? b[0] == '\0' : blen == 0; + + if (aempty) + return (bempty ? 0 : -1); + if (bempty) + return 1; + + /* Special cases for leading ".": "." sorts first, then "..", then other names with leading ".", + then other names. */ + if (a[0] == '.') + { + gboolean adot, bdot; + gboolean adotdot, bdotdot; + + if (b[0] != '.') + return (-1); + + adot = alen < 0 ? a[1] == '\0' : alen == 1; + bdot = blen < 0 ? b[1] == '\0' : blen == 1; + + if (adot) + return (bdot ? 0 : -1); + if (bdot) + return 1; + + adotdot = a[1] == '.' && (alen < 0 ? a[2] == '\0' : alen == 2); + bdotdot = b[1] == '.' && (blen < 0 ? b[2] == '\0' : blen == 2); + if (adotdot) + return (bdotdot ? 0 : -1); + if (bdotdot) + return 1; + } + else if (b[0] == '.') + return 1; + + /* Cut file suffixes. */ + aprefixlen = file_prefixlen (a, &alen); + bprefixlen = file_prefixlen (b, &blen); + + /* If both suffixes are empty, a second pass would return the same thing. */ + one_pass_only = aprefixlen == alen && bprefixlen == blen; + + result = verrevcmp (a, aprefixlen, b, bprefixlen); + + /* Return the initial result if nonzero, or if no second pass is needed. + Otherwise, restore the suffixes and try again. */ + return (result != 0 || one_pass_only ? result : verrevcmp (a, alen, b, blen)); +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/lib/strutil/replace.c b/lib/strutil/replace.c new file mode 100644 index 0000000..48255e5 --- /dev/null +++ b/lib/strutil/replace.c @@ -0,0 +1,117 @@ +/* + Functions for replacing substrings in strings. + + Copyright (C) 2013-2022 + Free Software Foundation, Inc. + + Written by: + Slava Zanko <slavazanko@gmail.com>, 2013; + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include "lib/global.h" +#include "lib/strescape.h" +#include "lib/strutil.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +/*** file scope type declarations ****************************************************************/ + +/*** file scope variables ************************************************************************/ + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +static GString * +str_ptr_array_join (const GPtrArray * str_splints) +{ + GString *return_str; + guint i; + + return_str = g_string_sized_new (32); + for (i = 0; i < str_splints->len; i++) + g_string_append (return_str, g_ptr_array_index (str_splints, i)); + + return return_str; +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ +/** + * Replace all substrings 'needle' in string 'haystack' by 'replacement'. + * If the 'needle' in the 'haystack' will be escaped by backslash, + * then this occurrence isn't be replaced. + * + * @param haystack string contains substrings for replacement + * @param needle string for search + * @param replacement string for replace + * @return newly allocated string with replaced substrings + */ + +char * +str_replace_all (const char *haystack, const char *needle, const char *replacement) +{ + size_t needle_len; + GPtrArray *str_splints; + GString *return_str; + + needle_len = strlen (needle); + + str_splints = g_ptr_array_new_with_free_func (g_free); + + while (TRUE) + { + char *needle_in_str; + + needle_in_str = strstr (haystack, needle); + if (needle_in_str == NULL) + { + if (*haystack != '\0') + g_ptr_array_add (str_splints, g_strdup (haystack)); + break; + } + + if (strutils_is_char_escaped (haystack, needle_in_str)) + { + char *backslash = needle_in_str - 1; + + if (haystack != backslash) + g_ptr_array_add (str_splints, g_strndup (haystack, backslash - haystack)); + + g_ptr_array_add (str_splints, g_strndup (backslash + 1, needle_in_str - backslash)); + haystack = needle_in_str + 1; + continue; + } + if (needle_in_str - haystack > 0) + g_ptr_array_add (str_splints, g_strndup (haystack, needle_in_str - haystack)); + g_ptr_array_add (str_splints, g_strdup (replacement)); + haystack = needle_in_str + needle_len; + } + return_str = str_ptr_array_join (str_splints); + + g_ptr_array_free (str_splints, TRUE); + + return g_string_free (return_str, FALSE); +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/lib/strutil/strescape.c b/lib/strutil/strescape.c new file mode 100644 index 0000000..9d1beeb --- /dev/null +++ b/lib/strutil/strescape.c @@ -0,0 +1,259 @@ +/* + Functions for escaping and unescaping strings + + Copyright (C) 2009-2022 + Free Software Foundation, Inc. + + Written by: + Slava Zanko <slavazanko@gmail.com>, 2009; + Patrick Winnertz <winnie@debian.org>, 2009 + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include "lib/global.h" +#include "lib/strescape.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +/*** file scope type declarations ****************************************************************/ + +/*** file scope variables ************************************************************************/ + +static const char ESCAPE_SHELL_CHARS[] = " !#$%()&{}[]`?|<>;*\\\"'"; +static const char ESCAPE_REGEX_CHARS[] = "^!#$%()&{}[]`?|<>;*+.\\"; +static const char ESCAPE_GLOB_CHARS[] = "$*\\?"; + +/*** file scope functions ************************************************************************/ + +/*** public functions ****************************************************************************/ + +char * +strutils_escape (const char *src, gsize src_len, const char *escaped_chars, + gboolean escape_non_printable) +{ + GString *ret; + gsize curr_index; + /* do NOT break allocation semantics */ + if (src == NULL) + return NULL; + + if (*src == '\0') + return strdup (""); + + ret = g_string_new (""); + + if (src_len == (gsize) (-1)) + src_len = strlen (src); + + for (curr_index = 0; curr_index < src_len; curr_index++) + { + if (escape_non_printable) + { + switch (src[curr_index]) + { + case '\n': + g_string_append (ret, "\\n"); + continue; + case '\t': + g_string_append (ret, "\\t"); + continue; + case '\b': + g_string_append (ret, "\\b"); + continue; + case '\0': + g_string_append (ret, "\\0"); + continue; + default: + break; + } + } + + if (strchr (escaped_chars, (int) src[curr_index])) + g_string_append_c (ret, '\\'); + + g_string_append_c (ret, src[curr_index]); + } + return g_string_free (ret, FALSE); +} + +/* --------------------------------------------------------------------------------------------- */ +char * +strutils_unescape (const char *src, gsize src_len, const char *unescaped_chars, + gboolean unescape_non_printable) +{ + GString *ret; + gsize curr_index; + + if (src == NULL) + return NULL; + + if (*src == '\0') + return strdup (""); + + ret = g_string_sized_new (16); + + if (src_len == (gsize) (-1)) + src_len = strlen (src); + src_len--; + + for (curr_index = 0; curr_index < src_len; curr_index++) + { + if (src[curr_index] != '\\') + { + g_string_append_c (ret, src[curr_index]); + continue; + } + + curr_index++; + + if (unescaped_chars == ESCAPE_SHELL_CHARS && src[curr_index] == '$') + { + /* special case: \$ is used to disallow variable substitution */ + g_string_append_c (ret, '\\'); + } + else + { + if (unescape_non_printable) + { + switch (src[curr_index]) + { + case 'n': + g_string_append_c (ret, '\n'); + continue; + case 't': + g_string_append_c (ret, '\t'); + continue; + case 'b': + g_string_append_c (ret, '\b'); + continue; + case '0': + g_string_append_c (ret, '\0'); + continue; + default: + break; + } + } + + if (strchr (unescaped_chars, (int) src[curr_index]) == NULL) + g_string_append_c (ret, '\\'); + } + + g_string_append_c (ret, src[curr_index]); + } + g_string_append_c (ret, src[curr_index]); + + return g_string_free (ret, FALSE); +} + +/* --------------------------------------------------------------------------------------------- */ + +/** + * To be compatible with the general posix command lines we have to escape + * strings for the command line + * + * @param src string for escaping + * + * @return escaped string (which needs to be freed later) or NULL when NULL string is passed. + */ + +char * +strutils_shell_escape (const char *src) +{ + return strutils_escape (src, -1, ESCAPE_SHELL_CHARS, FALSE); +} + +/* --------------------------------------------------------------------------------------------- */ + +char * +strutils_glob_escape (const char *src) +{ + return strutils_escape (src, -1, ESCAPE_GLOB_CHARS, TRUE); +} + +/* --------------------------------------------------------------------------------------------- */ + +char * +strutils_regex_escape (const char *src) +{ + return strutils_escape (src, -1, ESCAPE_REGEX_CHARS, TRUE); +} + +/* --------------------------------------------------------------------------------------------- */ + +/** + * Unescape paths or other strings for e.g the internal cd + * shell-unescape within a given buffer (writing to it!) + * + * @param text string for unescaping + * + * @return unescaped string (which needs to be freed) + */ + +char * +strutils_shell_unescape (const char *text) +{ + return strutils_unescape (text, -1, ESCAPE_SHELL_CHARS, TRUE); +} + +/* --------------------------------------------------------------------------------------------- */ + +char * +strutils_glob_unescape (const char *text) +{ + return strutils_unescape (text, -1, ESCAPE_GLOB_CHARS, TRUE); +} + +/* --------------------------------------------------------------------------------------------- */ +char * +strutils_regex_unescape (const char *text) +{ + return strutils_unescape (text, -1, ESCAPE_REGEX_CHARS, TRUE); +} + +/* --------------------------------------------------------------------------------------------- */ + +/** + * Check if char in pointer contain escape'd chars + * + * @param start string for checking + * @param current pointer to checked character + * + * @return TRUE if string contain escaped chars otherwise return FALSE + */ + +gboolean +strutils_is_char_escaped (const char *start, const char *current) +{ + int num_esc = 0; + + if (start == NULL || current == NULL || current <= start) + return FALSE; + + current--; + while (current >= start && *current == '\\') + { + num_esc++; + current--; + } + return (gboolean) num_esc % 2; +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/lib/strutil/strutil.c b/lib/strutil/strutil.c new file mode 100644 index 0000000..b86d578 --- /dev/null +++ b/lib/strutil/strutil.c @@ -0,0 +1,1024 @@ +/* + Common strings utilities + + Copyright (C) 2007-2022 + Free Software Foundation, Inc. + + Written by: + Rostislav Benes, 2007 + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include <stdlib.h> +#include <langinfo.h> +#include <string.h> +#include <errno.h> + +#include "lib/global.h" +#include "lib/util.h" /* MC_PTR_FREE */ +#include "lib/strutil.h" + +/*** global variables ****************************************************************************/ + +GIConv str_cnv_to_term; +GIConv str_cnv_from_term; +GIConv str_cnv_not_convert = INVALID_CONV; + +/*** file scope macro definitions ****************************************************************/ + +/*** file scope type declarations ****************************************************************/ + +/*** file scope variables ************************************************************************/ + +/* names, that are used for utf-8 */ +static const char *const str_utf8_encodings[] = { + "utf-8", + "utf8", + NULL +}; + +/* standard 8bit encodings, no wide or multibytes characters */ +static const char *const str_8bit_encodings[] = { + /* Solaris has different names of Windows 1251 encoding */ +#ifdef __sun + "ansi-1251", + "ansi1251", +#else + "cp-1251", + "cp1251", +#endif + "cp-1250", + "cp1250", + "cp-866", + "cp866", + "ibm-866", + "ibm866", + "cp-850", + "cp850", + "cp-852", + "cp852", + "iso-8859", + "iso8859", + "koi8", + NULL +}; + +/* terminal encoding */ +static char *codeset = NULL; +static char *term_encoding = NULL; +/* function for encoding specific operations */ +static struct str_class used_class; + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +/* if enc is same encoding like on terminal */ +static int +str_test_not_convert (const char *enc) +{ + return g_ascii_strcasecmp (enc, codeset) == 0; +} + +/* --------------------------------------------------------------------------------------------- */ + +static estr_t +_str_convert (GIConv coder, const char *string, int size, GString * buffer) +{ + estr_t state = ESTR_SUCCESS; + gssize left; + gsize bytes_read = 0; + gsize bytes_written = 0; + + errno = 0; /* FIXME: is it really needed? */ + + if (coder == INVALID_CONV) + return ESTR_FAILURE; + + if (string == NULL || buffer == NULL) + return ESTR_FAILURE; + + /* + if (! used_class.is_valid_string (string)) + { + return ESTR_FAILURE; + } + */ + if (size < 0) + size = strlen (string); + else + { + left = strlen (string); + if (left < size) + size = left; + } + + left = size; + g_iconv (coder, NULL, NULL, NULL, NULL); + + while (left != 0) + { + gchar *tmp_buff; + GError *mcerror = NULL; + + tmp_buff = g_convert_with_iconv ((const gchar *) string, + left, coder, &bytes_read, &bytes_written, &mcerror); + if (mcerror != NULL) + { + int code = mcerror->code; + + g_error_free (mcerror); + mcerror = NULL; + + switch (code) + { + case G_CONVERT_ERROR_NO_CONVERSION: + /* Conversion between the requested character sets is not supported. */ + g_free (tmp_buff); + tmp_buff = g_strnfill (strlen (string), '?'); + g_string_append (buffer, tmp_buff); + g_free (tmp_buff); + return ESTR_FAILURE; + + case G_CONVERT_ERROR_ILLEGAL_SEQUENCE: + /* Invalid byte sequence in conversion input. */ + if ((tmp_buff == NULL) && (bytes_read != 0)) + /* recode valid byte sequence */ + tmp_buff = g_convert_with_iconv ((const gchar *) string, + bytes_read, coder, NULL, NULL, NULL); + + if (tmp_buff != NULL) + { + g_string_append (buffer, tmp_buff); + g_free (tmp_buff); + } + + if ((int) bytes_read >= left) + return ESTR_PROBLEM; + + string += bytes_read + 1; + size -= (bytes_read + 1); + left -= (bytes_read + 1); + g_string_append_c (buffer, *(string - 1)); + state = ESTR_PROBLEM; + break; + + case G_CONVERT_ERROR_PARTIAL_INPUT: + /* Partial character sequence at end of input. */ + g_string_append (buffer, tmp_buff); + g_free (tmp_buff); + if ((int) bytes_read < left) + { + left = left - bytes_read; + tmp_buff = g_strnfill (left, '?'); + g_string_append (buffer, tmp_buff); + g_free (tmp_buff); + } + return ESTR_PROBLEM; + + case G_CONVERT_ERROR_BAD_URI: /* Don't know how handle this error :( */ + case G_CONVERT_ERROR_NOT_ABSOLUTE_PATH: /* Don't know how handle this error :( */ + case G_CONVERT_ERROR_FAILED: /* Conversion failed for some reason. */ + default: + g_free (tmp_buff); + return ESTR_FAILURE; + } + } + else if (tmp_buff == NULL) + { + g_string_append (buffer, string); + return ESTR_PROBLEM; + } + else if (*tmp_buff == '\0') + { + g_free (tmp_buff); + g_string_append (buffer, string); + return state; + } + else + { + g_string_append (buffer, tmp_buff); + g_free (tmp_buff); + string += bytes_read; + left -= bytes_read; + } + } + + return state; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_test_encoding_class (const char *encoding, const char *const *table) +{ + int result = 0; + + if (encoding != NULL) + { + int t; + + for (t = 0; table[t] != NULL; t++) + if (g_ascii_strncasecmp (encoding, table[t], strlen (table[t])) == 0) + result++; + } + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_choose_str_functions (void) +{ + if (str_test_encoding_class (codeset, str_utf8_encodings)) + used_class = str_utf8_init (); + else if (str_test_encoding_class (codeset, str_8bit_encodings)) + used_class = str_8bit_init (); + else + used_class = str_ascii_init (); +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +GIConv +str_crt_conv_to (const char *to_enc) +{ + return (!str_test_not_convert (to_enc)) ? g_iconv_open (to_enc, codeset) : str_cnv_not_convert; +} + +/* --------------------------------------------------------------------------------------------- */ + +GIConv +str_crt_conv_from (const char *from_enc) +{ + return (!str_test_not_convert (from_enc)) + ? g_iconv_open (codeset, from_enc) : str_cnv_not_convert; +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_close_conv (GIConv conv) +{ + if (conv != str_cnv_not_convert) + g_iconv_close (conv); +} + +/* --------------------------------------------------------------------------------------------- */ + +estr_t +str_convert (GIConv coder, const char *string, GString * buffer) +{ + return _str_convert (coder, string, -1, buffer); +} + +/* --------------------------------------------------------------------------------------------- */ + +estr_t +str_nconvert (GIConv coder, const char *string, int size, GString * buffer) +{ + return _str_convert (coder, string, size, buffer); +} + +/* --------------------------------------------------------------------------------------------- */ + +gchar * +str_conv_gerror_message (GError * mcerror, const char *def_msg) +{ + return used_class.conv_gerror_message (mcerror, def_msg); +} + +/* --------------------------------------------------------------------------------------------- */ + +estr_t +str_vfs_convert_from (GIConv coder, const char *string, GString * buffer) +{ + estr_t result = ESTR_SUCCESS; + + if (coder == str_cnv_not_convert) + g_string_append (buffer, string != NULL ? string : ""); + else + result = _str_convert (coder, string, -1, buffer); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +estr_t +str_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer) +{ + return used_class.vfs_convert_to (coder, string, size, buffer); +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_printf (GString * buffer, const char *format, ...) +{ + va_list ap; + va_start (ap, format); + + g_string_append_vprintf (buffer, format, ap); + va_end (ap); +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_insert_replace_char (GString * buffer) +{ + used_class.insert_replace_char (buffer); +} + +/* --------------------------------------------------------------------------------------------- */ + +estr_t +str_translate_char (GIConv conv, const char *keys, size_t ch_size, char *output, size_t out_size) +{ + size_t left; + size_t cnv; + + g_iconv (conv, NULL, NULL, NULL, NULL); + + left = (ch_size == (size_t) (-1)) ? strlen (keys) : ch_size; + + cnv = g_iconv (conv, (gchar **) & keys, &left, &output, &out_size); + if (cnv == (size_t) (-1)) + return (errno == EINVAL) ? ESTR_PROBLEM : ESTR_FAILURE; + + output[0] = '\0'; + return ESTR_SUCCESS; +} + +/* --------------------------------------------------------------------------------------------- */ + +const char * +str_detect_termencoding (void) +{ + if (term_encoding == NULL) + { + /* On Linux, nl_langinfo (CODESET) returns upper case UTF-8 whether the LANG is set + to utf-8 or UTF-8. + On Mac OS X, it returns the same case as the LANG input. + So let tranform result of nl_langinfo (CODESET) to upper case unconditionally. */ + term_encoding = g_ascii_strup (nl_langinfo (CODESET), -1); + } + + return term_encoding; +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +str_isutf8 (const char *codeset_name) +{ + return (str_test_encoding_class (codeset_name, str_utf8_encodings) != 0); +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_init_strings (const char *termenc) +{ + codeset = termenc != NULL ? g_ascii_strup (termenc, -1) : g_strdup (str_detect_termencoding ()); + + str_cnv_not_convert = g_iconv_open (codeset, codeset); + if (str_cnv_not_convert == INVALID_CONV) + { + if (termenc != NULL) + { + g_free (codeset); + codeset = g_strdup (str_detect_termencoding ()); + str_cnv_not_convert = g_iconv_open (codeset, codeset); + } + + if (str_cnv_not_convert == INVALID_CONV) + { + g_free (codeset); + codeset = g_strdup (DEFAULT_CHARSET); + str_cnv_not_convert = g_iconv_open (codeset, codeset); + } + } + + str_cnv_to_term = str_cnv_not_convert; + str_cnv_from_term = str_cnv_not_convert; + + str_choose_str_functions (); +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_uninit_strings (void) +{ + if (str_cnv_not_convert != INVALID_CONV) + g_iconv_close (str_cnv_not_convert); + /* NULL-ize pointers to avoid double free in unit tests */ + MC_PTR_FREE (term_encoding); + MC_PTR_FREE (codeset); +} + +/* --------------------------------------------------------------------------------------------- */ + +const char * +str_term_form (const char *text) +{ + return used_class.term_form (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +const char * +str_fit_to_term (const char *text, int width, align_crt_t just_mode) +{ + return used_class.fit_to_term (text, width, just_mode); +} + +/* --------------------------------------------------------------------------------------------- */ + +const char * +str_term_trim (const char *text, int width) +{ + return used_class.term_trim (text, width); +} + +/* --------------------------------------------------------------------------------------------- */ + +const char * +str_term_substring (const char *text, int start, int width) +{ + return used_class.term_substring (text, start, width); +} + +/* --------------------------------------------------------------------------------------------- */ + +char * +str_get_next_char (char *text) +{ + + used_class.cnext_char ((const char **) &text); + return text; +} + +/* --------------------------------------------------------------------------------------------- */ + +const char * +str_cget_next_char (const char *text) +{ + used_class.cnext_char (&text); + return text; +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_next_char (char **text) +{ + used_class.cnext_char ((const char **) text); +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_cnext_char (const char **text) +{ + used_class.cnext_char (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +char * +str_get_prev_char (char *text) +{ + used_class.cprev_char ((const char **) &text); + return text; +} + +/* --------------------------------------------------------------------------------------------- */ + +const char * +str_cget_prev_char (const char *text) +{ + used_class.cprev_char (&text); + return text; +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_prev_char (char **text) +{ + used_class.cprev_char ((const char **) text); +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_cprev_char (const char **text) +{ + used_class.cprev_char (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +char * +str_get_next_char_safe (char *text) +{ + used_class.cnext_char_safe ((const char **) &text); + return text; +} + +/* --------------------------------------------------------------------------------------------- */ + +const char * +str_cget_next_char_safe (const char *text) +{ + used_class.cnext_char_safe (&text); + return text; +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_next_char_safe (char **text) +{ + used_class.cnext_char_safe ((const char **) text); +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_cnext_char_safe (const char **text) +{ + used_class.cnext_char_safe (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +char * +str_get_prev_char_safe (char *text) +{ + used_class.cprev_char_safe ((const char **) &text); + return text; +} + +/* --------------------------------------------------------------------------------------------- */ + +const char * +str_cget_prev_char_safe (const char *text) +{ + used_class.cprev_char_safe (&text); + return text; +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_prev_char_safe (char **text) +{ + used_class.cprev_char_safe ((const char **) text); +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_cprev_char_safe (const char **text) +{ + used_class.cprev_char_safe (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_next_noncomb_char (char **text) +{ + return used_class.cnext_noncomb_char ((const char **) text); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_cnext_noncomb_char (const char **text) +{ + return used_class.cnext_noncomb_char (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_prev_noncomb_char (char **text, const char *begin) +{ + return used_class.cprev_noncomb_char ((const char **) text, begin); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_cprev_noncomb_char (const char **text, const char *begin) +{ + return used_class.cprev_noncomb_char (text, begin); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_is_valid_char (const char *ch, size_t size) +{ + return used_class.is_valid_char (ch, size); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_term_width1 (const char *text) +{ + return used_class.term_width1 (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_term_width2 (const char *text, size_t length) +{ + return used_class.term_width2 (text, length); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_term_char_width (const char *text) +{ + return used_class.term_char_width (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_offset_to_pos (const char *text, size_t length) +{ + return used_class.offset_to_pos (text, length); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_length (const char *text) +{ + return used_class.length (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_length_char (const char *text) +{ + return str_cget_next_char_safe (text) - text; +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_length2 (const char *text, int size) +{ + return used_class.length2 (text, size); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_length_noncomb (const char *text) +{ + return used_class.length_noncomb (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_column_to_pos (const char *text, size_t pos) +{ + return used_class.column_to_pos (text, pos); +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +str_isspace (const char *ch) +{ + return used_class.char_isspace (ch); +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +str_ispunct (const char *ch) +{ + return used_class.char_ispunct (ch); +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +str_isalnum (const char *ch) +{ + return used_class.char_isalnum (ch); +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +str_isdigit (const char *ch) +{ + return used_class.char_isdigit (ch); +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +str_toupper (const char *ch, char **out, size_t * remain) +{ + return used_class.char_toupper (ch, out, remain); +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +str_tolower (const char *ch, char **out, size_t * remain) +{ + return used_class.char_tolower (ch, out, remain); +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +str_isprint (const char *ch) +{ + return used_class.char_isprint (ch); +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +str_iscombiningmark (const char *ch) +{ + return used_class.char_iscombiningmark (ch); +} + +/* --------------------------------------------------------------------------------------------- */ + +const char * +str_trunc (const char *text, int width) +{ + return used_class.trunc (text, width); +} + +/* --------------------------------------------------------------------------------------------- */ + +char * +str_create_search_needle (const char *needle, gboolean case_sen) +{ + return used_class.create_search_needle (needle, case_sen); +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_release_search_needle (char *needle, gboolean case_sen) +{ + used_class.release_search_needle (needle, case_sen); +} + +/* --------------------------------------------------------------------------------------------- */ + +const char * +str_search_first (const char *text, const char *search, gboolean case_sen) +{ + return used_class.search_first (text, search, case_sen); +} + +/* --------------------------------------------------------------------------------------------- */ + +const char * +str_search_last (const char *text, const char *search, gboolean case_sen) +{ + return used_class.search_last (text, search, case_sen); +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +str_is_valid_string (const char *text) +{ + return used_class.is_valid_string (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_compare (const char *t1, const char *t2) +{ + return used_class.compare (t1, t2); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_ncompare (const char *t1, const char *t2) +{ + return used_class.ncompare (t1, t2); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_casecmp (const char *t1, const char *t2) +{ + return used_class.casecmp (t1, t2); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_ncasecmp (const char *t1, const char *t2) +{ + return used_class.ncasecmp (t1, t2); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_prefix (const char *text, const char *prefix) +{ + return used_class.prefix (text, prefix); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_caseprefix (const char *text, const char *prefix) +{ + return used_class.caseprefix (text, prefix); +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_fix_string (char *text) +{ + used_class.fix_string (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +char * +str_create_key (const char *text, gboolean case_sen) +{ + return used_class.create_key (text, case_sen); +} + +/* --------------------------------------------------------------------------------------------- */ + +char * +str_create_key_for_filename (const char *text, gboolean case_sen) +{ + return used_class.create_key_for_filename (text, case_sen); +} + +/* --------------------------------------------------------------------------------------------- */ + +int +str_key_collate (const char *t1, const char *t2, gboolean case_sen) +{ + return used_class.key_collate (t1, t2, case_sen); +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_release_key (char *key, gboolean case_sen) +{ + used_class.release_key (key, case_sen); +} + +/* --------------------------------------------------------------------------------------------- */ + +void +str_msg_term_size (const char *text, int *lines, int *columns) +{ + char *p, *tmp; + char *q; + char c = '\0'; + + *lines = 1; + *columns = 0; + + tmp = g_strdup (text); + p = tmp; + + while (TRUE) + { + int width; + + q = strchr (p, '\n'); + if (q != NULL) + { + c = q[0]; + q[0] = '\0'; + } + + width = str_term_width1 (p); + if (width > *columns) + *columns = width; + + if (q == NULL) + break; + + q[0] = c; + p = q + 1; + (*lines)++; + } + + g_free (tmp); +} + +/* --------------------------------------------------------------------------------------------- */ + +char * +strrstr_skip_count (const char *haystack, const char *needle, size_t skip_count) +{ + char *semi; + ssize_t len; + + len = strlen (haystack); + + do + { + semi = g_strrstr_len (haystack, len, needle); + if (semi == NULL) + return NULL; + len = semi - haystack - 1; + } + while (skip_count-- != 0); + + return semi; +} + +/* --------------------------------------------------------------------------------------------- */ +/* Interprete string as a non-negative decimal integer, optionally multiplied by various values. + * + * @param str input value + * @param invalid set to TRUE if "str" does not represent a number in this format + * + * @return non-integer representation of "str", 0 in case of error. + */ + +uintmax_t +parse_integer (const char *str, gboolean * invalid) +{ + uintmax_t n; + char *suffix; + strtol_error_t e; + + e = xstrtoumax (str, &suffix, 10, &n, "bcEGkKMPTwYZ0"); + if (e == LONGINT_INVALID_SUFFIX_CHAR && *suffix == 'x') + { + uintmax_t multiplier; + + multiplier = parse_integer (suffix + 1, invalid); + if (multiplier != 0 && n * multiplier / multiplier != n) + { + *invalid = TRUE; + return 0; + } + + n *= multiplier; + } + else if (e != LONGINT_OK) + { + *invalid = TRUE; + n = 0; + } + + return n; +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/lib/strutil/strutil8bit.c b/lib/strutil/strutil8bit.c new file mode 100644 index 0000000..abb7a99 --- /dev/null +++ b/lib/strutil/strutil8bit.c @@ -0,0 +1,860 @@ +/* + 8bit strings utilities + + Copyright (C) 2007-2022 + Free Software Foundation, Inc. + + Written by: + Rostislav Benes, 2007 + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include <ctype.h> +#include <stdlib.h> + +#include "lib/global.h" +#include "lib/strutil.h" + +/* Functions for singlebyte encodings, all characters have width 1 + * using standard system functions. + * There are only small differences between functions in strutil8bit.c + * and strutilascii.c. + */ + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +/* + * Inlines to equalize 'char' signedness for single 'char' encodings. + * Instead of writing + * isspace ((unsigned char) c); + * you can write + * char_isspace (c); + */ +#define DECLARE_CTYPE_WRAPPER(func_name) \ +static inline int char_##func_name(char c) \ +{ \ + return func_name((int)(unsigned char)c); \ +} + +/*** file scope type declarations ****************************************************************/ + +/*** file scope variables ************************************************************************/ + +static const char replch = '?'; + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +/* *INDENT-OFF* */ +DECLARE_CTYPE_WRAPPER (isalnum) +DECLARE_CTYPE_WRAPPER (isdigit) +DECLARE_CTYPE_WRAPPER (isprint) +DECLARE_CTYPE_WRAPPER (ispunct) +DECLARE_CTYPE_WRAPPER (isspace) +DECLARE_CTYPE_WRAPPER (toupper) +DECLARE_CTYPE_WRAPPER (tolower) +/* *INDENT-ON* */ + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_8bit_insert_replace_char (GString * buffer) +{ + g_string_append_c (buffer, replch); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_8bit_is_valid_string (const char *text) +{ + (void) text; + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_is_valid_char (const char *ch, size_t size) +{ + (void) ch; + (void) size; + return 1; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_8bit_cnext_char (const char **text) +{ + (*text)++; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_8bit_cprev_char (const char **text) +{ + (*text)--; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_cnext_noncomb_char (const char **text) +{ + if (*text[0] == '\0') + return 0; + + (*text)++; + return 1; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_cprev_noncomb_char (const char **text, const char *begin) +{ + if ((*text) == begin) + return 0; + + (*text)--; + return 1; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_8bit_isspace (const char *text) +{ + return char_isspace (text[0]) != 0; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_8bit_ispunct (const char *text) +{ + return char_ispunct (text[0]) != 0; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_8bit_isalnum (const char *text) +{ + return char_isalnum (text[0]) != 0; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_8bit_isdigit (const char *text) +{ + return char_isdigit (text[0]) != 0; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_8bit_isprint (const char *text) +{ + return char_isprint (text[0]) != 0; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_8bit_iscombiningmark (const char *text) +{ + (void) text; + return FALSE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_toupper (const char *text, char **out, size_t * remain) +{ + if (*remain <= 1) + return FALSE; + + (*out)[0] = char_toupper (text[0]); + (*out)++; + (*remain)--; + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_8bit_tolower (const char *text, char **out, size_t * remain) +{ + if (*remain <= 1) + return FALSE; + + (*out)[0] = char_tolower (text[0]); + (*out)++; + (*remain)--; + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_length (const char *text) +{ + return strlen (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_length2 (const char *text, int size) +{ + return (size >= 0) ? MIN (strlen (text), (gsize) size) : strlen (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gchar * +str_8bit_conv_gerror_message (GError * mcerror, const char *def_msg) +{ + GIConv conv; + gchar *ret; + + /* glib messages are in UTF-8 charset */ + conv = str_crt_conv_from ("UTF-8"); + + if (conv == INVALID_CONV) + ret = g_strdup (def_msg != NULL ? def_msg : ""); + else + { + GString *buf; + + buf = g_string_new (""); + + if (str_convert (conv, mcerror->message, buf) != ESTR_FAILURE) + ret = g_string_free (buf, FALSE); + else + { + ret = g_strdup (def_msg != NULL ? def_msg : ""); + g_string_free (buf, TRUE); + } + + str_close_conv (conv); + } + + return ret; +} + +/* --------------------------------------------------------------------------------------------- */ + +static estr_t +str_8bit_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer) +{ + estr_t result = ESTR_SUCCESS; + + if (coder == str_cnv_not_convert) + g_string_append_len (buffer, string, size); + else + result = str_nconvert (coder, string, size, buffer); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_8bit_term_form (const char *text) +{ + static char result[BUF_MEDIUM]; + char *actual; + size_t remain; + size_t length; + size_t pos = 0; + + actual = result; + remain = sizeof (result); + length = strlen (text); + + for (; pos < length && remain > 1; pos++, actual++, remain--) + actual[0] = char_isprint (text[pos]) ? text[pos] : '.'; + + actual[0] = '\0'; + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_8bit_fit_to_term (const char *text, int width, align_crt_t just_mode) +{ + static char result[BUF_MEDIUM]; + char *actual; + size_t remain; + int ident = 0; + size_t length; + size_t pos = 0; + + length = strlen (text); + actual = result; + remain = sizeof (result); + + if ((int) length <= width) + { + switch (HIDE_FIT (just_mode)) + { + case J_CENTER_LEFT: + case J_CENTER: + ident = (width - length) / 2; + break; + case J_RIGHT: + ident = width - length; + break; + default: + break; + } + + if ((int) remain <= ident) + goto finally; + memset (actual, ' ', ident); + actual += ident; + remain -= ident; + + for (; pos < length && remain > 1; pos++, actual++, remain--) + actual[0] = char_isprint (text[pos]) ? text[pos] : '.'; + + if (width - length - ident > 0) + { + if (remain <= width - length - ident) + goto finally; + memset (actual, ' ', width - length - ident); + actual += width - length - ident; + } + } + else if (IS_FIT (just_mode)) + { + for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--) + actual[0] = char_isprint (text[pos]) ? text[pos] : '.'; + + if (remain <= 1) + goto finally; + actual[0] = '~'; + actual++; + remain--; + + pos += length - width + 1; + for (; pos < length && remain > 1; pos++, actual++, remain--) + actual[0] = char_isprint (text[pos]) ? text[pos] : '.'; + } + else + { + switch (HIDE_FIT (just_mode)) + { + case J_CENTER: + ident = (length - width) / 2; + break; + case J_RIGHT: + ident = length - width; + break; + default: + break; + } + + pos += ident; + for (; pos < (gsize) (ident + width) && remain > 1; pos++, actual++, remain--) + actual[0] = char_isprint (text[pos]) ? text[pos] : '.'; + } + + finally: + if (actual >= result + sizeof (result)) + actual = result + sizeof (result) - 1; + actual[0] = '\0'; + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_8bit_term_trim (const char *text, int width) +{ + static char result[BUF_MEDIUM]; + size_t remain; + char *actual; + size_t length; + + length = strlen (text); + actual = result; + remain = sizeof (result); + + if (width > 0) + { + size_t pos; + + if (width >= (int) length) + { + for (pos = 0; pos < length && remain > 1; pos++, actual++, remain--) + actual[0] = char_isprint (text[pos]) ? text[pos] : '.'; + } + else if (width <= 3) + { + memset (actual, '.', width); + actual += width; + } + else + { + memset (actual, '.', 3); + actual += 3; + remain -= 3; + + for (pos = length - width + 3; pos < length && remain > 1; pos++, actual++, remain--) + actual[0] = char_isprint (text[pos]) ? text[pos] : '.'; + } + } + + actual[0] = '\0'; + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_term_width2 (const char *text, size_t length) +{ + return (length != (size_t) (-1)) ? MIN (strlen (text), length) : strlen (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_term_width1 (const char *text) +{ + return str_8bit_term_width2 (text, (size_t) (-1)); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_term_char_width (const char *text) +{ + (void) text; + return 1; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_8bit_term_substring (const char *text, int start, int width) +{ + static char result[BUF_MEDIUM]; + size_t remain; + char *actual; + size_t length; + + actual = result; + remain = sizeof (result); + length = strlen (text); + + if (start < (int) length) + { + size_t pos; + + for (pos = start; pos < length && width > 0 && remain > 1; + pos++, width--, actual++, remain--) + actual[0] = char_isprint (text[pos]) ? text[pos] : '.'; + } + + for (; width > 0 && remain > 1; actual++, remain--, width--) + actual[0] = ' '; + + actual[0] = '\0'; + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_8bit_trunc (const char *text, int width) +{ + static char result[MC_MAXPATHLEN]; + int remain; + char *actual; + size_t pos = 0; + size_t length; + + actual = result; + remain = sizeof (result); + length = strlen (text); + + if ((int) length > width) + { + for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--) + actual[0] = char_isprint (text[pos]) ? text[pos] : '.'; + + if (remain <= 1) + goto finally; + actual[0] = '~'; + actual++; + remain--; + + pos += length - width + 1; + for (; pos < length && remain > 1; pos++, actual++, remain--) + actual[0] = char_isprint (text[pos]) ? text[pos] : '.'; + } + else + { + for (; pos < length && remain > 1; pos++, actual++, remain--) + actual[0] = char_isprint (text[pos]) ? text[pos] : '.'; + } + + finally: + actual[0] = '\0'; + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_offset_to_pos (const char *text, size_t length) +{ + (void) text; + return (int) length; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_column_to_pos (const char *text, size_t pos) +{ + (void) text; + return (int) pos; +} + +/* --------------------------------------------------------------------------------------------- */ + +static char * +str_8bit_create_search_needle (const char *needle, gboolean case_sen) +{ + (void) case_sen; + return (char *) needle; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_8bit_release_search_needle (char *needle, gboolean case_sen) +{ + (void) case_sen; + (void) needle; +} + +/* --------------------------------------------------------------------------------------------- */ + +static char * +str_8bit_strdown (const char *str) +{ + char *rets, *p; + + if (str == NULL) + return NULL; + + rets = g_strdup (str); + + for (p = rets; *p != '\0'; p++) + *p = char_tolower (*p); + + return rets; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_8bit_search_first (const char *text, const char *search, gboolean case_sen) +{ + char *fold_text; + char *fold_search; + const char *match; + + fold_text = case_sen ? (char *) text : str_8bit_strdown (text); + fold_search = case_sen ? (char *) search : str_8bit_strdown (search); + + match = g_strstr_len (fold_text, -1, fold_search); + if (match != NULL) + { + size_t offset; + + offset = match - fold_text; + match = text + offset; + } + + if (!case_sen) + { + g_free (fold_text); + g_free (fold_search); + } + + return match; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_8bit_search_last (const char *text, const char *search, gboolean case_sen) +{ + char *fold_text; + char *fold_search; + const char *match; + + fold_text = case_sen ? (char *) text : str_8bit_strdown (text); + fold_search = case_sen ? (char *) search : str_8bit_strdown (search); + + match = g_strrstr_len (fold_text, -1, fold_search); + if (match != NULL) + { + size_t offset; + + offset = match - fold_text; + match = text + offset; + } + + if (!case_sen) + { + g_free (fold_text); + g_free (fold_search); + } + + return match; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_compare (const char *t1, const char *t2) +{ + return strcmp (t1, t2); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_ncompare (const char *t1, const char *t2) +{ + return strncmp (t1, t2, MIN (strlen (t1), strlen (t2))); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_casecmp (const char *s1, const char *s2) +{ + /* code from GLib */ + +#ifdef HAVE_STRCASECMP + g_return_val_if_fail (s1 != NULL, 0); + g_return_val_if_fail (s2 != NULL, 0); + + return strcasecmp (s1, s2); +#else + gint c1, c2; + + g_return_val_if_fail (s1 != NULL, 0); + g_return_val_if_fail (s2 != NULL, 0); + + while (*s1 != '\0' && *s2 != '\0') + { + /* According to A. Cox, some platforms have islower's that + * don't work right on non-uppercase + */ + c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1; + c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2; + if (c1 != c2) + return (c1 - c2); + s1++; + s2++; + } + + return (((gint) (guchar) * s1) - ((gint) (guchar) * s2)); +#endif +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_ncasecmp (const char *s1, const char *s2) +{ + size_t n; + + g_return_val_if_fail (s1 != NULL, 0); + g_return_val_if_fail (s2 != NULL, 0); + + n = MIN (strlen (s1), strlen (s2)); + + /* code from GLib */ + +#ifdef HAVE_STRNCASECMP + return strncasecmp (s1, s2, n); +#else + gint c1, c2; + + while (n != 0 && *s1 != '\0' && *s2 != '\0') + { + n -= 1; + /* According to A. Cox, some platforms have islower's that + * don't work right on non-uppercase + */ + c1 = isupper ((guchar) * s1) ? tolower ((guchar) * s1) : *s1; + c2 = isupper ((guchar) * s2) ? tolower ((guchar) * s2) : *s2; + if (c1 != c2) + return (c1 - c2); + s1++; + s2++; + } + + if (n == 0) + return 0; + + return (((gint) (guchar) * s1) - ((gint) (guchar) * s2)); + +#endif +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_prefix (const char *text, const char *prefix) +{ + int result; + + for (result = 0; text[result] != '\0' && prefix[result] != '\0' + && text[result] == prefix[result]; result++); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_caseprefix (const char *text, const char *prefix) +{ + int result; + + for (result = 0; text[result] != '\0' && prefix[result] != '\0' + && char_toupper (text[result]) == char_toupper (prefix[result]); result++); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_8bit_fix_string (char *text) +{ + (void) text; +} + +/* --------------------------------------------------------------------------------------------- */ + +static char * +str_8bit_create_key (const char *text, gboolean case_sen) +{ + return case_sen ? (char *) text : str_8bit_strdown (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_8bit_key_collate (const char *t1, const char *t2, gboolean case_sen) +{ + return case_sen ? strcmp (t1, t2) : strcoll (t1, t2); +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_8bit_release_key (char *key, gboolean case_sen) +{ + if (!case_sen) + g_free (key); +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +struct str_class +str_8bit_init (void) +{ + struct str_class result; + + result.conv_gerror_message = str_8bit_conv_gerror_message; + result.vfs_convert_to = str_8bit_vfs_convert_to; + result.insert_replace_char = str_8bit_insert_replace_char; + result.is_valid_string = str_8bit_is_valid_string; + result.is_valid_char = str_8bit_is_valid_char; + result.cnext_char = str_8bit_cnext_char; + result.cprev_char = str_8bit_cprev_char; + result.cnext_char_safe = str_8bit_cnext_char; + result.cprev_char_safe = str_8bit_cprev_char; + result.cnext_noncomb_char = str_8bit_cnext_noncomb_char; + result.cprev_noncomb_char = str_8bit_cprev_noncomb_char; + result.char_isspace = str_8bit_isspace; + result.char_ispunct = str_8bit_ispunct; + result.char_isalnum = str_8bit_isalnum; + result.char_isdigit = str_8bit_isdigit; + result.char_isprint = str_8bit_isprint; + result.char_iscombiningmark = str_8bit_iscombiningmark; + result.char_toupper = str_8bit_toupper; + result.char_tolower = str_8bit_tolower; + result.length = str_8bit_length; + result.length2 = str_8bit_length2; + result.length_noncomb = str_8bit_length; + result.fix_string = str_8bit_fix_string; + result.term_form = str_8bit_term_form; + result.fit_to_term = str_8bit_fit_to_term; + result.term_trim = str_8bit_term_trim; + result.term_width2 = str_8bit_term_width2; + result.term_width1 = str_8bit_term_width1; + result.term_char_width = str_8bit_term_char_width; + result.term_substring = str_8bit_term_substring; + result.trunc = str_8bit_trunc; + result.offset_to_pos = str_8bit_offset_to_pos; + result.column_to_pos = str_8bit_column_to_pos; + result.create_search_needle = str_8bit_create_search_needle; + result.release_search_needle = str_8bit_release_search_needle; + result.search_first = str_8bit_search_first; + result.search_last = str_8bit_search_last; + result.compare = str_8bit_compare; + result.ncompare = str_8bit_ncompare; + result.casecmp = str_8bit_casecmp; + result.ncasecmp = str_8bit_ncasecmp; + result.prefix = str_8bit_prefix; + result.caseprefix = str_8bit_caseprefix; + result.create_key = str_8bit_create_key; + result.create_key_for_filename = str_8bit_create_key; + result.key_collate = str_8bit_key_collate; + result.release_key = str_8bit_release_key; + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/lib/strutil/strutilascii.c b/lib/strutil/strutilascii.c new file mode 100644 index 0000000..7a8a5ae --- /dev/null +++ b/lib/strutil/strutilascii.c @@ -0,0 +1,783 @@ +/* + ASCII strings utilities + + Copyright (C) 2007-2022 + Free Software Foundation, Inc. + + Written by: + Rostislav Benes, 2007 + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include <ctype.h> +#include <stdlib.h> + +#include "lib/global.h" +#include "lib/strutil.h" + +/* using g_ascii function from glib + * on terminal are showed only ascii characters (lower than 0x80) + */ + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +/*** file scope type declarations ****************************************************************/ + +/*** file scope variables ************************************************************************/ + +static const char replch = '?'; + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +static void +str_ascii_insert_replace_char (GString * buffer) +{ + g_string_append_c (buffer, replch); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_ascii_is_valid_string (const char *text) +{ + (void) text; + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_is_valid_char (const char *ch, size_t size) +{ + (void) ch; + (void) size; + return 1; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_ascii_cnext_char (const char **text) +{ + (*text)++; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_ascii_cprev_char (const char **text) +{ + (*text)--; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_cnext_noncomb_char (const char **text) +{ + if (*text[0] == '\0') + return 0; + + (*text)++; + return 1; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_cprev_noncomb_char (const char **text, const char *begin) +{ + if ((*text) == begin) + return 0; + + (*text)--; + return 1; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_ascii_isspace (const char *text) +{ + return g_ascii_isspace ((gchar) text[0]); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_ascii_ispunct (const char *text) +{ + return g_ascii_ispunct ((gchar) text[0]); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_ascii_isalnum (const char *text) +{ + return g_ascii_isalnum ((gchar) text[0]); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_ascii_isdigit (const char *text) +{ + return g_ascii_isdigit ((gchar) text[0]); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_ascii_isprint (const char *text) +{ + return g_ascii_isprint ((gchar) text[0]); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_ascii_iscombiningmark (const char *text) +{ + (void) text; + return FALSE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_toupper (const char *text, char **out, size_t * remain) +{ + if (*remain <= 1) + return FALSE; + + (*out)[0] = (char) g_ascii_toupper ((gchar) text[0]); + (*out)++; + (*remain)--; + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_ascii_tolower (const char *text, char **out, size_t * remain) +{ + if (*remain <= 1) + return FALSE; + + (*out)[0] = (char) g_ascii_tolower ((gchar) text[0]); + (*out)++; + (*remain)--; + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_length (const char *text) +{ + return strlen (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_length2 (const char *text, int size) +{ + return (size >= 0) ? MIN (strlen (text), (gsize) size) : strlen (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gchar * +str_ascii_conv_gerror_message (GError * mcerror, const char *def_msg) +{ + /* the same as str_utf8_conv_gerror_message() */ + if (mcerror != NULL) + return g_strdup (mcerror->message); + + return g_strdup (def_msg != NULL ? def_msg : ""); +} + +/* --------------------------------------------------------------------------------------------- */ + +static estr_t +str_ascii_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer) +{ + (void) coder; + g_string_append_len (buffer, string, size); + return ESTR_SUCCESS; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_ascii_term_form (const char *text) +{ + static char result[BUF_MEDIUM]; + char *actual; + size_t remain; + size_t length; + size_t pos = 0; + + actual = result; + remain = sizeof (result); + length = strlen (text); + + /* go throw all characters and check, if they are ascii and printable */ + for (; pos < length && remain > 1; pos++, actual++, remain--) + { + actual[0] = isascii ((unsigned char) text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + + actual[0] = '\0'; + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_ascii_fit_to_term (const char *text, int width, align_crt_t just_mode) +{ + static char result[BUF_MEDIUM]; + char *actual; + size_t remain; + int ident = 0; + size_t length; + size_t pos = 0; + + length = strlen (text); + actual = result; + remain = sizeof (result); + + if ((int) length <= width) + { + switch (HIDE_FIT (just_mode)) + { + case J_CENTER_LEFT: + case J_CENTER: + ident = (width - length) / 2; + break; + case J_RIGHT: + ident = width - length; + break; + default: + break; + } + + /* add space before text */ + if ((int) remain <= ident) + goto finally; + memset (actual, ' ', ident); + actual += ident; + remain -= ident; + + /* copy all characters */ + for (; pos < (gsize) length && remain > 1; pos++, actual++, remain--) + { + actual[0] = isascii ((unsigned char) text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + + /* add space after text */ + if (width - length - ident > 0) + { + if (remain <= width - length - ident) + goto finally; + memset (actual, ' ', width - length - ident); + actual += width - length - ident; + } + } + else if (IS_FIT (just_mode)) + { + /* copy prefix of text, that is not wider than width / 2 */ + for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--) + { + actual[0] = isascii ((unsigned char) text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + + if (remain <= 1) + goto finally; + actual[0] = '~'; + actual++; + remain--; + + pos += length - width + 1; + + /* copy suffix of text */ + for (; pos < length && remain > 1; pos++, actual++, remain--) + { + actual[0] = isascii ((unsigned char) text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + } + else + { + switch (HIDE_FIT (just_mode)) + { + case J_CENTER: + ident = (length - width) / 2; + break; + case J_RIGHT: + ident = length - width; + break; + default: + break; + } + + /* copy substring text, substring start from ident and take width + * characters from text */ + pos += ident; + for (; pos < (gsize) (ident + width) && remain > 1; pos++, actual++, remain--) + { + actual[0] = isascii ((unsigned char) text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + + } + + finally: + if (actual >= result + sizeof (result)) + actual = result + sizeof (result) - 1; + actual[0] = '\0'; + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_ascii_term_trim (const char *text, int width) +{ + static char result[BUF_MEDIUM]; + size_t remain; + char *actual; + size_t length; + + length = strlen (text); + actual = result; + remain = sizeof (result); + + if (width > 0) + { + size_t pos; + + if (width >= (int) length) + { + /* copy all characters */ + for (pos = 0; pos < length && remain > 1; pos++, actual++, remain--) + { + actual[0] = isascii ((unsigned char) text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + } + else if (width <= 3) + { + memset (actual, '.', width); + actual += width; + } + else + { + memset (actual, '.', 3); + actual += 3; + remain -= 3; + + /* copy suffix of text */ + for (pos = length - width + 3; pos < length && remain > 1; pos++, actual++, remain--) + { + actual[0] = isascii ((unsigned char) text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + } + } + + actual[0] = '\0'; + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_term_width2 (const char *text, size_t length) +{ + return (length != (size_t) (-1)) ? MIN (strlen (text), length) : strlen (text); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_term_width1 (const char *text) +{ + return str_ascii_term_width2 (text, (size_t) (-1)); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_term_char_width (const char *text) +{ + (void) text; + return 1; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_ascii_term_substring (const char *text, int start, int width) +{ + static char result[BUF_MEDIUM]; + size_t remain; + char *actual; + size_t length; + + actual = result; + remain = sizeof (result); + length = strlen (text); + + if (start < (int) length) + { + size_t pos; + + /* copy at most width characters from text from start */ + for (pos = start; pos < length && width > 0 && remain > 1; + pos++, width--, actual++, remain--) + { + actual[0] = isascii ((unsigned char) text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + } + + /* if text is shorter then width, add space to the end */ + for (; width > 0 && remain > 1; actual++, remain--, width--) + actual[0] = ' '; + + actual[0] = '\0'; + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_ascii_trunc (const char *text, int width) +{ + static char result[MC_MAXPATHLEN]; + int remain; + char *actual; + size_t pos = 0; + size_t length; + + actual = result; + remain = sizeof (result); + length = strlen (text); + + if ((int) length > width) + { + /* copy prefix of text */ + for (; pos + 1 <= (gsize) width / 2 && remain > 1; actual++, pos++, remain--) + { + actual[0] = isascii ((unsigned char) text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + + if (remain <= 1) + goto finally; + actual[0] = '~'; + actual++; + remain--; + + pos += length - width + 1; + + /* copy suffix of text */ + for (; pos < length && remain > 1; pos++, actual++, remain--) + { + actual[0] = isascii ((unsigned char) text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + } + else + { + /* copy all characters */ + for (; pos < length && remain > 1; pos++, actual++, remain--) + { + actual[0] = isascii ((unsigned char) text[pos]) ? text[pos] : '?'; + actual[0] = g_ascii_isprint ((gchar) actual[0]) ? actual[0] : '.'; + } + } + + finally: + actual[0] = '\0'; + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_offset_to_pos (const char *text, size_t length) +{ + (void) text; + return (int) length; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_column_to_pos (const char *text, size_t pos) +{ + (void) text; + return (int) pos; +} + +/* --------------------------------------------------------------------------------------------- */ + +static char * +str_ascii_create_search_needle (const char *needle, gboolean case_sen) +{ + (void) case_sen; + return (char *) needle; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_ascii_release_search_needle (char *needle, gboolean case_sen) +{ + (void) case_sen; + (void) needle; + +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_ascii_search_first (const char *text, const char *search, gboolean case_sen) +{ + char *fold_text; + char *fold_search; + const char *match; + + fold_text = case_sen ? (char *) text : g_ascii_strdown (text, -1); + fold_search = case_sen ? (char *) search : g_ascii_strdown (search, -1); + + match = g_strstr_len (fold_text, -1, fold_search); + if (match != NULL) + { + size_t offset; + + offset = match - fold_text; + match = text + offset; + } + + if (!case_sen) + { + g_free (fold_text); + g_free (fold_search); + } + + return match; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_ascii_search_last (const char *text, const char *search, gboolean case_sen) +{ + char *fold_text; + char *fold_search; + const char *match; + + fold_text = case_sen ? (char *) text : g_ascii_strdown (text, -1); + fold_search = case_sen ? (char *) search : g_ascii_strdown (search, -1); + + match = g_strrstr_len (fold_text, -1, fold_search); + if (match != NULL) + { + size_t offset; + + offset = match - fold_text; + match = text + offset; + } + + if (!case_sen) + { + g_free (fold_text); + g_free (fold_search); + } + + return match; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_compare (const char *t1, const char *t2) +{ + return strcmp (t1, t2); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_ncompare (const char *t1, const char *t2) +{ + return strncmp (t1, t2, MIN (strlen (t1), strlen (t2))); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_casecmp (const char *t1, const char *t2) +{ + return g_ascii_strcasecmp (t1, t2); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_ncasecmp (const char *t1, const char *t2) +{ + return g_ascii_strncasecmp (t1, t2, MIN (strlen (t1), strlen (t2))); +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_ascii_fix_string (char *text) +{ + for (; text[0] != '\0'; text++) + text[0] = ((unsigned char) text[0] < 128) ? text[0] : '?'; +} + +/* --------------------------------------------------------------------------------------------- */ + +static char * +str_ascii_create_key (const char *text, gboolean case_sen) +{ + (void) case_sen; + return (char *) text; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_key_collate (const char *t1, const char *t2, gboolean case_sen) +{ + return case_sen ? strcmp (t1, t2) : g_ascii_strcasecmp (t1, t2); +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_ascii_release_key (char *key, gboolean case_sen) +{ + (void) key; + (void) case_sen; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_prefix (const char *text, const char *prefix) +{ + int result; + + for (result = 0; text[result] != '\0' && prefix[result] != '\0' + && text[result] == prefix[result]; result++); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_ascii_caseprefix (const char *text, const char *prefix) +{ + int result; + + for (result = 0; text[result] != '\0' && prefix[result] != '\0' + && g_ascii_toupper (text[result]) == g_ascii_toupper (prefix[result]); result++); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +struct str_class +str_ascii_init (void) +{ + struct str_class result; + + result.conv_gerror_message = str_ascii_conv_gerror_message; + result.vfs_convert_to = str_ascii_vfs_convert_to; + result.insert_replace_char = str_ascii_insert_replace_char; + result.is_valid_string = str_ascii_is_valid_string; + result.is_valid_char = str_ascii_is_valid_char; + result.cnext_char = str_ascii_cnext_char; + result.cprev_char = str_ascii_cprev_char; + result.cnext_char_safe = str_ascii_cnext_char; + result.cprev_char_safe = str_ascii_cprev_char; + result.cnext_noncomb_char = str_ascii_cnext_noncomb_char; + result.cprev_noncomb_char = str_ascii_cprev_noncomb_char; + result.char_isspace = str_ascii_isspace; + result.char_ispunct = str_ascii_ispunct; + result.char_isalnum = str_ascii_isalnum; + result.char_isdigit = str_ascii_isdigit; + result.char_isprint = str_ascii_isprint; + result.char_iscombiningmark = str_ascii_iscombiningmark; + result.char_toupper = str_ascii_toupper; + result.char_tolower = str_ascii_tolower; + result.length = str_ascii_length; + result.length2 = str_ascii_length2; + result.length_noncomb = str_ascii_length; + result.fix_string = str_ascii_fix_string; + result.term_form = str_ascii_term_form; + result.fit_to_term = str_ascii_fit_to_term; + result.term_trim = str_ascii_term_trim; + result.term_width2 = str_ascii_term_width2; + result.term_width1 = str_ascii_term_width1; + result.term_char_width = str_ascii_term_char_width; + result.term_substring = str_ascii_term_substring; + result.trunc = str_ascii_trunc; + result.offset_to_pos = str_ascii_offset_to_pos; + result.column_to_pos = str_ascii_column_to_pos; + result.create_search_needle = str_ascii_create_search_needle; + result.release_search_needle = str_ascii_release_search_needle; + result.search_first = str_ascii_search_first; + result.search_last = str_ascii_search_last; + result.compare = str_ascii_compare; + result.ncompare = str_ascii_ncompare; + result.casecmp = str_ascii_casecmp; + result.ncasecmp = str_ascii_ncasecmp; + result.prefix = str_ascii_prefix; + result.caseprefix = str_ascii_caseprefix; + result.create_key = str_ascii_create_key; + result.create_key_for_filename = str_ascii_create_key; + result.key_collate = str_ascii_key_collate; + result.release_key = str_ascii_release_key; + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/lib/strutil/strutilutf8.c b/lib/strutil/strutilutf8.c new file mode 100644 index 0000000..16725cb --- /dev/null +++ b/lib/strutil/strutilutf8.c @@ -0,0 +1,1519 @@ +/* + UTF-8 strings utilities + + Copyright (C) 2007-2022 + Free Software Foundation, Inc. + + Written by: + Rostislav Benes, 2007 + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include <stdlib.h> +#include <langinfo.h> +#include <limits.h> /* MB_LEN_MAX */ +#include <string.h> + +#include "lib/global.h" +#include "lib/strutil.h" + +/* using function for utf-8 from glib */ + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +/*** file scope type declarations ****************************************************************/ + +struct utf8_tool +{ + char *actual; + size_t remain; + const char *checked; + int ident; + gboolean compose; +}; + +struct term_form +{ + char text[BUF_MEDIUM * MB_LEN_MAX]; + size_t width; + gboolean compose; +}; + +/*** file scope variables ************************************************************************/ + +static const char replch[] = "\xEF\xBF\xBD"; + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_unichar_iscombiningmark (gunichar uni) +{ + GUnicodeType type; + + type = g_unichar_type (uni); + return (type == G_UNICODE_SPACING_MARK) + || (type == G_UNICODE_ENCLOSING_MARK) || (type == G_UNICODE_NON_SPACING_MARK); +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_utf8_insert_replace_char (GString * buffer) +{ + g_string_append (buffer, replch); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_utf8_is_valid_string (const char *text) +{ + return g_utf8_validate (text, -1, NULL); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_is_valid_char (const char *ch, size_t size) +{ + switch (g_utf8_get_char_validated (ch, size)) + { + case (gunichar) (-2): + return (-2); + case (gunichar) (-1): + return (-1); + default: + return 1; + } +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_utf8_cnext_char (const char **text) +{ + (*text) = g_utf8_next_char (*text); +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_utf8_cprev_char (const char **text) +{ + (*text) = g_utf8_prev_char (*text); +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_utf8_cnext_char_safe (const char **text) +{ + if (str_utf8_is_valid_char (*text, -1) == 1) + (*text) = g_utf8_next_char (*text); + else + (*text)++; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_utf8_cprev_char_safe (const char **text) +{ + const char *result, *t; + + result = g_utf8_prev_char (*text); + t = result; + str_utf8_cnext_char_safe (&t); + if (t == *text) + (*text) = result; + else + (*text)--; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_utf8_fix_string (char *text) +{ + while (text[0] != '\0') + { + gunichar uni; + + uni = g_utf8_get_char_validated (text, -1); + if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2))) + text = g_utf8_next_char (text); + else + { + text[0] = '?'; + text++; + } + } +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_utf8_isspace (const char *text) +{ + gunichar uni; + + uni = g_utf8_get_char_validated (text, -1); + return g_unichar_isspace (uni); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_utf8_ispunct (const char *text) +{ + gunichar uni; + + uni = g_utf8_get_char_validated (text, -1); + return g_unichar_ispunct (uni); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_utf8_isalnum (const char *text) +{ + gunichar uni; + + uni = g_utf8_get_char_validated (text, -1); + return g_unichar_isalnum (uni); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_utf8_isdigit (const char *text) +{ + gunichar uni; + + uni = g_utf8_get_char_validated (text, -1); + return g_unichar_isdigit (uni); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_utf8_isprint (const char *ch) +{ + gunichar uni; + + uni = g_utf8_get_char_validated (ch, -1); + return g_unichar_isprint (uni); +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_utf8_iscombiningmark (const char *ch) +{ + gunichar uni; + + uni = g_utf8_get_char_validated (ch, -1); + return str_unichar_iscombiningmark (uni); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_cnext_noncomb_char (const char **text) +{ + int count = 0; + + while ((*text)[0] != '\0') + { + str_utf8_cnext_char_safe (text); + count++; + if (!str_utf8_iscombiningmark (*text)) + break; + } + + return count; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_cprev_noncomb_char (const char **text, const char *begin) +{ + int count = 0; + + while ((*text) != begin) + { + str_utf8_cprev_char_safe (text); + count++; + if (!str_utf8_iscombiningmark (*text)) + break; + } + + return count; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_utf8_toupper (const char *text, char **out, size_t * remain) +{ + gunichar uni; + size_t left; + + uni = g_utf8_get_char_validated (text, -1); + if (uni == (gunichar) (-1) || uni == (gunichar) (-2)) + return FALSE; + + uni = g_unichar_toupper (uni); + left = g_unichar_to_utf8 (uni, NULL); + if (left >= *remain) + return FALSE; + + left = g_unichar_to_utf8 (uni, *out); + (*out) += left; + (*remain) -= left; + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +str_utf8_tolower (const char *text, char **out, size_t * remain) +{ + gunichar uni; + size_t left; + + uni = g_utf8_get_char_validated (text, -1); + if (uni == (gunichar) (-1) || uni == (gunichar) (-2)) + return FALSE; + + uni = g_unichar_tolower (uni); + left = g_unichar_to_utf8 (uni, NULL); + if (left >= *remain) + return FALSE; + + left = g_unichar_to_utf8 (uni, *out); + (*out) += left; + (*remain) -= left; + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_length (const char *text) +{ + int result = 0; + const char *start; + const char *end; + + start = text; + while (!g_utf8_validate (start, -1, &end) && start[0] != '\0') + { + if (start != end) + result += g_utf8_strlen (start, end - start); + + result++; + start = end + 1; + } + + if (start == text) + result = g_utf8_strlen (text, -1); + else if (start[0] != '\0' && start != end) + result += g_utf8_strlen (start, end - start); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_length2 (const char *text, int size) +{ + int result = 0; + const char *start; + const char *end; + + start = text; + while (!g_utf8_validate (start, -1, &end) && start[0] != '\0' && size > 0) + { + if (start != end) + { + result += g_utf8_strlen (start, MIN (end - start, size)); + size -= end - start; + } + result += (size > 0); + size--; + start = end + 1; + } + + if (start == text) + result = g_utf8_strlen (text, size); + else if (start[0] != '\0' && start != end && size > 0) + result += g_utf8_strlen (start, MIN (end - start, size)); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_length_noncomb (const char *text) +{ + int result = 0; + const char *t = text; + + while (t[0] != '\0') + { + str_utf8_cnext_noncomb_char (&t); + result++; + } + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +#if 0 +static void +str_utf8_questmark_sustb (char **string, size_t * left, GString * buffer) +{ + char *next; + + next = g_utf8_next_char (*string); + (*left) -= next - (*string); + (*string) = next; + g_string_append_c (buffer, '?'); +} +#endif + +/* --------------------------------------------------------------------------------------------- */ + +static gchar * +str_utf8_conv_gerror_message (GError * mcerror, const char *def_msg) +{ + if (mcerror != NULL) + return g_strdup (mcerror->message); + + return g_strdup (def_msg != NULL ? def_msg : ""); +} + +/* --------------------------------------------------------------------------------------------- */ + +static estr_t +str_utf8_vfs_convert_to (GIConv coder, const char *string, int size, GString * buffer) +{ + estr_t result = ESTR_SUCCESS; + + if (coder == str_cnv_not_convert) + g_string_append_len (buffer, string, size); + else + result = str_nconvert (coder, string, size, buffer); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ +/* utility function, that makes string valid in utf8 and all characters printable + * return width of string too */ + +static const struct term_form * +str_utf8_make_make_term_form (const char *text, size_t length) +{ + static struct term_form result; + gunichar uni; + size_t left; + char *actual; + + result.text[0] = '\0'; + result.width = 0; + result.compose = FALSE; + actual = result.text; + + /* check if text start with combining character, + * add space at begin in this case */ + if (length != 0 && text[0] != '\0') + { + uni = g_utf8_get_char_validated (text, -1); + if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2)) + && str_unichar_iscombiningmark (uni)) + { + actual[0] = ' '; + actual++; + result.width++; + result.compose = TRUE; + } + } + + while (length != 0 && text[0] != '\0') + { + uni = g_utf8_get_char_validated (text, -1); + if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2))) + { + if (g_unichar_isprint (uni)) + { + left = g_unichar_to_utf8 (uni, actual); + actual += left; + if (str_unichar_iscombiningmark (uni)) + result.compose = TRUE; + else + { + result.width++; + if (g_unichar_iswide (uni)) + result.width++; + } + } + else + { + actual[0] = '.'; + actual++; + result.width++; + } + text = g_utf8_next_char (text); + } + else + { + text++; + /*actual[0] = '?'; */ + memcpy (actual, replch, strlen (replch)); + actual += strlen (replch); + result.width++; + } + + if (length != (size_t) (-1)) + length--; + } + actual[0] = '\0'; + + return &result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_utf8_term_form (const char *text) +{ + static char result[BUF_MEDIUM * MB_LEN_MAX]; + const struct term_form *pre_form; + + pre_form = str_utf8_make_make_term_form (text, (size_t) (-1)); + if (pre_form->compose) + { + char *composed; + + composed = g_utf8_normalize (pre_form->text, -1, G_NORMALIZE_DEFAULT_COMPOSE); + g_strlcpy (result, composed, sizeof (result)); + g_free (composed); + } + else + g_strlcpy (result, pre_form->text, sizeof (result)); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ +/* utility function, that copies all characters from checked to actual */ + +static gboolean +utf8_tool_copy_chars_to_end (struct utf8_tool *tool) +{ + tool->compose = FALSE; + + while (tool->checked[0] != '\0') + { + gunichar uni; + size_t left; + + uni = g_utf8_get_char (tool->checked); + tool->compose = tool->compose || str_unichar_iscombiningmark (uni); + left = g_unichar_to_utf8 (uni, NULL); + if (tool->remain <= left) + return FALSE; + left = g_unichar_to_utf8 (uni, tool->actual); + tool->actual += left; + tool->remain -= left; + tool->checked = g_utf8_next_char (tool->checked); + } + + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ +/* utility function, that copies characters from checked to actual until ident is + * smaller than to_ident */ + +static gboolean +utf8_tool_copy_chars_to (struct utf8_tool *tool, int to_ident) +{ + tool->compose = FALSE; + + while (tool->checked[0] != '\0') + { + gunichar uni; + size_t left; + int w = 0; + + uni = g_utf8_get_char (tool->checked); + if (str_unichar_iscombiningmark (uni)) + tool->compose = TRUE; + else + { + w = 1; + if (g_unichar_iswide (uni)) + w++; + if (tool->ident + w > to_ident) + return TRUE; + } + + left = g_unichar_to_utf8 (uni, NULL); + if (tool->remain <= left) + return FALSE; + left = g_unichar_to_utf8 (uni, tool->actual); + tool->actual += left; + tool->remain -= left; + tool->checked = g_utf8_next_char (tool->checked); + tool->ident += w; + } + + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ +/* utility function, adds count spaces to actual */ + +static int +utf8_tool_insert_space (struct utf8_tool *tool, int count) +{ + if (count <= 0) + return 1; + if (tool->remain <= (gsize) count) + return 0; + + memset (tool->actual, ' ', count); + tool->actual += count; + tool->remain -= count; + return 1; +} + +/* --------------------------------------------------------------------------------------------- */ +/* utility function, adds one characters to actual */ + +static int +utf8_tool_insert_char (struct utf8_tool *tool, char ch) +{ + if (tool->remain <= 1) + return 0; + + tool->actual[0] = ch; + tool->actual++; + tool->remain--; + return 1; +} + +/* --------------------------------------------------------------------------------------------- */ +/* utility function, thah skips characters from checked until ident is greater or + * equal to to_ident */ + +static gboolean +utf8_tool_skip_chars_to (struct utf8_tool *tool, int to_ident) +{ + gunichar uni; + + while (to_ident > tool->ident && tool->checked[0] != '\0') + { + uni = g_utf8_get_char (tool->checked); + if (!str_unichar_iscombiningmark (uni)) + { + tool->ident++; + if (g_unichar_iswide (uni)) + tool->ident++; + } + tool->checked = g_utf8_next_char (tool->checked); + } + + uni = g_utf8_get_char (tool->checked); + while (str_unichar_iscombiningmark (uni)) + { + tool->checked = g_utf8_next_char (tool->checked); + uni = g_utf8_get_char (tool->checked); + } + + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +utf8_tool_compose (char *buffer, size_t size) +{ + char *composed; + + composed = g_utf8_normalize (buffer, -1, G_NORMALIZE_DEFAULT_COMPOSE); + g_strlcpy (buffer, composed, size); + g_free (composed); +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_utf8_fit_to_term (const char *text, int width, align_crt_t just_mode) +{ + static char result[BUF_MEDIUM * MB_LEN_MAX]; + const struct term_form *pre_form; + struct utf8_tool tool; + + pre_form = str_utf8_make_make_term_form (text, (size_t) (-1)); + tool.checked = pre_form->text; + tool.actual = result; + tool.remain = sizeof (result); + tool.compose = FALSE; + + if (pre_form->width <= (gsize) width) + { + switch (HIDE_FIT (just_mode)) + { + case J_CENTER_LEFT: + case J_CENTER: + tool.ident = (width - pre_form->width) / 2; + break; + case J_RIGHT: + tool.ident = width - pre_form->width; + break; + default: + tool.ident = 0; + break; + } + + utf8_tool_insert_space (&tool, tool.ident); + utf8_tool_copy_chars_to_end (&tool); + utf8_tool_insert_space (&tool, width - pre_form->width - tool.ident); + } + else if (IS_FIT (just_mode)) + { + tool.ident = 0; + utf8_tool_copy_chars_to (&tool, width / 2); + utf8_tool_insert_char (&tool, '~'); + + tool.ident = 0; + utf8_tool_skip_chars_to (&tool, pre_form->width - width + 1); + utf8_tool_copy_chars_to_end (&tool); + utf8_tool_insert_space (&tool, width - (pre_form->width - tool.ident + 1)); + } + else + { + switch (HIDE_FIT (just_mode)) + { + case J_CENTER: + tool.ident = (width - pre_form->width) / 2; + break; + case J_RIGHT: + tool.ident = width - pre_form->width; + break; + default: + tool.ident = 0; + break; + } + + utf8_tool_skip_chars_to (&tool, 0); + utf8_tool_insert_space (&tool, tool.ident); + utf8_tool_copy_chars_to (&tool, width); + utf8_tool_insert_space (&tool, width - tool.ident); + } + + tool.actual[0] = '\0'; + if (tool.compose) + utf8_tool_compose (result, sizeof (result)); + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_utf8_term_trim (const char *text, int width) +{ + static char result[BUF_MEDIUM * MB_LEN_MAX]; + const struct term_form *pre_form; + struct utf8_tool tool; + + if (width < 1) + { + result[0] = '\0'; + return result; + } + + pre_form = str_utf8_make_make_term_form (text, (size_t) (-1)); + + tool.checked = pre_form->text; + tool.actual = result; + tool.remain = sizeof (result); + tool.compose = FALSE; + + if ((gsize) width >= pre_form->width) + utf8_tool_copy_chars_to_end (&tool); + else if (width <= 3) + { + memset (tool.actual, '.', width); + tool.actual += width; + tool.remain -= width; + } + else + { + memset (tool.actual, '.', 3); + tool.actual += 3; + tool.remain -= 3; + + tool.ident = 0; + utf8_tool_skip_chars_to (&tool, pre_form->width - width + 3); + utf8_tool_copy_chars_to_end (&tool); + } + + tool.actual[0] = '\0'; + if (tool.compose) + utf8_tool_compose (result, sizeof (result)); + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_term_width2 (const char *text, size_t length) +{ + const struct term_form *result; + + result = str_utf8_make_make_term_form (text, length); + return result->width; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_term_width1 (const char *text) +{ + return str_utf8_term_width2 (text, (size_t) (-1)); +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_term_char_width (const char *text) +{ + gunichar uni; + + uni = g_utf8_get_char_validated (text, -1); + return (str_unichar_iscombiningmark (uni)) ? 0 : ((g_unichar_iswide (uni)) ? 2 : 1); +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_utf8_term_substring (const char *text, int start, int width) +{ + static char result[BUF_MEDIUM * MB_LEN_MAX]; + const struct term_form *pre_form; + struct utf8_tool tool; + + pre_form = str_utf8_make_make_term_form (text, (size_t) (-1)); + + tool.checked = pre_form->text; + tool.actual = result; + tool.remain = sizeof (result); + tool.compose = FALSE; + + tool.ident = -start; + utf8_tool_skip_chars_to (&tool, 0); + if (tool.ident < 0) + tool.ident = 0; + utf8_tool_insert_space (&tool, tool.ident); + + utf8_tool_copy_chars_to (&tool, width); + utf8_tool_insert_space (&tool, width - tool.ident); + + tool.actual[0] = '\0'; + if (tool.compose) + utf8_tool_compose (result, sizeof (result)); + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_utf8_trunc (const char *text, int width) +{ + static char result[MC_MAXPATHLEN * MB_LEN_MAX * 2]; + const struct term_form *pre_form; + struct utf8_tool tool; + + pre_form = str_utf8_make_make_term_form (text, (size_t) (-1)); + + tool.checked = pre_form->text; + tool.actual = result; + tool.remain = sizeof (result); + tool.compose = FALSE; + + if (pre_form->width <= (gsize) width) + utf8_tool_copy_chars_to_end (&tool); + else + { + tool.ident = 0; + utf8_tool_copy_chars_to (&tool, width / 2); + utf8_tool_insert_char (&tool, '~'); + + tool.ident = 0; + utf8_tool_skip_chars_to (&tool, pre_form->width - width + 1); + utf8_tool_copy_chars_to_end (&tool); + } + + tool.actual[0] = '\0'; + if (tool.compose) + utf8_tool_compose (result, sizeof (result)); + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_offset_to_pos (const char *text, size_t length) +{ + if (str_utf8_is_valid_string (text)) + return g_utf8_offset_to_pointer (text, length) - text; + else + { + int result; + GString *buffer; + + buffer = g_string_new (text); + str_utf8_fix_string (buffer->str); + result = g_utf8_offset_to_pointer (buffer->str, length) - buffer->str; + g_string_free (buffer, TRUE); + return result; + } +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_column_to_pos (const char *text, size_t pos) +{ + int result = 0; + int width = 0; + + while (text[0] != '\0') + { + gunichar uni; + + uni = g_utf8_get_char_validated (text, MB_LEN_MAX); + if ((uni != (gunichar) (-1)) && (uni != (gunichar) (-2))) + { + if (g_unichar_isprint (uni)) + { + if (!str_unichar_iscombiningmark (uni)) + { + width++; + if (g_unichar_iswide (uni)) + width++; + } + } + else + { + width++; + } + text = g_utf8_next_char (text); + } + else + { + text++; + width++; + } + + if ((gsize) width > pos) + return result; + + result++; + } + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static char * +str_utf8_create_search_needle (const char *needle, gboolean case_sen) +{ + char *fold, *result; + + if (needle == NULL) + return NULL; + + if (case_sen) + return g_utf8_normalize (needle, -1, G_NORMALIZE_ALL); + + fold = g_utf8_casefold (needle, -1); + result = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL); + g_free (fold); + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_utf8_release_search_needle (char *needle, gboolean case_sen) +{ + (void) case_sen; + g_free (needle); +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_utf8_search_first (const char *text, const char *search, gboolean case_sen) +{ + char *fold_text; + char *deco_text; + const char *match; + const char *result = NULL; + const char *m; + + fold_text = case_sen ? (char *) text : g_utf8_casefold (text, -1); + deco_text = g_utf8_normalize (fold_text, -1, G_NORMALIZE_ALL); + + match = deco_text; + do + { + match = g_strstr_len (match, -1, search); + if (match != NULL) + { + if ((!str_utf8_iscombiningmark (match) || (match == deco_text)) && + !str_utf8_iscombiningmark (match + strlen (search))) + { + result = text; + m = deco_text; + while (m < match) + { + str_utf8_cnext_noncomb_char (&m); + str_utf8_cnext_noncomb_char (&result); + } + } + else + str_utf8_cnext_char (&match); + } + } + while (match != NULL && result == NULL); + + g_free (deco_text); + if (!case_sen) + g_free (fold_text); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static const char * +str_utf8_search_last (const char *text, const char *search, gboolean case_sen) +{ + char *fold_text; + char *deco_text; + char *match; + const char *result = NULL; + const char *m; + + fold_text = case_sen ? (char *) text : g_utf8_casefold (text, -1); + deco_text = g_utf8_normalize (fold_text, -1, G_NORMALIZE_ALL); + + do + { + match = g_strrstr_len (deco_text, -1, search); + if (match != NULL) + { + if ((!str_utf8_iscombiningmark (match) || (match == deco_text)) && + !str_utf8_iscombiningmark (match + strlen (search))) + { + result = text; + m = deco_text; + while (m < match) + { + str_utf8_cnext_noncomb_char (&m); + str_utf8_cnext_noncomb_char (&result); + } + } + else + match[0] = '\0'; + } + } + while (match != NULL && result == NULL); + + g_free (deco_text); + if (!case_sen) + g_free (fold_text); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static char * +str_utf8_normalize (const char *text) +{ + GString *fixed; + char *tmp; + char *result; + const char *start; + const char *end; + + /* g_utf8_normalize() is a heavyweight function, that converts UTF-8 into UCS-4, + * does the normalization and then converts UCS-4 back into UTF-8. + * Since file names are composed of ASCII characters in most cases, we can speed up + * utf8 normalization by checking if the heavyweight Unicode normalization is actually + * needed. Normalization of ASCII string is no-op. + */ + + /* find out whether text is ASCII only */ + for (end = text; *end != '\0'; end++) + if ((*end & 0x80) != 0) + { + /* found 2nd byte of utf8-encoded symbol */ + break; + } + + /* if text is ASCII-only, return copy, normalize otherwise */ + if (*end == '\0') + return g_strndup (text, end - text); + + fixed = g_string_sized_new (4); + + start = text; + while (!g_utf8_validate (start, -1, &end) && start[0] != '\0') + { + if (start != end) + { + tmp = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL); + g_string_append (fixed, tmp); + g_free (tmp); + } + g_string_append_c (fixed, end[0]); + start = end + 1; + } + + if (start == text) + { + result = g_utf8_normalize (text, -1, G_NORMALIZE_ALL); + g_string_free (fixed, TRUE); + } + else + { + if (start[0] != '\0' && start != end) + { + tmp = g_utf8_normalize (start, end - start, G_NORMALIZE_ALL); + g_string_append (fixed, tmp); + g_free (tmp); + } + result = g_string_free (fixed, FALSE); + } + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static char * +str_utf8_casefold_normalize (const char *text) +{ + GString *fixed; + char *tmp, *fold; + char *result; + const char *start; + const char *end; + + fixed = g_string_sized_new (4); + + start = text; + while (!g_utf8_validate (start, -1, &end) && start[0] != '\0') + { + if (start != end) + { + fold = g_utf8_casefold (start, end - start); + tmp = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL); + g_string_append (fixed, tmp); + g_free (tmp); + g_free (fold); + } + g_string_append_c (fixed, end[0]); + start = end + 1; + } + + if (start == text) + { + fold = g_utf8_casefold (text, -1); + result = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL); + g_free (fold); + g_string_free (fixed, TRUE); + } + else + { + if (start[0] != '\0' && start != end) + { + fold = g_utf8_casefold (start, end - start); + tmp = g_utf8_normalize (fold, -1, G_NORMALIZE_ALL); + g_string_append (fixed, tmp); + g_free (tmp); + g_free (fold); + } + result = g_string_free (fixed, FALSE); + } + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_compare (const char *t1, const char *t2) +{ + char *n1, *n2; + int result; + + n1 = str_utf8_normalize (t1); + n2 = str_utf8_normalize (t2); + + result = strcmp (n1, n2); + + g_free (n1); + g_free (n2); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_ncompare (const char *t1, const char *t2) +{ + char *n1, *n2; + size_t l1, l2; + int result; + + n1 = str_utf8_normalize (t1); + n2 = str_utf8_normalize (t2); + + l1 = strlen (n1); + l2 = strlen (n2); + result = strncmp (n1, n2, MIN (l1, l2)); + + g_free (n1); + g_free (n2); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_casecmp (const char *t1, const char *t2) +{ + char *n1, *n2; + int result; + + n1 = str_utf8_casefold_normalize (t1); + n2 = str_utf8_casefold_normalize (t2); + + result = strcmp (n1, n2); + + g_free (n1); + g_free (n2); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_ncasecmp (const char *t1, const char *t2) +{ + char *n1, *n2; + size_t l1, l2; + int result; + + n1 = str_utf8_casefold_normalize (t1); + n2 = str_utf8_casefold_normalize (t2); + + l1 = strlen (n1); + l2 = strlen (n2); + result = strncmp (n1, n2, MIN (l1, l2)); + + g_free (n1); + g_free (n2); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_prefix (const char *text, const char *prefix) +{ + char *t, *p; + const char *nt, *np; + const char *nnt, *nnp; + int result; + + t = str_utf8_normalize (text); + p = str_utf8_normalize (prefix); + nt = t; + np = p; + nnt = t; + nnp = p; + + while (nt[0] != '\0' && np[0] != '\0') + { + str_utf8_cnext_char_safe (&nnt); + str_utf8_cnext_char_safe (&nnp); + if (nnt - nt != nnp - np) + break; + if (strncmp (nt, np, nnt - nt) != 0) + break; + nt = nnt; + np = nnp; + } + + result = np - p; + + g_free (t); + g_free (p); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_caseprefix (const char *text, const char *prefix) +{ + char *t, *p; + const char *nt, *np; + const char *nnt, *nnp; + int result; + + t = str_utf8_casefold_normalize (text); + p = str_utf8_casefold_normalize (prefix); + nt = t; + np = p; + nnt = t; + nnp = p; + + while (nt[0] != '\0' && np[0] != '\0') + { + str_utf8_cnext_char_safe (&nnt); + str_utf8_cnext_char_safe (&nnp); + if (nnt - nt != nnp - np) + break; + if (strncmp (nt, np, nnt - nt) != 0) + break; + nt = nnt; + np = nnp; + } + + result = np - p; + + g_free (t); + g_free (p); + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static char * +str_utf8_create_key_gen (const char *text, gboolean case_sen, + gchar * (*keygen) (const gchar * text, gssize size)) +{ + char *result; + + if (case_sen) + result = str_utf8_normalize (text); + else + { + gboolean dot; + GString *fixed; + const char *start, *end; + char *fold, *key; + + dot = text[0] == '.'; + fixed = g_string_sized_new (16); + + if (!dot) + start = text; + else + { + start = text + 1; + g_string_append_c (fixed, '.'); + } + + while (!g_utf8_validate (start, -1, &end) && start[0] != '\0') + { + if (start != end) + { + fold = g_utf8_casefold (start, end - start); + key = keygen (fold, -1); + g_string_append (fixed, key); + g_free (key); + g_free (fold); + } + g_string_append_c (fixed, end[0]); + start = end + 1; + } + + if (start == text) + { + fold = g_utf8_casefold (start, -1); + result = keygen (fold, -1); + g_free (fold); + g_string_free (fixed, TRUE); + } + else if (dot && (start == text + 1)) + { + fold = g_utf8_casefold (start, -1); + key = keygen (fold, -1); + g_string_append (fixed, key); + g_free (key); + g_free (fold); + result = g_string_free (fixed, FALSE); + } + else + { + if (start[0] != '\0' && start != end) + { + fold = g_utf8_casefold (start, end - start); + key = keygen (fold, -1); + g_string_append (fixed, key); + g_free (key); + g_free (fold); + } + result = g_string_free (fixed, FALSE); + } + } + return result; +} + +/* --------------------------------------------------------------------------------------------- */ + +static char * +str_utf8_create_key (const char *text, gboolean case_sen) +{ + return str_utf8_create_key_gen (text, case_sen, g_utf8_collate_key); +} + +/* --------------------------------------------------------------------------------------------- */ + +#ifdef MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME +static char * +str_utf8_create_key_for_filename (const char *text, gboolean case_sen) +{ + return str_utf8_create_key_gen (text, case_sen, g_utf8_collate_key_for_filename); +} +#endif + +/* --------------------------------------------------------------------------------------------- */ + +static int +str_utf8_key_collate (const char *t1, const char *t2, gboolean case_sen) +{ + (void) case_sen; + return strcmp (t1, t2); +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +str_utf8_release_key (char *key, gboolean case_sen) +{ + (void) case_sen; + g_free (key); +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +struct str_class +str_utf8_init (void) +{ + struct str_class result; + + result.conv_gerror_message = str_utf8_conv_gerror_message; + result.vfs_convert_to = str_utf8_vfs_convert_to; + result.insert_replace_char = str_utf8_insert_replace_char; + result.is_valid_string = str_utf8_is_valid_string; + result.is_valid_char = str_utf8_is_valid_char; + result.cnext_char = str_utf8_cnext_char; + result.cprev_char = str_utf8_cprev_char; + result.cnext_char_safe = str_utf8_cnext_char_safe; + result.cprev_char_safe = str_utf8_cprev_char_safe; + result.cnext_noncomb_char = str_utf8_cnext_noncomb_char; + result.cprev_noncomb_char = str_utf8_cprev_noncomb_char; + result.char_isspace = str_utf8_isspace; + result.char_ispunct = str_utf8_ispunct; + result.char_isalnum = str_utf8_isalnum; + result.char_isdigit = str_utf8_isdigit; + result.char_isprint = str_utf8_isprint; + result.char_iscombiningmark = str_utf8_iscombiningmark; + result.char_toupper = str_utf8_toupper; + result.char_tolower = str_utf8_tolower; + result.length = str_utf8_length; + result.length2 = str_utf8_length2; + result.length_noncomb = str_utf8_length_noncomb; + result.fix_string = str_utf8_fix_string; + result.term_form = str_utf8_term_form; + result.fit_to_term = str_utf8_fit_to_term; + result.term_trim = str_utf8_term_trim; + result.term_width2 = str_utf8_term_width2; + result.term_width1 = str_utf8_term_width1; + result.term_char_width = str_utf8_term_char_width; + result.term_substring = str_utf8_term_substring; + result.trunc = str_utf8_trunc; + result.offset_to_pos = str_utf8_offset_to_pos; + result.column_to_pos = str_utf8_column_to_pos; + result.create_search_needle = str_utf8_create_search_needle; + result.release_search_needle = str_utf8_release_search_needle; + result.search_first = str_utf8_search_first; + result.search_last = str_utf8_search_last; + result.compare = str_utf8_compare; + result.ncompare = str_utf8_ncompare; + result.casecmp = str_utf8_casecmp; + result.ncasecmp = str_utf8_ncasecmp; + result.prefix = str_utf8_prefix; + result.caseprefix = str_utf8_caseprefix; + result.create_key = str_utf8_create_key; +#ifdef MC__USE_STR_UTF8_CREATE_KEY_FOR_FILENAME + /* case insensitive sort files in "a1 a2 a10" order */ + result.create_key_for_filename = str_utf8_create_key_for_filename; +#else + /* case insensitive sort files in "a1 a10 a2" order */ + result.create_key_for_filename = str_utf8_create_key; +#endif + result.key_collate = str_utf8_key_collate; + result.release_key = str_utf8_release_key; + + return result; +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/lib/strutil/strverscmp.c b/lib/strutil/strverscmp.c new file mode 100644 index 0000000..50f7f56 --- /dev/null +++ b/lib/strutil/strverscmp.c @@ -0,0 +1,158 @@ +/* + Compare strings while treating digits characters numerically. + + Copyright (C) 1997-2022 + Free Software Foundation, Inc. + + This file is part of the GNU C Library. + Contributed by Jean-François Bignolles <bignolle@ecoledoc.ibp.fr>, 1997. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include <ctype.h> +#ifdef HAVE_STRVERSCMP +#include <string.h> +#endif /* HAVE_STRVERSCMP */ + +#include "lib/strutil.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +#ifndef HAVE_STRVERSCMP + +/* states: S_N: normal, S_I: comparing integral part, S_F: comparing + fractional parts, S_Z: idem but with leading Zeroes only */ +#define S_N 0x0 +#define S_I 0x3 +#define S_F 0x6 +#define S_Z 0x9 + +/* result_type: CMP: return diff; LEN: compare using len_diff/diff */ +#define CMP 2 +#define LEN 3 + +#endif /* HAVE_STRVERSCMP */ + +/*** file scope type declarations ****************************************************************/ + +/*** file scope variables ************************************************************************/ + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ +/* Compare S1 and S2 as strings holding indices/version numbers, + returning less than, equal to or greater than zero if S1 is less than, + equal to or greater than S2 (for more info, see the texinfo doc). + */ +int +str_verscmp (const char *s1, const char *s2) +{ +#ifdef HAVE_STRVERSCMP + return strverscmp (s1, s2); + +#else /* HAVE_STRVERSCMP */ + const unsigned char *p1 = (const unsigned char *) s1; + const unsigned char *p2 = (const unsigned char *) s2; + unsigned char c1, c2; + int state; + int diff; + + /* *INDENT-OFF* */ + /* Symbol(s) 0 [1-9] others + Transition (10) 0 (01) d (00) x */ + static const unsigned char next_state[] = + { + /* state x d 0 */ + /* S_N */ S_N, S_I, S_Z, + /* S_I */ S_N, S_I, S_I, + /* S_F */ S_N, S_F, S_F, + /* S_Z */ S_N, S_F, S_Z + }; + + static const signed char result_type[] = + { + /* state x/x x/d x/0 d/x d/d d/0 0/x 0/d 0/0 */ + + /* S_N */ CMP, CMP, CMP, CMP, LEN, CMP, CMP, CMP, CMP, + /* S_I */ CMP, -1, -1, +1, LEN, LEN, +1, LEN, LEN, + /* S_F */ CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, CMP, + /* S_Z */ CMP, +1, +1, -1, CMP, CMP, -1, CMP, CMP + }; + /* *INDENT-ON* */ + + if (p1 == p2) + return 0; + + c1 = *p1++; + c2 = *p2++; + /* Hint: '0' is a digit too. */ + state = S_N + ((c1 == '0') + (isdigit (c1) != 0)); + + while ((diff = c1 - c2) == 0) + { + if (c1 == '\0') + return diff; + + state = next_state[state]; + c1 = *p1++; + c2 = *p2++; + state += (c1 == '0') + (isdigit (c1) != 0); + } + + state = result_type[state * 3 + (((c2 == '0') + (isdigit (c2) != 0)))]; + + switch (state) + { + case CMP: + return diff; + + case LEN: + while (isdigit (*p1++)) + if (!isdigit (*p2++)) + return 1; + + return isdigit (*p2) ? -1 : diff; + + default: + return state; + } +#endif /* HAVE_STRVERSCMP */ +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/lib/strutil/xstrtol.c b/lib/strutil/xstrtol.c new file mode 100644 index 0000000..b755be3 --- /dev/null +++ b/lib/strutil/xstrtol.c @@ -0,0 +1,256 @@ +/* A more useful interface to strtol. + + Copyright (C) 1995-2022 + Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Jim Meyering. */ + +#include <config.h> + +/* Some pre-ANSI implementations (e.g. SunOS 4) + need stderr defined if assertion checking is enabled. */ +#include <stdio.h> + +#include <ctype.h> +#include <errno.h> +#include <inttypes.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> + +#include "lib/strutil.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +/*** file scope type declarations ****************************************************************/ + +/*** file scope variables ************************************************************************/ + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +static strtol_error_t +bkm_scale (uintmax_t * x, int scale_factor) +{ + if (UINTMAX_MAX / scale_factor < *x) + { + *x = UINTMAX_MAX; + return LONGINT_OVERFLOW; + } + + *x *= scale_factor; + return LONGINT_OK; +} + +/* --------------------------------------------------------------------------------------------- */ + +static strtol_error_t +bkm_scale_by_power (uintmax_t * x, int base, int power) +{ + strtol_error_t err = LONGINT_OK; + while (power-- != 0) + err |= bkm_scale (x, base); + return err; +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +strtol_error_t +xstrtoumax (const char *s, char **ptr, int base, uintmax_t * val, const char *valid_suffixes) +{ + char *t_ptr; + char **p; + uintmax_t tmp; + strtol_error_t err = LONGINT_OK; + + g_assert (0 <= base && base <= 36); + + p = (ptr != NULL ? ptr : &t_ptr); + + { + const char *q = s; + unsigned char ch = *q; + + while (isspace (ch)) + ch = *++q; + + if (ch == '-') + return LONGINT_INVALID; + } + + errno = 0; + tmp = strtol (s, p, base); + + if (*p == s) + { + /* If there is no number but there is a valid suffix, assume the + number is 1. The string is invalid otherwise. */ + if (valid_suffixes != NULL && **p != '\0' && strchr (valid_suffixes, **p) != NULL) + tmp = 1; + else + return LONGINT_INVALID; + } + else if (errno != 0) + { + if (errno != ERANGE) + return LONGINT_INVALID; + err = LONGINT_OVERFLOW; + } + + /* Let valid_suffixes == NULL mean "allow any suffix". */ + /* FIXME: update all callers except the ones that allow suffixes + after the number, changing last parameter NULL to "". */ + if (valid_suffixes == NULL) + { + *val = tmp; + return err; + } + + if (**p != '\0') + { + int suffixes = 1; + strtol_error_t overflow; + + if (strchr (valid_suffixes, **p) == NULL) + { + *val = tmp; + return err | LONGINT_INVALID_SUFFIX_CHAR; + } + + base = 1024; + + switch (**p) + { + case 'E': + case 'G': + case 'g': + case 'k': + case 'K': + case 'M': + case 'm': + case 'P': + case 'T': + case 't': + case 'Y': + case 'Z': + if (strchr (valid_suffixes, '0') != NULL) + { + /* The "valid suffix" '0' is a special flag meaning that + an optional second suffix is allowed, which can change + the base. A suffix "B" (e.g. "100MB") stands for a power + of 1000, whereas a suffix "iB" (e.g. "100MiB") stands for + a power of 1024. If no suffix (e.g. "100M"), assume + power-of-1024. */ + + switch (p[0][1]) + { + case 'i': + if (p[0][2] == 'B') + suffixes += 2; + break; + + case 'B': + case 'D': /* 'D' is obsolescent */ + base = 1000; + suffixes++; + break; + default: + break; + } + } + break; + default: + break; + } + + switch (**p) + { + case 'b': + overflow = bkm_scale (&tmp, 512); + break; + + case 'B': + /* This obsolescent first suffix is distinct from the 'B' + second suffix above. E.g., 'tar -L 1000B' means change + the tape after writing 1000 KiB of data. */ + overflow = bkm_scale (&tmp, 1024); + break; + + case 'c': + overflow = LONGINT_OK; + break; + + case 'E': /* exa or exbi */ + overflow = bkm_scale_by_power (&tmp, base, 6); + break; + + case 'G': /* giga or gibi */ + case 'g': /* 'g' is undocumented; for compatibility only */ + overflow = bkm_scale_by_power (&tmp, base, 3); + break; + + case 'k': /* kilo */ + case 'K': /* kibi */ + overflow = bkm_scale_by_power (&tmp, base, 1); + break; + + case 'M': /* mega or mebi */ + case 'm': /* 'm' is undocumented; for compatibility only */ + overflow = bkm_scale_by_power (&tmp, base, 2); + break; + + case 'P': /* peta or pebi */ + overflow = bkm_scale_by_power (&tmp, base, 5); + break; + + case 'T': /* tera or tebi */ + case 't': /* 't' is undocumented; for compatibility only */ + overflow = bkm_scale_by_power (&tmp, base, 4); + break; + + case 'w': + overflow = bkm_scale (&tmp, 2); + break; + + case 'Y': /* yotta or 2**80 */ + overflow = bkm_scale_by_power (&tmp, base, 8); + break; + + case 'Z': /* zetta or 2**70 */ + overflow = bkm_scale_by_power (&tmp, base, 7); + break; + + default: + *val = tmp; + return err | LONGINT_INVALID_SUFFIX_CHAR; + } + + err |= overflow; + *p += suffixes; + if (**p != '\0') + err |= LONGINT_INVALID_SUFFIX_CHAR; + } + + *val = tmp; + return err; +} + +/* --------------------------------------------------------------------------------------------- */ |