diff options
Diffstat (limited to '')
-rw-r--r-- | lib/search.h | 196 | ||||
-rw-r--r-- | lib/search/Makefile.am | 12 | ||||
-rw-r--r-- | lib/search/Makefile.in | 759 | ||||
-rw-r--r-- | lib/search/glob.c | 207 | ||||
-rw-r--r-- | lib/search/hex.c | 235 | ||||
-rw-r--r-- | lib/search/internal.h | 86 | ||||
-rw-r--r-- | lib/search/lib.c | 233 | ||||
-rw-r--r-- | lib/search/normal.c | 108 | ||||
-rw-r--r-- | lib/search/regex.c | 1121 | ||||
-rw-r--r-- | lib/search/search.c | 521 |
10 files changed, 3478 insertions, 0 deletions
diff --git a/lib/search.h b/lib/search.h new file mode 100644 index 0000000..07372c2 --- /dev/null +++ b/lib/search.h @@ -0,0 +1,196 @@ +#ifndef MC__SEARCH_H +#define MC__SEARCH_H + +#include <config.h> + +#include "lib/global.h" /* <glib.h> */ + +#include <sys/types.h> + +#ifdef SEARCH_TYPE_PCRE +#ifdef HAVE_PCRE2 +#define PCRE2_CODE_UNIT_WIDTH 8 +#include <pcre2.h> +#else +#include <pcre.h> +#endif +#endif /* SEARCH_TYPE_PCRE */ +/*** typedefs(not structures) and defined constants **********************************************/ + +typedef enum mc_search_cbret_t mc_search_cbret_t; + +typedef mc_search_cbret_t (*mc_search_fn) (const void *user_data, gsize char_offset, + int *current_char); +typedef mc_search_cbret_t (*mc_update_fn) (const void *user_data, gsize char_offset); + +#define MC_SEARCH__NUM_REPLACE_ARGS 64 + +#ifdef SEARCH_TYPE_GLIB +#define mc_search_matchinfo_t GMatchInfo +#else +#ifdef HAVE_PCRE2 +/* no pcre_extra in PCRE2. pcre2_jit_compile (equivalent of pcre_study) handles + * all of this internally. 
but we can use this to hold the pcre2_matches data + * until the search is complete */ +#define mc_search_matchinfo_t pcre2_match_data +#else +#define mc_search_matchinfo_t pcre_extra +#endif +#endif + +/*** enums ***************************************************************************************/ + +typedef enum +{ + MC_SEARCH_E_OK = 0, + MC_SEARCH_E_INPUT, + MC_SEARCH_E_REGEX_COMPILE, + MC_SEARCH_E_REGEX, + MC_SEARCH_E_REGEX_REPLACE, + MC_SEARCH_E_NOTFOUND, + MC_SEARCH_E_ABORT +} mc_search_error_t; + +typedef enum +{ + MC_SEARCH_T_INVALID = -1, + MC_SEARCH_T_NORMAL, + MC_SEARCH_T_REGEX, + MC_SEARCH_T_HEX, + MC_SEARCH_T_GLOB +} mc_search_type_t; + +enum mc_search_cbret_t +{ + MC_SEARCH_CB_OK = 0, + MC_SEARCH_CB_INVALID = -1, + MC_SEARCH_CB_ABORT = -2, + MC_SEARCH_CB_SKIP = -3, + MC_SEARCH_CB_NOTFOUND = -4 +}; + +/*** structures declarations (and typedefs of structures)*****************************************/ + +typedef struct mc_search_struct +{ + /* public input data */ + +#ifdef HAVE_CHARSET + /* search in all charsets */ + gboolean is_all_charsets; +#endif + + /* case sensitive search */ + gboolean is_case_sensitive; + + /* search only once. Is this for replace? */ + gboolean is_once_only; + + /* search only whole words (from begin to end). Used only with NORMAL search type */ + gboolean whole_words; + + /* search entire string (from begin to end). Used only with GLOB search type */ + gboolean is_entire_line; + + /* function, used for getting data. NULL if not used */ + mc_search_fn search_fn; + + /* function, used for updating current search status. 
NULL if not used */ + mc_update_fn update_fn; + + /* type of search */ + mc_search_type_t search_type; + + /* public output data */ + + /* some data for normal */ + off_t normal_offset; + + off_t start_buffer; + /* some data for regexp */ + int num_results; + gboolean is_utf8; + mc_search_matchinfo_t *regex_match_info; + GString *regex_buffer; +#ifdef SEARCH_TYPE_PCRE +#ifdef HAVE_PCRE2 + /* pcre2 will provide a pointer to a match_data structure that can be manipulated like an iovector */ + size_t *iovector; +#else + int iovector[MC_SEARCH__NUM_REPLACE_ARGS * 2]; +#endif +#endif /* SEARCH_TYPE_PCRE */ + + /* private data */ + + struct + { + GPtrArray *conditions; + gboolean result; + } prepared; + + /* original search string */ + struct + { + GString *str; +#ifdef HAVE_CHARSET + gchar *charset; +#endif + } original; + + /* error code after search */ + mc_search_error_t error; + gchar *error_str; +} mc_search_t; + +typedef struct mc_search_type_str_struct +{ + const char *str; + mc_search_type_t type; +} mc_search_type_str_t; + +/*** global variables defined in .c file *********************************************************/ + +/* Error messages */ +extern const char *STR_E_NOTFOUND; +extern const char *STR_E_UNKNOWN_TYPE; +extern const char *STR_E_RPL_NOT_EQ_TO_FOUND; +extern const char *STR_E_RPL_INVALID_TOKEN; + +/*** declarations of public functions ************************************************************/ + +mc_search_t *mc_search_new (const gchar * original, const gchar * original_charset); + +mc_search_t *mc_search_new_len (const gchar * original, gsize original_len, + const gchar * original_charset); + +void mc_search_free (mc_search_t * lc_mc_search); + +gboolean mc_search_prepare (mc_search_t * mc_search); + +gboolean mc_search_run (mc_search_t * mc_search, const void *user_data, gsize start_search, + gsize end_search, gsize * found_len); + +gboolean mc_search_is_type_avail (mc_search_type_t search_type); + +const mc_search_type_str_t 
*mc_search_types_list_get (size_t * num); + +GString *mc_search_prepare_replace_str (mc_search_t * mc_search, GString * replace_str); +char *mc_search_prepare_replace_str2 (mc_search_t * lc_mc_search, const char *replace_str); + +gboolean mc_search_is_fixed_search_str (const mc_search_t * lc_mc_search); + +gchar **mc_search_get_types_strings_array (size_t * num); + +gboolean mc_search (const gchar * pattern, const gchar * pattern_charset, const gchar * str, + mc_search_type_t type); + +int mc_search_getstart_result_by_num (mc_search_t * lc_mc_search, int lc_index); +int mc_search_getend_result_by_num (mc_search_t * lc_mc_search, int lc_index); + +/* *INDENT-OFF* */ +void mc_search_set_error (mc_search_t * lc_mc_search, mc_search_error_t code, const gchar * format, ...) + G_GNUC_PRINTF (3, 4); +/* *INDENT-ON* */ + +#endif /* MC__SEARCH_H */ diff --git a/lib/search/Makefile.am b/lib/search/Makefile.am new file mode 100644 index 0000000..48774a5 --- /dev/null +++ b/lib/search/Makefile.am @@ -0,0 +1,12 @@ +noinst_LTLIBRARIES = libsearch.la + +libsearch_la_SOURCES = \ + search.c \ + internal.h \ + lib.c \ + normal.c \ + regex.c \ + glob.c \ + hex.c + +AM_CPPFLAGS = -I$(top_srcdir) $(GLIB_CFLAGS) diff --git a/lib/search/Makefile.in b/lib/search/Makefile.in new file mode 100644 index 0000000..b587d13 --- /dev/null +++ b/lib/search/Makefile.in @@ -0,0 +1,759 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = 
: +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = lib/search +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/gettext.m4 \ + $(top_srcdir)/m4/iconv.m4 $(top_srcdir)/m4/intlmacosx.m4 \ + $(top_srcdir)/m4/lib-ld.m4 $(top_srcdir)/m4/lib-link.m4 \ + $(top_srcdir)/m4/lib-prefix.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/longlong.m4 $(top_srcdir)/m4/ltoptions.m4 \ + $(top_srcdir)/m4/ltsugar.m4 $(top_srcdir)/m4/ltversion.m4 \ + $(top_srcdir)/m4/lt~obsolete.m4 $(top_srcdir)/m4/nls.m4 \ + $(top_srcdir)/m4/po.m4 $(top_srcdir)/m4/progtest.m4 \ + $(top_srcdir)/acinclude.m4 \ + $(top_srcdir)/m4.include/gnulib/mode_t.m4 \ + $(top_srcdir)/m4.include/gnulib/stat-size.m4 \ + $(top_srcdir)/m4.include/gnulib/fstypename.m4 \ + $(top_srcdir)/m4.include/gnulib/fsusage.m4 \ + $(top_srcdir)/m4.include/gnulib/mountlist.m4 \ + $(top_srcdir)/m4.include/gnulib/windows-stat-inodes.m4 \ + $(top_srcdir)/m4.include/gnulib/sys_types_h.m4 \ + $(top_srcdir)/m4.include/ax_path_lib_pcre.m4 \ + $(top_srcdir)/m4.include/ax_check_pcre2.m4 \ + $(top_srcdir)/m4.include/dx_doxygen.m4 \ + $(top_srcdir)/m4.include/ax_require_defined.m4 \ + $(top_srcdir)/m4.include/ax_check_compile_flag.m4 \ + $(top_srcdir)/m4.include/ax_append_flag.m4 \ + $(top_srcdir)/m4.include/ax_append_compile_flags.m4 \ + $(top_srcdir)/m4.include/mc-cflags.m4 \ + $(top_srcdir)/m4.include/ax_gcc_func_attribute.m4 \ + $(top_srcdir)/m4.include/mc-check-search-type.m4 \ + $(top_srcdir)/m4.include/mc-get-fs-info.m4 \ + $(top_srcdir)/m4.include/mc-with-x.m4 \ + $(top_srcdir)/m4.include/mc-use-termcap.m4 \ + $(top_srcdir)/m4.include/mc-with-screen.m4 \ + $(top_srcdir)/m4.include/mc-with-screen-ncurses.m4 \ + $(top_srcdir)/m4.include/mc-with-screen-slang.m4 \ + $(top_srcdir)/m4.include/mc-with-internal-edit.m4 \ + $(top_srcdir)/m4.include/mc-subshell.m4 \ + $(top_srcdir)/m4.include/mc-background.m4 \ + 
$(top_srcdir)/m4.include/mc-ext2fs-attr.m4 \ + $(top_srcdir)/m4.include/mc-glib.m4 \ + $(top_srcdir)/m4.include/mc-vfs.m4 \ + $(top_srcdir)/m4.include/vfs/rpc.m4 \ + $(top_srcdir)/m4.include/vfs/socket.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-extfs.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-sfs.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-ftp.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-sftp.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-fish.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-undelfs.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-tarfs.m4 \ + $(top_srcdir)/m4.include/vfs/mc-vfs-cpiofs.m4 \ + $(top_srcdir)/m4.include/mc-version.m4 \ + $(top_srcdir)/m4.include/mc-tests.m4 \ + $(top_srcdir)/m4.include/mc-i18n.m4 \ + $(top_srcdir)/m4.include/mc-assert.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +libsearch_la_LIBADD = +am_libsearch_la_OBJECTS = search.lo lib.lo normal.lo regex.lo glob.lo \ + hex.lo +libsearch_la_OBJECTS = $(am_libsearch_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/config/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/glob.Plo ./$(DEPDIR)/hex.Plo \ + ./$(DEPDIR)/lib.Plo ./$(DEPDIR)/normal.Plo \ + ./$(DEPDIR)/regex.Plo ./$(DEPDIR)/search.Plo +am__mv = mv -f 
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libsearch_la_SOURCES) +DIST_SOURCES = $(libsearch_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/config/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CHECK_CFLAGS = @CHECK_CFLAGS@ +CHECK_LIBS = @CHECK_LIBS@ +COM_ERR_CFLAGS = @COM_ERR_CFLAGS@ +COM_ERR_LIBS = @COM_ERR_LIBS@ +CP1251 = @CP1251@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DOC_LINGUAS = @DOC_LINGUAS@ +DOXYGEN_PAPER_SIZE = @DOXYGEN_PAPER_SIZE@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +DX_CONFIG = @DX_CONFIG@ +DX_DOCDIR = @DX_DOCDIR@ +DX_DOT = @DX_DOT@ +DX_DOXYGEN = @DX_DOXYGEN@ +DX_DVIPS = @DX_DVIPS@ +DX_EGREP = @DX_EGREP@ +DX_ENV = @DX_ENV@ +DX_FLAG_chi = @DX_FLAG_chi@ +DX_FLAG_chm = @DX_FLAG_chm@ +DX_FLAG_doc = @DX_FLAG_doc@ +DX_FLAG_dot = @DX_FLAG_dot@ +DX_FLAG_html = @DX_FLAG_html@ +DX_FLAG_man = @DX_FLAG_man@ +DX_FLAG_pdf = @DX_FLAG_pdf@ +DX_FLAG_ps = @DX_FLAG_ps@ +DX_FLAG_rtf = @DX_FLAG_rtf@ +DX_FLAG_xml = @DX_FLAG_xml@ +DX_HHC = @DX_HHC@ +DX_LATEX = @DX_LATEX@ +DX_MAKEINDEX = @DX_MAKEINDEX@ +DX_PDFLATEX = @DX_PDFLATEX@ +DX_PERL = @DX_PERL@ +DX_PROJECT = @DX_PROJECT@ +E2P_CFLAGS = @E2P_CFLAGS@ +E2P_LIBS = @E2P_LIBS@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +EXT2FS_CFLAGS = @EXT2FS_CFLAGS@ +EXT2FS_LIBS = @EXT2FS_LIBS@ +EXTHELPERSDIR = @EXTHELPERSDIR@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +GETTEXT_MACRO_VERSION = @GETTEXT_MACRO_VERSION@ +GLIB_CFLAGS = @GLIB_CFLAGS@ +GLIB_LIBS = @GLIB_LIBS@ +GMODULE_CFLAGS = 
@GMODULE_CFLAGS@ +GMODULE_LIBS = @GMODULE_LIBS@ +GMSGFMT = @GMSGFMT@ +GMSGFMT_015 = @GMSGFMT_015@ +GREP = @GREP@ +HAVE_FILECMD = @HAVE_FILECMD@ +HAVE_ZIPINFO = @HAVE_ZIPINFO@ +HAVE_nroff = @HAVE_nroff@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +INTLLIBS = @INTLLIBS@ +INTL_MACOSX_LIBS = @INTL_MACOSX_LIBS@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LIBICONV = @LIBICONV@ +LIBINTL = @LIBINTL@ +LIBMC_RELEASE = @LIBMC_RELEASE@ +LIBMC_VERSION = @LIBMC_VERSION@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBSSH_CFLAGS = @LIBSSH_CFLAGS@ +LIBSSH_LIBS = @LIBSSH_LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBICONV = @LTLIBICONV@ +LTLIBINTL = @LTLIBINTL@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANDOC = @MANDOC@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MAN_DATE = @MAN_DATE@ +MAN_FLAGS = @MAN_FLAGS@ +MAN_VERSION = @MAN_VERSION@ +MCLIBS = @MCLIBS@ +MKDIR_P = @MKDIR_P@ +MSGFMT = @MSGFMT@ +MSGFMT_015 = @MSGFMT_015@ +MSGMERGE = @MSGMERGE@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PCRE_CFLAGS = @PCRE_CFLAGS@ +PCRE_LIBS = @PCRE_LIBS@ +PERL = @PERL@ +PERL_FOR_BUILD = @PERL_FOR_BUILD@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +POSUB = @POSUB@ +PYTHON = @PYTHON@ +RANLIB = @RANLIB@ +RUBY = @RUBY@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +SLANG_CFLAGS = @SLANG_CFLAGS@ +SLANG_LIBS = @SLANG_LIBS@ +STRIP = @STRIP@ +TESTS_LDFLAGS = @TESTS_LDFLAGS@ +UNZIP = @UNZIP@ +USE_NLS = @USE_NLS@ +VERSION = 
@VERSION@ +X11_WWW = @X11_WWW@ +XGETTEXT = @XGETTEXT@ +XGETTEXT_015 = @XGETTEXT_015@ +XGETTEXT_EXTRA_OPTIONS = @XGETTEXT_EXTRA_OPTIONS@ +XMKMF = @XMKMF@ +X_CFLAGS = @X_CFLAGS@ +X_EXTRA_LIBS = @X_EXTRA_LIBS@ +X_LIBS = @X_LIBS@ +X_PRE_LIBS = @X_PRE_LIBS@ +ZIP = @ZIP@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +noinst_LTLIBRARIES = libsearch.la +libsearch_la_SOURCES = \ + search.c \ + internal.h \ + lib.c \ + normal.c \ + regex.c \ + glob.c \ + hex.c + +AM_CPPFLAGS = -I$(top_srcdir) $(GLIB_CFLAGS) +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep 
in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu lib/search/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu lib/search/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libsearch.la: $(libsearch_la_OBJECTS) $(libsearch_la_DEPENDENCIES) $(EXTRA_libsearch_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libsearch_la_OBJECTS) $(libsearch_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/glob.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hex.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/lib.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/normal.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/regex.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/search.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + 
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find 
"$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/glob.Plo + -rm -f ./$(DEPDIR)/hex.Plo + -rm -f ./$(DEPDIR)/lib.Plo + -rm -f ./$(DEPDIR)/normal.Plo + -rm -f ./$(DEPDIR)/regex.Plo + -rm -f ./$(DEPDIR)/search.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/glob.Plo + -rm -f ./$(DEPDIR)/hex.Plo + -rm -f ./$(DEPDIR)/lib.Plo + -rm -f ./$(DEPDIR)/normal.Plo + -rm -f ./$(DEPDIR)/regex.Plo + -rm -f ./$(DEPDIR)/search.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-noinstLTLIBRARIES \ + cscopelist-am ctags ctags-am distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean 
mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/lib/search/glob.c b/lib/search/glob.c new file mode 100644 index 0000000..5874aba --- /dev/null +++ b/lib/search/glob.c @@ -0,0 +1,207 @@ +/* + Search text engine. + Glob-style pattern matching + + Copyright (C) 2009-2023 + Free Software Foundation, Inc. + + Written by: + Slava Zanko <slavazanko@gmail.com>, 2009. + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#include "lib/global.h" +#include "lib/strutil.h" +#include "lib/search.h" +#include "lib/strescape.h" + +#include "internal.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +/*** file scope type declarations ****************************************************************/ + +/*** forward declarations (file scope functions) *************************************************/ + +/*** file scope variables ************************************************************************/ + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +static GString * +mc_search__glob_translate_to_regex (const GString * astr) +{ + const char *str = astr->str; + GString *buff; + gsize loop; + gboolean inside_group = FALSE; + + buff = g_string_sized_new (32); + + for (loop = 0; loop < astr->len; loop++) + { + switch (str[loop]) + { + case '*': + if (!strutils_is_char_escaped (str, &(str[loop]))) + { + g_string_append (buff, inside_group ? ".*" : "(.*)"); + continue; + } + break; + case '?': + if (!strutils_is_char_escaped (str, &(str[loop]))) + { + g_string_append (buff, inside_group ? "." : "(.)"); + continue; + } + break; + case ',': + if (!strutils_is_char_escaped (str, &(str[loop]))) + { + g_string_append_c (buff, inside_group ? 
'|' : ','); + continue; + } + break; + case '{': + if (!strutils_is_char_escaped (str, &(str[loop]))) + { + g_string_append_c (buff, '('); + inside_group = TRUE; + continue; + } + break; + case '}': + if (!strutils_is_char_escaped (str, &(str[loop]))) + { + g_string_append_c (buff, ')'); + inside_group = FALSE; + continue; + } + break; + case '+': + case '.': + case '$': + case '(': + case ')': + case '^': + g_string_append_c (buff, '\\'); + break; + default: + break; + } + g_string_append_c (buff, str[loop]); + } + return buff; +} + +/* --------------------------------------------------------------------------------------------- */ + +static GString * +mc_search__translate_replace_glob_to_regex (const char *str) +{ + GString *buff; + char cnt = '0'; + gboolean escaped_mode = FALSE; + + buff = g_string_sized_new (32); + + while (*str != '\0') + { + char c = *str++; + + switch (c) + { + case '\\': + if (!escaped_mode) + { + escaped_mode = TRUE; + g_string_append_c (buff, '\\'); + continue; + } + break; + case '*': + case '?': + if (!escaped_mode) + { + g_string_append_c (buff, '\\'); + c = ++cnt; + } + break; + case '&': + if (!escaped_mode) + g_string_append_c (buff, '\\'); + break; + default: + break; + } + g_string_append_c (buff, c); + escaped_mode = FALSE; + } + return buff; +} + +/*** public functions ****************************************************************************/ + +void +mc_search__cond_struct_new_init_glob (const char *charset, mc_search_t * lc_mc_search, + mc_search_cond_t * mc_search_cond) +{ + GString *tmp; + + tmp = mc_search__glob_translate_to_regex (mc_search_cond->str); + g_string_free (mc_search_cond->str, TRUE); + + if (lc_mc_search->is_entire_line) + { + g_string_prepend_c (tmp, '^'); + g_string_append_c (tmp, '$'); + } + mc_search_cond->str = tmp; + + mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond); +} + +/* --------------------------------------------------------------------------------------------- */ 
+ +gboolean +mc_search__run_glob (mc_search_t * lc_mc_search, const void *user_data, + gsize start_search, gsize end_search, gsize * found_len) +{ + return mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len); +} + +/* --------------------------------------------------------------------------------------------- */ + +GString * +mc_search_glob_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str) +{ + GString *repl, *res; + + repl = mc_search__translate_replace_glob_to_regex (replace_str->str); + res = mc_search_regex_prepare_replace_str (lc_mc_search, repl); + g_string_free (repl, TRUE); + + return res; +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/lib/search/hex.c b/lib/search/hex.c new file mode 100644 index 0000000..50af6fb --- /dev/null +++ b/lib/search/hex.c @@ -0,0 +1,235 @@ +/* + Search text engine. + HEX-style pattern matching + + Copyright (C) 2009-2023 + Free Software Foundation, Inc. + + Written by: + Slava Zanko <slavazanko@gmail.com>, 2009. + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#include <stdio.h> + +#include "lib/global.h" +#include "lib/strutil.h" +#include "lib/search.h" +#include "lib/strescape.h" + +#include "internal.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +typedef enum +{ + MC_SEARCH_HEX_E_OK, + MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE, + MC_SEARCH_HEX_E_INVALID_CHARACTER, + MC_SEARCH_HEX_E_UNMATCHED_QUOTES +} mc_search_hex_parse_error_t; + +/*** file scope type declarations ****************************************************************/ + +/*** forward declarations (file scope functions) *************************************************/ + +/*** file scope variables ************************************************************************/ + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +static GString * +mc_search__hex_translate_to_regex (const GString * astr, mc_search_hex_parse_error_t * error_ptr, + int *error_pos_ptr) +{ + GString *buff; + const char *str; + gsize str_len; + gsize loop = 0; + mc_search_hex_parse_error_t error = MC_SEARCH_HEX_E_OK; + + buff = g_string_sized_new (64); + str = astr->str; + str_len = astr->len; + + while (loop < str_len && error == MC_SEARCH_HEX_E_OK) + { + unsigned int val; + int ptr; + + if (g_ascii_isspace (str[loop])) + { + /* Eat-up whitespace between tokens. 
*/ + while (g_ascii_isspace (str[loop])) + loop++; + } + /* cppcheck-suppress invalidscanf */ + else if (sscanf (str + loop, "%x%n", &val, &ptr) == 1) + { + if (val > 255) + error = MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE; + else + { + g_string_append_printf (buff, "\\x%02X", val); + loop += ptr; + } + } + else if (str[loop] == '"') + { + gsize loop2; + + loop2 = loop + 1; + + while (loop2 < str_len) + { + if (str[loop2] == '"') + break; + if (str[loop2] == '\\' && loop2 + 1 < str_len) + loop2++; + g_string_append_c (buff, str[loop2]); + loop2++; + } + + if (str[loop2] == '\0') + error = MC_SEARCH_HEX_E_UNMATCHED_QUOTES; + else + loop = loop2 + 1; + } + else + error = MC_SEARCH_HEX_E_INVALID_CHARACTER; + } + + if (error != MC_SEARCH_HEX_E_OK) + { + g_string_free (buff, TRUE); + if (error_ptr != NULL) + *error_ptr = error; + if (error_pos_ptr != NULL) + *error_pos_ptr = loop; + return NULL; + } + + return buff; +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +void +mc_search__cond_struct_new_init_hex (const char *charset, mc_search_t * lc_mc_search, + mc_search_cond_t * mc_search_cond) +{ + GString *tmp; + mc_search_hex_parse_error_t error = MC_SEARCH_HEX_E_OK; + int error_pos = 0; + + /* + * We may be searching in binary data, which is often invalid UTF-8. + * + * We have to create a non UTF-8 regex (that is, G_REGEX_RAW) or else, as + * the data is invalid UTF-8, both GLib's PCRE and our + * mc_search__g_regex_match_full_safe() are going to fail us. The former by + * not finding all bytes, the latter by overwriting the supposedly invalid + * UTF-8 with NULs. + * + * To do this, we specify "ASCII" as the charset. 
+ * + * In fact, we can specify any charset other than "UTF-8": any such charset + * will trigger G_REGEX_RAW (see [1]). The output of [2] will be the same + * for all charsets because it skips the \xXX symbols + * mc_search__hex_translate_to_regex() outputs. + * + * But "ASCII" is the best choice because a hex pattern may contain a + * quoted string: this way we know [2] will ignore any characters outside + * ASCII letters range (these ignored chars will be copied verbatim to the + * output and will match as-is; in other words, in a case-sensitive manner; + * If the user is interested in case-insensitive searches of international + * text, he shouldn't be using hex search in the first place.) + * + * Switching out of UTF-8 has another advantage: + * + * When doing case-insensitive searches, GLib treats \xXX symbols as normal + * letters and therefore matches both "a" and "A" for the hex pattern + * "0x61". When we switch out of UTF-8, we're switching to using [2], which + * doesn't have this issue. 
+ * + * [1] mc_search__cond_struct_new_init_regex + * [2] mc_search__cond_struct_new_regex_ci_str + */ + if (str_isutf8 (charset)) + charset = "ASCII"; + + tmp = mc_search__hex_translate_to_regex (mc_search_cond->str, &error, &error_pos); + if (tmp != NULL) + { + g_string_free (mc_search_cond->str, TRUE); + mc_search_cond->str = tmp; + mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond); + } + else + { + const char *desc; + + switch (error) + { + case MC_SEARCH_HEX_E_NUM_OUT_OF_RANGE: + desc = + _ + ("Number out of range (should be in byte range, 0 <= n <= 0xFF, expressed in hex)"); + break; + case MC_SEARCH_HEX_E_INVALID_CHARACTER: + desc = _("Invalid character"); + break; + case MC_SEARCH_HEX_E_UNMATCHED_QUOTES: + desc = _("Unmatched quotes character"); + break; + default: + desc = ""; + } + + lc_mc_search->error = MC_SEARCH_E_INPUT; + lc_mc_search->error_str = + g_strdup_printf (_("Hex pattern error at position %d:\n%s."), error_pos + 1, desc); + } +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +mc_search__run_hex (mc_search_t * lc_mc_search, const void *user_data, + gsize start_search, gsize end_search, gsize * found_len) +{ + return mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len); +} + +/* --------------------------------------------------------------------------------------------- */ + +GString * +mc_search_hex_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str) +{ + (void) lc_mc_search; + + return mc_g_string_dup (replace_str); +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/lib/search/internal.h b/lib/search/internal.h new file mode 100644 index 0000000..08cb019 --- /dev/null +++ b/lib/search/internal.h @@ -0,0 +1,86 @@ +#ifndef MC__SEARCH_INTERNAL_H +#define MC__SEARCH_INTERNAL_H + +/*** typedefs(not structures) and defined constants 
**********************************************/

/* mc_search_regex_t names the compiled-pattern type of whichever regex backend is built in */
#ifdef SEARCH_TYPE_GLIB
#define mc_search_regex_t GRegex
#else
#ifdef HAVE_PCRE2
#define mc_search_regex_t pcre2_code
#else
#define mc_search_regex_t pcre
#endif
#endif

/*** enums ***************************************************************************************/

/* Result codes of the per-condition matching helpers */
typedef enum
{
    COND__NOT_FOUND,
    COND__NOT_ALL_FOUND,
    COND__FOUND_CHAR,
    COND__FOUND_CHAR_LAST,
    COND__FOUND_OK,
    COND__FOUND_ERROR
} mc_search__found_cond_t;

/*** structures declarations (and typedefs of structures)*****************************************/

/* One prepared search condition */
typedef struct mc_search_cond_struct
{
    GString *str;               /* pattern; the glob/hex/normal initializers replace or rewrite it as a regex */
    GString *upper;             /* presumably an upper-case variant of the pattern - confirm in search.c */
    GString *lower;             /* presumably a lower-case variant of the pattern - confirm in search.c */
    mc_search_regex_t *regex_handle;    /* backend regex object */
    gchar *charset;             /* charset name associated with this condition */
} mc_search_cond_t;

/*** global variables defined in .c file *********************************************************/

/*** declarations of public functions ************************************************************/

/* search/lib.c : */

GString *mc_search__recode_str (const char *str, gsize str_len, const char *charset_from,
                                const char *charset_to);
GString *mc_search__get_one_symbol (const char *charset, const char *str, gsize str_len,
                                    gboolean * just_letters);
GString *mc_search__tolower_case_str (const char *charset, const GString * str);
GString *mc_search__toupper_case_str (const char *charset, const GString * str);

/* search/regex.c : */

void mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_search,
                                            mc_search_cond_t * mc_search_cond);
gboolean mc_search__run_regex (mc_search_t * lc_mc_search, const void *user_data,
                               gsize start_search, gsize end_search, gsize * found_len);
GString *mc_search_regex_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str);

/* search/normal.c : */

void mc_search__cond_struct_new_init_normal (const char *charset, mc_search_t * lc_mc_search,
                                             mc_search_cond_t * mc_search_cond);
gboolean mc_search__run_normal (mc_search_t * lc_mc_search, const void *user_data,
                                gsize start_search, gsize end_search, gsize * found_len);
GString *mc_search_normal_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str);

/* search/glob.c : */

void mc_search__cond_struct_new_init_glob (const char *charset, mc_search_t * lc_mc_search,
                                           mc_search_cond_t * mc_search_cond);
gboolean mc_search__run_glob (mc_search_t * lc_mc_search, const void *user_data,
                              gsize start_search, gsize end_search, gsize * found_len);
GString *mc_search_glob_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str);

/* search/hex.c : */

void mc_search__cond_struct_new_init_hex (const char *charset, mc_search_t * lc_mc_search,
                                          mc_search_cond_t * mc_search_cond);
gboolean mc_search__run_hex (mc_search_t * lc_mc_search, const void *user_data,
                             gsize start_search, gsize end_search, gsize * found_len);
GString *mc_search_hex_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str);

/*** inline functions ****************************************************************************/

#endif /* MC__SEARCH_INTERNAL_H */
diff --git a/lib/search/lib.c b/lib/search/lib.c
new file mode 100644
index 0000000..2c22504
--- /dev/null
+++ b/lib/search/lib.c
@@ -0,0 +1,233 @@
/*
   Search text engine.
   Common share code for module.

   Copyright (C) 2009-2023
   Free Software Foundation, Inc.

   Written by:
   Slava Zanko <slavazanko@gmail.com>, 2009, 2011
   Andrew Borodin <aborodin@vmail.ru>, 2013

   This file is part of the Midnight Commander.

   The Midnight Commander is free software: you can redistribute it
   and/or modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation, either version 3 of the License,
   or (at your option) any later version.
+ + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include <stdlib.h> +#include <sys/types.h> + +#include "lib/global.h" +#include "lib/strutil.h" +#include "lib/search.h" +#ifdef HAVE_CHARSET +#include "lib/charsets.h" +#endif + +#include "internal.h" + +/*** global variables ****************************************************************************/ + +/* *INDENT-OFF* */ +const char *STR_E_NOTFOUND = N_("Search string not found"); +const char *STR_E_UNKNOWN_TYPE = N_("Not implemented yet"); +const char *STR_E_RPL_NOT_EQ_TO_FOUND = + N_("Num of replace tokens not equal to num of found tokens"); +const char *STR_E_RPL_INVALID_TOKEN = N_("Invalid token number %d"); +/* *INDENT-ON* */ + +/*** file scope macro definitions ****************************************************************/ + +/*** file scope type declarations ****************************************************************/ + +typedef gboolean (*case_conv_fn) (const char *ch, char **out, size_t * remain); + +/*** forward declarations (file scope functions) *************************************************/ + +/*** file scope variables ************************************************************************/ + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +static GString * +mc_search__change_case_str (const char *charset, const GString * str, case_conv_fn case_conv) +{ + GString 
*ret; + const char *src_ptr; + gchar *dst_str; + gchar *dst_ptr; + gsize dst_len; +#ifdef HAVE_CHARSET + GString *converted_str; + + if (charset == NULL) + charset = cp_source; + + converted_str = mc_search__recode_str (str->str, str->len, charset, cp_display); + + dst_len = converted_str->len + 1; /* +1 is required for str_toupper/str_tolower */ + dst_str = g_malloc (dst_len); + + for (src_ptr = converted_str->str, dst_ptr = dst_str; + case_conv (src_ptr, &dst_ptr, &dst_len); src_ptr += str_length_char (src_ptr)) + ; + *dst_ptr = '\0'; + + dst_len = converted_str->len; + g_string_free (converted_str, TRUE); + + ret = mc_search__recode_str (dst_str, dst_len, cp_display, charset); + g_free (dst_str); +#else + (void) charset; + + dst_len = str->len + 1; /* +1 is required for str_toupper/str_tolower */ + dst_str = g_malloc (dst_len); + + for (src_ptr = str->str, dst_ptr = dst_str; + case_conv (src_ptr, &dst_ptr, &dst_len); src_ptr += str_length_char (src_ptr)) + ; + *dst_ptr = '\0'; + + ret = g_string_new_len (dst_str, dst_len); + g_free (dst_str); +#endif + return ret; +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +GString * +mc_search__recode_str (const char *str, gsize str_len, const char *charset_from, + const char *charset_to) +{ + GString *ret = NULL; + + if (charset_from != NULL && charset_to != NULL + && g_ascii_strcasecmp (charset_to, charset_from) != 0) + { + GIConv conv; + + conv = g_iconv_open (charset_to, charset_from); + if (conv != INVALID_CONV) + { + gchar *val; + gsize bytes_read = 0; + gsize bytes_written = 0; + + val = g_convert_with_iconv (str, str_len, conv, &bytes_read, &bytes_written, NULL); + + g_iconv_close (conv); + + if (val != NULL) + { + ret = g_string_new_len (val, 
bytes_written); + g_free (val); + } + } + } + + if (ret == NULL) + ret = g_string_new_len (str, str_len); + + return ret; +} + +/* --------------------------------------------------------------------------------------------- */ + +GString * +mc_search__get_one_symbol (const char *charset, const char *str, gsize str_len, + gboolean * just_letters) +{ + GString *converted_str; + const gchar *next_char; + +#ifdef HAVE_CHARSET + GString *converted_str2; + + if (charset == NULL) + charset = cp_source; + + converted_str = mc_search__recode_str (str, str_len, charset, cp_display); +#else + (void) charset; + + converted_str = g_string_new_len (str, str_len); +#endif + + next_char = str_cget_next_char (converted_str->str); + g_string_set_size (converted_str, (gsize) (next_char - converted_str->str)); + +#ifdef HAVE_CHARSET + converted_str2 = + mc_search__recode_str (converted_str->str, converted_str->len, cp_display, charset); +#endif + if (just_letters != NULL) + *just_letters = str_isalnum (converted_str->str) && !str_isdigit (converted_str->str); +#ifdef HAVE_CHARSET + g_string_free (converted_str, TRUE); + return converted_str2; +#else + return converted_str; +#endif +} + +/* --------------------------------------------------------------------------------------------- */ + +GString * +mc_search__tolower_case_str (const char *charset, const GString * str) +{ + return mc_search__change_case_str (charset, str, str_tolower); +} + +/* --------------------------------------------------------------------------------------------- */ + +GString * +mc_search__toupper_case_str (const char *charset, const GString * str) +{ + return mc_search__change_case_str (charset, str, str_toupper); +} + +/* --------------------------------------------------------------------------------------------- */ + +gchar ** +mc_search_get_types_strings_array (size_t * num) +{ + gchar **ret; + int lc_index; + size_t n; + + const mc_search_type_str_t *type_str; + const mc_search_type_str_t *types_str = 
mc_search_types_list_get (&n); + + ret = g_try_new0 (char *, n + 1); + if (ret == NULL) + return NULL; + + for (lc_index = 0, type_str = types_str; type_str->str != NULL; type_str++, lc_index++) + ret[lc_index] = g_strdup (type_str->str); + + /* don't count last NULL item */ + if (num != NULL) + *num = (size_t) lc_index; + + return ret; +} + +/* --------------------------------------------------------------------------------------------- */ diff --git a/lib/search/normal.c b/lib/search/normal.c new file mode 100644 index 0000000..9042bfc --- /dev/null +++ b/lib/search/normal.c @@ -0,0 +1,108 @@ +/* + Search text engine. + Plain search + + Copyright (C) 2009-2023 + Free Software Foundation, Inc. + + Written by: + Slava Zanko <slavazanko@gmail.com>, 2009. + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#include "lib/global.h" +#include "lib/strutil.h" +#include "lib/search.h" + +#include "internal.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +/*** file scope type declarations ****************************************************************/ + +/*** forward declarations (file scope functions) *************************************************/ + +/*** file scope variables ************************************************************************/ + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +static void +mc_search__normal_translate_to_regex (GString * str) +{ + gsize loop; + + for (loop = 0; loop < str->len; loop++) + switch (str->str[loop]) + { + case '*': + case '?': + case ',': + case '{': + case '}': + case '[': + case ']': + case '\\': + case '+': + case '.': + case '$': + case '(': + case ')': + case '^': + case '-': + case '|': + g_string_insert_c (str, loop, '\\'); + loop++; + default: + break; + } +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +void +mc_search__cond_struct_new_init_normal (const char *charset, mc_search_t * lc_mc_search, + mc_search_cond_t * mc_search_cond) +{ + mc_search__normal_translate_to_regex (mc_search_cond->str); + mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond); +} + +/* 
--------------------------------------------------------------------------------------------- */ + +gboolean +mc_search__run_normal (mc_search_t * lc_mc_search, const void *user_data, + gsize start_search, gsize end_search, gsize * found_len) +{ + return mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len); +} + +/* --------------------------------------------------------------------------------------------- */ +GString * +mc_search_normal_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str) +{ + (void) lc_mc_search; + + return mc_g_string_dup (replace_str); +} diff --git a/lib/search/regex.c b/lib/search/regex.c new file mode 100644 index 0000000..d24cf48 --- /dev/null +++ b/lib/search/regex.c @@ -0,0 +1,1121 @@ +/* + Search text engine. + Regex search + + Copyright (C) 2009-2023 + Free Software Foundation, Inc. + + Written by: + Slava Zanko <slavazanko@gmail.com>, 2009, 2010, 2011, 2013 + Vitaliy Filippov <vitalif@yourcmc.ru>, 2011 + Andrew Borodin <aborodin@vmail.ru>, 2013-2015 + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. + + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#include <stdlib.h> + +#include "lib/global.h" +#include "lib/strutil.h" +#include "lib/search.h" +#include "lib/strescape.h" +#include "lib/util.h" /* MC_PTR_FREE */ + +#include "internal.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +#define REPLACE_PREPARE_T_NOTHING_SPECIAL -1 +#define REPLACE_PREPARE_T_REPLACE_FLAG -2 +#define REPLACE_PREPARE_T_ESCAPE_SEQ -3 + +/*** file scope type declarations ****************************************************************/ + +typedef enum +{ + REPLACE_T_NO_TRANSFORM = 0, + REPLACE_T_UPP_TRANSFORM_CHAR = 1, + REPLACE_T_LOW_TRANSFORM_CHAR = 2, + REPLACE_T_UPP_TRANSFORM = 4, + REPLACE_T_LOW_TRANSFORM = 8 +} replace_transform_type_t; + +/*** forward declarations (file scope functions) *************************************************/ + +/*** file scope variables ************************************************************************/ + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +mc_search__regex_str_append_if_special (GString * copy_to, const GString * regex_str, + gsize * offset) +{ + const char *special_chars[] = { + "\\s", "\\S", + "\\d", "\\D", + "\\b", "\\B", + "\\w", "\\W", + "\\t", "\\n", + "\\r", "\\f", + "\\a", "\\e", + "\\x", "\\X", + "\\c", "\\C", + "\\l", "\\L", + "\\u", "\\U", + "\\E", "\\Q", + NULL + }; + + char *tmp_regex_str; + const char **spec_chr; + + tmp_regex_str = &(regex_str->str[*offset]); + + for (spec_chr = special_chars; *spec_chr != NULL; spec_chr++) + { + gsize spec_chr_len; + + spec_chr_len = strlen (*spec_chr); + + if (strncmp 
(tmp_regex_str, *spec_chr, spec_chr_len) == 0 + && !strutils_is_char_escaped (regex_str->str, tmp_regex_str)) + { + if (strncmp ("\\x", *spec_chr, spec_chr_len) == 0) + { + if (tmp_regex_str[spec_chr_len] != '{') + spec_chr_len += 2; + else + { + while ((spec_chr_len < regex_str->len - *offset) + && tmp_regex_str[spec_chr_len] != '}') + spec_chr_len++; + if (tmp_regex_str[spec_chr_len] == '}') + spec_chr_len++; + } + } + g_string_append_len (copy_to, tmp_regex_str, spec_chr_len); + *offset += spec_chr_len; + return TRUE; + } + } + + return FALSE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +mc_search__cond_struct_new_regex_hex_add (const char *charset, GString * str_to, + const GString * one_char) +{ + GString *upp, *low; + gsize loop; + + upp = mc_search__toupper_case_str (charset, one_char); + low = mc_search__tolower_case_str (charset, one_char); + + for (loop = 0; loop < upp->len; loop++) + { + gchar tmp_str[10 + 1]; /* longest content is "[\\x%02X\\x%02X]" */ + gint tmp_len; + + if (loop >= low->len || upp->str[loop] == low->str[loop]) + tmp_len = + g_snprintf (tmp_str, sizeof (tmp_str), "\\x%02X", (unsigned char) upp->str[loop]); + else + tmp_len = + g_snprintf (tmp_str, sizeof (tmp_str), "[\\x%02X\\x%02X]", + (unsigned char) upp->str[loop], (unsigned char) low->str[loop]); + + g_string_append_len (str_to, tmp_str, tmp_len); + } + + g_string_free (upp, TRUE); + g_string_free (low, TRUE); +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +mc_search__cond_struct_new_regex_accum_append (const char *charset, GString * str_to, + GString * str_from) +{ + GString *recoded_part; + gsize loop = 0; + + recoded_part = g_string_sized_new (32); + + while (loop < str_from->len) + { + GString *one_char; + gboolean just_letters; + + one_char = + mc_search__get_one_symbol (charset, str_from->str + loop, + MIN (str_from->len - 
loop, 6), &just_letters); + + if (one_char->len == 0) + loop++; + else + { + loop += one_char->len; + + if (just_letters) + mc_search__cond_struct_new_regex_hex_add (charset, recoded_part, one_char); + else + g_string_append_len (recoded_part, one_char->str, one_char->len); + } + + g_string_free (one_char, TRUE); + } + + g_string_append_len (str_to, recoded_part->str, recoded_part->len); + g_string_free (recoded_part, TRUE); + g_string_set_size (str_from, 0); +} + +/* --------------------------------------------------------------------------------------------- */ + +/** + * Creates a case-insensitive version of a regex pattern. + * + * For example (assuming ASCII charset): given "\\bHello!\\xAB", returns + * "\\b[Hh][Ee][Ll][Ll][Oo]!\\xAB" (this example is for easier reading; in + * reality hex codes are used instead of letters). + * + * This function knows not to ruin special regex symbols. + * + * This function is used when working with non-UTF-8 charsets: GLib's + * regex engine doesn't understand such charsets and therefore can't do + * this job itself. 
+ */ +static GString * +mc_search__cond_struct_new_regex_ci_str (const char *charset, const GString * astr) +{ + GString *accumulator, *spec_char, *ret_str; + gsize loop; + + ret_str = g_string_sized_new (64); + accumulator = g_string_sized_new (64); + spec_char = g_string_sized_new (64); + loop = 0; + + while (loop < astr->len) + { + if (mc_search__regex_str_append_if_special (spec_char, astr, &loop)) + { + mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator); + g_string_append_len (ret_str, spec_char->str, spec_char->len); + g_string_set_size (spec_char, 0); + continue; + } + + if (astr->str[loop] == '[' && !strutils_is_char_escaped (astr->str, &(astr->str[loop]))) + { + mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator); + + while (loop < astr->len && !(astr->str[loop] == ']' + && !strutils_is_char_escaped (astr->str, + &(astr->str[loop])))) + { + g_string_append_c (ret_str, astr->str[loop]); + loop++; + } + + g_string_append_c (ret_str, astr->str[loop]); + loop++; + continue; + } + /* + TODO: handle [ and ] + */ + g_string_append_c (accumulator, astr->str[loop]); + loop++; + } + mc_search__cond_struct_new_regex_accum_append (charset, ret_str, accumulator); + + g_string_free (accumulator, TRUE); + g_string_free (spec_char, TRUE); + + return ret_str; +} + +/* --------------------------------------------------------------------------------------------- */ + +#ifdef SEARCH_TYPE_GLIB +/* A thin wrapper above g_regex_match_full that makes sure the string passed + * to it is valid UTF-8 (unless G_REGEX_RAW compile flag was set), as it is a + * requirement by glib and it might crash otherwise. See: mc ticket 3449. + * Be careful: there might be embedded NULs in the strings. 
*/ +static gboolean +mc_search__g_regex_match_full_safe (const GRegex * regex, + const gchar * string, + gssize string_len, + gint start_position, + GRegexMatchFlags match_options, + GMatchInfo ** match_info, GError ** error) +{ + char *string_safe, *p, *end; + gboolean ret; + + if (string_len < 0) + string_len = strlen (string); + + if ((g_regex_get_compile_flags (regex) & G_REGEX_RAW) + || g_utf8_validate (string, string_len, NULL)) + { + return g_regex_match_full (regex, string, string_len, start_position, match_options, + match_info, error); + } + + /* Correctly handle embedded NULs while copying */ + p = string_safe = g_malloc (string_len + 1); + memcpy (string_safe, string, string_len); + string_safe[string_len] = '\0'; + end = p + string_len; + + while (p < end) + { + gunichar c = g_utf8_get_char_validated (p, -1); + if (c != (gunichar) (-1) && c != (gunichar) (-2)) + { + p = g_utf8_next_char (p); + } + else + { + /* U+FFFD would be the proper choice, but then we'd have to + maintain mapping between old and new offsets. + So rather do a byte by byte replacement. 
*/ + *p++ = '\0'; + } + } + + ret = + g_regex_match_full (regex, string_safe, string_len, start_position, match_options, + match_info, error); + g_free (string_safe); + return ret; +} +#endif /* SEARCH_TYPE_GLIB */ + +/* --------------------------------------------------------------------------------------------- */ + +static mc_search__found_cond_t +mc_search__regex_found_cond_one (mc_search_t * lc_mc_search, mc_search_regex_t * regex, + GString * search_str) +{ +#ifdef SEARCH_TYPE_GLIB + GError *mcerror = NULL; + + if (!mc_search__g_regex_match_full_safe + (regex, search_str->str, search_str->len, 0, G_REGEX_MATCH_NEWLINE_ANY, + &lc_mc_search->regex_match_info, &mcerror)) + { + g_match_info_free (lc_mc_search->regex_match_info); + lc_mc_search->regex_match_info = NULL; + if (mcerror != NULL) + { + lc_mc_search->error = MC_SEARCH_E_REGEX; + g_free (lc_mc_search->error_str); + lc_mc_search->error_str = + str_conv_gerror_message (mcerror, _("Regular expression error")); + g_error_free (mcerror); + return COND__FOUND_ERROR; + } + return COND__NOT_FOUND; + } + lc_mc_search->num_results = g_match_info_get_match_count (lc_mc_search->regex_match_info); +#else /* SEARCH_TYPE_GLIB */ + + lc_mc_search->num_results = +#ifdef HAVE_PCRE2 + pcre2_match (regex, (unsigned char *) search_str->str, search_str->len, 0, 0, + lc_mc_search->regex_match_info, NULL); +#else + pcre_exec (regex, lc_mc_search->regex_match_info, search_str->str, search_str->len, 0, 0, + lc_mc_search->iovector, MC_SEARCH__NUM_REPLACE_ARGS); +#endif + if (lc_mc_search->num_results < 0) + { + return COND__NOT_FOUND; + } +#endif /* SEARCH_TYPE_GLIB */ + return COND__FOUND_OK; + +} + +/* --------------------------------------------------------------------------------------------- */ + +static mc_search__found_cond_t +mc_search__regex_found_cond (mc_search_t * lc_mc_search, GString * search_str) +{ + gsize loop1; + + for (loop1 = 0; loop1 < lc_mc_search->prepared.conditions->len; loop1++) + { + mc_search_cond_t 
*mc_search_cond; + mc_search__found_cond_t ret; + + mc_search_cond = + (mc_search_cond_t *) g_ptr_array_index (lc_mc_search->prepared.conditions, loop1); + + if (!mc_search_cond->regex_handle) + continue; + + ret = + mc_search__regex_found_cond_one (lc_mc_search, mc_search_cond->regex_handle, + search_str); + if (ret != COND__NOT_FOUND) + return ret; + } + return COND__NOT_ALL_FOUND; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int +mc_search_regex__get_max_num_of_replace_tokens (const gchar * str, gsize len) +{ + int max_token = 0; + gsize loop; + for (loop = 0; loop < len - 1; loop++) + { + if (str[loop] == '\\' && g_ascii_isdigit (str[loop + 1])) + { + if (strutils_is_char_escaped (str, &str[loop])) + continue; + if (max_token < str[loop + 1] - '0') + max_token = str[loop + 1] - '0'; + continue; + } + if (str[loop] == '$' && str[loop + 1] == '{') + { + gsize tmp_len; + + if (strutils_is_char_escaped (str, &str[loop])) + continue; + + for (tmp_len = 0; + loop + tmp_len + 2 < len && (str[loop + 2 + tmp_len] & (char) 0xf0) == 0x30; + tmp_len++); + + if (str[loop + 2 + tmp_len] == '}') + { + int tmp_token; + char *tmp_str; + + tmp_str = g_strndup (&str[loop + 2], tmp_len); + tmp_token = atoi (tmp_str); + if (max_token < tmp_token) + max_token = tmp_token; + g_free (tmp_str); + } + } + } + return max_token; +} + +/* --------------------------------------------------------------------------------------------- */ + +static char * +mc_search_regex__get_token_by_num (const mc_search_t * lc_mc_search, gsize lc_index) +{ + int fnd_start = 0, fnd_end = 0; + +#ifdef SEARCH_TYPE_GLIB + g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &fnd_start, &fnd_end); +#else /* SEARCH_TYPE_GLIB */ + fnd_start = lc_mc_search->iovector[lc_index * 2 + 0]; + fnd_end = lc_mc_search->iovector[lc_index * 2 + 1]; +#endif /* SEARCH_TYPE_GLIB */ + + if (fnd_end == fnd_start) + return g_strdup (""); + + return 
g_strndup (lc_mc_search->regex_buffer->str + fnd_start, fnd_end - fnd_start); + +} + +/* --------------------------------------------------------------------------------------------- */ + +static gboolean +mc_search_regex__replace_handle_esc_seq (const GString * replace_str, const gsize current_pos, + gsize * skip_len, int *ret) +{ + char *curr_str = &(replace_str->str[current_pos]); + char c = curr_str[1]; + + if (replace_str->len > current_pos + 2) + { + if (c == '{') + { + for (*skip_len = 2; /* \{ */ + current_pos + *skip_len < replace_str->len && curr_str[*skip_len] >= '0' + && curr_str[*skip_len] <= '7'; (*skip_len)++) + ; + + if (current_pos + *skip_len < replace_str->len && curr_str[*skip_len] == '}') + { + (*skip_len)++; + *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; + return FALSE; + } + else + { + *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL; + return TRUE; + } + } + + if (c == 'x') + { + *skip_len = 2; /* \x */ + c = curr_str[2]; + if (c == '{') + { + for (*skip_len = 3; /* \x{ */ + current_pos + *skip_len < replace_str->len + && g_ascii_isxdigit ((guchar) curr_str[*skip_len]); (*skip_len)++) + ; + + if (current_pos + *skip_len < replace_str->len && curr_str[*skip_len] == '}') + { + (*skip_len)++; + *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; + return FALSE; + } + else + { + *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL; + return TRUE; + } + } + else if (!g_ascii_isxdigit ((guchar) c)) + { + *skip_len = 2; /* \x without number behind */ + *ret = REPLACE_PREPARE_T_NOTHING_SPECIAL; + return FALSE; + } + else + { + c = curr_str[3]; + if (!g_ascii_isxdigit ((guchar) c)) + *skip_len = 3; /* \xH */ + else + *skip_len = 4; /* \xHH */ + *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; + return FALSE; + } + } + } + + if (strchr ("ntvbrfa", c) != NULL) + { + *skip_len = 2; + *ret = REPLACE_PREPARE_T_ESCAPE_SEQ; + return FALSE; + } + return TRUE; +} + +/* --------------------------------------------------------------------------------------------- */ + +static int 
+mc_search_regex__process_replace_str (const GString * replace_str, const gsize current_pos, + gsize * skip_len, replace_transform_type_t * replace_flags) +{ + int ret = -1; + const char *curr_str = &(replace_str->str[current_pos]); + + if (current_pos > replace_str->len) + return REPLACE_PREPARE_T_NOTHING_SPECIAL; + + *skip_len = 0; + + if (replace_str->len > current_pos + 2 && curr_str[0] == '$' && curr_str[1] == '{' + && (curr_str[2] & (char) 0xf0) == 0x30) + { + char *tmp_str; + + if (strutils_is_char_escaped (replace_str->str, curr_str)) + { + *skip_len = 1; + return REPLACE_PREPARE_T_NOTHING_SPECIAL; + } + + for (*skip_len = 0; + current_pos + *skip_len + 2 < replace_str->len + && (curr_str[2 + *skip_len] & (char) 0xf0) == 0x30; (*skip_len)++) + ; + + if (curr_str[2 + *skip_len] != '}') + return REPLACE_PREPARE_T_NOTHING_SPECIAL; + + tmp_str = g_strndup (curr_str + 2, *skip_len); + if (tmp_str == NULL) + return REPLACE_PREPARE_T_NOTHING_SPECIAL; + + ret = atoi (tmp_str); + g_free (tmp_str); + + *skip_len += 3; /* ${} */ + return ret; /* capture buffer index >= 0 */ + } + + if (curr_str[0] == '\\' && replace_str->len > current_pos + 1) + { + if (strutils_is_char_escaped (replace_str->str, curr_str)) + { + *skip_len = 1; + return REPLACE_PREPARE_T_NOTHING_SPECIAL; + } + + if (g_ascii_isdigit (curr_str[1])) + { + ret = g_ascii_digit_value (curr_str[1]); /* capture buffer index >= 0 */ + *skip_len = 2; /* \\ and one digit */ + return ret; + } + + if (!mc_search_regex__replace_handle_esc_seq (replace_str, current_pos, skip_len, &ret)) + return ret; + + ret = REPLACE_PREPARE_T_REPLACE_FLAG; + *skip_len += 2; + + switch (curr_str[1]) + { + case 'U': + *replace_flags |= REPLACE_T_UPP_TRANSFORM; + *replace_flags &= ~REPLACE_T_LOW_TRANSFORM; + break; + case 'u': + *replace_flags |= REPLACE_T_UPP_TRANSFORM_CHAR; + break; + case 'L': + *replace_flags |= REPLACE_T_LOW_TRANSFORM; + *replace_flags &= ~REPLACE_T_UPP_TRANSFORM; + break; + case 'l': + *replace_flags |= 
REPLACE_T_LOW_TRANSFORM_CHAR; + break; + case 'E': + *replace_flags = REPLACE_T_NO_TRANSFORM; + break; + default: + ret = REPLACE_PREPARE_T_NOTHING_SPECIAL; + break; + } + } + return ret; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +mc_search_regex__process_append_str (GString * dest_str, const char *from, gsize len, + replace_transform_type_t * replace_flags) +{ + gsize loop; + gsize char_len; + + if (len == (gsize) (-1)) + len = strlen (from); + + if (*replace_flags == REPLACE_T_NO_TRANSFORM) + { + g_string_append_len (dest_str, from, len); + return; + } + + for (loop = 0; loop < len; loop += char_len) + { + GString *tmp_string = NULL; + GString *s; + + s = mc_search__get_one_symbol (NULL, from + loop, len - loop, NULL); + char_len = s->len; + + if ((*replace_flags & REPLACE_T_UPP_TRANSFORM_CHAR) != 0) + { + *replace_flags &= ~REPLACE_T_UPP_TRANSFORM_CHAR; + tmp_string = mc_search__toupper_case_str (NULL, s); + g_string_append_len (dest_str, tmp_string->str, tmp_string->len); + } + else if ((*replace_flags & REPLACE_T_LOW_TRANSFORM_CHAR) != 0) + { + *replace_flags &= ~REPLACE_T_LOW_TRANSFORM_CHAR; + tmp_string = mc_search__tolower_case_str (NULL, s); + g_string_append_len (dest_str, tmp_string->str, tmp_string->len); + } + else if ((*replace_flags & REPLACE_T_UPP_TRANSFORM) != 0) + { + tmp_string = mc_search__toupper_case_str (NULL, s); + g_string_append_len (dest_str, tmp_string->str, tmp_string->len); + } + else if ((*replace_flags & REPLACE_T_LOW_TRANSFORM) != 0) + { + tmp_string = mc_search__tolower_case_str (NULL, s); + g_string_append_len (dest_str, tmp_string->str, tmp_string->len); + } + + g_string_free (s, TRUE); + if (tmp_string != NULL) + g_string_free (tmp_string, TRUE); + } +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +mc_search_regex__process_escape_sequence (GString * dest_str, const char *from, 
gsize len, + replace_transform_type_t * replace_flags, + gboolean is_utf8) +{ + gsize i = 0; + unsigned int c = 0; + char b; + + if (len == (gsize) (-1)) + len = strlen (from); + if (len == 0) + return; + + if (from[i] == '{') + i++; + if (i >= len) + return; + + if (from[i] == 'x') + { + i++; + if (i < len && from[i] == '{') + i++; + for (; i < len; i++) + { + if (from[i] >= '0' && from[i] <= '9') + c = c * 16 + from[i] - '0'; + else if (from[i] >= 'a' && from[i] <= 'f') + c = c * 16 + 10 + from[i] - 'a'; + else if (from[i] >= 'A' && from[i] <= 'F') + c = c * 16 + 10 + from[i] - 'A'; + else + break; + } + } + else if (from[i] >= '0' && from[i] <= '7') + for (; i < len && from[i] >= '0' && from[i] <= '7'; i++) + c = c * 8 + from[i] - '0'; + else + { + switch (from[i]) + { + case 'n': + c = '\n'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + case 'b': + c = '\b'; + break; + case 'r': + c = '\r'; + break; + case 'f': + c = '\f'; + break; + case 'a': + c = '\a'; + break; + default: + mc_search_regex__process_append_str (dest_str, from, len, replace_flags); + return; + } + } + + if (c < 0x80 || !is_utf8) + g_string_append_c (dest_str, (char) c); + else if (c < 0x800) + { + b = 0xC0 | (c >> 6); + g_string_append_c (dest_str, b); + b = 0x80 | (c & 0x3F); + g_string_append_c (dest_str, b); + } + else if (c < 0x10000) + { + b = 0xE0 | (c >> 12); + g_string_append_c (dest_str, b); + b = 0x80 | ((c >> 6) & 0x3F); + g_string_append_c (dest_str, b); + b = 0x80 | (c & 0x3F); + g_string_append_c (dest_str, b); + } + else if (c < 0x10FFFF) + { + b = 0xF0 | (c >> 16); + g_string_append_c (dest_str, b); + b = 0x80 | ((c >> 12) & 0x3F); + g_string_append_c (dest_str, b); + b = 0x80 | ((c >> 6) & 0x3F); + g_string_append_c (dest_str, b); + b = 0x80 | (c & 0x3F); + g_string_append_c (dest_str, b); + } +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions 
****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +void +mc_search__cond_struct_new_init_regex (const char *charset, mc_search_t * lc_mc_search, + mc_search_cond_t * mc_search_cond) +{ + if (lc_mc_search->whole_words && !lc_mc_search->is_entire_line) + { + /* NOTE: \b as word boundary doesn't allow search + * whole words with non-ASCII symbols. + * Update: Is it still true nowadays? Probably not. #2396, #3524 */ + g_string_prepend (mc_search_cond->str, "(?<![\\p{L}\\p{N}_])"); + g_string_append (mc_search_cond->str, "(?![\\p{L}\\p{N}_])"); + } + + { +#ifdef SEARCH_TYPE_GLIB + GError *mcerror = NULL; + GRegexCompileFlags g_regex_options = G_REGEX_OPTIMIZE | G_REGEX_DOTALL; + + if (str_isutf8 (charset) && mc_global.utf8_display) + { + if (!lc_mc_search->is_case_sensitive) + g_regex_options |= G_REGEX_CASELESS; + } + else + { + g_regex_options |= G_REGEX_RAW; + + if (!lc_mc_search->is_case_sensitive) + { + GString *tmp; + + tmp = mc_search_cond->str; + mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp); + g_string_free (tmp, TRUE); + } + } + + mc_search_cond->regex_handle = + g_regex_new (mc_search_cond->str->str, g_regex_options, 0, &mcerror); + + if (mcerror != NULL) + { + lc_mc_search->error = MC_SEARCH_E_REGEX_COMPILE; + g_free (lc_mc_search->error_str); + lc_mc_search->error_str = + str_conv_gerror_message (mcerror, _("Regular expression error")); + g_error_free (mcerror); + return; + } +#else /* SEARCH_TYPE_GLIB */ + +#ifdef HAVE_PCRE2 + int errcode; + char error[BUF_SMALL]; + size_t erroffset; + int pcre_options = PCRE2_MULTILINE; +#else + const char *error; + int erroffset; + int pcre_options = PCRE_EXTRA | PCRE_MULTILINE; +#endif + + if (str_isutf8 (charset) && mc_global.utf8_display) + { +#ifdef HAVE_PCRE2 + pcre_options |= PCRE2_UTF; + if (!lc_mc_search->is_case_sensitive) + pcre_options |= 
PCRE2_CASELESS; +#else + pcre_options |= PCRE_UTF8; + if (!lc_mc_search->is_case_sensitive) + pcre_options |= PCRE_CASELESS; +#endif + } + else if (!lc_mc_search->is_case_sensitive) + { + GString *tmp; + + tmp = mc_search_cond->str; + mc_search_cond->str = mc_search__cond_struct_new_regex_ci_str (charset, tmp); + g_string_free (tmp, TRUE); + } + + mc_search_cond->regex_handle = +#ifdef HAVE_PCRE2 + pcre2_compile ((unsigned char *) mc_search_cond->str->str, PCRE2_ZERO_TERMINATED, + pcre_options, &errcode, &erroffset, NULL); +#else + pcre_compile (mc_search_cond->str->str, pcre_options, &error, &erroffset, NULL); +#endif + if (mc_search_cond->regex_handle == NULL) + { +#ifdef HAVE_PCRE2 + pcre2_get_error_message (errcode, (unsigned char *) error, sizeof (error)); +#endif + mc_search_set_error (lc_mc_search, MC_SEARCH_E_REGEX_COMPILE, "%s", error); + return; + } +#ifdef HAVE_PCRE2 + if (pcre2_jit_compile (mc_search_cond->regex_handle, PCRE2_JIT_COMPLETE) && *error != '\0') +#else + lc_mc_search->regex_match_info = pcre_study (mc_search_cond->regex_handle, 0, &error); + if (lc_mc_search->regex_match_info == NULL && error != NULL) +#endif + { + mc_search_set_error (lc_mc_search, MC_SEARCH_E_REGEX_COMPILE, "%s", error); + MC_PTR_FREE (mc_search_cond->regex_handle); + return; + } +#endif /* SEARCH_TYPE_GLIB */ + } + + lc_mc_search->is_utf8 = str_isutf8 (charset); +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +mc_search__run_regex (mc_search_t * lc_mc_search, const void *user_data, + gsize start_search, gsize end_search, gsize * found_len) +{ + mc_search_cbret_t ret = MC_SEARCH_CB_NOTFOUND; + gsize current_pos, virtual_pos; + gint start_pos; + gint end_pos; + + if (lc_mc_search->regex_buffer != NULL) + g_string_set_size (lc_mc_search->regex_buffer, 0); + else + lc_mc_search->regex_buffer = g_string_sized_new (64); + + virtual_pos = current_pos = start_search; + while (virtual_pos <= end_search) + { + 
g_string_set_size (lc_mc_search->regex_buffer, 0); + lc_mc_search->start_buffer = current_pos; + + if (lc_mc_search->search_fn != NULL) + { + while (TRUE) + { + int current_chr = '\n'; /* stop search symbol */ + + ret = lc_mc_search->search_fn (user_data, current_pos, ¤t_chr); + + if (ret == MC_SEARCH_CB_ABORT) + break; + + if (ret == MC_SEARCH_CB_INVALID) + continue; + + current_pos++; + + if (ret == MC_SEARCH_CB_SKIP) + continue; + + virtual_pos++; + + g_string_append_c (lc_mc_search->regex_buffer, (char) current_chr); + + if ((char) current_chr == '\n' || virtual_pos > end_search) + break; + } + } + else + { + /* optimization for standard case (for search from file manager) + * where there is no MC_SEARCH_CB_INVALID or MC_SEARCH_CB_SKIP + * return codes, so we can copy line at regex buffer all at once + */ + while (TRUE) + { + const char current_chr = ((const char *) user_data)[current_pos]; + + if (current_chr == '\0') + break; + + current_pos++; + + if (current_chr == '\n' || current_pos > end_search) + break; + } + + /* use virtual_pos as index of start of current chunk */ + g_string_append_len (lc_mc_search->regex_buffer, (const char *) user_data + virtual_pos, + current_pos - virtual_pos); + virtual_pos = current_pos; + } + + switch (mc_search__regex_found_cond (lc_mc_search, lc_mc_search->regex_buffer)) + { + case COND__FOUND_OK: +#ifdef SEARCH_TYPE_GLIB + g_match_info_fetch_pos (lc_mc_search->regex_match_info, 0, &start_pos, &end_pos); +#else /* SEARCH_TYPE_GLIB */ + start_pos = lc_mc_search->iovector[0]; + end_pos = lc_mc_search->iovector[1]; +#endif /* SEARCH_TYPE_GLIB */ + if (found_len != NULL) + *found_len = end_pos - start_pos; + lc_mc_search->normal_offset = lc_mc_search->start_buffer + start_pos; + return TRUE; + case COND__NOT_ALL_FOUND: + break; + default: + g_string_free (lc_mc_search->regex_buffer, TRUE); + lc_mc_search->regex_buffer = NULL; + return FALSE; + } + + if ((lc_mc_search->update_fn != NULL) && + ((lc_mc_search->update_fn) 
(user_data, current_pos) == MC_SEARCH_CB_ABORT)) + ret = MC_SEARCH_CB_ABORT; + + if (ret == MC_SEARCH_CB_ABORT || ret == MC_SEARCH_CB_NOTFOUND) + break; + } + + g_string_free (lc_mc_search->regex_buffer, TRUE); + lc_mc_search->regex_buffer = NULL; + + MC_PTR_FREE (lc_mc_search->error_str); + lc_mc_search->error = ret == MC_SEARCH_CB_ABORT ? MC_SEARCH_E_ABORT : MC_SEARCH_E_NOTFOUND; + + return FALSE; +} + +/* --------------------------------------------------------------------------------------------- */ + +GString * +mc_search_regex_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str) +{ + GString *ret; + + int num_replace_tokens; + gsize loop; + gsize prev = 0; + replace_transform_type_t replace_flags = REPLACE_T_NO_TRANSFORM; + + num_replace_tokens = + mc_search_regex__get_max_num_of_replace_tokens (replace_str->str, replace_str->len); + + if (lc_mc_search->num_results < 0) + return mc_g_string_dup (replace_str); + + if (num_replace_tokens > lc_mc_search->num_results - 1 + || num_replace_tokens > MC_SEARCH__NUM_REPLACE_ARGS) + { + mc_search_set_error (lc_mc_search, MC_SEARCH_E_REGEX_REPLACE, "%s", + _(STR_E_RPL_NOT_EQ_TO_FOUND)); + return NULL; + } + + ret = g_string_sized_new (64); + + for (loop = 0; loop < replace_str->len - 1; loop++) + { + int lc_index; + gchar *tmp_str; + gsize len = 0; + + lc_index = mc_search_regex__process_replace_str (replace_str, loop, &len, &replace_flags); + + if (lc_index == REPLACE_PREPARE_T_NOTHING_SPECIAL) + { + if (len != 0) + { + mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev, + &replace_flags); + mc_search_regex__process_append_str (ret, replace_str->str + loop + 1, len - 1, + &replace_flags); + prev = loop + len; + loop = prev - 1; /* prepare to loop++ */ + } + + continue; + } + + if (lc_index == REPLACE_PREPARE_T_REPLACE_FLAG) + { + if (loop != 0) + mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev, + &replace_flags); + prev = loop + len; + loop = 
prev - 1; /* prepare to loop++ */ + continue; + } + + /* escape sequence */ + if (lc_index == REPLACE_PREPARE_T_ESCAPE_SEQ) + { + mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev, + &replace_flags); + /* call process_escape_sequence without starting '\\' */ + mc_search_regex__process_escape_sequence (ret, replace_str->str + loop + 1, len - 1, + &replace_flags, lc_mc_search->is_utf8); + prev = loop + len; + loop = prev - 1; /* prepare to loop++ */ + continue; + } + + /* invalid capture buffer number */ + if (lc_index > lc_mc_search->num_results) + { + g_string_free (ret, TRUE); + mc_search_set_error (lc_mc_search, MC_SEARCH_E_REGEX_REPLACE, + _(STR_E_RPL_INVALID_TOKEN), lc_index); + return NULL; + } + + tmp_str = mc_search_regex__get_token_by_num (lc_mc_search, lc_index); + + if (loop != 0) + mc_search_regex__process_append_str (ret, replace_str->str + prev, loop - prev, + &replace_flags); + + mc_search_regex__process_append_str (ret, tmp_str, -1, &replace_flags); + g_free (tmp_str); + + prev = loop + len; + loop = prev - 1; /* prepare to loop++ */ + } + + mc_search_regex__process_append_str (ret, replace_str->str + prev, replace_str->len - prev, + &replace_flags); + + return ret; +} diff --git a/lib/search/search.c b/lib/search/search.c new file mode 100644 index 0000000..8ccb65f --- /dev/null +++ b/lib/search/search.c @@ -0,0 +1,521 @@ +/* + Search text engine. + Interface functions + + Copyright (C) 2009-2023 + Free Software Foundation, Inc. + + Written by: + Slava Zanko <slavazanko@gmail.com>, 2009 + Andrew Borodin <aborodin@vmail.ru>, 2013 + + This file is part of the Midnight Commander. + + The Midnight Commander is free software: you can redistribute it + and/or modify it under the terms of the GNU General Public License as + published by the Free Software Foundation, either version 3 of the License, + or (at your option) any later version. 
+ + The Midnight Commander is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#include <stdarg.h> +#include <stdlib.h> +#include <sys/types.h> + +#include "lib/global.h" +#include "lib/strutil.h" +#include "lib/search.h" +#include "lib/util.h" +#ifdef HAVE_CHARSET +#include "lib/charsets.h" +#endif + +#include "internal.h" + +/*** global variables ****************************************************************************/ + +/*** file scope macro definitions ****************************************************************/ + +/*** file scope type declarations ****************************************************************/ + +/*** forward declarations (file scope functions) *************************************************/ + +/*** file scope variables ************************************************************************/ + +static const mc_search_type_str_t mc_search__list_types[] = { + {N_("No&rmal"), MC_SEARCH_T_NORMAL}, + {N_("Re&gular expression"), MC_SEARCH_T_REGEX}, + {N_("He&xadecimal"), MC_SEARCH_T_HEX}, + {N_("Wil&dcard search"), MC_SEARCH_T_GLOB}, + {NULL, MC_SEARCH_T_INVALID} +}; + +/* --------------------------------------------------------------------------------------------- */ +/*** file scope functions ************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ + +static mc_search_cond_t * +mc_search__cond_struct_new (mc_search_t * lc_mc_search, const GString * str, const char *charset) +{ + mc_search_cond_t *mc_search_cond; + + mc_search_cond = g_malloc0 (sizeof (mc_search_cond_t)); + 
mc_search_cond->str = mc_g_string_dup (str); + mc_search_cond->charset = g_strdup (charset); +#ifdef HAVE_PCRE2 + lc_mc_search->regex_match_info = pcre2_match_data_create (MC_SEARCH__NUM_REPLACE_ARGS, NULL); + lc_mc_search->iovector = pcre2_get_ovector_pointer (lc_mc_search->regex_match_info); +#endif + switch (lc_mc_search->search_type) + { + case MC_SEARCH_T_GLOB: + mc_search__cond_struct_new_init_glob (charset, lc_mc_search, mc_search_cond); + break; + case MC_SEARCH_T_NORMAL: + mc_search__cond_struct_new_init_normal (charset, lc_mc_search, mc_search_cond); + break; + case MC_SEARCH_T_REGEX: + mc_search__cond_struct_new_init_regex (charset, lc_mc_search, mc_search_cond); + break; + case MC_SEARCH_T_HEX: + mc_search__cond_struct_new_init_hex (charset, lc_mc_search, mc_search_cond); + break; + default: + break; + } + return mc_search_cond; +} + +/* --------------------------------------------------------------------------------------------- */ + +static void +mc_search__cond_struct_free (gpointer data) +{ + mc_search_cond_t *mc_search_cond = (mc_search_cond_t *) data; + + if (mc_search_cond->upper != NULL) + g_string_free (mc_search_cond->upper, TRUE); + + if (mc_search_cond->lower != NULL) + g_string_free (mc_search_cond->lower, TRUE); + + g_string_free (mc_search_cond->str, TRUE); + g_free (mc_search_cond->charset); + +#ifdef SEARCH_TYPE_GLIB + if (mc_search_cond->regex_handle != NULL) + g_regex_unref (mc_search_cond->regex_handle); +#else /* SEARCH_TYPE_GLIB */ + g_free (mc_search_cond->regex_handle); +#endif /* SEARCH_TYPE_GLIB */ + + g_free (mc_search_cond); +} + +/* --------------------------------------------------------------------------------------------- */ +/*** public functions ****************************************************************************/ +/* --------------------------------------------------------------------------------------------- */ +/* Init search descriptor. 
+ * + * @param original pattern to search + * @param original_charset charset of #original. If NULL then cp_display will be used + * + * @return new mc_search_t object. Use #mc_search_free() to free it. + */ + +mc_search_t * +mc_search_new (const gchar * original, const gchar * original_charset) +{ + if (original == NULL) + return NULL; + + return mc_search_new_len (original, strlen (original), original_charset); +} + +/* --------------------------------------------------------------------------------------------- */ +/* Init search descriptor. + * + * @param original pattern to search + * @param original_len length of #original or -1 if #original is NULL-terminated + * @param original_charset charset of #original. If NULL then cp_display will be used + * + * @return new mc_search_t object. Use #mc_search_free() to free it. + */ + +mc_search_t * +mc_search_new_len (const gchar * original, gsize original_len, const gchar * original_charset) +{ + mc_search_t *lc_mc_search; + + if (original == NULL || original_len == 0) + return NULL; + + lc_mc_search = g_new0 (mc_search_t, 1); + lc_mc_search->original.str = g_string_new_len (original, original_len); +#ifdef HAVE_CHARSET + lc_mc_search->original.charset = + g_strdup (original_charset != NULL + && *original_charset != '\0' ? 
original_charset : cp_display); +#else + (void) original_charset; +#endif + + return lc_mc_search; +} + +/* --------------------------------------------------------------------------------------------- */ + +void +mc_search_free (mc_search_t * lc_mc_search) +{ + if (lc_mc_search == NULL) + return; + + g_string_free (lc_mc_search->original.str, TRUE); +#ifdef HAVE_CHARSET + g_free (lc_mc_search->original.charset); +#endif + g_free (lc_mc_search->error_str); + + if (lc_mc_search->prepared.conditions != NULL) + g_ptr_array_free (lc_mc_search->prepared.conditions, TRUE); + +#ifdef SEARCH_TYPE_GLIB + if (lc_mc_search->regex_match_info != NULL) + g_match_info_free (lc_mc_search->regex_match_info); +#else /* SEARCH_TYPE_GLIB */ + g_free (lc_mc_search->regex_match_info); +#endif /* SEARCH_TYPE_GLIB */ + + if (lc_mc_search->regex_buffer != NULL) + g_string_free (lc_mc_search->regex_buffer, TRUE); + + g_free (lc_mc_search); +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +mc_search_prepare (mc_search_t * lc_mc_search) +{ + GPtrArray *ret; + + if (lc_mc_search->prepared.conditions != NULL) + return lc_mc_search->prepared.result; + + ret = g_ptr_array_new_with_free_func (mc_search__cond_struct_free); +#ifdef HAVE_CHARSET + if (!lc_mc_search->is_all_charsets) + g_ptr_array_add (ret, + mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original.str, + lc_mc_search->original.charset)); + else + { + gsize loop1; + + for (loop1 = 0; loop1 < codepages->len; loop1++) + { + const char *id; + + id = ((codepage_desc *) g_ptr_array_index (codepages, loop1))->id; + if (g_ascii_strcasecmp (id, lc_mc_search->original.charset) == 0) + g_ptr_array_add (ret, + mc_search__cond_struct_new (lc_mc_search, + lc_mc_search->original.str, + lc_mc_search->original.charset)); + else + { + GString *buffer; + + buffer = + mc_search__recode_str (lc_mc_search->original.str->str, + lc_mc_search->original.str->len, + 
lc_mc_search->original.charset, id); + g_ptr_array_add (ret, mc_search__cond_struct_new (lc_mc_search, buffer, id)); + g_string_free (buffer, TRUE); + } + } + } +#else + g_ptr_array_add (ret, + mc_search__cond_struct_new (lc_mc_search, lc_mc_search->original.str, + str_detect_termencoding ())); +#endif + lc_mc_search->prepared.conditions = ret; + lc_mc_search->prepared.result = (lc_mc_search->error == MC_SEARCH_E_OK); + + return lc_mc_search->prepared.result; +} + +/* --------------------------------------------------------------------------------------------- */ + +/** + * Carries out the search. + * + * The search type is first checked with mc_search_is_type_avail(); an unsupported type sets the + * error to MC_SEARCH_E_INPUT. With SEARCH_TYPE_GLIB, match info kept from a previous run is + * freed. The error state is then reset to MC_SEARCH_E_OK, the search is prepared with + * mc_search_prepare(), and the work is handed to the runner that matches + * lc_mc_search->search_type. + * + * @param lc_mc_search search object; if NULL, FALSE is returned at once (no error is recorded) + * @param user_data handed unchanged to the type-specific runner; if NULL, FALSE is returned at once + * @param start_search handed unchanged to the type-specific runner + * @param end_search handed unchanged to the type-specific runner + * @param found_len handed unchanged to the type-specific runner + * + * Returns TRUE if found. + * + * Returns FALSE if not found. In this case, lc_mc_search->error reveals + * the reason: + * + * - MC_SEARCH_E_NOTFOUND: the pattern isn't in the subject string. + * - MC_SEARCH_E_ABORT: the user aborted the search. + * - For any other reason (but not for the above two!): the description + * is in lc_mc_search->error_str. + */ +gboolean +mc_search_run (mc_search_t * lc_mc_search, const void *user_data, + gsize start_search, gsize end_search, gsize * found_len) +{ + gboolean ret = FALSE; + + if (lc_mc_search == NULL || user_data == NULL) + return FALSE; + if (!mc_search_is_type_avail (lc_mc_search->search_type)) + { + mc_search_set_error (lc_mc_search, MC_SEARCH_E_INPUT, "%s", _(STR_E_UNKNOWN_TYPE)); + return FALSE; + } +#ifdef SEARCH_TYPE_GLIB + if (lc_mc_search->regex_match_info != NULL) + { + g_match_info_free (lc_mc_search->regex_match_info); + lc_mc_search->regex_match_info = NULL; + } +#endif /* SEARCH_TYPE_GLIB */ + + mc_search_set_error (lc_mc_search, MC_SEARCH_E_OK, NULL); + + if (!mc_search_prepare (lc_mc_search)) + return FALSE; + + switch (lc_mc_search->search_type) + { + case MC_SEARCH_T_NORMAL: + ret = mc_search__run_normal (lc_mc_search, user_data, start_search, end_search, found_len); + break; + case MC_SEARCH_T_REGEX: + ret = mc_search__run_regex (lc_mc_search, user_data, start_search, end_search, found_len); + break; + 
case MC_SEARCH_T_GLOB: + ret = mc_search__run_glob (lc_mc_search, user_data, start_search, end_search, found_len); + break; + case MC_SEARCH_T_HEX: + ret = mc_search__run_hex (lc_mc_search, user_data, start_search, end_search, found_len); + break; + default: + break; + } + return ret; +} + +/* --------------------------------------------------------------------------------------------- */ + /** + * Tells whether a search type is supported. + * + * @param search_type search type to check + * + * @return TRUE for MC_SEARCH_T_GLOB, MC_SEARCH_T_NORMAL, MC_SEARCH_T_REGEX and MC_SEARCH_T_HEX, + * FALSE for any other value. + */ +gboolean +mc_search_is_type_avail (mc_search_type_t search_type) +{ + switch (search_type) + { + case MC_SEARCH_T_GLOB: + case MC_SEARCH_T_NORMAL: + case MC_SEARCH_T_REGEX: + case MC_SEARCH_T_HEX: + return TRUE; + default: + break; + } + return FALSE; +} + +/* --------------------------------------------------------------------------------------------- */ + /** + * Gives access to the mc_search__list_types table. + * + * @param num if not NULL, receives the number of elements of the table minus one + * + * @return the mc_search__list_types table itself (not a copy). + */ +const mc_search_type_str_t * +mc_search_types_list_get (size_t * num) +{ + /* don't count last NULL item */ + if (num != NULL) + *num = G_N_ELEMENTS (mc_search__list_types) - 1; + + return mc_search__list_types; +} + +/* --------------------------------------------------------------------------------------------- */ + /** + * Prepares a replacement string according to the search type of the search object. + * + * @param lc_mc_search search object; if NULL, the result of mc_g_string_dup (replace_str) is returned + * @param replace_str replacement text; if NULL or empty, a new empty GString is returned + * + * @return the result of the *_prepare_replace_str() helper that matches the search type, or the + * result of mc_g_string_dup (replace_str) for any other search type. + */ +GString * +mc_search_prepare_replace_str (mc_search_t * lc_mc_search, GString * replace_str) +{ + GString *ret; + + if (replace_str == NULL || replace_str->len == 0) + return g_string_new (""); + + if (lc_mc_search == NULL) + return mc_g_string_dup (replace_str); + + switch (lc_mc_search->search_type) + { + case MC_SEARCH_T_REGEX: + ret = mc_search_regex_prepare_replace_str (lc_mc_search, replace_str); + break; + case MC_SEARCH_T_GLOB: + ret = mc_search_glob_prepare_replace_str (lc_mc_search, replace_str); + break; + case MC_SEARCH_T_NORMAL: + ret = mc_search_normal_prepare_replace_str (lc_mc_search, replace_str); + break; + case MC_SEARCH_T_HEX: + ret = mc_search_hex_prepare_replace_str (lc_mc_search, replace_str); + break; + default: + ret = mc_g_string_dup (replace_str); + break; + } + return ret; +} + +/* --------------------------------------------------------------------------------------------- */ + 
+char * +mc_search_prepare_replace_str2 (mc_search_t * lc_mc_search, const char *replace_str) +{ + GString *ret; + GString *replace_str2; + + replace_str2 = g_string_new (replace_str); + ret = mc_search_prepare_replace_str (lc_mc_search, replace_str2); + g_string_free (replace_str2, TRUE); + return (ret != NULL) ? g_string_free (ret, FALSE) : NULL; +} + +/* --------------------------------------------------------------------------------------------- */ + +gboolean +mc_search_is_fixed_search_str (const mc_search_t * lc_mc_search) +{ + if (lc_mc_search == NULL) + return FALSE; + switch (lc_mc_search->search_type) + { + case MC_SEARCH_T_REGEX: + case MC_SEARCH_T_GLOB: + return FALSE; + default: + return TRUE; + } +} + +/* --------------------------------------------------------------------------------------------- */ +/* Search specified pattern in specified string. + * + * @param pattern string to search + * @param pattern_charset charset of #pattern. If NULL then cp_display will be used + * @param str string where search #pattern + * @param search type (normal, regex, hex or glob) + * + * @return TRUE if found is successful, FALSE otherwise. 
+ */ + +gboolean +mc_search (const gchar * pattern, const gchar * pattern_charset, const gchar * str, + mc_search_type_t type) +{ + gboolean ret; + mc_search_t *search; + + if (str == NULL) + return FALSE; + + search = mc_search_new (pattern, pattern_charset); + if (search == NULL) + return FALSE; + + search->search_type = type; + search->is_case_sensitive = TRUE; + + if (type == MC_SEARCH_T_GLOB) + search->is_entire_line = TRUE; + + ret = mc_search_run (search, str, 0, strlen (str), NULL); + mc_search_free (search); + return ret; +} + +/* --------------------------------------------------------------------------------------------- */ + +int +mc_search_getstart_result_by_num (mc_search_t * lc_mc_search, int lc_index) +{ + if (lc_mc_search == NULL) + return 0; + if (lc_mc_search->search_type == MC_SEARCH_T_NORMAL) + return 0; +#ifdef SEARCH_TYPE_GLIB + { + gint start_pos; + gint end_pos; + + g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &start_pos, &end_pos); + return (int) start_pos; + } +#else /* SEARCH_TYPE_GLIB */ + return lc_mc_search->iovector[lc_index * 2]; +#endif /* SEARCH_TYPE_GLIB */ +} + +/* --------------------------------------------------------------------------------------------- */ + +int +mc_search_getend_result_by_num (mc_search_t * lc_mc_search, int lc_index) +{ + if (lc_mc_search == NULL) + return 0; + if (lc_mc_search->search_type == MC_SEARCH_T_NORMAL) + return 0; +#ifdef SEARCH_TYPE_GLIB + { + gint start_pos; + gint end_pos; + + g_match_info_fetch_pos (lc_mc_search->regex_match_info, lc_index, &start_pos, &end_pos); + return (int) end_pos; + } +#else /* SEARCH_TYPE_GLIB */ + return lc_mc_search->iovector[lc_index * 2 + 1]; +#endif /* SEARCH_TYPE_GLIB */ +} + +/* --------------------------------------------------------------------------------------------- */ +/** + * Replace an old error code and message of an mc_search_t object. 
+ * + * @param mc_search mc_search_t object + * @param code error code, one of mc_search_error_t values + * @param format format of error message. If NULL, the old error string is free'd and become NULL + */ + +void +mc_search_set_error (mc_search_t * lc_mc_search, mc_search_error_t code, const gchar * format, ...) +{ + lc_mc_search->error = code; + + MC_PTR_FREE (lc_mc_search->error_str); + + if (format != NULL) + { + va_list args; + + va_start (args, format); + lc_mc_search->error_str = g_strdup_vprintf (format, args); + va_end (args); + } +} + +/* --------------------------------------------------------------------------------------------- */ |