diff options
Diffstat (limited to 'lib/glob')
-rw-r--r-- | lib/glob/Makefile.in | 169 | ||||
-rw-r--r-- | lib/glob/collsyms.h | 140 | ||||
-rw-r--r-- | lib/glob/doc/Makefile | 5 | ||||
-rw-r--r-- | lib/glob/doc/glob.texi | 1 | ||||
-rw-r--r-- | lib/glob/glob.c | 1609 | ||||
-rw-r--r-- | lib/glob/glob.h | 47 | ||||
-rw-r--r-- | lib/glob/glob_loop.c | 84 | ||||
-rw-r--r-- | lib/glob/gm_loop.c | 208 | ||||
-rw-r--r-- | lib/glob/gmisc.c | 108 | ||||
-rw-r--r-- | lib/glob/ndir.h | 50 | ||||
-rw-r--r-- | lib/glob/sm_loop.c | 981 | ||||
-rw-r--r-- | lib/glob/smatch.c | 638 | ||||
-rw-r--r-- | lib/glob/strmatch.c | 79 | ||||
-rw-r--r-- | lib/glob/strmatch.h | 65 | ||||
-rw-r--r-- | lib/glob/xmbsrtowcs.c | 523 |
15 files changed, 4707 insertions, 0 deletions
diff --git a/lib/glob/Makefile.in b/lib/glob/Makefile.in new file mode 100644 index 0000000..27e34f9 --- /dev/null +++ b/lib/glob/Makefile.in @@ -0,0 +1,169 @@ +## -*- text -*- #################################################### +# # +# Makefile for the GNU Glob Library. # +# # +#################################################################### +# +# Copyright (C) 1996-2009 Free Software Foundation, Inc. + +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. + +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +srcdir = @srcdir@ +VPATH = @srcdir@ +topdir = @top_srcdir@ +BUILD_DIR = @BUILD_DIR@ + +INSTALL = @INSTALL@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_DATA = @INSTALL_DATA@ + +CC = @CC@ +RANLIB = @RANLIB@ +AR = @AR@ +ARFLAGS = @ARFLAGS@ +RM = rm -f +CP = cp +MV = mv + +SHELL = @MAKE_SHELL@ + +PROFILE_FLAGS = @PROFILE_FLAGS@ + +CFLAGS = @CFLAGS@ +LOCAL_CFLAGS = @LOCAL_CFLAGS@ +STYLE_CFLAGS = @STYLE_CFLAGS@ +CPPFLAGS = @CPPFLAGS@ +LDFLAGS = @LDFLAGS@ @LOCAL_LDFLAGS@ + +DEFS = @DEFS@ +LOCAL_DEFS = @LOCAL_DEFS@ + +BASHINCDIR = ${topdir}/include + +INCLUDES = -I. -I../.. -I$(topdir) -I$(BASHINCDIR) -I$(topdir)/lib + +CCFLAGS = $(PROFILE_FLAGS) $(DEFS) $(LOCAL_DEFS) ${INCLUDES} $(CPPFLAGS) \ + $(STYLE_CFLAGS) $(LOCAL_CFLAGS) $(CFLAGS) ${ADDON_CFLAGS} + +# Here is a rule for making .o files from .c files that doesn't force +# the type of the machine (like -sun3) into the flags. +.c.o: + $(RM) $@ + $(CC) -c $(CCFLAGS) $< + +# The name of the library target. +LIBRARY_NAME = libglob.a + +# The C code source files for this library. +CSOURCES = $(srcdir)/glob.c $(srcdir)/strmatch.c $(srcdir)/smatch.c \ + $(srcdir)/xmbsrtowcs.c + +# The header files for this library. +HSOURCES = $(srcdir)/strmatch.h + +OBJECTS = glob.o strmatch.o smatch.o xmbsrtowcs.o gmisc.o + +# The texinfo files which document this library. +DOCSOURCE = doc/glob.texi +DOCOBJECT = doc/glob.dvi +DOCSUPPORT = doc/Makefile +DOCUMENTATION = $(DOCSOURCE) $(DOCOBJECT) $(DOCSUPPORT) + +SUPPORT = Makefile ChangeLog $(DOCSUPPORT) + +SOURCES = $(CSOURCES) $(HSOURCES) $(DOCSOURCE) + +THINGS_TO_TAR = $(SOURCES) $(SUPPORT) + +###################################################################### + +all: $(LIBRARY_NAME) + +$(LIBRARY_NAME): $(OBJECTS) + $(RM) -f $@ + $(AR) $(ARFLAGS) $@ $(OBJECTS) + -test -n "$(RANLIB)" && $(RANLIB) $@ + +what-tar: + @for file in $(THINGS_TO_TAR); do \ + echo $(selfdir)$$file; \ + done + +documentation: force + -(cd doc; $(MAKE) $(MFLAGS)) +force: + +# The rule for 'includes' is written funny so that the if statement +# always returns TRUE unless there really was an error installing the +# include files. +install: + +clean: + rm -f $(OBJECTS) $(LIBRARY_NAME) + -(cd doc && $(MAKE) $(MFLAGS) $@ ) + +realclean distclean maintainer-clean: clean + -( cd doc && $(MAKE) $(MFLAGS) $@ ) + $(RM) -f Makefile + +mostlyclean: clean + -( cd doc && $(MAKE) $(MFLAGS) $@ ) + +${BUILD_DIR}/pathnames.h: ${BUILD_DIR}/config.h ${BUILD_DIR}/Makefile Makefile + -( cd ${BUILD_DIR} && ${MAKE} ${MFLAGS} pathnames.h ) + +###################################################################### +# # +# Dependencies for the object files which make up this library. # +# # +###################################################################### + +smatch.o: strmatch.h +smatch.o: $(BUILD_DIR)/config.h +smatch.o: $(BASHINCDIR)/chartypes.h +smatch.o: $(BASHINCDIR)/ansi_stdlib.h $(topdir)/bashansi.h +smatch.o: $(BASHINCDIR)/shmbutil.h +smatch.o: $(topdir)/xmalloc.h + +strmatch.o: strmatch.h +strmatch.o: $(BUILD_DIR)/config.h +strmatch.o: $(BASHINCDIR)/stdc.h + +glob.o: $(BUILD_DIR)/config.h +glob.o: $(topdir)/shell.h $(BUILD_DIR)/pathnames.h +glob.o: $(topdir)/bashtypes.h $(BASHINCDIR)/ansi_stdlib.h $(topdir)/bashansi.h +glob.o: $(BASHINCDIR)/posixstat.h $(BASHINCDIR)/memalloc.h +glob.o: strmatch.h glob.h +glob.o: $(BASHINCDIR)/shmbutil.h +glob.o: $(topdir)/xmalloc.h + +gmisc.o: $(BUILD_DIR)/config.h +gmisc.o: $(topdir)/bashtypes.h $(BASHINCDIR)/ansi_stdlib.h $(topdir)/bashansi.h +gmisc.o: $(BASHINCDIR)/shmbutil.h + +xmbsrtowcs.o: ${BUILD_DIR}/config.h +xmbsrtowcs.o: ${topdir}/bashansi.h ${BASHINCDIR}/ansi_stdlib.h +xmbsrtowcs.o: ${BASHINCDIR}/shmbutil.h + +# Rules for deficient makes, like SunOS and Solaris +glob.o: glob.c +gmisc.o: gmisc.c +strmatch.o: strmatch.c +smatch.o: smatch.c +xmbsrtowcs.o: xmbsrtowcs.c + +# dependencies for C files that include other C files +glob.o: glob_loop.c +gmisc.o: gm_loop.c +smatch.o: sm_loop.c diff --git a/lib/glob/collsyms.h b/lib/glob/collsyms.h new file mode 100644 index 0000000..d56df61 --- /dev/null +++ b/lib/glob/collsyms.h @@ -0,0 +1,140 @@ +/* collsyms.h -- collating symbol names and their corresponding characters + (in ascii) as given by POSIX.2 in table 2.8. */ + +/* Copyright (C) 1997-2002 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* The upper-case letters, lower-case letters, and digits are omitted from + this table. The digits are not included in the table in the POSIX.2 + spec. The upper and lower case letters are translated by the code + in smatch.c:collsym(). */ + +typedef struct _COLLSYM { + XCHAR *name; + CHAR code; +} __COLLSYM; + +static __COLLSYM POSIXCOLL [] = +{ + { L("NUL"), L('\0') }, + { L("SOH"), L('\001') }, + { L("STX"), L('\002') }, + { L("ETX"), L('\003') }, + { L("EOT"), L('\004') }, + { L("ENQ"), L('\005') }, + { L("ACK"), L('\006') }, +#ifdef __STDC__ + { L("alert"), L('\a') }, +#else + { L("alert"), L('\007') }, +#endif + { L("BS"), L('\010') }, + { L("backspace"), L('\b') }, + { L("HT"), L('\011') }, + { L("tab"), L('\t') }, + { L("LF"), L('\012') }, + { L("newline"), L('\n') }, + { L("VT"), L('\013') }, + { L("vertical-tab"), L('\v') }, + { L("FF"), L('\014') }, + { L("form-feed"), L('\f') }, + { L("CR"), L('\015') }, + { L("carriage-return"), L('\r') }, + { L("SO"), L('\016') }, + { L("SI"), L('\017') }, + { L("DLE"), L('\020') }, + { L("DC1"), L('\021') }, + { L("DC2"), L('\022') }, + { L("DC3"), L('\023') }, + { L("DC4"), L('\024') }, + { L("NAK"), L('\025') }, + { L("SYN"), L('\026') }, + { L("ETB"), L('\027') }, + { L("CAN"), L('\030') }, + { L("EM"), L('\031') }, + { L("SUB"), L('\032') }, + { L("ESC"), L('\033') }, + { L("IS4"), L('\034') }, + { L("FS"), L('\034') }, + { L("IS3"), L('\035') }, + { L("GS"), L('\035') }, + { L("IS2"), L('\036') }, + { L("RS"), L('\036') }, + { L("IS1"), L('\037') }, + { L("US"), L('\037') }, + { L("space"), L(' ') }, + { L("exclamation-mark"), L('!') }, + { L("quotation-mark"), L('"') }, + { L("number-sign"), L('#') }, + { L("dollar-sign"), L('$') }, + { L("percent-sign"), L('%') }, + { L("ampersand"), L('&') }, + { L("apostrophe"), L('\'') }, + { L("left-parenthesis"), L('(') }, + { L("right-parenthesis"), L(')') }, + { L("asterisk"), L('*') }, + { L("plus-sign"), L('+') }, + { L("comma"), L(',') }, + { L("hyphen"), L('-') }, + { L("hyphen-minus"), L('-') }, + { L("minus"), L('-') }, /* extension from POSIX.2 */ + { L("dash"), L('-') }, /* extension from POSIX.2 */ + { L("period"), L('.') }, + { L("full-stop"), L('.') }, + { L("slash"), L('/') }, + { L("solidus"), L('/') }, /* extension from POSIX.2 */ + { L("zero"), L('0') }, + { L("one"), L('1') }, + { L("two"), L('2') }, + { L("three"), L('3') }, + { L("four"), L('4') }, + { L("five"), L('5') }, + { L("six"), L('6') }, + { L("seven"), L('7') }, + { L("eight"), L('8') }, + { L("nine"), L('9') }, + { L("colon"), L(':') }, + { L("semicolon"), L(';') }, + { L("less-than-sign"), L('<') }, + { L("equals-sign"), L('=') }, + { L("greater-than-sign"), L('>') }, + { L("question-mark"), L('?') }, + { L("commercial-at"), L('@') }, + /* upper-case letters omitted */ + { L("left-square-bracket"), L('[') }, + { L("backslash"), L('\\') }, + { L("reverse-solidus"), L('\\') }, + { L("right-square-bracket"), L(']') }, + { L("circumflex"), L('^') }, + { L("circumflex-accent"), L('^') }, /* extension from POSIX.2 */ + { L("underscore"), L('_') }, + { L("grave-accent"), L('`') }, + /* lower-case letters omitted */ + { L("left-brace"), L('{') }, /* extension from POSIX.2 */ + { L("left-curly-bracket"), L('{') }, + { L("vertical-line"), L('|') }, + { L("right-brace"), L('}') }, /* extension from POSIX.2 */ + { L("right-curly-bracket"), L('}') }, + { L("tilde"), L('~') }, + { L("DEL"), L('\177') }, + { 0, 0 }, +}; + +#undef _COLLSYM +#undef __COLLSYM +#undef POSIXCOLL diff --git a/lib/glob/doc/Makefile b/lib/glob/doc/Makefile new file mode 100644 index 0000000..8dca606 --- /dev/null +++ b/lib/glob/doc/Makefile @@ -0,0 +1,5 @@ +all: + cp glob.texi glob.info + +clean distclean mostlyclean maintainer-clean: + rm -f glob.?? glob.info diff --git a/lib/glob/doc/glob.texi b/lib/glob/doc/glob.texi new file mode 100644 index 0000000..0262ef1 --- /dev/null +++ b/lib/glob/doc/glob.texi @@ -0,0 +1 @@ +Nothing happens here. diff --git a/lib/glob/glob.c b/lib/glob/glob.c new file mode 100644 index 0000000..b66af85 --- /dev/null +++ b/lib/glob/glob.c @@ -0,0 +1,1609 @@ +/* glob.c -- file-name wildcard pattern matching for Bash. + + Copyright (C) 1985-2021 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne-Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* To whomever it may concern: I have never seen the code which most + Unix programs use to perform this function. I wrote this from scratch + based on specifications for the pattern matching. --RMS. */ + +#include <config.h> + +#if !defined (__GNUC__) && !defined (HAVE_ALLOCA_H) && defined (_AIX) + #pragma alloca +#endif /* _AIX && RISC6000 && !__GNUC__ */ + +#include "bashtypes.h" + +#if defined (HAVE_UNISTD_H) +# include <unistd.h> +#endif + +#include "bashansi.h" +#include "posixdir.h" +#include "posixstat.h" +#include "shmbutil.h" +#include "xmalloc.h" + +#include "filecntl.h" +#if !defined (F_OK) +# define F_OK 0 +#endif + +#include "stdc.h" +#include "memalloc.h" + +#include <signal.h> + +#include "shell.h" +#include "general.h" + +#include "glob.h" +#include "strmatch.h" + +#if !defined (HAVE_BCOPY) && !defined (bcopy) +# define bcopy(s, d, n) ((void) memcpy ((d), (s), (n))) +#endif /* !HAVE_BCOPY && !bcopy */ + +#if !defined (NULL) +# if defined (__STDC__) +# define NULL ((void *) 0) +# else +# define NULL 0x0 +# endif /* __STDC__ */ +#endif /* !NULL */ + +#if !defined (FREE) +# define FREE(x) if (x) free (x) +#endif + +/* Don't try to alloca() more than this much memory for `struct globval' + in glob_vector() */ +#ifndef ALLOCA_MAX +# define ALLOCA_MAX 100000 +#endif + +struct globval + { + struct globval *next; + char *name; + }; + +extern void throw_to_top_level PARAMS((void)); +extern int sh_eaccess PARAMS((const char *, int)); +extern char *sh_makepath PARAMS((const char *, const char *, int)); +extern int signal_is_pending PARAMS((int)); +extern void run_pending_traps PARAMS((void)); + +extern int extended_glob; + +/* Global variable which controls whether or not * matches .*. + Non-zero means don't match .*. */ +int noglob_dot_filenames = 1; + +/* Global variable which controls whether or not filename globbing + is done without regard to case. */ +int glob_ignore_case = 0; + +/* Global variable controlling whether globbing ever returns . or .. + regardless of the pattern. If set to 1, no glob pattern will ever + match `.' or `..'. Disabled by default. */ +int glob_always_skip_dot_and_dotdot = 1; + +/* Global variable to return to signify an error in globbing. */ +char *glob_error_return; + +static struct globval finddirs_error_return; + +/* Some forward declarations. */ +static int skipname PARAMS((char *, char *, int)); +#if HANDLE_MULTIBYTE +static int mbskipname PARAMS((char *, char *, int)); +#endif +void udequote_pathname PARAMS((char *)); +#if HANDLE_MULTIBYTE +void wcdequote_pathname PARAMS((wchar_t *)); +static void wdequote_pathname PARAMS((char *)); +static void dequote_pathname PARAMS((char *)); +#else +# define dequote_pathname(p) udequote_pathname(p) +#endif +static int glob_testdir PARAMS((char *, int)); +static char **glob_dir_to_array PARAMS((char *, char **, int)); + +/* Make sure these names continue to agree with what's in smatch.c */ +extern char *glob_patscan PARAMS((char *, char *, int)); +extern wchar_t *glob_patscan_wc PARAMS((wchar_t *, wchar_t *, int)); + +/* And this from gmisc.c/gm_loop.c */ +extern int wextglob_pattern_p PARAMS((wchar_t *)); + +extern char *glob_dirscan PARAMS((char *, int)); + +/* Compile `glob_loop.c' for single-byte characters. */ +#define GCHAR unsigned char +#define CHAR char +#define INT int +#define L(CS) CS +#define INTERNAL_GLOB_PATTERN_P internal_glob_pattern_p +#include "glob_loop.c" + +/* Compile `glob_loop.c' again for multibyte characters. */ +#if HANDLE_MULTIBYTE + +#define GCHAR wchar_t +#define CHAR wchar_t +#define INT wint_t +#define L(CS) L##CS +#define INTERNAL_GLOB_PATTERN_P internal_glob_wpattern_p +#include "glob_loop.c" + +#endif /* HANDLE_MULTIBYTE */ + +/* And now a function that calls either the single-byte or multibyte version + of internal_glob_pattern_p. */ +int +glob_pattern_p (pattern) + const char *pattern; +{ +#if HANDLE_MULTIBYTE + size_t n; + wchar_t *wpattern; + int r; + + if (MB_CUR_MAX == 1 || mbsmbchar (pattern) == 0) + return (internal_glob_pattern_p ((unsigned char *)pattern)); + + /* Convert strings to wide chars, and call the multibyte version. */ + n = xdupmbstowcs (&wpattern, NULL, pattern); + if (n == (size_t)-1) + /* Oops. Invalid multibyte sequence. Try it as single-byte sequence. */ + return (internal_glob_pattern_p ((unsigned char *)pattern)); + + r = internal_glob_wpattern_p (wpattern); + free (wpattern); + + return r; +#else + return (internal_glob_pattern_p ((unsigned char *)pattern)); +#endif +} + +#if EXTENDED_GLOB + +#if defined (HANDLE_MULTIBYTE) +# define XSKIPNAME(p, d, f) mbskipname(p, d, f) +#else +# define XSKIPNAME(p, d, f) skipname(p, d, f) +#endif + +/* Return 1 if all subpatterns in the extended globbing pattern PAT indicate + that the name should be skipped. XXX - doesn't handle pattern negation, + not sure if it should */ +static int +extglob_skipname (pat, dname, flags) + char *pat, *dname; + int flags; +{ + char *pp, *pe, *t, *se; + int n, r, negate, wild, nullpat, xflags; + + negate = *pat == '!'; + wild = *pat == '*' || *pat == '?'; + pp = pat + 2; + se = pp + strlen (pp); /* end of pattern string */ + pe = glob_patscan (pp, se, 0); /* end of extglob pattern */ + + /* if pe == 0, this is an invalid extglob pattern */ + if (pe == 0) + return 0; + + xflags = flags | ( negate ? GX_NEGATE : 0); + + /* if pe != se we have more of the pattern at the end of the extglob + pattern. Check the easy case first ( */ + if (pe == se && *pe == 0 && pe[-1] == ')' && (t = strchr (pp, '|')) == 0) + { + pe[-1] = '\0'; + /* This is where we check whether the pattern is being negated and + match all files beginning with `.' if the pattern begins with a + literal `.'. */ + r = XSKIPNAME (pp, dname, xflags); /*(*/ + pe[-1] = ')'; + return r; + } + + /* Is the extglob pattern between the parens the null pattern? The null + pattern can match nothing, so should we check any remaining portion of + the pattern? */ + nullpat = pe >= (pat + 2) && pe[-2] == '(' && pe[-1] == ')'; + + /* check every subpattern */ + while (t = glob_patscan (pp, pe, '|')) + { + /* If T == PE and *T == 0 (&& PE[-1] == RPAREN), we have hit the end + of a pattern with no trailing characters. */ + n = t[-1]; /* ( */ + if (extglob_pattern_p (pp) && n == ')') /* nested extglob? */ + t[-1] = n; /* no-op for now */ + else + t[-1] = '\0'; + r = XSKIPNAME (pp, dname, xflags); + t[-1] = n; + if (r == 0) /* if any pattern says not skip, we don't skip */ + return r; + pp = t; + if (pp == pe) + break; + } + + /* glob_patscan might find end of string */ + if (pp == se) + return r; + + /* but if it doesn't then we didn't match a leading dot */ + if (wild && *pe) /* if we can match zero instances, check further */ + return (XSKIPNAME (pe, dname, flags)); + + return 1; +} +#endif + +/* Return 1 if DNAME should be skipped according to PAT. Mostly concerned + with matching leading `.'. */ +static int +skipname (pat, dname, flags) + char *pat; + char *dname; + int flags; +{ + int i; + +#if EXTENDED_GLOB + if (extglob_pattern_p (pat)) /* XXX */ + return (extglob_skipname (pat, dname, flags)); +#endif + + if (glob_always_skip_dot_and_dotdot && DOT_OR_DOTDOT (dname)) + return 1; + + /* If a leading dot need not be explicitly matched, and the pattern + doesn't start with a `.', don't match `.' or `..' */ + if (noglob_dot_filenames == 0 && pat[0] != '.' && + (pat[0] != '\\' || pat[1] != '.') && + DOT_OR_DOTDOT (dname)) + return 1; + +#if 0 + /* This is where we check whether the pattern is being negated and + match all files beginning with `.' if the pattern begins with a + literal `.'. This is the negation of the next clause. */ + else if ((flags & GX_NEGATE) && noglob_dot_filenames == 0 && + dname[0] == '.' && + (pat[0] == '.' || (pat[0] == '\\' && pat[1] == '.'))) + return 0; +#endif + + /* If a dot must be explicitly matched, check to see if they do. */ + else if (noglob_dot_filenames && dname[0] == '.' && + pat[0] != '.' && (pat[0] != '\\' || pat[1] != '.')) + return 1; + + return 0; +} + +#if HANDLE_MULTIBYTE + +static int +wskipname (pat, dname, flags) + wchar_t *pat, *dname; + int flags; +{ + int i; + + if (glob_always_skip_dot_and_dotdot && WDOT_OR_DOTDOT (dname)) + return 1; + + /* If a leading dot need not be explicitly matched, and the + pattern doesn't start with a `.', don't match `.' or `..' */ + if (noglob_dot_filenames == 0 && pat[0] != L'.' && + (pat[0] != L'\\' || pat[1] != L'.') && + WDOT_OR_DOTDOT (dname)) + return 1; + +#if 0 + /* This is where we check whether the pattern is being negated and + match all files beginning with `.' if the pattern begins with a + literal `.'. This is the negation of the next clause. */ + else if ((flags & GX_NEGATE) && noglob_dot_filenames == 0 && + dname[0] == L'.' && + (pat[0] == L'.' || (pat[0] == L'\\' && pat[1] == L'.'))) + return 0; +#endif + + /* If a leading dot must be explicitly matched, check to see if the + pattern and dirname both have one. */ + else if (noglob_dot_filenames && dname[0] == L'.' && + pat[0] != L'.' && (pat[0] != L'\\' || pat[1] != L'.')) + return 1; + + return 0; +} + +static int +wextglob_skipname (pat, dname, flags) + wchar_t *pat, *dname; + int flags; +{ +#if EXTENDED_GLOB + wchar_t *pp, *pe, *t, *se, n; + int r, negate, wild, nullpat, xflags; + + negate = *pat == L'!'; + wild = *pat == L'*' || *pat == L'?'; + pp = pat + 2; + se = pp + wcslen (pp); + pe = glob_patscan_wc (pp, se, 0); + + /* if pe == 0, this is an invalid extglob pattern */ + if (pe == 0) + return 0; + + xflags = flags | ( negate ? GX_NEGATE : 0); + + /* if pe != se we have more of the pattern at the end of the extglob + pattern. Check the easy case first ( */ + if (pe == se && *pe == L'\0' && pe[-1] == L')' && (t = wcschr (pp, L'|')) == 0) + { + pe[-1] = L'\0'; + r = wskipname (pp, dname, xflags); /*(*/ + pe[-1] = L')'; + return r; + } + + /* Is the extglob pattern between the parens the null pattern? The null + pattern can match nothing, so should we check any remaining portion of + the pattern? */ + nullpat = pe >= (pat + 2) && pe[-2] == L'(' && pe[-1] == L')'; + + /* check every subpattern */ + while (t = glob_patscan_wc (pp, pe, '|')) + { + n = t[-1]; /* ( */ + if (wextglob_pattern_p (pp) && n == L')') /* nested extglob? */ + t[-1] = n; /* no-op for now */ + else + t[-1] = L'\0'; + r = wskipname (pp, dname, xflags); + t[-1] = n; + if (r == 0) + return 0; + pp = t; + if (pp == pe) + break; + } + + /* glob_patscan_wc might find end of string */ + if (pp == se) + return r; + + /* but if it doesn't then we didn't match a leading dot */ + if (wild && *pe != L'\0') + return (wskipname (pe, dname, flags)); + + return 1; +#else + return (wskipname (pat, dname, flags)); +#endif +} + +/* Return 1 if DNAME should be skipped according to PAT. Handles multibyte + characters in PAT and DNAME. Mostly concerned with matching leading `.'. */ +static int +mbskipname (pat, dname, flags) + char *pat, *dname; + int flags; +{ + int ret, ext; + wchar_t *pat_wc, *dn_wc; + size_t pat_n, dn_n; + + if (mbsmbchar (dname) == 0 && mbsmbchar (pat) == 0) + return (skipname (pat, dname, flags)); + + ext = 0; +#if EXTENDED_GLOB + ext = extglob_pattern_p (pat); +#endif + + pat_wc = dn_wc = (wchar_t *)NULL; + + pat_n = xdupmbstowcs (&pat_wc, NULL, pat); + if (pat_n != (size_t)-1) + dn_n = xdupmbstowcs (&dn_wc, NULL, dname); + + ret = 0; + if (pat_n != (size_t)-1 && dn_n !=(size_t)-1) + ret = ext ? wextglob_skipname (pat_wc, dn_wc, flags) : wskipname (pat_wc, dn_wc, flags); + else + ret = skipname (pat, dname, flags); + + FREE (pat_wc); + FREE (dn_wc); + + return ret; +} +#endif /* HANDLE_MULTIBYTE */ + +/* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */ +void +udequote_pathname (pathname) + char *pathname; +{ + register int i, j; + + for (i = j = 0; pathname && pathname[i]; ) + { + if (pathname[i] == '\\') + i++; + + pathname[j++] = pathname[i++]; + + if (pathname[i - 1] == 0) + break; + } + if (pathname) + pathname[j] = '\0'; +} + +#if HANDLE_MULTIBYTE +/* Remove backslashes quoting characters in PATHNAME by modifying PATHNAME. */ +void +wcdequote_pathname (wpathname) + wchar_t *wpathname; +{ + int i, j; + + for (i = j = 0; wpathname && wpathname[i]; ) + { + if (wpathname[i] == L'\\') + i++; + + wpathname[j++] = wpathname[i++]; + + if (wpathname[i - 1] == L'\0') + break; + } + if (wpathname) + wpathname[j] = L'\0'; +} + +static void +wdequote_pathname (pathname) + char *pathname; +{ + mbstate_t ps; + size_t len, n; + wchar_t *wpathname; + int i, j; + wchar_t *orig_wpathname; + + if (mbsmbchar (pathname) == 0) + { + udequote_pathname (pathname); + return; + } + + len = strlen (pathname); + /* Convert the strings into wide characters. */ + n = xdupmbstowcs (&wpathname, NULL, pathname); + if (n == (size_t) -1) + { + /* Something wrong. Fall back to single-byte */ + udequote_pathname (pathname); + return; + } + orig_wpathname = wpathname; + + wcdequote_pathname (wpathname); + + /* Convert the wide character string into unibyte character set. */ + memset (&ps, '\0', sizeof(mbstate_t)); + n = wcsrtombs(pathname, (const wchar_t **)&wpathname, len, &ps); + if (n == (size_t)-1 || (wpathname && *wpathname != 0)) /* what? now you tell me? */ + { + wpathname = orig_wpathname; + memset (&ps, '\0', sizeof(mbstate_t)); + n = xwcsrtombs (pathname, (const wchar_t **)&wpathname, len, &ps); + } + pathname[len] = '\0'; + + /* Can't just free wpathname here; wcsrtombs changes it in many cases. */ + free (orig_wpathname); +} + +static void +dequote_pathname (pathname) + char *pathname; +{ + if (MB_CUR_MAX > 1) + wdequote_pathname (pathname); + else + udequote_pathname (pathname); +} +#endif /* HANDLE_MULTIBYTE */ + +/* Test whether NAME exists. */ + +#if defined (HAVE_LSTAT) +# define GLOB_TESTNAME(name) (lstat (name, &finfo)) +#else /* !HAVE_LSTAT */ +# if !defined (AFS) +# define GLOB_TESTNAME(name) (sh_eaccess (name, F_OK)) +# else /* AFS */ +# define GLOB_TESTNAME(name) (access (name, F_OK)) +# endif /* AFS */ +#endif /* !HAVE_LSTAT */ + +/* Return 0 if DIR is a directory, -2 if DIR is a symlink, -1 otherwise. */ +static int +glob_testdir (dir, flags) + char *dir; + int flags; +{ + struct stat finfo; + int r; + +/*itrace("glob_testdir: testing %s" flags = %d, dir, flags);*/ +#if defined (HAVE_LSTAT) + r = (flags & GX_ALLDIRS) ? lstat (dir, &finfo) : stat (dir, &finfo); +#else + r = stat (dir, &finfo); +#endif + if (r < 0) + return (-1); + +#if defined (S_ISLNK) + if (S_ISLNK (finfo.st_mode)) + return (-2); +#endif + + if (S_ISDIR (finfo.st_mode) == 0) + return (-1); + + return (0); +} + +/* Recursively scan SDIR for directories matching PAT (PAT is always `**'). + FLAGS is simply passed down to the recursive call to glob_vector. Returns + a list of matching directory names. EP, if non-null, is set to the last + element of the returned list. NP, if non-null, is set to the number of + directories in the returned list. These two variables exist for the + convenience of the caller (always glob_vector). */ +static struct globval * +finddirs (pat, sdir, flags, ep, np) + char *pat; + char *sdir; + int flags; + struct globval **ep; + int *np; +{ + char **r, *n; + int ndirs; + struct globval *ret, *e, *g; + +/*itrace("finddirs: pat = `%s' sdir = `%s' flags = 0x%x", pat, sdir, flags);*/ + e = ret = 0; + r = glob_vector (pat, sdir, flags); + if (r == 0 || r[0] == 0) + { + if (np) + *np = 0; + if (ep) + *ep = 0; + if (r && r != &glob_error_return) + free (r); + return (struct globval *)0; + } + for (ndirs = 0; r[ndirs] != 0; ndirs++) + { + g = (struct globval *) malloc (sizeof (struct globval)); + if (g == 0) + { + while (ret) /* free list built so far */ + { + g = ret->next; + free (ret); + ret = g; + } + + free (r); + if (np) + *np = 0; + if (ep) + *ep = 0; + return (&finddirs_error_return); + } + if (e == 0) + e = g; + + g->next = ret; + ret = g; + + g->name = r[ndirs]; + } + + free (r); + if (ep) + *ep = e; + if (np) + *np = ndirs; + + return ret; +} + +/* Return a vector of names of files in directory DIR + whose names match glob pattern PAT. + The names are not in any particular order. + Wildcards at the beginning of PAT do not match an initial period. + + The vector is terminated by an element that is a null pointer. + + To free the space allocated, first free the vector's elements, + then free the vector. + + Return 0 if cannot get enough memory to hold the pointer + and the names. + + Return -1 if cannot access directory DIR. + Look in errno for more information. */ + +char ** +glob_vector (pat, dir, flags) + char *pat; + char *dir; + int flags; +{ + DIR *d; + register struct dirent *dp; + struct globval *lastlink, *e, *dirlist; + register struct globval *nextlink; + register char *nextname, *npat, *subdir; + unsigned int count; + int lose, skip, ndirs, isdir, sdlen, add_current, patlen; + register char **name_vector; + register unsigned int i; + int mflags; /* Flags passed to strmatch (). */ + int pflags; /* flags passed to sh_makepath () */ + int hasglob; /* return value from glob_pattern_p */ + int nalloca; + struct globval *firstmalloc, *tmplink; + char *convfn; + + lastlink = 0; + count = lose = skip = add_current = 0; + + firstmalloc = 0; + nalloca = 0; + + name_vector = NULL; + +/*itrace("glob_vector: pat = `%s' dir = `%s' flags = 0x%x", pat, dir, flags);*/ + /* If PAT is empty, skip the loop, but return one (empty) filename. */ + if (pat == 0 || *pat == '\0') + { + if (glob_testdir (dir, 0) < 0) + return ((char **) &glob_error_return); + + nextlink = (struct globval *)alloca (sizeof (struct globval)); + if (nextlink == NULL) + return ((char **) NULL); + + nextlink->next = (struct globval *)0; + nextname = (char *) malloc (1); + if (nextname == 0) + lose = 1; + else + { + lastlink = nextlink; + nextlink->name = nextname; + nextname[0] = '\0'; + count = 1; + } + + skip = 1; + } + + patlen = (pat && *pat) ? strlen (pat) : 0; + + /* If the filename pattern (PAT) does not contain any globbing characters, + or contains a pattern with only backslash escapes (hasglob == 2), + we can dispense with reading the directory, and just see if there is + a filename `DIR/PAT'. If there is, and we can access it, just make the + vector to return and bail immediately. */ + hasglob = 0; + if (skip == 0 && ((hasglob = glob_pattern_p (pat)) == 0 || hasglob == 2)) + { + int dirlen; + struct stat finfo; + + if (glob_testdir (dir, 0) < 0) + return ((char **) &glob_error_return); + + dirlen = strlen (dir); + nextname = (char *)malloc (dirlen + patlen + 2); + npat = (char *)malloc (patlen + 1); + if (nextname == 0 || npat == 0) + { + FREE (nextname); + FREE (npat); + lose = 1; + } + else + { + strcpy (npat, pat); + dequote_pathname (npat); + + strcpy (nextname, dir); + nextname[dirlen++] = '/'; + strcpy (nextname + dirlen, npat); + + if (GLOB_TESTNAME (nextname) >= 0) + { + free (nextname); + nextlink = (struct globval *)alloca (sizeof (struct globval)); + if (nextlink) + { + nextlink->next = (struct globval *)0; + lastlink = nextlink; + nextlink->name = npat; + count = 1; + } + else + { + free (npat); + lose = 1; + } + } + else + { + free (nextname); + free (npat); + } + } + + skip = 1; + } + + if (skip == 0) + { + /* Open the directory, punting immediately if we cannot. If opendir + is not robust (i.e., it opens non-directories successfully), test + that DIR is a directory and punt if it's not. */ +#if defined (OPENDIR_NOT_ROBUST) + if (glob_testdir (dir, 0) < 0) + return ((char **) &glob_error_return); +#endif + + d = opendir (dir); + if (d == NULL) + return ((char **) &glob_error_return); + + /* Compute the flags that will be passed to strmatch(). We don't + need to do this every time through the loop. */ + mflags = (noglob_dot_filenames ? FNM_PERIOD : FNM_DOTDOT) | FNM_PATHNAME; + +#ifdef FNM_CASEFOLD + if (glob_ignore_case) + mflags |= FNM_CASEFOLD; +#endif + + if (extended_glob) + mflags |= FNM_EXTMATCH; + + add_current = ((flags & (GX_ALLDIRS|GX_ADDCURDIR)) == (GX_ALLDIRS|GX_ADDCURDIR)); + + /* Scan the directory, finding all names that match For each name that matches, allocate a struct globval + on the stack and store the name in it. + Chain those structs together; lastlink is the front of the chain. */ + while (1) + { + /* Make globbing interruptible in the shell. */ + if (interrupt_state || terminating_signal) + { + lose = 1; + break; + } + else if (signal_is_pending (SIGINT)) /* XXX - make SIGINT traps responsive */ + { + lose = 1; + break; + } + + dp = readdir (d); + if (dp == NULL) + break; + + /* If this directory entry is not to be used, try again. */ + if (REAL_DIR_ENTRY (dp) == 0) + continue; + +#if 0 + if (dp->d_name == 0 || *dp->d_name == 0) + continue; +#endif + +#if HANDLE_MULTIBYTE + if (MB_CUR_MAX > 1 && mbskipname (pat, dp->d_name, flags)) + continue; + else +#endif + if (skipname (pat, dp->d_name, flags)) + continue; + + /* If we're only interested in directories, don't bother with files */ + if (flags & (GX_MATCHDIRS|GX_ALLDIRS)) + { + pflags = (flags & GX_ALLDIRS) ? MP_RMDOT : 0; + if (flags & GX_NULLDIR) + pflags |= MP_IGNDOT; + subdir = sh_makepath (dir, dp->d_name, pflags); + isdir = glob_testdir (subdir, flags); + if (isdir < 0 && (flags & GX_MATCHDIRS)) + { + free (subdir); + continue; + } + } + + if (flags & GX_ALLDIRS) + { + if (isdir == 0) + { + dirlist = finddirs (pat, subdir, (flags & ~GX_ADDCURDIR), &e, &ndirs); + if (dirlist == &finddirs_error_return) + { + free (subdir); + lose = 1; + break; + } + if (ndirs) /* add recursive directories to list */ + { + if (firstmalloc == 0) + firstmalloc = e; + e->next = lastlink; + lastlink = dirlist; + count += ndirs; + } + } + + /* XXX - should we even add this if it's not a directory? */ + nextlink = (struct globval *) malloc (sizeof (struct globval)); + if (firstmalloc == 0) + firstmalloc = nextlink; + sdlen = strlen (subdir); + nextname = (char *) malloc (sdlen + 1); + if (nextlink == 0 || nextname == 0) + { + if (firstmalloc && firstmalloc == nextlink) + firstmalloc = 0; + /* If we reset FIRSTMALLOC we can free this here. */ + FREE (nextlink); + FREE (nextname); + free (subdir); + lose = 1; + break; + } + nextlink->next = lastlink; + lastlink = nextlink; + nextlink->name = nextname; + bcopy (subdir, nextname, sdlen + 1); + free (subdir); + ++count; + continue; + } + else if (flags & GX_MATCHDIRS) + free (subdir); + + convfn = fnx_fromfs (dp->d_name, D_NAMLEN (dp)); + if (strmatch (pat, convfn, mflags) != FNM_NOMATCH) + { + if (nalloca < ALLOCA_MAX) + { + nextlink = (struct globval *) alloca (sizeof (struct globval)); + nalloca += sizeof (struct globval); + } + else + { + nextlink = (struct globval *) malloc (sizeof (struct globval)); + if (firstmalloc == 0) + firstmalloc = nextlink; + } + + nextname = (char *) malloc (D_NAMLEN (dp) + 1); + if (nextlink == 0 || nextname == 0) + { + /* We free NEXTLINK here, since it won't be added to the + LASTLINK chain. If we used malloc, and it returned non- + NULL, firstmalloc will be set to something valid. If it's + NEXTLINK, reset it before we free NEXTLINK to avoid + duplicate frees. If not, it will be taken care of by the + loop below with TMPLINK. */ + if (firstmalloc) + { + if (firstmalloc == nextlink) + firstmalloc = 0; + FREE (nextlink); + } + FREE (nextname); + lose = 1; + break; + } + nextlink->next = lastlink; + lastlink = nextlink; + nextlink->name = nextname; + bcopy (dp->d_name, nextname, D_NAMLEN (dp) + 1); + ++count; + } + } + + (void) closedir (d); + } + + /* compat: if GX_ADDCURDIR, add the passed directory also. Add an empty + directory name as a placeholder if GX_NULLDIR (in which case the passed + directory name is "."). */ + if (add_current && lose == 0) + { + sdlen = strlen (dir); + nextname = (char *)malloc (sdlen + 1); + nextlink = (struct globval *) malloc (sizeof (struct globval)); + if (nextlink == 0 || nextname == 0) + { + FREE (nextlink); + FREE (nextname); + lose = 1; + } + else + { + nextlink->name = nextname; + nextlink->next = lastlink; + lastlink = nextlink; + if (flags & GX_NULLDIR) + nextname[0] = '\0'; + else + bcopy (dir, nextname, sdlen + 1); + ++count; + } + } + + if (lose == 0) + { + name_vector = (char **) malloc ((count + 1) * sizeof (char *)); + lose |= name_vector == NULL; + } + + /* Have we run out of memory or been interrupted? */ + if (lose) + { + tmplink = 0; + + /* Here free the strings we have got. */ + while (lastlink) + { + /* Since we build the list in reverse order, the first N entries + will be allocated with malloc, if firstmalloc is set, from + lastlink to firstmalloc. */ + if (firstmalloc) + { + if (lastlink == firstmalloc) + firstmalloc = 0; + tmplink = lastlink; + } + else + tmplink = 0; + free (lastlink->name); + lastlink = lastlink->next; + FREE (tmplink); + } + + /* Don't call QUIT; here; let higher layers deal with it. */ + + return ((char **)NULL); + } + + /* Copy the name pointers from the linked list into the vector. */ + for (tmplink = lastlink, i = 0; i < count; ++i) + { + name_vector[i] = tmplink->name; + tmplink = tmplink->next; + } + + name_vector[count] = NULL; + + /* If we allocated some of the struct globvals, free them now. */ + if (firstmalloc) + { + tmplink = 0; + while (lastlink) + { + tmplink = lastlink; + if (lastlink == firstmalloc) + lastlink = firstmalloc = 0; + else + lastlink = lastlink->next; + free (tmplink); + } + } + + return (name_vector); +} + +/* Return a new array which is the concatenation of each string in ARRAY + to DIR. This function expects you to pass in an allocated ARRAY, and + it takes care of free()ing that array. Thus, you might think of this + function as side-effecting ARRAY. This should handle GX_MARKDIRS. */ +static char ** +glob_dir_to_array (dir, array, flags) + char *dir, **array; + int flags; +{ + register unsigned int i, l; + int add_slash; + char **result, *new; + struct stat sb; + + l = strlen (dir); + if (l == 0) + { + if (flags & GX_MARKDIRS) + for (i = 0; array[i]; i++) + { + if ((stat (array[i], &sb) == 0) && S_ISDIR (sb.st_mode)) + { + l = strlen (array[i]); + new = (char *)realloc (array[i], l + 2); + if (new == 0) + return NULL; + new[l] = '/'; + new[l+1] = '\0'; + array[i] = new; + } + } + return (array); + } + + add_slash = dir[l - 1] != '/'; + + i = 0; + while (array[i] != NULL) + ++i; + + result = (char **) malloc ((i + 1) * sizeof (char *)); + if (result == NULL) + return (NULL); + + for (i = 0; array[i] != NULL; i++) + { + /* 3 == 1 for NUL, 1 for slash at end of DIR, 1 for GX_MARKDIRS */ + result[i] = (char *) malloc (l + strlen (array[i]) + 3); + + if (result[i] == NULL) + { + int ind; + for (ind = 0; ind < i; ind++) + free (result[ind]); + free (result); + return (NULL); + } + + strcpy (result[i], dir); + if (add_slash) + result[i][l] = '/'; + if (array[i][0]) + { + strcpy (result[i] + l + add_slash, array[i]); + if (flags & GX_MARKDIRS) + { + if ((stat (result[i], &sb) == 0) && S_ISDIR (sb.st_mode)) + { + size_t rlen; + rlen = strlen (result[i]); + result[i][rlen] = '/'; + result[i][rlen+1] = '\0'; + } + } + } + else + result[i][l+add_slash] = '\0'; + } + result[i] = NULL; + + /* Free the input array. */ + for (i = 0; array[i] != NULL; i++) + free (array[i]); + free ((char *) array); + + return (result); +} + +/* Do globbing on PATHNAME. Return an array of pathnames that match, + marking the end of the array with a null-pointer as an element. + If no pathnames match, then the array is empty (first element is null). + If there isn't enough memory, then return NULL. + If a file system error occurs, return -1; `errno' has the error code. */ +char ** +glob_filename (pathname, flags) + char *pathname; + int flags; +{ + char **result, **new_result; + unsigned int result_size; + char *directory_name, *filename, *dname, *fn; + unsigned int directory_len; + int free_dirname; /* flag */ + int dflags, hasglob; + + result = (char **) malloc (sizeof (char *)); + result_size = 1; + if (result == NULL) + return (NULL); + + result[0] = NULL; + + directory_name = NULL; + + /* Find the filename. */ + filename = strrchr (pathname, '/'); +#if defined (EXTENDED_GLOB) + if (filename && extended_glob) + { + fn = glob_dirscan (pathname, '/'); +#if DEBUG_MATCHING + if (fn != filename) + fprintf (stderr, "glob_filename: glob_dirscan: fn (%s) != filename (%s)\n", fn ? fn : "(null)", filename); +#endif + filename = fn; + } +#endif + + if (filename == NULL) + { + filename = pathname; + directory_name = ""; + directory_len = 0; + free_dirname = 0; + } + else + { + directory_len = (filename - pathname) + 1; + directory_name = (char *) malloc (directory_len + 1); + + if (directory_name == 0) /* allocation failed? */ + { + free (result); + return (NULL); + } + + bcopy (pathname, directory_name, directory_len); + directory_name[directory_len] = '\0'; + ++filename; + free_dirname = 1; + } + + hasglob = 0; + /* If directory_name contains globbing characters, then we + have to expand the previous levels. Just recurse. + If glob_pattern_p returns != [0,1] we have a pattern that has backslash + quotes but no unquoted glob pattern characters. We dequote it below. */ + if (directory_len > 0 && (hasglob = glob_pattern_p (directory_name)) == 1) + { + char **directories, *d, *p; + register unsigned int i; + int all_starstar, last_starstar; + + all_starstar = last_starstar = 0; + d = directory_name; + dflags = flags & ~GX_MARKDIRS; + /* Collapse a sequence of ** patterns separated by one or more slashes + to a single ** terminated by a slash or NUL */ + if ((flags & GX_GLOBSTAR) && d[0] == '*' && d[1] == '*' && (d[2] == '/' || d[2] == '\0')) + { + p = d; + while (d[0] == '*' && d[1] == '*' && (d[2] == '/' || d[2] == '\0')) + { + p = d; + if (d[2]) + { + d += 3; + while (*d == '/') + d++; + if (*d == 0) + break; + } + } + if (*d == 0) + all_starstar = 1; + d = p; + dflags |= GX_ALLDIRS|GX_ADDCURDIR; + directory_len = strlen (d); + } + + /* If there is a non [star][star]/ component in directory_name, we + still need to collapse trailing sequences of [star][star]/ into + a single one and note that the directory name ends with [star][star], + so we can compensate if filename is [star][star] */ + if ((flags & GX_GLOBSTAR) && all_starstar == 0) + { + int dl, prev; + prev = dl = directory_len; + while (dl >= 4 && d[dl - 1] == '/' && + d[dl - 2] == '*' && + d[dl - 3] == '*' && + d[dl - 4] == '/') + prev = dl, dl -= 3; + if (dl != directory_len) + last_starstar = 1; + directory_len = prev; + } + + /* If the directory name ends in [star][star]/ but the filename is + [star][star], just remove the final [star][star] from the directory + so we don't have to scan everything twice. */ + if (last_starstar && directory_len > 4 && + filename[0] == '*' && filename[1] == '*' && filename[2] == 0) + { + directory_len -= 3; + } + + if (d[directory_len - 1] == '/') + d[directory_len - 1] = '\0'; + + directories = glob_filename (d, dflags|GX_RECURSE); + + if (free_dirname) + { + free (directory_name); + directory_name = NULL; + } + + if (directories == NULL) + goto memory_error; + else if (directories == (char **)&glob_error_return) + { + free ((char *) result); + return ((char **) &glob_error_return); + } + else if (*directories == NULL) + { + free ((char *) directories); + free ((char *) result); + return ((char **) &glob_error_return); + } + + /* If we have something like [star][star]/[star][star], it's no use to + glob **, then do it again, and throw half the results away. */ + if (all_starstar && filename[0] == '*' && filename[1] == '*' && filename[2] == 0) + { + free ((char *) directories); + free (directory_name); + directory_name = NULL; + directory_len = 0; + goto only_filename; + } + + /* We have successfully globbed the preceding directory name. + For each name in DIRECTORIES, call glob_vector on it and + FILENAME. Concatenate the results together. */ + for (i = 0; directories[i] != NULL; ++i) + { + char **temp_results; + int shouldbreak; + + shouldbreak = 0; + /* XXX -- we've recursively scanned any directories resulting from + a `**', so turn off the flag. We turn it on again below if + filename is `**' */ + /* Scan directory even on a NULL filename. That way, `*h/' + returns only directories ending in `h', instead of all + files ending in `h' with a `/' appended. */ + dname = directories[i]; + dflags = flags & ~(GX_MARKDIRS|GX_ALLDIRS|GX_ADDCURDIR); + /* last_starstar? */ + if ((flags & GX_GLOBSTAR) && filename[0] == '*' && filename[1] == '*' && filename[2] == '\0') + dflags |= GX_ALLDIRS|GX_ADDCURDIR; + if (dname[0] == '\0' && filename[0]) + { + dflags |= GX_NULLDIR; + dname = "."; /* treat null directory name and non-null filename as current directory */ + } + + /* Special handling for symlinks to directories with globstar on */ + if (all_starstar && (dflags & GX_NULLDIR) == 0) + { + int dlen; + + /* If we have a directory name that is not null (GX_NULLDIR above) + and is a symlink to a directory, we return the symlink if + we're not `descending' into it (filename[0] == 0) and return + glob_error_return (which causes the code below to skip the + name) otherwise. I should fold this into a test that does both + checks instead of calling stat twice. */ + if (glob_testdir (dname, flags|GX_ALLDIRS) == -2 && glob_testdir (dname, 0) == 0) + { + if (filename[0] != 0) + temp_results = (char **)&glob_error_return; /* skip */ + else + { + /* Construct array to pass to glob_dir_to_array */ + temp_results = (char **)malloc (2 * sizeof (char *)); + if (temp_results == NULL) + goto memory_error; + temp_results[0] = (char *)malloc (1); + if (temp_results[0] == 0) + { + free (temp_results); + goto memory_error; + } + **temp_results = '\0'; + temp_results[1] = NULL; + dflags |= GX_SYMLINK; /* mostly for debugging */ + } + } + else + temp_results = glob_vector (filename, dname, dflags); + } + else + temp_results = glob_vector (filename, dname, dflags); + + /* Handle error cases. */ + if (temp_results == NULL) + goto memory_error; + else if (temp_results == (char **)&glob_error_return) + /* This filename is probably not a directory. Ignore it. */ + ; + else + { + char **array; + register unsigned int l; + + /* If we're expanding **, we don't need to glue the directory + name to the results; we've already done it in glob_vector */ + if ((dflags & GX_ALLDIRS) && filename[0] == '*' && filename[1] == '*' && (filename[2] == '\0' || filename[2] == '/')) + { + /* When do we remove null elements from temp_results? And + how to avoid duplicate elements in the final result? */ + /* If (dflags & GX_NULLDIR) glob_filename potentially left a + NULL placeholder in the temp results just in case + glob_vector/glob_dir_to_array did something with it, but + if it didn't, and we're not supposed to be passing them + through for some reason ((flags & GX_NULLDIR) == 0) we + need to remove all the NULL elements from the beginning + of TEMP_RESULTS. */ + /* If we have a null directory name and ** as the filename, + we have just searched for everything from the current + directory on down. Break now (shouldbreak = 1) to avoid + duplicate entries in the final result. */ +#define NULL_PLACEHOLDER(x) ((x) && *(x) && **(x) == 0) + if ((dflags & GX_NULLDIR) && (flags & GX_NULLDIR) == 0 && + NULL_PLACEHOLDER (temp_results)) +#undef NULL_PLACEHOLDER + { + register int i, n; + for (n = 0; temp_results[n] && *temp_results[n] == 0; n++) + ; + i = n; + do + temp_results[i - n] = temp_results[i]; + while (temp_results[i++] != 0); + array = temp_results; + shouldbreak = 1; + } + else + array = temp_results; + } + else if (dflags & GX_SYMLINK) + array = glob_dir_to_array (directories[i], temp_results, flags); + else + array = glob_dir_to_array (directories[i], temp_results, flags); + l = 0; + while (array[l] != NULL) + ++l; + + new_result = (char **)realloc (result, (result_size + l) * sizeof (char *)); + + if (new_result == NULL) + { + for (l = 0; array[l]; ++l) + free (array[l]); + free ((char *)array); + goto memory_error; + } + result = new_result; + + for (l = 0; array[l] != NULL; ++l) + result[result_size++ - 1] = array[l]; + + result[result_size - 1] = NULL; + + /* Note that the elements of ARRAY are not freed. */ + if (array != temp_results) + free ((char *) array); + else if ((dflags & GX_ALLDIRS) && filename[0] == '*' && filename[1] == '*' && filename[2] == '\0') + free (temp_results); /* expanding ** case above */ + + if (shouldbreak) + break; + } + } + /* Free the directories. */ + for (i = 0; directories[i]; i++) + free (directories[i]); + + free ((char *) directories); + + return (result); + } + +only_filename: + /* If there is only a directory name, return it. */ + if (*filename == '\0') + { + result = (char **) realloc ((char *) result, 2 * sizeof (char *)); + if (result == NULL) + { + if (free_dirname) + free (directory_name); + return (NULL); + } + /* If we have a directory name with quoted characters, and we are + being called recursively to glob the directory portion of a pathname, + we need to dequote the directory name before returning it so the + caller can read the directory */ + if (directory_len > 0 && hasglob == 2 && (flags & GX_RECURSE) != 0) + { + dequote_pathname (directory_name); + directory_len = strlen (directory_name); + } + + /* We could check whether or not the dequoted directory_name is a + directory and return it here, returning the original directory_name + if not, but we don't do that. We do return the dequoted directory + name if we're not being called recursively and the dequoted name + corresponds to an actual directory. For better backwards compatibility, + we can return &glob_error_return unconditionally in this case. */ + + if (directory_len > 0 && hasglob == 2 && (flags & GX_RECURSE) == 0) + { + dequote_pathname (directory_name); + if (glob_testdir (directory_name, 0) < 0) + { + if (free_dirname) + free (directory_name); + free ((char *) result); + return ((char **)&glob_error_return); + } + } + + /* Handle GX_MARKDIRS here. */ + result[0] = (char *) malloc (directory_len + 1); + if (result[0] == NULL) + goto memory_error; + bcopy (directory_name, result[0], directory_len + 1); + if (free_dirname) + free (directory_name); + result[1] = NULL; + return (result); + } + else + { + char **temp_results; + + /* There are no unquoted globbing characters in DIRECTORY_NAME. + Dequote it before we try to open the directory since there may + be quoted globbing characters which should be treated verbatim. */ + if (directory_len > 0) + dequote_pathname (directory_name); + + /* We allocated a small array called RESULT, which we won't be using. + Free that memory now. */ + free (result); + + /* Just return what glob_vector () returns appended to the + directory name. */ + /* If flags & GX_ALLDIRS, we're called recursively */ + dflags = flags & ~GX_MARKDIRS; + if (directory_len == 0) + dflags |= GX_NULLDIR; + if ((flags & GX_GLOBSTAR) && filename[0] == '*' && filename[1] == '*' && filename[2] == '\0') + { + dflags |= GX_ALLDIRS|GX_ADDCURDIR; +#if 0 + /* If we want all directories (dflags & GX_ALLDIRS) and we're not + being called recursively as something like `echo [star][star]/[star].o' + ((flags & GX_ALLDIRS) == 0), we want to prevent glob_vector from + adding a null directory name to the front of the temp_results + array. We turn off ADDCURDIR if not called recursively and + dlen == 0 */ +#endif + if (directory_len == 0 && (flags & GX_ALLDIRS) == 0) + dflags &= ~GX_ADDCURDIR; + } + temp_results = glob_vector (filename, + (directory_len == 0 ? "." : directory_name), + dflags); + + if (temp_results == NULL || temp_results == (char **)&glob_error_return) + { + if (free_dirname) + free (directory_name); + QUIT; /* XXX - shell */ + run_pending_traps (); + return (temp_results); + } + + result = glob_dir_to_array ((dflags & GX_ALLDIRS) ? "" : directory_name, temp_results, flags); + + if (free_dirname) + free (directory_name); + return (result); + } + + /* We get to memory_error if the program has run out of memory, or + if this is the shell, and we have been interrupted. */ + memory_error: + if (result != NULL) + { + register unsigned int i; + for (i = 0; result[i] != NULL; ++i) + free (result[i]); + free ((char *) result); + } + + if (free_dirname && directory_name) + free (directory_name); + + QUIT; + run_pending_traps (); + + return (NULL); +} + +#if defined (TEST) + +main (argc, argv) + int argc; + char **argv; +{ + unsigned int i; + + for (i = 1; i < argc; ++i) + { + char **value = glob_filename (argv[i], 0); + if (value == NULL) + puts ("Out of memory."); + else if (value == &glob_error_return) + perror (argv[i]); + else + for (i = 0; value[i] != NULL; i++) + puts (value[i]); + } + + exit (0); +} +#endif /* TEST. */ diff --git a/lib/glob/glob.h b/lib/glob/glob.h new file mode 100644 index 0000000..4741057 --- /dev/null +++ b/lib/glob/glob.h @@ -0,0 +1,47 @@ +/* File-name wildcard pattern matching for GNU. + Copyright (C) 1985-2021 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne-Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _GLOB_H_ +#define _GLOB_H_ + +#include "stdc.h" + +#define GX_MARKDIRS 0x001 /* mark directory names with trailing `/' */ +#define GX_NOCASE 0x002 /* ignore case */ +#define GX_MATCHDOT 0x004 /* match `.' literally */ +#define GX_MATCHDIRS 0x008 /* match only directory names */ +#define GX_ALLDIRS 0x010 /* match all directory names, no others */ +#define GX_NULLDIR 0x100 /* internal -- no directory preceding pattern */ +#define GX_ADDCURDIR 0x200 /* internal -- add passed directory name */ +#define GX_GLOBSTAR 0x400 /* turn on special handling of ** */ +#define GX_RECURSE 0x800 /* internal -- glob_filename called recursively */ +#define GX_SYMLINK 0x1000 /* internal -- symlink to a directory */ +#define GX_NEGATE 0x2000 /* internal -- extglob pattern being negated */ + +extern int glob_pattern_p PARAMS((const char *)); +extern char **glob_vector PARAMS((char *, char *, int)); +extern char **glob_filename PARAMS((char *, int)); + +extern int extglob_pattern_p PARAMS((const char *)); + +extern char *glob_error_return; +extern int noglob_dot_filenames; +extern int glob_ignore_case; + +#endif /* _GLOB_H_ */ diff --git a/lib/glob/glob_loop.c b/lib/glob/glob_loop.c new file mode 100644 index 0000000..467e7ae --- /dev/null +++ b/lib/glob/glob_loop.c @@ -0,0 +1,84 @@ +/* Copyright (C) 1991-2020 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +static int INTERNAL_GLOB_PATTERN_P PARAMS((const GCHAR *)); + +/* Return nonzero if PATTERN has any special globbing chars in it. + Compiled twice, once each for single-byte and multibyte characters. */ +static int +INTERNAL_GLOB_PATTERN_P (pattern) + const GCHAR *pattern; +{ + register const GCHAR *p; + register GCHAR c; + int bopen, bsquote; + + p = pattern; + bopen = bsquote = 0; + + while ((c = *p++) != L('\0')) + switch (c) + { + case L('?'): + case L('*'): + return 1; + + case L('['): /* Only accept an open brace if there is a close */ + bopen++; /* brace to match it. Bracket expressions must be */ + continue; /* complete, according to Posix.2 */ + case L(']'): + if (bopen) + return 1; + continue; + + case L('+'): /* extended matching operators */ + case L('@'): + case L('!'): + if (*p == L('(')) /*) */ + return 1; + continue; + + case L('\\'): + /* Don't let the pattern end in a backslash (GMATCH returns no match + if the pattern ends in a backslash anyway), but otherwise note that + we have seen this, since the matching engine uses backslash as an + escape character and it can be removed. We return 2 later if we + have seen only backslash-escaped characters, so interested callers + know they can shortcut and just dequote the pathname. */ + if (*p != L('\0')) + { + p++; + bsquote = 1; + continue; + } + else /* (*p == L('\0')) */ + return 0; + } + +#if 0 + return bsquote ? 2 : 0; +#else + return (0); +#endif +} + +#undef INTERNAL_GLOB_PATTERN_P +#undef L +#undef INT +#undef CHAR +#undef GCHAR diff --git a/lib/glob/gm_loop.c b/lib/glob/gm_loop.c new file mode 100644 index 0000000..ac516f8 --- /dev/null +++ b/lib/glob/gm_loop.c @@ -0,0 +1,208 @@ +/* Copyright (C) 1991-2017 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#if EXTENDED_GLOB +int +EXTGLOB_PATTERN_P (pat) + const CHAR *pat; +{ + switch (pat[0]) + { + case L('*'): + case L('+'): + case L('!'): + case L('@'): + case L('?'): + return (pat[1] == L('(')); /* ) */ + default: + return 0; + } + + return 0; +} +#endif + +/* Return 1 of the first character of STRING could match the first + character of pattern PAT. Compiled to both single and wiide character + versions. FLAGS is a subset of strmatch flags; used to do case-insensitive + matching for now. */ +int +MATCH_PATTERN_CHAR (pat, string, flags) + CHAR *pat, *string; + int flags; +{ + CHAR c; + + if (*string == 0) + return (*pat == L('*')); /* XXX - allow only * to match empty string */ + + switch (c = *pat++) + { + default: + return (FOLD(*string) == FOLD(c)); + case L('\\'): + return (FOLD(*string) == FOLD(*pat)); + case L('?'): + return (*pat == L('(') ? 1 : (*string != L'\0')); + case L('*'): + return (1); + case L('+'): + case L('!'): + case L('@'): + return (*pat == L('(') ? 1 : (FOLD(*string) == FOLD(c))); + case L('['): + return (*string != L('\0')); + } +} + +int +MATCHLEN (pat, max) + CHAR *pat; + size_t max; +{ + CHAR c; + int matlen, bracklen, t, in_cclass, in_collsym, in_equiv; + + if (*pat == 0) + return (0); + + matlen = in_cclass = in_collsym = in_equiv = 0; + while (c = *pat++) + { + switch (c) + { + default: + matlen++; + break; + case L('\\'): + if (*pat == 0) + return ++matlen; + else + { + matlen++; + pat++; + } + break; + case L('?'): + if (*pat == LPAREN) + return (matlen = -1); /* XXX for now */ + else + matlen++; + break; + case L('*'): + return (matlen = -1); + case L('+'): + case L('!'): + case L('@'): + if (*pat == LPAREN) + return (matlen = -1); /* XXX for now */ + else + matlen++; + break; + case L('['): + /* scan for ending `]', skipping over embedded [:...:] */ + bracklen = 1; + c = *pat++; + do + { + if (c == 0) + { + pat--; /* back up to NUL */ + matlen += bracklen; + goto bad_bracket; + } + else if (c == L('\\')) + { + /* *pat == backslash-escaped character */ + bracklen++; + /* If the backslash or backslash-escape ends the string, + bail. The ++pat skips over the backslash escape */ + if (*pat == 0 || *++pat == 0) + { + matlen += bracklen; + goto bad_bracket; + } + } + else if (c == L('[') && *pat == L(':')) /* character class */ + { + pat++; + bracklen++; + in_cclass = 1; + } + else if (in_cclass && c == L(':') && *pat == L(']')) + { + pat++; + bracklen++; + in_cclass = 0; + } + else if (c == L('[') && *pat == L('.')) /* collating symbol */ + { + pat++; + bracklen++; + if (*pat == L(']')) /* right bracket can appear as collating symbol */ + { + pat++; + bracklen++; + } + in_collsym = 1; + } + else if (in_collsym && c == L('.') && *pat == L(']')) + { + pat++; + bracklen++; + in_collsym = 0; + } + else if (c == L('[') && *pat == L('=')) /* equivalence class */ + { + pat++; + bracklen++; + if (*pat == L(']')) /* right bracket can appear as equivalence class */ + { + pat++; + bracklen++; + } + in_equiv = 1; + } + else if (in_equiv && c == L('=') && *pat == L(']')) + { + pat++; + bracklen++; + in_equiv = 0; + } + else + bracklen++; + } + while ((c = *pat++) != L(']')); + matlen++; /* bracket expression can only match one char */ +bad_bracket: + break; + } + } + + return matlen; +} + +#undef EXTGLOB_PATTERN_P +#undef MATCH_PATTERN_CHAR +#undef MATCHLEN +#undef FOLD +#undef L +#undef LPAREN +#undef RPAREN +#undef INT +#undef CHAR diff --git a/lib/glob/gmisc.c b/lib/glob/gmisc.c new file mode 100644 index 0000000..f3d74ce --- /dev/null +++ b/lib/glob/gmisc.c @@ -0,0 +1,108 @@ +/* gmisc.c -- miscellaneous pattern matching utility functions for Bash. + + Copyright (C) 2010-2020 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne-Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <config.h> + +#include "bashtypes.h" + +#if defined (HAVE_UNISTD_H) +# include <unistd.h> +#endif + +#include "bashansi.h" +#include "shmbutil.h" +#include "chartypes.h" + +#include "stdc.h" + +#ifndef FNM_CASEFOLD +# include "strmatch.h" +#endif +#include "glob.h" + +/* Make sure these names continue to agree with what's in smatch.c */ +extern char *glob_patscan PARAMS((char *, char *, int)); + +/* Compile `gm_loop.c' for single-byte characters. */ +#define CHAR char +#define INT int +#define L(CS) CS +#define EXTGLOB_PATTERN_P extglob_pattern_p +#define MATCH_PATTERN_CHAR match_pattern_char +#define MATCHLEN umatchlen +#define FOLD(c) ((flags & FNM_CASEFOLD) \ + ? TOLOWER ((unsigned char)c) \ + : ((unsigned char)c)) +#ifndef LPAREN +#define LPAREN '(' +#define RPAREN ')' +#endif +#include "gm_loop.c" + +/* Compile `gm_loop.c' again for multibyte characters. */ +#if HANDLE_MULTIBYTE + +#define CHAR wchar_t +#define INT wint_t +#define L(CS) L##CS +#define EXTGLOB_PATTERN_P wextglob_pattern_p +#define MATCH_PATTERN_CHAR match_pattern_wchar +#define MATCHLEN wmatchlen + +#define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c)) +#define LPAREN L'(' +#define RPAREN L')' +#include "gm_loop.c" + +#endif /* HANDLE_MULTIBYTE */ + + +#if defined (EXTENDED_GLOB) +/* Skip characters in PAT and return the final occurrence of DIRSEP. This + is only called when extended_glob is set, so we have to skip over extglob + patterns x(...) */ +char * +glob_dirscan (pat, dirsep) + char *pat; + int dirsep; +{ + char *p, *d, *pe, *se; + + d = pe = se = 0; + for (p = pat; p && *p; p++) + { + if (extglob_pattern_p (p)) + { + if (se == 0) + se = p + strlen (p) - 1; + pe = glob_patscan (p + 2, se, 0); + if (pe == 0) + continue; + else if (*pe == 0) + break; + p = pe - 1; /* will do increment above */ + continue; + } + if (*p == dirsep) + d = p; + } + return d; +} +#endif /* EXTENDED_GLOB */ diff --git a/lib/glob/ndir.h b/lib/glob/ndir.h new file mode 100644 index 0000000..31261eb --- /dev/null +++ b/lib/glob/ndir.h @@ -0,0 +1,50 @@ +/* <dir.h> -- definitions for 4.2BSD-compatible directory access. + last edit: 09-Jul-1983 D A Gwyn. */ + +#if defined (VMS) +# if !defined (FAB$C_BID) +# include <fab.h> +# endif +# if !defined (NAM$C_BID) +# include <nam.h> +# endif +# if !defined (RMS$_SUC) +# include <rmsdef.h> +# endif +# include "dir.h" +#endif /* VMS */ + +/* Size of directory block. */ +#define DIRBLKSIZ 512 + +/* NOTE: MAXNAMLEN must be one less than a multiple of 4 */ + +#if defined (VMS) +# define MAXNAMLEN (DIR$S_NAME + 7) /* 80 plus room for version #. */ +# define MAXFULLSPEC NAM$C_MAXRSS /* Maximum full spec */ +#else +# define MAXNAMLEN 15 /* Maximum filename length. */ +#endif /* VMS */ + +/* Data from readdir (). */ +struct direct { + long d_ino; /* Inode number of entry. */ + unsigned short d_reclen; /* Length of this record. */ + unsigned short d_namlen; /* Length of string in d_name. */ + char d_name[MAXNAMLEN + 1]; /* Name of file. */ +}; + +/* Stream data from opendir (). */ +typedef struct { + int dd_fd; /* File descriptor. */ + int dd_loc; /* Offset in block. */ + int dd_size; /* Amount of valid data. */ + char dd_buf[DIRBLKSIZ]; /* Directory block. */ +} DIR; + +extern DIR *opendir (); +extern struct direct *readdir (); +extern long telldir (); +extern void seekdir (), closedir (); + +#define rewinddir(dirp) seekdir (dirp, 0L) diff --git a/lib/glob/sm_loop.c b/lib/glob/sm_loop.c new file mode 100644 index 0000000..247ba28 --- /dev/null +++ b/lib/glob/sm_loop.c @@ -0,0 +1,981 @@ +/* Copyright (C) 1991-2021 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +extern int interrupt_state, terminating_signal; + +struct STRUCT +{ + CHAR *pattern; + CHAR *string; +}; + +int FCT PARAMS((CHAR *, CHAR *, int)); + +static int GMATCH PARAMS((CHAR *, CHAR *, CHAR *, CHAR *, struct STRUCT *, int)); +static CHAR *PARSE_COLLSYM PARAMS((CHAR *, INT *)); +static CHAR *BRACKMATCH PARAMS((CHAR *, U_CHAR, int)); +static int EXTMATCH PARAMS((INT, CHAR *, CHAR *, CHAR *, CHAR *, int)); + +extern void DEQUOTE_PATHNAME PARAMS((CHAR *)); + +/*static*/ CHAR *PATSCAN PARAMS((CHAR *, CHAR *, INT)); + +int +FCT (pattern, string, flags) + CHAR *pattern; + CHAR *string; + int flags; +{ + CHAR *se, *pe; + + if (string == 0 || pattern == 0) + return FNM_NOMATCH; + + se = string + STRLEN ((XCHAR *)string); + pe = pattern + STRLEN ((XCHAR *)pattern); + + return (GMATCH (string, se, pattern, pe, (struct STRUCT *)NULL, flags)); +} + +/* Match STRING against the filename pattern PATTERN, returning zero if + it matches, FNM_NOMATCH if not. */ +static int +GMATCH (string, se, pattern, pe, ends, flags) + CHAR *string, *se; + CHAR *pattern, *pe; + struct STRUCT *ends; + int flags; +{ + CHAR *p, *n; /* pattern, string */ + INT c; /* current pattern character - XXX U_CHAR? */ + INT sc; /* current string character - XXX U_CHAR? */ + + p = pattern; + n = string; + + if (string == 0 || pattern == 0) + return FNM_NOMATCH; + +#if DEBUG_MATCHING +fprintf(stderr, "gmatch: string = %s; se = %s\n", string, se); +fprintf(stderr, "gmatch: pattern = %s; pe = %s\n", pattern, pe); +#endif + + while (p < pe) + { + c = *p++; + c = FOLD (c); + + sc = n < se ? *n : '\0'; + + if (interrupt_state || terminating_signal) + return FNM_NOMATCH; + +#ifdef EXTENDED_GLOB + /* EXTMATCH () will handle recursively calling GMATCH, so we can + just return what EXTMATCH() returns. */ + if ((flags & FNM_EXTMATCH) && *p == L('(') && + (c == L('+') || c == L('*') || c == L('?') || c == L('@') || c == L('!'))) /* ) */ + { + int lflags; + /* If we're not matching the start of the string, we're not + concerned about the special cases for matching `.' */ + lflags = (n == string) ? flags : (flags & ~(FNM_PERIOD|FNM_DOTDOT)); + return (EXTMATCH (c, n, se, p, pe, lflags)); + } +#endif /* EXTENDED_GLOB */ + + switch (c) + { + case L('?'): /* Match single character */ + if (sc == '\0') + return FNM_NOMATCH; + else if ((flags & FNM_PATHNAME) && sc == L('/')) + /* If we are matching a pathname, `?' can never match a `/'. */ + return FNM_NOMATCH; + else if ((flags & FNM_PERIOD) && sc == L('.') && + (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/')))) + /* `?' cannot match a `.' if it is the first character of the + string or if it is the first character following a slash and + we are matching a pathname. */ + return FNM_NOMATCH; + + /* `?' cannot match `.' or `..' if it is the first character of the + string or if it is the first character following a slash and + we are matching a pathname. */ + if ((flags & FNM_DOTDOT) && + ((n == string && SDOT_OR_DOTDOT(n)) || + ((flags & FNM_PATHNAME) && n[-1] == L('/') && PDOT_OR_DOTDOT(n)))) + return FNM_NOMATCH; + + break; + + case L('\\'): /* backslash escape removes special meaning */ + if (p == pe && sc == '\\' && (n+1 == se)) + break; + + if (p == pe) + return FNM_NOMATCH; + + if ((flags & FNM_NOESCAPE) == 0) + { + c = *p++; + /* A trailing `\' cannot match. */ + if (p > pe) + return FNM_NOMATCH; + c = FOLD (c); + } + if (FOLD (sc) != (U_CHAR)c) + return FNM_NOMATCH; + break; + + case L('*'): /* Match zero or more characters */ + /* See below for the reason for using this. It avoids backtracking + back to a previous `*'. Picked up from glibc. */ + if (ends != NULL) + { + ends->pattern = p - 1; + ends->string = n; + return (0); + } + + if ((flags & FNM_PERIOD) && sc == L('.') && + (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/')))) + /* `*' cannot match a `.' if it is the first character of the + string or if it is the first character following a slash and + we are matching a pathname. */ + return FNM_NOMATCH; + + /* `*' cannot match `.' or `..' if it is the first character of the + string or if it is the first character following a slash and + we are matching a pathname. */ + if ((flags & FNM_DOTDOT) && + ((n == string && SDOT_OR_DOTDOT(n)) || + ((flags & FNM_PATHNAME) && n[-1] == L('/') && PDOT_OR_DOTDOT(n)))) + return FNM_NOMATCH; + + if (p == pe) + return 0; + + /* Collapse multiple consecutive `*' and `?', but make sure that + one character of the string is consumed for each `?'. */ + for (c = *p++; (c == L('?') || c == L('*')); c = *p++) + { + if ((flags & FNM_PATHNAME) && sc == L('/')) + /* A slash does not match a wildcard under FNM_PATHNAME. */ + return FNM_NOMATCH; +#ifdef EXTENDED_GLOB + else if ((flags & FNM_EXTMATCH) && c == L('?') && *p == L('(')) /* ) */ + { + CHAR *newn; + + /* We can match 0 or 1 times. If we match, return success */ + if (EXTMATCH (c, n, se, p, pe, flags) == 0) + return (0); + + /* We didn't match the extended glob pattern, but + that's OK, since we can match 0 or 1 occurrences. + We need to skip the glob pattern and see if we + match the rest of the string. */ + newn = PATSCAN (p + 1, pe, 0); + /* If NEWN is 0, we have an ill-formed pattern. */ + p = newn ? newn : pe; + } +#endif + else if (c == L('?')) + { + if (sc == L('\0')) + return FNM_NOMATCH; + /* One character of the string is consumed in matching + this ? wildcard, so *??? won't match if there are + fewer than three characters. */ + n++; + sc = n < se ? *n : '\0'; + } + +#ifdef EXTENDED_GLOB + /* Handle ******(patlist) */ + if ((flags & FNM_EXTMATCH) && c == L('*') && *p == L('(')) /*)*/ + { + CHAR *newn; + /* We need to check whether or not the extended glob + pattern matches the remainder of the string. + If it does, we match the entire pattern. */ + for (newn = n; newn < se; ++newn) + { + if (EXTMATCH (c, newn, se, p, pe, flags) == 0) + return (0); + } + /* We didn't match the extended glob pattern, but + that's OK, since we can match 0 or more occurrences. + We need to skip the glob pattern and see if we + match the rest of the string. */ + newn = PATSCAN (p + 1, pe, 0); + /* If NEWN is 0, we have an ill-formed pattern. */ + p = newn ? newn : pe; + } +#endif + if (p == pe) + break; + } + + /* The wildcards are the last element of the pattern. The name + cannot match completely if we are looking for a pathname and + it contains another slash, unless FNM_LEADING_DIR is set. */ + if (c == L('\0')) + { + int r = (flags & FNM_PATHNAME) == 0 ? 0 : FNM_NOMATCH; + if (flags & FNM_PATHNAME) + { + if (flags & FNM_LEADING_DIR) + r = 0; + else if (MEMCHR (n, L('/'), se - n) == NULL) + r = 0; + } + return r; + } + + /* If we've hit the end of the pattern and the last character of + the pattern was handled by the loop above, we've succeeded. + Otherwise, we need to match that last character. */ + if (p == pe && (c == L('?') || c == L('*'))) + return (0); + + /* If we've hit the end of the string and the rest of the pattern + is something that matches the empty string, we can succeed. */ +#if defined (EXTENDED_GLOB) + if (n == se && ((flags & FNM_EXTMATCH) && (c == L('!') || c == L('?')) && *p == L('('))) + { + --p; + if (EXTMATCH (c, n, se, p, pe, flags) == 0) + return (c == L('!') ? FNM_NOMATCH : 0); + return (c == L('!') ? 0 : FNM_NOMATCH); + } +#endif + + /* If we stop at a slash in the pattern and we are looking for a + pathname ([star]/foo), then consume enough of the string to stop + at any slash and then try to match the rest of the pattern. If + the string doesn't contain a slash, fail */ + if (c == L('/') && (flags & FNM_PATHNAME)) + { + while (n < se && *n != L('/')) + ++n; + if (n < se && *n == L('/') && (GMATCH (n+1, se, p, pe, NULL, flags) == 0)) + return 0; + return FNM_NOMATCH; /* XXX */ + } + + /* General case, use recursion. */ + { + U_CHAR c1; + const CHAR *endp; + struct STRUCT end; + + end.pattern = NULL; + endp = MEMCHR (n, (flags & FNM_PATHNAME) ? L('/') : L('\0'), se - n); + if (endp == 0) + endp = se; + + c1 = ((flags & FNM_NOESCAPE) == 0 && c == L('\\')) ? *p : c; + c1 = FOLD (c1); + for (--p; n < endp; ++n) + { + /* Only call strmatch if the first character indicates a + possible match. We can check the first character if + we're not doing an extended glob match. */ + if ((flags & FNM_EXTMATCH) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/ + continue; + + /* If we're doing an extended glob match and the pattern is not + one of the extended glob patterns, we can check the first + character. */ + if ((flags & FNM_EXTMATCH) && p[1] != L('(') && /*)*/ + STRCHR (L("?*+@!"), *p) == 0 && c != L('[') && FOLD (*n) != c1) /*]*/ + continue; + + /* Otherwise, we just recurse. */ + if (GMATCH (n, se, p, pe, &end, flags & ~(FNM_PERIOD|FNM_DOTDOT)) == 0) + { + if (end.pattern == NULL) + return (0); + break; + } + } + /* This is a clever idea from glibc, used to avoid backtracking + to a `*' that appears earlier in the pattern. We get away + without saving se and pe because they are always the same, + even in the recursive calls to gmatch */ + if (end.pattern != NULL) + { + p = end.pattern; + n = end.string; + continue; + } + + return FNM_NOMATCH; + } + + case L('['): + { + if (sc == L('\0') || n == se) + return FNM_NOMATCH; + + /* A character class cannot match a `.' if it is the first + character of the string or if it is the first character + following a slash and we are matching a pathname. */ + if ((flags & FNM_PERIOD) && sc == L('.') && + (n == string || ((flags & FNM_PATHNAME) && n[-1] == L('/')))) + return (FNM_NOMATCH); + + /* `?' cannot match `.' or `..' if it is the first character of the + string or if it is the first character following a slash and + we are matching a pathname. */ + if ((flags & FNM_DOTDOT) && + ((n == string && SDOT_OR_DOTDOT(n)) || + ((flags & FNM_PATHNAME) && n[-1] == L('/') && PDOT_OR_DOTDOT(n)))) + return FNM_NOMATCH; + + p = BRACKMATCH (p, sc, flags); + if (p == 0) + return FNM_NOMATCH; + } + break; + + default: + if ((U_CHAR)c != FOLD (sc)) + return (FNM_NOMATCH); + } + + ++n; + } + + if (n == se) + return (0); + + if ((flags & FNM_LEADING_DIR) && *n == L('/')) + /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ + return 0; + + return (FNM_NOMATCH); +} + +/* Parse a bracket expression collating symbol ([.sym.]) starting at P, find + the value of the symbol, and move P past the collating symbol expression. + The value is returned in *VP, if VP is not null. */ +static CHAR * +PARSE_COLLSYM (p, vp) + CHAR *p; + INT *vp; +{ + register int pc; + INT val; + + p++; /* move past the `.' */ + + for (pc = 0; p[pc]; pc++) + if (p[pc] == L('.') && p[pc+1] == L(']')) + break; + if (p[pc] == 0) + { + if (vp) + *vp = INVALID; + return (p + pc); + } + val = COLLSYM (p, pc); + if (vp) + *vp = val; + return (p + pc + 2); +} + +/* Use prototype definition here because of type promotion. */ +static CHAR * +#if defined (PROTOTYPES) +BRACKMATCH (CHAR *p, U_CHAR test, int flags) +#else +BRACKMATCH (p, test, flags) + CHAR *p; + U_CHAR test; + int flags; +#endif +{ + register CHAR cstart, cend, c; + register int not; /* Nonzero if the sense of the character class is inverted. */ + int brcnt, forcecoll, isrange; + INT pc; + CHAR *savep; + CHAR *brchrp; + U_CHAR orig_test; + + orig_test = test; + test = FOLD (orig_test); + + savep = p; + + /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the + circumflex (`^') in its role in a `nonmatching list'. A bracket + expression starting with an unquoted circumflex character produces + unspecified results. This implementation treats the two identically. */ + if (not = (*p == L('!') || *p == L('^'))) + ++p; + + c = *p++; + for (;;) + { + /* Initialize cstart and cend in case `-' is the last + character of the pattern. */ + cstart = cend = c; + forcecoll = 0; + + /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find + the end of the equivalence class, move the pattern pointer past + it, and check for equivalence. XXX - this handles only + single-character equivalence classes, which is wrong, or at + least incomplete. */ + if (c == L('[') && *p == L('=') && p[2] == L('=') && p[3] == L(']')) + { + pc = FOLD (p[1]); + p += 4; + if (COLLEQUIV (test, pc)) + { +/*[*/ /* Move past the closing `]', since the first thing we do at + the `matched:' label is back p up one. */ + p++; + goto matched; + } + else + { + c = *p++; + if (c == L('\0')) + return ((test == L('[')) ? savep : (CHAR *)0); /*]*/ + c = FOLD (c); + continue; + } + } + + /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */ + if (c == L('[') && *p == L(':')) + { + CHAR *close, *ccname; + + pc = 0; /* make sure invalid char classes don't match. */ + /* Find end of character class name */ + for (close = p + 1; *close != '\0'; close++) + if (*close == L(':') && *(close+1) == L(']')) + break; + + if (*close != L('\0')) + { + ccname = (CHAR *)malloc ((close - p) * sizeof (CHAR)); + if (ccname == 0) + pc = 0; + else + { + bcopy (p + 1, ccname, (close - p - 1) * sizeof (CHAR)); + *(ccname + (close - p - 1)) = L('\0'); + /* As a result of a POSIX discussion, char class names are + allowed to be quoted (?) */ + DEQUOTE_PATHNAME (ccname); + pc = IS_CCLASS (orig_test, (XCHAR *)ccname); + } + if (pc == -1) + { + /* CCNAME is not a valid character class in the current + locale. In addition to noting no match (pc = 0), we have + a choice about what to do with the invalid charclass. + Posix leaves the behavior unspecified, but we're going + to skip over the charclass and keep going instead of + testing ORIG_TEST against each character in the class + string. If we don't want to do that, take out the update + of P. */ + pc = 0; + p = close + 2; + } + else + p = close + 2; /* move past the closing `]' */ + + free (ccname); + } + + if (pc) + { +/*[*/ /* Move past the closing `]', since the first thing we do at + the `matched:' label is back p up one. */ + p++; + goto matched; + } + else + { + /* continue the loop here, since this expression can't be + the first part of a range expression. */ + c = *p++; + if (c == L('\0')) + return ((test == L('[')) ? savep : (CHAR *)0); + else if (c == L(']')) + break; + c = FOLD (c); + continue; + } + } + + /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of + the symbol name, make sure it is terminated by `.]', translate + the name to a character using the external table, and do the + comparison. */ + if (c == L('[') && *p == L('.')) + { + p = PARSE_COLLSYM (p, &pc); + /* An invalid collating symbol cannot be the first point of a + range. If it is, we set cstart to one greater than `test', + so any comparisons later will fail. */ + cstart = (pc == INVALID) ? test + 1 : pc; + forcecoll = 1; + } + + if (!(flags & FNM_NOESCAPE) && c == L('\\')) + { + if (*p == '\0') + return (CHAR *)0; + cstart = cend = *p++; + } + + cstart = cend = FOLD (cstart); + isrange = 0; + + /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that + is not preceded by a backslash and is not part of a bracket + expression produces undefined results.' This implementation + treats the `[' as just a character to be matched if there is + not a closing `]'. */ + if (c == L('\0')) + return ((test == L('[')) ? savep : (CHAR *)0); + + c = *p++; + c = FOLD (c); + + if (c == L('\0')) + return ((test == L('[')) ? savep : (CHAR *)0); + + if ((flags & FNM_PATHNAME) && c == L('/')) + /* [/] can never match when matching a pathname. */ + return (CHAR *)0; + + /* This introduces a range, unless the `-' is the last + character of the class. Find the end of the range + and move past it. */ + if (c == L('-') && *p != L(']')) + { + cend = *p++; + if (!(flags & FNM_NOESCAPE) && cend == L('\\')) + cend = *p++; + if (cend == L('\0')) + return (CHAR *)0; + if (cend == L('[') && *p == L('.')) + { + p = PARSE_COLLSYM (p, &pc); + /* An invalid collating symbol cannot be the second part of a + range expression. If we get one, we set cend to one fewer + than the test character to make sure the range test fails. */ + cend = (pc == INVALID) ? test - 1 : pc; + forcecoll = 1; + } + cend = FOLD (cend); + + c = *p++; + + /* POSIX.2 2.8.3.2: ``The ending range point shall collate + equal to or higher than the starting range point; otherwise + the expression shall be treated as invalid.'' Note that this + applies to only the range expression; the rest of the bracket + expression is still checked for matches. */ + if (RANGECMP (cstart, cend, forcecoll) > 0) + { + if (c == L(']')) + break; + c = FOLD (c); + continue; + } + isrange = 1; + } + + if (isrange == 0 && test == cstart) + goto matched; + if (isrange && RANGECMP (test, cstart, forcecoll) >= 0 && RANGECMP (test, cend, forcecoll) <= 0) + goto matched; + + if (c == L(']')) + break; + } + /* No match. */ + return (!not ? (CHAR *)0 : p); + +matched: + /* Skip the rest of the [...] that already matched. */ + c = *--p; + brcnt = 1; + brchrp = 0; + while (brcnt > 0) + { + int oc; + + /* A `[' without a matching `]' is just another character to match. */ + if (c == L('\0')) + return ((test == L('[')) ? savep : (CHAR *)0); + + oc = c; + c = *p++; + if (c == L('[') && (*p == L('=') || *p == L(':') || *p == L('.'))) + { + brcnt++; + brchrp = p++; /* skip over the char after the left bracket */ + if ((c = *p) == L('\0')) + return ((test == L('[')) ? savep : (CHAR *)0); + /* If *brchrp == ':' we should check that the rest of the characters + form a valid character class name. We don't do that yet, but we + keep BRCHRP in case we want to. */ + } + /* We only want to check brchrp if we set it above. */ + else if (c == L(']') && brcnt > 1 && brchrp != 0 && oc == *brchrp) + { + brcnt--; + brchrp = 0; /* just in case */ + } + /* Left bracket loses its special meaning inside a bracket expression. + It is only valid when followed by a `.', `=', or `:', which we check + for above. Technically the right bracket can appear in a collating + symbol, so we check for that here. Otherwise, it terminates the + bracket expression. */ + else if (c == L(']') && (brchrp == 0 || *brchrp != L('.')) && brcnt >= 1) + brcnt = 0; + else if (!(flags & FNM_NOESCAPE) && c == L('\\')) + { + if (*p == '\0') + return (CHAR *)0; + /* XXX 1003.2d11 is unclear if this is right. */ + ++p; + } + } + return (not ? (CHAR *)0 : p); +} + +#if defined (EXTENDED_GLOB) +/* ksh-like extended pattern matching: + + [?*+@!](pat-list) + + where pat-list is a list of one or patterns separated by `|'. Operation + is as follows: + + ?(patlist) match zero or one of the given patterns + *(patlist) match zero or more of the given patterns + +(patlist) match one or more of the given patterns + @(patlist) match exactly one of the given patterns + !(patlist) match anything except one of the given patterns +*/ + +/* Scan a pattern starting at STRING and ending at END, keeping track of + embedded () and []. If DELIM is 0, we scan until a matching `)' + because we're scanning a `patlist'. Otherwise, we scan until we see + DELIM. In all cases, we never scan past END. The return value is the + first character after the matching DELIM or NULL if the pattern is + empty or invalid. */ +/*static*/ CHAR * +PATSCAN (string, end, delim) + CHAR *string, *end; + INT delim; +{ + int pnest, bnest, skip; + INT cchar; + CHAR *s, c, *bfirst; + + pnest = bnest = skip = 0; + cchar = 0; + bfirst = NULL; + + if (string == end) + return (NULL); + + for (s = string; c = *s; s++) + { + if (s >= end) + return (s); + if (skip) + { + skip = 0; + continue; + } + switch (c) + { + case L('\\'): + skip = 1; + break; + + case L('\0'): + return ((CHAR *)NULL); + + /* `[' is not special inside a bracket expression, but it may + introduce one of the special POSIX bracket expressions + ([.SYM.], [=c=], [: ... :]) that needs special handling. */ + case L('['): + if (bnest == 0) + { + bfirst = s + 1; + if (*bfirst == L('!') || *bfirst == L('^')) + bfirst++; + bnest++; + } + else if (s[1] == L(':') || s[1] == L('.') || s[1] == L('=')) + cchar = s[1]; + break; + + /* `]' is not special if it's the first char (after a leading `!' + or `^') in a bracket expression or if it's part of one of the + special POSIX bracket expressions ([.SYM.], [=c=], [: ... :]) */ + case L(']'): + if (bnest) + { + if (cchar && s[-1] == cchar) + cchar = 0; + else if (s != bfirst) + { + bnest--; + bfirst = 0; + } + } + break; + + case L('('): + if (bnest == 0) + pnest++; + break; + + case L(')'): + if (bnest == 0 && pnest-- <= 0) + return ++s; + break; + + case L('|'): + if (bnest == 0 && pnest == 0 && delim == L('|')) + return ++s; + break; + } + } + + return (NULL); +} + +/* Return 0 if dequoted pattern matches S in the current locale. */ +static int +STRCOMPARE (p, pe, s, se) + CHAR *p, *pe, *s, *se; +{ + int ret; + CHAR c1, c2; + int l1, l2; + + l1 = pe - p; + l2 = se - s; + + if (l1 != l2) + return (FNM_NOMATCH); /* unequal lengths, can't be identical */ + + c1 = *pe; + c2 = *se; + + if (c1 != 0) + *pe = '\0'; + if (c2 != 0) + *se = '\0'; + +#if HAVE_MULTIBYTE || defined (HAVE_STRCOLL) + ret = STRCOLL ((XCHAR *)p, (XCHAR *)s); +#else + ret = STRCMP ((XCHAR *)p, (XCHAR *)s); +#endif + + if (c1 != 0) + *pe = c1; + if (c2 != 0) + *se = c2; + + return (ret == 0 ? ret : FNM_NOMATCH); +} + +/* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or + 0 on success. This is handed the entire rest of the pattern and string + the first time an extended pattern specifier is encountered, so it calls + gmatch recursively. */ +static int +EXTMATCH (xc, s, se, p, pe, flags) + INT xc; /* select which operation */ + CHAR *s, *se; + CHAR *p, *pe; + int flags; +{ + CHAR *prest; /* pointer to rest of pattern */ + CHAR *psub; /* pointer to sub-pattern */ + CHAR *pnext; /* pointer to next sub-pattern */ + CHAR *srest; /* pointer to rest of string */ + int m1, m2, xflags; /* xflags = flags passed to recursive matches */ + +#if DEBUG_MATCHING +fprintf(stderr, "extmatch: xc = %c\n", xc); +fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se); +fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe); +fprintf(stderr, "extmatch: flags = %d\n", flags); +#endif + + prest = PATSCAN (p + (*p == L('(')), pe, 0); /* ) */ + if (prest == 0) + /* If PREST is 0, we failed to scan a valid pattern. In this + case, we just want to compare the two as strings. */ + return (STRCOMPARE (p - 1, pe, s, se)); + + switch (xc) + { + case L('+'): /* match one or more occurrences */ + case L('*'): /* match zero or more occurrences */ + /* If we can get away with no matches, don't even bother. Just + call GMATCH on the rest of the pattern and return success if + it succeeds. */ + if (xc == L('*') && (GMATCH (s, se, prest, pe, NULL, flags) == 0)) + return 0; + + /* OK, we have to do this the hard way. First, we make sure one of + the subpatterns matches, then we try to match the rest of the + string. */ + for (psub = p + 1; ; psub = pnext) + { + pnext = PATSCAN (psub, pe, L('|')); + for (srest = s; srest <= se; srest++) + { + /* Match this substring (S -> SREST) against this + subpattern (psub -> pnext - 1) */ + m1 = GMATCH (s, srest, psub, pnext - 1, NULL, flags) == 0; + /* OK, we matched a subpattern, so make sure the rest of the + string matches the rest of the pattern. Also handle + multiple matches of the pattern. */ + if (m1) + { + /* if srest > s, we are not at start of string */ + xflags = (srest > s) ? (flags & ~(FNM_PERIOD|FNM_DOTDOT)) : flags; + m2 = (GMATCH (srest, se, prest, pe, NULL, xflags) == 0) || + (s != srest && GMATCH (srest, se, p - 1, pe, NULL, xflags) == 0); + } + if (m1 && m2) + return (0); + } + if (pnext == prest) + break; + } + return (FNM_NOMATCH); + + case L('?'): /* match zero or one of the patterns */ + case L('@'): /* match one (or more) of the patterns */ + /* If we can get away with no matches, don't even bother. Just + call gmatch on the rest of the pattern and return success if + it succeeds. */ + if (xc == L('?') && (GMATCH (s, se, prest, pe, NULL, flags) == 0)) + return 0; + + /* OK, we have to do this the hard way. First, we see if one of + the subpatterns matches, then, if it does, we try to match the + rest of the string. */ + for (psub = p + 1; ; psub = pnext) + { + pnext = PATSCAN (psub, pe, L('|')); + srest = (prest == pe) ? se : s; + for ( ; srest <= se; srest++) + { + /* if srest > s, we are not at start of string */ + xflags = (srest > s) ? (flags & ~(FNM_PERIOD|FNM_DOTDOT)) : flags; + if (GMATCH (s, srest, psub, pnext - 1, NULL, flags) == 0 && + GMATCH (srest, se, prest, pe, NULL, xflags) == 0) + return (0); + } + if (pnext == prest) + break; + } + return (FNM_NOMATCH); + + case '!': /* match anything *except* one of the patterns */ + for (srest = s; srest <= se; srest++) + { + m1 = 0; + for (psub = p + 1; ; psub = pnext) + { + pnext = PATSCAN (psub, pe, L('|')); + /* If one of the patterns matches, just bail immediately. */ + if (m1 = (GMATCH (s, srest, psub, pnext - 1, NULL, flags) == 0)) + break; + if (pnext == prest) + break; + } + + /* If nothing matched, but the string starts with a period and we + need to match periods explicitly, don't return this as a match, + even for negation. */ + if (m1 == 0 && (flags & FNM_PERIOD) && *s == '.') + return (FNM_NOMATCH); + + if (m1 == 0 && (flags & FNM_DOTDOT) && + (SDOT_OR_DOTDOT (s) || + ((flags & FNM_PATHNAME) && s[-1] == L('/') && PDOT_OR_DOTDOT(s)))) + return (FNM_NOMATCH); + + /* if srest > s, we are not at start of string */ + xflags = (srest > s) ? (flags & ~(FNM_PERIOD|FNM_DOTDOT)) : flags; + if (m1 == 0 && GMATCH (srest, se, prest, pe, NULL, xflags) == 0) + return (0); + } + return (FNM_NOMATCH); + } + + return (FNM_NOMATCH); +} +#endif /* EXTENDED_GLOB */ + +#undef IS_CCLASS +#undef FOLD +#undef CHAR +#undef U_CHAR +#undef XCHAR +#undef INT +#undef INVALID +#undef FCT +#undef GMATCH +#undef COLLSYM +#undef PARSE_COLLSYM +#undef PATSCAN +#undef STRCOMPARE +#undef EXTMATCH +#undef DEQUOTE_PATHNAME +#undef STRUCT +#undef BRACKMATCH +#undef STRCHR +#undef STRCOLL +#undef STRLEN +#undef STRCMP +#undef MEMCHR +#undef COLLEQUIV +#undef RANGECMP +#undef ISDIRSEP +#undef PATHSEP +#undef PDOT_OR_DOTDOT +#undef SDOT_OR_DOTDOT +#undef L diff --git a/lib/glob/smatch.c b/lib/glob/smatch.c new file mode 100644 index 0000000..379c2d2 --- /dev/null +++ b/lib/glob/smatch.c @@ -0,0 +1,638 @@ +/* strmatch.c -- ksh-like extended pattern matching for the shell and filename + globbing. */ + +/* Copyright (C) 1991-2021 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <config.h> + +#include <stdio.h> /* for debugging */ + +#include "strmatch.h" +#include <chartypes.h> + +#include "bashansi.h" +#include "shmbutil.h" +#include "xmalloc.h" + +#include <errno.h> + +#if !defined (errno) +extern int errno; +#endif + +#if FNMATCH_EQUIV_FALLBACK +/* We don't include <fnmatch.h> in order to avoid namespace collisions; the + internal strmatch still uses the FNM_ constants. */ +extern int fnmatch (const char *, const char *, int); +#endif + +/* First, compile `sm_loop.c' for single-byte characters. */ +#define CHAR unsigned char +#define U_CHAR unsigned char +#define XCHAR char +#define INT int +#define L(CS) CS +#define INVALID -1 + +#undef STREQ +#undef STREQN +#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0) +#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0) + +#ifndef GLOBASCII_DEFAULT +# define GLOBASCII_DEFAULT 0 +#endif + +int glob_asciirange = GLOBASCII_DEFAULT; + +#if FNMATCH_EQUIV_FALLBACK +/* Construct a string w1 = "c1" and a pattern w2 = "[[=c2=]]" and pass them + to fnmatch to see if wide characters c1 and c2 collate as members of the + same equivalence class. We can't really do this portably any other way */ +static int +_fnmatch_fallback (s, p) + int s, p; /* string char, patchar */ +{ + char s1[2]; /* string */ + char s2[8]; /* constructed pattern */ + + s1[0] = (unsigned char)s; + s1[1] = '\0'; + + /* reconstruct the pattern */ + s2[0] = s2[1] = '['; + s2[2] = '='; + s2[3] = (unsigned char)p; + s2[4] = '='; + s2[5] = s2[6] = ']'; + s2[7] = '\0'; + + return (fnmatch ((const char *)s2, (const char *)s1, 0)); +} +#endif + +/* We use strcoll(3) for range comparisons in bracket expressions, + even though it can have unwanted side effects in locales + other than POSIX or US. For instance, in the de locale, [A-Z] matches + all characters. If GLOB_ASCIIRANGE is non-zero, and we're not forcing + the use of strcoll (e.g., for explicit collating symbols), we use + straight ordering as if in the C locale. */ + +#if defined (HAVE_STRCOLL) +/* Helper functions for collating symbol equivalence. */ + +/* Return 0 if C1 == C2 or collates equally if FORCECOLL is non-zero. */ +static int +charcmp (c1, c2, forcecoll) + int c1, c2; + int forcecoll; +{ + static char s1[2] = { ' ', '\0' }; + static char s2[2] = { ' ', '\0' }; + int ret; + + /* Eight bits only. Period. */ + c1 &= 0xFF; + c2 &= 0xFF; + + if (c1 == c2) + return (0); + + if (forcecoll == 0 && glob_asciirange) + return (c1 - c2); + + s1[0] = c1; + s2[0] = c2; + + return (strcoll (s1, s2)); +} + +static int +rangecmp (c1, c2, forcecoll) + int c1, c2; + int forcecoll; +{ + int r; + + r = charcmp (c1, c2, forcecoll); + + /* We impose a total ordering here by returning c1-c2 if charcmp returns 0 */ + if (r != 0) + return r; + return (c1 - c2); /* impose total ordering */ +} +#else /* !HAVE_STRCOLL */ +# define rangecmp(c1, c2, f) ((int)(c1) - (int)(c2)) +#endif /* !HAVE_STRCOLL */ + +#if defined (HAVE_STRCOLL) +/* Returns 1 if chars C and EQUIV collate equally in the current locale. */ +static int +collequiv (c, equiv) + int c, equiv; +{ + if (charcmp (c, equiv, 1) == 0) + return 1; + +#if FNMATCH_EQUIV_FALLBACK + return (_fnmatch_fallback (c, equiv) == 0); +#else + return 0; +#endif + +} +#else +# define collequiv(c, equiv) ((c) == (equiv)) +#endif + +#define _COLLSYM _collsym +#define __COLLSYM __collsym +#define POSIXCOLL posix_collsyms +#include "collsyms.h" + +static int +collsym (s, len) + CHAR *s; + int len; +{ + register struct _collsym *csp; + char *x; + + x = (char *)s; + for (csp = posix_collsyms; csp->name; csp++) + { + if (STREQN(csp->name, x, len) && csp->name[len] == '\0') + return (csp->code); + } + if (len == 1) + return s[0]; + return INVALID; +} + +/* unibyte character classification */ +#if !defined (isascii) && !defined (HAVE_ISASCII) +# define isascii(c) ((unsigned int)(c) <= 0177) +#endif + +enum char_class + { + CC_NO_CLASS = 0, + CC_ASCII, CC_ALNUM, CC_ALPHA, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, + CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_WORD, CC_XDIGIT + }; + +static char const *const cclass_name[] = + { + "", + "ascii", "alnum", "alpha", "blank", "cntrl", "digit", "graph", + "lower", "print", "punct", "space", "upper", "word", "xdigit" + }; + +#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0])) + +static enum char_class +is_valid_cclass (name) + const char *name; +{ + enum char_class ret; + int i; + + ret = CC_NO_CLASS; + + for (i = 1; i < N_CHAR_CLASS; i++) + { + if (STREQ (name, cclass_name[i])) + { + ret = (enum char_class)i; + break; + } + } + + return ret; +} + +static int +cclass_test (c, char_class) + int c; + enum char_class char_class; +{ + int result; + + switch (char_class) + { + case CC_ASCII: + result = isascii (c); + break; + case CC_ALNUM: + result = ISALNUM (c); + break; + case CC_ALPHA: + result = ISALPHA (c); + break; + case CC_BLANK: + result = ISBLANK (c); + break; + case CC_CNTRL: + result = ISCNTRL (c); + break; + case CC_DIGIT: + result = ISDIGIT (c); + break; + case CC_GRAPH: + result = ISGRAPH (c); + break; + case CC_LOWER: + result = ISLOWER (c); + break; + case CC_PRINT: + result = ISPRINT (c); + break; + case CC_PUNCT: + result = ISPUNCT (c); + break; + case CC_SPACE: + result = ISSPACE (c); + break; + case CC_UPPER: + result = ISUPPER (c); + break; + case CC_WORD: + result = (ISALNUM (c) || c == '_'); + break; + case CC_XDIGIT: + result = ISXDIGIT (c); + break; + default: + result = -1; + break; + } + + return result; +} + +static int +is_cclass (c, name) + int c; + const char *name; +{ + enum char_class char_class; + int result; + + char_class = is_valid_cclass (name); + if (char_class == CC_NO_CLASS) + return -1; + + result = cclass_test (c, char_class); + return (result); +} + +/* Now include `sm_loop.c' for single-byte characters. */ +/* The result of FOLD is an `unsigned char' */ +# define FOLD(c) ((flags & FNM_CASEFOLD) \ + ? TOLOWER ((unsigned char)c) \ + : ((unsigned char)c)) + +#if !defined (__CYGWIN__) +# define ISDIRSEP(c) ((c) == '/') +#else +# define ISDIRSEP(c) ((c) == '/' || (c) == '\\') +#endif /* __CYGWIN__ */ +#define PATHSEP(c) (ISDIRSEP(c) || (c) == 0) + +# define PDOT_OR_DOTDOT(s) (s[0] == '.' && (PATHSEP (s[1]) || (s[1] == '.' && PATHSEP (s[2])))) +# define SDOT_OR_DOTDOT(s) (s[0] == '.' && (s[1] == 0 || (s[1] == '.' && s[2] == 0))) + +#define FCT internal_strmatch +#define GMATCH gmatch +#define COLLSYM collsym +#define PARSE_COLLSYM parse_collsym +#define BRACKMATCH brackmatch +#define PATSCAN glob_patscan +#define STRCOMPARE strcompare +#define EXTMATCH extmatch +#define DEQUOTE_PATHNAME udequote_pathname +#define STRUCT smat_struct +#define STRCHR(S, C) strchr((S), (C)) +#define MEMCHR(S, C, N) memchr((S), (C), (N)) +#define STRCOLL(S1, S2) strcoll((S1), (S2)) +#define STRLEN(S) strlen(S) +#define STRCMP(S1, S2) strcmp((S1), (S2)) +#define RANGECMP(C1, C2, F) rangecmp((C1), (C2), (F)) +#define COLLEQUIV(C1, C2) collequiv((C1), (C2)) +#define CTYPE_T enum char_class +#define IS_CCLASS(C, S) is_cclass((C), (S)) +#include "sm_loop.c" + +#if HANDLE_MULTIBYTE + +# define CHAR wchar_t +# define U_CHAR wint_t +# define XCHAR wchar_t +# define INT wint_t +# define L(CS) L##CS +# define INVALID WEOF + +# undef STREQ +# undef STREQN +# define STREQ(s1, s2) ((wcscmp (s1, s2) == 0)) +# define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0) + +extern char *mbsmbchar PARAMS((const char *)); + +#if FNMATCH_EQUIV_FALLBACK +/* Construct a string w1 = "c1" and a pattern w2 = "[[=c2=]]" and pass them + to fnmatch to see if wide characters c1 and c2 collate as members of the + same equivalence class. We can't really do this portably any other way */ +static int +_fnmatch_fallback_wc (c1, c2) + wchar_t c1, c2; /* string char, patchar */ +{ + char w1[MB_LEN_MAX+1]; /* string */ + char w2[MB_LEN_MAX+8]; /* constructed pattern */ + int l1, l2; + + l1 = wctomb (w1, c1); + if (l1 == -1) + return (2); + w1[l1] = '\0'; + + /* reconstruct the pattern */ + w2[0] = w2[1] = '['; + w2[2] = '='; + l2 = wctomb (w2+3, c2); + if (l2 == -1) + return (2); + w2[l2+3] = '='; + w2[l2+4] = w2[l2+5] = ']'; + w2[l2+6] = '\0'; + + return (fnmatch ((const char *)w2, (const char *)w1, 0)); +} +#endif + +static int +charcmp_wc (c1, c2, forcecoll) + wint_t c1, c2; + int forcecoll; +{ + static wchar_t s1[2] = { L' ', L'\0' }; + static wchar_t s2[2] = { L' ', L'\0' }; + int r; + + if (c1 == c2) + return 0; + + if (forcecoll == 0 && glob_asciirange && c1 <= UCHAR_MAX && c2 <= UCHAR_MAX) + return ((int)(c1 - c2)); + + s1[0] = c1; + s2[0] = c2; + + return (wcscoll (s1, s2)); +} + +static int +rangecmp_wc (c1, c2, forcecoll) + wint_t c1, c2; + int forcecoll; +{ + int r; + + r = charcmp_wc (c1, c2, forcecoll); + + /* We impose a total ordering here by returning c1-c2 if charcmp returns 0, + as we do above in the single-byte case. */ + if (r != 0 || forcecoll) + return r; + return ((int)(c1 - c2)); /* impose total ordering */ +} + +/* Returns 1 if wide chars C and EQUIV collate equally in the current locale. */ +static int +collequiv_wc (c, equiv) + wint_t c, equiv; +{ + wchar_t s, p; + + if (charcmp_wc (c, equiv, 1) == 0) + return 1; + +#if FNMATCH_EQUIV_FALLBACK +/* We check explicitly for success (fnmatch returns 0) to avoid problems if + our local definition of FNM_NOMATCH (strmatch.h) doesn't match the + system's (fnmatch.h). We don't care about error return values here. */ + + s = c; + p = equiv; + return (_fnmatch_fallback_wc (s, p) == 0); +#else + return 0; +#endif +} + +/* Helper function for collating symbol. */ +# define _COLLSYM _collwcsym +# define __COLLSYM __collwcsym +# define POSIXCOLL posix_collwcsyms +# include "collsyms.h" + +static wint_t +collwcsym (s, len) + wchar_t *s; + int len; +{ + register struct _collwcsym *csp; + + for (csp = posix_collwcsyms; csp->name; csp++) + { + if (STREQN(csp->name, s, len) && csp->name[len] == L'\0') + return (csp->code); + } + if (len == 1) + return s[0]; + return INVALID; +} + +static int +is_wcclass (wc, name) + wint_t wc; + wchar_t *name; +{ + char *mbs; + mbstate_t state; + size_t mbslength; + wctype_t desc; + int want_word; + + if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0)) + { + int c; + + if ((c = wctob (wc)) == EOF) + return 0; + else + return (c <= 0x7F); + } + + want_word = (wcscmp (name, L"word") == 0); + if (want_word) + name = L"alnum"; + + memset (&state, '\0', sizeof (mbstate_t)); + mbs = (char *) malloc (wcslen(name) * MB_CUR_MAX + 1); + if (mbs == 0) + return -1; + mbslength = wcsrtombs (mbs, (const wchar_t **)&name, (wcslen(name) * MB_CUR_MAX + 1), &state); + + if (mbslength == (size_t)-1 || mbslength == (size_t)-2) + { + free (mbs); + return -1; + } + desc = wctype (mbs); + free (mbs); + + if (desc == (wctype_t)0) + return -1; + + if (want_word) + return (iswctype (wc, desc) || wc == L'_'); + else + return (iswctype (wc, desc)); +} + +/* Return 1 if there are no char class [:class:] expressions (degenerate case) + or only posix-specified (C locale supported) char class expressions in + PATTERN. These are the ones where it's safe to punt to the single-byte + code, since wide character support allows locale-defined char classes. + This only uses single-byte code, but is only needed to support multibyte + locales. */ +static int +posix_cclass_only (pattern) + char *pattern; +{ + char *p, *p1; + char cc[16]; /* sufficient for all valid posix char class names */ + enum char_class valid; + + p = pattern; + while (p = strchr (p, '[')) + { + if (p[1] != ':') + { + p++; + continue; + } + p += 2; /* skip past "[:" */ + /* Find end of char class expression */ + for (p1 = p; *p1; p1++) + if (*p1 == ':' && p1[1] == ']') + break; + if (*p1 == 0) /* no char class expression found */ + break; + /* Find char class name and validate it against posix char classes */ + if ((p1 - p) >= sizeof (cc)) + return 0; + bcopy (p, cc, p1 - p); + cc[p1 - p] = '\0'; + valid = is_valid_cclass (cc); + if (valid == CC_NO_CLASS) + return 0; /* found unrecognized char class name */ + + p = p1 + 2; /* found posix char class name */ + } + + return 1; /* no char class names or only posix */ +} + +/* Now include `sm_loop.c' for multibyte characters. */ +#define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c)) + +# if !defined (__CYGWIN__) +# define ISDIRSEP(c) ((c) == L'/') +# else +# define ISDIRSEP(c) ((c) == L'/' || (c) == L'\\') +# endif /* __CYGWIN__ */ +# define PATHSEP(c) (ISDIRSEP(c) || (c) == L'\0') + +# define PDOT_OR_DOTDOT(w) (w[0] == L'.' && (PATHSEP(w[1]) || (w[1] == L'.' && PATHSEP(w[2])))) +# define SDOT_OR_DOTDOT(w) (w[0] == L'.' && (w[1] == L'\0' || (w[1] == L'.' && w[2] == L'\0'))) + +#define FCT internal_wstrmatch +#define GMATCH gmatch_wc +#define COLLSYM collwcsym +#define PARSE_COLLSYM parse_collwcsym +#define BRACKMATCH brackmatch_wc +#define PATSCAN glob_patscan_wc +#define STRCOMPARE wscompare +#define EXTMATCH extmatch_wc +#define DEQUOTE_PATHNAME wcdequote_pathname +#define STRUCT wcsmat_struct +#define STRCHR(S, C) wcschr((S), (C)) +#define MEMCHR(S, C, N) wmemchr((S), (C), (N)) +#define STRCOLL(S1, S2) wcscoll((S1), (S2)) +#define STRLEN(S) wcslen(S) +#define STRCMP(S1, S2) wcscmp((S1), (S2)) +#define RANGECMP(C1, C2, F) rangecmp_wc((C1), (C2), (F)) +#define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2)) +#define CTYPE_T enum char_class +#define IS_CCLASS(C, S) is_wcclass((C), (S)) +#include "sm_loop.c" + +#endif /* HAVE_MULTIBYTE */ + +int +xstrmatch (pattern, string, flags) + char *pattern; + char *string; + int flags; +{ +#if HANDLE_MULTIBYTE + int ret; + size_t n; + wchar_t *wpattern, *wstring; + size_t plen, slen, mplen, mslen; + + if (MB_CUR_MAX == 1) + return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags)); + + if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0 && posix_cclass_only (pattern)) + return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags)); + + n = xdupmbstowcs (&wpattern, NULL, pattern); + if (n == (size_t)-1 || n == (size_t)-2) + return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags)); + + n = xdupmbstowcs (&wstring, NULL, string); + if (n == (size_t)-1 || n == (size_t)-2) + { + free (wpattern); + return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags)); + } + + ret = internal_wstrmatch (wpattern, wstring, flags); + + free (wpattern); + free (wstring); + + return ret; +#else + return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags)); +#endif /* !HANDLE_MULTIBYTE */ +} diff --git a/lib/glob/strmatch.c b/lib/glob/strmatch.c new file mode 100644 index 0000000..5b7b49f --- /dev/null +++ b/lib/glob/strmatch.c @@ -0,0 +1,79 @@ +/* strmatch.c -- ksh-like extended pattern matching for the shell and filename + globbing. */ + +/* Copyright (C) 1991-2020 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <config.h> + +#include "stdc.h" +#include "strmatch.h" + +extern int xstrmatch PARAMS((char *, char *, int)); +#if defined (HANDLE_MULTIBYTE) +extern int internal_wstrmatch PARAMS((wchar_t *, wchar_t *, int)); +#endif + +int +strmatch (pattern, string, flags) + char *pattern; + char *string; + int flags; +{ + if (string == 0 || pattern == 0) + return FNM_NOMATCH; + + return (xstrmatch (pattern, string, flags)); +} + +#if defined (HANDLE_MULTIBYTE) +int +wcsmatch (wpattern, wstring, flags) + wchar_t *wpattern; + wchar_t *wstring; + int flags; +{ + if (wstring == 0 || wpattern == 0) + return (FNM_NOMATCH); + + return (internal_wstrmatch (wpattern, wstring, flags)); +} +#endif + +#ifdef TEST +main (c, v) + int c; + char **v; +{ + char *string, *pat; + + string = v[1]; + pat = v[2]; + + if (strmatch (pat, string, 0) == 0) + { + printf ("%s matches %s\n", string, pat); + exit (0); + } + else + { + printf ("%s does not match %s\n", string, pat); + exit (1); + } +} +#endif diff --git a/lib/glob/strmatch.h b/lib/glob/strmatch.h new file mode 100644 index 0000000..d6a6b0f --- /dev/null +++ b/lib/glob/strmatch.h @@ -0,0 +1,65 @@ +/* Copyright (C) 1991-2021 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne-Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifndef _STRMATCH_H +#define _STRMATCH_H 1 + +#include <config.h> + +#include "stdc.h" + +/* We #undef these before defining them because some losing systems + (HP-UX A.08.07 for example) define these in <unistd.h>. */ +#undef FNM_PATHNAME +#undef FNM_NOESCAPE +#undef FNM_PERIOD + +/* Bits set in the FLAGS argument to `strmatch'. */ + +/* standard flags are like fnmatch(3). */ +#define FNM_PATHNAME (1 << 0) /* No wildcard can ever match `/'. */ +#define FNM_NOESCAPE (1 << 1) /* Backslashes don't quote special chars. */ +#define FNM_PERIOD (1 << 2) /* Leading `.' is matched only explicitly. */ + +/* extended flags not available in most libc fnmatch versions, but we undef + them to avoid any possible warnings. */ +#undef FNM_LEADING_DIR +#undef FNM_CASEFOLD +#undef FNM_EXTMATCH + +#define FNM_LEADING_DIR (1 << 3) /* Ignore `/...' after a match. */ +#define FNM_CASEFOLD (1 << 4) /* Compare without regard to case. */ +#define FNM_EXTMATCH (1 << 5) /* Use ksh-like extended matching. */ + +#define FNM_FIRSTCHAR (1 << 6) /* Match only the first character */ +#define FNM_DOTDOT (1 << 7) /* force `.' and `..' to match explicitly even if FNM_PERIOD not supplied. */ + +/* Value returned by `strmatch' if STRING does not match PATTERN. */ +#undef FNM_NOMATCH + +#define FNM_NOMATCH 1 + +/* Match STRING against the filename pattern PATTERN, + returning zero if it matches, FNM_NOMATCH if not. */ +extern int strmatch PARAMS((char *, char *, int)); + +#if HANDLE_MULTIBYTE +extern int wcsmatch PARAMS((wchar_t *, wchar_t *, int)); +#endif + +#endif /* _STRMATCH_H */ diff --git a/lib/glob/xmbsrtowcs.c b/lib/glob/xmbsrtowcs.c new file mode 100644 index 0000000..17250c3 --- /dev/null +++ b/lib/glob/xmbsrtowcs.c @@ -0,0 +1,523 @@ +/* xmbsrtowcs.c -- replacement function for mbsrtowcs */ + +/* Copyright (C) 2002-2020 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +/* Ask for GNU extensions to get extern declaration for mbsnrtowcs if + available via glibc. */ +#ifndef _GNU_SOURCE +# define _GNU_SOURCE 1 +#endif + +#include <config.h> + +#include <bashansi.h> + +/* <wchar.h>, <wctype.h> and <stdlib.h> are included in "shmbutil.h". + If <wchar.h>, <wctype.h>, mbsrtowcs(), exist, HANDLE_MULTIBYTE + is defined as 1. */ +#include <shmbutil.h> + +#if HANDLE_MULTIBYTE + +#include <errno.h> +#if !defined (errno) +extern int errno; +#endif + +#define WSBUF_INC 32 + +#ifndef FREE +# define FREE(x) do { if (x) free (x); } while (0) +#endif + +#if ! HAVE_STRCHRNUL +extern char *strchrnul PARAMS((const char *, int)); +#endif + +/* On some locales (ex. ja_JP.sjis), mbsrtowc doesn't convert 0x5c to U<0x5c>. + So, this function is made for converting 0x5c to U<0x5c>. */ + +static mbstate_t local_state; +static int local_state_use = 0; + +size_t +xmbsrtowcs (dest, src, len, pstate) + wchar_t *dest; + const char **src; + size_t len; + mbstate_t *pstate; +{ + mbstate_t *ps; + size_t mblength, wclength, n; + + ps = pstate; + if (pstate == NULL) + { + if (!local_state_use) + { + memset (&local_state, '\0', sizeof(mbstate_t)); + local_state_use = 1; + } + ps = &local_state; + } + + n = strlen (*src); + + if (dest == NULL) + { + wchar_t *wsbuf; + const char *mbs; + mbstate_t psbuf; + + /* It doesn't matter if malloc fails here, since mbsrtowcs should do + the right thing with a NULL first argument. */ + wsbuf = (wchar_t *) malloc ((n + 1) * sizeof(wchar_t)); + mbs = *src; + psbuf = *ps; + + wclength = mbsrtowcs (wsbuf, &mbs, n, &psbuf); + + if (wsbuf) + free (wsbuf); + return wclength; + } + + for (wclength = 0; wclength < len; wclength++, dest++) + { + if (mbsinit(ps)) + { + if (**src == '\0') + { + *dest = L'\0'; + *src = NULL; + return (wclength); + } + else if (**src == '\\') + { + *dest = L'\\'; + mblength = 1; + } + else + mblength = mbrtowc(dest, *src, n, ps); + } + else + mblength = mbrtowc(dest, *src, n, ps); + + /* Cannot convert multibyte character to wide character. */ + if (mblength == (size_t)-1 || mblength == (size_t)-2) + return (size_t)-1; + + *src += mblength; + n -= mblength; + + /* The multibyte string has been completely converted, + including the terminating '\0'. */ + if (*dest == L'\0') + { + *src = NULL; + break; + } + } + + return (wclength); +} + +#if HAVE_MBSNRTOWCS +/* Convert a multibyte string to a wide character string. Memory for the + new wide character string is obtained with malloc. + + Fast multiple-character version of xdupmbstowcs used when the indices are + not required and mbsnrtowcs is available. */ + +static size_t +xdupmbstowcs2 (destp, src) + wchar_t **destp; /* Store the pointer to the wide character string */ + const char *src; /* Multibyte character string */ +{ + const char *p; /* Conversion start position of src */ + wchar_t *wsbuf; /* Buffer for wide characters. */ + size_t wsbuf_size; /* Size of WSBUF */ + size_t wcnum; /* Number of wide characters in WSBUF */ + mbstate_t state; /* Conversion State */ + size_t n, wcslength; /* Number of wide characters produced by the conversion. */ + const char *end_or_backslash; + size_t nms; /* Number of multibyte characters to convert at one time. */ + mbstate_t tmp_state; + const char *tmp_p; + + memset (&state, '\0', sizeof(mbstate_t)); + + wsbuf_size = 0; + wsbuf = NULL; + + p = src; + wcnum = 0; + do + { + end_or_backslash = strchrnul(p, '\\'); + nms = end_or_backslash - p; + if (*end_or_backslash == '\0') + nms++; + + /* Compute the number of produced wide-characters. */ + tmp_p = p; + tmp_state = state; + + if (nms == 0 && *p == '\\') /* special initial case */ + nms = wcslength = 1; + else + wcslength = mbsnrtowcs (NULL, &tmp_p, nms, 0, &tmp_state); + + if (wcslength == 0) + { + tmp_p = p; /* will need below */ + tmp_state = state; + wcslength = 1; /* take a single byte */ + } + + /* Conversion failed. */ + if (wcslength == (size_t)-1) + { + free (wsbuf); + *destp = NULL; + return (size_t)-1; + } + + /* Resize the buffer if it is not large enough. */ + if (wsbuf_size < wcnum+wcslength+1) /* 1 for the L'\0' or the potential L'\\' */ + { + wchar_t *wstmp; + + while (wsbuf_size < wcnum+wcslength+1) /* 1 for the L'\0' or the potential L'\\' */ + wsbuf_size += WSBUF_INC; + + wstmp = (wchar_t *) realloc (wsbuf, wsbuf_size * sizeof (wchar_t)); + if (wstmp == NULL) + { + free (wsbuf); + *destp = NULL; + return (size_t)-1; + } + wsbuf = wstmp; + } + + /* Perform the conversion. This is assumed to return 'wcslength'. + It may set 'p' to NULL. */ + n = mbsnrtowcs(wsbuf+wcnum, &p, nms, wsbuf_size-wcnum, &state); + + if (n == 0 && p == 0) + { + wsbuf[wcnum] = L'\0'; + break; + } + + /* Compensate for taking single byte on wcs conversion failure above. */ + if (wcslength == 1 && (n == 0 || n == (size_t)-1)) + { + state = tmp_state; + p = tmp_p; + wsbuf[wcnum] = *p; + if (*p == 0) + break; + else + { + wcnum++; p++; + } + } + else + wcnum += wcslength; + + if (mbsinit (&state) && (p != NULL) && (*p == '\\')) + { + wsbuf[wcnum++] = L'\\'; + p++; + } + } + while (p != NULL); + + *destp = wsbuf; + + /* Return the length of the wide character string, not including `\0'. */ + return wcnum; +} +#endif /* HAVE_MBSNRTOWCS */ + +/* Convert a multibyte string to a wide character string. Memory for the + new wide character string is obtained with malloc. + + The return value is the length of the wide character string. Returns a + pointer to the wide character string in DESTP. If INDICESP is not NULL, + INDICESP stores the pointer to the pointer array. Each pointer is to + the first byte of each multibyte character. Memory for the pointer array + is obtained with malloc, too. + If conversion is failed, the return value is (size_t)-1 and the values + of DESTP and INDICESP are NULL. */ + +size_t +xdupmbstowcs (destp, indicesp, src) + wchar_t **destp; /* Store the pointer to the wide character string */ + char ***indicesp; /* Store the pointer to the pointer array. */ + const char *src; /* Multibyte character string */ +{ + const char *p; /* Conversion start position of src */ + wchar_t wc; /* Created wide character by conversion */ + wchar_t *wsbuf; /* Buffer for wide characters. */ + char **indices; /* Buffer for indices. */ + size_t wsbuf_size; /* Size of WSBUF */ + size_t wcnum; /* Number of wide characters in WSBUF */ + mbstate_t state; /* Conversion State */ + + /* In case SRC or DESP is NULL, conversion doesn't take place. */ + if (src == NULL || destp == NULL) + { + if (destp) + *destp = NULL; + if (indicesp) + *indicesp = NULL; + return (size_t)-1; + } + +#if HAVE_MBSNRTOWCS + if (indicesp == NULL) + return (xdupmbstowcs2 (destp, src)); +#endif + + memset (&state, '\0', sizeof(mbstate_t)); + wsbuf_size = WSBUF_INC; + + wsbuf = (wchar_t *) malloc (wsbuf_size * sizeof(wchar_t)); + if (wsbuf == NULL) + { + *destp = NULL; + if (indicesp) + *indicesp = NULL; + return (size_t)-1; + } + + indices = NULL; + if (indicesp) + { + indices = (char **) malloc (wsbuf_size * sizeof(char *)); + if (indices == NULL) + { + free (wsbuf); + *destp = NULL; + *indicesp = NULL; + return (size_t)-1; + } + } + + p = src; + wcnum = 0; + do + { + size_t mblength; /* Byte length of one multibyte character. */ + + if (mbsinit (&state)) + { + if (*p == '\0') + { + wc = L'\0'; + mblength = 1; + } + else if (*p == '\\') + { + wc = L'\\'; + mblength = 1; + } + else + mblength = mbrtowc(&wc, p, MB_LEN_MAX, &state); + } + else + mblength = mbrtowc(&wc, p, MB_LEN_MAX, &state); + + /* Conversion failed. */ + if (MB_INVALIDCH (mblength)) + { + free (wsbuf); + FREE (indices); + *destp = NULL; + if (indicesp) + *indicesp = NULL; + return (size_t)-1; + } + + ++wcnum; + + /* Resize buffers when they are not large enough. */ + if (wsbuf_size < wcnum) + { + wchar_t *wstmp; + char **idxtmp; + + wsbuf_size += WSBUF_INC; + + wstmp = (wchar_t *) realloc (wsbuf, wsbuf_size * sizeof (wchar_t)); + if (wstmp == NULL) + { + free (wsbuf); + FREE (indices); + *destp = NULL; + if (indicesp) + *indicesp = NULL; + return (size_t)-1; + } + wsbuf = wstmp; + + if (indicesp) + { + idxtmp = (char **) realloc (indices, wsbuf_size * sizeof (char *)); + if (idxtmp == NULL) + { + free (wsbuf); + free (indices); + *destp = NULL; + if (indicesp) + *indicesp = NULL; + return (size_t)-1; + } + indices = idxtmp; + } + } + + wsbuf[wcnum - 1] = wc; + if (indices) + indices[wcnum - 1] = (char *)p; + p += mblength; + } + while (MB_NULLWCH (wc) == 0); + + /* Return the length of the wide character string, not including `\0'. */ + *destp = wsbuf; + if (indicesp != NULL) + *indicesp = indices; + + return (wcnum - 1); +} + +/* Convert wide character string to multibyte character string. Treat invalid + wide characters as bytes. Used only in unusual circumstances. + + Written by Bruno Haible <bruno@clisp.org>, 2008, adapted by Chet Ramey + for use in Bash. */ + +/* Convert wide character string *SRCP to a multibyte character string and + store the result in DEST. Store at most LEN bytes in DEST. */ +size_t +xwcsrtombs (char *dest, const wchar_t **srcp, size_t len, mbstate_t *ps) +{ + const wchar_t *src; + size_t cur_max; /* XXX - locale_cur_max */ + char buf[64], *destptr, *tmp_dest; + unsigned char uc; + mbstate_t prev_state; + + cur_max = MB_CUR_MAX; + if (cur_max > sizeof (buf)) /* Holy cow. */ + return (size_t)-1; + + src = *srcp; + + if (dest != NULL) + { + destptr = dest; + + for (; len > 0; src++) + { + wchar_t wc; + size_t ret; + + wc = *src; + /* If we have room, store directly into DEST. */ + tmp_dest = destptr; + ret = wcrtomb (len >= cur_max ? destptr : buf, wc, ps); + + if (ret == (size_t)(-1)) /* XXX */ + { + /* Since this is used for globbing and other uses of filenames, + treat invalid wide character sequences as bytes. This is + intended to be symmetric with xdupmbstowcs2. */ +handle_byte: + destptr = tmp_dest; /* in case wcrtomb modified it */ + uc = wc; + ret = 1; + if (len >= cur_max) + *destptr = uc; + else + buf[0] = uc; + if (ps) + memset (ps, 0, sizeof (mbstate_t)); + } + + if (ret > cur_max) /* Holy cow */ + goto bad_input; + + if (len < ret) + break; + + if (len < cur_max) + memcpy (destptr, buf, ret); + + if (wc == 0) + { + src = NULL; + /* Here mbsinit (ps). */ + break; + } + destptr += ret; + len -= ret; + } + *srcp = src; + return destptr - dest; + } + else + { + /* Ignore dest and len, don't store *srcp at the end, and + don't clobber *ps. */ + mbstate_t state = *ps; + size_t totalcount = 0; + + for (;; src++) + { + wchar_t wc; + size_t ret; + + wc = *src; + ret = wcrtomb (buf, wc, &state); + + if (ret == (size_t)(-1)) + goto bad_input2; + if (wc == 0) + { + /* Here mbsinit (&state). */ + break; + } + totalcount += ret; + } + return totalcount; + } + +bad_input: + *srcp = src; +bad_input2: + errno = EILSEQ; + return (size_t)(-1); +} + +#endif /* HANDLE_MULTIBYTE */ |