Adding upstream version 14.5. (refs: upstream/14.5, upstream)

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-04 12:15:05 +0000
commit: 46651ce6fe013220ed397add242004d764fc0153 (patch)
tree: 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/test/modules/test_regex/test_regex.c
parent: Initial commit. (diff)
download: postgresql-14-upstream.tar.xz
postgresql-14-upstream.zip
1 files changed, 773 insertions, 0 deletions
diff --git a/src/test/modules/test_regex/test_regex.c b/src/test/modules/test_regex/test_regex.c
new file mode 100644
index 0000000..095751c
--- /dev/null
+++ b/src/test/modules/test_regex/test_regex.c
@@ -0,0 +1,773 @@
+/*--------------------------------------------------------------------------
+ *
+ * test_regex.c
+ *		Test harness for the regular expression package.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *		src/test/modules/test_regex/test_regex.c
+ *
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "regex/regex.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+
+PG_MODULE_MAGIC;
+
+
+/* all the options parsed from test_regex()'s flags text (see parse_test_flags) */
+typedef struct test_re_flags
+{
+	int			cflags;			/* compile flags for Spencer's regex code (pg_regcomp) */
+	int			eflags;			/* execute flags for Spencer's regex code (pg_regexec) */
+	long		info;			/* expected re_info bits, checked by build_test_info_result */
+	bool		glob;			/* do it globally (for each occurrence) */
+	bool		indices;		/* report indices not actual strings */
+	bool		partial;		/* expect partial match: with indices, report details on no match */
+} test_re_flags;
+
+/* cross-call state for test_regex(); created by setup_test_matches() */
+typedef struct test_regex_ctx
+{
+	test_re_flags re_flags;		/* copy of the parsed flags */
+	rm_detail_t details;		/* "details" from the last regex execution */
+	text	   *orig_str;		/* data string in original TEXT form */
+	int			nmatches;		/* number of match rows stored in match_locs */
+	int			npatterns;		/* slots per match: whole match + capturing subpatterns */
+	/* We store start char index and end+1 char index for each slot of each */
+	/* match, so the number of entries in match_locs is nmatches * npatterns * 2 */
+	int		   *match_locs;		/* 0-based character indexes, -1 if slot unset */
+	int			next_match;		/* 0-based index of next match to process */
+	/* workspace for build_test_match_result(), allocated by test_regex() */
+	Datum	   *elems;			/* has npatterns+1 elements */
+	bool	   *nulls;			/* has npatterns+1 elements */
+	pg_wchar   *wide_str;		/* wide-char version of original string, or NULL */
+	char	   *conv_buf;		/* conversion buffer, if needed (else NULL) */
+	int			conv_bufsiz;	/* size thereof */
+} test_regex_ctx;
+
+/* Local functions (test_re_execute needs no prototype: defined before use) */
+static void test_re_compile(text *text_re, int cflags, Oid collation,
+							regex_t *result_re);
+static void parse_test_flags(test_re_flags *flags, text *opts);
+static test_regex_ctx *setup_test_matches(text *orig_str,
+										  regex_t *cpattern,
+										  test_re_flags *flags,
+										  Oid collation,
+										  bool use_subpatterns);
+static ArrayType *build_test_info_result(regex_t *cpattern,
+										 test_re_flags *flags);
+static ArrayType *build_test_match_result(test_regex_ctx *matchctx);
+
+
+/*
+ * test_regex(pattern text, string text, flags text) returns setof text[]
+ *
+ * First row describes the compiled regex; each later row is one match.
+ * Largely based on regexp.c's regexp_matches, plus debugging additions.
+ */
+PG_FUNCTION_INFO_V1(test_regex);
+
+Datum
+test_regex(PG_FUNCTION_ARGS)
+{
+	FuncCallContext *funcctx;
+	test_regex_ctx *matchctx;
+	ArrayType  *result_ary;
+
+	if (SRF_IS_FIRSTCALL())
+	{
+		text	   *pattern = PG_GETARG_TEXT_PP(0);
+		text	   *flags = PG_GETARG_TEXT_PP(2);
+		Oid			collation = PG_GET_COLLATION();
+		test_re_flags re_flags;
+		regex_t		cpattern;
+		MemoryContext oldcontext;
+
+		funcctx = SRF_FIRSTCALL_INIT();
+		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+		/* Determine options (raises ERROR on an unrecognized flag character) */
+		parse_test_flags(&re_flags, flags);
+
+		/* set up the compiled pattern (raises ERROR if it does not compile) */
+		test_re_compile(pattern, re_flags.cflags, collation, &cpattern);
+
+		/* be sure to copy the input string into the multi-call ctx */
+		matchctx = setup_test_matches(PG_GETARG_TEXT_P_COPY(1), &cpattern,
+									  &re_flags,
+									  collation,
+									  true);
+
+		/* Pre-create workspace for build_test_match_result (+1 for "details") */
+		matchctx->elems = (Datum *) palloc(sizeof(Datum) *
+										   (matchctx->npatterns + 1));
+		matchctx->nulls = (bool *) palloc(sizeof(bool) *
+										  (matchctx->npatterns + 1));
+
+		MemoryContextSwitchTo(oldcontext);
+		funcctx->user_fctx = (void *) matchctx;
+
+		/*
+		 * Return the first result row, which is info equivalent to Tcl's
+		 * "regexp -about" output
+		 */
+		result_ary = build_test_info_result(&cpattern, &re_flags);
+
+		pg_regfree(&cpattern);	/* all matching was already done above */
+
+		SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
+	}
+	else
+	{
+		/* Each subsequent row describes one match */
+		funcctx = SRF_PERCALL_SETUP();
+		matchctx = (test_regex_ctx *) funcctx->user_fctx;
+
+		if (matchctx->next_match < matchctx->nmatches)
+		{
+			result_ary = build_test_match_result(matchctx);
+			matchctx->next_match++;
+			SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
+		}
+	}
+
+	SRF_RETURN_DONE(funcctx);	/* every stored match has been returned */
+}
+
+
+/*
+ * test_re_compile - compile a RE, raising ERROR if it does not compile
+ *
+ *	text_re --- the pattern, expressed as a TEXT object
+ *	cflags --- compile options for the pattern
+ *	collation --- collation to use for LC_CTYPE-dependent behavior
+ *	result_re --- output, compiled RE is stored here
+ *
+ * Pattern is given in the database encoding.  We internally convert to
+ * an array of pg_wchar, which is what Spencer's regex package wants.
+ *
+ * Caller must eventually pg_regfree the resulting RE to avoid memory leaks.
+ */
+static void
+test_re_compile(text *text_re, int cflags, Oid collation,
+				regex_t *result_re)
+{
+	int			text_re_len = VARSIZE_ANY_EXHDR(text_re);
+	char	   *text_re_val = VARDATA_ANY(text_re);
+	pg_wchar   *pattern;
+	int			pattern_len;
+	int			regcomp_result;
+	char		errMsg[100];	/* receives the pg_regerror() text */
+
+	/* Convert pattern string to wide characters */
+	pattern = (pg_wchar *) palloc((text_re_len + 1) * sizeof(pg_wchar));
+	pattern_len = pg_mb2wchar_with_len(text_re_val,
+									   pattern,
+									   text_re_len);
+
+	regcomp_result = pg_regcomp(result_re,
+								pattern,
+								pattern_len,
+								cflags,
+								collation);
+
+	pfree(pattern);				/* wide-char copy is not needed after compiling */
+
+	if (regcomp_result != REG_OKAY)
+	{
+		/* re didn't compile (no need for pg_regfree, if so) */
+
+		/*
+		 * Here and in other places in this file, do CHECK_FOR_INTERRUPTS
+		 * before reporting a regex error.  This is so that if the regex
+		 * library aborts and returns REG_CANCEL, we don't print an error
+		 * message that implies the regex was invalid.
+		 */
+		CHECK_FOR_INTERRUPTS();
+
+		pg_regerror(regcomp_result, result_re, errMsg, sizeof(errMsg));
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+				 errmsg("invalid regular expression: %s", errMsg)));
+	}
+}
+
+/*
+ * test_re_execute - execute a RE on pg_wchar data
+ *
+ * Returns true on match, false on no match; any other result raises ERROR.
+ * Arguments are as for pg_regexec; details and pmatch[] are preset to -1.
+ */
+static bool
+test_re_execute(regex_t *re, pg_wchar *data, int data_len,
+				int start_search,
+				rm_detail_t *details,
+				int nmatch, regmatch_t *pmatch,
+				int eflags)
+{
+	int			regexec_result;
+	char		errMsg[100];	/* receives the pg_regerror() text */
+
+	/* Initialize match locations in case engine doesn't */
+	details->rm_extend.rm_so = -1;
+	details->rm_extend.rm_eo = -1;
+	for (int i = 0; i < nmatch; i++)
+	{
+		pmatch[i].rm_so = -1;
+		pmatch[i].rm_eo = -1;
+	}
+
+	/* Perform RE match and return result */
+	regexec_result = pg_regexec(re,
+								data,
+								data_len,
+								start_search,
+								details,
+								nmatch,
+								pmatch,
+								eflags);
+
+	if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
+	{
+		/* re failed??? (check for interrupts first, as in test_re_compile) */
+		CHECK_FOR_INTERRUPTS();
+		pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
+				 errmsg("regular expression failed: %s", errMsg)));
+	}
+
+	return (regexec_result == REG_OKAY);
+}
+
+
+/*
+ * parse_test_flags - parse the flags argument (one option per character)
+ *
+ *	flags --- output argument, filled with desired options
+ *	opts --- TEXT object, or NULL for defaults; unknown options raise ERROR
+ */
+static void
+parse_test_flags(test_re_flags *flags, text *opts)
+{
+	/* these defaults must match Tcl's */
+	int			cflags = REG_ADVANCED;
+	int			eflags = 0;
+	long		info = 0;
+
+	flags->glob = false;
+	flags->indices = false;
+	flags->partial = false;
+
+	if (opts)
+	{
+		char	   *opt_p = VARDATA_ANY(opts);
+		int			opt_len = VARSIZE_ANY_EXHDR(opts);
+		int			i;
+
+		for (i = 0; i < opt_len; i++)
+		{
+			switch (opt_p[i])
+			{
+				case '-':
+					/* allowed, no-op */
+					break;
+				case '!':		/* expect partial match */
+					flags->partial = true;
+					break;
+				case '*':
+					/* test requires Unicode --- ignored here */
+					break;
+				case '0':		/* report indices, not strings */
+					flags->indices = true;
+					break;
+
+					/* These flags correspond to user-exposed RE options: */
+				case 'g':		/* global match */
+					flags->glob = true;
+					break;
+				case 'i':		/* case insensitive */
+					cflags |= REG_ICASE;
+					break;
+				case 'n':		/* \n affects ^ $ . [^ */
+					cflags |= REG_NEWLINE;
+					break;
+				case 'p':		/* ~Perl, \n affects . [^ */
+					cflags |= REG_NLSTOP;
+					cflags &= ~REG_NLANCH;
+					break;
+				case 'w':		/* weird, \n affects ^ $ only */
+					cflags &= ~REG_NLSTOP;
+					cflags |= REG_NLANCH;
+					break;
+				case 'x':		/* expanded syntax */
+					cflags |= REG_EXPANDED;
+					break;
+
+					/* These flags correspond to Tcl's -xflags options: */
+				case 'a':
+					cflags |= REG_ADVF;
+					break;
+				case 'b':		/* clear the default REG_ADVANCED bits */
+					cflags &= ~REG_ADVANCED;
+					break;
+				case 'c':
+
+					/*
+					 * Tcl calls this TCL_REG_CANMATCH, but it's really
+					 * REG_EXPECT.  In this implementation we must also set
+					 * the partial and indices flags, so that
+					 * setup_test_matches and build_test_match_result will
+					 * emit the desired data.  (They'll emit more fields than
+					 * Tcl would, but that's fine.)
+					 */
+					cflags |= REG_EXPECT;
+					flags->partial = true;
+					flags->indices = true;
+					break;
+				case 'e':		/* REG_EXTENDED instead of REG_ADVANCED */
+					cflags &= ~REG_ADVANCED;
+					cflags |= REG_EXTENDED;
+					break;
+				case 'q':		/* REG_QUOTE instead of REG_ADVANCED */
+					cflags &= ~REG_ADVANCED;
+					cflags |= REG_QUOTE;
+					break;
+				case 'o':		/* o for opaque */
+					cflags |= REG_NOSUB;
+					break;
+				case 's':		/* s for start */
+					cflags |= REG_BOSONLY;
+					break;
+				case '+':
+					cflags |= REG_FAKE;
+					break;
+				case ',':
+					cflags |= REG_PROGRESS;
+					break;
+				case '.':
+					cflags |= REG_DUMP;
+					break;
+				case ':':
+					eflags |= REG_MTRACE;
+					break;
+				case ';':
+					eflags |= REG_FTRACE;
+					break;
+				case '^':
+					eflags |= REG_NOTBOL;
+					break;
+				case '$':
+					eflags |= REG_NOTEOL;
+					break;
+				case 't':		/* REG_EXPECT only; unlike 'c', no partial/indices */
+					cflags |= REG_EXPECT;
+					break;
+				case '%':
+					eflags |= REG_SMALL;
+					break;
+
+					/* These flags define expected info bits: */
+				case 'A':
+					info |= REG_UBSALNUM;
+					break;
+				case 'B':
+					info |= REG_UBRACES;
+					break;
+				case 'E':
+					info |= REG_UBBS;
+					break;
+				case 'H':
+					info |= REG_ULOOKAROUND;
+					break;
+				case 'I':
+					info |= REG_UIMPOSSIBLE;
+					break;
+				case 'L':
+					info |= REG_ULOCALE;
+					break;
+				case 'M':
+					info |= REG_UUNPORT;
+					break;
+				case 'N':
+					info |= REG_UEMPTYMATCH;
+					break;
+				case 'P':
+					info |= REG_UNONPOSIX;
+					break;
+				case 'Q':
+					info |= REG_UBOUNDS;
+					break;
+				case 'R':
+					info |= REG_UBACKREF;
+					break;
+				case 'S':
+					info |= REG_UUNSPEC;
+					break;
+				case 'T':
+					info |= REG_USHORTEST;
+					break;
+				case 'U':
+					info |= REG_UPBOTCH;
+					break;
+
+				default:		/* anything else is rejected */
+					ereport(ERROR,
+							(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+							 errmsg("invalid regular expression test option: \"%.*s\"",
+									pg_mblen(opt_p + i), opt_p + i)));
+					break;
+			}
+		}
+	}
+	flags->cflags = cflags;		/* publish the accumulated bit sets */
+	flags->eflags = eflags;
+	flags->info = info;
+}
+
+/*
+ * setup_test_matches --- do the initial matching
+ *
+ * To simplify memory management, we do all the matching in one swoop.
+ * The returned (palloc'd) test_regex_ctx contains the locations of all the
+ * substrings matching the pattern; its elems/nulls fields are left unset.
+ */
+static test_regex_ctx *
+setup_test_matches(text *orig_str,
+				   regex_t *cpattern, test_re_flags *re_flags,
+				   Oid collation,	/* not referenced in this function */
+				   bool use_subpatterns)
+{
+	test_regex_ctx *matchctx = palloc0(sizeof(test_regex_ctx));
+	int			eml = pg_database_encoding_max_length();
+	int			orig_len;
+	pg_wchar   *wide_str;
+	int			wide_len;
+	regmatch_t *pmatch;
+	int			pmatch_len;
+	int			array_len;		/* allocated entries in match_locs */
+	int			array_idx;		/* next unused entry in match_locs */
+	int			prev_match_end;
+	int			start_search;
+	int			maxlen = 0;		/* largest fetch length in characters */
+
+	/* save flags */
+	matchctx->re_flags = *re_flags;
+
+	/* save original string --- we'll extract result substrings from it */
+	matchctx->orig_str = orig_str;
+
+	/* convert string to pg_wchar form for matching */
+	orig_len = VARSIZE_ANY_EXHDR(orig_str);
+	wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
+	wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
+
+	/* do we want to remember subpatterns?  (slot 0 is always the whole match) */
+	if (use_subpatterns && cpattern->re_nsub > 0)
+	{
+		matchctx->npatterns = cpattern->re_nsub + 1;
+		pmatch_len = cpattern->re_nsub + 1;
+	}
+	else
+	{
+		use_subpatterns = false;
+		matchctx->npatterns = 1;
+		pmatch_len = 1;
+	}
+
+	/* temporary output space for RE package */
+	pmatch = palloc(sizeof(regmatch_t) * pmatch_len);
+
+	/*
+	 * the real output space (grown dynamically if needed)
+	 *
+	 * use values 2^n-1, not 2^n, so that we hit the limit at 2^28-1 rather
+	 * than at 2^27
+	 */
+	array_len = re_flags->glob ? 255 : 31;
+	matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
+	array_idx = 0;
+
+	/* search for the pattern, perhaps repeatedly */
+	prev_match_end = 0;
+	start_search = 0;
+	while (test_re_execute(cpattern, wide_str, wide_len,
+						   start_search,
+						   &matchctx->details,
+						   pmatch_len, pmatch,
+						   re_flags->eflags))
+	{
+		/* enlarge output space if needed */
+		while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
+		{
+			array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
+			if (array_len > MaxAllocSize / sizeof(int))
+				ereport(ERROR,
+						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+						 errmsg("too many regular expression matches")));
+			matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
+													sizeof(int) * array_len);
+		}
+
+		/* save this match's locations, tracking the longest substring seen */
+		for (int i = 0; i < matchctx->npatterns; i++)
+		{
+			int			so = pmatch[i].rm_so;
+			int			eo = pmatch[i].rm_eo;
+
+			matchctx->match_locs[array_idx++] = so;
+			matchctx->match_locs[array_idx++] = eo;
+			if (so >= 0 && eo >= 0 && (eo - so) > maxlen)
+				maxlen = (eo - so);
+		}
+		matchctx->nmatches++;
+		prev_match_end = pmatch[0].rm_eo;
+
+		/* if not glob, stop after one match */
+		if (!re_flags->glob)
+			break;
+
+		/*
+		 * Advance search position.  Normally we start the next search at the
+		 * end of the previous match; but if the match was of zero length, we
+		 * have to advance by one character, or we'd just find the same match
+		 * again.
+		 */
+		start_search = prev_match_end;
+		if (pmatch[0].rm_so == pmatch[0].rm_eo)
+			start_search++;
+		if (start_search > wide_len)
+			break;				/* stepped past the end of the string */
+	}
+
+	/*
+	 * If we had no match, but "partial" and "indices" are set, emit the
+	 * details.
+	 */
+	if (matchctx->nmatches == 0 && re_flags->partial && re_flags->indices)
+	{
+		/* enlarge output space if needed */
+		while (array_idx + matchctx->npatterns * 2 + 1 > array_len)
+		{
+			array_len += array_len + 1; /* 2^n-1 => 2^(n+1)-1 */
+			if (array_len > MaxAllocSize / sizeof(int))
+				ereport(ERROR,
+						(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+						 errmsg("too many regular expression matches")));
+			matchctx->match_locs = (int *) repalloc(matchctx->match_locs,
+													sizeof(int) * array_len);
+		}
+
+		matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_so;
+		matchctx->match_locs[array_idx++] = matchctx->details.rm_extend.rm_eo;
+		/* we don't have pmatch data, so emit -1 */
+		for (int i = 1; i < matchctx->npatterns; i++)
+		{
+			matchctx->match_locs[array_idx++] = -1;
+			matchctx->match_locs[array_idx++] = -1;
+		}
+		matchctx->nmatches++;	/* counted as one row for the caller to emit */
+	}
+
+	Assert(array_idx <= array_len);
+
+	if (eml > 1)
+	{
+		int64		maxsiz = eml * (int64) maxlen;
+		int			conv_bufsiz;
+
+		/*
+		 * Make the conversion buffer large enough for any substring of
+		 * interest.
+		 *
+		 * Worst case: assume we need the maximum size (maxlen*eml), but take
+		 * advantage of the fact that the original string length in bytes is
+		 * an upper bound on the byte length of any fetched substring (and we
+		 * know that len+1 is safe to allocate because the varlena header is
+		 * longer than 1 byte).
+		 */
+		if (maxsiz > orig_len)
+			conv_bufsiz = orig_len + 1;
+		else
+			conv_bufsiz = maxsiz + 1;	/* safe since maxsiz < 2^30 */
+
+		matchctx->conv_buf = palloc(conv_bufsiz);
+		matchctx->conv_bufsiz = conv_bufsiz;
+		matchctx->wide_str = wide_str;
+	}
+	else
+	{
+		/* No need to keep the wide string if we're in a single-byte charset. */
+		pfree(wide_str);
+		matchctx->wide_str = NULL;
+		matchctx->conv_buf = NULL;
+		matchctx->conv_bufsiz = 0;
+	}
+
+	/* Clean up temp storage */
+	pfree(pmatch);
+
+	return matchctx;
+}
+
+/*
+ * build_test_info_result - build output array describing compiled regexp
+ * (re_nsub first, then info bits, flagging unexpected or missing ones).
+ * This borrows some code from Tcl's TclRegAbout().
+ */
+static ArrayType *
+build_test_info_result(regex_t *cpattern, test_re_flags *flags)
+{
+	/* Translation data for flag bits in regex_t.re_info */
+	struct infoname
+	{
+		int			bit;
+		const char *text;
+	};
+	static const struct infoname infonames[] = {
+		{REG_UBACKREF, "REG_UBACKREF"},
+		{REG_ULOOKAROUND, "REG_ULOOKAROUND"},
+		{REG_UBOUNDS, "REG_UBOUNDS"},
+		{REG_UBRACES, "REG_UBRACES"},
+		{REG_UBSALNUM, "REG_UBSALNUM"},
+		{REG_UPBOTCH, "REG_UPBOTCH"},
+		{REG_UBBS, "REG_UBBS"},
+		{REG_UNONPOSIX, "REG_UNONPOSIX"},
+		{REG_UUNSPEC, "REG_UUNSPEC"},
+		{REG_UUNPORT, "REG_UUNPORT"},
+		{REG_ULOCALE, "REG_ULOCALE"},
+		{REG_UEMPTYMATCH, "REG_UEMPTYMATCH"},
+		{REG_UIMPOSSIBLE, "REG_UIMPOSSIBLE"},
+		{REG_USHORTEST, "REG_USHORTEST"},
+		{0, NULL}				/* end-of-list marker for the loop below */
+	};
+	const struct infoname *inf;
+	Datum		elems[lengthof(infonames) + 1]; /* re_nsub + at most one per bit */
+	int			nresults = 0;
+	char		buf[80];
+	int			dims[1];
+	int			lbs[1];
+
+	/* Set up results: first, the number of subexpressions */
+	snprintf(buf, sizeof(buf), "%d", (int) cpattern->re_nsub);
+	elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
+
+	/* Report individual info bit states, comparing actual against expected */
+	for (inf = infonames; inf->bit != 0; inf++)
+	{
+		if (cpattern->re_info & inf->bit)
+		{
+			/* bit is set: report it, marking it if the test didn't expect it */
+			if (flags->info & inf->bit)
+				elems[nresults++] = PointerGetDatum(cstring_to_text(inf->text));
+			else
+			{
+				snprintf(buf, sizeof(buf), "unexpected %s!", inf->text);
+				elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
+			}
+		}
+		else
+		{
+			if (flags->info & inf->bit)
+			{
+				snprintf(buf, sizeof(buf), "missing %s!", inf->text);
+				elems[nresults++] = PointerGetDatum(cstring_to_text(buf));
+			}
+		}
+	}
+
+	/* And form a one-dimensional array with lower bound 1 and no nulls */
+	dims[0] = nresults;
+	lbs[0] = 1;
+	/* XXX: this hardcodes assumptions about the text type */
+	return construct_md_array(elems, NULL, 1, dims, lbs,
+							  TEXTOID, -1, false, TYPALIGN_INT);
+}
+
+/*
+ * build_test_match_result - build output array for current match
+ *
+ * One element per pattern slot of match number matchctx->next_match.  Note
+ * that if the indices flag is set, we emit "start end" location text only.
+ */
+static ArrayType *
+build_test_match_result(test_regex_ctx *matchctx)
+{
+	char	   *buf = matchctx->conv_buf;
+	Datum	   *elems = matchctx->elems;
+	bool	   *nulls = matchctx->nulls;
+	bool		indices = matchctx->re_flags.indices;
+	char		bufstr[80];
+	int			dims[1];
+	int			lbs[1];
+	int			loc;
+	int			i;
+
+	/* Extract matching substrings from the original string */
+	loc = matchctx->next_match * matchctx->npatterns * 2;
+	for (i = 0; i < matchctx->npatterns; i++)
+	{
+		int			so = matchctx->match_locs[loc++];
+		int			eo = matchctx->match_locs[loc++];
+
+		if (indices)
+		{
+			/* Report eo this way (as last-char index) for consistency with Tcl */
+			snprintf(bufstr, sizeof(bufstr), "%d %d",
+					 so, so < 0 ? eo : eo - 1);
+			elems[i] = PointerGetDatum(cstring_to_text(bufstr));
+			nulls[i] = false;
+		}
+		else if (so < 0 || eo < 0)
+		{
+			elems[i] = (Datum) 0;	/* slot has no location: emit NULL */
+			nulls[i] = true;
+		}
+		else if (buf)
+		{
+			int			len = pg_wchar2mb_with_len(matchctx->wide_str + so,
+												   buf,
+												   eo - so);
+
+			Assert(len < matchctx->conv_bufsiz);
+			elems[i] = PointerGetDatum(cstring_to_text_with_len(buf, len));
+			nulls[i] = false;
+		}
+		else
+		{
+			elems[i] = DirectFunctionCall3(text_substr,
+										   PointerGetDatum(matchctx->orig_str),
+										   Int32GetDatum(so + 1),
+										   Int32GetDatum(eo - so));
+			nulls[i] = false;
+		}
+	}
+
+	/* In EXPECT indices mode, also report the "details" */
+	if (indices && (matchctx->re_flags.cflags & REG_EXPECT))
+	{
+		int			so = matchctx->details.rm_extend.rm_so;
+		int			eo = matchctx->details.rm_extend.rm_eo;
+
+		snprintf(bufstr, sizeof(bufstr), "%d %d",
+				 so, so < 0 ? eo : eo - 1);
+		elems[i] = PointerGetDatum(cstring_to_text(bufstr));
+		nulls[i] = false;
+		i++;					/* uses the extra (npatterns+1'th) workspace slot */
+	}
+
+	/* And form a one-dimensional array with lower bound 1 */
+	dims[0] = i;
+	lbs[0] = 1;
+	/* XXX: this hardcodes assumptions about the text type */
+	return construct_md_array(elems, nulls, 1, dims, lbs,
+							  TEXTOID, -1, false, TYPALIGN_INT);
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:15:05 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:15:05 +0000
commit	46651ce6fe013220ed397add242004d764fc0153 (patch)
tree	6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/test/modules/test_regex/test_regex.c
parent	Initial commit. (diff)
download	postgresql-14-upstream.tar.xz postgresql-14-upstream.zip