Adding upstream version 1.22.1.upstream/1.22.1

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-16 19:25:22 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-16 19:25:22 +0000
commit: f6ad4dcef54c5ce997a4bad5a6d86de229015700 (patch)
tree: 7cfa4e31ace5c2bd95c72b154d15af494b2bcbef /src/regexp/syntax/regexp.go
parent: Initial commit. (diff)
download: golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.tar.xz
golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.zip
1 files changed, 477 insertions, 0 deletions
diff --git a/src/regexp/syntax/regexp.go b/src/regexp/syntax/regexp.go
new file mode 100644
index 0000000..4fa7d0e
--- /dev/null
+++ b/src/regexp/syntax/regexp.go
@@ -0,0 +1,477 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package syntax
+
+// Note to implementers:
+// In this package, re is always a *Regexp and r is always a rune.
+
+import (
+	"strconv"
+	"strings"
+	"unicode"
+)
+
+// A Regexp is a node in a regular expression syntax tree.
+type Regexp struct {
+	Op       Op // operator
+	Flags    Flags
+	Sub      []*Regexp  // subexpressions, if any
+	Sub0     [1]*Regexp // storage for short Sub
+	Rune     []rune     // matched runes, for OpLiteral, OpCharClass
+	Rune0    [2]rune    // storage for short Rune
+	Min, Max int        // min, max for OpRepeat
+	Cap      int        // capturing index, for OpCapture
+	Name     string     // capturing name, for OpCapture
+}
+
+//go:generate stringer -type Op -trimprefix Op
+
+// An Op is a single regular expression operator.
+type Op uint8
+
+// Operators are listed in precedence order, tightest binding to weakest.
+// Character class operators are listed simplest to most complex
+// (OpLiteral, OpCharClass, OpAnyCharNotNL, OpAnyChar).
+
+const (
+	OpNoMatch        Op = 1 + iota // matches no strings
+	OpEmptyMatch                   // matches empty string
+	OpLiteral                      // matches Runes sequence
+	OpCharClass                    // matches Runes interpreted as range pair list
+	OpAnyCharNotNL                 // matches any character except newline
+	OpAnyChar                      // matches any character
+	OpBeginLine                    // matches empty string at beginning of line
+	OpEndLine                      // matches empty string at end of line
+	OpBeginText                    // matches empty string at beginning of text
+	OpEndText                      // matches empty string at end of text
+	OpWordBoundary                 // matches word boundary `\b`
+	OpNoWordBoundary               // matches word non-boundary `\B`
+	OpCapture                      // capturing subexpression with index Cap, optional name Name
+	OpStar                         // matches Sub[0] zero or more times
+	OpPlus                         // matches Sub[0] one or more times
+	OpQuest                        // matches Sub[0] zero or one times
+	OpRepeat                       // matches Sub[0] at least Min times, at most Max (Max == -1 is no limit)
+	OpConcat                       // matches concatenation of Subs
+	OpAlternate                    // matches alternation of Subs
+)
+
+const opPseudo Op = 128 // where pseudo-ops start
+
+// Equal reports whether x and y have identical structure.
+func (x *Regexp) Equal(y *Regexp) bool {
+	if x == nil || y == nil {
+		return x == y
+	}
+	if x.Op != y.Op {
+		return false
+	}
+	switch x.Op {
+	case OpEndText:
+		// The parse flags remember whether this is \z or \Z.
+		if x.Flags&WasDollar != y.Flags&WasDollar {
+			return false
+		}
+
+	case OpLiteral, OpCharClass:
+		if len(x.Rune) != len(y.Rune) {
+			return false
+		}
+		for i, r := range x.Rune {
+			if r != y.Rune[i] {
+				return false
+			}
+		}
+
+	case OpAlternate, OpConcat:
+		if len(x.Sub) != len(y.Sub) {
+			return false
+		}
+		for i, sub := range x.Sub {
+			if !sub.Equal(y.Sub[i]) {
+				return false
+			}
+		}
+
+	case OpStar, OpPlus, OpQuest:
+		if x.Flags&NonGreedy != y.Flags&NonGreedy || !x.Sub[0].Equal(y.Sub[0]) {
+			return false
+		}
+
+	case OpRepeat:
+		if x.Flags&NonGreedy != y.Flags&NonGreedy || x.Min != y.Min || x.Max != y.Max || !x.Sub[0].Equal(y.Sub[0]) {
+			return false
+		}
+
+	case OpCapture:
+		if x.Cap != y.Cap || x.Name != y.Name || !x.Sub[0].Equal(y.Sub[0]) {
+			return false
+		}
+	}
+	return true
+}
+
+// printFlags is a bit set indicating which flags (including non-capturing parens) to print around a regexp.
+type printFlags uint8
+
+const (
+	flagI    printFlags = 1 << iota // (?i:
+	flagM                           // (?m:
+	flagS                           // (?s:
+	flagOff                         // )
+	flagPrec                        // (?: )
+	negShift = 5                    // flagI<<negShift is (?-i:
+)
+
+// addSpan enables the flags f around start..last,
+// by setting flags[start] = f and flags[last] = flagOff.
+func addSpan(start, last *Regexp, f printFlags, flags *map[*Regexp]printFlags) {
+	if *flags == nil {
+		*flags = make(map[*Regexp]printFlags)
+	}
+	(*flags)[start] = f
+	(*flags)[last] |= flagOff // maybe start==last
+}
+
+// calcFlags calculates the flags to print around each subexpression in re,
+// storing that information in (*flags)[sub] for each affected subexpression.
+// The first time an entry needs to be written to *flags, calcFlags allocates the map.
+// calcFlags also calculates the flags that must be active or can't be active
+// around re and returns those flags.
+func calcFlags(re *Regexp, flags *map[*Regexp]printFlags) (must, cant printFlags) {
+	switch re.Op {
+	default:
+		return 0, 0
+
+	case OpLiteral:
+		// If literal is fold-sensitive, return (flagI, 0) or (0, flagI)
+		// according to whether (?i) is active.
+		// If literal is not fold-sensitive, return 0, 0.
+		for _, r := range re.Rune {
+			if minFold <= r && r <= maxFold && unicode.SimpleFold(r) != r {
+				if re.Flags&FoldCase != 0 {
+					return flagI, 0
+				} else {
+					return 0, flagI
+				}
+			}
+		}
+		return 0, 0
+
+	case OpCharClass:
+		// If literal is fold-sensitive, return 0, flagI - (?i) has been compiled out.
+		// If literal is not fold-sensitive, return 0, 0.
+		for i := 0; i < len(re.Rune); i += 2 {
+			lo := max(minFold, re.Rune[i])
+			hi := min(maxFold, re.Rune[i+1])
+			for r := lo; r <= hi; r++ {
+				for f := unicode.SimpleFold(r); f != r; f = unicode.SimpleFold(f) {
+					if !(lo <= f && f <= hi) && !inCharClass(f, re.Rune) {
+						return 0, flagI
+					}
+				}
+			}
+		}
+		return 0, 0
+
+	case OpAnyCharNotNL: // (?-s).
+		return 0, flagS
+
+	case OpAnyChar: // (?s).
+		return flagS, 0
+
+	case OpBeginLine, OpEndLine: // (?m)^ (?m)$
+		return flagM, 0
+
+	case OpEndText:
+		if re.Flags&WasDollar != 0 { // (?-m)$
+			return 0, flagM
+		}
+		return 0, 0
+
+	case OpCapture, OpStar, OpPlus, OpQuest, OpRepeat:
+		return calcFlags(re.Sub[0], flags)
+
+	case OpConcat, OpAlternate:
+		// Gather the must and cant for each subexpression.
+		// When we find a conflicting subexpression, insert the necessary
+		// flags around the previously identified span and start over.
+		var must, cant, allCant printFlags
+		start := 0
+		last := 0
+		did := false
+		for i, sub := range re.Sub {
+			subMust, subCant := calcFlags(sub, flags)
+			if must&subCant != 0 || subMust&cant != 0 {
+				if must != 0 {
+					addSpan(re.Sub[start], re.Sub[last], must, flags)
+				}
+				must = 0
+				cant = 0
+				start = i
+				did = true
+			}
+			must |= subMust
+			cant |= subCant
+			allCant |= subCant
+			if subMust != 0 {
+				last = i
+			}
+			if must == 0 && start == i {
+				start++
+			}
+		}
+		if !did {
+			// No conflicts: pass the accumulated must and cant upward.
+			return must, cant
+		}
+		if must != 0 {
+			// Conflicts found; need to finish final span.
+			addSpan(re.Sub[start], re.Sub[last], must, flags)
+		}
+		return 0, allCant
+	}
+}
+
+// writeRegexp writes the Perl syntax for the regular expression re to b.
+func writeRegexp(b *strings.Builder, re *Regexp, f printFlags, flags map[*Regexp]printFlags) {
+	f |= flags[re]
+	if f&flagPrec != 0 && f&^(flagOff|flagPrec) != 0 && f&flagOff != 0 {
+		// flagPrec is redundant with other flags being added and terminated
+		f &^= flagPrec
+	}
+	if f&^(flagOff|flagPrec) != 0 {
+		b.WriteString(`(?`)
+		if f&flagI != 0 {
+			b.WriteString(`i`)
+		}
+		if f&flagM != 0 {
+			b.WriteString(`m`)
+		}
+		if f&flagS != 0 {
+			b.WriteString(`s`)
+		}
+		if f&((flagM|flagS)<<negShift) != 0 {
+			b.WriteString(`-`)
+			if f&(flagM<<negShift) != 0 {
+				b.WriteString(`m`)
+			}
+			if f&(flagS<<negShift) != 0 {
+				b.WriteString(`s`)
+			}
+		}
+		b.WriteString(`:`)
+	}
+	if f&flagOff != 0 {
+		defer b.WriteString(`)`)
+	}
+	if f&flagPrec != 0 {
+		b.WriteString(`(?:`)
+		defer b.WriteString(`)`)
+	}
+
+	switch re.Op {
+	default:
+		b.WriteString("<invalid op" + strconv.Itoa(int(re.Op)) + ">")
+	case OpNoMatch:
+		b.WriteString(`[^\x00-\x{10FFFF}]`)
+	case OpEmptyMatch:
+		b.WriteString(`(?:)`)
+	case OpLiteral:
+		for _, r := range re.Rune {
+			escape(b, r, false)
+		}
+	case OpCharClass:
+		if len(re.Rune)%2 != 0 {
+			b.WriteString(`[invalid char class]`)
+			break
+		}
+		b.WriteRune('[')
+		if len(re.Rune) == 0 {
+			b.WriteString(`^\x00-\x{10FFFF}`)
+		} else if re.Rune[0] == 0 && re.Rune[len(re.Rune)-1] == unicode.MaxRune && len(re.Rune) > 2 {
+			// Contains 0 and MaxRune. Probably a negated class.
+			// Print the gaps.
+			b.WriteRune('^')
+			for i := 1; i < len(re.Rune)-1; i += 2 {
+				lo, hi := re.Rune[i]+1, re.Rune[i+1]-1
+				escape(b, lo, lo == '-')
+				if lo != hi {
+					if hi != lo+1 {
+						b.WriteRune('-')
+					}
+					escape(b, hi, hi == '-')
+				}
+			}
+		} else {
+			for i := 0; i < len(re.Rune); i += 2 {
+				lo, hi := re.Rune[i], re.Rune[i+1]
+				escape(b, lo, lo == '-')
+				if lo != hi {
+					if hi != lo+1 {
+						b.WriteRune('-')
+					}
+					escape(b, hi, hi == '-')
+				}
+			}
+		}
+		b.WriteRune(']')
+	case OpAnyCharNotNL, OpAnyChar:
+		b.WriteString(`.`)
+	case OpBeginLine:
+		b.WriteString(`^`)
+	case OpEndLine:
+		b.WriteString(`$`)
+	case OpBeginText:
+		b.WriteString(`\A`)
+	case OpEndText:
+		if re.Flags&WasDollar != 0 {
+			b.WriteString(`$`)
+		} else {
+			b.WriteString(`\z`)
+		}
+	case OpWordBoundary:
+		b.WriteString(`\b`)
+	case OpNoWordBoundary:
+		b.WriteString(`\B`)
+	case OpCapture:
+		if re.Name != "" {
+			b.WriteString(`(?P<`)
+			b.WriteString(re.Name)
+			b.WriteRune('>')
+		} else {
+			b.WriteRune('(')
+		}
+		if re.Sub[0].Op != OpEmptyMatch {
+			writeRegexp(b, re.Sub[0], flags[re.Sub[0]], flags)
+		}
+		b.WriteRune(')')
+	case OpStar, OpPlus, OpQuest, OpRepeat:
+		p := printFlags(0)
+		sub := re.Sub[0]
+		if sub.Op > OpCapture || sub.Op == OpLiteral && len(sub.Rune) > 1 {
+			p = flagPrec
+		}
+		writeRegexp(b, sub, p, flags)
+
+		switch re.Op {
+		case OpStar:
+			b.WriteRune('*')
+		case OpPlus:
+			b.WriteRune('+')
+		case OpQuest:
+			b.WriteRune('?')
+		case OpRepeat:
+			b.WriteRune('{')
+			b.WriteString(strconv.Itoa(re.Min))
+			if re.Max != re.Min {
+				b.WriteRune(',')
+				if re.Max >= 0 {
+					b.WriteString(strconv.Itoa(re.Max))
+				}
+			}
+			b.WriteRune('}')
+		}
+		if re.Flags&NonGreedy != 0 {
+			b.WriteRune('?')
+		}
+	case OpConcat:
+		for _, sub := range re.Sub {
+			p := printFlags(0)
+			if sub.Op == OpAlternate {
+				p = flagPrec
+			}
+			writeRegexp(b, sub, p, flags)
+		}
+	case OpAlternate:
+		for i, sub := range re.Sub {
+			if i > 0 {
+				b.WriteRune('|')
+			}
+			writeRegexp(b, sub, 0, flags)
+		}
+	}
+}
+
+func (re *Regexp) String() string {
+	var b strings.Builder
+	var flags map[*Regexp]printFlags
+	must, cant := calcFlags(re, &flags)
+	must |= (cant &^ flagI) << negShift
+	if must != 0 {
+		must |= flagOff
+	}
+	writeRegexp(&b, re, must, flags)
+	return b.String()
+}
+
+const meta = `\.+*?()|[]{}^$`
+
+func escape(b *strings.Builder, r rune, force bool) {
+	if unicode.IsPrint(r) {
+		if strings.ContainsRune(meta, r) || force {
+			b.WriteRune('\\')
+		}
+		b.WriteRune(r)
+		return
+	}
+
+	switch r {
+	case '\a':
+		b.WriteString(`\a`)
+	case '\f':
+		b.WriteString(`\f`)
+	case '\n':
+		b.WriteString(`\n`)
+	case '\r':
+		b.WriteString(`\r`)
+	case '\t':
+		b.WriteString(`\t`)
+	case '\v':
+		b.WriteString(`\v`)
+	default:
+		if r < 0x100 {
+			b.WriteString(`\x`)
+			s := strconv.FormatInt(int64(r), 16)
+			if len(s) == 1 {
+				b.WriteRune('0')
+			}
+			b.WriteString(s)
+			break
+		}
+		b.WriteString(`\x{`)
+		b.WriteString(strconv.FormatInt(int64(r), 16))
+		b.WriteString(`}`)
+	}
+}
+
+// MaxCap walks the regexp to find the maximum capture index.
+func (re *Regexp) MaxCap() int {
+	m := 0
+	if re.Op == OpCapture {
+		m = re.Cap
+	}
+	for _, sub := range re.Sub {
+		if n := sub.MaxCap(); m < n {
+			m = n
+		}
+	}
+	return m
+}
+
+// CapNames walks the regexp to find the names of capturing groups.
+func (re *Regexp) CapNames() []string {
+	names := make([]string, re.MaxCap()+1)
+	re.capNames(names)
+	return names
+}
+
+func (re *Regexp) capNames(names []string) {
+	if re.Op == OpCapture {
+		names[re.Cap] = re.Name
+	}
+	for _, sub := range re.Sub {
+		sub.capNames(names)
+	}
+}
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-16 19:25:22 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-16 19:25:22 +0000
commit	f6ad4dcef54c5ce997a4bad5a6d86de229015700 (patch)
tree	7cfa4e31ace5c2bd95c72b154d15af494b2bcbef /src/regexp/syntax/regexp.go
parent	Initial commit. (diff)
download	golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.tar.xz golang-1.22-f6ad4dcef54c5ce997a4bad5a6d86de229015700.zip