summaryrefslogtreecommitdiffstats
path: root/src/net/http/pattern.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/net/http/pattern.go')
-rw-r--r--src/net/http/pattern.go529
1 files changed, 529 insertions, 0 deletions
diff --git a/src/net/http/pattern.go b/src/net/http/pattern.go
new file mode 100644
index 0000000..f6af19b
--- /dev/null
+++ b/src/net/http/pattern.go
@@ -0,0 +1,529 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Patterns for ServeMux routing.
+
+package http
+
+import (
+ "errors"
+ "fmt"
+ "net/url"
+ "strings"
+ "unicode"
+)
+
+// A pattern is something that can be matched against an HTTP request.
+// It has an optional method, an optional host, and a path.
+type pattern struct {
+ str string // original string
+ method string
+ host string
+ // The representation of a path differs from the surface syntax, which
+ // simplifies most algorithms.
+ //
+ // Paths ending in '/' are represented with an anonymous "..." wildcard.
+ // For example, the path "a/" is represented as a literal segment "a" followed
+ // by a segment with multi==true.
+ //
+ // Paths ending in "{$}" are represented with the literal segment "/".
+ // For example, the path "a/{$}" is represented as a literal segment "a" followed
+ // by a literal segment "/".
+ segments []segment
+ loc string // source location of registering call, for helpful messages
+}
+
+func (p *pattern) String() string { return p.str }
+
+func (p *pattern) lastSegment() segment {
+ return p.segments[len(p.segments)-1]
+}
+
+// A segment is a pattern piece that matches one or more path segments, or
+// a trailing slash.
+//
+// If wild is false, it matches a literal segment, or, if s == "/", a trailing slash.
+// Examples:
+//
+// "a" => segment{s: "a"}
+// "/{$}" => segment{s: "/"}
+//
+// If wild is true and multi is false, it matches a single path segment.
+// Example:
+//
+// "{x}" => segment{s: "x", wild: true}
+//
+// If both wild and multi are true, it matches all remaining path segments.
+// Example:
+//
+// "{rest...}" => segment{s: "rest", wild: true, multi: true}
+type segment struct {
+ s string // literal or wildcard name or "/" for "/{$}".
+ wild bool
+ multi bool // "..." wildcard
+}
+
+// parsePattern parses a string into a Pattern.
+// The string's syntax is
+//
+// [METHOD] [HOST]/[PATH]
+//
+// where:
+// - METHOD is an HTTP method
+// - HOST is a hostname
+// - PATH consists of slash-separated segments, where each segment is either
+// a literal or a wildcard of the form "{name}", "{name...}", or "{$}".
+//
+// METHOD, HOST and PATH are all optional; that is, the string can be "/".
+// If METHOD is present, it must be followed by a single space.
+// Wildcard names must be valid Go identifiers.
+// The "{$}" and "{name...}" wildcard must occur at the end of PATH.
+// PATH may end with a '/'.
+// Wildcard names in a path must be distinct.
+func parsePattern(s string) (_ *pattern, err error) {
+ if len(s) == 0 {
+ return nil, errors.New("empty pattern")
+ }
+ off := 0 // offset into string
+ defer func() {
+ if err != nil {
+ err = fmt.Errorf("at offset %d: %w", off, err)
+ }
+ }()
+
+ method, rest, found := strings.Cut(s, " ")
+ if !found {
+ rest = method
+ method = ""
+ }
+ if method != "" && !validMethod(method) {
+ return nil, fmt.Errorf("invalid method %q", method)
+ }
+ p := &pattern{str: s, method: method}
+
+ if found {
+ off = len(method) + 1
+ }
+ i := strings.IndexByte(rest, '/')
+ if i < 0 {
+ return nil, errors.New("host/path missing /")
+ }
+ p.host = rest[:i]
+ rest = rest[i:]
+ if j := strings.IndexByte(p.host, '{'); j >= 0 {
+ off += j
+ return nil, errors.New("host contains '{' (missing initial '/'?)")
+ }
+ // At this point, rest is the path.
+ off += i
+
+ // An unclean path with a method that is not CONNECT can never match,
+ // because paths are cleaned before matching.
+ if method != "" && method != "CONNECT" && rest != cleanPath(rest) {
+ return nil, errors.New("non-CONNECT pattern with unclean path can never match")
+ }
+
+ seenNames := map[string]bool{} // remember wildcard names to catch dups
+ for len(rest) > 0 {
+ // Invariant: rest[0] == '/'.
+ rest = rest[1:]
+ off = len(s) - len(rest)
+ if len(rest) == 0 {
+ // Trailing slash.
+ p.segments = append(p.segments, segment{wild: true, multi: true})
+ break
+ }
+ i := strings.IndexByte(rest, '/')
+ if i < 0 {
+ i = len(rest)
+ }
+ var seg string
+ seg, rest = rest[:i], rest[i:]
+ if i := strings.IndexByte(seg, '{'); i < 0 {
+ // Literal.
+ seg = pathUnescape(seg)
+ p.segments = append(p.segments, segment{s: seg})
+ } else {
+ // Wildcard.
+ if i != 0 {
+ return nil, errors.New("bad wildcard segment (must start with '{')")
+ }
+ if seg[len(seg)-1] != '}' {
+ return nil, errors.New("bad wildcard segment (must end with '}')")
+ }
+ name := seg[1 : len(seg)-1]
+ if name == "$" {
+ if len(rest) != 0 {
+ return nil, errors.New("{$} not at end")
+ }
+ p.segments = append(p.segments, segment{s: "/"})
+ break
+ }
+ name, multi := strings.CutSuffix(name, "...")
+ if multi && len(rest) != 0 {
+ return nil, errors.New("{...} wildcard not at end")
+ }
+ if name == "" {
+ return nil, errors.New("empty wildcard")
+ }
+ if !isValidWildcardName(name) {
+ return nil, fmt.Errorf("bad wildcard name %q", name)
+ }
+ if seenNames[name] {
+ return nil, fmt.Errorf("duplicate wildcard name %q", name)
+ }
+ seenNames[name] = true
+ p.segments = append(p.segments, segment{s: name, wild: true, multi: multi})
+ }
+ }
+ return p, nil
+}
+
+func isValidWildcardName(s string) bool {
+ if s == "" {
+ return false
+ }
+ // Valid Go identifier.
+ for i, c := range s {
+ if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) {
+ return false
+ }
+ }
+ return true
+}
+
+func pathUnescape(path string) string {
+ u, err := url.PathUnescape(path)
+ if err != nil {
+ // Invalidly escaped path; use the original
+ return path
+ }
+ return u
+}
+
+// relationship is a relationship between two patterns, p1 and p2.
+type relationship string
+
+const (
+ equivalent relationship = "equivalent" // both match the same requests
+ moreGeneral relationship = "moreGeneral" // p1 matches everything p2 does & more
+ moreSpecific relationship = "moreSpecific" // p2 matches everything p1 does & more
+ disjoint relationship = "disjoint" // there is no request that both match
+ overlaps relationship = "overlaps" // there is a request that both match, but neither is more specific
+)
+
+// conflictsWith reports whether p1 conflicts with p2, that is, whether
+// there is a request that both match but where neither is higher precedence
+// than the other.
+//
+// Precedence is defined by two rules:
+// 1. Patterns with a host win over patterns without a host.
+// 2. Patterns whose method and path is more specific win. One pattern is more
+// specific than another if the second matches all the (method, path) pairs
+// of the first and more.
+//
+// If rule 1 doesn't apply, then two patterns conflict if their relationship
+// is either equivalence (they match the same set of requests) or overlap
+// (they both match some requests, but neither is more specific than the other).
+func (p1 *pattern) conflictsWith(p2 *pattern) bool {
+ if p1.host != p2.host {
+ // Either one host is empty and the other isn't, in which case the
+ // one with the host wins by rule 1, or neither host is empty
+ // and they differ, so they won't match the same paths.
+ return false
+ }
+ rel := p1.comparePathsAndMethods(p2)
+ return rel == equivalent || rel == overlaps
+}
+
+func (p1 *pattern) comparePathsAndMethods(p2 *pattern) relationship {
+ mrel := p1.compareMethods(p2)
+ // Optimization: avoid a call to comparePaths.
+ if mrel == disjoint {
+ return disjoint
+ }
+ prel := p1.comparePaths(p2)
+ return combineRelationships(mrel, prel)
+}
+
+// compareMethods determines the relationship between the method
+// part of patterns p1 and p2.
+//
+// A method can either be empty, "GET", or something else.
+// The empty string matches any method, so it is the most general.
+// "GET" matches both GET and HEAD.
+// Anything else matches only itself.
+func (p1 *pattern) compareMethods(p2 *pattern) relationship {
+ if p1.method == p2.method {
+ return equivalent
+ }
+ if p1.method == "" {
+ // p1 matches any method, but p2 does not, so p1 is more general.
+ return moreGeneral
+ }
+ if p2.method == "" {
+ return moreSpecific
+ }
+ if p1.method == "GET" && p2.method == "HEAD" {
+ // p1 matches GET and HEAD; p2 matches only HEAD.
+ return moreGeneral
+ }
+ if p2.method == "GET" && p1.method == "HEAD" {
+ return moreSpecific
+ }
+ return disjoint
+}
+
+// comparePaths determines the relationship between the path
+// part of two patterns.
+func (p1 *pattern) comparePaths(p2 *pattern) relationship {
+ // Optimization: if a path pattern doesn't end in a multi ("...") wildcard, then it
+ // can only match paths with the same number of segments.
+ if len(p1.segments) != len(p2.segments) && !p1.lastSegment().multi && !p2.lastSegment().multi {
+ return disjoint
+ }
+
+ // Consider corresponding segments in the two path patterns.
+ var segs1, segs2 []segment
+ rel := equivalent
+ for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
+ rel = combineRelationships(rel, compareSegments(segs1[0], segs2[0]))
+ if rel == disjoint {
+ return rel
+ }
+ }
+ // We've reached the end of the corresponding segments of the patterns.
+ // If they have the same number of segments, then we've already determined
+ // their relationship.
+ if len(segs1) == 0 && len(segs2) == 0 {
+ return rel
+ }
+ // Otherwise, the only way they could fail to be disjoint is if the shorter
+ // pattern ends in a multi. In that case, that multi is more general
+ // than the remainder of the longer pattern, so combine those two relationships.
+ if len(segs1) < len(segs2) && p1.lastSegment().multi {
+ return combineRelationships(rel, moreGeneral)
+ }
+ if len(segs2) < len(segs1) && p2.lastSegment().multi {
+ return combineRelationships(rel, moreSpecific)
+ }
+ return disjoint
+}
+
+// compareSegments determines the relationship between two segments.
+func compareSegments(s1, s2 segment) relationship {
+ if s1.multi && s2.multi {
+ return equivalent
+ }
+ if s1.multi {
+ return moreGeneral
+ }
+ if s2.multi {
+ return moreSpecific
+ }
+ if s1.wild && s2.wild {
+ return equivalent
+ }
+ if s1.wild {
+ if s2.s == "/" {
+ // A single wildcard doesn't match a trailing slash.
+ return disjoint
+ }
+ return moreGeneral
+ }
+ if s2.wild {
+ if s1.s == "/" {
+ return disjoint
+ }
+ return moreSpecific
+ }
+ // Both literals.
+ if s1.s == s2.s {
+ return equivalent
+ }
+ return disjoint
+}
+
+// combineRelationships determines the overall relationship of two patterns
+// given the relationships of a partition of the patterns into two parts.
+//
+// For example, if p1 is more general than p2 in one way but equivalent
+// in the other, then it is more general overall.
+//
+// Or if p1 is more general in one way and more specific in the other, then
+// they overlap.
+func combineRelationships(r1, r2 relationship) relationship {
+ switch r1 {
+ case equivalent:
+ return r2
+ case disjoint:
+ return disjoint
+ case overlaps:
+ if r2 == disjoint {
+ return disjoint
+ }
+ return overlaps
+ case moreGeneral, moreSpecific:
+ switch r2 {
+ case equivalent:
+ return r1
+ case inverseRelationship(r1):
+ return overlaps
+ default:
+ return r2
+ }
+ default:
+ panic(fmt.Sprintf("unknown relationship %q", r1))
+ }
+}
+
+// If p1 has relationship `r` to p2, then
+// p2 has inverseRelationship(r) to p1.
+func inverseRelationship(r relationship) relationship {
+ switch r {
+ case moreSpecific:
+ return moreGeneral
+ case moreGeneral:
+ return moreSpecific
+ default:
+ return r
+ }
+}
+
+// isLitOrSingle reports whether the segment is a non-dollar literal or a single wildcard.
+func isLitOrSingle(seg segment) bool {
+ if seg.wild {
+ return !seg.multi
+ }
+ return seg.s != "/"
+}
+
+// describeConflict returns an explanation of why two patterns conflict.
+func describeConflict(p1, p2 *pattern) string {
+ mrel := p1.compareMethods(p2)
+ prel := p1.comparePaths(p2)
+ rel := combineRelationships(mrel, prel)
+ if rel == equivalent {
+ return fmt.Sprintf("%s matches the same requests as %s", p1, p2)
+ }
+ if rel != overlaps {
+ panic("describeConflict called with non-conflicting patterns")
+ }
+ if prel == overlaps {
+ return fmt.Sprintf(`%[1]s and %[2]s both match some paths, like %[3]q.
+But neither is more specific than the other.
+%[1]s matches %[4]q, but %[2]s doesn't.
+%[2]s matches %[5]q, but %[1]s doesn't.`,
+ p1, p2, commonPath(p1, p2), differencePath(p1, p2), differencePath(p2, p1))
+ }
+ if mrel == moreGeneral && prel == moreSpecific {
+ return fmt.Sprintf("%s matches more methods than %s, but has a more specific path pattern", p1, p2)
+ }
+ if mrel == moreSpecific && prel == moreGeneral {
+ return fmt.Sprintf("%s matches fewer methods than %s, but has a more general path pattern", p1, p2)
+ }
+ return fmt.Sprintf("bug: unexpected way for two patterns %s and %s to conflict: methods %s, paths %s", p1, p2, mrel, prel)
+}
+
+// writeMatchingPath writes to b a path that matches the segments.
+func writeMatchingPath(b *strings.Builder, segs []segment) {
+ for _, s := range segs {
+ writeSegment(b, s)
+ }
+}
+
+func writeSegment(b *strings.Builder, s segment) {
+ b.WriteByte('/')
+ if !s.multi && s.s != "/" {
+ b.WriteString(s.s)
+ }
+}
+
+// commonPath returns a path that both p1 and p2 match.
+// It assumes there is such a path.
+func commonPath(p1, p2 *pattern) string {
+ var b strings.Builder
+ var segs1, segs2 []segment
+ for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
+ if s1 := segs1[0]; s1.wild {
+ writeSegment(&b, segs2[0])
+ } else {
+ writeSegment(&b, s1)
+ }
+ }
+ if len(segs1) > 0 {
+ writeMatchingPath(&b, segs1)
+ } else if len(segs2) > 0 {
+ writeMatchingPath(&b, segs2)
+ }
+ return b.String()
+}
+
+// differencePath returns a path that p1 matches and p2 doesn't.
+// It assumes there is such a path.
+func differencePath(p1, p2 *pattern) string {
+ var b strings.Builder
+
+ var segs1, segs2 []segment
+ for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] {
+ s1 := segs1[0]
+ s2 := segs2[0]
+ if s1.multi && s2.multi {
+ // From here the patterns match the same paths, so we must have found a difference earlier.
+ b.WriteByte('/')
+ return b.String()
+
+ }
+ if s1.multi && !s2.multi {
+ // s1 ends in a "..." wildcard but s2 does not.
+ // A trailing slash will distinguish them, unless s2 ends in "{$}",
+ // in which case any segment will do; prefer the wildcard name if
+ // it has one.
+ b.WriteByte('/')
+ if s2.s == "/" {
+ if s1.s != "" {
+ b.WriteString(s1.s)
+ } else {
+ b.WriteString("x")
+ }
+ }
+ return b.String()
+ }
+ if !s1.multi && s2.multi {
+ writeSegment(&b, s1)
+ } else if s1.wild && s2.wild {
+ // Both patterns will match whatever we put here; use
+ // the first wildcard name.
+ writeSegment(&b, s1)
+ } else if s1.wild && !s2.wild {
+ // s1 is a wildcard, s2 is a literal.
+ // Any segment other than s2.s will work.
+ // Prefer the wildcard name, but if it's the same as the literal,
+ // tweak the literal.
+ if s1.s != s2.s {
+ writeSegment(&b, s1)
+ } else {
+ b.WriteByte('/')
+ b.WriteString(s2.s + "x")
+ }
+ } else if !s1.wild && s2.wild {
+ writeSegment(&b, s1)
+ } else {
+ // Both are literals. A precondition of this function is that the
+ // patterns overlap, so they must be the same literal. Use it.
+ if s1.s != s2.s {
+ panic(fmt.Sprintf("literals differ: %q and %q", s1.s, s2.s))
+ }
+ writeSegment(&b, s1)
+ }
+ }
+ if len(segs1) > 0 {
+ // p1 is longer than p2, and p2 does not end in a multi.
+ // Anything that matches the rest of p1 will do.
+ writeMatchingPath(&b, segs1)
+ } else if len(segs2) > 0 {
+ writeMatchingPath(&b, segs2)
+ }
+ return b.String()
+}