diff options
Diffstat (limited to 'src/net/http/pattern.go')
-rw-r--r-- | src/net/http/pattern.go | 529 |
1 files changed, 529 insertions, 0 deletions
diff --git a/src/net/http/pattern.go b/src/net/http/pattern.go new file mode 100644 index 0000000..f6af19b --- /dev/null +++ b/src/net/http/pattern.go @@ -0,0 +1,529 @@ +// Copyright 2023 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Patterns for ServeMux routing. + +package http + +import ( + "errors" + "fmt" + "net/url" + "strings" + "unicode" +) + +// A pattern is something that can be matched against an HTTP request. +// It has an optional method, an optional host, and a path. +type pattern struct { + str string // original string + method string + host string + // The representation of a path differs from the surface syntax, which + // simplifies most algorithms. + // + // Paths ending in '/' are represented with an anonymous "..." wildcard. + // For example, the path "a/" is represented as a literal segment "a" followed + // by a segment with multi==true. + // + // Paths ending in "{$}" are represented with the literal segment "/". + // For example, the path "a/{$}" is represented as a literal segment "a" followed + // by a literal segment "/". + segments []segment + loc string // source location of registering call, for helpful messages +} + +func (p *pattern) String() string { return p.str } + +func (p *pattern) lastSegment() segment { + return p.segments[len(p.segments)-1] +} + +// A segment is a pattern piece that matches one or more path segments, or +// a trailing slash. +// +// If wild is false, it matches a literal segment, or, if s == "/", a trailing slash. +// Examples: +// +// "a" => segment{s: "a"} +// "/{$}" => segment{s: "/"} +// +// If wild is true and multi is false, it matches a single path segment. +// Example: +// +// "{x}" => segment{s: "x", wild: true} +// +// If both wild and multi are true, it matches all remaining path segments. +// Example: +// +// "{rest...}" => segment{s: "rest", wild: true, multi: true} +type segment struct { + s string // literal or wildcard name or "/" for "/{$}". + wild bool + multi bool // "..." wildcard +} + +// parsePattern parses a string into a Pattern. +// The string's syntax is +// +// [METHOD] [HOST]/[PATH] +// +// where: +// - METHOD is an HTTP method +// - HOST is a hostname +// - PATH consists of slash-separated segments, where each segment is either +// a literal or a wildcard of the form "{name}", "{name...}", or "{$}". +// +// METHOD, HOST and PATH are all optional; that is, the string can be "/". +// If METHOD is present, it must be followed by a single space. +// Wildcard names must be valid Go identifiers. +// The "{$}" and "{name...}" wildcard must occur at the end of PATH. +// PATH may end with a '/'. +// Wildcard names in a path must be distinct. +func parsePattern(s string) (_ *pattern, err error) { + if len(s) == 0 { + return nil, errors.New("empty pattern") + } + off := 0 // offset into string + defer func() { + if err != nil { + err = fmt.Errorf("at offset %d: %w", off, err) + } + }() + + method, rest, found := strings.Cut(s, " ") + if !found { + rest = method + method = "" + } + if method != "" && !validMethod(method) { + return nil, fmt.Errorf("invalid method %q", method) + } + p := &pattern{str: s, method: method} + + if found { + off = len(method) + 1 + } + i := strings.IndexByte(rest, '/') + if i < 0 { + return nil, errors.New("host/path missing /") + } + p.host = rest[:i] + rest = rest[i:] + if j := strings.IndexByte(p.host, '{'); j >= 0 { + off += j + return nil, errors.New("host contains '{' (missing initial '/'?)") + } + // At this point, rest is the path. + off += i + + // An unclean path with a method that is not CONNECT can never match, + // because paths are cleaned before matching. + if method != "" && method != "CONNECT" && rest != cleanPath(rest) { + return nil, errors.New("non-CONNECT pattern with unclean path can never match") + } + + seenNames := map[string]bool{} // remember wildcard names to catch dups + for len(rest) > 0 { + // Invariant: rest[0] == '/'. + rest = rest[1:] + off = len(s) - len(rest) + if len(rest) == 0 { + // Trailing slash. + p.segments = append(p.segments, segment{wild: true, multi: true}) + break + } + i := strings.IndexByte(rest, '/') + if i < 0 { + i = len(rest) + } + var seg string + seg, rest = rest[:i], rest[i:] + if i := strings.IndexByte(seg, '{'); i < 0 { + // Literal. + seg = pathUnescape(seg) + p.segments = append(p.segments, segment{s: seg}) + } else { + // Wildcard. + if i != 0 { + return nil, errors.New("bad wildcard segment (must start with '{')") + } + if seg[len(seg)-1] != '}' { + return nil, errors.New("bad wildcard segment (must end with '}')") + } + name := seg[1 : len(seg)-1] + if name == "$" { + if len(rest) != 0 { + return nil, errors.New("{$} not at end") + } + p.segments = append(p.segments, segment{s: "/"}) + break + } + name, multi := strings.CutSuffix(name, "...") + if multi && len(rest) != 0 { + return nil, errors.New("{...} wildcard not at end") + } + if name == "" { + return nil, errors.New("empty wildcard") + } + if !isValidWildcardName(name) { + return nil, fmt.Errorf("bad wildcard name %q", name) + } + if seenNames[name] { + return nil, fmt.Errorf("duplicate wildcard name %q", name) + } + seenNames[name] = true + p.segments = append(p.segments, segment{s: name, wild: true, multi: multi}) + } + } + return p, nil +} + +func isValidWildcardName(s string) bool { + if s == "" { + return false + } + // Valid Go identifier. + for i, c := range s { + if !unicode.IsLetter(c) && c != '_' && (i == 0 || !unicode.IsDigit(c)) { + return false + } + } + return true +} + +func pathUnescape(path string) string { + u, err := url.PathUnescape(path) + if err != nil { + // Invalidly escaped path; use the original + return path + } + return u +} + +// relationship is a relationship between two patterns, p1 and p2. +type relationship string + +const ( + equivalent relationship = "equivalent" // both match the same requests + moreGeneral relationship = "moreGeneral" // p1 matches everything p2 does & more + moreSpecific relationship = "moreSpecific" // p2 matches everything p1 does & more + disjoint relationship = "disjoint" // there is no request that both match + overlaps relationship = "overlaps" // there is a request that both match, but neither is more specific +) + +// conflictsWith reports whether p1 conflicts with p2, that is, whether +// there is a request that both match but where neither is higher precedence +// than the other. +// +// Precedence is defined by two rules: +// 1. Patterns with a host win over patterns without a host. +// 2. Patterns whose method and path is more specific win. One pattern is more +// specific than another if the second matches all the (method, path) pairs +// of the first and more. +// +// If rule 1 doesn't apply, then two patterns conflict if their relationship +// is either equivalence (they match the same set of requests) or overlap +// (they both match some requests, but neither is more specific than the other). +func (p1 *pattern) conflictsWith(p2 *pattern) bool { + if p1.host != p2.host { + // Either one host is empty and the other isn't, in which case the + // one with the host wins by rule 1, or neither host is empty + // and they differ, so they won't match the same paths. + return false + } + rel := p1.comparePathsAndMethods(p2) + return rel == equivalent || rel == overlaps +} + +func (p1 *pattern) comparePathsAndMethods(p2 *pattern) relationship { + mrel := p1.compareMethods(p2) + // Optimization: avoid a call to comparePaths. + if mrel == disjoint { + return disjoint + } + prel := p1.comparePaths(p2) + return combineRelationships(mrel, prel) +} + +// compareMethods determines the relationship between the method +// part of patterns p1 and p2. +// +// A method can either be empty, "GET", or something else. +// The empty string matches any method, so it is the most general. +// "GET" matches both GET and HEAD. +// Anything else matches only itself. +func (p1 *pattern) compareMethods(p2 *pattern) relationship { + if p1.method == p2.method { + return equivalent + } + if p1.method == "" { + // p1 matches any method, but p2 does not, so p1 is more general. + return moreGeneral + } + if p2.method == "" { + return moreSpecific + } + if p1.method == "GET" && p2.method == "HEAD" { + // p1 matches GET and HEAD; p2 matches only HEAD. + return moreGeneral + } + if p2.method == "GET" && p1.method == "HEAD" { + return moreSpecific + } + return disjoint +} + +// comparePaths determines the relationship between the path +// part of two patterns. +func (p1 *pattern) comparePaths(p2 *pattern) relationship { + // Optimization: if a path pattern doesn't end in a multi ("...") wildcard, then it + // can only match paths with the same number of segments. + if len(p1.segments) != len(p2.segments) && !p1.lastSegment().multi && !p2.lastSegment().multi { + return disjoint + } + + // Consider corresponding segments in the two path patterns. + var segs1, segs2 []segment + rel := equivalent + for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] { + rel = combineRelationships(rel, compareSegments(segs1[0], segs2[0])) + if rel == disjoint { + return rel + } + } + // We've reached the end of the corresponding segments of the patterns. + // If they have the same number of segments, then we've already determined + // their relationship. + if len(segs1) == 0 && len(segs2) == 0 { + return rel + } + // Otherwise, the only way they could fail to be disjoint is if the shorter + // pattern ends in a multi. In that case, that multi is more general + // than the remainder of the longer pattern, so combine those two relationships. + if len(segs1) < len(segs2) && p1.lastSegment().multi { + return combineRelationships(rel, moreGeneral) + } + if len(segs2) < len(segs1) && p2.lastSegment().multi { + return combineRelationships(rel, moreSpecific) + } + return disjoint +} + +// compareSegments determines the relationship between two segments. +func compareSegments(s1, s2 segment) relationship { + if s1.multi && s2.multi { + return equivalent + } + if s1.multi { + return moreGeneral + } + if s2.multi { + return moreSpecific + } + if s1.wild && s2.wild { + return equivalent + } + if s1.wild { + if s2.s == "/" { + // A single wildcard doesn't match a trailing slash. + return disjoint + } + return moreGeneral + } + if s2.wild { + if s1.s == "/" { + return disjoint + } + return moreSpecific + } + // Both literals. + if s1.s == s2.s { + return equivalent + } + return disjoint +} + +// combineRelationships determines the overall relationship of two patterns +// given the relationships of a partition of the patterns into two parts. +// +// For example, if p1 is more general than p2 in one way but equivalent +// in the other, then it is more general overall. +// +// Or if p1 is more general in one way and more specific in the other, then +// they overlap. +func combineRelationships(r1, r2 relationship) relationship { + switch r1 { + case equivalent: + return r2 + case disjoint: + return disjoint + case overlaps: + if r2 == disjoint { + return disjoint + } + return overlaps + case moreGeneral, moreSpecific: + switch r2 { + case equivalent: + return r1 + case inverseRelationship(r1): + return overlaps + default: + return r2 + } + default: + panic(fmt.Sprintf("unknown relationship %q", r1)) + } +} + +// If p1 has relationship `r` to p2, then +// p2 has inverseRelationship(r) to p1. +func inverseRelationship(r relationship) relationship { + switch r { + case moreSpecific: + return moreGeneral + case moreGeneral: + return moreSpecific + default: + return r + } +} + +// isLitOrSingle reports whether the segment is a non-dollar literal or a single wildcard. +func isLitOrSingle(seg segment) bool { + if seg.wild { + return !seg.multi + } + return seg.s != "/" +} + +// describeConflict returns an explanation of why two patterns conflict. +func describeConflict(p1, p2 *pattern) string { + mrel := p1.compareMethods(p2) + prel := p1.comparePaths(p2) + rel := combineRelationships(mrel, prel) + if rel == equivalent { + return fmt.Sprintf("%s matches the same requests as %s", p1, p2) + } + if rel != overlaps { + panic("describeConflict called with non-conflicting patterns") + } + if prel == overlaps { + return fmt.Sprintf(`%[1]s and %[2]s both match some paths, like %[3]q. +But neither is more specific than the other. +%[1]s matches %[4]q, but %[2]s doesn't. +%[2]s matches %[5]q, but %[1]s doesn't.`, + p1, p2, commonPath(p1, p2), differencePath(p1, p2), differencePath(p2, p1)) + } + if mrel == moreGeneral && prel == moreSpecific { + return fmt.Sprintf("%s matches more methods than %s, but has a more specific path pattern", p1, p2) + } + if mrel == moreSpecific && prel == moreGeneral { + return fmt.Sprintf("%s matches fewer methods than %s, but has a more general path pattern", p1, p2) + } + return fmt.Sprintf("bug: unexpected way for two patterns %s and %s to conflict: methods %s, paths %s", p1, p2, mrel, prel) +} + +// writeMatchingPath writes to b a path that matches the segments. +func writeMatchingPath(b *strings.Builder, segs []segment) { + for _, s := range segs { + writeSegment(b, s) + } +} + +func writeSegment(b *strings.Builder, s segment) { + b.WriteByte('/') + if !s.multi && s.s != "/" { + b.WriteString(s.s) + } +} + +// commonPath returns a path that both p1 and p2 match. +// It assumes there is such a path. +func commonPath(p1, p2 *pattern) string { + var b strings.Builder + var segs1, segs2 []segment + for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] { + if s1 := segs1[0]; s1.wild { + writeSegment(&b, segs2[0]) + } else { + writeSegment(&b, s1) + } + } + if len(segs1) > 0 { + writeMatchingPath(&b, segs1) + } else if len(segs2) > 0 { + writeMatchingPath(&b, segs2) + } + return b.String() +} + +// differencePath returns a path that p1 matches and p2 doesn't. +// It assumes there is such a path. +func differencePath(p1, p2 *pattern) string { + var b strings.Builder + + var segs1, segs2 []segment + for segs1, segs2 = p1.segments, p2.segments; len(segs1) > 0 && len(segs2) > 0; segs1, segs2 = segs1[1:], segs2[1:] { + s1 := segs1[0] + s2 := segs2[0] + if s1.multi && s2.multi { + // From here the patterns match the same paths, so we must have found a difference earlier. + b.WriteByte('/') + return b.String() + + } + if s1.multi && !s2.multi { + // s1 ends in a "..." wildcard but s2 does not. + // A trailing slash will distinguish them, unless s2 ends in "{$}", + // in which case any segment will do; prefer the wildcard name if + // it has one. + b.WriteByte('/') + if s2.s == "/" { + if s1.s != "" { + b.WriteString(s1.s) + } else { + b.WriteString("x") + } + } + return b.String() + } + if !s1.multi && s2.multi { + writeSegment(&b, s1) + } else if s1.wild && s2.wild { + // Both patterns will match whatever we put here; use + // the first wildcard name. + writeSegment(&b, s1) + } else if s1.wild && !s2.wild { + // s1 is a wildcard, s2 is a literal. + // Any segment other than s2.s will work. + // Prefer the wildcard name, but if it's the same as the literal, + // tweak the literal. + if s1.s != s2.s { + writeSegment(&b, s1) + } else { + b.WriteByte('/') + b.WriteString(s2.s + "x") + } + } else if !s1.wild && s2.wild { + writeSegment(&b, s1) + } else { + // Both are literals. A precondition of this function is that the + // patterns overlap, so they must be the same literal. Use it. + if s1.s != s2.s { + panic(fmt.Sprintf("literals differ: %q and %q", s1.s, s2.s)) + } + writeSegment(&b, s1) + } + } + if len(segs1) > 0 { + // p1 is longer than p2, and p2 does not end in a multi. + // Anything that matches the rest of p1 will do. + writeMatchingPath(&b, segs1) + } else if len(segs2) > 0 { + writeMatchingPath(&b, segs2) + } + return b.String() +} |