1 files changed, 217 insertions, 0 deletions
diff --git a/src/html/template/url.go b/src/html/template/url.go
new file mode 100644
index 0000000..9390558
--- /dev/null
+++ b/src/html/template/url.go
@@ -0,0 +1,217 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+	"bytes"
+	"fmt"
+	"strings"
+)
+
+// urlFilter returns its input unless it contains an unsafe scheme in which
+// case it defangs the entire URL.
+//
+// Schemes that cause unintended side effects that are irreversible without user
+// interaction are considered unsafe. For example, clicking on a "javascript:"
+// link can immediately trigger JavaScript code execution.
+//
+// This filter conservatively assumes that all schemes other than the following
+// are unsafe:
+//    * http:   Navigates to a new website, and may open a new window or tab.
+//              These side effects can be reversed by navigating back to the
+//              previous website, or closing the window or tab. No irreversible
+//              changes will take place without further user interaction with
+//              the new website.
+//    * https:  Same as http.
+//    * mailto: Opens an email program and starts a new draft. This side effect
+//              is not irreversible until the user explicitly clicks send; it
+//              can be undone by closing the email program.
+//
+// To allow URLs containing other schemes to bypass this filter, developers must
+// explicitly indicate that such a URL is expected and safe by encapsulating it
+// in a template.URL value.
+func urlFilter(args ...any) string {
+	text, kind := stringify(args...)
+	// Values already typed as URLs are trusted and bypass the scheme check;
+	// everything else must satisfy isSafeURL.
+	if kind == contentTypeURL || isSafeURL(text) {
+		return text
+	}
+	// Defang the whole URL by replacing it with '#' plus the failsafe marker.
+	return "#" + filterFailsafe
+}
+
+// isSafeURL reports whether s may be emitted unfiltered: either it carries no
+// scheme (a relative URL) or its scheme is http, https or mailto in any case.
+func isSafeURL(s string) bool {
+	scheme, _, found := strings.Cut(s, ":")
+	// No ':' at all, or a '/' before the first ':', means there is no scheme.
+	if !found || strings.Contains(scheme, "/") {
+		return true
+	}
+	return strings.EqualFold(scheme, "http") || strings.EqualFold(scheme, "https") || strings.EqualFold(scheme, "mailto")
+}
+
+// urlEscaper escapes args for a URL query; the output is HTML-attribute safe.
+func urlEscaper(args ...any) string {
+	const normalize = false // escape rather than merely normalize
+	return urlProcessor(normalize, args...)
+}
+
+// urlNormalizer normalizes URL content for embedding in a quote-delimited
+// string or a parenthesis-delimited url(...). Not every HTML special is
+// encoded: '&' is left alone, so placing the result in an HTML attribute
+// still requires escaping '&' to '&amp;'.
+func urlNormalizer(args ...any) string {
+	const normalize = true // keep reserved characters and valid %XX escapes
+	return urlProcessor(normalize, args...)
+}
+
+// urlProcessor stringifies args and either normalizes the result (norm true)
+// or escapes it, yielding a valid hierarchical or opaque URL part.
+func urlProcessor(norm bool, args ...any) string {
+	text, kind := stringify(args...)
+	// Content already typed as a URL is only ever normalized.
+	norm = norm || kind == contentTypeURL
+	var out bytes.Buffer
+	if changed := processURLOnto(text, norm, &out); !changed {
+		// Nothing needed encoding, so the input can be returned as is.
+		return text
+	}
+	return out.String()
+}
+
+// processURLOnto appends to b a normalized (norm true) or fully escaped
+// (norm false) form of s. It reports whether any byte of s was encoded, that
+// is, whether the appended content differs from s.
+func processURLOnto(s string, norm bool, b *bytes.Buffer) bool {
+	b.Grow(len(s) + 16)
+	// Input is handled one byte at a time on the assumption that URLs are
+	// UTF-8 encoded. This resembles the URI to IRI mapping in section 3.1 of
+	// RFC 3987 and matches the EcmaScript builtin encodeURIComponent, so
+	// pages served as Content-type: text/html;charset=UTF-8 should not end up
+	// with misencoded URLs.
+	//
+	// flushed counts the leading bytes of s already copied or encoded into b.
+	flushed := 0
+	for pos := 0; pos < len(s); pos++ {
+		ch := s[pos]
+		var keep bool
+		switch ch {
+		case '!', '#', '$', '&', '*', '+', ',', '/', ':', ';', '=', '?', '@', '[', ']':
+			// Reserved characters are preserved only when normalizing.
+			//
+			// The single quote and the parentheses are sub-delims in RFC
+			// 3986 too, but they are left out of this list on purpose:
+			// encoding them lets the output be embedded in single quoted
+			// attributes and unquoted CSS url(...) constructs. Single
+			// quotes only appear in the obsolete "mark" rule in an
+			// appendix of RFC 3986, so encoding them is safe.
+			keep = norm
+		case '-', '.', '_', '~':
+			// Unreserved per RFC 3986 section 2.3, which asks URI
+			// producers not to create percent-encoded forms of hyphen,
+			// period, underscore or tilde.
+			keep = true
+		case '%':
+			// While normalizing, a '%' that begins a valid two-digit hex
+			// escape is left alone rather than re-encoded.
+			keep = norm && pos+2 < len(s) && isHex(s[pos+1]) && isHex(s[pos+2])
+		default:
+			// ASCII letters and digits are unreserved as well (RFC 3986
+			// section 2.3); every other byte gets encoded.
+			keep = 'a' <= ch && ch <= 'z' ||
+				'A' <= ch && ch <= 'Z' ||
+				'0' <= ch && ch <= '9'
+		}
+		if keep {
+			continue
+		}
+		// Copy the pending run of kept bytes, then write ch as a
+		// percent sign followed by two lowercase hex digits.
+		b.WriteString(s[flushed:pos])
+		fmt.Fprintf(b, "%%%02x", ch)
+		flushed = pos + 1
+	}
+	b.WriteString(s[flushed:])
+	return flushed != 0
+}
+
+// srcsetFilterAndEscaper filters and normalizes a srcset value, i.e. a
+// comma-separated list of URLs, each of which may be followed by metadata.
+func srcsetFilterAndEscaper(args ...any) string {
+	s, t := stringify(args...)
+	if t == contentTypeSrcset {
+		return s
+	}
+	var b bytes.Buffer
+	if t == contentTypeURL {
+		// Normalizing encodes all HTML whitespace, the URL/metadata separator.
+		if processURLOnto(s, true, &b) {
+			s = b.String()
+		}
+		// Commas must go as well, since they delimit srcset entries.
+		return strings.ReplaceAll(s, ",", "%2c")
+	}
+	// Filter each comma-delimited element separately, keeping the commas.
+	from := 0
+	for {
+		off := strings.IndexByte(s[from:], ',')
+		if off < 0 {
+			break
+		}
+		filterSrcsetElement(s, from, from+off, &b)
+		b.WriteByte(',')
+		from += off + 1
+	}
+	filterSrcsetElement(s, from, len(s), &b)
+	return b.String()
+}
+
+// Bitmap over bytes 0-127 (bit c&7 of byte c>>3); derived from https://play.golang.org/p/Dhmj7FORT5
+const htmlSpaceAndASCIIAlnumBytes = "\x00\x36\x00\x00\x01\x00\xff\x03\xfe\xff\xff\x07\xfe\xff\xff\x07"
+
+// isHTMLSpace reports whether c is an ASCII whitespace byte as defined by
+// https://infra.spec.whatwg.org/#ascii-whitespace
+func isHTMLSpace(c byte) bool {
+	return c <= ' ' && (htmlSpaceAndASCIIAlnumBytes[c/8]>>(c%8))&1 == 1
+}
+
+// isHTMLSpaceOrASCIIAlnum reports whether c is HTML whitespace or an ASCII
+// letter or digit, according to the htmlSpaceAndASCIIAlnumBytes bitmap.
+func isHTMLSpaceOrASCIIAlnum(c byte) bool { return c < 0x80 && (htmlSpaceAndASCIIAlnumBytes[c/8]>>(c%8))&1 == 1 }
+
+// filterSrcsetElement appends the srcset element s[left:right] to b with its
+// URL normalized. If the URL is not safe, or if the metadata after it holds
+// anything but HTML whitespace and ASCII alphanumerics, "#" followed by
+// filterFailsafe is appended instead.
+func filterSrcsetElement(s string, left int, right int, b *bytes.Buffer) {
+	// Leading whitespace is not part of the URL.
+	urlStart := left
+	for urlStart < right && isHTMLSpace(s[urlStart]) {
+		urlStart++
+	}
+	// The URL extends to the next whitespace byte or to the end of the element.
+	urlEnd := urlStart
+	for urlEnd < right && !isHTMLSpace(s[urlEnd]) {
+		urlEnd++
+	}
+	url := s[urlStart:urlEnd]
+	accept := isSafeURL(url)
+	// Image metadata consisting solely of spaces and alphanumerics does not
+	// need URL normalization; any other byte rejects the whole element.
+	for i := urlEnd; accept && i < right; i++ {
+		accept = isHTMLSpaceOrASCIIAlnum(s[i])
+	}
+	if !accept {
+		b.WriteString("#")
+		b.WriteString(filterFailsafe)
+		return
+	}
+	// Emit the leading whitespace and the metadata untouched around the URL.
+	b.WriteString(s[left:urlStart])
+	processURLOnto(url, true, b)
+	b.WriteString(s[urlEnd:right])
+}