summaryrefslogtreecommitdiffstats
path: root/src/html/template/js.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/html/template/js.go')
-rw-r--r--src/html/template/js.go485
1 files changed, 485 insertions, 0 deletions
diff --git a/src/html/template/js.go b/src/html/template/js.go
new file mode 100644
index 0000000..d911ada
--- /dev/null
+++ b/src/html/template/js.go
@@ -0,0 +1,485 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "encoding/json"
+ "fmt"
+ "reflect"
+ "strings"
+ "unicode/utf8"
+)
+
+// jsWhitespace contains all of the JS whitespace characters, as defined
+// by the \s character class.
+// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes.
+const jsWhitespace = "\f\n\r\t\v\u0020\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\ufeff"
+
+// nextJSCtx returns the context that determines whether a slash after the
+// given run of tokens starts a regular expression instead of a division
+// operator: / or /=.
+//
+// This assumes that the token run does not include any string tokens, comment
+// tokens, regular expression literal tokens, or division operators.
+//
+// This fails on some valid but nonsensical JavaScript programs like
+// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to
+// fail on any known useful programs. It is based on the draft
+// JavaScript 2.0 lexical grammar and requires one token of lookbehind:
+// https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html
+func nextJSCtx(s []byte, preceding jsCtx) jsCtx {
+ // Trim all JS whitespace characters
+ s = bytes.TrimRight(s, jsWhitespace)
+ if len(s) == 0 {
+ return preceding
+ }
+
+ // All cases below are in the single-byte UTF-8 group.
+ switch c, n := s[len(s)-1], len(s); c {
+ case '+', '-':
+ // ++ and -- are not regexp preceders, but + and - are whether
+ // they are used as infix or prefix operators.
+ start := n - 1
+ // Count the number of adjacent dashes or pluses.
+ for start > 0 && s[start-1] == c {
+ start--
+ }
+ if (n-start)&1 == 1 {
+ // Reached for trailing minus signs since "---" is the
+ // same as "-- -".
+ return jsCtxRegexp
+ }
+ return jsCtxDivOp
+ case '.':
+ // Handle "42."
+ if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' {
+ return jsCtxDivOp
+ }
+ return jsCtxRegexp
+ // Suffixes for all punctuators from section 7.7 of the language spec
+ // that only end binary operators not handled above.
+ case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?':
+ return jsCtxRegexp
+ // Suffixes for all punctuators from section 7.7 of the language spec
+ // that are prefix operators not handled above.
+ case '!', '~':
+ return jsCtxRegexp
+ // Matches all the punctuators from section 7.7 of the language spec
+ // that are open brackets not handled above.
+ case '(', '[':
+ return jsCtxRegexp
+ // Matches all the punctuators from section 7.7 of the language spec
+ // that precede expression starts.
+ case ':', ';', '{':
+ return jsCtxRegexp
+ // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and
+ // are handled in the default except for '}' which can precede a
+ // division op as in
+ // ({ valueOf: function () { return 42 } } / 2
+ // which is valid, but, in practice, developers don't divide object
+ // literals, so our heuristic works well for code like
+ // function () { ... } /foo/.test(x) && sideEffect();
+ // The ')' punctuator can precede a regular expression as in
+ // if (b) /foo/.test(x) && ...
+ // but this is much less likely than
+ // (a + b) / c
+ case '}':
+ return jsCtxRegexp
+ default:
+ // Look for an IdentifierName and see if it is a keyword that
+ // can precede a regular expression.
+ j := n
+ for j > 0 && isJSIdentPart(rune(s[j-1])) {
+ j--
+ }
+ if regexpPrecederKeywords[string(s[j:])] {
+ return jsCtxRegexp
+ }
+ }
+ // Otherwise is a punctuator not listed above, or
+ // a string which precedes a div op, or an identifier
+ // which precedes a div op.
+ return jsCtxDivOp
+}
+
+// regexpPrecederKeywords is a set of reserved JS keywords that can precede a
+// regular expression in JS source.
+var regexpPrecederKeywords = map[string]bool{
+ "break": true,
+ "case": true,
+ "continue": true,
+ "delete": true,
+ "do": true,
+ "else": true,
+ "finally": true,
+ "in": true,
+ "instanceof": true,
+ "return": true,
+ "throw": true,
+ "try": true,
+ "typeof": true,
+ "void": true,
+}
+
+var jsonMarshalType = reflect.TypeFor[json.Marshaler]()
+
+// indirectToJSONMarshaler returns the value, after dereferencing as many times
+// as necessary to reach the base type (or nil) or an implementation of json.Marshal.
+func indirectToJSONMarshaler(a any) any {
+ // text/template now supports passing untyped nil as a func call
+ // argument, so we must support it. Otherwise we'd panic below, as one
+ // cannot call the Type or Interface methods on an invalid
+ // reflect.Value. See golang.org/issue/18716.
+ if a == nil {
+ return nil
+ }
+
+ v := reflect.ValueOf(a)
+ for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() {
+ v = v.Elem()
+ }
+ return v.Interface()
+}
+
+// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has
+// neither side-effects nor free variables outside (NaN, Infinity).
+func jsValEscaper(args ...any) string {
+ var a any
+ if len(args) == 1 {
+ a = indirectToJSONMarshaler(args[0])
+ switch t := a.(type) {
+ case JS:
+ return string(t)
+ case JSStr:
+ // TODO: normalize quotes.
+ return `"` + string(t) + `"`
+ case json.Marshaler:
+ // Do not treat as a Stringer.
+ case fmt.Stringer:
+ a = t.String()
+ }
+ } else {
+ for i, arg := range args {
+ args[i] = indirectToJSONMarshaler(arg)
+ }
+ a = fmt.Sprint(args...)
+ }
+ // TODO: detect cycles before calling Marshal which loops infinitely on
+ // cyclic data. This may be an unacceptable DoS risk.
+ b, err := json.Marshal(a)
+ if err != nil {
+ // While the standard JSON marshaller does not include user controlled
+ // information in the error message, if a type has a MarshalJSON method,
+ // the content of the error message is not guaranteed. Since we insert
+ // the error into the template, as part of a comment, we attempt to
+ // prevent the error from either terminating the comment, or the script
+ // block itself.
+ //
+ // In particular we:
+ // * replace "*/" comment end tokens with "* /", which does not
+ // terminate the comment
+ // * replace "</script" with "\x3C/script", and "<!--" with
+ // "\x3C!--", which prevents confusing script block termination
+ // semantics
+ //
+ // We also put a space before the comment so that if it is flush against
+ // a division operator it is not turned into a line comment:
+ // x/{{y}}
+ // turning into
+ // x//* error marshaling y:
+ // second line of error message */null
+ errStr := err.Error()
+ errStr = strings.ReplaceAll(errStr, "*/", "* /")
+ errStr = strings.ReplaceAll(errStr, "</script", `\x3C/script`)
+ errStr = strings.ReplaceAll(errStr, "<!--", `\x3C!--`)
+ return fmt.Sprintf(" /* %s */null ", errStr)
+ }
+
+ // TODO: maybe post-process output to prevent it from containing
+ // "<!--", "-->", "<![CDATA[", "]]>", or "</script"
+ // in case custom marshalers produce output containing those.
+ // Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper
+ // supports ld+json content-type.
+ if len(b) == 0 {
+ // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should
+ // not cause the output `x=y/*z`.
+ return " null "
+ }
+ first, _ := utf8.DecodeRune(b)
+ last, _ := utf8.DecodeLastRune(b)
+ var buf strings.Builder
+ // Prevent IdentifierNames and NumericLiterals from running into
+ // keywords: in, instanceof, typeof, void
+ pad := isJSIdentPart(first) || isJSIdentPart(last)
+ if pad {
+ buf.WriteByte(' ')
+ }
+ written := 0
+ // Make sure that json.Marshal escapes codepoints U+2028 & U+2029
+ // so it falls within the subset of JSON which is valid JS.
+ for i := 0; i < len(b); {
+ rune, n := utf8.DecodeRune(b[i:])
+ repl := ""
+ if rune == 0x2028 {
+ repl = `\u2028`
+ } else if rune == 0x2029 {
+ repl = `\u2029`
+ }
+ if repl != "" {
+ buf.Write(b[written:i])
+ buf.WriteString(repl)
+ written = i + n
+ }
+ i += n
+ }
+ if buf.Len() != 0 {
+ buf.Write(b[written:])
+ if pad {
+ buf.WriteByte(' ')
+ }
+ return buf.String()
+ }
+ return string(b)
+}
+
+// jsStrEscaper produces a string that can be included between quotes in
+// JavaScript source, in JavaScript embedded in an HTML5 <script> element,
+// or in an HTML5 event handler attribute such as onclick.
+func jsStrEscaper(args ...any) string {
+ s, t := stringify(args...)
+ if t == contentTypeJSStr {
+ return replace(s, jsStrNormReplacementTable)
+ }
+ return replace(s, jsStrReplacementTable)
+}
+
+func jsTmplLitEscaper(args ...any) string {
+ s, _ := stringify(args...)
+ return replace(s, jsBqStrReplacementTable)
+}
+
+// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression
+// specials so the result is treated literally when included in a regular
+// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by
+// the literal text of {{.X}} followed by the string "bar".
+func jsRegexpEscaper(args ...any) string {
+ s, _ := stringify(args...)
+ s = replace(s, jsRegexpReplacementTable)
+ if s == "" {
+ // /{{.X}}/ should not produce a line comment when .X == "".
+ return "(?:)"
+ }
+ return s
+}
+
+// replace replaces each rune r of s with replacementTable[r], provided that
+// r < len(replacementTable). If replacementTable[r] is the empty string then
+// no replacement is made.
+// It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and
+// `\u2029`.
+func replace(s string, replacementTable []string) string {
+ var b strings.Builder
+ r, w, written := rune(0), 0, 0
+ for i := 0; i < len(s); i += w {
+ // See comment in htmlEscaper.
+ r, w = utf8.DecodeRuneInString(s[i:])
+ var repl string
+ switch {
+ case int(r) < len(lowUnicodeReplacementTable):
+ repl = lowUnicodeReplacementTable[r]
+ case int(r) < len(replacementTable) && replacementTable[r] != "":
+ repl = replacementTable[r]
+ case r == '\u2028':
+ repl = `\u2028`
+ case r == '\u2029':
+ repl = `\u2029`
+ default:
+ continue
+ }
+ if written == 0 {
+ b.Grow(len(s))
+ }
+ b.WriteString(s[written:i])
+ b.WriteString(repl)
+ written = i + w
+ }
+ if written == 0 {
+ return s
+ }
+ b.WriteString(s[written:])
+ return b.String()
+}
+
+var lowUnicodeReplacementTable = []string{
+ 0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`,
+ '\a': `\u0007`,
+ '\b': `\u0008`,
+ '\t': `\t`,
+ '\n': `\n`,
+ '\v': `\u000b`, // "\v" == "v" on IE 6.
+ '\f': `\f`,
+ '\r': `\r`,
+ 0xe: `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`,
+ 0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`,
+ 0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`,
+}
+
+var jsStrReplacementTable = []string{
+ 0: `\u0000`,
+ '\t': `\t`,
+ '\n': `\n`,
+ '\v': `\u000b`, // "\v" == "v" on IE 6.
+ '\f': `\f`,
+ '\r': `\r`,
+ // Encode HTML specials as hex so the output can be embedded
+ // in HTML attributes without further encoding.
+ '"': `\u0022`,
+ '`': `\u0060`,
+ '&': `\u0026`,
+ '\'': `\u0027`,
+ '+': `\u002b`,
+ '/': `\/`,
+ '<': `\u003c`,
+ '>': `\u003e`,
+ '\\': `\\`,
+}
+
+// jsBqStrReplacementTable is like jsStrReplacementTable except it also contains
+// the special characters for JS template literals: $, {, and }.
+var jsBqStrReplacementTable = []string{
+ 0: `\u0000`,
+ '\t': `\t`,
+ '\n': `\n`,
+ '\v': `\u000b`, // "\v" == "v" on IE 6.
+ '\f': `\f`,
+ '\r': `\r`,
+ // Encode HTML specials as hex so the output can be embedded
+ // in HTML attributes without further encoding.
+ '"': `\u0022`,
+ '`': `\u0060`,
+ '&': `\u0026`,
+ '\'': `\u0027`,
+ '+': `\u002b`,
+ '/': `\/`,
+ '<': `\u003c`,
+ '>': `\u003e`,
+ '\\': `\\`,
+ '$': `\u0024`,
+ '{': `\u007b`,
+ '}': `\u007d`,
+}
+
+// jsStrNormReplacementTable is like jsStrReplacementTable but does not
+// overencode existing escapes since this table has no entry for `\`.
+var jsStrNormReplacementTable = []string{
+ 0: `\u0000`,
+ '\t': `\t`,
+ '\n': `\n`,
+ '\v': `\u000b`, // "\v" == "v" on IE 6.
+ '\f': `\f`,
+ '\r': `\r`,
+ // Encode HTML specials as hex so the output can be embedded
+ // in HTML attributes without further encoding.
+ '"': `\u0022`,
+ '&': `\u0026`,
+ '\'': `\u0027`,
+ '`': `\u0060`,
+ '+': `\u002b`,
+ '/': `\/`,
+ '<': `\u003c`,
+ '>': `\u003e`,
+}
+var jsRegexpReplacementTable = []string{
+ 0: `\u0000`,
+ '\t': `\t`,
+ '\n': `\n`,
+ '\v': `\u000b`, // "\v" == "v" on IE 6.
+ '\f': `\f`,
+ '\r': `\r`,
+ // Encode HTML specials as hex so the output can be embedded
+ // in HTML attributes without further encoding.
+ '"': `\u0022`,
+ '$': `\$`,
+ '&': `\u0026`,
+ '\'': `\u0027`,
+ '(': `\(`,
+ ')': `\)`,
+ '*': `\*`,
+ '+': `\u002b`,
+ '-': `\-`,
+ '.': `\.`,
+ '/': `\/`,
+ '<': `\u003c`,
+ '>': `\u003e`,
+ '?': `\?`,
+ '[': `\[`,
+ '\\': `\\`,
+ ']': `\]`,
+ '^': `\^`,
+ '{': `\{`,
+ '|': `\|`,
+ '}': `\}`,
+}
+
+// isJSIdentPart reports whether the given rune is a JS identifier part.
+// It does not handle all the non-Latin letters, joiners, and combining marks,
+// but it does handle every codepoint that can occur in a numeric literal or
+// a keyword.
+func isJSIdentPart(r rune) bool {
+ switch {
+ case r == '$':
+ return true
+ case '0' <= r && r <= '9':
+ return true
+ case 'A' <= r && r <= 'Z':
+ return true
+ case r == '_':
+ return true
+ case 'a' <= r && r <= 'z':
+ return true
+ }
+ return false
+}
+
+// isJSType reports whether the given MIME type should be considered JavaScript.
+//
+// It is used to determine whether a script tag with a type attribute is a javascript container.
+func isJSType(mimeType string) bool {
+ // per
+ // https://www.w3.org/TR/html5/scripting-1.html#attr-script-type
+ // https://tools.ietf.org/html/rfc7231#section-3.1.1
+ // https://tools.ietf.org/html/rfc4329#section-3
+ // https://www.ietf.org/rfc/rfc4627.txt
+ // discard parameters
+ mimeType, _, _ = strings.Cut(mimeType, ";")
+ mimeType = strings.ToLower(mimeType)
+ mimeType = strings.TrimSpace(mimeType)
+ switch mimeType {
+ case
+ "application/ecmascript",
+ "application/javascript",
+ "application/json",
+ "application/ld+json",
+ "application/x-ecmascript",
+ "application/x-javascript",
+ "module",
+ "text/ecmascript",
+ "text/javascript",
+ "text/javascript1.0",
+ "text/javascript1.1",
+ "text/javascript1.2",
+ "text/javascript1.3",
+ "text/javascript1.4",
+ "text/javascript1.5",
+ "text/jscript",
+ "text/livescript",
+ "text/x-ecmascript",
+ "text/x-javascript":
+ return true
+ default:
+ return false
+ }
+}