diff options
Diffstat (limited to 'src/html/template/js.go')
-rw-r--r-- | src/html/template/js.go | 485 |
1 files changed, 485 insertions, 0 deletions
diff --git a/src/html/template/js.go b/src/html/template/js.go new file mode 100644 index 0000000..d911ada --- /dev/null +++ b/src/html/template/js.go @@ -0,0 +1,485 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package template + +import ( + "bytes" + "encoding/json" + "fmt" + "reflect" + "strings" + "unicode/utf8" +) + +// jsWhitespace contains all of the JS whitespace characters, as defined +// by the \s character class. +// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions/Character_classes. +const jsWhitespace = "\f\n\r\t\v\u0020\u00a0\u1680\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u2028\u2029\u202f\u205f\u3000\ufeff" + +// nextJSCtx returns the context that determines whether a slash after the +// given run of tokens starts a regular expression instead of a division +// operator: / or /=. +// +// This assumes that the token run does not include any string tokens, comment +// tokens, regular expression literal tokens, or division operators. +// +// This fails on some valid but nonsensical JavaScript programs like +// "x = ++/foo/i" which is quite different than "x++/foo/i", but is not known to +// fail on any known useful programs. It is based on the draft +// JavaScript 2.0 lexical grammar and requires one token of lookbehind: +// https://www.mozilla.org/js/language/js20-2000-07/rationale/syntax.html +func nextJSCtx(s []byte, preceding jsCtx) jsCtx { + // Trim all JS whitespace characters + s = bytes.TrimRight(s, jsWhitespace) + if len(s) == 0 { + return preceding + } + + // All cases below are in the single-byte UTF-8 group. + switch c, n := s[len(s)-1], len(s); c { + case '+', '-': + // ++ and -- are not regexp preceders, but + and - are whether + // they are used as infix or prefix operators. + start := n - 1 + // Count the number of adjacent dashes or pluses. + for start > 0 && s[start-1] == c { + start-- + } + if (n-start)&1 == 1 { + // Reached for trailing minus signs since "---" is the + // same as "-- -". + return jsCtxRegexp + } + return jsCtxDivOp + case '.': + // Handle "42." + if n != 1 && '0' <= s[n-2] && s[n-2] <= '9' { + return jsCtxDivOp + } + return jsCtxRegexp + // Suffixes for all punctuators from section 7.7 of the language spec + // that only end binary operators not handled above. + case ',', '<', '>', '=', '*', '%', '&', '|', '^', '?': + return jsCtxRegexp + // Suffixes for all punctuators from section 7.7 of the language spec + // that are prefix operators not handled above. + case '!', '~': + return jsCtxRegexp + // Matches all the punctuators from section 7.7 of the language spec + // that are open brackets not handled above. + case '(', '[': + return jsCtxRegexp + // Matches all the punctuators from section 7.7 of the language spec + // that precede expression starts. + case ':', ';', '{': + return jsCtxRegexp + // CAVEAT: the close punctuators ('}', ']', ')') precede div ops and + // are handled in the default except for '}' which can precede a + // division op as in + // ({ valueOf: function () { return 42 } } / 2 + // which is valid, but, in practice, developers don't divide object + // literals, so our heuristic works well for code like + // function () { ... } /foo/.test(x) && sideEffect(); + // The ')' punctuator can precede a regular expression as in + // if (b) /foo/.test(x) && ... + // but this is much less likely than + // (a + b) / c + case '}': + return jsCtxRegexp + default: + // Look for an IdentifierName and see if it is a keyword that + // can precede a regular expression. + j := n + for j > 0 && isJSIdentPart(rune(s[j-1])) { + j-- + } + if regexpPrecederKeywords[string(s[j:])] { + return jsCtxRegexp + } + } + // Otherwise is a punctuator not listed above, or + // a string which precedes a div op, or an identifier + // which precedes a div op. + return jsCtxDivOp +} + +// regexpPrecederKeywords is a set of reserved JS keywords that can precede a +// regular expression in JS source. +var regexpPrecederKeywords = map[string]bool{ + "break": true, + "case": true, + "continue": true, + "delete": true, + "do": true, + "else": true, + "finally": true, + "in": true, + "instanceof": true, + "return": true, + "throw": true, + "try": true, + "typeof": true, + "void": true, +} + +var jsonMarshalType = reflect.TypeFor[json.Marshaler]() + +// indirectToJSONMarshaler returns the value, after dereferencing as many times +// as necessary to reach the base type (or nil) or an implementation of json.Marshal. +func indirectToJSONMarshaler(a any) any { + // text/template now supports passing untyped nil as a func call + // argument, so we must support it. Otherwise we'd panic below, as one + // cannot call the Type or Interface methods on an invalid + // reflect.Value. See golang.org/issue/18716. + if a == nil { + return nil + } + + v := reflect.ValueOf(a) + for !v.Type().Implements(jsonMarshalType) && v.Kind() == reflect.Pointer && !v.IsNil() { + v = v.Elem() + } + return v.Interface() +} + +// jsValEscaper escapes its inputs to a JS Expression (section 11.14) that has +// neither side-effects nor free variables outside (NaN, Infinity). +func jsValEscaper(args ...any) string { + var a any + if len(args) == 1 { + a = indirectToJSONMarshaler(args[0]) + switch t := a.(type) { + case JS: + return string(t) + case JSStr: + // TODO: normalize quotes. + return `"` + string(t) + `"` + case json.Marshaler: + // Do not treat as a Stringer. + case fmt.Stringer: + a = t.String() + } + } else { + for i, arg := range args { + args[i] = indirectToJSONMarshaler(arg) + } + a = fmt.Sprint(args...) + } + // TODO: detect cycles before calling Marshal which loops infinitely on + // cyclic data. This may be an unacceptable DoS risk. + b, err := json.Marshal(a) + if err != nil { + // While the standard JSON marshaller does not include user controlled + // information in the error message, if a type has a MarshalJSON method, + // the content of the error message is not guaranteed. Since we insert + // the error into the template, as part of a comment, we attempt to + // prevent the error from either terminating the comment, or the script + // block itself. + // + // In particular we: + // * replace "*/" comment end tokens with "* /", which does not + // terminate the comment + // * replace "</script" with "\x3C/script", and "<!--" with + // "\x3C!--", which prevents confusing script block termination + // semantics + // + // We also put a space before the comment so that if it is flush against + // a division operator it is not turned into a line comment: + // x/{{y}} + // turning into + // x//* error marshaling y: + // second line of error message */null + errStr := err.Error() + errStr = strings.ReplaceAll(errStr, "*/", "* /") + errStr = strings.ReplaceAll(errStr, "</script", `\x3C/script`) + errStr = strings.ReplaceAll(errStr, "<!--", `\x3C!--`) + return fmt.Sprintf(" /* %s */null ", errStr) + } + + // TODO: maybe post-process output to prevent it from containing + // "<!--", "-->", "<![CDATA[", "]]>", or "</script" + // in case custom marshalers produce output containing those. + // Note: Do not use \x escaping to save bytes because it is not JSON compatible and this escaper + // supports ld+json content-type. + if len(b) == 0 { + // In, `x=y/{{.}}*z` a json.Marshaler that produces "" should + // not cause the output `x=y/*z`. + return " null " + } + first, _ := utf8.DecodeRune(b) + last, _ := utf8.DecodeLastRune(b) + var buf strings.Builder + // Prevent IdentifierNames and NumericLiterals from running into + // keywords: in, instanceof, typeof, void + pad := isJSIdentPart(first) || isJSIdentPart(last) + if pad { + buf.WriteByte(' ') + } + written := 0 + // Make sure that json.Marshal escapes codepoints U+2028 & U+2029 + // so it falls within the subset of JSON which is valid JS. + for i := 0; i < len(b); { + rune, n := utf8.DecodeRune(b[i:]) + repl := "" + if rune == 0x2028 { + repl = `\u2028` + } else if rune == 0x2029 { + repl = `\u2029` + } + if repl != "" { + buf.Write(b[written:i]) + buf.WriteString(repl) + written = i + n + } + i += n + } + if buf.Len() != 0 { + buf.Write(b[written:]) + if pad { + buf.WriteByte(' ') + } + return buf.String() + } + return string(b) +} + +// jsStrEscaper produces a string that can be included between quotes in +// JavaScript source, in JavaScript embedded in an HTML5 <script> element, +// or in an HTML5 event handler attribute such as onclick. +func jsStrEscaper(args ...any) string { + s, t := stringify(args...) + if t == contentTypeJSStr { + return replace(s, jsStrNormReplacementTable) + } + return replace(s, jsStrReplacementTable) +} + +func jsTmplLitEscaper(args ...any) string { + s, _ := stringify(args...) + return replace(s, jsBqStrReplacementTable) +} + +// jsRegexpEscaper behaves like jsStrEscaper but escapes regular expression +// specials so the result is treated literally when included in a regular +// expression literal. /foo{{.X}}bar/ matches the string "foo" followed by +// the literal text of {{.X}} followed by the string "bar". +func jsRegexpEscaper(args ...any) string { + s, _ := stringify(args...) + s = replace(s, jsRegexpReplacementTable) + if s == "" { + // /{{.X}}/ should not produce a line comment when .X == "". + return "(?:)" + } + return s +} + +// replace replaces each rune r of s with replacementTable[r], provided that +// r < len(replacementTable). If replacementTable[r] is the empty string then +// no replacement is made. +// It also replaces runes U+2028 and U+2029 with the raw strings `\u2028` and +// `\u2029`. +func replace(s string, replacementTable []string) string { + var b strings.Builder + r, w, written := rune(0), 0, 0 + for i := 0; i < len(s); i += w { + // See comment in htmlEscaper. + r, w = utf8.DecodeRuneInString(s[i:]) + var repl string + switch { + case int(r) < len(lowUnicodeReplacementTable): + repl = lowUnicodeReplacementTable[r] + case int(r) < len(replacementTable) && replacementTable[r] != "": + repl = replacementTable[r] + case r == '\u2028': + repl = `\u2028` + case r == '\u2029': + repl = `\u2029` + default: + continue + } + if written == 0 { + b.Grow(len(s)) + } + b.WriteString(s[written:i]) + b.WriteString(repl) + written = i + w + } + if written == 0 { + return s + } + b.WriteString(s[written:]) + return b.String() +} + +var lowUnicodeReplacementTable = []string{ + 0: `\u0000`, 1: `\u0001`, 2: `\u0002`, 3: `\u0003`, 4: `\u0004`, 5: `\u0005`, 6: `\u0006`, + '\a': `\u0007`, + '\b': `\u0008`, + '\t': `\t`, + '\n': `\n`, + '\v': `\u000b`, // "\v" == "v" on IE 6. + '\f': `\f`, + '\r': `\r`, + 0xe: `\u000e`, 0xf: `\u000f`, 0x10: `\u0010`, 0x11: `\u0011`, 0x12: `\u0012`, 0x13: `\u0013`, + 0x14: `\u0014`, 0x15: `\u0015`, 0x16: `\u0016`, 0x17: `\u0017`, 0x18: `\u0018`, 0x19: `\u0019`, + 0x1a: `\u001a`, 0x1b: `\u001b`, 0x1c: `\u001c`, 0x1d: `\u001d`, 0x1e: `\u001e`, 0x1f: `\u001f`, +} + +var jsStrReplacementTable = []string{ + 0: `\u0000`, + '\t': `\t`, + '\n': `\n`, + '\v': `\u000b`, // "\v" == "v" on IE 6. + '\f': `\f`, + '\r': `\r`, + // Encode HTML specials as hex so the output can be embedded + // in HTML attributes without further encoding. + '"': `\u0022`, + '`': `\u0060`, + '&': `\u0026`, + '\'': `\u0027`, + '+': `\u002b`, + '/': `\/`, + '<': `\u003c`, + '>': `\u003e`, + '\\': `\\`, +} + +// jsBqStrReplacementTable is like jsStrReplacementTable except it also contains +// the special characters for JS template literals: $, {, and }. +var jsBqStrReplacementTable = []string{ + 0: `\u0000`, + '\t': `\t`, + '\n': `\n`, + '\v': `\u000b`, // "\v" == "v" on IE 6. + '\f': `\f`, + '\r': `\r`, + // Encode HTML specials as hex so the output can be embedded + // in HTML attributes without further encoding. + '"': `\u0022`, + '`': `\u0060`, + '&': `\u0026`, + '\'': `\u0027`, + '+': `\u002b`, + '/': `\/`, + '<': `\u003c`, + '>': `\u003e`, + '\\': `\\`, + '$': `\u0024`, + '{': `\u007b`, + '}': `\u007d`, +} + +// jsStrNormReplacementTable is like jsStrReplacementTable but does not +// overencode existing escapes since this table has no entry for `\`. +var jsStrNormReplacementTable = []string{ + 0: `\u0000`, + '\t': `\t`, + '\n': `\n`, + '\v': `\u000b`, // "\v" == "v" on IE 6. + '\f': `\f`, + '\r': `\r`, + // Encode HTML specials as hex so the output can be embedded + // in HTML attributes without further encoding. + '"': `\u0022`, + '&': `\u0026`, + '\'': `\u0027`, + '`': `\u0060`, + '+': `\u002b`, + '/': `\/`, + '<': `\u003c`, + '>': `\u003e`, +} +var jsRegexpReplacementTable = []string{ + 0: `\u0000`, + '\t': `\t`, + '\n': `\n`, + '\v': `\u000b`, // "\v" == "v" on IE 6. + '\f': `\f`, + '\r': `\r`, + // Encode HTML specials as hex so the output can be embedded + // in HTML attributes without further encoding. + '"': `\u0022`, + '$': `\$`, + '&': `\u0026`, + '\'': `\u0027`, + '(': `\(`, + ')': `\)`, + '*': `\*`, + '+': `\u002b`, + '-': `\-`, + '.': `\.`, + '/': `\/`, + '<': `\u003c`, + '>': `\u003e`, + '?': `\?`, + '[': `\[`, + '\\': `\\`, + ']': `\]`, + '^': `\^`, + '{': `\{`, + '|': `\|`, + '}': `\}`, +} + +// isJSIdentPart reports whether the given rune is a JS identifier part. +// It does not handle all the non-Latin letters, joiners, and combining marks, +// but it does handle every codepoint that can occur in a numeric literal or +// a keyword. +func isJSIdentPart(r rune) bool { + switch { + case r == '$': + return true + case '0' <= r && r <= '9': + return true + case 'A' <= r && r <= 'Z': + return true + case r == '_': + return true + case 'a' <= r && r <= 'z': + return true + } + return false +} + +// isJSType reports whether the given MIME type should be considered JavaScript. +// +// It is used to determine whether a script tag with a type attribute is a javascript container. +func isJSType(mimeType string) bool { + // per + // https://www.w3.org/TR/html5/scripting-1.html#attr-script-type + // https://tools.ietf.org/html/rfc7231#section-3.1.1 + // https://tools.ietf.org/html/rfc4329#section-3 + // https://www.ietf.org/rfc/rfc4627.txt + // discard parameters + mimeType, _, _ = strings.Cut(mimeType, ";") + mimeType = strings.ToLower(mimeType) + mimeType = strings.TrimSpace(mimeType) + switch mimeType { + case + "application/ecmascript", + "application/javascript", + "application/json", + "application/ld+json", + "application/x-ecmascript", + "application/x-javascript", + "module", + "text/ecmascript", + "text/javascript", + "text/javascript1.0", + "text/javascript1.1", + "text/javascript1.2", + "text/javascript1.3", + "text/javascript1.4", + "text/javascript1.5", + "text/jscript", + "text/livescript", + "text/x-ecmascript", + "text/x-javascript": + return true + default: + return false + } +} |