From 73df946d56c74384511a194dd01dbe099584fd1a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 28 Apr 2024 15:14:23 +0200 Subject: Adding upstream version 1.16.10. Signed-off-by: Daniel Baumann --- src/html/template/escape.go | 892 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 892 insertions(+) create mode 100644 src/html/template/escape.go (limited to 'src/html/template/escape.go') diff --git a/src/html/template/escape.go b/src/html/template/escape.go new file mode 100644 index 0000000..8739735 --- /dev/null +++ b/src/html/template/escape.go @@ -0,0 +1,892 @@ +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package template + +import ( + "bytes" + "fmt" + "html" + "io" + "text/template" + "text/template/parse" +) + +// escapeTemplate rewrites the named template, which must be +// associated with tmpl, to guarantee that the output of any of the named +// templates is properly escaped. If no error is returned, then the named templates have +// been modified. Otherwise the named templates have been rendered +// unusable. +func escapeTemplate(tmpl *Template, node parse.Node, name string) error { + c, _ := tmpl.esc.escapeTree(context{}, node, name, 0) + var err error + if c.err != nil { + err, c.err.Name = c.err, name + } else if c.state != stateText { + err = &Error{ErrEndContext, nil, name, 0, fmt.Sprintf("ends in a non-text context: %v", c)} + } + if err != nil { + // Prevent execution of unsafe templates. + if t := tmpl.set[name]; t != nil { + t.escapeErr = err + t.text.Tree = nil + t.Tree = nil + } + return err + } + tmpl.esc.commit() + if t := tmpl.set[name]; t != nil { + t.escapeErr = escapeOK + t.Tree = t.text.Tree + } + return nil +} + +// evalArgs formats the list of arguments into a string. It is equivalent to +// fmt.Sprint(args...), except that it dereferences all pointers. 
+func evalArgs(args ...interface{}) string { + // Optimization for simple common case of a single string argument. + if len(args) == 1 { + if s, ok := args[0].(string); ok { + return s + } + } + for i, arg := range args { + args[i] = indirectToStringerOrError(arg) + } + return fmt.Sprint(args...) +} + +// funcMap maps command names to functions that render their inputs safe. +var funcMap = template.FuncMap{ + "_html_template_attrescaper": attrEscaper, + "_html_template_commentescaper": commentEscaper, + "_html_template_cssescaper": cssEscaper, + "_html_template_cssvaluefilter": cssValueFilter, + "_html_template_htmlnamefilter": htmlNameFilter, + "_html_template_htmlescaper": htmlEscaper, + "_html_template_jsregexpescaper": jsRegexpEscaper, + "_html_template_jsstrescaper": jsStrEscaper, + "_html_template_jsvalescaper": jsValEscaper, + "_html_template_nospaceescaper": htmlNospaceEscaper, + "_html_template_rcdataescaper": rcdataEscaper, + "_html_template_srcsetescaper": srcsetFilterAndEscaper, + "_html_template_urlescaper": urlEscaper, + "_html_template_urlfilter": urlFilter, + "_html_template_urlnormalizer": urlNormalizer, + "_eval_args_": evalArgs, +} + +// escaper collects type inferences about templates and changes needed to make +// templates injection safe. +type escaper struct { + // ns is the nameSpace that this escaper is associated with. + ns *nameSpace + // output[templateName] is the output context for a templateName that + // has been mangled to include its input context. + output map[string]context + // derived[c.mangle(name)] maps to a template derived from the template + // named name templateName for the start context c. + derived map[string]*template.Template + // called[templateName] is a set of called mangled template names. + called map[string]bool + // xxxNodeEdits are the accumulated edits to apply during commit. + // Such edits are not applied immediately in case a template set + // executes a given template in different escaping contexts. 
+ actionNodeEdits map[*parse.ActionNode][]string + templateNodeEdits map[*parse.TemplateNode]string + textNodeEdits map[*parse.TextNode][]byte +} + +// makeEscaper creates a blank escaper for the given set. +func makeEscaper(n *nameSpace) escaper { + return escaper{ + n, + map[string]context{}, + map[string]*template.Template{}, + map[string]bool{}, + map[*parse.ActionNode][]string{}, + map[*parse.TemplateNode]string{}, + map[*parse.TextNode][]byte{}, + } +} + +// filterFailsafe is an innocuous word that is emitted in place of unsafe values +// by sanitizer functions. It is not a keyword in any programming language, +// contains no special characters, is not empty, and when it appears in output +// it is distinct enough that a developer can find the source of the problem +// via a search engine. +const filterFailsafe = "ZgotmplZ" + +// escape escapes a template node. +func (e *escaper) escape(c context, n parse.Node) context { + switch n := n.(type) { + case *parse.ActionNode: + return e.escapeAction(c, n) + case *parse.CommentNode: + return c + case *parse.IfNode: + return e.escapeBranch(c, &n.BranchNode, "if") + case *parse.ListNode: + return e.escapeList(c, n) + case *parse.RangeNode: + return e.escapeBranch(c, &n.BranchNode, "range") + case *parse.TemplateNode: + return e.escapeTemplate(c, n) + case *parse.TextNode: + return e.escapeText(c, n) + case *parse.WithNode: + return e.escapeBranch(c, &n.BranchNode, "with") + } + panic("escaping " + n.String() + " is unimplemented") +} + +// escapeAction escapes an action template node. +func (e *escaper) escapeAction(c context, n *parse.ActionNode) context { + if len(n.Pipe.Decl) != 0 { + // A local variable assignment, not an interpolation. + return c + } + c = nudge(c) + // Check for disallowed use of predefined escapers in the pipeline. 
+ for pos, idNode := range n.Pipe.Cmds { + node, ok := idNode.Args[0].(*parse.IdentifierNode) + if !ok { + // A predefined escaper "esc" will never be found as an identifier in a + // Chain or Field node, since: + // - "esc.x ..." is invalid, since predefined escapers return strings, and + // strings do not have methods, keys or fields. + // - "... .esc" is invalid, since predefined escapers are global functions, + // not methods or fields of any types. + // Therefore, it is safe to ignore these two node types. + continue + } + ident := node.Ident + if _, ok := predefinedEscapers[ident]; ok { + if pos < len(n.Pipe.Cmds)-1 || + c.state == stateAttr && c.delim == delimSpaceOrTagEnd && ident == "html" { + return context{ + state: stateError, + err: errorf(ErrPredefinedEscaper, n, n.Line, "predefined escaper %q disallowed in template", ident), + } + } + } + } + s := make([]string, 0, 3) + switch c.state { + case stateError: + return c + case stateURL, stateCSSDqStr, stateCSSSqStr, stateCSSDqURL, stateCSSSqURL, stateCSSURL: + switch c.urlPart { + case urlPartNone: + s = append(s, "_html_template_urlfilter") + fallthrough + case urlPartPreQuery: + switch c.state { + case stateCSSDqStr, stateCSSSqStr: + s = append(s, "_html_template_cssescaper") + default: + s = append(s, "_html_template_urlnormalizer") + } + case urlPartQueryOrFrag: + s = append(s, "_html_template_urlescaper") + case urlPartUnknown: + return context{ + state: stateError, + err: errorf(ErrAmbigContext, n, n.Line, "%s appears in an ambiguous context within a URL", n), + } + default: + panic(c.urlPart.String()) + } + case stateJS: + s = append(s, "_html_template_jsvalescaper") + // A slash after a value starts a div operator. 
+ c.jsCtx = jsCtxDivOp + case stateJSDqStr, stateJSSqStr: + s = append(s, "_html_template_jsstrescaper") + case stateJSRegexp: + s = append(s, "_html_template_jsregexpescaper") + case stateCSS: + s = append(s, "_html_template_cssvaluefilter") + case stateText: + s = append(s, "_html_template_htmlescaper") + case stateRCDATA: + s = append(s, "_html_template_rcdataescaper") + case stateAttr: + // Handled below in delim check. + case stateAttrName, stateTag: + c.state = stateAttrName + s = append(s, "_html_template_htmlnamefilter") + case stateSrcset: + s = append(s, "_html_template_srcsetescaper") + default: + if isComment(c.state) { + s = append(s, "_html_template_commentescaper") + } else { + panic("unexpected state " + c.state.String()) + } + } + switch c.delim { + case delimNone: + // No extra-escaping needed for raw text content. + case delimSpaceOrTagEnd: + s = append(s, "_html_template_nospaceescaper") + default: + s = append(s, "_html_template_attrescaper") + } + e.editActionNode(n, s) + return c +} + +// ensurePipelineContains ensures that the pipeline ends with the commands with +// the identifiers in s in order. If the pipeline ends with a predefined escaper +// (i.e. "html" or "urlquery"), merge it with the identifiers in s. +func ensurePipelineContains(p *parse.PipeNode, s []string) { + if len(s) == 0 { + // Do not rewrite pipeline if we have no escapers to insert. + return + } + // Precondition: p.Cmds contains at most one predefined escaper and the + // escaper will be present at p.Cmds[len(p.Cmds)-1]. This precondition is + // always true because of the checks in escapeAction. + pipelineLen := len(p.Cmds) + if pipelineLen > 0 { + lastCmd := p.Cmds[pipelineLen-1] + if idNode, ok := lastCmd.Args[0].(*parse.IdentifierNode); ok { + if esc := idNode.Ident; predefinedEscapers[esc] { + // Pipeline ends with a predefined escaper. + if len(p.Cmds) == 1 && len(lastCmd.Args) > 1 { + // Special case: pipeline is of the form {{ esc arg1 arg2 ... 
argN }}, + // where esc is the predefined escaper, and arg1...argN are its arguments. + // Convert this into the equivalent form + // {{ _eval_args_ arg1 arg2 ... argN | esc }}, so that esc can be easily + // merged with the escapers in s. + lastCmd.Args[0] = parse.NewIdentifier("_eval_args_").SetTree(nil).SetPos(lastCmd.Args[0].Position()) + p.Cmds = appendCmd(p.Cmds, newIdentCmd(esc, p.Position())) + pipelineLen++ + } + // If any of the commands in s that we are about to insert is equivalent + // to the predefined escaper, use the predefined escaper instead. + dup := false + for i, escaper := range s { + if escFnsEq(esc, escaper) { + s[i] = idNode.Ident + dup = true + } + } + if dup { + // The predefined escaper will already be inserted along with the + // escapers in s, so do not copy it to the rewritten pipeline. + pipelineLen-- + } + } + } + } + // Rewrite the pipeline, creating the escapers in s at the end of the pipeline. + newCmds := make([]*parse.CommandNode, pipelineLen, pipelineLen+len(s)) + insertedIdents := make(map[string]bool) + for i := 0; i < pipelineLen; i++ { + cmd := p.Cmds[i] + newCmds[i] = cmd + if idNode, ok := cmd.Args[0].(*parse.IdentifierNode); ok { + insertedIdents[normalizeEscFn(idNode.Ident)] = true + } + } + for _, name := range s { + if !insertedIdents[normalizeEscFn(name)] { + // When two templates share an underlying parse tree via the use of + // AddParseTree and one template is executed after the other, this check + // ensures that escapers that were already inserted into the pipeline on + // the first escaping pass do not get inserted again. + newCmds = appendCmd(newCmds, newIdentCmd(name, p.Position())) + } + } + p.Cmds = newCmds +} + +// predefinedEscapers contains template predefined escapers that are equivalent +// to some contextual escapers. Keep in sync with equivEscapers. 
var predefinedEscapers = map[string]bool{
	"html":     true,
	"urlquery": true,
}

// equivEscapers matches contextual escapers to equivalent predefined
// template escapers.
var equivEscapers = map[string]string{
	// The following pairs of HTML escapers provide equivalent security
	// guarantees, since they all escape '\000', '\'', '"', '&', '<', and '>'.
	"_html_template_attrescaper":   "html",
	"_html_template_htmlescaper":   "html",
	"_html_template_rcdataescaper": "html",
	// These two URL escapers produce URLs safe for embedding in a URL query by
	// percent-encoding all the reserved characters specified in RFC 3986 Section
	// 2.2
	"_html_template_urlescaper": "urlquery",
	// These two functions are not actually equivalent; urlquery is stricter as it
	// escapes reserved characters (e.g. '#'), while _html_template_urlnormalizer
	// does not. It is therefore only safe to replace _html_template_urlnormalizer
	// with urlquery (this happens in ensurePipelineContains), but not the other
	// way around. We keep this entry around to preserve the behavior of templates
	// written before Go 1.9, which might depend on this substitution taking place.
	"_html_template_urlnormalizer": "urlquery",
}

// escFnsEq reports whether the two escaping functions are equivalent, i.e.
// whether their names normalize to the same name under normalizeEscFn.
func escFnsEq(a, b string) bool {
	return normalizeEscFn(a) == normalizeEscFn(b)
}

// normalizeEscFn(a) is equal to normalizeEscFn(b) for any pair of names of
// escaper functions a and b that are equivalent.
//
// A name listed in equivEscapers is mapped to its predefined equivalent; any
// other name, including the predefined names themselves, is returned unchanged.
func normalizeEscFn(e string) string {
	if norm := equivEscapers[e]; norm != "" {
		return norm
	}
	return e
}

// redundantFuncs[a][b] implies that funcMap[b](funcMap[a](x)) == funcMap[a](x)
// for all x.
var redundantFuncs = map[string]map[string]bool{
	"_html_template_commentescaper": {
		"_html_template_attrescaper":    true,
		"_html_template_nospaceescaper": true,
		"_html_template_htmlescaper":    true,
	},
	"_html_template_cssescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_jsregexpescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_jsstrescaper": {
		"_html_template_attrescaper": true,
	},
	"_html_template_urlescaper": {
		"_html_template_urlnormalizer": true,
	},
}

// appendCmd appends the given command to the end of the command pipeline
// unless it is redundant with the last command.
//
// Redundancy is looked up as redundantFuncs[last][next] and is only checked
// when the first argument of both commands is an identifier node; in every
// other case cmd is appended.
func appendCmd(cmds []*parse.CommandNode, cmd *parse.CommandNode) []*parse.CommandNode {
	if n := len(cmds); n != 0 {
		last, okLast := cmds[n-1].Args[0].(*parse.IdentifierNode)
		next, okNext := cmd.Args[0].(*parse.IdentifierNode)
		if okLast && okNext && redundantFuncs[last.Ident][next.Ident] {
			return cmds
		}
	}
	return append(cmds, cmd)
}

// newIdentCmd produces a command containing a single identifier node whose
// name is identifier and whose position is pos.
func newIdentCmd(identifier string, pos parse.Pos) *parse.CommandNode {
	return &parse.CommandNode{
		NodeType: parse.NodeCommand,
		Args:     []parse.Node{parse.NewIdentifier(identifier).SetTree(nil).SetPos(pos)}, // TODO: SetTree.
	}
}

// NOTE(review): the text on the next line was already truncated and garbled in
// SOURCE (the body of nudge and several following definitions are missing, and
// delimEnds continues on a later line). It is preserved verbatim as a single
// comment line rather than reconstructed by guesswork.
// nudge returns the context that would result from following empty string +// transitions from the input context. +// For example, parsing: +// `90% of the time. + e.output[t.Name()] = c + return e.escapeListConditionally(c, t.Tree.Root, filter) +} + +// delimEnds maps each delim to a string of characters that terminate it. +var delimEnds = [...]string{ + delimDoubleQuote: `"`, + delimSingleQuote: "'", + // Determined empirically by running the below in various browsers. + // var div = document.createElement("DIV"); + // for (var i = 0; i < 0x10000; ++i) { + // div.innerHTML = ""; + // if (div.getElementsByTagName("SPAN")[0].title.indexOf("bar") < 0) + // document.write("

U+" + i.toString(16)); + // } + delimSpaceOrTagEnd: " \t\n\f\r>", +} + +var doctypeBytes = []byte("= i; j-- { + if s[j] == '<' { + end = j + break + } + } + } + for j := i; j < end; j++ { + if s[j] == '<' && !bytes.HasPrefix(bytes.ToUpper(s[j:]), doctypeBytes) { + b.Write(s[written:j]) + b.WriteString("<") + written = j + 1 + } + } + } else if isComment(c.state) && c.delim == delimNone { + switch c.state { + case stateJSBlockCmt: + // https://es5.github.com/#x7.4: + // "Comments behave like white space and are + // discarded except that, if a MultiLineComment + // contains a line terminator character, then + // the entire comment is considered to be a + // LineTerminator for purposes of parsing by + // the syntactic grammar." + if bytes.ContainsAny(s[written:i1], "\n\r\u2028\u2029") { + b.WriteByte('\n') + } else { + b.WriteByte(' ') + } + case stateCSSBlockCmt: + b.WriteByte(' ') + } + written = i1 + } + if c.state != c1.state && isComment(c1.state) && c1.delim == delimNone { + // Preserve the portion between written and the comment start. + cs := i1 - 2 + if c1.state == stateHTMLCmt { + // "