summaryrefslogtreecommitdiffstats
path: root/src/html/template/transition.go
diff options
context:
space:
mode:
Diffstat (limited to 'src/html/template/transition.go')
-rw-r--r--src/html/template/transition.go634
1 files changed, 634 insertions, 0 deletions
diff --git a/src/html/template/transition.go b/src/html/template/transition.go
new file mode 100644
index 0000000..3d2a37c
--- /dev/null
+++ b/src/html/template/transition.go
@@ -0,0 +1,634 @@
+// Copyright 2011 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package template
+
+import (
+ "bytes"
+ "strings"
+)
+
+// transitionFunc is the array of context transition functions for text nodes.
+// A transition function takes a context and template text input, and returns
+// the updated context and the number of bytes consumed from the front of the
+// input.
+var transitionFunc = [...]func(context, []byte) (context, int){
+ stateText: tText,
+ stateTag: tTag,
+ stateAttrName: tAttrName,
+ stateAfterName: tAfterName,
+ stateBeforeValue: tBeforeValue,
+ stateHTMLCmt: tHTMLCmt,
+ stateRCDATA: tSpecialTagEnd,
+ stateAttr: tAttr,
+ stateURL: tURL,
+ stateSrcset: tURL,
+ stateJS: tJS,
+ stateJSDqStr: tJSDelimited,
+ stateJSSqStr: tJSDelimited,
+ stateJSBqStr: tJSDelimited,
+ stateJSRegexp: tJSDelimited,
+ stateJSBlockCmt: tBlockCmt,
+ stateJSLineCmt: tLineCmt,
+ stateJSHTMLOpenCmt: tLineCmt,
+ stateJSHTMLCloseCmt: tLineCmt,
+ stateCSS: tCSS,
+ stateCSSDqStr: tCSSStr,
+ stateCSSSqStr: tCSSStr,
+ stateCSSDqURL: tCSSStr,
+ stateCSSSqURL: tCSSStr,
+ stateCSSURL: tCSSStr,
+ stateCSSBlockCmt: tBlockCmt,
+ stateCSSLineCmt: tLineCmt,
+ stateError: tError,
+}
+
+var commentStart = []byte("<!--")
+var commentEnd = []byte("-->")
+
+// tText is the context transition function for the text state.
+func tText(c context, s []byte) (context, int) {
+ k := 0
+ for {
+ i := k + bytes.IndexByte(s[k:], '<')
+ if i < k || i+1 == len(s) {
+ return c, len(s)
+ } else if i+4 <= len(s) && bytes.Equal(commentStart, s[i:i+4]) {
+ return context{state: stateHTMLCmt}, i + 4
+ }
+ i++
+ end := false
+ if s[i] == '/' {
+ if i+1 == len(s) {
+ return c, len(s)
+ }
+ end, i = true, i+1
+ }
+ j, e := eatTagName(s, i)
+ if j != i {
+ if end {
+ e = elementNone
+ }
+ // We've found an HTML tag.
+ return context{state: stateTag, element: e}, j
+ }
+ k = j
+ }
+}
+
+var elementContentType = [...]state{
+ elementNone: stateText,
+ elementScript: stateJS,
+ elementStyle: stateCSS,
+ elementTextarea: stateRCDATA,
+ elementTitle: stateRCDATA,
+}
+
+// tTag is the context transition function for the tag state.
+func tTag(c context, s []byte) (context, int) {
+ // Find the attribute name.
+ i := eatWhiteSpace(s, 0)
+ if i == len(s) {
+ return c, len(s)
+ }
+ if s[i] == '>' {
+ return context{
+ state: elementContentType[c.element],
+ element: c.element,
+ }, i + 1
+ }
+ j, err := eatAttrName(s, i)
+ if err != nil {
+ return context{state: stateError, err: err}, len(s)
+ }
+ state, attr := stateTag, attrNone
+ if i == j {
+ return context{
+ state: stateError,
+ err: errorf(ErrBadHTML, nil, 0, "expected space, attr name, or end of tag, but got %q", s[i:]),
+ }, len(s)
+ }
+
+ attrName := strings.ToLower(string(s[i:j]))
+ if c.element == elementScript && attrName == "type" {
+ attr = attrScriptType
+ } else {
+ switch attrType(attrName) {
+ case contentTypeURL:
+ attr = attrURL
+ case contentTypeCSS:
+ attr = attrStyle
+ case contentTypeJS:
+ attr = attrScript
+ case contentTypeSrcset:
+ attr = attrSrcset
+ }
+ }
+
+ if j == len(s) {
+ state = stateAttrName
+ } else {
+ state = stateAfterName
+ }
+ return context{state: state, element: c.element, attr: attr}, j
+}
+
+// tAttrName is the context transition function for stateAttrName.
+func tAttrName(c context, s []byte) (context, int) {
+ i, err := eatAttrName(s, 0)
+ if err != nil {
+ return context{state: stateError, err: err}, len(s)
+ } else if i != len(s) {
+ c.state = stateAfterName
+ }
+ return c, i
+}
+
+// tAfterName is the context transition function for stateAfterName.
+func tAfterName(c context, s []byte) (context, int) {
+ // Look for the start of the value.
+ i := eatWhiteSpace(s, 0)
+ if i == len(s) {
+ return c, len(s)
+ } else if s[i] != '=' {
+ // Occurs due to tag ending '>', and valueless attribute.
+ c.state = stateTag
+ return c, i
+ }
+ c.state = stateBeforeValue
+ // Consume the "=".
+ return c, i + 1
+}
+
+var attrStartStates = [...]state{
+ attrNone: stateAttr,
+ attrScript: stateJS,
+ attrScriptType: stateAttr,
+ attrStyle: stateCSS,
+ attrURL: stateURL,
+ attrSrcset: stateSrcset,
+}
+
+// tBeforeValue is the context transition function for stateBeforeValue.
+func tBeforeValue(c context, s []byte) (context, int) {
+ i := eatWhiteSpace(s, 0)
+ if i == len(s) {
+ return c, len(s)
+ }
+ // Find the attribute delimiter.
+ delim := delimSpaceOrTagEnd
+ switch s[i] {
+ case '\'':
+ delim, i = delimSingleQuote, i+1
+ case '"':
+ delim, i = delimDoubleQuote, i+1
+ }
+ c.state, c.delim = attrStartStates[c.attr], delim
+ return c, i
+}
+
+// tHTMLCmt is the context transition function for stateHTMLCmt.
+func tHTMLCmt(c context, s []byte) (context, int) {
+ if i := bytes.Index(s, commentEnd); i != -1 {
+ return context{}, i + 3
+ }
+ return c, len(s)
+}
+
+// specialTagEndMarkers maps element types to the character sequence that
+// case-insensitively signals the end of the special tag body.
+var specialTagEndMarkers = [...][]byte{
+ elementScript: []byte("script"),
+ elementStyle: []byte("style"),
+ elementTextarea: []byte("textarea"),
+ elementTitle: []byte("title"),
+}
+
+var (
+ specialTagEndPrefix = []byte("</")
+ tagEndSeparators = []byte("> \t\n\f/")
+)
+
+// tSpecialTagEnd is the context transition function for raw text and RCDATA
+// element states.
+func tSpecialTagEnd(c context, s []byte) (context, int) {
+ if c.element != elementNone {
+ // script end tags ("</script") within script literals are ignored, so that
+ // we can properly escape them.
+ if c.element == elementScript && (isInScriptLiteral(c.state) || isComment(c.state)) {
+ return c, len(s)
+ }
+ if i := indexTagEnd(s, specialTagEndMarkers[c.element]); i != -1 {
+ return context{}, i
+ }
+ }
+ return c, len(s)
+}
+
+// indexTagEnd finds the index of a special tag end in a case insensitive way, or returns -1
+func indexTagEnd(s []byte, tag []byte) int {
+ res := 0
+ plen := len(specialTagEndPrefix)
+ for len(s) > 0 {
+ // Try to find the tag end prefix first
+ i := bytes.Index(s, specialTagEndPrefix)
+ if i == -1 {
+ return i
+ }
+ s = s[i+plen:]
+ // Try to match the actual tag if there is still space for it
+ if len(tag) <= len(s) && bytes.EqualFold(tag, s[:len(tag)]) {
+ s = s[len(tag):]
+ // Check the tag is followed by a proper separator
+ if len(s) > 0 && bytes.IndexByte(tagEndSeparators, s[0]) != -1 {
+ return res + i
+ }
+ res += len(tag)
+ }
+ res += i + plen
+ }
+ return -1
+}
+
+// tAttr is the context transition function for the attribute state.
+func tAttr(c context, s []byte) (context, int) {
+ return c, len(s)
+}
+
+// tURL is the context transition function for the URL state.
+func tURL(c context, s []byte) (context, int) {
+ if bytes.ContainsAny(s, "#?") {
+ c.urlPart = urlPartQueryOrFrag
+ } else if len(s) != eatWhiteSpace(s, 0) && c.urlPart == urlPartNone {
+ // HTML5 uses "Valid URL potentially surrounded by spaces" for
+ // attrs: https://www.w3.org/TR/html5/index.html#attributes-1
+ c.urlPart = urlPartPreQuery
+ }
+ return c, len(s)
+}
+
+// tJS is the context transition function for the JS state.
+func tJS(c context, s []byte) (context, int) {
+ i := bytes.IndexAny(s, "\"`'/<-#")
+ if i == -1 {
+ // Entire input is non string, comment, regexp tokens.
+ c.jsCtx = nextJSCtx(s, c.jsCtx)
+ return c, len(s)
+ }
+ c.jsCtx = nextJSCtx(s[:i], c.jsCtx)
+ switch s[i] {
+ case '"':
+ c.state, c.jsCtx = stateJSDqStr, jsCtxRegexp
+ case '\'':
+ c.state, c.jsCtx = stateJSSqStr, jsCtxRegexp
+ case '`':
+ c.state, c.jsCtx = stateJSBqStr, jsCtxRegexp
+ case '/':
+ switch {
+ case i+1 < len(s) && s[i+1] == '/':
+ c.state, i = stateJSLineCmt, i+1
+ case i+1 < len(s) && s[i+1] == '*':
+ c.state, i = stateJSBlockCmt, i+1
+ case c.jsCtx == jsCtxRegexp:
+ c.state = stateJSRegexp
+ case c.jsCtx == jsCtxDivOp:
+ c.jsCtx = jsCtxRegexp
+ default:
+ return context{
+ state: stateError,
+ err: errorf(ErrSlashAmbig, nil, 0, "'/' could start a division or regexp: %.32q", s[i:]),
+ }, len(s)
+ }
+ // ECMAScript supports HTML style comments for legacy reasons, see Appendix
+ // B.1.1 "HTML-like Comments". The handling of these comments is somewhat
+ // confusing. Multi-line comments are not supported, i.e. anything on lines
+ // between the opening and closing tokens is not considered a comment, but
+ // anything following the opening or closing token, on the same line, is
+ // ignored. As such we simply treat any line prefixed with "<!--" or "-->"
+ // as if it were actually prefixed with "//" and move on.
+ case '<':
+ if i+3 < len(s) && bytes.Equal(commentStart, s[i:i+4]) {
+ c.state, i = stateJSHTMLOpenCmt, i+3
+ }
+ case '-':
+ if i+2 < len(s) && bytes.Equal(commentEnd, s[i:i+3]) {
+ c.state, i = stateJSHTMLCloseCmt, i+2
+ }
+ // ECMAScript also supports "hashbang" comment lines, see Section 12.5.
+ case '#':
+ if i+1 < len(s) && s[i+1] == '!' {
+ c.state, i = stateJSLineCmt, i+1
+ }
+ default:
+ panic("unreachable")
+ }
+ return c, i + 1
+}
+
+// tJSDelimited is the context transition function for the JS string and regexp
+// states.
+func tJSDelimited(c context, s []byte) (context, int) {
+ specials := `\"`
+ switch c.state {
+ case stateJSSqStr:
+ specials = `\'`
+ case stateJSBqStr:
+ specials = "`\\"
+ case stateJSRegexp:
+ specials = `\/[]`
+ }
+
+ k, inCharset := 0, false
+ for {
+ i := k + bytes.IndexAny(s[k:], specials)
+ if i < k {
+ break
+ }
+ switch s[i] {
+ case '\\':
+ i++
+ if i == len(s) {
+ return context{
+ state: stateError,
+ err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in JS string: %q", s),
+ }, len(s)
+ }
+ case '[':
+ inCharset = true
+ case ']':
+ inCharset = false
+ case '/':
+ // If "</script" appears in a regex literal, the '/' should not
+ // close the regex literal, and it will later be escaped to
+ // "\x3C/script" in escapeText.
+ if i > 0 && i+7 <= len(s) && bytes.Compare(bytes.ToLower(s[i-1:i+7]), []byte("</script")) == 0 {
+ i++
+ } else if !inCharset {
+ c.state, c.jsCtx = stateJS, jsCtxDivOp
+ return c, i + 1
+ }
+ default:
+ // end delimiter
+ if !inCharset {
+ c.state, c.jsCtx = stateJS, jsCtxDivOp
+ return c, i + 1
+ }
+ }
+ k = i + 1
+ }
+
+ if inCharset {
+ // This can be fixed by making context richer if interpolation
+ // into charsets is desired.
+ return context{
+ state: stateError,
+ err: errorf(ErrPartialCharset, nil, 0, "unfinished JS regexp charset: %q", s),
+ }, len(s)
+ }
+
+ return c, len(s)
+}
+
+var blockCommentEnd = []byte("*/")
+
+// tBlockCmt is the context transition function for /*comment*/ states.
+func tBlockCmt(c context, s []byte) (context, int) {
+ i := bytes.Index(s, blockCommentEnd)
+ if i == -1 {
+ return c, len(s)
+ }
+ switch c.state {
+ case stateJSBlockCmt:
+ c.state = stateJS
+ case stateCSSBlockCmt:
+ c.state = stateCSS
+ default:
+ panic(c.state.String())
+ }
+ return c, i + 2
+}
+
+// tLineCmt is the context transition function for //comment states, and the JS HTML-like comment state.
+func tLineCmt(c context, s []byte) (context, int) {
+ var lineTerminators string
+ var endState state
+ switch c.state {
+ case stateJSLineCmt, stateJSHTMLOpenCmt, stateJSHTMLCloseCmt:
+ lineTerminators, endState = "\n\r\u2028\u2029", stateJS
+ case stateCSSLineCmt:
+ lineTerminators, endState = "\n\f\r", stateCSS
+ // Line comments are not part of any published CSS standard but
+ // are supported by the 4 major browsers.
+ // This defines line comments as
+ // LINECOMMENT ::= "//" [^\n\f\d]*
+ // since https://www.w3.org/TR/css3-syntax/#SUBTOK-nl defines
+ // newlines:
+ // nl ::= #xA | #xD #xA | #xD | #xC
+ default:
+ panic(c.state.String())
+ }
+
+ i := bytes.IndexAny(s, lineTerminators)
+ if i == -1 {
+ return c, len(s)
+ }
+ c.state = endState
+ // Per section 7.4 of EcmaScript 5 : https://es5.github.com/#x7.4
+ // "However, the LineTerminator at the end of the line is not
+ // considered to be part of the single-line comment; it is
+ // recognized separately by the lexical grammar and becomes part
+ // of the stream of input elements for the syntactic grammar."
+ return c, i
+}
+
+// tCSS is the context transition function for the CSS state.
+func tCSS(c context, s []byte) (context, int) {
+ // CSS quoted strings are almost never used except for:
+ // (1) URLs as in background: "/foo.png"
+ // (2) Multiword font-names as in font-family: "Times New Roman"
+ // (3) List separators in content values as in inline-lists:
+ // <style>
+ // ul.inlineList { list-style: none; padding:0 }
+ // ul.inlineList > li { display: inline }
+ // ul.inlineList > li:before { content: ", " }
+ // ul.inlineList > li:first-child:before { content: "" }
+ // </style>
+ // <ul class=inlineList><li>One<li>Two<li>Three</ul>
+ // (4) Attribute value selectors as in a[href="http://example.com/"]
+ //
+ // We conservatively treat all strings as URLs, but make some
+ // allowances to avoid confusion.
+ //
+ // In (1), our conservative assumption is justified.
+ // In (2), valid font names do not contain ':', '?', or '#', so our
+ // conservative assumption is fine since we will never transition past
+ // urlPartPreQuery.
+ // In (3), our protocol heuristic should not be tripped, and there
+ // should not be non-space content after a '?' or '#', so as long as
+ // we only %-encode RFC 3986 reserved characters we are ok.
+ // In (4), we should URL escape for URL attributes, and for others we
+ // have the attribute name available if our conservative assumption
+ // proves problematic for real code.
+
+ k := 0
+ for {
+ i := k + bytes.IndexAny(s[k:], `("'/`)
+ if i < k {
+ return c, len(s)
+ }
+ switch s[i] {
+ case '(':
+ // Look for url to the left.
+ p := bytes.TrimRight(s[:i], "\t\n\f\r ")
+ if endsWithCSSKeyword(p, "url") {
+ j := len(s) - len(bytes.TrimLeft(s[i+1:], "\t\n\f\r "))
+ switch {
+ case j != len(s) && s[j] == '"':
+ c.state, j = stateCSSDqURL, j+1
+ case j != len(s) && s[j] == '\'':
+ c.state, j = stateCSSSqURL, j+1
+ default:
+ c.state = stateCSSURL
+ }
+ return c, j
+ }
+ case '/':
+ if i+1 < len(s) {
+ switch s[i+1] {
+ case '/':
+ c.state = stateCSSLineCmt
+ return c, i + 2
+ case '*':
+ c.state = stateCSSBlockCmt
+ return c, i + 2
+ }
+ }
+ case '"':
+ c.state = stateCSSDqStr
+ return c, i + 1
+ case '\'':
+ c.state = stateCSSSqStr
+ return c, i + 1
+ }
+ k = i + 1
+ }
+}
+
+// tCSSStr is the context transition function for the CSS string and URL states.
+func tCSSStr(c context, s []byte) (context, int) {
+ var endAndEsc string
+ switch c.state {
+ case stateCSSDqStr, stateCSSDqURL:
+ endAndEsc = `\"`
+ case stateCSSSqStr, stateCSSSqURL:
+ endAndEsc = `\'`
+ case stateCSSURL:
+ // Unquoted URLs end with a newline or close parenthesis.
+ // The below includes the wc (whitespace character) and nl.
+ endAndEsc = "\\\t\n\f\r )"
+ default:
+ panic(c.state.String())
+ }
+
+ k := 0
+ for {
+ i := k + bytes.IndexAny(s[k:], endAndEsc)
+ if i < k {
+ c, nread := tURL(c, decodeCSS(s[k:]))
+ return c, k + nread
+ }
+ if s[i] == '\\' {
+ i++
+ if i == len(s) {
+ return context{
+ state: stateError,
+ err: errorf(ErrPartialEscape, nil, 0, "unfinished escape sequence in CSS string: %q", s),
+ }, len(s)
+ }
+ } else {
+ c.state = stateCSS
+ return c, i + 1
+ }
+ c, _ = tURL(c, decodeCSS(s[:i+1]))
+ k = i + 1
+ }
+}
+
+// tError is the context transition function for the error state.
+func tError(c context, s []byte) (context, int) {
+ return c, len(s)
+}
+
+// eatAttrName returns the largest j such that s[i:j] is an attribute name.
+// It returns an error if s[i:] does not look like it begins with an
+// attribute name, such as encountering a quote mark without a preceding
+// equals sign.
+func eatAttrName(s []byte, i int) (int, *Error) {
+ for j := i; j < len(s); j++ {
+ switch s[j] {
+ case ' ', '\t', '\n', '\f', '\r', '=', '>':
+ return j, nil
+ case '\'', '"', '<':
+ // These result in a parse warning in HTML5 and are
+ // indicative of serious problems if seen in an attr
+ // name in a template.
+ return -1, errorf(ErrBadHTML, nil, 0, "%q in attribute name: %.32q", s[j:j+1], s)
+ default:
+ // No-op.
+ }
+ }
+ return len(s), nil
+}
+
+var elementNameMap = map[string]element{
+ "script": elementScript,
+ "style": elementStyle,
+ "textarea": elementTextarea,
+ "title": elementTitle,
+}
+
+// asciiAlpha reports whether c is an ASCII letter.
+func asciiAlpha(c byte) bool {
+ return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'
+}
+
+// asciiAlphaNum reports whether c is an ASCII letter or digit.
+func asciiAlphaNum(c byte) bool {
+ return asciiAlpha(c) || '0' <= c && c <= '9'
+}
+
+// eatTagName returns the largest j such that s[i:j] is a tag name and the tag type.
+func eatTagName(s []byte, i int) (int, element) {
+ if i == len(s) || !asciiAlpha(s[i]) {
+ return i, elementNone
+ }
+ j := i + 1
+ for j < len(s) {
+ x := s[j]
+ if asciiAlphaNum(x) {
+ j++
+ continue
+ }
+ // Allow "x-y" or "x:y" but not "x-", "-y", or "x--y".
+ if (x == ':' || x == '-') && j+1 < len(s) && asciiAlphaNum(s[j+1]) {
+ j += 2
+ continue
+ }
+ break
+ }
+ return j, elementNameMap[strings.ToLower(string(s[i:j]))]
+}
+
+// eatWhiteSpace returns the largest j such that s[i:j] is white space.
+func eatWhiteSpace(s []byte, i int) int {
+ for j := i; j < len(s); j++ {
+ switch s[j] {
+ case ' ', '\t', '\n', '\f', '\r':
+ // No-op.
+ default:
+ return j
+ }
+ }
+ return len(s)
+}