summaryrefslogtreecommitdiffstats
path: root/dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/scanner.go
diff options
context:
space:
mode:
Diffstat (limited to 'dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/scanner.go')
-rw-r--r--dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/scanner.go876
1 files changed, 876 insertions, 0 deletions
diff --git a/dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/scanner.go b/dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/scanner.go
new file mode 100644
index 0000000..1e09190
--- /dev/null
+++ b/dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/scanner.go
@@ -0,0 +1,876 @@
+package scanner
+
+import (
+ "io"
+ "strings"
+
+ "github.com/goccy/go-yaml/token"
+ "golang.org/x/xerrors"
+)
+
// IndentState state for indent
type IndentState int

const (
	// IndentStateEqual means the current line has the same indent as the previous line.
	IndentStateEqual IndentState = iota
	// IndentStateUp means the current line is indented deeper than the previous line.
	IndentStateUp
	// IndentStateDown means the current line is indented shallower than the previous line.
	IndentStateDown
	// IndentStateKeep is used for characters past the first one on a line,
	// where the indent can no longer change (see updateIndent).
	IndentStateKeep
)
+
// Scanner holds the scanner's internal state while processing a given text.
// It can be allocated as part of another data structure but must be initialized via Init before use.
type Scanner struct {
	source                 []rune // full source text as runes
	sourcePos              int    // how many runes of source have been consumed by previous Scan calls
	sourceSize             int    // len(source)
	line                   int    // current line (1-based)
	column                 int    // current column (1-based, reset by progressLine)
	offset                 int    // absolute offset (1-based)
	prevIndentLevel        int    // indent level of the previous line
	prevIndentNum          int    // leading-space count of the previous line
	prevIndentColumn       int    // column of the last map key / sequence entry; 0 when unset
	docStartColumn         int    // content column of a literal/folded block; 0 when unset
	indentLevel            int    // indent level of the current line
	indentNum              int    // leading-space count of the current line
	isFirstCharAtLine      bool   // true while still in the leading whitespace of a line
	isAnchor               bool   // true right after an '&' anchor token until the next line break
	startedFlowSequenceNum int    // nesting depth of '[' flow sequences
	startedFlowMapNum      int    // nesting depth of '{' flow mappings
	indentState            IndentState
	savedPos               *token.Position // position saved at a line break for the still-buffered token
}
+
+func (s *Scanner) pos() *token.Position {
+ return &token.Position{
+ Line: s.line,
+ Column: s.column,
+ Offset: s.offset,
+ IndentNum: s.indentNum,
+ IndentLevel: s.indentLevel,
+ }
+}
+
+func (s *Scanner) bufferedToken(ctx *Context) *token.Token {
+ if s.savedPos != nil {
+ tk := ctx.bufferedToken(s.savedPos)
+ s.savedPos = nil
+ return tk
+ }
+ size := len(ctx.buf)
+ return ctx.bufferedToken(&token.Position{
+ Line: s.line,
+ Column: s.column - size,
+ Offset: s.offset - size,
+ IndentNum: s.indentNum,
+ IndentLevel: s.indentLevel,
+ })
+}
+
// progressColumn advances the scanner by num columns within the current line,
// keeping column, absolute offset, and the context's read index in sync.
func (s *Scanner) progressColumn(ctx *Context, num int) {
	s.column += num
	s.offset += num
	ctx.progress(num)
}
+
+func (s *Scanner) progressLine(ctx *Context) {
+ s.column = 1
+ s.line++
+ s.offset++
+ s.indentNum = 0
+ s.isFirstCharAtLine = true
+ s.isAnchor = false
+ ctx.progress(1)
+}
+
+func (s *Scanner) isNeededKeepPreviousIndentNum(ctx *Context, c rune) bool {
+ if !s.isChangedToIndentStateUp() {
+ return false
+ }
+ if ctx.isDocument() {
+ return true
+ }
+ if c == '-' && ctx.existsBuffer() {
+ return true
+ }
+ return false
+}
+
+func (s *Scanner) isNewLineChar(c rune) bool {
+ if c == '\n' {
+ return true
+ }
+ if c == '\r' {
+ return true
+ }
+ return false
+}
+
+func (s *Scanner) newLineCount(src []rune) int {
+ size := len(src)
+ cnt := 0
+ for i := 0; i < size; i++ {
+ c := src[i]
+ switch c {
+ case '\r':
+ if i+1 < size && src[i+1] == '\n' {
+ i++
+ }
+ cnt++
+ case '\n':
+ cnt++
+ }
+ }
+ return cnt
+}
+
+func (s *Scanner) updateIndentState(ctx *Context) {
+ indentNumBasedIndentState := s.indentState
+ if s.prevIndentNum < s.indentNum {
+ s.indentLevel = s.prevIndentLevel + 1
+ indentNumBasedIndentState = IndentStateUp
+ } else if s.prevIndentNum == s.indentNum {
+ s.indentLevel = s.prevIndentLevel
+ indentNumBasedIndentState = IndentStateEqual
+ } else {
+ indentNumBasedIndentState = IndentStateDown
+ if s.prevIndentLevel > 0 {
+ s.indentLevel = s.prevIndentLevel - 1
+ }
+ }
+
+ if s.prevIndentColumn > 0 {
+ if s.prevIndentColumn < s.column {
+ s.indentState = IndentStateUp
+ } else if s.prevIndentColumn != s.column || indentNumBasedIndentState != IndentStateEqual {
+ // The following case ( current position is 'd' ), some variables becomes like here
+ // - prevIndentColumn: 1 of 'a'
+ // - indentNumBasedIndentState: IndentStateDown because d's indentNum(1) is less than c's indentNum(3).
+ // Therefore, s.prevIndentColumn(1) == s.column(1) is true, but we want to treat this as IndentStateDown.
+ // So, we look also current indentState value by the above prevIndentNum based logic, and determins finally indentState.
+ // ---
+ // a:
+ // b
+ // c
+ // d: e
+ // ^
+ s.indentState = IndentStateDown
+ } else {
+ s.indentState = IndentStateEqual
+ }
+ } else {
+ s.indentState = indentNumBasedIndentState
+ }
+}
+
// updateIndent maintains indent tracking for character c. While scanning the
// leading spaces of a line it only counts them; at the first non-space
// character it resolves the indent state via updateIndentState and records
// the indent values for comparison with the next line.
func (s *Scanner) updateIndent(ctx *Context, c rune) {
	// Inside a literal/folded block a blank line does not affect indent.
	if s.isFirstCharAtLine && s.isNewLineChar(c) && ctx.isDocument() {
		return
	}
	// Still inside the leading whitespace: just count the spaces.
	if s.isFirstCharAtLine && c == ' ' {
		s.indentNum++
		return
	}
	// Past the first character of the line: indent cannot change anymore.
	if !s.isFirstCharAtLine {
		s.indentState = IndentStateKeep
		return
	}
	s.updateIndentState(ctx)
	s.isFirstCharAtLine = false
	if s.isNeededKeepPreviousIndentNum(ctx, c) {
		return
	}
	// A non-increasing indent invalidates the recorded key column.
	if s.indentState != IndentStateUp {
		s.prevIndentColumn = 0
	}
	s.prevIndentNum = s.indentNum
	s.prevIndentLevel = s.indentLevel
}
+
// isChangedToIndentStateDown reports whether the last indent update moved to
// a shallower indent.
func (s *Scanner) isChangedToIndentStateDown() bool {
	return s.indentState == IndentStateDown
}

// isChangedToIndentStateUp reports whether the last indent update moved to a
// deeper indent.
func (s *Scanner) isChangedToIndentStateUp() bool {
	return s.indentState == IndentStateUp
}

// isChangedToIndentStateEqual reports whether the last indent update kept the
// same indent as the previous line.
func (s *Scanner) isChangedToIndentStateEqual() bool {
	return s.indentState == IndentStateEqual
}
+
// addBufferedTokenIfExists flushes any buffered characters as a token and
// appends it to the context's token list (bufferedToken may return nil for an
// empty buffer; addToken is presumably nil-safe — behavior defined in Context).
func (s *Scanner) addBufferedTokenIfExists(ctx *Context) {
	ctx.addToken(s.bufferedToken(ctx))
}
+
// breakLiteral ends literal/folded scanning mode and clears the recorded
// content start column of the block.
func (s *Scanner) breakLiteral(ctx *Context) {
	s.docStartColumn = 0
	ctx.breakLiteral()
}
+
// scanSingleQuote scans a single-quoted scalar starting at the opening quote.
// Line breaks inside the scalar are folded into a single space and the leading
// spaces of continuation lines are skipped; the escaped quote sequence ''
// yields a literal '. It returns the token (nil if the closing quote is never
// found) and pos, the number of characters consumed after the opening quote.
func (s *Scanner) scanSingleQuote(ctx *Context) (tk *token.Token, pos int) {
	ctx.addOriginBuf('\'')
	srcpos := s.pos()
	startIndex := ctx.idx + 1
	src := ctx.src
	size := len(src)
	value := []rune{}
	isFirstLineChar := false
	isNewLine := false
	for idx := startIndex; idx < size; idx++ {
		// progressLine already advanced past the break, so skip one column
		// advance on the first character of a new line.
		if !isNewLine {
			s.progressColumn(ctx, 1)
		} else {
			isNewLine = false
		}
		c := src[idx]
		pos = idx + 1
		ctx.addOriginBuf(c)
		if s.isNewLineChar(c) {
			// Fold the line break to a single space.
			value = append(value, ' ')
			isFirstLineChar = true
			isNewLine = true
			s.progressLine(ctx)
			continue
		} else if c == ' ' && isFirstLineChar {
			// Skip indentation of a continuation line.
			continue
		} else if c != '\'' {
			value = append(value, c)
			isFirstLineChar = false
			continue
		}
		if idx+1 < len(ctx.src) && ctx.src[idx+1] == '\'' {
			// '' handle as ' character
			value = append(value, c)
			ctx.addOriginBuf(c)
			idx++
			continue
		}
		// Closing quote: pos becomes the length relative to startIndex.
		s.progressColumn(ctx, 1)
		tk = token.SingleQuote(string(value), string(ctx.obuf), srcpos)
		pos = idx - startIndex + 1
		return
	}
	return
}
+
// hexToInt converts a single hexadecimal digit ('0'-'9', 'a'-'f', 'A'-'F') to
// its numeric value. Non-hex runes are not validated; the '0' offset is
// applied unconditionally for anything outside the letter ranges.
func hexToInt(b rune) int {
	switch {
	case b >= 'A' && b <= 'F':
		return int(b-'A') + 10
	case b >= 'a' && b <= 'f':
		return int(b-'a') + 10
	default:
		return int(b - '0')
	}
}

// hexRunesToInt interprets b as a big-endian sequence of hexadecimal digits
// and returns the resulting integer value (0 for an empty slice).
func hexRunesToInt(b []rune) int {
	sum := 0
	for _, r := range b {
		sum = (sum << 4) + hexToInt(r)
	}
	return sum
}
+
+func (s *Scanner) scanDoubleQuote(ctx *Context) (tk *token.Token, pos int) {
+ ctx.addOriginBuf('"')
+ srcpos := s.pos()
+ startIndex := ctx.idx + 1
+ src := ctx.src
+ size := len(src)
+ value := []rune{}
+ isFirstLineChar := false
+ isNewLine := false
+ for idx := startIndex; idx < size; idx++ {
+ if !isNewLine {
+ s.progressColumn(ctx, 1)
+ } else {
+ isNewLine = false
+ }
+ c := src[idx]
+ pos = idx + 1
+ ctx.addOriginBuf(c)
+ if s.isNewLineChar(c) {
+ value = append(value, ' ')
+ isFirstLineChar = true
+ isNewLine = true
+ s.progressLine(ctx)
+ continue
+ } else if c == ' ' && isFirstLineChar {
+ continue
+ } else if c == '\\' {
+ isFirstLineChar = false
+ if idx+1 < size {
+ nextChar := src[idx+1]
+ switch nextChar {
+ case 'b':
+ ctx.addOriginBuf(nextChar)
+ value = append(value, '\b')
+ idx++
+ continue
+ case 'e':
+ ctx.addOriginBuf(nextChar)
+ value = append(value, '\x1B')
+ idx++
+ continue
+ case 'f':
+ ctx.addOriginBuf(nextChar)
+ value = append(value, '\f')
+ idx++
+ continue
+ case 'n':
+ ctx.addOriginBuf(nextChar)
+ value = append(value, '\n')
+ idx++
+ continue
+ case 'v':
+ ctx.addOriginBuf(nextChar)
+ value = append(value, '\v')
+ idx++
+ continue
+ case 'L': // LS (#x2028)
+ ctx.addOriginBuf(nextChar)
+ value = append(value, []rune{'\xE2', '\x80', '\xA8'}...)
+ idx++
+ continue
+ case 'N': // NEL (#x85)
+ ctx.addOriginBuf(nextChar)
+ value = append(value, []rune{'\xC2', '\x85'}...)
+ idx++
+ continue
+ case 'P': // PS (#x2029)
+ ctx.addOriginBuf(nextChar)
+ value = append(value, []rune{'\xE2', '\x80', '\xA9'}...)
+ idx++
+ continue
+ case '_': // #xA0
+ ctx.addOriginBuf(nextChar)
+ value = append(value, []rune{'\xC2', '\xA0'}...)
+ idx++
+ continue
+ case '"':
+ ctx.addOriginBuf(nextChar)
+ value = append(value, nextChar)
+ idx++
+ continue
+ case 'x':
+ if idx+3 >= size {
+ // TODO: need to return error
+ //err = xerrors.New("invalid escape character \\x")
+ return
+ }
+ codeNum := hexRunesToInt(src[idx+2 : idx+4])
+ value = append(value, rune(codeNum))
+ idx += 3
+ continue
+ case 'u':
+ if idx+5 >= size {
+ // TODO: need to return error
+ //err = xerrors.New("invalid escape character \\u")
+ return
+ }
+ codeNum := hexRunesToInt(src[idx+2 : idx+6])
+ value = append(value, rune(codeNum))
+ idx += 5
+ continue
+ case 'U':
+ if idx+9 >= size {
+ // TODO: need to return error
+ //err = xerrors.New("invalid escape character \\U")
+ return
+ }
+ codeNum := hexRunesToInt(src[idx+2 : idx+10])
+ value = append(value, rune(codeNum))
+ idx += 9
+ continue
+ case '\\':
+ ctx.addOriginBuf(nextChar)
+ idx++
+ }
+ }
+ value = append(value, c)
+ continue
+ } else if c != '"' {
+ value = append(value, c)
+ isFirstLineChar = false
+ continue
+ }
+ s.progressColumn(ctx, 1)
+ tk = token.DoubleQuote(string(value), string(ctx.obuf), srcpos)
+ pos = idx - startIndex + 1
+ return
+ }
+ return
+}
+
+func (s *Scanner) scanQuote(ctx *Context, ch rune) (tk *token.Token, pos int) {
+ if ch == '\'' {
+ return s.scanSingleQuote(ctx)
+ }
+ return s.scanDoubleQuote(ctx)
+}
+
// isMergeKey reports whether the scanner is positioned at a merge key:
// exactly two '<' characters, optionally followed by spaces, then a ':' that
// is itself followed by a space or a line break.
func (s *Scanner) isMergeKey(ctx *Context) bool {
	if ctx.repeatNum('<') != 2 {
		return false
	}
	src := ctx.src
	size := len(src)
	for idx := ctx.idx + 2; idx < size; idx++ {
		c := src[idx]
		if c == ' ' {
			continue
		}
		if c != ':' {
			return false
		}
		if idx+1 < size {
			nc := src[idx+1]
			if nc == ' ' || s.isNewLineChar(nc) {
				return true
			}
		}
	}
	// NOTE(review): "<<:" at the very end of input falls through to false
	// here — confirm whether that is intended.
	return false
}
+
// scanTag scans a tag token ("!...") terminated by a space or a line break.
// The returned pos is relative to the position after the leading '!'. tk is
// nil if no terminator is found before the end of input.
func (s *Scanner) scanTag(ctx *Context) (tk *token.Token, pos int) {
	ctx.addOriginBuf('!')
	ctx.progress(1) // skip '!' character
	for idx, c := range ctx.src[ctx.idx:] {
		pos = idx + 1
		ctx.addOriginBuf(c)
		switch c {
		case ' ', '\n', '\r':
			// The tag value includes the leading '!' (hence ctx.idx-1).
			value := ctx.source(ctx.idx-1, ctx.idx+idx)
			tk = token.Tag(value, string(ctx.obuf), s.pos())
			pos = len([]rune(value))
			return
		}
	}
	return
}
+
// scanComment scans a comment token ("#...") up to the end of the line. A
// line break immediately preceded by '\\' does not terminate the comment.
// tk is nil if no line break is found before the end of input.
func (s *Scanner) scanComment(ctx *Context) (tk *token.Token, pos int) {
	ctx.addOriginBuf('#')
	ctx.progress(1) // skip '#' character
	for idx, c := range ctx.src[ctx.idx:] {
		pos = idx + 1
		ctx.addOriginBuf(c)
		switch c {
		case '\n', '\r':
			if ctx.previousChar() == '\\' {
				continue
			}
			// The comment value excludes the leading '#'.
			value := ctx.source(ctx.idx, ctx.idx+idx)
			tk = token.Comment(value, string(ctx.obuf), s.pos())
			pos = len([]rune(value)) + 1
			return
		}
	}
	return
}
+
+func trimCommentFromLiteralOpt(text string) (string, error) {
+ idx := strings.Index(text, "#")
+ if idx < 0 {
+ return text, nil
+ }
+ if idx == 0 {
+ return "", xerrors.New("invalid literal header")
+ }
+ return text[:idx-1], nil
+}
+
// scanLiteral consumes one character c of a literal ('|') or folded ('>')
// block scalar. In literal mode line breaks are kept verbatim; otherwise they
// are folded to a space. Indentation up to docStartColumn (the column of the
// first content character) is stripped from each line.
func (s *Scanner) scanLiteral(ctx *Context, c rune) {
	ctx.addOriginBuf(c)
	if ctx.isEOS() {
		// Last character of the input: flush the block as a string token.
		if ctx.isLiteral {
			ctx.addBuf(c)
		}
		value := ctx.bufferedSrc()
		ctx.addToken(token.String(string(value), string(ctx.obuf), s.pos()))
		ctx.resetBuffer()
		s.progressColumn(ctx, 1)
	} else if s.isNewLineChar(c) {
		// Keep the break in literal mode, fold it to a space otherwise.
		if ctx.isLiteral {
			ctx.addBuf(c)
		} else {
			ctx.addBuf(' ')
		}
		s.progressLine(ctx)
	} else if s.isFirstCharAtLine && c == ' ' {
		// Leading space: only keep it once past the content start column.
		if 0 < s.docStartColumn && s.docStartColumn <= s.column {
			ctx.addBuf(c)
		}
		s.progressColumn(ctx, 1)
	} else {
		// First content character of the block fixes the content column.
		if s.docStartColumn == 0 {
			s.docStartColumn = s.column
		}
		ctx.addBuf(c)
		s.progressColumn(ctx, 1)
	}
}
+
// scanLiteralHeader scans the header line of a literal ('|') or folded ('>')
// block scalar, i.e. the optional chomping/indent indicator ("+", "-", a
// digit, or empty) and an optional trailing comment, up to the line break.
// It emits the Literal/Folded token (plus a Comment token when present),
// switches the context into literal/folded mode, and returns the number of
// characters consumed after the header character. An unrecognized option or
// a missing line break yields an "invalid literal header" error.
func (s *Scanner) scanLiteralHeader(ctx *Context) (pos int, err error) {
	header := ctx.currentChar()
	ctx.addOriginBuf(header)
	ctx.progress(1) // skip '|' or '>' character
	for idx, c := range ctx.src[ctx.idx:] {
		pos = idx
		ctx.addOriginBuf(c)
		switch c {
		case '\n', '\r':
			value := ctx.source(ctx.idx, ctx.idx+idx)
			opt := strings.TrimRight(value, " ")
			orgOptLen := len(opt)
			opt, err = trimCommentFromLiteralOpt(opt)
			if err != nil {
				return
			}
			switch opt {
			case "", "+", "-",
				"0", "1", "2", "3", "4", "5", "6", "7", "8", "9":
				// opt shrank => a comment was trimmed off the header.
				hasComment := len(opt) < orgOptLen
				if header == '|' {
					if hasComment {
						// Split obuf into the literal-header part and the
						// comment part so each token gets its own origin.
						commentLen := orgOptLen - len(opt)
						headerPos := strings.Index(string(ctx.obuf), "|")
						litBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos]
						commentBuf := ctx.obuf[len(litBuf):]
						ctx.addToken(token.Literal("|"+opt, string(litBuf), s.pos()))
						s.column += len(litBuf)
						s.offset += len(litBuf)
						commentHeader := strings.Index(value, "#")
						ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos()))
					} else {
						ctx.addToken(token.Literal("|"+opt, string(ctx.obuf), s.pos()))
					}
					ctx.isLiteral = true
				} else if header == '>' {
					if hasComment {
						// Same splitting as the '|' case, for folded scalars.
						commentLen := orgOptLen - len(opt)
						headerPos := strings.Index(string(ctx.obuf), ">")
						foldedBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos]
						commentBuf := ctx.obuf[len(foldedBuf):]
						ctx.addToken(token.Folded(">"+opt, string(foldedBuf), s.pos()))
						s.column += len(foldedBuf)
						s.offset += len(foldedBuf)
						commentHeader := strings.Index(value, "#")
						ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos()))
					} else {
						ctx.addToken(token.Folded(">"+opt, string(ctx.obuf), s.pos()))
					}
					ctx.isFolded = true
				}
				s.indentState = IndentStateKeep
				ctx.resetBuffer()
				ctx.literalOpt = opt
				return
			}
			break
		}
	}
	err = xerrors.New("invalid literal header")
	return
}
+
// scanNewLine handles a line-break character c outside of quoted/literal
// scanning: it saves the position of any still-buffered token, trims trailing
// spaces from the buffers, flushes the buffer at EOS or after an anchor, and
// folds the break into a space before advancing to the next line.
func (s *Scanner) scanNewLine(ctx *Context, c rune) {
	// Remember where the buffered token started, so a multi-line scalar
	// reports the position of its first character.
	if len(ctx.buf) > 0 && s.savedPos == nil {
		s.savedPos = s.pos()
		s.savedPos.Column -= len(ctx.bufferedSrc())
	}

	// if the following case, origin buffer has unnecessary two spaces.
	// So, `removeRightSpaceFromOriginBuf` remove them, also fix column number too.
	// ---
	// a:[space][space]
	//   b: c
	removedNum := ctx.removeRightSpaceFromBuf()
	if removedNum > 0 {
		s.column -= removedNum
		s.offset -= removedNum
		if s.savedPos != nil {
			s.savedPos.Column -= removedNum
		}
	}

	// An anchor name (and anything buffered at end of source) ends at the
	// line break.
	if ctx.isEOS() {
		s.addBufferedTokenIfExists(ctx)
	} else if s.isAnchor {
		s.addBufferedTokenIfExists(ctx)
	}
	ctx.addBuf(' ')
	ctx.addOriginBuf(c)
	ctx.isSingleLine = false
	s.progressLine(ctx)
}
+
// scan is the main tokenization loop. It consumes characters from ctx until
// it has produced at least one complete token (most cases return immediately
// after emitting), and returns pos, the number of characters the caller
// should advance sourcePos by. Characters that do not start a token are
// accumulated in the context's buffer and flushed later as a scalar.
func (s *Scanner) scan(ctx *Context) (pos int) {
	for ctx.next() {
		pos = ctx.nextPos()
		c := ctx.currentChar()
		s.updateIndent(ctx, c)
		if ctx.isDocument() {
			// Inside a literal/folded block: indent falling back to (or
			// below) the block's level terminates it.
			if s.isChangedToIndentStateEqual() ||
				s.isChangedToIndentStateDown() {
				s.addBufferedTokenIfExists(ctx)
				s.breakLiteral(ctx)
			} else {
				s.scanLiteral(ctx, c)
				continue
			}
		} else if s.isChangedToIndentStateDown() {
			s.addBufferedTokenIfExists(ctx)
		} else if s.isChangedToIndentStateEqual() {
			// if first character is new line character, buffer expect to raw folded literal
			if len(ctx.obuf) > 0 && s.newLineCount(ctx.obuf) <= 1 {
				// doesn't raw folded literal
				s.addBufferedTokenIfExists(ctx)
			}
		}
		switch c {
		case '{':
			if !ctx.existsBuffer() {
				ctx.addOriginBuf(c)
				ctx.addToken(token.MappingStart(string(ctx.obuf), s.pos()))
				s.startedFlowMapNum++
				s.progressColumn(ctx, 1)
				return
			}
		case '}':
			if !ctx.existsBuffer() || s.startedFlowMapNum > 0 {
				ctx.addToken(s.bufferedToken(ctx))
				ctx.addOriginBuf(c)
				ctx.addToken(token.MappingEnd(string(ctx.obuf), s.pos()))
				s.startedFlowMapNum--
				s.progressColumn(ctx, 1)
				return
			}
		case '.':
			// "..." at column 1 is a document-end marker.
			if s.indentNum == 0 && s.column == 1 && ctx.repeatNum('.') == 3 {
				ctx.addToken(token.DocumentEnd(string(ctx.obuf)+"...", s.pos()))
				s.progressColumn(ctx, 3)
				pos += 2
				return
			}
		case '<':
			if s.isMergeKey(ctx) {
				s.prevIndentColumn = s.column
				ctx.addToken(token.MergeKey(string(ctx.obuf)+"<<", s.pos()))
				s.progressColumn(ctx, 1)
				pos++
				return
			}
		case '-':
			// "---" at column 1 is a document-start marker.
			if s.indentNum == 0 && s.column == 1 && ctx.repeatNum('-') == 3 {
				s.addBufferedTokenIfExists(ctx)
				ctx.addToken(token.DocumentHeader(string(ctx.obuf)+"---", s.pos()))
				s.progressColumn(ctx, 3)
				pos += 2
				return
			}
			if ctx.existsBuffer() && s.isChangedToIndentStateUp() {
				// raw folded
				ctx.isRawFolded = true
				ctx.addBuf(c)
				ctx.addOriginBuf(c)
				s.progressColumn(ctx, 1)
				continue
			}
			if ctx.existsBuffer() {
				// '-' is literal
				ctx.addBuf(c)
				ctx.addOriginBuf(c)
				s.progressColumn(ctx, 1)
				continue
			}
			// '-' followed by a space or line break is a sequence entry.
			nc := ctx.nextChar()
			if nc == ' ' || s.isNewLineChar(nc) {
				s.addBufferedTokenIfExists(ctx)
				ctx.addOriginBuf(c)
				tk := token.SequenceEntry(string(ctx.obuf), s.pos())
				s.prevIndentColumn = tk.Position.Column
				ctx.addToken(tk)
				s.progressColumn(ctx, 1)
				return
			}
		case '[':
			if !ctx.existsBuffer() {
				ctx.addOriginBuf(c)
				ctx.addToken(token.SequenceStart(string(ctx.obuf), s.pos()))
				s.startedFlowSequenceNum++
				s.progressColumn(ctx, 1)
				return
			}
		case ']':
			if !ctx.existsBuffer() || s.startedFlowSequenceNum > 0 {
				s.addBufferedTokenIfExists(ctx)
				ctx.addOriginBuf(c)
				ctx.addToken(token.SequenceEnd(string(ctx.obuf), s.pos()))
				s.startedFlowSequenceNum--
				s.progressColumn(ctx, 1)
				return
			}
		case ',':
			// ',' only separates entries inside a flow collection.
			if s.startedFlowSequenceNum > 0 || s.startedFlowMapNum > 0 {
				s.addBufferedTokenIfExists(ctx)
				ctx.addOriginBuf(c)
				ctx.addToken(token.CollectEntry(string(ctx.obuf), s.pos()))
				s.progressColumn(ctx, 1)
				return
			}
		case ':':
			nc := ctx.nextChar()
			if s.startedFlowMapNum > 0 || nc == ' ' || s.isNewLineChar(nc) || ctx.isNextEOS() {
				// mapping value
				tk := s.bufferedToken(ctx)
				if tk != nil {
					s.prevIndentColumn = tk.Position.Column
					ctx.addToken(tk)
				}
				ctx.addToken(token.MappingValue(s.pos()))
				s.progressColumn(ctx, 1)
				return
			}
		case '|', '>':
			if !ctx.existsBuffer() {
				progress, err := s.scanLiteralHeader(ctx)
				if err != nil {
					// TODO: returns syntax error object
					return
				}
				s.progressColumn(ctx, progress)
				s.progressLine(ctx)
				continue
			}
		case '!':
			if !ctx.existsBuffer() {
				token, progress := s.scanTag(ctx)
				ctx.addToken(token)
				s.progressColumn(ctx, progress)
				if c := ctx.previousChar(); s.isNewLineChar(c) {
					s.progressLine(ctx)
				}
				pos += progress
				return
			}
		case '%':
			if !ctx.existsBuffer() && s.indentNum == 0 {
				ctx.addToken(token.Directive(string(ctx.obuf)+"%", s.pos()))
				s.progressColumn(ctx, 1)
				return
			}
		case '?':
			nc := ctx.nextChar()
			if !ctx.existsBuffer() && nc == ' ' {
				ctx.addToken(token.MappingKey(s.pos()))
				s.progressColumn(ctx, 1)
				return
			}
		case '&':
			if !ctx.existsBuffer() {
				s.addBufferedTokenIfExists(ctx)
				ctx.addOriginBuf(c)
				ctx.addToken(token.Anchor(string(ctx.obuf), s.pos()))
				s.progressColumn(ctx, 1)
				s.isAnchor = true
				return
			}
		case '*':
			if !ctx.existsBuffer() {
				s.addBufferedTokenIfExists(ctx)
				ctx.addOriginBuf(c)
				ctx.addToken(token.Alias(string(ctx.obuf), s.pos()))
				s.progressColumn(ctx, 1)
				return
			}
		case '#':
			// A comment starts at beginning of a token or after a space.
			if !ctx.existsBuffer() || ctx.previousChar() == ' ' {
				s.addBufferedTokenIfExists(ctx)
				token, progress := s.scanComment(ctx)
				ctx.addToken(token)
				s.progressColumn(ctx, progress)
				s.progressLine(ctx)
				pos += progress
				return
			}
		case '\'', '"':
			if !ctx.existsBuffer() {
				token, progress := s.scanQuote(ctx, c)
				ctx.addToken(token)
				pos += progress
				return
			}
		case '\r', '\n':
			// There is no problem that we ignore CR which followed by LF and normalize it to LF, because of following YAML1.2 spec.
			// > Line breaks inside scalar content must be normalized by the YAML processor. Each such line break must be parsed into a single line feed character.
			// > Outside scalar content, YAML allows any line break to be used to terminate lines.
			// > -- https://yaml.org/spec/1.2/spec.html
			if c == '\r' && ctx.nextChar() == '\n' {
				ctx.addOriginBuf('\r')
				ctx.progress(1)
				c = '\n'
			}
			s.scanNewLine(ctx, c)
			continue
		case ' ':
			if ctx.isSaveIndentMode() || (!s.isAnchor && !s.isFirstCharAtLine) {
				ctx.addBuf(c)
				ctx.addOriginBuf(c)
				s.progressColumn(ctx, 1)
				continue
			}
			if s.isFirstCharAtLine {
				s.progressColumn(ctx, 1)
				ctx.addOriginBuf(c)
				continue
			}
			s.addBufferedTokenIfExists(ctx)
			pos-- // to rescan white space at next scanning for adding white space to next buffer.
			s.isAnchor = false
			return
		}
		// Default: accumulate the character into the scalar buffer.
		ctx.addBuf(c)
		ctx.addOriginBuf(c)
		s.progressColumn(ctx, 1)
	}
	s.addBufferedTokenIfExists(ctx)
	return
}
+
+// Init prepares the scanner s to tokenize the text src by setting the scanner at the beginning of src.
+func (s *Scanner) Init(text string) {
+ src := []rune(text)
+ s.source = src
+ s.sourcePos = 0
+ s.sourceSize = len(src)
+ s.line = 1
+ s.column = 1
+ s.offset = 1
+ s.prevIndentLevel = 0
+ s.prevIndentNum = 0
+ s.prevIndentColumn = 0
+ s.indentLevel = 0
+ s.indentNum = 0
+ s.isFirstCharAtLine = true
+}
+
+// Scan scans the next token and returns the token collection. The source end is indicated by io.EOF.
+func (s *Scanner) Scan() (token.Tokens, error) {
+ if s.sourcePos >= s.sourceSize {
+ return nil, io.EOF
+ }
+ ctx := newContext(s.source[s.sourcePos:])
+ defer ctx.release()
+ progress := s.scan(ctx)
+ s.sourcePos += progress
+ var tokens token.Tokens
+ tokens = append(tokens, ctx.tokens...)
+ return tokens, nil
+}