diff options
Diffstat (limited to 'dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner')
-rw-r--r-- | dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/context.go | 201 | ||||
-rw-r--r-- | dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/scanner.go | 876 |
2 files changed, 1077 insertions, 0 deletions
diff --git a/dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/context.go b/dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/context.go new file mode 100644 index 0000000..e629a5c --- /dev/null +++ b/dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/context.go @@ -0,0 +1,201 @@ +package scanner + +import ( + "sync" + + "github.com/goccy/go-yaml/token" +) + +// Context context at scanning +type Context struct { + idx int + size int + notSpaceCharPos int + notSpaceOrgCharPos int + src []rune + buf []rune + obuf []rune + tokens token.Tokens + isRawFolded bool + isLiteral bool + isFolded bool + isSingleLine bool + literalOpt string +} + +var ( + ctxPool = sync.Pool{ + New: func() interface{} { + return createContext() + }, + } +) + +func createContext() *Context { + return &Context{ + idx: 0, + tokens: token.Tokens{}, + isSingleLine: true, + } +} + +func newContext(src []rune) *Context { + ctx := ctxPool.Get().(*Context) + ctx.reset(src) + return ctx +} + +func (c *Context) release() { + ctxPool.Put(c) +} + +func (c *Context) reset(src []rune) { + c.idx = 0 + c.size = len(src) + c.src = src + c.tokens = c.tokens[:0] + c.resetBuffer() + c.isSingleLine = true +} + +func (c *Context) resetBuffer() { + c.buf = c.buf[:0] + c.obuf = c.obuf[:0] + c.notSpaceCharPos = 0 + c.notSpaceOrgCharPos = 0 +} + +func (c *Context) isSaveIndentMode() bool { + return c.isLiteral || c.isFolded || c.isRawFolded +} + +func (c *Context) breakLiteral() { + c.isLiteral = false + c.isRawFolded = false + c.isFolded = false + c.literalOpt = "" +} + +func (c *Context) addToken(tk *token.Token) { + if tk == nil { + return + } + c.tokens = append(c.tokens, tk) +} + +func (c *Context) addBuf(r rune) { + if len(c.buf) == 0 && r == ' ' { + return + } + c.buf = append(c.buf, r) + if r != ' ' && r != '\t' { + c.notSpaceCharPos = len(c.buf) + } +} + +func (c *Context) addOriginBuf(r rune) { + c.obuf = append(c.obuf, r) + if r != ' ' && r != '\t' { + c.notSpaceOrgCharPos = len(c.obuf) + } +} + +func (c *Context) removeRightSpaceFromBuf() int { + trimmedBuf := c.obuf[:c.notSpaceOrgCharPos] + buflen := len(trimmedBuf) + diff := len(c.obuf) - buflen + if diff > 0 { + c.obuf = c.obuf[:buflen] + c.buf = c.bufferedSrc() + } + return diff +} + +func (c *Context) isDocument() bool { + return c.isLiteral || c.isFolded || c.isRawFolded +} + +func (c *Context) isEOS() bool { + return len(c.src)-1 <= c.idx +} + +func (c *Context) isNextEOS() bool { + return len(c.src)-1 <= c.idx+1 +} + +func (c *Context) next() bool { + return c.idx < c.size +} + +func (c *Context) source(s, e int) string { + return string(c.src[s:e]) +} + +func (c *Context) previousChar() rune { + if c.idx > 0 { + return c.src[c.idx-1] + } + return rune(0) +} + +func (c *Context) currentChar() rune { + return c.src[c.idx] +} + +func (c *Context) nextChar() rune { + if c.size > c.idx+1 { + return c.src[c.idx+1] + } + return rune(0) +} + +func (c *Context) repeatNum(r rune) int { + cnt := 0 + for i := c.idx; i < c.size; i++ { + if c.src[i] == r { + cnt++ + } else { + break + } + } + return cnt +} + +func (c *Context) progress(num int) { + c.idx += num +} + +func (c *Context) nextPos() int { + return c.idx + 1 +} + +func (c *Context) existsBuffer() bool { + return len(c.bufferedSrc()) != 0 +} + +func (c *Context) bufferedSrc() []rune { + src := c.buf[:c.notSpaceCharPos] + if len(src) > 0 && src[len(src)-1] == '\n' && c.isDocument() && c.literalOpt == "-" { + // remove end '\n' character + src = src[:len(src)-1] + } + return src +} + +func (c *Context) bufferedToken(pos *token.Position) *token.Token { + if c.idx == 0 { + return nil + } + source := c.bufferedSrc() + if len(source) == 0 { + return nil + } + var tk *token.Token + if c.isDocument() { + tk = token.String(string(source), string(c.obuf), pos) + } else { + tk = token.New(string(source), string(c.obuf), pos) + } + c.resetBuffer() + return tk +} diff --git a/dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/scanner.go b/dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/scanner.go new file mode 100644 index 0000000..1e09190 --- /dev/null +++ b/dependencies/pkg/mod/github.com/goccy/go-yaml@v1.9.6/scanner/scanner.go @@ -0,0 +1,876 @@ +package scanner + +import ( + "io" + "strings" + + "github.com/goccy/go-yaml/token" + "golang.org/x/xerrors" +) + +// IndentState state for indent +type IndentState int + +const ( + // IndentStateEqual equals previous indent + IndentStateEqual IndentState = iota + // IndentStateUp more indent than previous + IndentStateUp + // IndentStateDown less indent than previous + IndentStateDown + // IndentStateKeep uses not indent token + IndentStateKeep +) + +// Scanner holds the scanner's internal state while processing a given text. +// It can be allocated as part of another data structure but must be initialized via Init before use. +type Scanner struct { + source []rune + sourcePos int + sourceSize int + line int + column int + offset int + prevIndentLevel int + prevIndentNum int + prevIndentColumn int + docStartColumn int + indentLevel int + indentNum int + isFirstCharAtLine bool + isAnchor bool + startedFlowSequenceNum int + startedFlowMapNum int + indentState IndentState + savedPos *token.Position +} + +func (s *Scanner) pos() *token.Position { + return &token.Position{ + Line: s.line, + Column: s.column, + Offset: s.offset, + IndentNum: s.indentNum, + IndentLevel: s.indentLevel, + } +} + +func (s *Scanner) bufferedToken(ctx *Context) *token.Token { + if s.savedPos != nil { + tk := ctx.bufferedToken(s.savedPos) + s.savedPos = nil + return tk + } + size := len(ctx.buf) + return ctx.bufferedToken(&token.Position{ + Line: s.line, + Column: s.column - size, + Offset: s.offset - size, + IndentNum: s.indentNum, + IndentLevel: s.indentLevel, + }) +} + +func (s *Scanner) progressColumn(ctx *Context, num int) { + s.column += num + s.offset += num + ctx.progress(num) +} + +func (s *Scanner) progressLine(ctx *Context) { + s.column = 1 + s.line++ + s.offset++ + s.indentNum = 0 + s.isFirstCharAtLine = true + s.isAnchor = false + ctx.progress(1) +} + +func (s *Scanner) isNeededKeepPreviousIndentNum(ctx *Context, c rune) bool { + if !s.isChangedToIndentStateUp() { + return false + } + if ctx.isDocument() { + return true + } + if c == '-' && ctx.existsBuffer() { + return true + } + return false +} + +func (s *Scanner) isNewLineChar(c rune) bool { + if c == '\n' { + return true + } + if c == '\r' { + return true + } + return false +} + +func (s *Scanner) newLineCount(src []rune) int { + size := len(src) + cnt := 0 + for i := 0; i < size; i++ { + c := src[i] + switch c { + case '\r': + if i+1 < size && src[i+1] == '\n' { + i++ + } + cnt++ + case '\n': + cnt++ + } + } + return cnt +} + +func (s *Scanner) updateIndentState(ctx *Context) { + indentNumBasedIndentState := s.indentState + if s.prevIndentNum < s.indentNum { + s.indentLevel = s.prevIndentLevel + 1 + indentNumBasedIndentState = IndentStateUp + } else if s.prevIndentNum == s.indentNum { + s.indentLevel = s.prevIndentLevel + indentNumBasedIndentState = IndentStateEqual + } else { + indentNumBasedIndentState = IndentStateDown + if s.prevIndentLevel > 0 { + s.indentLevel = s.prevIndentLevel - 1 + } + } + + if s.prevIndentColumn > 0 { + if s.prevIndentColumn < s.column { + s.indentState = IndentStateUp + } else if s.prevIndentColumn != s.column || indentNumBasedIndentState != IndentStateEqual { + // The following case ( current position is 'd' ), some variables becomes like here + // - prevIndentColumn: 1 of 'a' + // - indentNumBasedIndentState: IndentStateDown because d's indentNum(1) is less than c's indentNum(3). + // Therefore, s.prevIndentColumn(1) == s.column(1) is true, but we want to treat this as IndentStateDown. + // So, we look also current indentState value by the above prevIndentNum based logic, and determins finally indentState. + // --- + // a: + // b + // c + // d: e + // ^ + s.indentState = IndentStateDown + } else { + s.indentState = IndentStateEqual + } + } else { + s.indentState = indentNumBasedIndentState + } +} + +func (s *Scanner) updateIndent(ctx *Context, c rune) { + if s.isFirstCharAtLine && s.isNewLineChar(c) && ctx.isDocument() { + return + } + if s.isFirstCharAtLine && c == ' ' { + s.indentNum++ + return + } + if !s.isFirstCharAtLine { + s.indentState = IndentStateKeep + return + } + s.updateIndentState(ctx) + s.isFirstCharAtLine = false + if s.isNeededKeepPreviousIndentNum(ctx, c) { + return + } + if s.indentState != IndentStateUp { + s.prevIndentColumn = 0 + } + s.prevIndentNum = s.indentNum + s.prevIndentLevel = s.indentLevel +} + +func (s *Scanner) isChangedToIndentStateDown() bool { + return s.indentState == IndentStateDown +} + +func (s *Scanner) isChangedToIndentStateUp() bool { + return s.indentState == IndentStateUp +} + +func (s *Scanner) isChangedToIndentStateEqual() bool { + return s.indentState == IndentStateEqual +} + +func (s *Scanner) addBufferedTokenIfExists(ctx *Context) { + ctx.addToken(s.bufferedToken(ctx)) +} + +func (s *Scanner) breakLiteral(ctx *Context) { + s.docStartColumn = 0 + ctx.breakLiteral() +} + +func (s *Scanner) scanSingleQuote(ctx *Context) (tk *token.Token, pos int) { + ctx.addOriginBuf('\'') + srcpos := s.pos() + startIndex := ctx.idx + 1 + src := ctx.src + size := len(src) + value := []rune{} + isFirstLineChar := false + isNewLine := false + for idx := startIndex; idx < size; idx++ { + if !isNewLine { + s.progressColumn(ctx, 1) + } else { + isNewLine = false + } + c := src[idx] + pos = idx + 1 + ctx.addOriginBuf(c) + if s.isNewLineChar(c) { + value = append(value, ' ') + isFirstLineChar = true + isNewLine = true + s.progressLine(ctx) + continue + } else if c == ' ' && isFirstLineChar { + continue + } else if c != '\'' { + value = append(value, c) + isFirstLineChar = false + continue + } + if idx+1 < len(ctx.src) && ctx.src[idx+1] == '\'' { + // '' handle as ' character + value = append(value, c) + ctx.addOriginBuf(c) + idx++ + continue + } + s.progressColumn(ctx, 1) + tk = token.SingleQuote(string(value), string(ctx.obuf), srcpos) + pos = idx - startIndex + 1 + return + } + return +} + +func hexToInt(b rune) int { + if b >= 'A' && b <= 'F' { + return int(b) - 'A' + 10 + } + if b >= 'a' && b <= 'f' { + return int(b) - 'a' + 10 + } + return int(b) - '0' +} + +func hexRunesToInt(b []rune) int { + sum := 0 + for i := 0; i < len(b); i++ { + sum += hexToInt(b[i]) << (uint(len(b)-i-1) * 4) + } + return sum +} + +func (s *Scanner) scanDoubleQuote(ctx *Context) (tk *token.Token, pos int) { + ctx.addOriginBuf('"') + srcpos := s.pos() + startIndex := ctx.idx + 1 + src := ctx.src + size := len(src) + value := []rune{} + isFirstLineChar := false + isNewLine := false + for idx := startIndex; idx < size; idx++ { + if !isNewLine { + s.progressColumn(ctx, 1) + } else { + isNewLine = false + } + c := src[idx] + pos = idx + 1 + ctx.addOriginBuf(c) + if s.isNewLineChar(c) { + value = append(value, ' ') + isFirstLineChar = true + isNewLine = true + s.progressLine(ctx) + continue + } else if c == ' ' && isFirstLineChar { + continue + } else if c == '\\' { + isFirstLineChar = false + if idx+1 < size { + nextChar := src[idx+1] + switch nextChar { + case 'b': + ctx.addOriginBuf(nextChar) + value = append(value, '\b') + idx++ + continue + case 'e': + ctx.addOriginBuf(nextChar) + value = append(value, '\x1B') + idx++ + continue + case 'f': + ctx.addOriginBuf(nextChar) + value = append(value, '\f') + idx++ + continue + case 'n': + ctx.addOriginBuf(nextChar) + value = append(value, '\n') + idx++ + continue + case 'v': + ctx.addOriginBuf(nextChar) + value = append(value, '\v') + idx++ + continue + case 'L': // LS (#x2028) + ctx.addOriginBuf(nextChar) + value = append(value, []rune{'\xE2', '\x80', '\xA8'}...) + idx++ + continue + case 'N': // NEL (#x85) + ctx.addOriginBuf(nextChar) + value = append(value, []rune{'\xC2', '\x85'}...) + idx++ + continue + case 'P': // PS (#x2029) + ctx.addOriginBuf(nextChar) + value = append(value, []rune{'\xE2', '\x80', '\xA9'}...) + idx++ + continue + case '_': // #xA0 + ctx.addOriginBuf(nextChar) + value = append(value, []rune{'\xC2', '\xA0'}...) + idx++ + continue + case '"': + ctx.addOriginBuf(nextChar) + value = append(value, nextChar) + idx++ + continue + case 'x': + if idx+3 >= size { + // TODO: need to return error + //err = xerrors.New("invalid escape character \\x") + return + } + codeNum := hexRunesToInt(src[idx+2 : idx+4]) + value = append(value, rune(codeNum)) + idx += 3 + continue + case 'u': + if idx+5 >= size { + // TODO: need to return error + //err = xerrors.New("invalid escape character \\u") + return + } + codeNum := hexRunesToInt(src[idx+2 : idx+6]) + value = append(value, rune(codeNum)) + idx += 5 + continue + case 'U': + if idx+9 >= size { + // TODO: need to return error + //err = xerrors.New("invalid escape character \\U") + return + } + codeNum := hexRunesToInt(src[idx+2 : idx+10]) + value = append(value, rune(codeNum)) + idx += 9 + continue + case '\\': + ctx.addOriginBuf(nextChar) + idx++ + } + } + value = append(value, c) + continue + } else if c != '"' { + value = append(value, c) + isFirstLineChar = false + continue + } + s.progressColumn(ctx, 1) + tk = token.DoubleQuote(string(value), string(ctx.obuf), srcpos) + pos = idx - startIndex + 1 + return + } + return +} + +func (s *Scanner) scanQuote(ctx *Context, ch rune) (tk *token.Token, pos int) { + if ch == '\'' { + return s.scanSingleQuote(ctx) + } + return s.scanDoubleQuote(ctx) +} + +func (s *Scanner) isMergeKey(ctx *Context) bool { + if ctx.repeatNum('<') != 2 { + return false + } + src := ctx.src + size := len(src) + for idx := ctx.idx + 2; idx < size; idx++ { + c := src[idx] + if c == ' ' { + continue + } + if c != ':' { + return false + } + if idx+1 < size { + nc := src[idx+1] + if nc == ' ' || s.isNewLineChar(nc) { + return true + } + } + } + return false +} + +func (s *Scanner) scanTag(ctx *Context) (tk *token.Token, pos int) { + ctx.addOriginBuf('!') + ctx.progress(1) // skip '!' character + for idx, c := range ctx.src[ctx.idx:] { + pos = idx + 1 + ctx.addOriginBuf(c) + switch c { + case ' ', '\n', '\r': + value := ctx.source(ctx.idx-1, ctx.idx+idx) + tk = token.Tag(value, string(ctx.obuf), s.pos()) + pos = len([]rune(value)) + return + } + } + return +} + +func (s *Scanner) scanComment(ctx *Context) (tk *token.Token, pos int) { + ctx.addOriginBuf('#') + ctx.progress(1) // skip '#' character + for idx, c := range ctx.src[ctx.idx:] { + pos = idx + 1 + ctx.addOriginBuf(c) + switch c { + case '\n', '\r': + if ctx.previousChar() == '\\' { + continue + } + value := ctx.source(ctx.idx, ctx.idx+idx) + tk = token.Comment(value, string(ctx.obuf), s.pos()) + pos = len([]rune(value)) + 1 + return + } + } + return +} + +func trimCommentFromLiteralOpt(text string) (string, error) { + idx := strings.Index(text, "#") + if idx < 0 { + return text, nil + } + if idx == 0 { + return "", xerrors.New("invalid literal header") + } + return text[:idx-1], nil +} + +func (s *Scanner) scanLiteral(ctx *Context, c rune) { + ctx.addOriginBuf(c) + if ctx.isEOS() { + if ctx.isLiteral { + ctx.addBuf(c) + } + value := ctx.bufferedSrc() + ctx.addToken(token.String(string(value), string(ctx.obuf), s.pos())) + ctx.resetBuffer() + s.progressColumn(ctx, 1) + } else if s.isNewLineChar(c) { + if ctx.isLiteral { + ctx.addBuf(c) + } else { + ctx.addBuf(' ') + } + s.progressLine(ctx) + } else if s.isFirstCharAtLine && c == ' ' { + if 0 < s.docStartColumn && s.docStartColumn <= s.column { + ctx.addBuf(c) + } + s.progressColumn(ctx, 1) + } else { + if s.docStartColumn == 0 { + s.docStartColumn = s.column + } + ctx.addBuf(c) + s.progressColumn(ctx, 1) + } +} + +func (s *Scanner) scanLiteralHeader(ctx *Context) (pos int, err error) { + header := ctx.currentChar() + ctx.addOriginBuf(header) + ctx.progress(1) // skip '|' or '>' character + for idx, c := range ctx.src[ctx.idx:] { + pos = idx + ctx.addOriginBuf(c) + switch c { + case '\n', '\r': + value := ctx.source(ctx.idx, ctx.idx+idx) + opt := strings.TrimRight(value, " ") + orgOptLen := len(opt) + opt, err = trimCommentFromLiteralOpt(opt) + if err != nil { + return + } + switch opt { + case "", "+", "-", + "0", "1", "2", "3", "4", "5", "6", "7", "8", "9": + hasComment := len(opt) < orgOptLen + if header == '|' { + if hasComment { + commentLen := orgOptLen - len(opt) + headerPos := strings.Index(string(ctx.obuf), "|") + litBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos] + commentBuf := ctx.obuf[len(litBuf):] + ctx.addToken(token.Literal("|"+opt, string(litBuf), s.pos())) + s.column += len(litBuf) + s.offset += len(litBuf) + commentHeader := strings.Index(value, "#") + ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos())) + } else { + ctx.addToken(token.Literal("|"+opt, string(ctx.obuf), s.pos())) + } + ctx.isLiteral = true + } else if header == '>' { + if hasComment { + commentLen := orgOptLen - len(opt) + headerPos := strings.Index(string(ctx.obuf), ">") + foldedBuf := ctx.obuf[:len(ctx.obuf)-commentLen-headerPos] + commentBuf := ctx.obuf[len(foldedBuf):] + ctx.addToken(token.Folded(">"+opt, string(foldedBuf), s.pos())) + s.column += len(foldedBuf) + s.offset += len(foldedBuf) + commentHeader := strings.Index(value, "#") + ctx.addToken(token.Comment(string(value[commentHeader+1:]), string(commentBuf), s.pos())) + } else { + ctx.addToken(token.Folded(">"+opt, string(ctx.obuf), s.pos())) + } + ctx.isFolded = true + } + s.indentState = IndentStateKeep + ctx.resetBuffer() + ctx.literalOpt = opt + return + } + break + } + } + err = xerrors.New("invalid literal header") + return +} + +func (s *Scanner) scanNewLine(ctx *Context, c rune) { + if len(ctx.buf) > 0 && s.savedPos == nil { + s.savedPos = s.pos() + s.savedPos.Column -= len(ctx.bufferedSrc()) + } + + // if the following case, origin buffer has unnecessary two spaces. + // So, `removeRightSpaceFromOriginBuf` remove them, also fix column number too. + // --- + // a:[space][space] + // b: c + removedNum := ctx.removeRightSpaceFromBuf() + if removedNum > 0 { + s.column -= removedNum + s.offset -= removedNum + if s.savedPos != nil { + s.savedPos.Column -= removedNum + } + } + + if ctx.isEOS() { + s.addBufferedTokenIfExists(ctx) + } else if s.isAnchor { + s.addBufferedTokenIfExists(ctx) + } + ctx.addBuf(' ') + ctx.addOriginBuf(c) + ctx.isSingleLine = false + s.progressLine(ctx) +} + +func (s *Scanner) scan(ctx *Context) (pos int) { + for ctx.next() { + pos = ctx.nextPos() + c := ctx.currentChar() + s.updateIndent(ctx, c) + if ctx.isDocument() { + if s.isChangedToIndentStateEqual() || + s.isChangedToIndentStateDown() { + s.addBufferedTokenIfExists(ctx) + s.breakLiteral(ctx) + } else { + s.scanLiteral(ctx, c) + continue + } + } else if s.isChangedToIndentStateDown() { + s.addBufferedTokenIfExists(ctx) + } else if s.isChangedToIndentStateEqual() { + // if first character is new line character, buffer expect to raw folded literal + if len(ctx.obuf) > 0 && s.newLineCount(ctx.obuf) <= 1 { + // doesn't raw folded literal + s.addBufferedTokenIfExists(ctx) + } + } + switch c { + case '{': + if !ctx.existsBuffer() { + ctx.addOriginBuf(c) + ctx.addToken(token.MappingStart(string(ctx.obuf), s.pos())) + s.startedFlowMapNum++ + s.progressColumn(ctx, 1) + return + } + case '}': + if !ctx.existsBuffer() || s.startedFlowMapNum > 0 { + ctx.addToken(s.bufferedToken(ctx)) + ctx.addOriginBuf(c) + ctx.addToken(token.MappingEnd(string(ctx.obuf), s.pos())) + s.startedFlowMapNum-- + s.progressColumn(ctx, 1) + return + } + case '.': + if s.indentNum == 0 && s.column == 1 && ctx.repeatNum('.') == 3 { + ctx.addToken(token.DocumentEnd(string(ctx.obuf)+"...", s.pos())) + s.progressColumn(ctx, 3) + pos += 2 + return + } + case '<': + if s.isMergeKey(ctx) { + s.prevIndentColumn = s.column + ctx.addToken(token.MergeKey(string(ctx.obuf)+"<<", s.pos())) + s.progressColumn(ctx, 1) + pos++ + return + } + case '-': + if s.indentNum == 0 && s.column == 1 && ctx.repeatNum('-') == 3 { + s.addBufferedTokenIfExists(ctx) + ctx.addToken(token.DocumentHeader(string(ctx.obuf)+"---", s.pos())) + s.progressColumn(ctx, 3) + pos += 2 + return + } + if ctx.existsBuffer() && s.isChangedToIndentStateUp() { + // raw folded + ctx.isRawFolded = true + ctx.addBuf(c) + ctx.addOriginBuf(c) + s.progressColumn(ctx, 1) + continue + } + if ctx.existsBuffer() { + // '-' is literal + ctx.addBuf(c) + ctx.addOriginBuf(c) + s.progressColumn(ctx, 1) + continue + } + nc := ctx.nextChar() + if nc == ' ' || s.isNewLineChar(nc) { + s.addBufferedTokenIfExists(ctx) + ctx.addOriginBuf(c) + tk := token.SequenceEntry(string(ctx.obuf), s.pos()) + s.prevIndentColumn = tk.Position.Column + ctx.addToken(tk) + s.progressColumn(ctx, 1) + return + } + case '[': + if !ctx.existsBuffer() { + ctx.addOriginBuf(c) + ctx.addToken(token.SequenceStart(string(ctx.obuf), s.pos())) + s.startedFlowSequenceNum++ + s.progressColumn(ctx, 1) + return + } + case ']': + if !ctx.existsBuffer() || s.startedFlowSequenceNum > 0 { + s.addBufferedTokenIfExists(ctx) + ctx.addOriginBuf(c) + ctx.addToken(token.SequenceEnd(string(ctx.obuf), s.pos())) + s.startedFlowSequenceNum-- + s.progressColumn(ctx, 1) + return + } + case ',': + if s.startedFlowSequenceNum > 0 || s.startedFlowMapNum > 0 { + s.addBufferedTokenIfExists(ctx) + ctx.addOriginBuf(c) + ctx.addToken(token.CollectEntry(string(ctx.obuf), s.pos())) + s.progressColumn(ctx, 1) + return + } + case ':': + nc := ctx.nextChar() + if s.startedFlowMapNum > 0 || nc == ' ' || s.isNewLineChar(nc) || ctx.isNextEOS() { + // mapping value + tk := s.bufferedToken(ctx) + if tk != nil { + s.prevIndentColumn = tk.Position.Column + ctx.addToken(tk) + } + ctx.addToken(token.MappingValue(s.pos())) + s.progressColumn(ctx, 1) + return + } + case '|', '>': + if !ctx.existsBuffer() { + progress, err := s.scanLiteralHeader(ctx) + if err != nil { + // TODO: returns syntax error object + return + } + s.progressColumn(ctx, progress) + s.progressLine(ctx) + continue + } + case '!': + if !ctx.existsBuffer() { + token, progress := s.scanTag(ctx) + ctx.addToken(token) + s.progressColumn(ctx, progress) + if c := ctx.previousChar(); s.isNewLineChar(c) { + s.progressLine(ctx) + } + pos += progress + return + } + case '%': + if !ctx.existsBuffer() && s.indentNum == 0 { + ctx.addToken(token.Directive(string(ctx.obuf)+"%", s.pos())) + s.progressColumn(ctx, 1) + return + } + case '?': + nc := ctx.nextChar() + if !ctx.existsBuffer() && nc == ' ' { + ctx.addToken(token.MappingKey(s.pos())) + s.progressColumn(ctx, 1) + return + } + case '&': + if !ctx.existsBuffer() { + s.addBufferedTokenIfExists(ctx) + ctx.addOriginBuf(c) + ctx.addToken(token.Anchor(string(ctx.obuf), s.pos())) + s.progressColumn(ctx, 1) + s.isAnchor = true + return + } + case '*': + if !ctx.existsBuffer() { + s.addBufferedTokenIfExists(ctx) + ctx.addOriginBuf(c) + ctx.addToken(token.Alias(string(ctx.obuf), s.pos())) + s.progressColumn(ctx, 1) + return + } + case '#': + if !ctx.existsBuffer() || ctx.previousChar() == ' ' { + s.addBufferedTokenIfExists(ctx) + token, progress := s.scanComment(ctx) + ctx.addToken(token) + s.progressColumn(ctx, progress) + s.progressLine(ctx) + pos += progress + return + } + case '\'', '"': + if !ctx.existsBuffer() { + token, progress := s.scanQuote(ctx, c) + ctx.addToken(token) + pos += progress + return + } + case '\r', '\n': + // There is no problem that we ignore CR which followed by LF and normalize it to LF, because of following YAML1.2 spec. + // > Line breaks inside scalar content must be normalized by the YAML processor. Each such line break must be parsed into a single line feed character. + // > Outside scalar content, YAML allows any line break to be used to terminate lines. + // > -- https://yaml.org/spec/1.2/spec.html + if c == '\r' && ctx.nextChar() == '\n' { + ctx.addOriginBuf('\r') + ctx.progress(1) + c = '\n' + } + s.scanNewLine(ctx, c) + continue + case ' ': + if ctx.isSaveIndentMode() || (!s.isAnchor && !s.isFirstCharAtLine) { + ctx.addBuf(c) + ctx.addOriginBuf(c) + s.progressColumn(ctx, 1) + continue + } + if s.isFirstCharAtLine { + s.progressColumn(ctx, 1) + ctx.addOriginBuf(c) + continue + } + s.addBufferedTokenIfExists(ctx) + pos-- // to rescan white space at next scanning for adding white space to next buffer. + s.isAnchor = false + return + } + ctx.addBuf(c) + ctx.addOriginBuf(c) + s.progressColumn(ctx, 1) + } + s.addBufferedTokenIfExists(ctx) + return +} + +// Init prepares the scanner s to tokenize the text src by setting the scanner at the beginning of src. +func (s *Scanner) Init(text string) { + src := []rune(text) + s.source = src + s.sourcePos = 0 + s.sourceSize = len(src) + s.line = 1 + s.column = 1 + s.offset = 1 + s.prevIndentLevel = 0 + s.prevIndentNum = 0 + s.prevIndentColumn = 0 + s.indentLevel = 0 + s.indentNum = 0 + s.isFirstCharAtLine = true +} + +// Scan scans the next token and returns the token collection. The source end is indicated by io.EOF. +func (s *Scanner) Scan() (token.Tokens, error) { + if s.sourcePos >= s.sourceSize { + return nil, io.EOF + } + ctx := newContext(s.source[s.sourcePos:]) + defer ctx.release() + progress := s.scan(ctx) + s.sourcePos += progress + var tokens token.Tokens + tokens = append(tokens, ctx.tokens...) + return tokens, nil +} |