diff options
Diffstat (limited to 'third_party/wasm2c/src/wast-lexer.cc')
-rw-r--r-- | third_party/wasm2c/src/wast-lexer.cc | 557 |
1 files changed, 557 insertions, 0 deletions
diff --git a/third_party/wasm2c/src/wast-lexer.cc b/third_party/wasm2c/src/wast-lexer.cc new file mode 100644 index 0000000000..05ac736a5a --- /dev/null +++ b/third_party/wasm2c/src/wast-lexer.cc @@ -0,0 +1,557 @@ +/* + * Copyright 2016 WebAssembly Community Group participants + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "src/wast-lexer.h" + +#include <cassert> +#include <cstdio> + +#include "config.h" + +#include "src/lexer-source.h" +#include "src/wast-parser.h" + +#define ERROR(...) parser->Error(GetLocation(), __VA_ARGS__) + +namespace wabt { + +namespace { + +#include "src/prebuilt/lexer-keywords.cc" + +} // namespace + +WastLexer::WastLexer(std::unique_ptr<LexerSource> source, string_view filename) + : source_(std::move(source)), + filename_(filename), + line_(1), + buffer_(static_cast<const char*>(source_->data())), + buffer_end_(buffer_ + source_->size()), + line_start_(buffer_), + token_start_(buffer_), + cursor_(buffer_) {} + +// static +std::unique_ptr<WastLexer> WastLexer::CreateBufferLexer(string_view filename, + const void* data, + size_t size) { + return MakeUnique<WastLexer>(MakeUnique<LexerSource>(data, size), filename); +} + +Token WastLexer::GetToken(WastParser* parser) { + while (true) { + token_start_ = cursor_; + switch (PeekChar()) { + case kEof: + return BareToken(TokenType::Eof); + + case '(': + if (MatchString("(;")) { + if (ReadBlockComment(parser)) { + continue; + } + return BareToken(TokenType::Eof); + } else if (MatchString("(@")) { + ReadReservedChars(); + // offset=2 to skip the "(@" prefix + return TextToken(TokenType::LparAnn, 2); + } else { + ReadChar(); + return BareToken(TokenType::Lpar); + } + break; + + case ')': + ReadChar(); + return BareToken(TokenType::Rpar); + + case ';': + if (MatchString(";;")) { + if (ReadLineComment()) { + continue; + } + return BareToken(TokenType::Eof); + } else { + ReadChar(); + ERROR("unexpected char"); + continue; + } + break; + + case ' ': + case '\t': + case '\r': + case '\n': + ReadWhitespace(); + continue; + + case '"': + return GetStringToken(parser); + + case '+': + case '-': + ReadChar(); + switch (PeekChar()) { + case 'i': + return GetInfToken(); + + case 'n': + return GetNanToken(); + + case '0': + return MatchString("0x") ? GetHexNumberToken(TokenType::Int) + : GetNumberToken(TokenType::Int); + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return GetNumberToken(TokenType::Int); + + default: + return GetReservedToken(); + } + break; + + case '0': + return MatchString("0x") ? GetHexNumberToken(TokenType::Nat) + : GetNumberToken(TokenType::Nat); + + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return GetNumberToken(TokenType::Nat); + + case '$': + return GetIdToken(); + + case 'a': + return GetNameEqNumToken("align=", TokenType::AlignEqNat); + + case 'i': + return GetInfToken(); + + case 'n': + return GetNanToken(); + + case 'o': + return GetNameEqNumToken("offset=", TokenType::OffsetEqNat); + + default: + if (IsKeyword(PeekChar())) { + return GetKeywordToken(); + } else if (IsReserved(PeekChar())) { + return GetReservedToken(); + } else { + ReadChar(); + ERROR("unexpected char"); + continue; + } + } + } +} + +Location WastLexer::GetLocation() { + auto column = [=](const char* p) { + return std::max(1, static_cast<int>(p - line_start_ + 1)); + }; + return Location(filename_, line_, column(token_start_), column(cursor_)); +} + +string_view WastLexer::GetText(size_t offset) { + return string_view(token_start_ + offset, (cursor_ - token_start_) - offset); +} + +Token WastLexer::BareToken(TokenType token_type) { + return Token(GetLocation(), token_type); +} + +Token WastLexer::LiteralToken(TokenType token_type, LiteralType literal_type) { + return Token(GetLocation(), token_type, Literal(literal_type, GetText())); +} + +Token WastLexer::TextToken(TokenType token_type, size_t offset) { + return Token(GetLocation(), token_type, GetText(offset)); +} + +int WastLexer::PeekChar() { + return cursor_ < buffer_end_ ? static_cast<uint8_t>(*cursor_) : kEof; +} + +int WastLexer::ReadChar() { + return cursor_ < buffer_end_ ? static_cast<uint8_t>(*cursor_++) : kEof; +} + +bool WastLexer::MatchChar(char c) { + if (PeekChar() == c) { + ReadChar(); + return true; + } + return false; +} + +bool WastLexer::MatchString(string_view s) { + const char* saved_cursor = cursor_; + for (char c : s) { + if (ReadChar() != c) { + cursor_ = saved_cursor; + return false; + } + } + return true; +} + +void WastLexer::Newline() { + line_++; + line_start_ = cursor_; +} + +bool WastLexer::ReadBlockComment(WastParser* parser) { + int nesting = 1; + while (true) { + switch (ReadChar()) { + case kEof: + ERROR("EOF in block comment"); + return false; + + case ';': + if (MatchChar(')') && --nesting == 0) { + return true; + } + break; + + case '(': + if (MatchChar(';')) { + nesting++; + } + break; + + case '\n': + Newline(); + break; + } + } +} + +bool WastLexer::ReadLineComment() { + while (true) { + switch (ReadChar()) { + case kEof: + return false; + + case '\n': + Newline(); + return true; + } + } +} + +void WastLexer::ReadWhitespace() { + while (true) { + switch (PeekChar()) { + case ' ': + case '\t': + case '\r': + ReadChar(); + break; + + case '\n': + ReadChar(); + Newline(); + break; + + default: + return; + } + } +} + +Token WastLexer::GetStringToken(WastParser* parser) { + const char* saved_token_start = token_start_; + bool has_error = false; + bool in_string = true; + ReadChar(); + while (in_string) { + switch (ReadChar()) { + case kEof: + return BareToken(TokenType::Eof); + + case '\n': + token_start_ = cursor_ - 1; + ERROR("newline in string"); + has_error = true; + Newline(); + continue; + + case '"': + in_string = false; + break; + + case '\\': { + switch (ReadChar()) { + case 't': + case 'n': + case 'r': + case '"': + case '\'': + case '\\': + // Valid escape. + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': // Hex byte escape. + if (IsHexDigit(PeekChar())) { + ReadChar(); + } else { + token_start_ = cursor_ - 2; + goto error; + } + break; + + default: + token_start_ = cursor_ - 2; + goto error; + + error: + ERROR("bad escape \"%.*s\"", + static_cast<int>(cursor_ - token_start_), token_start_); + has_error = true; + break; + } + break; + } + } + } + token_start_ = saved_token_start; + if (has_error) { + return Token(GetLocation(), TokenType::Invalid); + } + + return TextToken(TokenType::Text); +} + +// static +bool WastLexer::IsCharClass(int c, CharClass bit) { + // Generated by the following python script: + // + // def Range(c, lo, hi): return lo <= c <= hi + // def IsDigit(c): return Range(c, '0', '9') + // def IsHexDigit(c): return IsDigit(c) or Range(c.lower(), 'a', 'f') + // def IsKeyword(c): return Range(c, 'a', 'z') + // def IsReserved(c): return Range(c, '!', '~') and c not in '"(),;[]{}' + // + // print ([0] + [ + // (8 if IsDigit(c) else 0) | + // (4 if IsHexDigit(c) else 0) | + // (2 if IsKeyword(c) else 0) | + // (1 if IsReserved(c) else 0) + // for c in map(chr, range(0, 127)) + // ]) + static const char kCharClasses[257] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, + 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 1, 0, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, + 1, 1, 1, 7, 7, 7, 7, 7, 7, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 0, 1, 0, 1, + }; + + assert(c >= -1 && c < 256); + return (kCharClasses[c + 1] & static_cast<int>(bit)) != 0; +} + +bool WastLexer::ReadNum() { + if (IsDigit(PeekChar())) { + ReadChar(); + return MatchChar('_') || IsDigit(PeekChar()) ? ReadNum() : true; + } + return false; +} + +bool WastLexer::ReadHexNum() { + if (IsHexDigit(PeekChar())) { + ReadChar(); + return MatchChar('_') || IsHexDigit(PeekChar()) ? ReadHexNum() : true; + } + return false; +} + +int WastLexer::ReadReservedChars() { + int count = 0; + while (IsReserved(PeekChar())) { + ReadChar(); + ++count; + } + return count; +} + +void WastLexer::ReadSign() { + if (PeekChar() == '+' || PeekChar() == '-') { + ReadChar(); + } +} + +Token WastLexer::GetNumberToken(TokenType token_type) { + if (ReadNum()) { + if (MatchChar('.')) { + token_type = TokenType::Float; + if (IsDigit(PeekChar()) && !ReadNum()) { + return GetReservedToken(); + } + } + if (MatchChar('e') || MatchChar('E')) { + token_type = TokenType::Float; + ReadSign(); + if (!ReadNum()) { + return GetReservedToken(); + } + } + if (NoTrailingReservedChars()) { + if (token_type == TokenType::Float) { + return LiteralToken(token_type, LiteralType::Float); + } else { + return LiteralToken(token_type, LiteralType::Int); + } + } + } + return GetReservedToken(); +} + +Token WastLexer::GetHexNumberToken(TokenType token_type) { + if (ReadHexNum()) { + if (MatchChar('.')) { + token_type = TokenType::Float; + if (IsHexDigit(PeekChar()) && !ReadHexNum()) { + return GetReservedToken(); + } + } + if (MatchChar('p') || MatchChar('P')) { + token_type = TokenType::Float; + ReadSign(); + if (!ReadNum()) { + return GetReservedToken(); + } + } + if (NoTrailingReservedChars()) { + if (token_type == TokenType::Float) { + return LiteralToken(token_type, LiteralType::Hexfloat); + } else { + return LiteralToken(token_type, LiteralType::Int); + } + } + } + return GetReservedToken(); +} + +Token WastLexer::GetInfToken() { + if (MatchString("inf")) { + if (NoTrailingReservedChars()) { + return LiteralToken(TokenType::Float, LiteralType::Infinity); + } + return GetReservedToken(); + } + return GetKeywordToken(); +} + +Token WastLexer::GetNanToken() { + if (MatchString("nan")) { + if (MatchChar(':')) { + if (MatchString("0x") && ReadHexNum() && NoTrailingReservedChars()) { + return LiteralToken(TokenType::Float, LiteralType::Nan); + } + } else if (NoTrailingReservedChars()) { + return LiteralToken(TokenType::Float, LiteralType::Nan); + } + } + return GetKeywordToken(); +} + +Token WastLexer::GetNameEqNumToken(string_view name, TokenType token_type) { + if (MatchString(name)) { + if (MatchString("0x")) { + if (ReadHexNum() && NoTrailingReservedChars()) { + return TextToken(token_type, name.size()); + } + } else if (ReadNum() && NoTrailingReservedChars()) { + return TextToken(token_type, name.size()); + } + } + return GetKeywordToken(); +} + +Token WastLexer::GetIdToken() { + ReadChar(); + if (NoTrailingReservedChars()) { + return TextToken(TokenType::Reserved); + } + return TextToken(TokenType::Var); +} + +Token WastLexer::GetKeywordToken() { + ReadReservedChars(); + TokenInfo* info = + Perfect_Hash::InWordSet(token_start_, cursor_ - token_start_); + if (!info) { + return TextToken(TokenType::Reserved); + } + if (IsTokenTypeBare(info->token_type)) { + return BareToken(info->token_type); + } else if (IsTokenTypeType(info->token_type) || + IsTokenTypeRefKind(info->token_type)) { + return Token(GetLocation(), info->token_type, info->value_type); + } else { + assert(IsTokenTypeOpcode(info->token_type)); + return Token(GetLocation(), info->token_type, info->opcode); + } +} + +Token WastLexer::GetReservedToken() { + ReadReservedChars(); + return TextToken(TokenType::Reserved); +} + +} // namespace wabt |