diff options
Diffstat (limited to 'gfx/skia/skia/src/sksl/lex/Main.cpp')
-rw-r--r-- | gfx/skia/skia/src/sksl/lex/Main.cpp | 238 |
1 file changed, 238 insertions, 0 deletions
diff --git a/gfx/skia/skia/src/sksl/lex/Main.cpp b/gfx/skia/skia/src/sksl/lex/Main.cpp new file mode 100644 index 0000000000..ab4e3a618b --- /dev/null +++ b/gfx/skia/skia/src/sksl/lex/Main.cpp @@ -0,0 +1,238 @@ +/* + * Copyright 2017 Google Inc. + * + * Use of this source code is governed by a BSD-style license that can be + * found in the LICENSE file. + */ + +#include "src/sksl/lex/DFA.h" +#include "src/sksl/lex/LexUtil.h" +#include "src/sksl/lex/NFA.h" +#include "src/sksl/lex/NFAtoDFA.h" +#include "src/sksl/lex/RegexNode.h" +#include "src/sksl/lex/RegexParser.h" +#include "src/sksl/lex/TransitionTable.h" + +#include <stdio.h> +#include <stdlib.h> +#include <algorithm> +#include <sstream> +#include <string> +#include <vector> + +/** + * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex + * file is a text file with one token definition per line. Each line is of the form: + * <TOKEN_NAME> = <pattern> + * where <pattern> is either a regular expression (e.g [0-9]) or a double-quoted literal string. + */ + +static constexpr const char HEADER[] = + "/*\n" + " * Copyright 2017 Google Inc.\n" + " *\n" + " * Use of this source code is governed by a BSD-style license that can be\n" + " * found in the LICENSE file.\n" + " */\n" + "/*****************************************************************************************\n" + " ******************** This file was generated by sksllex. Do not edit. 
*******************\n" + " *****************************************************************************************/\n"; + +static void writeH(const DFA& dfa, const char* lexer, const char* token, + const std::vector<std::string>& tokens, const char* hPath) { + std::ofstream out(hPath); + SkASSERT(out.good()); + out << HEADER; + out << "#ifndef SKSL_" << lexer << "\n"; + out << "#define SKSL_" << lexer << "\n"; + out << "#include <cstdint>\n"; + out << "#include <string_view>\n"; + out << "namespace SkSL {\n"; + out << "\n"; + out << "struct " << token << " {\n"; + out << " enum class Kind {\n"; + for (const std::string& t : tokens) { + out << " TK_" << t << ",\n"; + } + out << " TK_NONE,"; + out << R"( + }; + + )" << token << "() {}"; + + out << token << R"((Kind kind, int32_t offset, int32_t length) + : fKind(kind) + , fOffset(offset) + , fLength(length) {} + + Kind fKind = Kind::TK_NONE; + int32_t fOffset = -1; + int32_t fLength = -1; +}; + +class )" << lexer << R"( { +public: + void start(std::string_view text) { + fText = text; + fOffset = 0; + } + + )" << token << R"( next(); + + struct Checkpoint { + int32_t fOffset; + }; + + Checkpoint getCheckpoint() const { + return {fOffset}; + } + + void rewindToCheckpoint(Checkpoint checkpoint) { + fOffset = checkpoint.fOffset; + } + +private: + std::string_view fText; + int32_t fOffset; +}; + +} // namespace +#endif +)"; +} + +static void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include, + const char* cppPath) { + std::ofstream out(cppPath); + SkASSERT(out.good()); + out << HEADER; + out << "#include \"" << include << "\"\n"; + out << "\n"; + out << "namespace SkSL {\n"; + out << "\n"; + + size_t states = 0; + for (const auto& row : dfa.fTransitions) { + states = std::max(states, row.size()); + } + out << "using State = " << (states <= 256 ? "uint8_t" : "uint16_t") << ";\n"; + + // Find the first character mapped in our DFA. 
+ size_t startChar = 0; + for (; startChar < dfa.fCharMappings.size(); ++startChar) { + if (dfa.fCharMappings[startChar] != 0) { + break; + } + } + + // Arbitrarily-chosen character which is greater than startChar, and should not appear in actual + // input. + SkASSERT(startChar < 18); + out << "static constexpr uint8_t kInvalidChar = 18;"; + out << "static constexpr int8_t kMappings[" << dfa.fCharMappings.size() - startChar << "] = {\n" + " "; + const char* separator = ""; + for (size_t index = startChar; index < dfa.fCharMappings.size(); ++index) { + out << separator << std::to_string(dfa.fCharMappings[index]); + separator = ", "; + } + out << "\n};\n"; + + WriteTransitionTable(out, dfa, states); + + out << "static const int8_t kAccepts[" << states << "] = {"; + for (size_t i = 0; i < states; ++i) { + if (i < dfa.fAccepts.size()) { + out << " " << dfa.fAccepts[i] << ","; + } else { + out << " " << INVALID << ","; + } + } + out << " };\n"; + out << "\n"; + + out << token << " " << lexer << "::next() {"; + out << R"( + // note that we cheat here: normally a lexer needs to worry about the case + // where a token has a prefix which is not itself a valid token - for instance, + // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid + // tokens. Our grammar doesn't have this property, so we can simplify the logic + // a bit. 
+ int32_t startOffset = fOffset; + State state = 1; + for (;;) { + if (fOffset >= (int32_t)fText.length()) { + if (startOffset == (int32_t)fText.length() || kAccepts[state] == -1) { + return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0); + } + break; + } + uint8_t c = (uint8_t)(fText[fOffset] - )" << startChar << R"(); + if (c >= )" << dfa.fCharMappings.size() - startChar << R"() { + c = kInvalidChar; + } + State newState = get_transition(kMappings[c], state); + if (!newState) { + break; + } + state = newState; + ++fOffset; + } + Token::Kind kind = ()" << token << R"(::Kind) kAccepts[state]; + return )" << token << R"((kind, startOffset, fOffset - startOffset); +} + +} // namespace +)"; +} + +static void process(const char* inPath, const char* lexer, const char* token, const char* hPath, + const char* cppPath) { + NFA nfa; + std::vector<std::string> tokens; + tokens.push_back("END_OF_FILE"); + std::string line; + std::ifstream in(inPath); + while (std::getline(in, line)) { + if (line.length() == 0) { + continue; + } + if (line.length() >= 2 && line[0] == '/' && line[1] == '/') { + continue; + } + std::istringstream split(line); + std::string name, delimiter, pattern; + if (split >> name >> delimiter >> pattern) { + SkASSERT(split.eof()); + SkASSERT(name != ""); + SkASSERT(delimiter == "="); + SkASSERT(pattern != ""); + tokens.push_back(name); + if (pattern[0] == '"') { + SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"'); + RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]); + for (size_t i = 2; i < pattern.size() - 1; ++i) { + node = RegexNode(RegexNode::kConcat_Kind, node, + RegexNode(RegexNode::kChar_Kind, pattern[i])); + } + nfa.addRegex(node); + } + else { + nfa.addRegex(RegexParser().parse(pattern)); + } + } + } + NFAtoDFA converter(&nfa); + DFA dfa = converter.convert(); + writeH(dfa, lexer, token, tokens, hPath); + writeCPP(dfa, lexer, token, (std::string("src/sksl/SkSL") + lexer + ".h").c_str(), 
cppPath); +} + +int main(int argc, const char** argv) { + if (argc != 6) { + printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n"); + exit(1); + } + process(argv[1], argv[2], argv[3], argv[4], argv[5]); + return 0; +} |