summaryrefslogtreecommitdiffstats
path: root/scripts/genksyms/lex.l
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/genksyms/lex.l')
-rw-r--r--scripts/genksyms/lex.l440
1 files changed, 440 insertions, 0 deletions
diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l
new file mode 100644
index 000000000..a4d7495ea
--- /dev/null
+++ b/scripts/genksyms/lex.l
@@ -0,0 +1,440 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Lexical analysis for genksyms.
+ * Copyright 1996, 1997 Linux International.
+ *
+ * New implementation contributed by Richard Henderson <rth@tamu.edu>
+ * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
+ *
+ * Taken from Linux modutils 2.4.22.
+ */
+
+%{
+
+#include <limits.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "genksyms.h"
+#include "parse.tab.h"
+
+/* We've got a two-level lexer here. We let flex do basic tokenization
+ and then we categorize those basic tokens in the second stage.
+
+ YY_DECL makes the flex-generated scanner a static function named
+ yylex1(), which leaves the name yylex() free for the second stage
+ defined after the rules section; that function calls yylex1(). */
+#define YY_DECL static int yylex1(void)
+
+%}
+/* Named patterns referenced from the rules section below.
+ * IDENT: an identifier; '$' is accepted in addition to letters, digits
+ * and '_'. */
+IDENT [A-Za-z_\$][A-Za-z0-9_\$]*
+/* Integer constants: octal, decimal or hexadecimal digits, optionally
+ * followed by one I_SUF suffix (u, l, ul or lu, in either case). I_SUF has
+ * no alternative for ll or ull. */
+O_INT 0[0-7]*
+D_INT [1-9][0-9]*
+X_INT 0[Xx][0-9A-Fa-f]+
+I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
+INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}?
+/* Floating constants: either a fraction with an optional exponent, or
+ * plain digits with a mandatory exponent; both forms take an optional
+ * f, F, l or L suffix. */
+FRAC ([0-9]*\.[0-9]+)|([0-9]+\.)
+EXP [Ee][+-]?[0-9]+
+F_SUF [FfLl]
+REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
+/* String and character literals with an optional L prefix. A backslash is
+ * always matched together with the character that follows it, so an
+ * escaped quote does not end the literal. */
+STRING L?\"([^\\\"]*\\.)*[^\\\"]*\"
+CHAR L?\'([^\\\']*\\.)*[^\\\']*\'
+/* Two-character operators: one of ~ % ^ & * + = | < > / - followed by '=',
+ * plus &&, ||, ->, << and >>. */
+MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
+
+/* We don't do multiple input files. */
+%option noyywrap
+
+%option noinput
+
+%%
+
+
+ /* Keep track of our location in the original source files.
+ A line of the form # <number> "<file>" is returned as FILENAME, with
+ the whole line (including its newline) left in yytext. Any other line
+ starting with '#', and every other newline, only increments
+ cur_line. */
+^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME;
+^#.*\n cur_line++;
+\n cur_line++;
+
+ /* Ignore all other whitespace. */
+[ \t\f\v\r]+ ;
+
+
+{STRING} return STRING;
+{CHAR} return CHAR;
+{IDENT} return IDENT;
+
+ /* The Pedant requires that the other C multi-character tokens be
+ recognized as tokens. We don't actually use them since we don't
+ parse expressions, but we do want whitespace to be arranged
+ around them properly. */
+{MC_TOKEN} return OTHER;
+{INT} return INT;
+{REAL} return REAL;
+
+"..." return DOTS;
+
+ /* All other tokens are single characters; the token code returned is
+ the character itself. */
+. return yytext[0];
+
+
+%%
+
+/* Bring in the keyword recognizer. */
+
+#include "keywords.c"
+
+
+/* Macros to append to our phrase collection list. */
+
+/*
+ * _APP(T, L) appends one token to the list: the placeholder that next_node
+ * points to becomes cur_node and receives the token, and a newly allocated
+ * placeholder whose ->next is cur_node becomes next_node. After an append,
+ * next_node->next is therefore the most recently appended node.
+ *
+ * T is the token text and L its length; L + 1 bytes are copied from T, so
+ * the copy is NUL-terminated only if T[L] is the terminating NUL. T and L
+ * are parenthesized in the expansion; L is evaluated twice and must not
+ * have side effects. cur_node, next_node and in_source_file must be in
+ * scope where the macro is used.
+ *
+ * We mark any token that equals a known enumerator as SYM_ENUM_CONST. The
+ * parser will change this for struct and union tags later; the only problem
+ * is struct and union members:
+ * enum e { a, b }; struct s { int a, b; }
+ * but in this case the only effect is that the ABI checksums become more
+ * volatile, which is acceptable. Also, such collisions are quite rare; so
+ * far one was only observed in include/linux/telephony.h.
+ */
+#define _APP(T,L) do { \
+ cur_node = next_node; \
+ next_node = xmalloc(sizeof(*next_node)); \
+ next_node->next = cur_node; \
+ cur_node->string = memcpy(xmalloc((L) + 1), (T), (L) + 1); \
+ cur_node->tag = \
+ find_symbol(cur_node->string, SYM_ENUM_CONST, 1) ? \
+ SYM_ENUM_CONST : SYM_NORMAL; \
+ cur_node->in_source_file = in_source_file; \
+ } while (0)
+
+/* Append the token flex matched most recently (yytext, yyleng). */
+#define APP _APP(yytext, yyleng)
+
+
+/* The second stage lexer. Here we incorporate knowledge of the state
+ of the parser to tailor the tokens that are returned.
+
+ Each call reads raw tokens from yylex1() until one token for the parser
+ is complete, appending token text to the node list with APP as it goes.
+ FILENAME markers are consumed here and never returned.
+
+ In ST_NORMAL a raw token is returned as is, except that an IDENT may be
+ replaced by its keyword code or by TYPE. ATTRIBUTE_KEYW, ASM_KEYW,
+ TYPEOF_KEYW, STATIC_ASSERT_KEYW, '[' and (unless dont_want_brace_phrase
+ is non-zero) '{' switch to a collecting state in which the tokens up to
+ the balancing delimiter are appended and a single ..._PHRASE token is
+ returned. '=' and ':' are returned themselves; the following call then
+ collects an EXPRESSION_PHRASE up to an unnested ',', ';' or '}', which
+ is pushed back with unput().
+
+ Returns 0 when yylex1() returns 0, otherwise the token code. Before a
+ token is returned, yylval is set to the address of next_node->next. */
+
+int
+yylex(void)
+{
+ static enum {
+ ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
+ ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
+ } lexstate = ST_NOTSTARTED;
+
+ static int suppress_type_lookup, dont_want_brace_phrase; /* countdowns, decremented at fini for every returned token */
+ static struct string_list *next_node; /* placeholder node; the next APP fills it in */
+ static char *source_file; /* file name from the first FILENAME marker seen */
+
+ int token, count = 0; /* count: nesting depth, starts at 0 on every call */
+ struct string_list *cur_node;
+
+ if (lexstate == ST_NOTSTARTED) /* first call only: create the initial placeholder */
+ {
+ next_node = xmalloc(sizeof(*next_node));
+ next_node->next = NULL;
+ lexstate = ST_NORMAL;
+ }
+
+repeat:
+ token = yylex1();
+
+ if (token == 0)
+ return 0;
+ else if (token == FILENAME)
+ {
+ char *file, *e;
+
+ /* Save the filename and line number for later error messages. */
+
+ if (cur_filename)
+ free(cur_filename);
+
+ file = strchr(yytext, '\"')+1; /* first character after the opening quote */
+ e = strchr(file, '\"');
+ *e = '\0'; /* cut yytext at the closing quote */
+ cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
+ cur_line = atoi(yytext+2); /* the number that follows the leading '#' */
+
+ if (!source_file) { /* the first marker defines source_file */
+ source_file = xstrdup(cur_filename);
+ in_source_file = 1;
+ } else {
+ in_source_file = (strcmp(cur_filename, source_file) == 0);
+ }
+
+ goto repeat; /* markers are not passed on to the parser */
+ }
+
+ switch (lexstate)
+ {
+ case ST_NORMAL:
+ switch (token)
+ {
+ case IDENT:
+ APP;
+ {
+ int r = is_reserved_word(yytext, yyleng);
+ if (r >= 0)
+ {
+ switch (token = r) /* the keyword code replaces IDENT */
+ {
+ case ATTRIBUTE_KEYW:
+ lexstate = ST_ATTRIBUTE;
+ count = 0;
+ goto repeat;
+ case ASM_KEYW:
+ lexstate = ST_ASM;
+ count = 0;
+ goto repeat;
+ case TYPEOF_KEYW:
+ lexstate = ST_TYPEOF;
+ count = 0;
+ goto repeat;
+
+ case STRUCT_KEYW:
+ case UNION_KEYW:
+ case ENUM_KEYW:
+ dont_want_brace_phrase = 3; /* fini decrements for this keyword too: non-zero for the next two tokens */
+ suppress_type_lookup = 2; /* likewise: non-zero for the next token only */
+ goto fini;
+
+ case EXPORT_SYMBOL_KEYW:
+ goto fini; /* skips the typedef lookup below */
+
+ case STATIC_ASSERT_KEYW:
+ lexstate = ST_STATIC_ASSERT;
+ count = 0;
+ goto repeat;
+ }
+ }
+ if (!suppress_type_lookup)
+ {
+ if (find_symbol(yytext, SYM_TYPEDEF, 1))
+ token = TYPE;
+ }
+ }
+ break;
+
+ case '[':
+ APP;
+ lexstate = ST_BRACKET;
+ count = 1;
+ goto repeat;
+
+ case '{':
+ APP;
+ if (dont_want_brace_phrase) /* return '{' itself instead of collecting a BRACE_PHRASE */
+ break;
+ lexstate = ST_BRACE;
+ count = 1;
+ goto repeat;
+
+ case '=': case ':':
+ APP;
+ lexstate = ST_EXPRESSION; /* takes effect on the next call; this token is returned now */
+ break;
+
+ default:
+ APP;
+ break;
+ }
+ break;
+
+ case ST_ATTRIBUTE:
+ APP;
+ switch (token)
+ {
+ case '(':
+ ++count;
+ goto repeat;
+ case ')':
+ if (--count == 0)
+ {
+ lexstate = ST_NORMAL;
+ token = ATTRIBUTE_PHRASE;
+ break;
+ }
+ goto repeat;
+ default:
+ goto repeat;
+ }
+ break;
+
+ case ST_ASM:
+ APP;
+ switch (token)
+ {
+ case '(':
+ ++count;
+ goto repeat;
+ case ')':
+ if (--count == 0)
+ {
+ lexstate = ST_NORMAL;
+ token = ASM_PHRASE;
+ break;
+ }
+ goto repeat;
+ default:
+ goto repeat;
+ }
+ break;
+
+ case ST_TYPEOF_1:
+ if (token == IDENT)
+ {
+ if (is_reserved_word(yytext, yyleng) >= 0
+ || find_symbol(yytext, SYM_TYPEDEF, 1))
+ {
+ yyless(0); /* give the identifier back to the scanner */
+ unput('('); /* and put the held-back '(' in front of it */
+ lexstate = ST_NORMAL;
+ token = TYPEOF_KEYW;
+ break;
+ }
+ _APP("(", 1); /* append the '(' that ST_TYPEOF held back */
+ }
+ lexstate = ST_TYPEOF; /* NOTE(review): if token is not an IDENT, the held-back '(' is never appended - confirm this is intended */
+ /* FALLTHRU */
+
+ case ST_TYPEOF:
+ switch (token)
+ {
+ case '(':
+ if ( ++count == 1 ) /* outermost '(': hold it back until the next token is known */
+ lexstate = ST_TYPEOF_1;
+ else
+ APP;
+ goto repeat;
+ case ')':
+ APP;
+ if (--count == 0)
+ {
+ lexstate = ST_NORMAL;
+ token = TYPEOF_PHRASE;
+ break;
+ }
+ goto repeat;
+ default:
+ APP;
+ goto repeat;
+ }
+ break;
+
+ case ST_BRACKET:
+ APP;
+ switch (token)
+ {
+ case '[':
+ ++count;
+ goto repeat;
+ case ']':
+ if (--count == 0)
+ {
+ lexstate = ST_NORMAL;
+ token = BRACKET_PHRASE;
+ break;
+ }
+ goto repeat;
+ default:
+ goto repeat;
+ }
+ break;
+
+ case ST_BRACE:
+ APP;
+ switch (token)
+ {
+ case '{':
+ ++count;
+ goto repeat;
+ case '}':
+ if (--count == 0)
+ {
+ lexstate = ST_NORMAL;
+ token = BRACE_PHRASE;
+ break;
+ }
+ goto repeat;
+ default:
+ goto repeat;
+ }
+ break;
+
+ case ST_EXPRESSION:
+ switch (token)
+ {
+ case '(': case '[': case '{':
+ ++count;
+ APP;
+ goto repeat;
+ case '}':
+ /* is this the last line of an enum declaration? */
+ if (count == 0)
+ {
+ /* Put back the token we just read so's we can find it again
+ after registering the expression. */
+ unput(token);
+
+ lexstate = ST_NORMAL;
+ token = EXPRESSION_PHRASE;
+ break;
+ }
+ /* FALLTHRU */
+ case ')': case ']':
+ --count;
+ APP;
+ goto repeat;
+ case ',': case ';':
+ if (count == 0)
+ {
+ /* Put back the token we just read so's we can find it again
+ after registering the expression. */
+ unput(token);
+
+ lexstate = ST_NORMAL;
+ token = EXPRESSION_PHRASE;
+ break;
+ }
+ APP;
+ goto repeat;
+ default:
+ APP;
+ goto repeat;
+ }
+ break;
+
+ case ST_STATIC_ASSERT:
+ APP;
+ switch (token)
+ {
+ case '(':
+ ++count;
+ goto repeat;
+ case ')':
+ if (--count == 0)
+ {
+ lexstate = ST_NORMAL;
+ token = STATIC_ASSERT_PHRASE;
+ break;
+ }
+ goto repeat;
+ default:
+ goto repeat;
+ }
+ break;
+
+ default:
+ exit(1); /* lexstate holds a value not handled above */
+ }
+fini:
+
+ if (suppress_type_lookup > 0)
+ --suppress_type_lookup;
+ if (dont_want_brace_phrase > 0)
+ --dont_want_brace_phrase;
+
+ yylval = &next_node->next; /* address of the link to the most recently appended node */
+
+ return token;
+}