diff options
Diffstat (limited to 'scripts/genksyms/lex.l')
-rw-r--r-- | scripts/genksyms/lex.l | 440 |
1 files changed, 440 insertions, 0 deletions
diff --git a/scripts/genksyms/lex.l b/scripts/genksyms/lex.l new file mode 100644 index 0000000000..a4d7495eaf --- /dev/null +++ b/scripts/genksyms/lex.l @@ -0,0 +1,440 @@
/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * Lexical analysis for genksyms.
 * Copyright 1996, 1997 Linux International.
 *
 * New implementation contributed by Richard Henderson <rth@tamu.edu>
 * Based on original work by Bjorn Ekwall <bj0rn@blox.se>
 *
 * Taken from Linux modutils 2.4.22.
 */

%{

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "genksyms.h"
#include "parse.tab.h"

/* We've got a two-level lexer here.  We let flex do basic tokenization
   and then we categorize those basic tokens in the second stage.  */
#define YY_DECL         static int yylex1(void)

%}

/* Identifiers: C identifiers, with '$' additionally accepted.  */
IDENT                   [A-Za-z_\$][A-Za-z0-9_\$]*

/* Integer literals: octal, decimal and hexadecimal, each optionally
   followed by one of the suffixes U, L, UL or LU (either case).  */
O_INT                   0[0-7]*
D_INT                   [1-9][0-9]*
X_INT                   0[Xx][0-9A-Fa-f]+
I_SUF                   [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT                     ({O_INT}|{D_INT}|{X_INT}){I_SUF}?

/* Floating literals: a fraction with optional exponent, or digits with a
   mandatory exponent; an optional F or L suffix in both forms.  */
FRAC                    ([0-9]*\.[0-9]+)|([0-9]+\.)
EXP                     [Ee][+-]?[0-9]+
F_SUF                   [FfLl]
REAL                    ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)

/* String and character literals, with an optional L prefix.  A backslash
   always consumes the character that follows it.  */
STRING                  L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR                    L?\'([^\\\']*\\.)*[^\\\']*\'

/* Two-character operators: <op>= forms, &&, ||, ->, << and >>.  */
MC_TOKEN                ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)

/* We don't do multiple input files.  */
%option noyywrap

%option noinput

%%


 /* Keep track of our location in the original source files.  A line of
    the form `# <int> "<name>" ...' is handed to the second stage as
    FILENAME; any other '#' line and every newline just bump cur_line.  */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n     return FILENAME;
^#.*\n                                  cur_line++;
\n                                      cur_line++;

 /* Ignore all other whitespace.  */
[ \t\f\v\r]+                            ;


{STRING}                                return STRING;
{CHAR}                                  return CHAR;
{IDENT}                                 return IDENT;

 /* The Pedant requires that the other C multi-character tokens be
    recognized as tokens.  We don't actually use them since we don't
    parse expressions, but we do want whitespace to be arranged
    around them properly.  */
{MC_TOKEN}                              return OTHER;
{INT}                                   return INT;
{REAL}                                  return REAL;

"..."                                   return DOTS;

 /* All other tokens are single characters.  */
.                                       return yytext[0];


%%

/* Bring in the keyword recognizer.  */

#include "keywords.c"


/* Macros to append to our phrase collection list.  */

/*
 * _APP(T, L) stores a copy of the L-character, NUL-terminated text T in the
 * current placeholder node and allocates a fresh placeholder that links
 * back to it, so next_node->next is always the most recently appended node.
 * It relies on the locals cur_node and next_node of yylex().
 *
 * We mark any token that equals a known enumerator as SYM_ENUM_CONST.
 * The parser will change this for struct and union tags later,
 * the only problem is struct and union members:
 *    enum e { a, b }; struct s { int a, b; }
 * but in this case, the only effect will be, that the ABI checksums become
 * more volatile, which is acceptable. Also, such collisions are quite rare,
 * so far it was only observed in include/linux/telephony.h.
 */
#define _APP(T,L)       do {                                               \
          cur_node = next_node;                                            \
          next_node = xmalloc(sizeof(*next_node));                         \
          next_node->next = cur_node;                                      \
          cur_node->string = memcpy(xmalloc((L)+1), (T), (L)+1);           \
          cur_node->tag =                                                  \
            find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?              \
            SYM_ENUM_CONST : SYM_NORMAL ;                                  \
          cur_node->in_source_file = in_source_file;                       \
        } while (0)

/* Append the text of the token flex has just matched.  */
#define APP             _APP(yytext, yyleng)


/* The second stage lexer.  Here we incorporate knowledge of the state
   of the parser to tailor the tokens that are returned.

   Basic tokens are pulled from yylex1().  Depending on lexstate, a run of
   basic tokens (an attribute, an asm, a typeof, a bracketed or braced
   group, an initializer/bit-field expression, a static assert) is
   collected into the node list and reported as a single *_PHRASE token.

   Returns 0 when yylex1() reports end of input, otherwise the token code.
   On return yylval points at the `next' link of the placeholder node,
   i.e. at the most recently appended node.  */

int
yylex(void)
{
  static enum {
    ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
    ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
  } lexstate = ST_NOTSTARTED;

  /* Countdowns, decremented once per returned token (see fini).  While
     suppress_type_lookup is non-zero identifiers are not turned into TYPE;
     while dont_want_brace_phrase is non-zero a '{' is returned as itself
     instead of starting a BRACE_PHRASE.  */
  static int suppress_type_lookup, dont_want_brace_phrase;
  static struct string_list *next_node;
  /* Name from the first FILENAME marker seen; later markers are compared
     against it to maintain in_source_file.  */
  static char *source_file;

  /* count is the nesting depth inside the phrase being collected; it is
     reset on every call because a phrase never spans two calls.  */
  int token, count = 0;
  struct string_list *cur_node;

  if (lexstate == ST_NOTSTARTED)
    {
      /* First call: create the initial placeholder node.  */
      next_node = xmalloc(sizeof(*next_node));
      next_node->next = NULL;
      lexstate = ST_NORMAL;
    }

repeat:
  token = yylex1();

  if (token == 0)
    return 0;
  else if (token == FILENAME)
    {
      char *file, *e;

      /* Save the filename and line number for later error messages.  */

      /* free(NULL) is a no-op, so no guard is needed.  */
      free(cur_filename);

      /* The FILENAME pattern guarantees both quotes are present.  The
         closing quote is overwritten in yytext to terminate the name.  */
      file = strchr(yytext, '\"')+1;
      e = strchr(file, '\"');
      *e = '\0';
      cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
      /* yytext+2 skips the '#' and the first blank after it.  */
      cur_line = atoi(yytext+2);

      if (!source_file) {
        source_file = xstrdup(cur_filename);
        in_source_file = 1;
      } else {
        in_source_file = (strcmp(cur_filename, source_file) == 0);
      }

      goto repeat;
    }

  switch (lexstate)
    {
    case ST_NORMAL:
      switch (token)
        {
        case IDENT:
          APP;
          {
            int r = is_reserved_word(yytext, yyleng);
            if (r >= 0)
              {
                switch (token = r)
                  {
                  /* These keywords open a phrase: nothing is returned yet,
                     the following tokens are collected in the new state.  */
                  case ATTRIBUTE_KEYW:
                    lexstate = ST_ATTRIBUTE;
                    count = 0;
                    goto repeat;
                  case ASM_KEYW:
                    lexstate = ST_ASM;
                    count = 0;
                    goto repeat;
                  case TYPEOF_KEYW:
                    lexstate = ST_TYPEOF;
                    count = 0;
                    goto repeat;

                  case STRUCT_KEYW:
                  case UNION_KEYW:
                  case ENUM_KEYW:
                    /* The countdowns cover this keyword plus the next one
                       (type lookup) or next two (brace) returned tokens.  */
                    dont_want_brace_phrase = 3;
                    suppress_type_lookup = 2;
                    goto fini;

                  case EXPORT_SYMBOL_KEYW:
                    /* Returned as-is, skipping the typedef lookup below.  */
                    goto fini;

                  case STATIC_ASSERT_KEYW:
                    lexstate = ST_STATIC_ASSERT;
                    count = 0;
                    goto repeat;

                  default:
                    break;
                  }
              }
            if (!suppress_type_lookup)
              {
                if (find_symbol(yytext, SYM_TYPEDEF, 1))
                  token = TYPE;
              }
          }
          break;

        case '[':
          APP;
          lexstate = ST_BRACKET;
          count = 1;
          goto repeat;

        case '{':
          APP;
          if (dont_want_brace_phrase)
            break;
          lexstate = ST_BRACE;
          count = 1;
          goto repeat;

        case '=': case ':':
          /* The token itself is returned now; the expression after it is
             collected on the next call.  */
          APP;
          lexstate = ST_EXPRESSION;
          break;

        default:
          APP;
          break;
        }
      break;

    case ST_ATTRIBUTE:
      /* Collect everything up to the ')' that balances the first '('.  */
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ATTRIBUTE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_ASM:
      /* Same parenthesis matching as ST_ATTRIBUTE.  */
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = ASM_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_TYPEOF_1:
      /* First token after the opening '(' of a typeof.  The '(' has not
         been appended yet.  */
      if (token == IDENT)
        {
          if (is_reserved_word(yytext, yyleng) >= 0
              || find_symbol(yytext, SYM_TYPEDEF, 1))
            {
              /* The operand starts with a keyword or typedef name: push
                 the identifier and the '(' back onto the input and return
                 a plain TYPEOF_KEYW so they are lexed again in ST_NORMAL.  */
              yyless(0);
              unput('(');
              lexstate = ST_NORMAL;
              token = TYPEOF_KEYW;
              break;
            }
          _APP("(", 1);
        }
        /* NOTE(review): when the first token is not an identifier the '('
           is never appended to the phrase.  Left unchanged because the
           collected text presumably feeds the checksums - confirm before
           altering.  */
        lexstate = ST_TYPEOF;
        /* FALLTHRU */

    case ST_TYPEOF:
      switch (token)
        {
        case '(':
          /* The outermost '(' is deferred to ST_TYPEOF_1, not appended.  */
          if ( ++count == 1 )
            lexstate = ST_TYPEOF_1;
          else
            APP;
          goto repeat;
        case ')':
          APP;
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = TYPEOF_PHRASE;
              break;
            }
          goto repeat;
        default:
          APP;
          goto repeat;
        }
      break;

    case ST_BRACKET:
      /* Entered with count == 1 after a '['; ends at the matching ']'.  */
      APP;
      switch (token)
        {
        case '[':
          ++count;
          goto repeat;
        case ']':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACKET_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_BRACE:
      /* Entered with count == 1 after a '{'; ends at the matching '}'.  */
      APP;
      switch (token)
        {
        case '{':
          ++count;
          goto repeat;
        case '}':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = BRACE_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    case ST_EXPRESSION:
      /* Collect tokens until a ',', ';' or '}' at nesting depth 0.  The
         terminator is not appended; it is pushed back for the next call.  */
      switch (token)
        {
        case '(': case '[': case '{':
          ++count;
          APP;
          goto repeat;
        case '}':
          /* is this the last line of an enum declaration? */
          if (count == 0)
            {
              /* Put back the token we just read so's we can find it again
                 after registering the expression.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          /* FALLTHRU */
        case ')': case ']':
          --count;
          APP;
          goto repeat;
        case ',': case ';':
          if (count == 0)
            {
              /* Put back the token we just read so's we can find it again
                 after registering the expression.  */
              unput(token);

              lexstate = ST_NORMAL;
              token = EXPRESSION_PHRASE;
              break;
            }
          APP;
          goto repeat;
        default:
          APP;
          goto repeat;
        }
      break;

    case ST_STATIC_ASSERT:
      /* Same parenthesis matching as ST_ATTRIBUTE.  */
      APP;
      switch (token)
        {
        case '(':
          ++count;
          goto repeat;
        case ')':
          if (--count == 0)
            {
              lexstate = ST_NORMAL;
              token = STATIC_ASSERT_PHRASE;
              break;
            }
          goto repeat;
        default:
          goto repeat;
        }
      break;

    default:
      /* Not reachable with the states above; say why before giving up
         instead of exiting silently.  */
      fprintf(stderr, "genksyms: internal lexer error: unexpected state %d\n",
              (int) lexstate);
      exit(1);
    }
fini:

  if (suppress_type_lookup > 0)
    --suppress_type_lookup;
  if (dont_want_brace_phrase > 0)
    --dont_want_brace_phrase;

  yylval = &next_node->next;

  return token;
}