/*- * Copyright (c) 1985 Sun Microsystems, Inc. * Copyright (c) 1980, 1993 * The Regents of the University of California. All rights reserved. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #if 0 #ifndef lint static char sccsid[] = "@(#)lexi.c 8.1 (Berkeley) 6/6/93"; #endif /* not lint */ #endif #include "c.h" /* * Here we have the token scanner for indent. It scans off one token and puts * it in the global variable "token". It returns a code, indicating the type * of token scanned. */ #include #include #include #include #include #include "indent_globs.h" #include "indent_codes.h" #include "indent.h" #define alphanum 1 #ifdef undef #define opchar 3 #endif struct templ { const char *rwd; int rwcode; }; /* * This table has to be sorted alphabetically, because it'll be used in binary * search. For the same reason, string must be the first thing in struct templ. */ struct templ specials[] = { {"_Bool", 4}, {"_Complex", 4}, {"_Imaginary", 4}, {"auto", 10}, {"bool", 4}, {"break", 9}, {"case", 8}, {"char", 4}, {"complex", 4}, {"const", 4}, {"continue", 12}, {"default", 8}, {"do", 6}, {"double", 4}, {"else", 6}, {"enum", 3}, {"extern", 10}, {"float", 4}, {"for", 5}, {"global", 4}, {"goto", 9}, {"if", 5}, {"imaginary", 4}, {"inline", 12}, {"int", 4}, {"long", 4}, {"offsetof", 1}, {"register", 10}, {"restrict", 12}, {"return", 9}, {"short", 4}, {"signed", 4}, {"sizeof", 2}, {"static", 10}, {"struct", 3}, {"switch", 7}, {"typedef", 11}, {"union", 3}, {"unsigned", 4}, {"void", 4}, {"volatile", 4}, {"while", 5} }; const char **typenames; int typename_count; int typename_top = -1; char chartype[128] = { /* this is used to facilitate the decision of * what type (alphanumeric, operator) each * character is */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 1, 3, 3, 0, 0, 0, 3, 3, 0, 3, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 3, 3, 3, 3, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 3, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 3, 0, 3, 0 }; static int strcmp_type(const void *e1, const void *e2) { return (strcmp(e1, *(const char * const *)e2)); } /* * Decide whether "foo(..." is a function definition or declaration. * * At call, we are looking at the '('. Look ahead to find the first * '{', ';' or ',' that is not within parentheses or comments; then * it's a definition if we found '{', otherwise a declaration. * Note that this rule is fooled by K&R-style parameter declarations, * but telling the difference between those and function attributes * seems like more trouble than it's worth. This code could also be * fooled by mismatched parens or apparent comment starts within string * literals, but that seems unlikely in the context it's used in. */ static int is_func_definition(char *tp) { int paren_depth = 0; int in_comment = false; int in_slash_comment = false; int lastc = 0; /* We may need to look past the end of the current buffer. */ lookahead_reset(); for (;;) { int c; /* Fetch next character. */ if (tp < buf_end) c = *tp++; else { c = lookahead(); if (c == EOF) break; } /* Handle comments. */ if (in_comment) { if (lastc == '*' && c == '/') in_comment = false; } else if (lastc == '/' && c == '*' && !in_slash_comment) in_comment = true; else if (in_slash_comment) { if (c == '\n') in_slash_comment = false; } else if (lastc == '/' && c == '/') in_slash_comment = true; /* Count nested parens properly. */ else if (c == '(') paren_depth++; else if (c == ')') { paren_depth--; /* * If we find unbalanced parens, we must have started inside a * declaration. */ if (paren_depth < 0) return false; } else if (paren_depth == 0) { /* We are outside any parentheses or comments. */ if (c == '{') return true; else if (c == ';' || c == ',') return false; } lastc = c; } /* Hit EOF --- for lack of anything better, assume "not a definition". */ return false; } int lexi(struct parser_state *state) { int unary_delim; /* this is set to 1 if the current token * forces a following operator to be unary */ int code; /* internal code to be returned */ char qchar; /* the delimiter character for a string */ e_token = s_token; /* point to start of place to save token */ unary_delim = false; state->col_1 = state->last_nl; /* tell world that this token started * in column 1 iff the last thing * scanned was a newline */ state->last_nl = false; while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ state->col_1 = false; /* leading blanks imply token is not in column * 1 */ if (++buf_ptr >= buf_end) fill_buffer(); } /* Scan an alphanumeric token */ if (chartype[*buf_ptr & 127] == alphanum || (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) { /* * we have a character or number */ struct templ *p; if (isdigit((unsigned char)*buf_ptr) || (buf_ptr[0] == '.' && isdigit((unsigned char)buf_ptr[1]))) { int seendot = 0, seenexp = 0, seensfx = 0; /* * base 2, base 8, base 16: */ if (buf_ptr[0] == '0' && buf_ptr[1] != '.') { int len; if (buf_ptr[1] == 'b' || buf_ptr[1] == 'B') len = strspn(buf_ptr + 2, "01") + 2; else if (buf_ptr[1] == 'x' || buf_ptr[1] == 'X') len = strspn(buf_ptr + 2, "0123456789ABCDEFabcdef") + 2; else len = strspn(buf_ptr + 1, "012345678") + 1; if (len > 0) { CHECK_SIZE_TOKEN(len); memcpy(e_token, buf_ptr, len); e_token += len; buf_ptr += len; } else diag2(1, "Unterminated literal"); } else /* base 10: */ while (1) { if (*buf_ptr == '.') { if (seendot) break; else seendot++; } CHECK_SIZE_TOKEN(3); *e_token++ = *buf_ptr++; if (!isdigit((unsigned char)*buf_ptr) && *buf_ptr != '.') { if ((*buf_ptr != 'E' && *buf_ptr != 'e') || seenexp) break; else { seenexp++; seendot++; *e_token++ = *buf_ptr++; if (*buf_ptr == '+' || *buf_ptr == '-') *e_token++ = *buf_ptr++; } } } while (1) { CHECK_SIZE_TOKEN(2); if (!(seensfx & 1) && (*buf_ptr == 'U' || *buf_ptr == 'u')) { *e_token++ = *buf_ptr++; seensfx |= 1; continue; } if (!(seensfx & 2) && (strchr("fFlL", *buf_ptr) != NULL)) { if (buf_ptr[1] == buf_ptr[0]) *e_token++ = *buf_ptr++; *e_token++ = *buf_ptr++; seensfx |= 2; continue; } break; } } else while (chartype[*buf_ptr & 127] == alphanum || *buf_ptr == BACKSLASH) { /* fill_buffer() terminates buffer with newline */ if (*buf_ptr == BACKSLASH) { if (*(buf_ptr + 1) == '\n') { buf_ptr += 2; if (buf_ptr >= buf_end) fill_buffer(); } else break; } CHECK_SIZE_TOKEN(1); /* copy it over */ *e_token++ = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); } *e_token = '\0'; if (s_token[0] == 'L' && s_token[1] == '\0' && (*buf_ptr == '"' || *buf_ptr == '\'')) return (strpfx); while (*buf_ptr == ' ' || *buf_ptr == '\t') { /* get rid of blanks */ if (++buf_ptr >= buf_end) fill_buffer(); } state->keyword = 0; if (state->last_token == structure && !state->p_l_follow) { /* if last token was 'struct' and we're not * in parentheses, then this token * should be treated as a declaration */ state->last_u_d = true; return (decl); } /* * Operator after identifier is binary unless last token was 'struct' */ state->last_u_d = (state->last_token == structure); p = bsearch(s_token, specials, sizeof(specials) / sizeof(specials[0]), sizeof(specials[0]), strcmp_type); if (p == NULL) { /* not a special keyword... */ char *u; /* ... so maybe a type_t or a typedef */ if ((auto_typedefs && ((u = strrchr(s_token, '_')) != NULL) && strcmp(u, "_t") == 0) || (typename_top >= 0 && bsearch(s_token, typenames, typename_top + 1, sizeof(typenames[0]), strcmp_type))) { state->keyword = 4; /* a type name */ state->last_u_d = true; goto found_typename; } } else { /* we have a keyword */ state->keyword = p->rwcode; state->last_u_d = true; switch (p->rwcode) { case 7: /* it is a switch */ return (swstmt); case 8: /* a case or default */ return (casestmt); case 3: /* a "struct" */ /* FALLTHROUGH */ case 4: /* one of the declaration keywords */ found_typename: if (state->p_l_follow) { /* inside parens: cast, param list, offsetof or sizeof */ state->cast_mask |= (1 << state->p_l_follow) & ~state->not_cast_mask; } if (state->last_token == period || state->last_token == unary_op) { state->keyword = 0; break; } if (p != NULL && p->rwcode == 3) return (structure); if (state->p_l_follow) break; return (decl); case 5: /* if, while, for */ return (sp_paren); case 6: /* do, else */ return (sp_nparen); case 10: /* storage class specifier */ return (storage); case 11: /* typedef */ return (type_def); default: /* all others are treated like any other * identifier */ return (ident); } /* end of switch */ } /* end of if (found_it) */ if (*buf_ptr == '(' && state->tos <= 1 && state->ind_level == 0 && state->in_parameter_declaration == 0 && state->block_init == 0) { if (is_func_definition(buf_ptr)) { strncpy(state->procname, token, sizeof state->procname - 1); if (state->in_decl) state->in_parameter_declaration = 1; return (funcname); } } /* * The following hack attempts to guess whether or not the current * token is in fact a declaration keyword -- one that has been * typedefd */ else if (!state->p_l_follow && !state->block_init && !state->in_stmt && ((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha((unsigned char)*buf_ptr)) && (state->last_token == semicolon || state->last_token == lbrace || state->last_token == rbrace)) { state->keyword = 4; /* a type name */ state->last_u_d = true; return decl; } if (state->last_token == decl) /* if this is a declared variable, * then following sign is unary */ state->last_u_d = true; /* will make "int a -1" work */ return (ident); /* the ident is not in the list */ } /* end of processing for alphanum character */ /* Scan a non-alphanumeric token */ CHECK_SIZE_TOKEN(3); /* things like "<<=" */ *e_token++ = *buf_ptr; /* if it is only a one-character token, it is * moved here */ *e_token = '\0'; if (++buf_ptr >= buf_end) fill_buffer(); switch (*token) { case '\n': unary_delim = state->last_u_d; state->last_nl = true; /* remember that we just had a newline */ code = (had_eof ? 0 : newline); /* * if data has been exhausted, the newline is a dummy, and we should * return code to stop */ break; case '\'': /* start of quoted character */ case '"': /* start of string */ qchar = *token; do { /* copy the string */ while (1) { /* move one character or [/] */ if (*buf_ptr == '\n') { diag2(1, "Unterminated literal"); goto stop_lit; } CHECK_SIZE_TOKEN(2); *e_token = *buf_ptr++; if (buf_ptr >= buf_end) fill_buffer(); if (*e_token == BACKSLASH) { /* if escape, copy extra char */ if (*buf_ptr == '\n') /* check for escaped newline */ ++line_no; *++e_token = *buf_ptr++; ++e_token; /* we must increment this again because we * copied two chars */ if (buf_ptr >= buf_end) fill_buffer(); } else break; /* we copied one character */ } /* end of while (1) */ } while (*e_token++ != qchar); stop_lit: code = ident; break; case ('('): case ('['): unary_delim = true; code = lparen; break; case (')'): case (']'): code = rparen; break; case '#': unary_delim = state->last_u_d; code = preesc; break; case '?': unary_delim = true; code = question; break; case (':'): code = colon; unary_delim = true; break; case (';'): unary_delim = true; code = semicolon; break; case ('{'): unary_delim = true; /* * if (state->in_or_st) state->block_init = 1; */ /* ? code = state->block_init ? lparen : lbrace; */ code = lbrace; break; case ('}'): unary_delim = true; /* ? code = state->block_init ? rparen : rbrace; */ code = rbrace; break; case 014: /* a form feed */ unary_delim = state->last_u_d; state->last_nl = true; /* remember this so we can set 'state->col_1' * right */ code = form_feed; break; case (','): unary_delim = true; code = comma; break; case '.': unary_delim = false; code = period; break; case '-': case '+': /* check for -, +, --, ++ */ code = (state->last_u_d ? unary_op : binary_op); unary_delim = true; if (*buf_ptr == token[0]) { /* check for doubled character */ *e_token++ = *buf_ptr++; /* buffer overflow will be checked at end of loop */ if (state->last_token == ident || state->last_token == rparen) { code = (state->last_u_d ? unary_op : postop); /* check for following ++ or -- */ unary_delim = false; } } else if (*buf_ptr == '=') /* check for operator += */ *e_token++ = *buf_ptr++; else if (*buf_ptr == '>') { /* check for operator -> */ *e_token++ = *buf_ptr++; unary_delim = false; code = unary_op; state->want_blank = false; } break; /* buffer overflow will be checked at end of * switch */ case '=': if (state->in_or_st) state->block_init = 1; #ifdef undef if (chartype[*buf_ptr & 127] == opchar) { /* we have two char assignment */ e_token[-1] = *buf_ptr++; if ((e_token[-1] == '<' || e_token[-1] == '>') && e_token[-1] == *buf_ptr) *e_token++ = *buf_ptr++; *e_token++ = '='; /* Flip =+ to += */ *e_token = 0; } #else if (*buf_ptr == '=') {/* == */ *e_token++ = '='; /* Flip =+ to += */ buf_ptr++; *e_token = 0; } #endif code = binary_op; unary_delim = true; break; /* can drop thru!!! */ case '>': case '<': case '!': /* ops like <, <<, <=, !=, etc */ if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') { *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } if (*buf_ptr == '=') *e_token++ = *buf_ptr++; code = (state->last_u_d ? unary_op : binary_op); unary_delim = true; break; case '*': unary_delim = true; if (!state->last_u_d) { if (*buf_ptr == '=') *e_token++ = *buf_ptr++; code = binary_op; break; } while (*buf_ptr == '*' || isspace((unsigned char)*buf_ptr)) { if (*buf_ptr == '*') { CHECK_SIZE_TOKEN(1); *e_token++ = *buf_ptr; } if (++buf_ptr >= buf_end) fill_buffer(); } code = unary_op; break; default: if (token[0] == '/' && *buf_ptr == '*') { /* it is start of comment */ *e_token++ = '*'; if (++buf_ptr >= buf_end) fill_buffer(); code = comment; unary_delim = state->last_u_d; break; } while (*(e_token - 1) == *buf_ptr || *buf_ptr == '=') { /* * handle ||, &&, etc, and also things as in int *****i */ CHECK_SIZE_TOKEN(1); *e_token++ = *buf_ptr; if (++buf_ptr >= buf_end) fill_buffer(); } code = (state->last_u_d ? unary_op : binary_op); unary_delim = true; } /* end of switch */ if (buf_ptr >= buf_end) /* check for input buffer empty */ fill_buffer(); state->last_u_d = unary_delim; CHECK_SIZE_TOKEN(1); *e_token = '\0'; /* null terminate the token */ return (code); } void alloc_typenames(void) { typenames = (const char **)malloc(sizeof(typenames[0]) * (typename_count = 16)); if (typenames == NULL) err(1, NULL); } void add_typename(const char *key) { int comparison; const char *copy; if (typename_top + 1 >= typename_count) { typenames = realloc((void *)typenames, sizeof(typenames[0]) * (typename_count *= 2)); if (typenames == NULL) err(1, NULL); } if (typename_top == -1) typenames[++typename_top] = copy = strdup(key); else if ((comparison = strcmp(key, typenames[typename_top])) >= 0) { /* take advantage of sorted input */ if (comparison == 0) /* remove duplicates */ return; typenames[++typename_top] = copy = strdup(key); } else { int p; for (p = 0; (comparison = strcmp(key, typenames[p])) > 0; p++) /* find place for the new key */; if (comparison == 0) /* remove duplicates */ return; memmove(&typenames[p + 1], &typenames[p], sizeof(typenames[0]) * (++typename_top - p)); typenames[p] = copy = strdup(key); } if (copy == NULL) err(1, NULL); }