diff options
Diffstat (limited to '')
-rw-r--r-- | src/st/croco/cr-tknzr.c | 2762 |
1 files changed, 2762 insertions, 0 deletions
diff --git a/src/st/croco/cr-tknzr.c b/src/st/croco/cr-tknzr.c new file mode 100644 index 0000000..54f18f2 --- /dev/null +++ b/src/st/croco/cr-tknzr.c @@ -0,0 +1,2762 @@ +/* -*- Mode: C; indent-tabs-mode:nil; c-basic-offset: 8-*- */ + +/* + * This file is part of The Croco Library + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2.1 of the GNU Lesser General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + * USA + * + * Author: Dodji Seketeli + * See the COPYRIGHTS file for copyrights information. + */ + +/** + *@file + *The definition of the #CRTknzr (tokenizer) + *class. + */ + +#include "string.h" +#include "cr-tknzr.h" +#include "cr-doc-handler.h" + +struct _CRTknzrPriv { + /**The parser input stream of bytes*/ + CRInput *input; + + /** + *A cache where tknzr_unget_token() + *puts back the token. tknzr_get_next_token() + *first look in this cache, and if and + *only if it's empty, fetches the next token + *from the input stream. + */ + CRToken *token_cache; + + /** + *The position of the end of the previous token + *or char fetched. + */ + CRInputPos prev_pos; + + CRDocHandler *sac_handler; + + /** + *The reference count of the current instance + *of #CRTknzr. Is manipulated by cr_tknzr_ref() + *and cr_tknzr_unref(). + */ + glong ref_count; +}; + +#define PRIVATE(obj) ((obj)->priv) + +/** + *return TRUE if the character is a number ([0-9]), FALSE otherwise + *@param a_char the char to test. + */ +#define IS_NUM(a_char) (((a_char) >= '0' && (a_char) <= '9')?TRUE:FALSE) + +/** + *Checks if 'status' equals CR_OK. If not, goto the 'error' label. + * + *@param status the status (of type enum CRStatus) to test. + *@param is_exception if set to FALSE, the final status returned the + *current function will be CR_PARSING_ERROR. If set to TRUE, the + *current status will be the current value of the 'status' variable. + * + */ +#define CHECK_PARSING_STATUS(status, is_exception) \ +if ((status) != CR_OK) \ +{ \ + if (is_exception == FALSE) \ + { \ + status = CR_PARSING_ERROR ; \ + } \ + goto error ; \ +} + +/** + *Peeks the next char from the input stream of the current tokenizer. + *invokes CHECK_PARSING_STATUS on the status returned by + *cr_tknzr_input_peek_char(). + * + *@param the current instance of #CRTkzr. + *@param to_char a pointer to the char where to store the + *char peeked. + */ +#define PEEK_NEXT_CHAR(a_tknzr, a_to_char) \ +{\ +status = cr_tknzr_peek_char (a_tknzr, a_to_char) ; \ +CHECK_PARSING_STATUS (status, TRUE) \ +} + +/** + *Reads the next char from the input stream of the current parser. + *In case of error, jumps to the "error:" label located in the + *function where this macro is called. + *@param parser the current instance of #CRTknzr + *@param to_char a pointer to the guint32 char where to store + *the character read. + */ +#define READ_NEXT_CHAR(a_tknzr, to_char) \ +status = cr_tknzr_read_char (a_tknzr, to_char) ;\ +CHECK_PARSING_STATUS (status, TRUE) + +/** + *Gets information about the current position in + *the input of the parser. + *In case of failure, this macro returns from the + *calling function and + *returns a status code of type enum #CRStatus. + *@param parser the current instance of #CRTknzr. + *@param pos out parameter. A pointer to the position + *inside the current parser input. Must + */ +#define RECORD_INITIAL_POS(a_tknzr, a_pos) \ +status = cr_input_get_cur_pos (PRIVATE \ +(a_tknzr)->input, a_pos) ; \ +g_return_val_if_fail (status == CR_OK, status) + +/** + *Gets the address of the current byte inside the + *parser input. + *@param parser the current instance of #CRTknzr. + *@param addr out parameter a pointer (guchar*) + *to where the address must be put. + */ +#define RECORD_CUR_BYTE_ADDR(a_tknzr, a_addr) \ +status = cr_input_get_cur_byte_addr \ + (PRIVATE (a_tknzr)->input, a_addr) ; \ +CHECK_PARSING_STATUS (status, TRUE) + +/** + *Peeks a byte from the topmost parser input at + *a given offset from the current position. + *If it fails, goto the "error:" label. + * + *@param a_parser the current instance of #CRTknzr. + *@param a_offset the offset of the byte to peek, the + *current byte having the offset '0'. + *@param a_byte_ptr out parameter a pointer (guchar*) to + *where the peeked char is to be stored. + */ +#define PEEK_BYTE(a_tknzr, a_offset, a_byte_ptr) \ +status = cr_tknzr_peek_byte (a_tknzr, \ + a_offset, \ + a_byte_ptr) ; \ +CHECK_PARSING_STATUS (status, TRUE) ; + +#define BYTE(a_input, a_n, a_eof) \ +cr_input_peek_byte2 (a_input, a_n, a_eof) + +/** + *Reads a byte from the topmost parser input + *steam. + *If it fails, goto the "error" label. + *@param a_parser the current instance of #CRTknzr. + *@param a_byte_ptr the guchar * where to put the read char. + */ +#define READ_NEXT_BYTE(a_tknzr, a_byte_ptr) \ +status = \ +cr_input_read_byte (PRIVATE (a_tknzr)->input, a_byte_ptr) ;\ +CHECK_PARSING_STATUS (status, TRUE) ; + +/** + *Skips a given number of byte in the topmost + *parser input. Don't update line and column number. + *In case of error, jumps to the "error:" label + *of the surrounding function. + *@param a_parser the current instance of #CRTknzr. + *@param a_nb_bytes the number of bytes to skip. + */ +#define SKIP_BYTES(a_tknzr, a_nb_bytes) \ +status = cr_input_seek_index (PRIVATE (a_tknzr)->input, \ + CR_SEEK_CUR, a_nb_bytes) ; \ +CHECK_PARSING_STATUS (status, TRUE) ; + +/** + *Skip utf8 encoded characters. + *Updates line and column numbers. + *@param a_parser the current instance of #CRTknzr. + *@param a_nb_chars the number of chars to skip. Must be of + *type glong. + */ +#define SKIP_CHARS(a_tknzr, a_nb_chars) \ +{ \ +gulong nb_chars = a_nb_chars ; \ +status = cr_input_consume_chars \ + (PRIVATE (a_tknzr)->input,0, &nb_chars) ; \ +CHECK_PARSING_STATUS (status, TRUE) ; \ +} + +/** + *Tests the condition and if it is false, sets + *status to "CR_PARSING_ERROR" and goto the 'error' + *label. + *@param condition the condition to test. + */ +#define ENSURE_PARSING_COND(condition) \ +if (! (condition)) {status = CR_PARSING_ERROR; goto error ;} + +static enum CRStatus cr_tknzr_parse_nl (CRTknzr * a_this, + guchar ** a_start, + guchar ** a_end, + CRParsingLocation *a_location); + +static enum CRStatus cr_tknzr_parse_w (CRTknzr * a_this, + guchar ** a_start, + guchar ** a_end, + CRParsingLocation *a_location) ; + +static enum CRStatus cr_tknzr_parse_unicode_escape (CRTknzr * a_this, + guint32 * a_unicode, + CRParsingLocation *a_location) ; + +static enum CRStatus cr_tknzr_parse_escape (CRTknzr * a_this, + guint32 * a_esc_code, + CRParsingLocation *a_location); + +static enum CRStatus cr_tknzr_parse_string (CRTknzr * a_this, + CRString ** a_str); + +static enum CRStatus cr_tknzr_parse_comment (CRTknzr * a_this, + CRString ** a_comment); + +static enum CRStatus cr_tknzr_parse_nmstart (CRTknzr * a_this, + guint32 * a_char, + CRParsingLocation *a_location); + +static enum CRStatus cr_tknzr_parse_num (CRTknzr * a_this, + CRNum ** a_num); + +/********************************** + *PRIVATE methods + **********************************/ + +/** + *Parses a "w" as defined by the css spec at [4.1.1]: + * w ::= [ \t\r\n\f]* + * + *@param a_this the current instance of #CRTknzr. + *@param a_start out param. Upon successful completion, points + *to the beginning of the parsed white space, points to NULL otherwise. + *Can also point to NULL is there is no white space actually. + *@param a_end out param. Upon successful completion, points + *to the end of the parsed white space, points to NULL otherwise. + *Can also point to NULL is there is no white space actually. + */ +static enum CRStatus +cr_tknzr_parse_w (CRTknzr * a_this, + guchar ** a_start, + guchar ** a_end, + CRParsingLocation *a_location) +{ + guint32 cur_char = 0; + CRInputPos init_pos; + enum CRStatus status = CR_OK; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input + && a_start && a_end, + CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + + *a_start = NULL; + *a_end = NULL; + + READ_NEXT_CHAR (a_this, &cur_char); + + if (cr_utils_is_white_space (cur_char) == FALSE) { + status = CR_PARSING_ERROR; + goto error; + } + if (a_location) { + cr_tknzr_get_parsing_location (a_this, + a_location) ; + } + RECORD_CUR_BYTE_ADDR (a_this, a_start); + *a_end = *a_start; + + for (;;) { + gboolean is_eof = FALSE; + + cr_input_get_end_of_file (PRIVATE (a_this)->input, &is_eof); + if (is_eof) + break; + + status = cr_tknzr_peek_char (a_this, &cur_char); + if (status == CR_END_OF_INPUT_ERROR) { + break; + } else if (status != CR_OK) { + goto error; + } + + if (cr_utils_is_white_space (cur_char) == TRUE) { + READ_NEXT_CHAR (a_this, &cur_char); + RECORD_CUR_BYTE_ADDR (a_this, a_end); + } else { + break; + } + } + + return CR_OK; + + error: + cr_tknzr_set_cur_pos (a_this, &init_pos); + + return status; +} + +/** + *Parses a newline as defined in the css2 spec: + * nl ::= \n|\r\n|\r|\f + * + *@param a_this the "this pointer" of the current instance of #CRTknzr. + *@param a_start a pointer to the first character of the successfully + *parsed string. + *@param a_end a pointer to the last character of the successfully parsed + *string. + *@result CR_OK upon successful completion, an error code otherwise. + */ +static enum CRStatus +cr_tknzr_parse_nl (CRTknzr * a_this, + guchar ** a_start, + guchar ** a_end, + CRParsingLocation *a_location) +{ + CRInputPos init_pos; + guchar next_chars[2] = { 0 }; + enum CRStatus status = CR_PARSING_ERROR; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && a_start && a_end, CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + + PEEK_BYTE (a_this, 1, &next_chars[0]); + PEEK_BYTE (a_this, 2, &next_chars[1]); + + if ((next_chars[0] == '\r' && next_chars[1] == '\n')) { + SKIP_BYTES (a_this, 1); + if (a_location) { + cr_tknzr_get_parsing_location + (a_this, a_location) ; + } + SKIP_CHARS (a_this, 1); + + RECORD_CUR_BYTE_ADDR (a_this, a_end); + + status = CR_OK; + } else if (next_chars[0] == '\n' + || next_chars[0] == '\r' || next_chars[0] == '\f') { + SKIP_CHARS (a_this, 1); + if (a_location) { + cr_tknzr_get_parsing_location + (a_this, a_location) ; + } + RECORD_CUR_BYTE_ADDR (a_this, a_start); + *a_end = *a_start; + status = CR_OK; + } else { + status = CR_PARSING_ERROR; + goto error; + } + return CR_OK ; + + error: + cr_tknzr_set_cur_pos (a_this, &init_pos) ; + return status; +} + +/** + *Go ahead in the parser input, skipping all the spaces. + *If the next char if not a white space, this function does nothing. + *In any cases, it stops when it encounters a non white space character. + * + *@param a_this the current instance of #CRTknzr. + *@return CR_OK upon successful completion, an error code otherwise. + */ +static enum CRStatus +cr_tknzr_try_to_skip_spaces (CRTknzr * a_this) +{ + enum CRStatus status = CR_ERROR; + guint32 cur_char = 0; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR); + + status = cr_input_peek_char (PRIVATE (a_this)->input, &cur_char); + + if (status != CR_OK) { + if (status == CR_END_OF_INPUT_ERROR) + return CR_OK; + return status; + } + + if (cr_utils_is_white_space (cur_char) == TRUE) { + gulong nb_chars = -1; /*consume all spaces */ + + status = cr_input_consume_white_spaces + (PRIVATE (a_this)->input, &nb_chars); + } + + return status; +} + +/** + *Parses a "comment" as defined in the css spec at [4.1.1]: + *COMMENT ::= \/\*[^*]*\*+([^/][^*]*\*+)*\/ . + *This complex regexp is just to say that comments start + *with the two chars '/''*' and ends with the two chars '*''/'. + *It also means that comments cannot be nested. + *So based on that, I've just tried to implement the parsing function + *simply and in a straight forward manner. + */ +static enum CRStatus +cr_tknzr_parse_comment (CRTknzr * a_this, + CRString ** a_comment) +{ + enum CRStatus status = CR_OK; + CRInputPos init_pos; + guint32 cur_char = 0, next_char= 0; + CRString *comment = NULL; + CRParsingLocation loc = {0} ; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input, + CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + READ_NEXT_CHAR (a_this, &cur_char) ; + ENSURE_PARSING_COND (cur_char == '/'); + cr_tknzr_get_parsing_location (a_this, &loc) ; + + READ_NEXT_CHAR (a_this, &cur_char); + ENSURE_PARSING_COND (cur_char == '*'); + comment = cr_string_new (); + for (;;) { /* [^*]* */ + PEEK_NEXT_CHAR (a_this, &next_char); + if (next_char == '*') + break; + READ_NEXT_CHAR (a_this, &cur_char); + g_string_append_unichar (comment->stryng, cur_char); + } + /* Stop condition: next_char == '*' */ + for (;;) { /* \*+ */ + READ_NEXT_CHAR(a_this, &cur_char); + ENSURE_PARSING_COND (cur_char == '*'); + g_string_append_unichar (comment->stryng, cur_char); + PEEK_NEXT_CHAR (a_this, &next_char); + if (next_char != '*') + break; + } + /* Stop condition: next_char != '*' */ + for (;;) { /* ([^/][^*]*\*+)* */ + if (next_char == '/') + break; + READ_NEXT_CHAR(a_this, &cur_char); + g_string_append_unichar (comment->stryng, cur_char); + for (;;) { /* [^*]* */ + PEEK_NEXT_CHAR (a_this, &next_char); + if (next_char == '*') + break; + READ_NEXT_CHAR (a_this, &cur_char); + g_string_append_unichar (comment->stryng, cur_char); + } + /* Stop condition: next_char = '*', no need to verify, because peek and read exit to error anyway */ + for (;;) { /* \*+ */ + READ_NEXT_CHAR(a_this, &cur_char); + ENSURE_PARSING_COND (cur_char == '*'); + g_string_append_unichar (comment->stryng, cur_char); + PEEK_NEXT_CHAR (a_this, &next_char); + if (next_char != '*') + break; + } + /* Continue condition: next_char != '*' */ + } + /* Stop condition: next_char == '\/' */ + READ_NEXT_CHAR(a_this, &cur_char); + g_string_append_unichar (comment->stryng, cur_char); + + if (status == CR_OK) { + cr_parsing_location_copy (&comment->location, + &loc) ; + *a_comment = comment; + return CR_OK; + } + error: + + if (comment) { + cr_string_destroy (comment); + comment = NULL; + } + + cr_tknzr_set_cur_pos (a_this, &init_pos); + + return status; +} + +/** + *Parses an 'unicode' escape sequence defined + *in css spec at chap 4.1.1: + *unicode ::= \\[0-9a-f]{1,6}[ \n\r\t\f]? + *@param a_this the current instance of #CRTknzr. + *@param a_start out parameter. A pointer to the start + *of the unicode escape sequence. Must *NOT* be deleted by + *the caller. + *@param a_end out parameter. A pointer to the last character + *of the unicode escape sequence. Must *NOT* be deleted by the caller. + *@return CR_OK if parsing succeeded, an error code otherwise. + *Error code can be either CR_PARSING_ERROR if the string + *parsed just doesn't + *respect the production or another error if a + *lower level error occurred. + */ +static enum CRStatus +cr_tknzr_parse_unicode_escape (CRTknzr * a_this, + guint32 * a_unicode, + CRParsingLocation *a_location) +{ + guint32 cur_char; + CRInputPos init_pos; + glong occur = 0; + guint32 unicode = 0; + guchar *tmp_char_ptr1 = NULL, + *tmp_char_ptr2 = NULL; + enum CRStatus status = CR_OK; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && a_unicode, CR_BAD_PARAM_ERROR); + + /*first, let's backup the current position pointer */ + RECORD_INITIAL_POS (a_this, &init_pos); + + READ_NEXT_CHAR (a_this, &cur_char); + + if (cur_char != '\\') { + status = CR_PARSING_ERROR; + goto error; + } + if (a_location) { + cr_tknzr_get_parsing_location + (a_this, a_location) ; + } + PEEK_NEXT_CHAR (a_this, &cur_char); + + for (occur = 0, unicode = 0; ((cur_char >= '0' && cur_char <= '9') + || (cur_char >= 'a' && cur_char <= 'f') + || (cur_char >= 'A' && cur_char <= 'F')) + && occur < 6; occur++) { + gint cur_char_val = 0; + + READ_NEXT_CHAR (a_this, &cur_char); + + if ((cur_char >= '0' && cur_char <= '9')) { + cur_char_val = (cur_char - '0'); + } else if ((cur_char >= 'a' && cur_char <= 'f')) { + cur_char_val = 10 + (cur_char - 'a'); + } else if ((cur_char >= 'A' && cur_char <= 'F')) { + cur_char_val = 10 + (cur_char - 'A'); + } + + unicode = unicode * 16 + cur_char_val; + + PEEK_NEXT_CHAR (a_this, &cur_char); + } + + /* Eat a whitespace if possible. */ + cr_tknzr_parse_w (a_this, &tmp_char_ptr1, + &tmp_char_ptr2, NULL); + *a_unicode = unicode; + return CR_OK; + + error: + /* + *restore the initial position pointer backuped at + *the beginning of this function. + */ + cr_tknzr_set_cur_pos (a_this, &init_pos); + + return status; +} + +/** + *parses an escape sequence as defined by the css spec: + *escape ::= {unicode}|\\[ -~\200-\4177777] + *@param a_this the current instance of #CRTknzr . + */ +static enum CRStatus +cr_tknzr_parse_escape (CRTknzr * a_this, guint32 * a_esc_code, + CRParsingLocation *a_location) +{ + enum CRStatus status = CR_OK; + guint32 cur_char = 0; + CRInputPos init_pos; + guchar next_chars[2]; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && a_esc_code, CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + + PEEK_BYTE (a_this, 1, &next_chars[0]); + PEEK_BYTE (a_this, 2, &next_chars[1]); + + if (next_chars[0] != '\\') { + status = CR_PARSING_ERROR; + goto error; + } + + if ((next_chars[1] >= '0' && next_chars[1] <= '9') + || (next_chars[1] >= 'a' && next_chars[1] <= 'f') + || (next_chars[1] >= 'A' && next_chars[1] <= 'F')) { + status = cr_tknzr_parse_unicode_escape (a_this, a_esc_code, + a_location); + } else { + /*consume the '\' char */ + READ_NEXT_CHAR (a_this, &cur_char); + if (a_location) { + cr_tknzr_get_parsing_location (a_this, + a_location) ; + } + /*then read the char after the '\' */ + READ_NEXT_CHAR (a_this, &cur_char); + + if (cur_char != ' ' && (cur_char < 200 || cur_char > 4177777)) { + status = CR_PARSING_ERROR; + goto error; + } + *a_esc_code = cur_char; + + } + if (status == CR_OK) { + return CR_OK; + } + error: + cr_tknzr_set_cur_pos (a_this, &init_pos); + return status; +} + +/** + *Parses a string type as defined in css spec [4.1.1]: + * + *string ::= {string1}|{string2} + *string1 ::= \"([\t !#$%&(-~]|\\{nl}|\'|{nonascii}|{escape})*\" + *string2 ::= \'([\t !#$%&(-~]|\\{nl}|\"|{nonascii}|{escape})*\' + * + *@param a_this the current instance of #CRTknzr. + *@param a_start out parameter. Upon successful completion, + *points to the beginning of the string, points to an undefined value + *otherwise. + *@param a_end out parameter. Upon successful completion, points to + *the beginning of the string, points to an undefined value otherwise. + *@return CR_OK upon successful completion, an error code otherwise. + */ +static enum CRStatus +cr_tknzr_parse_string (CRTknzr * a_this, CRString ** a_str) +{ + guint32 cur_char = 0, + delim = 0; + CRInputPos init_pos; + enum CRStatus status = CR_OK; + CRString *str = NULL; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input + && a_str, CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + READ_NEXT_CHAR (a_this, &cur_char); + + if (cur_char == '"') + delim = '"'; + else if (cur_char == '\'') + delim = '\''; + else { + status = CR_PARSING_ERROR; + goto error; + } + str = cr_string_new (); + if (str) { + cr_tknzr_get_parsing_location + (a_this, &str->location) ; + } + for (;;) { + guchar next_chars[2] = { 0 }; + + PEEK_BYTE (a_this, 1, &next_chars[0]); + PEEK_BYTE (a_this, 2, &next_chars[1]); + + if (next_chars[0] == '\\') { + guchar *tmp_char_ptr1 = NULL, + *tmp_char_ptr2 = NULL; + guint32 esc_code = 0; + + if (next_chars[1] == '\'' || next_chars[1] == '"') { + g_string_append_unichar (str->stryng, + next_chars[1]); + SKIP_BYTES (a_this, 2); + status = CR_OK; + } else { + status = cr_tknzr_parse_escape + (a_this, &esc_code, NULL); + + if (status == CR_OK) { + g_string_append_unichar + (str->stryng, + esc_code); + } + } + + if (status != CR_OK) { + /* + *consume the '\' char, and try to parse + *a newline. + */ + READ_NEXT_CHAR (a_this, &cur_char); + + status = cr_tknzr_parse_nl + (a_this, &tmp_char_ptr1, + &tmp_char_ptr2, NULL); + } + + CHECK_PARSING_STATUS (status, FALSE); + } else if (strchr ("\t !#$%&", next_chars[0]) + || (next_chars[0] >= '(' && next_chars[0] <= '~')) { + READ_NEXT_CHAR (a_this, &cur_char); + g_string_append_unichar (str->stryng, + cur_char); + status = CR_OK; + } + + else if (cr_utils_is_nonascii (next_chars[0])) { + READ_NEXT_CHAR (a_this, &cur_char); + g_string_append_unichar (str->stryng, cur_char); + } else if (next_chars[0] == delim) { + READ_NEXT_CHAR (a_this, &cur_char); + break; + } else { + status = CR_PARSING_ERROR; + goto error; + } + } + + if (status == CR_OK) { + if (*a_str == NULL) { + *a_str = str; + str = NULL; + } else { + (*a_str)->stryng = g_string_append_len + ((*a_str)->stryng, + str->stryng->str, + str->stryng->len); + cr_string_destroy (str); + } + return CR_OK; + } + + error: + + if (str) { + cr_string_destroy (str) ; + str = NULL; + } + cr_tknzr_set_cur_pos (a_this, &init_pos); + return status; +} + +/** + *Parses the an nmstart as defined by the css2 spec [4.1.1]: + * nmstart [a-zA-Z]|{nonascii}|{escape} + * + *@param a_this the current instance of #CRTknzr. + *@param a_start out param. A pointer to the starting point of + *the token. + *@param a_end out param. A pointer to the ending point of the + *token. + *@param a_char out param. The actual parsed nmchar. + *@return CR_OK upon successful completion, + *an error code otherwise. + */ +static enum CRStatus +cr_tknzr_parse_nmstart (CRTknzr * a_this, + guint32 * a_char, + CRParsingLocation *a_location) +{ + CRInputPos init_pos; + enum CRStatus status = CR_OK; + guint32 cur_char = 0, + next_char = 0; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input + && a_char, CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + + PEEK_NEXT_CHAR (a_this, &next_char); + + if (next_char == '\\') { + status = cr_tknzr_parse_escape (a_this, a_char, + a_location); + + if (status != CR_OK) + goto error; + + } else if (cr_utils_is_nonascii (next_char) == TRUE + || ((next_char >= 'a') && (next_char <= 'z')) + || ((next_char >= 'A') && (next_char <= 'Z')) + ) { + READ_NEXT_CHAR (a_this, &cur_char); + if (a_location) { + cr_tknzr_get_parsing_location (a_this, + a_location) ; + } + *a_char = cur_char; + status = CR_OK; + } else { + status = CR_PARSING_ERROR; + goto error; + } + + return CR_OK; + + error: + cr_tknzr_set_cur_pos (a_this, &init_pos); + + return status; + +} + +/** + *Parses an nmchar as described in the css spec at + *chap 4.1.1: + *nmchar ::= [a-z0-9-]|{nonascii}|{escape} + * + *Humm, I have added the possibility for nmchar to + *contain upper case letters. + * + *@param a_this the current instance of #CRTknzr. + *@param a_start out param. A pointer to the starting point of + *the token. + *@param a_end out param. A pointer to the ending point of the + *token. + *@param a_char out param. The actual parsed nmchar. + *@return CR_OK upon successful completion, + *an error code otherwise. + */ +static enum CRStatus +cr_tknzr_parse_nmchar (CRTknzr * a_this, guint32 * a_char, + CRParsingLocation *a_location) +{ + guint32 cur_char = 0, + next_char = 0; + enum CRStatus status = CR_OK; + CRInputPos init_pos; + + g_return_val_if_fail (a_this && PRIVATE (a_this) && a_char, + CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + + status = cr_input_peek_char (PRIVATE (a_this)->input, + &next_char) ; + if (status != CR_OK) + goto error; + + if (next_char == '\\') { + status = cr_tknzr_parse_escape (a_this, a_char, + a_location); + + if (status != CR_OK) + goto error; + + } else if (cr_utils_is_nonascii (next_char) == TRUE + || ((next_char >= 'a') && (next_char <= 'z')) + || ((next_char >= 'A') && (next_char <= 'Z')) + || ((next_char >= '0') && (next_char <= '9')) + || (next_char == '-') + || (next_char == '_') /*'_' not allowed by the spec. */ + ) { + READ_NEXT_CHAR (a_this, &cur_char); + *a_char = cur_char; + status = CR_OK; + if (a_location) { + cr_tknzr_get_parsing_location + (a_this, a_location) ; + } + } else { + status = CR_PARSING_ERROR; + goto error; + } + return CR_OK; + + error: + cr_tknzr_set_cur_pos (a_this, &init_pos); + return status; +} + +/** + *Parses an "ident" as defined in css spec [4.1.1]: + *ident ::= {nmstart}{nmchar}* + * + *Actually parses it using the css3 grammar: + *ident ::= -?{nmstart}{nmchar}* + *@param a_this the currens instance of #CRTknzr. + * + *@param a_str a pointer to parsed ident. If *a_str is NULL, + *this function allocates a new instance of CRString. If not, + *the function just appends the parsed string to the one passed. + *In both cases it is up to the caller to free *a_str. + * + *@return CR_OK upon successful completion, an error code + *otherwise. + */ +static enum CRStatus +cr_tknzr_parse_ident (CRTknzr * a_this, CRString ** a_str) +{ + guint32 tmp_char = 0; + CRString *stringue = NULL ; + CRInputPos init_pos; + enum CRStatus status = CR_OK; + gboolean location_is_set = FALSE ; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input + && a_str, CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + PEEK_NEXT_CHAR (a_this, &tmp_char) ; + stringue = cr_string_new () ; + g_return_val_if_fail (stringue, + CR_OUT_OF_MEMORY_ERROR) ; + + if (tmp_char == '-') { + READ_NEXT_CHAR (a_this, &tmp_char) ; + cr_tknzr_get_parsing_location + (a_this, &stringue->location) ; + location_is_set = TRUE ; + g_string_append_unichar (stringue->stryng, + tmp_char) ; + } + status = cr_tknzr_parse_nmstart (a_this, &tmp_char, NULL); + if (status != CR_OK) { + status = CR_PARSING_ERROR; + goto end ; + } + if (location_is_set == FALSE) { + cr_tknzr_get_parsing_location + (a_this, &stringue->location) ; + location_is_set = TRUE ; + } + g_string_append_unichar (stringue->stryng, tmp_char); + for (;;) { + status = cr_tknzr_parse_nmchar (a_this, + &tmp_char, + NULL); + if (status != CR_OK) { + status = CR_OK ; + break; + } + g_string_append_unichar (stringue->stryng, tmp_char); + } + if (status == CR_OK) { + if (!*a_str) { + *a_str = stringue ; + + } else { + g_string_append_len ((*a_str)->stryng, + stringue->stryng->str, + stringue->stryng->len) ; + cr_string_destroy (stringue) ; + } + stringue = NULL ; + } + + error: + end: + if (stringue) { + cr_string_destroy (stringue) ; + stringue = NULL ; + } + if (status != CR_OK ) { + cr_tknzr_set_cur_pos (a_this, &init_pos) ; + } + return status ; +} + + +/** + *Parses a "name" as defined by css spec [4.1.1]: + *name ::= {nmchar}+ + * + *@param a_this the current instance of #CRTknzr. + * + *@param a_str out parameter. A pointer to the successfully parsed + *name. If *a_str is set to NULL, this function allocates a new instance + *of CRString. If not, it just appends the parsed name to the passed *a_str. + *In both cases, it is up to the caller to free *a_str. + * + *@return CR_OK upon successful completion, an error code otherwise. + */ +static enum CRStatus +cr_tknzr_parse_name (CRTknzr * a_this, + CRString ** a_str) +{ + guint32 tmp_char = 0; + CRInputPos init_pos; + enum CRStatus status = CR_OK; + gboolean str_needs_free = FALSE, + is_first_nmchar=TRUE ; + glong i = 0; + CRParsingLocation loc = {0} ; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input + && a_str, + CR_BAD_PARAM_ERROR) ; + + RECORD_INITIAL_POS (a_this, &init_pos); + + if (*a_str == NULL) { + *a_str = cr_string_new (); + str_needs_free = TRUE; + } + for (i = 0;; i++) { + if (is_first_nmchar == TRUE) { + status = cr_tknzr_parse_nmchar + (a_this, &tmp_char, + &loc) ; + is_first_nmchar = FALSE ; + } else { + status = cr_tknzr_parse_nmchar + (a_this, &tmp_char, NULL) ; + } + if (status != CR_OK) + break; + g_string_append_unichar ((*a_str)->stryng, + tmp_char); + } + if (i > 0) { + cr_parsing_location_copy + (&(*a_str)->location, &loc) ; + return CR_OK; + } + if (str_needs_free == TRUE && *a_str) { + cr_string_destroy (*a_str); + *a_str = NULL; + } + cr_tknzr_set_cur_pos (a_this, &init_pos); + return CR_PARSING_ERROR; +} + +/** + *Parses a "hash" as defined by the css spec in [4.1.1]: + *HASH ::= #{name} + */ +static enum CRStatus +cr_tknzr_parse_hash (CRTknzr * a_this, CRString ** a_str) +{ + guint32 cur_char = 0; + CRInputPos init_pos; + enum CRStatus status = CR_OK; + gboolean str_needs_free = FALSE; + CRParsingLocation loc = {0} ; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input, + CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + READ_NEXT_CHAR (a_this, &cur_char); + if (cur_char != '#') { + status = CR_PARSING_ERROR; + goto error; + } + if (*a_str == NULL) { + *a_str = cr_string_new (); + str_needs_free = TRUE; + } + cr_tknzr_get_parsing_location (a_this, + &loc) ; + status = cr_tknzr_parse_name (a_this, a_str); + cr_parsing_location_copy (&(*a_str)->location, &loc) ; + if (status != CR_OK) { + goto error; + } + return CR_OK; + + error: + if (str_needs_free == TRUE && *a_str) { + cr_string_destroy (*a_str); + *a_str = NULL; + } + + cr_tknzr_set_cur_pos (a_this, &init_pos); + return status; +} + +/** + *Parses an uri as defined by the css spec [4.1.1]: + * URI ::= url\({w}{string}{w}\) + * |url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\) + * + *@param a_this the current instance of #CRTknzr. + *@param a_str the successfully parsed url. + *@return CR_OK upon successful completion, an error code otherwise. + */ +static enum CRStatus +cr_tknzr_parse_uri (CRTknzr * a_this, + CRString ** a_str) +{ + guint32 cur_char = 0; + CRInputPos init_pos; + enum CRStatus status = CR_PARSING_ERROR; + guchar tab[4] = { 0 }, *tmp_ptr1 = NULL, *tmp_ptr2 = NULL; + CRString *str = NULL; + CRParsingLocation location = {0} ; + + g_return_val_if_fail (a_this + && PRIVATE (a_this) + && PRIVATE (a_this)->input + && a_str, + CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + + PEEK_BYTE (a_this, 1, &tab[0]); + PEEK_BYTE (a_this, 2, &tab[1]); + PEEK_BYTE (a_this, 3, &tab[2]); + PEEK_BYTE (a_this, 4, &tab[3]); + + if (tab[0] != 'u' || tab[1] != 'r' || tab[2] != 'l' || tab[3] != '(') { + status = CR_PARSING_ERROR; + goto error; + } + /* + *Here, we want to skip 4 bytes ('u''r''l''('). + *But we also need to keep track of the parsing location + *of the 'u'. So, we skip 1 byte, we record the parsing + *location, then we skip the 3 remaining bytes. + */ + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, &location) ; + SKIP_CHARS (a_this, 3); + cr_tknzr_try_to_skip_spaces (a_this); + status = cr_tknzr_parse_string (a_this, a_str); + + if (status == CR_OK) { + guint32 next_char = 0; + status = cr_tknzr_parse_w (a_this, &tmp_ptr1, + &tmp_ptr2, NULL); + cr_tknzr_try_to_skip_spaces (a_this); + PEEK_NEXT_CHAR (a_this, &next_char); + if (next_char == ')') { + READ_NEXT_CHAR (a_this, &cur_char); + status = CR_OK; + } else { + status = CR_PARSING_ERROR; + } + } + if (status != CR_OK) { + str = cr_string_new (); + for (;;) { + guint32 next_char = 0; + PEEK_NEXT_CHAR (a_this, &next_char); + if (strchr ("!#$%&", next_char) + || (next_char >= '*' && next_char <= '~') + || (cr_utils_is_nonascii (next_char) == TRUE)) { + READ_NEXT_CHAR (a_this, &cur_char); + g_string_append_unichar + (str->stryng, cur_char); + status = CR_OK; + } else { + guint32 esc_code = 0; + status = cr_tknzr_parse_escape + (a_this, &esc_code, NULL); + if (status == CR_OK) { + g_string_append_unichar + (str->stryng, + esc_code); + } else { + status = CR_OK; + break; + } + } + } + cr_tknzr_try_to_skip_spaces (a_this); + READ_NEXT_CHAR (a_this, &cur_char); + if (cur_char == ')') { + status = CR_OK; + } else { + status = CR_PARSING_ERROR; + goto error; + } + if (str) { + if (*a_str == NULL) { + *a_str = str; + str = NULL; + } else { + g_string_append_len + ((*a_str)->stryng, + str->stryng->str, + str->stryng->len); + cr_string_destroy (str); + } + } + } + + cr_parsing_location_copy + (&(*a_str)->location, + &location) ; + return CR_OK ; + error: + if (str) { + cr_string_destroy (str); + str = NULL; + } + cr_tknzr_set_cur_pos (a_this, &init_pos); + return status; +} + +/** + *parses an RGB as defined in the css2 spec. + *rgb: rgb '('S*{num}%?S* ',' {num}#?S*,S*{num}#?S*')' + * + *@param a_this the "this pointer" of the current instance of + *@param a_rgb out parameter the parsed rgb. + *@return CR_OK upon successful completion, an error code otherwise. + */ +static enum CRStatus +cr_tknzr_parse_rgb (CRTknzr * a_this, CRRgb ** a_rgb) +{ + enum CRStatus status = CR_OK; + CRInputPos init_pos; + CRNum *num = NULL; + guchar next_bytes[3] = { 0 }, cur_byte = 0; + glong red = 0, + green = 0, + blue = 0, + i = 0; + gboolean is_percentage = FALSE; + CRParsingLocation location = {0} ; + + g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + + PEEK_BYTE (a_this, 1, &next_bytes[0]); + PEEK_BYTE (a_this, 2, &next_bytes[1]); + PEEK_BYTE (a_this, 3, &next_bytes[2]); + + if (((next_bytes[0] == 'r') || (next_bytes[0] == 'R')) + && ((next_bytes[1] == 'g') || (next_bytes[1] == 'G')) + && ((next_bytes[2] == 'b') || (next_bytes[2] == 'B'))) { + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, &location) ; + SKIP_CHARS (a_this, 2); + } else { + status = CR_PARSING_ERROR; + goto error; + } + READ_NEXT_BYTE (a_this, &cur_byte); + ENSURE_PARSING_COND (cur_byte == '('); + + cr_tknzr_try_to_skip_spaces (a_this); + status = cr_tknzr_parse_num (a_this, &num); + ENSURE_PARSING_COND ((status == CR_OK) && (num != NULL)); + + if (num->val > G_MAXLONG) { + status = CR_PARSING_ERROR; + goto error; + } + + red = num->val; + cr_num_destroy (num); + num = NULL; + + PEEK_BYTE (a_this, 1, &next_bytes[0]); + if (next_bytes[0] == '%') { + SKIP_CHARS (a_this, 1); + is_percentage = TRUE; + } + cr_tknzr_try_to_skip_spaces (a_this); + + for (i = 0; i < 2; i++) { + READ_NEXT_BYTE (a_this, &cur_byte); + ENSURE_PARSING_COND (cur_byte == ','); + + cr_tknzr_try_to_skip_spaces (a_this); + status = cr_tknzr_parse_num (a_this, &num); + ENSURE_PARSING_COND ((status == CR_OK) && (num != NULL)); + + if (num->val > G_MAXLONG) { + status = CR_PARSING_ERROR; + goto error; + } + + PEEK_BYTE (a_this, 1, &next_bytes[0]); + if (next_bytes[0] == '%') { + SKIP_CHARS (a_this, 1); + is_percentage = 1; + } + + if (i == 0) { + green = num->val; + } else if (i == 1) { + blue = num->val; + } + + if (num) { + cr_num_destroy (num); + num = NULL; + } + cr_tknzr_try_to_skip_spaces (a_this); + } + + READ_NEXT_BYTE (a_this, &cur_byte); + if (*a_rgb == NULL) { + *a_rgb = cr_rgb_new_with_vals (red, green, blue, + is_percentage); + + if (*a_rgb == NULL) { + status = CR_ERROR; + goto error; + } + status = CR_OK; + } else { + (*a_rgb)->red = red; + (*a_rgb)->green = green; + (*a_rgb)->blue = blue; + (*a_rgb)->is_percentage = is_percentage; + + status = CR_OK; + } + + if (status == CR_OK) { + if (a_rgb && *a_rgb) { + cr_parsing_location_copy + (&(*a_rgb)->location, + &location) ; + } + return CR_OK; + } + + error: + if (num) { + cr_num_destroy (num); + num = NULL; + } + + cr_tknzr_set_cur_pos (a_this, &init_pos); + return CR_OK; +} + +/** + *Parses a atkeyword as defined by the css spec in [4.1.1]: + *ATKEYWORD ::= @{ident} + * + *@param a_this the "this pointer" of the current instance of + *#CRTknzr. + * + *@param a_str out parameter. The parsed atkeyword. If *a_str is + *set to NULL this function allocates a new instance of CRString and + *sets it to the parsed atkeyword. If not, this function just appends + *the parsed atkeyword to the end of *a_str. In both cases it is up to + *the caller to free *a_str. + * + *@return CR_OK upon successful completion, an error code otherwise. + */ +static enum CRStatus +cr_tknzr_parse_atkeyword (CRTknzr * a_this, + CRString ** a_str) +{ + guint32 cur_char = 0; + CRInputPos init_pos; + gboolean str_needs_free = FALSE; + enum CRStatus status = CR_OK; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input + && a_str, CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + + READ_NEXT_CHAR (a_this, &cur_char); + + if (cur_char != '@') { + status = CR_PARSING_ERROR; + goto error; + } + + if (*a_str == NULL) { + *a_str = cr_string_new (); + str_needs_free = TRUE; + } + status = cr_tknzr_parse_ident (a_this, a_str); + if (status != CR_OK) { + goto error; + } + return CR_OK; + error: + + if (str_needs_free == TRUE && *a_str) { + cr_string_destroy (*a_str); + *a_str = NULL; + } + cr_tknzr_set_cur_pos (a_this, &init_pos); + return status; +} + +static enum CRStatus +cr_tknzr_parse_important (CRTknzr * a_this, + CRParsingLocation *a_location) +{ + guint32 cur_char = 0; + CRInputPos init_pos; + enum CRStatus status = CR_OK; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input, + CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + READ_NEXT_CHAR (a_this, &cur_char); + ENSURE_PARSING_COND (cur_char == '!'); + if (a_location) { + cr_tknzr_get_parsing_location (a_this, + a_location) ; + } + cr_tknzr_try_to_skip_spaces (a_this); + + if (BYTE (PRIVATE (a_this)->input, 1, NULL) == 'i' + && BYTE (PRIVATE (a_this)->input, 2, NULL) == 'm' + && BYTE (PRIVATE (a_this)->input, 3, NULL) == 'p' + && BYTE (PRIVATE (a_this)->input, 4, NULL) == 'o' + && BYTE (PRIVATE (a_this)->input, 5, NULL) == 'r' + && BYTE (PRIVATE (a_this)->input, 6, NULL) == 't' + && BYTE (PRIVATE (a_this)->input, 7, NULL) == 'a' + && BYTE (PRIVATE (a_this)->input, 8, NULL) == 'n' + && BYTE (PRIVATE (a_this)->input, 9, NULL) == 't') { + SKIP_BYTES (a_this, 9); + if (a_location) { + cr_tknzr_get_parsing_location (a_this, + a_location) ; + } + return CR_OK; + } else { + status = CR_PARSING_ERROR; + } + + error: + cr_tknzr_set_cur_pos (a_this, &init_pos); + + return status; +} + +/** + *Parses a num as defined in the css spec [4.1.1]: + *[0-9]+|[0-9]*\.[0-9]+ + *@param a_this the current instance of #CRTknzr. + *@param a_num out parameter. The parsed number. + *@return CR_OK upon successful completion, + *an error code otherwise. + * + *The CSS specification says that numbers may be + *preceded by '+' or '-' to indicate the sign. + *Technically, the "num" construction as defined + *by the tokenizer doesn't allow this, but we parse + *it here for simplicity. + */ +static enum CRStatus +cr_tknzr_parse_num (CRTknzr * a_this, + CRNum ** a_num) +{ + enum CRStatus status = CR_PARSING_ERROR; + enum CRNumType val_type = NUM_GENERIC; + gboolean parsing_dec, /* true iff seen decimal point. */ + parsed; /* true iff the substring seen so far is a valid CSS + number, i.e. `[0-9]+|[0-9]*\.[0-9]+'. */ + guint32 cur_char = 0, + next_char = 0; + gdouble numerator, denominator = 1; + CRInputPos init_pos; + CRParsingLocation location = {0} ; + int sign = 1; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input, + CR_BAD_PARAM_ERROR); + + RECORD_INITIAL_POS (a_this, &init_pos); + READ_NEXT_CHAR (a_this, &cur_char); + + if (cur_char == '+' || cur_char == '-') { + if (cur_char == '-') { + sign = -1; + } + READ_NEXT_CHAR (a_this, &cur_char); + } + + if (IS_NUM (cur_char)) { + numerator = (cur_char - '0'); + parsing_dec = FALSE; + parsed = TRUE; + } else if (cur_char == '.') { + numerator = 0; + parsing_dec = TRUE; + parsed = FALSE; + } else { + status = CR_PARSING_ERROR; + goto error; + } + cr_tknzr_get_parsing_location (a_this, &location) ; + + for (;;) { + status = cr_tknzr_peek_char (a_this, &next_char); + if (status != CR_OK) { + if (status == CR_END_OF_INPUT_ERROR) + status = CR_OK; + break; + } + if (next_char == '.') { + if (parsing_dec) { + status = CR_PARSING_ERROR; + goto error; + } + + READ_NEXT_CHAR (a_this, &cur_char); + parsing_dec = TRUE; + parsed = FALSE; /* In CSS, there must be at least + one digit after `.'. */ + } else if (IS_NUM (next_char)) { + READ_NEXT_CHAR (a_this, &cur_char); + parsed = TRUE; + + numerator = numerator * 10 + (cur_char - '0'); + if (parsing_dec) { + denominator *= 10; + } + } else { + break; + } + } + + if (!parsed) { + status = CR_PARSING_ERROR; + } + + /* + *Now, set the output param values. + */ + if (status == CR_OK) { + gdouble val = (numerator / denominator) * sign; + if (*a_num == NULL) { + *a_num = cr_num_new_with_val (val, val_type); + + if (*a_num == NULL) { + status = CR_ERROR; + goto error; + } + } else { + (*a_num)->val = val; + (*a_num)->type = val_type; + } + cr_parsing_location_copy (&(*a_num)->location, + &location) ; + return CR_OK; + } + + error: + + cr_tknzr_set_cur_pos (a_this, &init_pos); + + return status; +} + +/********************************************* + *PUBLIC methods + ********************************************/ + +CRTknzr * +cr_tknzr_new (CRInput * a_input) +{ + CRTknzr *result = NULL; + + result = g_try_malloc (sizeof (CRTknzr)); + + if (result == NULL) { + cr_utils_trace_info ("Out of memory"); + return NULL; + } + + memset (result, 0, sizeof (CRTknzr)); + + result->priv = g_try_malloc (sizeof (CRTknzrPriv)); + + if (result->priv == NULL) { + cr_utils_trace_info ("Out of memory"); + + if (result) { + g_free (result); + result = NULL; + } + + return NULL; + } + memset (result->priv, 0, sizeof (CRTknzrPriv)); + if (a_input) + cr_tknzr_set_input (result, a_input); + return result; +} + +CRTknzr * +cr_tknzr_new_from_buf (guchar * a_buf, gulong a_len, + enum CREncoding a_enc, + gboolean a_free_at_destroy) +{ + CRTknzr *result = NULL; + CRInput *input = NULL; + + input = cr_input_new_from_buf (a_buf, a_len, a_enc, + a_free_at_destroy); + + g_return_val_if_fail (input != NULL, NULL); + + result = cr_tknzr_new (input); + + return result; +} + +CRTknzr * +cr_tknzr_new_from_uri (const guchar * a_file_uri, + enum CREncoding a_enc) +{ + CRTknzr *result = NULL; + CRInput *input = NULL; + + input = cr_input_new_from_uri ((const gchar *) a_file_uri, a_enc); + g_return_val_if_fail (input != NULL, NULL); + + result = cr_tknzr_new (input); + + return result; +} + +void +cr_tknzr_ref (CRTknzr * a_this) +{ + g_return_if_fail (a_this && PRIVATE (a_this)); + + PRIVATE (a_this)->ref_count++; +} + +gboolean +cr_tknzr_unref (CRTknzr * a_this) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this), FALSE); + + if (PRIVATE (a_this)->ref_count > 0) { + PRIVATE (a_this)->ref_count--; + } + + if (PRIVATE (a_this)->ref_count == 0) { + cr_tknzr_destroy (a_this); + return TRUE; + } + + return FALSE; +} + +enum CRStatus +cr_tknzr_set_input (CRTknzr * a_this, CRInput * a_input) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); + + if (PRIVATE (a_this)->input) { + cr_input_unref (PRIVATE (a_this)->input); + } + + PRIVATE (a_this)->input = a_input; + + cr_input_ref (PRIVATE (a_this)->input); + + return CR_OK; +} + +enum CRStatus +cr_tknzr_get_input (CRTknzr * a_this, CRInput ** a_input) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); + + *a_input = PRIVATE (a_this)->input; + + return CR_OK; +} + +/********************************* + *Tokenizer input handling routines + *********************************/ + +/** + *Reads the next byte from the parser input stream. + *@param a_this the "this pointer" of the current instance of + *#CRParser. + *@param a_byte out parameter the place where to store the byte + *read. + *@return CR_OK upon successful completion, an error + *code otherwise. + */ +enum CRStatus +cr_tknzr_read_byte (CRTknzr * a_this, guchar * a_byte) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this), CR_BAD_PARAM_ERROR); + + return cr_input_read_byte (PRIVATE (a_this)->input, a_byte); + +} + +/** + *Reads the next char from the parser input stream. + *@param a_this the current instance of #CRTknzr. + *@param a_char out parameter. The read char. + *@return CR_OK upon successful completion, an error code + *otherwise. + */ +enum CRStatus +cr_tknzr_read_char (CRTknzr * a_this, guint32 * a_char) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input + && a_char, CR_BAD_PARAM_ERROR); + + if (PRIVATE (a_this)->token_cache) { + cr_input_set_cur_pos (PRIVATE (a_this)->input, + &PRIVATE (a_this)->prev_pos); + cr_token_destroy (PRIVATE (a_this)->token_cache); + PRIVATE (a_this)->token_cache = NULL; + } + + return cr_input_read_char (PRIVATE (a_this)->input, a_char); +} + +/** + *Peeks a char from the parser input stream. + *To "peek a char" means reads the next char without consuming it. + *Subsequent calls to this function return the same char. + *@param a_this the current instance of #CRTknzr. + *@param a_char out parameter. The peeked char upon successful completion. + *@return CR_OK upon successful completion, an error code otherwise. + */ +enum CRStatus +cr_tknzr_peek_char (CRTknzr * a_this, guint32 * a_char) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input + && a_char, CR_BAD_PARAM_ERROR); + + if (PRIVATE (a_this)->token_cache) { + cr_input_set_cur_pos (PRIVATE (a_this)->input, + &PRIVATE (a_this)->prev_pos); + cr_token_destroy (PRIVATE (a_this)->token_cache); + PRIVATE (a_this)->token_cache = NULL; + } + + return cr_input_peek_char (PRIVATE (a_this)->input, a_char); +} + +/** + *Peeks a byte ahead at a given position in the parser input stream. + *@param a_this the current instance of #CRTknzr. + *@param a_offset the offset of the peeked byte starting from the current + *byte in the parser input stream. + *@param a_byte out parameter. The peeked byte upon + *successful completion. + *@return CR_OK upon successful completion, an error code otherwise. + */ +enum CRStatus +cr_tknzr_peek_byte (CRTknzr * a_this, gulong a_offset, guchar * a_byte) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input && a_byte, + CR_BAD_PARAM_ERROR); + + if (PRIVATE (a_this)->token_cache) { + cr_input_set_cur_pos (PRIVATE (a_this)->input, + &PRIVATE (a_this)->prev_pos); + cr_token_destroy (PRIVATE (a_this)->token_cache); + PRIVATE (a_this)->token_cache = NULL; + } + + return cr_input_peek_byte (PRIVATE (a_this)->input, + CR_SEEK_CUR, a_offset, a_byte); +} + +/** + *Same as cr_tknzr_peek_byte() but this api returns the byte peeked. + *@param a_this the current instance of #CRTknzr. + *@param a_offset the offset of the peeked byte starting from the current + *byte in the parser input stream. + *@param a_eof out parameter. If not NULL, is set to TRUE if we reached end of + *file, FALE otherwise. If the caller sets it to NULL, this parameter + *is just ignored. + *@return the peeked byte. + */ +guchar +cr_tknzr_peek_byte2 (CRTknzr * a_this, gulong a_offset, gboolean * a_eof) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input, 0); + + return cr_input_peek_byte2 (PRIVATE (a_this)->input, a_offset, a_eof); +} + +/** + *Gets the number of bytes left in the topmost input stream + *associated to this parser. + *@param a_this the current instance of #CRTknzr + *@return the number of bytes left or -1 in case of error. + */ +glong +cr_tknzr_get_nb_bytes_left (CRTknzr * a_this) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR); + + if (PRIVATE (a_this)->token_cache) { + cr_input_set_cur_pos (PRIVATE (a_this)->input, + &PRIVATE (a_this)->prev_pos); + cr_token_destroy (PRIVATE (a_this)->token_cache); + PRIVATE (a_this)->token_cache = NULL; + } + + return cr_input_get_nb_bytes_left (PRIVATE (a_this)->input); +} + +enum CRStatus +cr_tknzr_get_cur_pos (CRTknzr * a_this, CRInputPos * a_pos) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input + && a_pos, CR_BAD_PARAM_ERROR); + + if (PRIVATE (a_this)->token_cache) { + cr_input_set_cur_pos (PRIVATE (a_this)->input, + &PRIVATE (a_this)->prev_pos); + cr_token_destroy (PRIVATE (a_this)->token_cache); + PRIVATE (a_this)->token_cache = NULL; + } + + return cr_input_get_cur_pos (PRIVATE (a_this)->input, a_pos); +} + +enum CRStatus +cr_tknzr_get_parsing_location (CRTknzr *a_this, + CRParsingLocation *a_loc) +{ + g_return_val_if_fail (a_this + && PRIVATE (a_this) + && a_loc, + CR_BAD_PARAM_ERROR) ; + + return cr_input_get_parsing_location + (PRIVATE (a_this)->input, a_loc) ; +} + +enum CRStatus +cr_tknzr_get_cur_byte_addr (CRTknzr * a_this, guchar ** a_addr) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR); + if (PRIVATE (a_this)->token_cache) { + cr_input_set_cur_pos (PRIVATE (a_this)->input, + &PRIVATE (a_this)->prev_pos); + cr_token_destroy (PRIVATE (a_this)->token_cache); + PRIVATE (a_this)->token_cache = NULL; + } + + return cr_input_get_cur_byte_addr (PRIVATE (a_this)->input, a_addr); +} + +enum CRStatus +cr_tknzr_seek_index (CRTknzr * a_this, enum CRSeekPos a_origin, gint a_pos) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR); + + if (PRIVATE (a_this)->token_cache) { + cr_input_set_cur_pos (PRIVATE (a_this)->input, + &PRIVATE (a_this)->prev_pos); + cr_token_destroy (PRIVATE (a_this)->token_cache); + PRIVATE (a_this)->token_cache = NULL; + } + + return cr_input_seek_index (PRIVATE (a_this)->input, a_origin, a_pos); +} + +enum CRStatus +cr_tknzr_consume_chars (CRTknzr * a_this, guint32 a_char, glong * a_nb_char) +{ + gulong consumed = *(gulong *) a_nb_char; + enum CRStatus status; + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR); + + if (PRIVATE (a_this)->token_cache) { + cr_input_set_cur_pos (PRIVATE (a_this)->input, + &PRIVATE (a_this)->prev_pos); + cr_token_destroy (PRIVATE (a_this)->token_cache); + PRIVATE (a_this)->token_cache = NULL; + } + + status = cr_input_consume_chars (PRIVATE (a_this)->input, + a_char, &consumed); + *a_nb_char = (glong) consumed; + return status; +} + +enum CRStatus +cr_tknzr_set_cur_pos (CRTknzr * a_this, CRInputPos * a_pos) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input, CR_BAD_PARAM_ERROR); + + if (PRIVATE (a_this)->token_cache) { + cr_token_destroy (PRIVATE (a_this)->token_cache); + PRIVATE (a_this)->token_cache = NULL; + } + + return cr_input_set_cur_pos (PRIVATE (a_this)->input, a_pos); +} + +enum CRStatus +cr_tknzr_unget_token (CRTknzr * a_this, CRToken * a_token) +{ + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->token_cache == NULL, + CR_BAD_PARAM_ERROR); + + PRIVATE (a_this)->token_cache = a_token; + + return CR_OK; +} + +/** + *Returns the next token of the input stream. + *This method is really central. Each parsing + *method calls it. + *@param a_this the current tokenizer. + *@param a_tk out parameter. The returned token. + *for the sake of mem leak avoidance, *a_tk must + *be NULL. + *@param CR_OK upon successful completion, an error code + *otherwise. + */ +enum CRStatus +cr_tknzr_get_next_token (CRTknzr * a_this, CRToken ** a_tk) +{ + enum CRStatus status = CR_OK; + CRToken *token = NULL; + CRInputPos init_pos; + guint32 next_char = 0; + guchar next_bytes[4] = { 0 }; + gboolean reached_eof = FALSE; + CRInput *input = NULL; + CRString *str = NULL; + CRRgb *rgb = NULL; + CRParsingLocation location = {0} ; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && a_tk && *a_tk == NULL + && PRIVATE (a_this)->input, + CR_BAD_PARAM_ERROR); + + if (PRIVATE (a_this)->token_cache) { + *a_tk = PRIVATE (a_this)->token_cache; + PRIVATE (a_this)->token_cache = NULL; + return CR_OK; + } + + RECORD_INITIAL_POS (a_this, &init_pos); + + status = cr_input_get_end_of_file + (PRIVATE (a_this)->input, &reached_eof); + ENSURE_PARSING_COND (status == CR_OK); + + if (reached_eof == TRUE) { + status = CR_END_OF_INPUT_ERROR; + goto error; + } + + input = PRIVATE (a_this)->input; + + PEEK_NEXT_CHAR (a_this, &next_char); + token = cr_token_new (); + ENSURE_PARSING_COND (token); + + switch (next_char) { + case '@': + { + if (BYTE (input, 2, NULL) == 'f' + && BYTE (input, 3, NULL) == 'o' + && BYTE (input, 4, NULL) == 'n' + && BYTE (input, 5, NULL) == 't' + && BYTE (input, 6, NULL) == '-' + && BYTE (input, 7, NULL) == 'f' + && BYTE (input, 8, NULL) == 'a' + && BYTE (input, 9, NULL) == 'c' + && BYTE (input, 10, NULL) == 'e') { + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location + (a_this, &location) ; + SKIP_CHARS (a_this, 9); + status = cr_token_set_font_face_sym (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + } + + if (BYTE (input, 2, NULL) == 'c' + && BYTE (input, 3, NULL) == 'h' + && BYTE (input, 4, NULL) == 'a' + && BYTE (input, 5, NULL) == 'r' + && BYTE (input, 6, NULL) == 's' + && BYTE (input, 7, NULL) == 'e' + && BYTE (input, 8, NULL) == 't') { + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location + (a_this, &location) ; + SKIP_CHARS (a_this, 7); + status = cr_token_set_charset_sym (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + } + + if (BYTE (input, 2, NULL) == 'i' + && BYTE (input, 3, NULL) == 'm' + && BYTE (input, 4, NULL) == 'p' + && BYTE (input, 5, NULL) == 'o' + && BYTE (input, 6, NULL) == 'r' + && BYTE (input, 7, NULL) == 't') { + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location + (a_this, &location) ; + SKIP_CHARS (a_this, 6); + status = cr_token_set_import_sym (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + } + + if (BYTE (input, 2, NULL) == 'm' + && BYTE (input, 3, NULL) == 'e' + && BYTE (input, 4, NULL) == 'd' + && BYTE (input, 5, NULL) == 'i' + && BYTE (input, 6, NULL) == 'a') { + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + SKIP_CHARS (a_this, 5); + status = cr_token_set_media_sym (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + } + + if (BYTE (input, 2, NULL) == 'p' + && BYTE (input, 3, NULL) == 'a' + && BYTE (input, 4, NULL) == 'g' + && BYTE (input, 5, NULL) == 'e') { + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + SKIP_CHARS (a_this, 4); + status = cr_token_set_page_sym (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + } + status = cr_tknzr_parse_atkeyword (a_this, &str); + if (status == CR_OK) { + status = cr_token_set_atkeyword (token, str); + CHECK_PARSING_STATUS (status, TRUE); + if (str) { + cr_parsing_location_copy (&token->location, + &str->location) ; + } + goto done; + } + } + break; + + case 'u': + + if (BYTE (input, 2, NULL) == 'r' + && BYTE (input, 3, NULL) == 'l' + && BYTE (input, 4, NULL) == '(') { + CRString *str2 = NULL; + + status = cr_tknzr_parse_uri (a_this, &str2); + if (status == CR_OK) { + status = cr_token_set_uri (token, str2); + CHECK_PARSING_STATUS (status, TRUE); + if (str2) { + cr_parsing_location_copy (&token->location, + &str2->location) ; + } + goto done; + } + } + goto fallback; + break; + + case 'r': + if (BYTE (input, 2, NULL) == 'g' + && BYTE (input, 3, NULL) == 'b' + && BYTE (input, 4, NULL) == '(') { + status = cr_tknzr_parse_rgb (a_this, &rgb); + if (status == CR_OK && rgb) { + status = cr_token_set_rgb (token, rgb); + CHECK_PARSING_STATUS (status, TRUE); + if (rgb) { + cr_parsing_location_copy (&token->location, + &rgb->location) ; + } + rgb = NULL; + goto done; + } + + } + goto fallback; + break; + + case '<': + if (BYTE (input, 2, NULL) == '!' + && BYTE (input, 3, NULL) == '-' + && BYTE (input, 4, NULL) == '-') { + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + SKIP_CHARS (a_this, 3); + status = cr_token_set_cdo (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + } + break; + + case '-': + if (BYTE (input, 2, NULL) == '-' + && BYTE (input, 3, NULL) == '>') { + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + SKIP_CHARS (a_this, 2); + status = cr_token_set_cdc (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + } else { + status = cr_tknzr_parse_ident + (a_this, &str); + if (status == CR_OK) { + cr_token_set_ident + (token, str); + if (str) { + cr_parsing_location_copy (&token->location, + &str->location) ; + } + goto done; + } else { + goto parse_number; + } + } + break; + + case '~': + if (BYTE (input, 2, NULL) == '=') { + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + SKIP_CHARS (a_this, 1); + status = cr_token_set_includes (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + } + break; + + case '|': + if (BYTE (input, 2, NULL) == '=') { + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + SKIP_CHARS (a_this, 1); + status = cr_token_set_dashmatch (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + } + break; + + case '/': + if (BYTE (input, 2, NULL) == '*') { + status = cr_tknzr_parse_comment (a_this, &str); + + if (status == CR_OK) { + status = cr_token_set_comment (token, str); + str = NULL; + CHECK_PARSING_STATUS (status, TRUE); + if (str) { + cr_parsing_location_copy (&token->location, + &str->location) ; + } + goto done; + } + } + break ; + + case ';': + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + status = cr_token_set_semicolon (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + + case '{': + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + status = cr_token_set_cbo (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_tknzr_get_parsing_location (a_this, + &location) ; + goto done; + + case '}': + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + status = cr_token_set_cbc (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + + case '(': + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + status = cr_token_set_po (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + + case ')': + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + status = cr_token_set_pc (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + + case '[': + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + status = cr_token_set_bo (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + + case ']': + SKIP_CHARS (a_this, 1); + cr_tknzr_get_parsing_location (a_this, + &location) ; + status = cr_token_set_bc (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + + case ' ': + case '\t': + case '\n': + case '\f': + case '\r': + { + guchar *start = NULL, + *end = NULL; + + status = cr_tknzr_parse_w (a_this, &start, + &end, &location); + if (status == CR_OK) { + status = cr_token_set_s (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_tknzr_get_parsing_location (a_this, + &location) ; + goto done; + } + } + break; + + case '#': + { + status = cr_tknzr_parse_hash (a_this, &str); + if (status == CR_OK && str) { + status = cr_token_set_hash (token, str); + CHECK_PARSING_STATUS (status, TRUE); + if (str) { + cr_parsing_location_copy (&token->location, + &str->location) ; + } + str = NULL; + goto done; + } + } + break; + + case '\'': + case '"': + status = cr_tknzr_parse_string (a_this, &str); + if (status == CR_OK && str) { + status = cr_token_set_string (token, str); + CHECK_PARSING_STATUS (status, TRUE); + if (str) { + cr_parsing_location_copy (&token->location, + &str->location) ; + } + str = NULL; + goto done; + } + break; + + case '!': + status = cr_tknzr_parse_important (a_this, &location); + if (status == CR_OK) { + status = cr_token_set_important_sym (token); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + goto done; + } + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '.': + case '+': + /* '-' case is handled separately above for --> comments */ + parse_number: + { + CRNum *num = NULL; + + status = cr_tknzr_parse_num (a_this, &num); + if (status == CR_OK && num) { + next_bytes[0] = BYTE (input, 1, NULL); + next_bytes[1] = BYTE (input, 2, NULL); + next_bytes[2] = BYTE (input, 3, NULL); + next_bytes[3] = BYTE (input, 4, NULL); + + if (next_bytes[0] == 'e' + && next_bytes[1] == 'm') { + num->type = NUM_LENGTH_EM; + status = cr_token_set_ems (token, + num); + num = NULL; + SKIP_CHARS (a_this, 2); + } else if (next_bytes[0] == 'e' + && next_bytes[1] == 'x') { + num->type = NUM_LENGTH_EX; + status = cr_token_set_exs (token, + num); + num = NULL; + SKIP_CHARS (a_this, 2); + } else if (next_bytes[0] == 'p' + && next_bytes[1] == 'x') { + num->type = NUM_LENGTH_PX; + status = cr_token_set_length + (token, num, LENGTH_PX_ET); + num = NULL; + SKIP_CHARS (a_this, 2); + } else if (next_bytes[0] == 'c' + && next_bytes[1] == 'm') { + num->type = NUM_LENGTH_CM; + status = cr_token_set_length + (token, num, LENGTH_CM_ET); + num = NULL; + SKIP_CHARS (a_this, 2); + } else if (next_bytes[0] == 'm' + && next_bytes[1] == 'm') { + num->type = NUM_LENGTH_MM; + status = cr_token_set_length + (token, num, LENGTH_MM_ET); + num = NULL; + SKIP_CHARS (a_this, 2); + } else if (next_bytes[0] == 'i' + && next_bytes[1] == 'n') { + num->type = NUM_LENGTH_IN; + status = cr_token_set_length + (token, num, LENGTH_IN_ET); + num = NULL; + SKIP_CHARS (a_this, 2); + } else if (next_bytes[0] == 'p' + && next_bytes[1] == 't') { + num->type = NUM_LENGTH_PT; + status = cr_token_set_length + (token, num, LENGTH_PT_ET); + num = NULL; + SKIP_CHARS (a_this, 2); + } else if (next_bytes[0] == 'p' + && next_bytes[1] == 'c') { + num->type = NUM_LENGTH_PC; + status = cr_token_set_length + (token, num, LENGTH_PC_ET); + num = NULL; + SKIP_CHARS (a_this, 2); + } else if (next_bytes[0] == 'd' + && next_bytes[1] == 'e' + && next_bytes[2] == 'g') { + num->type = NUM_ANGLE_DEG; + status = cr_token_set_angle + (token, num, ANGLE_DEG_ET); + num = NULL; + SKIP_CHARS (a_this, 3); + } else if (next_bytes[0] == 'r' + && next_bytes[1] == 'a' + && next_bytes[2] == 'd') { + num->type = NUM_ANGLE_RAD; + status = cr_token_set_angle + (token, num, ANGLE_RAD_ET); + num = NULL; + SKIP_CHARS (a_this, 3); + } else if (next_bytes[0] == 'g' + && next_bytes[1] == 'r' + && next_bytes[2] == 'a' + && next_bytes[3] == 'd') { + num->type = NUM_ANGLE_GRAD; + status = cr_token_set_angle + (token, num, ANGLE_GRAD_ET); + num = NULL; + SKIP_CHARS (a_this, 4); + } else if (next_bytes[0] == 'm' + && next_bytes[1] == 's') { + num->type = NUM_TIME_MS; + status = cr_token_set_time + (token, num, TIME_MS_ET); + num = NULL; + SKIP_CHARS (a_this, 2); + } else if (next_bytes[0] == 's') { + num->type = NUM_TIME_S; + status = cr_token_set_time + (token, num, TIME_S_ET); + num = NULL; + SKIP_CHARS (a_this, 1); + } else if (next_bytes[0] == 'H' + && next_bytes[1] == 'z') { + num->type = NUM_FREQ_HZ; + status = cr_token_set_freq + (token, num, FREQ_HZ_ET); + num = NULL; + SKIP_CHARS (a_this, 2); + } else if (next_bytes[0] == 'k' + && next_bytes[1] == 'H' + && next_bytes[2] == 'z') { + num->type = NUM_FREQ_KHZ; + status = cr_token_set_freq + (token, num, FREQ_KHZ_ET); + num = NULL; + SKIP_CHARS (a_this, 3); + } else if (next_bytes[0] == '%') { + num->type = NUM_PERCENTAGE; + status = cr_token_set_percentage + (token, num); + num = NULL; + SKIP_CHARS (a_this, 1); + } else { + status = cr_tknzr_parse_ident (a_this, + &str); + if (status == CR_OK && str) { + num->type = NUM_UNKNOWN_TYPE; + status = cr_token_set_dimen + (token, num, str); + num = NULL; + CHECK_PARSING_STATUS (status, + TRUE); + str = NULL; + } else { + status = cr_token_set_number + (token, num); + num = NULL; + CHECK_PARSING_STATUS (status, CR_OK); + str = NULL; + } + } + if (token && token->u.num) { + cr_parsing_location_copy (&token->location, + &token->u.num->location) ; + } else { + status = CR_ERROR ; + } + goto done ; + } + } + break; + + default: + fallback: + /*process the fallback cases here */ + + if (next_char == '\\' + || (cr_utils_is_nonascii (next_bytes[0]) == TRUE) + || ((next_char >= 'a') && (next_char <= 'z')) + || ((next_char >= 'A') && (next_char <= 'Z'))) { + status = cr_tknzr_parse_ident (a_this, &str); + if (status == CR_OK && str) { + guint32 next_c = 0; + + status = cr_input_peek_char + (PRIVATE (a_this)->input, &next_c); + + if (status == CR_OK && next_c == '(') { + + SKIP_CHARS (a_this, 1); + status = cr_token_set_function + (token, str); + CHECK_PARSING_STATUS (status, TRUE); + /*ownership is transferred + *to token by cr_token_set_function. + */ + if (str) { + cr_parsing_location_copy (&token->location, + &str->location) ; + } + str = NULL; + } else { + status = cr_token_set_ident (token, + str); + CHECK_PARSING_STATUS (status, TRUE); + if (str) { + cr_parsing_location_copy (&token->location, + &str->location) ; + } + str = NULL; + } + goto done; + } else { + if (str) { + cr_string_destroy (str); + str = NULL; + } + } + } + break; + } + + READ_NEXT_CHAR (a_this, &next_char); + cr_tknzr_get_parsing_location (a_this, + &location) ; + status = cr_token_set_delim (token, next_char); + CHECK_PARSING_STATUS (status, TRUE); + cr_parsing_location_copy (&token->location, + &location) ; + done: + + if (status == CR_OK && token) { + *a_tk = token; + /* + *store the previous position input stream pos. + */ + memmove (&PRIVATE (a_this)->prev_pos, + &init_pos, sizeof (CRInputPos)); + return CR_OK; + } + + error: + if (token) { + cr_token_destroy (token); + token = NULL; + } + + if (str) { + cr_string_destroy (str); + str = NULL; + } + cr_tknzr_set_cur_pos (a_this, &init_pos); + return status; + +} + +enum CRStatus +cr_tknzr_parse_token (CRTknzr * a_this, enum CRTokenType a_type, + enum CRTokenExtraType a_et, gpointer a_res, + gpointer a_extra_res) +{ + enum CRStatus status = CR_OK; + CRToken *token = NULL; + + g_return_val_if_fail (a_this && PRIVATE (a_this) + && PRIVATE (a_this)->input + && a_res, CR_BAD_PARAM_ERROR); + + status = cr_tknzr_get_next_token (a_this, &token); + if (status != CR_OK) + return status; + if (token == NULL) + return CR_PARSING_ERROR; + + if (token->type == a_type) { + switch (a_type) { + case NO_TK: + case S_TK: + case CDO_TK: + case CDC_TK: + case INCLUDES_TK: + case DASHMATCH_TK: + case IMPORT_SYM_TK: + case PAGE_SYM_TK: + case MEDIA_SYM_TK: + case FONT_FACE_SYM_TK: + case CHARSET_SYM_TK: + case IMPORTANT_SYM_TK: + status = CR_OK; + break; + + case STRING_TK: + case IDENT_TK: + case HASH_TK: + case ATKEYWORD_TK: + case FUNCTION_TK: + case COMMENT_TK: + case URI_TK: + *((CRString **) a_res) = token->u.str; + token->u.str = NULL; + status = CR_OK; + break; + + case EMS_TK: + case EXS_TK: + case PERCENTAGE_TK: + case NUMBER_TK: + *((CRNum **) a_res) = token->u.num; + token->u.num = NULL; + status = CR_OK; + break; + + case LENGTH_TK: + case ANGLE_TK: + case TIME_TK: + case FREQ_TK: + if (token->extra_type == a_et) { + *((CRNum **) a_res) = token->u.num; + token->u.num = NULL; + status = CR_OK; + } + break; + + case DIMEN_TK: + *((CRNum **) a_res) = token->u.num; + if (a_extra_res == NULL) { + status = CR_BAD_PARAM_ERROR; + goto error; + } + + *((CRString **) a_extra_res) = token->dimen; + token->u.num = NULL; + token->dimen = NULL; + status = CR_OK; + break; + + case DELIM_TK: + *((guint32 *) a_res) = token->u.unichar; + status = CR_OK; + break; + + case UNICODERANGE_TK: + default: + status = CR_PARSING_ERROR; + break; + } + + cr_token_destroy (token); + token = NULL; + } else { + cr_tknzr_unget_token (a_this, token); + token = NULL; + status = CR_PARSING_ERROR; + } + + return status; + + error: + + if (token) { + cr_tknzr_unget_token (a_this, token); + token = NULL; + } + + return status; +} + +void +cr_tknzr_destroy (CRTknzr * a_this) +{ + g_return_if_fail (a_this); + + if (PRIVATE (a_this) && PRIVATE (a_this)->input) { + if (cr_input_unref (PRIVATE (a_this)->input) + == TRUE) { + PRIVATE (a_this)->input = NULL; + } + } + + if (PRIVATE (a_this)->token_cache) { + cr_token_destroy (PRIVATE (a_this)->token_cache); + PRIVATE (a_this)->token_cache = NULL; + } + + if (PRIVATE (a_this)) { + g_free (PRIVATE (a_this)); + PRIVATE (a_this) = NULL; + } + + g_free (a_this); +} |