diff options
Diffstat (limited to '')
-rw-r--r-- | src/sexp-input.cpp | 514 |
1 files changed, 514 insertions, 0 deletions
diff --git a/src/sexp-input.cpp b/src/sexp-input.cpp new file mode 100644 index 0000000..cb21686 --- /dev/null +++ b/src/sexp-input.cpp @@ -0,0 +1,514 @@ +/** + * + * Copyright 2021-2023 Ribose Inc. (https://www.ribose.com) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy of + * this software and associated documentation files (the "Software"), to deal in + * the Software without restriction, including without limitation the rights to + * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of + * the Software, and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS + * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR + * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Original copyright + * + * SEXP implementation code sexp-input.c + * Ron Rivest + * 7/21/1997 + */ + +#include "sexpp/sexp.h" + +namespace sexp { + +/* + * sexp_input_stream_t::sexp_input_stream_t + * Creates and initializes new sexp_input_stream_t object. + */ + +sexp_input_stream_t::sexp_input_stream_t(std::istream *i, size_t m_depth) +{ + set_input(i, m_depth); +} + +/* + * sexp_input_stream_t::set_input(std::istream *i, size_t m_depth) + */ + +sexp_input_stream_t *sexp_input_stream_t::set_input(std::istream *i, size_t m_depth) +{ + input_file = i; + byte_size = 8; + next_char = ' '; + bits = 0; + n_bits = 0; + count = -1; + reset_depth(m_depth); + return this; +} + +/* + * sexp_input_stream_t::set_byte_size(newByteSize) + */ +sexp_input_stream_t *sexp_input_stream_t::set_byte_size(uint32_t newByteSize) +{ + byte_size = newByteSize; + n_bits = 0; + bits = 0; + return this; +} + +int sexp_input_stream_t::read_char(void) +{ + count++; + return input_file->get(); +} + +/* + * sexp_input_stream_t::get_char() + * This is one possible character input routine for an input stream. + * (This version uses the standard input stream.) + * get_char places next 8-bit character into is->next_char. + * It also updates the count of number of 8-bit characters read. + * The value EOF is obtained when no more input is available. + * This code handles 4-bit/6-bit/8-bit channels. + */ +sexp_input_stream_t *sexp_input_stream_t::get_char(void) +{ + int c; + if (next_char == EOF) { + byte_size = 8; + return this; + } + + while (true) { + c = next_char = read_char(); + if (c == EOF) + return this; + if ((byte_size == 6 && (c == '|' || c == '}')) || (byte_size == 4 && (c == '#'))) { + // end of region reached; return terminating character, after checking for + // unused bits + if (n_bits > 0 && (((1 << n_bits) - 1) & bits) != 0) { + sexp_error(sexp_exception_t::warning, + "%d-bit region ended with %d unused bits left-over", + byte_size, + n_bits, + count); + } + return set_byte_size(8); + } else if (byte_size != 8 && is_white_space(c)) + ; /* ignore white space in hex and base64 regions */ + else if (byte_size == 6 && c == '=') + ; /* ignore equals signs in base64 regions */ + else if (byte_size == 8) { + return this; + } else if (byte_size < 8) { + bits = bits << byte_size; + n_bits += byte_size; + if (byte_size == 6 && is_base64_digit(c)) + bits = bits | base64value(c); + else if (byte_size == 4 && is_hex_digit(c)) + bits = bits | hexvalue(c); + else { + sexp_error(sexp_exception_t::error, + "character '%c' found in %u-bit coding region", + next_char, + byte_size, + count); + } + if (n_bits >= 8) { + next_char = (bits >> (n_bits - 8)) & 0xFF; + n_bits -= 8; + return this; + } + } + } +} + +/* + * sexp_input_stream_t::skip_white_space + * Skip over any white space on the given sexp_input_stream_t. + */ +sexp_input_stream_t *sexp_input_stream_t::skip_white_space(void) +{ + while (is_white_space(next_char)) + get_char(); + return this; +} + +/* + * sexp_input_stream_t::skip_char(c) + * Skip the following input character on input stream is, if it is + * equal to the character c. If it is not equal, then an error occurs. + */ +sexp_input_stream_t *sexp_input_stream_t::skip_char(int c) +{ + if (next_char != c) + sexp_error(sexp_exception_t::error, + "character '%c' found where '%c' was expected", + next_char, + c, + count); + return get_char(); +} + +/* + * sexp_input_stream_t::scan_token(ss) + * scan one or more characters into simple string ss as a token. + */ +void sexp_input_stream_t::scan_token(sexp_simple_string_t &ss) +{ + skip_white_space(); + while (is_token_char(next_char)) { + ss.append(next_char); + get_char(); + } +} + +/* + * sexp_input_stream_t::scan_to_eof(void) + * scan one or more characters (until EOF reached) + * return an object that is just that string + */ +std::shared_ptr<sexp_object_t> sexp_input_stream_t::scan_to_eof(void) +{ + sexp_simple_string_t ss; + skip_white_space(); + while (next_char != EOF) { + ss.append(next_char); + get_char(); + } + auto s = std::make_shared<sexp_string_t>(); + s->set_string(ss); + return s; +} + +/* + * scan_decimal_string(is) + * returns long integer that is value of decimal number + */ +uint32_t sexp_input_stream_t::scan_decimal_string(void) +{ + uint32_t value = 0; + uint32_t i = 0; + while (is_dec_digit(next_char)) { + value = value * 10 + decvalue(next_char); + get_char(); + if (i++ > 8) + sexp_error(sexp_exception_t::error, "Decimal number is too long", 0, 0, count); + } + return value; +} + +/* + * sexp_input_stream_t::scan_verbatim_string(is,ss,length) + * Reads verbatim string of given length into simple string ss. + */ +void sexp_input_stream_t::scan_verbatim_string(sexp_simple_string_t &ss, uint32_t length) +{ + skip_white_space()->skip_char(':'); + + // Some length is specified always, this is ensured by the caller's logic + assert(length != std::numeric_limits<uint32_t>::max()); + for (uint32_t i = 0; i < length; i++) { + ss.append(next_char); + get_char(); + } +} + +/* + * sexp_input_stream_t::scan_quoted_string(ss,length) + * Reads quoted string of given length into simple string ss. + * Handles ordinary C escapes. + * If of indefinite length, length is std::numeric_limits<uint32_t>::max(). + */ +void sexp_input_stream_t::scan_quoted_string(sexp_simple_string_t &ss, uint32_t length) +{ + skip_char('"'); + while (ss.length() <= length) { + if (next_char == '\"') { + if (length == std::numeric_limits<uint32_t>::max() || (ss.length() == length)) { + skip_char('\"'); + return; + } else + sexp_error(sexp_exception_t::error, + "Declared length was %d, but quoted string ended too early", + (int) length, + 0, + count); + } else if (next_char == '\\') /* handle escape sequence */ + { + get_char(); + switch (next_char) { + case 'b': + ss.append('\b'); + break; + case 't': + ss.append('\t'); + break; + case 'v': + ss.append('\v'); + break; + case 'n': + ss.append('\n'); + break; + case 'f': + ss.append('\f'); + break; + case 'r': + ss.append('\r'); + break; + case '\"': + ss.append('\"'); + break; + case '\'': + ss.append('\''); + break; + case '\\': + ss.append('\\'); + break; + case 'x': /* hexadecimal number */ + { + int j, val; + val = 0; + get_char(); + for (j = 0; j < 2; j++) { + if (is_hex_digit(next_char)) { + val = ((val << 4) | hexvalue(next_char)); + if (j < 1) { + get_char(); + } + } else + sexp_error(sexp_exception_t::error, + "Hex character \x5cx%x... too short", + val, + 0, + count); + } + ss.append(val); + } break; + case '\n': /* ignore backslash line feed */ + get_char(); /* also ignore following carriage-return if present */ + if (next_char != '\r') + continue; + break; + case '\r': /* ignore backslash carriage-return */ + get_char(); /* also ignore following linefeed if present */ + if (next_char != '\n') + continue; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { /* octal number */ + int j, val; + val = 0; + for (j = 0; j < 3; j++) { + if (next_char >= '0' && next_char <= '7') { + val = ((val << 3) | (next_char - '0')); + if (j < 2) + get_char(); + } else + sexp_error(sexp_exception_t::error, + "Octal character \\%o... too short", + val, + 0, + count); + } + if (val > 255) + sexp_error(sexp_exception_t::error, + "Octal character \\%o... too big", + val, + 0, + count); + ss.append(val); + } break; + default: + sexp_error(sexp_exception_t::error, + "Unknown escape sequence \\%c", + next_char, + 0, + count); + } + } /* end of handling escape sequence */ + else if (next_char == EOF) { + sexp_error(sexp_exception_t::error, "unexpected end of file", 0, 0, count); + } else { + ss.append(next_char); + } + get_char(); + } /* end of main while loop */ +} + +/* + * scan_hexadecimal_string(ss,length) + * Reads hexadecimal string into simple string ss. + * String is of given length result, or length = std::numeric_limits<uint32_t>::max() + * if indefinite length. + */ +void sexp_input_stream_t::scan_hexadecimal_string(sexp_simple_string_t &ss, uint32_t length) +{ + set_byte_size(4)->skip_char('#'); + while (next_char != EOF && (next_char != '#' || get_byte_size() == 4)) { + ss.append(next_char); + get_char(); + } + skip_char('#'); + if (ss.length() != length && length != std::numeric_limits<uint32_t>::max()) + sexp_error(sexp_exception_t::warning, + "Hex string has length %d different than declared length %d", + ss.length(), + length, + count); +} + +/* + * sexp_input_stream_t::scan_base64_string(ss,length) + * Reads base64 string into simple string ss. + * String is of given length result, or length = std::numeric_limits<uint32_t>::max() + * if indefinite length. + */ +void sexp_input_stream_t::scan_base64_string(sexp_simple_string_t &ss, uint32_t length) +{ + set_byte_size(6)->skip_char('|'); + while (next_char != EOF && (next_char != '|' || get_byte_size() == 6)) { + ss.append(next_char); + get_char(); + } + skip_char('|'); + if (ss.length() != length && length != std::numeric_limits<uint32_t>::max()) + sexp_error(sexp_exception_t::warning, + "Base64 string has length %d different than declared length %d", + ss.length(), + length, + count); +} + +/* + * sexp_input_stream_t::scan_simple_string(void) + * Reads and returns a simple string from the input stream. + * Determines type of simple string from the initial character, and + * dispatches to appropriate routine based on that. + */ +sexp_simple_string_t sexp_input_stream_t::scan_simple_string(void) +{ + int length; + sexp_simple_string_t ss; + skip_white_space(); + /* Note that it is important in the following code to test for token-ness + * before checking the other cases, so that a token may begin with ":", + * which would otherwise be treated as a verbatim string missing a length. + */ + if (is_token_char(next_char) && !is_dec_digit(next_char)) { + scan_token(ss); + } else { + length = is_dec_digit(next_char) ? scan_decimal_string() : + std::numeric_limits<uint32_t>::max(); + + switch (next_char) { + case '\"': + scan_quoted_string(ss, length); + break; + case '#': + scan_hexadecimal_string(ss, length); + break; + case '|': + scan_base64_string(ss, length); + break; + case ':': + // ':' is 'tokenchar', so some length shall be defined + scan_verbatim_string(ss, length); + break; + default: { + const char *const msg = (next_char == EOF) ? "unexpected end of file" : + isprint(next_char) ? "illegal character '%c' (0x%x)" : + "illegal character 0x%x"; + sexp_error(sexp_exception_t::error, msg, next_char, next_char, count); + } + } + } + + if (ss.length() == 0) + sexp_error(sexp_exception_t::warning, "Simple string has zero length", 0, 0, count); + return ss; +} + +/* + * sexp_input_stream_t::scan_string(void) + * Reads and returns a string [presentationhint]string from input stream. + */ +std::shared_ptr<sexp_string_t> sexp_input_stream_t::scan_string(void) +{ + auto s = std::make_shared<sexp_string_t>(); + ; + s->parse(this); + return s; +} + +/* + * sexp_input_stream_t::scan_list(void) + * Read and return a sexp_list_t from the input stream. + */ +std::shared_ptr<sexp_list_t> sexp_input_stream_t::scan_list(void) +{ + auto list = std::make_shared<sexp_list_t>(); + list->parse(this); + return list; +} + +/* + * sexp_input_stream_t::scan_object(void) + * Reads and returns a sexp_object_t from the given input stream. + */ +std::shared_ptr<sexp_object_t> sexp_input_stream_t::scan_object(void) +{ + std::shared_ptr<sexp_object_t> object; + skip_white_space(); + if (next_char == '{' && byte_size != 6) { + set_byte_size(6)->skip_char('{'); + object = scan_object(); + skip_char('}'); + } else { + if (next_char == '(') + object = scan_list(); + else + object = scan_string(); + } + return object; +} + +/* + * sexp_input_stream_t::open_list(void) + */ +sexp_input_stream_t *sexp_input_stream_t::open_list(void) +{ + skip_char('('); + // gcc 4.8.5 generates wrong code in case of chaining like + // skip_char('(')->increase_depth(count) + increase_depth(count); + return this; +} +/* + * sexp_input_stream_t::close_list(void) + */ +sexp_input_stream_t *sexp_input_stream_t::close_list(void) +{ + skip_char(')'); + decrease_depth(); + return this; +} + +} // namespace sexp |