summaryrefslogtreecommitdiffstats
path: root/src/sexp-input.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/sexp-input.cpp514
1 files changed, 514 insertions, 0 deletions
diff --git a/src/sexp-input.cpp b/src/sexp-input.cpp
new file mode 100644
index 0000000..cb21686
--- /dev/null
+++ b/src/sexp-input.cpp
@@ -0,0 +1,514 @@
+/**
+ *
+ * Copyright 2021-2023 Ribose Inc. (https://www.ribose.com)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of
+ * this software and associated documentation files (the "Software"), to deal in
+ * the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
+ * the Software, and to permit persons to whom the Software is furnished to do so,
+ * subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
+ * COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Original copyright
+ *
+ * SEXP implementation code sexp-input.c
+ * Ron Rivest
+ * 7/21/1997
+ */
+
+#include "sexpp/sexp.h"
+
+namespace sexp {
+
+/*
+ * sexp_input_stream_t::sexp_input_stream_t
+ * Creates and initializes new sexp_input_stream_t object.
+ */
+
+sexp_input_stream_t::sexp_input_stream_t(std::istream *i, size_t m_depth)
+{
+ set_input(i, m_depth);
+}
+
+/*
+ * sexp_input_stream_t::set_input(std::istream *i, size_t m_depth)
+ */
+
+sexp_input_stream_t *sexp_input_stream_t::set_input(std::istream *i, size_t m_depth)
+{
+ input_file = i;
+ byte_size = 8;
+ next_char = ' ';
+ bits = 0;
+ n_bits = 0;
+ count = -1;
+ reset_depth(m_depth);
+ return this;
+}
+
+/*
+ * sexp_input_stream_t::set_byte_size(newByteSize)
+ */
+sexp_input_stream_t *sexp_input_stream_t::set_byte_size(uint32_t newByteSize)
+{
+ byte_size = newByteSize;
+ n_bits = 0;
+ bits = 0;
+ return this;
+}
+
+int sexp_input_stream_t::read_char(void)
+{
+ count++;
+ return input_file->get();
+}
+
+/*
+ * sexp_input_stream_t::get_char()
+ * This is one possible character input routine for an input stream.
+ * (This version uses the standard input stream.)
+ * get_char places next 8-bit character into is->next_char.
+ * It also updates the count of number of 8-bit characters read.
+ * The value EOF is obtained when no more input is available.
+ * This code handles 4-bit/6-bit/8-bit channels.
+ */
+sexp_input_stream_t *sexp_input_stream_t::get_char(void)
+{
+ int c;
+ if (next_char == EOF) {
+ byte_size = 8;
+ return this;
+ }
+
+ while (true) {
+ c = next_char = read_char();
+ if (c == EOF)
+ return this;
+ if ((byte_size == 6 && (c == '|' || c == '}')) || (byte_size == 4 && (c == '#'))) {
+ // end of region reached; return terminating character, after checking for
+ // unused bits
+ if (n_bits > 0 && (((1 << n_bits) - 1) & bits) != 0) {
+ sexp_error(sexp_exception_t::warning,
+ "%d-bit region ended with %d unused bits left-over",
+ byte_size,
+ n_bits,
+ count);
+ }
+ return set_byte_size(8);
+ } else if (byte_size != 8 && is_white_space(c))
+ ; /* ignore white space in hex and base64 regions */
+ else if (byte_size == 6 && c == '=')
+ ; /* ignore equals signs in base64 regions */
+ else if (byte_size == 8) {
+ return this;
+ } else if (byte_size < 8) {
+ bits = bits << byte_size;
+ n_bits += byte_size;
+ if (byte_size == 6 && is_base64_digit(c))
+ bits = bits | base64value(c);
+ else if (byte_size == 4 && is_hex_digit(c))
+ bits = bits | hexvalue(c);
+ else {
+ sexp_error(sexp_exception_t::error,
+ "character '%c' found in %u-bit coding region",
+ next_char,
+ byte_size,
+ count);
+ }
+ if (n_bits >= 8) {
+ next_char = (bits >> (n_bits - 8)) & 0xFF;
+ n_bits -= 8;
+ return this;
+ }
+ }
+ }
+}
+
+/*
+ * sexp_input_stream_t::skip_white_space
+ * Skip over any white space on the given sexp_input_stream_t.
+ */
+sexp_input_stream_t *sexp_input_stream_t::skip_white_space(void)
+{
+ while (is_white_space(next_char))
+ get_char();
+ return this;
+}
+
+/*
+ * sexp_input_stream_t::skip_char(c)
+ * Skip the following input character on input stream is, if it is
+ * equal to the character c. If it is not equal, then an error occurs.
+ */
+sexp_input_stream_t *sexp_input_stream_t::skip_char(int c)
+{
+ if (next_char != c)
+ sexp_error(sexp_exception_t::error,
+ "character '%c' found where '%c' was expected",
+ next_char,
+ c,
+ count);
+ return get_char();
+}
+
+/*
+ * sexp_input_stream_t::scan_token(ss)
+ * scan one or more characters into simple string ss as a token.
+ */
+void sexp_input_stream_t::scan_token(sexp_simple_string_t &ss)
+{
+ skip_white_space();
+ while (is_token_char(next_char)) {
+ ss.append(next_char);
+ get_char();
+ }
+}
+
+/*
+ * sexp_input_stream_t::scan_to_eof(void)
+ * scan one or more characters (until EOF reached)
+ * return an object that is just that string
+ */
+std::shared_ptr<sexp_object_t> sexp_input_stream_t::scan_to_eof(void)
+{
+ sexp_simple_string_t ss;
+ skip_white_space();
+ while (next_char != EOF) {
+ ss.append(next_char);
+ get_char();
+ }
+ auto s = std::make_shared<sexp_string_t>();
+ s->set_string(ss);
+ return s;
+}
+
+/*
+ * scan_decimal_string(is)
+ * returns long integer that is value of decimal number
+ */
+uint32_t sexp_input_stream_t::scan_decimal_string(void)
+{
+ uint32_t value = 0;
+ uint32_t i = 0;
+ while (is_dec_digit(next_char)) {
+ value = value * 10 + decvalue(next_char);
+ get_char();
+ if (i++ > 8)
+ sexp_error(sexp_exception_t::error, "Decimal number is too long", 0, 0, count);
+ }
+ return value;
+}
+
+/*
+ * sexp_input_stream_t::scan_verbatim_string(is,ss,length)
+ * Reads verbatim string of given length into simple string ss.
+ */
+void sexp_input_stream_t::scan_verbatim_string(sexp_simple_string_t &ss, uint32_t length)
+{
+ skip_white_space()->skip_char(':');
+
+ // Some length is specified always, this is ensured by the caller's logic
+ assert(length != std::numeric_limits<uint32_t>::max());
+ for (uint32_t i = 0; i < length; i++) {
+ ss.append(next_char);
+ get_char();
+ }
+}
+
+/*
+ * sexp_input_stream_t::scan_quoted_string(ss,length)
+ * Reads quoted string of given length into simple string ss.
+ * Handles ordinary C escapes.
+ * If of indefinite length, length is std::numeric_limits<uint32_t>::max().
+ */
+void sexp_input_stream_t::scan_quoted_string(sexp_simple_string_t &ss, uint32_t length)
+{
+ skip_char('"');
+ while (ss.length() <= length) {
+ if (next_char == '\"') {
+ if (length == std::numeric_limits<uint32_t>::max() || (ss.length() == length)) {
+ skip_char('\"');
+ return;
+ } else
+ sexp_error(sexp_exception_t::error,
+ "Declared length was %d, but quoted string ended too early",
+ (int) length,
+ 0,
+ count);
+ } else if (next_char == '\\') /* handle escape sequence */
+ {
+ get_char();
+ switch (next_char) {
+ case 'b':
+ ss.append('\b');
+ break;
+ case 't':
+ ss.append('\t');
+ break;
+ case 'v':
+ ss.append('\v');
+ break;
+ case 'n':
+ ss.append('\n');
+ break;
+ case 'f':
+ ss.append('\f');
+ break;
+ case 'r':
+ ss.append('\r');
+ break;
+ case '\"':
+ ss.append('\"');
+ break;
+ case '\'':
+ ss.append('\'');
+ break;
+ case '\\':
+ ss.append('\\');
+ break;
+ case 'x': /* hexadecimal number */
+ {
+ int j, val;
+ val = 0;
+ get_char();
+ for (j = 0; j < 2; j++) {
+ if (is_hex_digit(next_char)) {
+ val = ((val << 4) | hexvalue(next_char));
+ if (j < 1) {
+ get_char();
+ }
+ } else
+ sexp_error(sexp_exception_t::error,
+ "Hex character \x5cx%x... too short",
+ val,
+ 0,
+ count);
+ }
+ ss.append(val);
+ } break;
+ case '\n': /* ignore backslash line feed */
+ get_char(); /* also ignore following carriage-return if present */
+ if (next_char != '\r')
+ continue;
+ break;
+ case '\r': /* ignore backslash carriage-return */
+ get_char(); /* also ignore following linefeed if present */
+ if (next_char != '\n')
+ continue;
+ break;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7': { /* octal number */
+ int j, val;
+ val = 0;
+ for (j = 0; j < 3; j++) {
+ if (next_char >= '0' && next_char <= '7') {
+ val = ((val << 3) | (next_char - '0'));
+ if (j < 2)
+ get_char();
+ } else
+ sexp_error(sexp_exception_t::error,
+ "Octal character \\%o... too short",
+ val,
+ 0,
+ count);
+ }
+ if (val > 255)
+ sexp_error(sexp_exception_t::error,
+ "Octal character \\%o... too big",
+ val,
+ 0,
+ count);
+ ss.append(val);
+ } break;
+ default:
+ sexp_error(sexp_exception_t::error,
+ "Unknown escape sequence \\%c",
+ next_char,
+ 0,
+ count);
+ }
+ } /* end of handling escape sequence */
+ else if (next_char == EOF) {
+ sexp_error(sexp_exception_t::error, "unexpected end of file", 0, 0, count);
+ } else {
+ ss.append(next_char);
+ }
+ get_char();
+ } /* end of main while loop */
+}
+
+/*
+ * scan_hexadecimal_string(ss,length)
+ * Reads hexadecimal string into simple string ss.
+ * String is of given length result, or length = std::numeric_limits<uint32_t>::max()
+ * if indefinite length.
+ */
+void sexp_input_stream_t::scan_hexadecimal_string(sexp_simple_string_t &ss, uint32_t length)
+{
+ set_byte_size(4)->skip_char('#');
+ while (next_char != EOF && (next_char != '#' || get_byte_size() == 4)) {
+ ss.append(next_char);
+ get_char();
+ }
+ skip_char('#');
+ if (ss.length() != length && length != std::numeric_limits<uint32_t>::max())
+ sexp_error(sexp_exception_t::warning,
+ "Hex string has length %d different than declared length %d",
+ ss.length(),
+ length,
+ count);
+}
+
+/*
+ * sexp_input_stream_t::scan_base64_string(ss,length)
+ * Reads base64 string into simple string ss.
+ * String is of given length result, or length = std::numeric_limits<uint32_t>::max()
+ * if indefinite length.
+ */
+void sexp_input_stream_t::scan_base64_string(sexp_simple_string_t &ss, uint32_t length)
+{
+ set_byte_size(6)->skip_char('|');
+ while (next_char != EOF && (next_char != '|' || get_byte_size() == 6)) {
+ ss.append(next_char);
+ get_char();
+ }
+ skip_char('|');
+ if (ss.length() != length && length != std::numeric_limits<uint32_t>::max())
+ sexp_error(sexp_exception_t::warning,
+ "Base64 string has length %d different than declared length %d",
+ ss.length(),
+ length,
+ count);
+}
+
+/*
+ * sexp_input_stream_t::scan_simple_string(void)
+ * Reads and returns a simple string from the input stream.
+ * Determines type of simple string from the initial character, and
+ * dispatches to appropriate routine based on that.
+ */
+sexp_simple_string_t sexp_input_stream_t::scan_simple_string(void)
+{
+ int length;
+ sexp_simple_string_t ss;
+ skip_white_space();
+ /* Note that it is important in the following code to test for token-ness
+ * before checking the other cases, so that a token may begin with ":",
+ * which would otherwise be treated as a verbatim string missing a length.
+ */
+ if (is_token_char(next_char) && !is_dec_digit(next_char)) {
+ scan_token(ss);
+ } else {
+ length = is_dec_digit(next_char) ? scan_decimal_string() :
+ std::numeric_limits<uint32_t>::max();
+
+ switch (next_char) {
+ case '\"':
+ scan_quoted_string(ss, length);
+ break;
+ case '#':
+ scan_hexadecimal_string(ss, length);
+ break;
+ case '|':
+ scan_base64_string(ss, length);
+ break;
+ case ':':
+ // ':' is 'tokenchar', so some length shall be defined
+ scan_verbatim_string(ss, length);
+ break;
+ default: {
+ const char *const msg = (next_char == EOF) ? "unexpected end of file" :
+ isprint(next_char) ? "illegal character '%c' (0x%x)" :
+ "illegal character 0x%x";
+ sexp_error(sexp_exception_t::error, msg, next_char, next_char, count);
+ }
+ }
+ }
+
+ if (ss.length() == 0)
+ sexp_error(sexp_exception_t::warning, "Simple string has zero length", 0, 0, count);
+ return ss;
+}
+
+/*
+ * sexp_input_stream_t::scan_string(void)
+ * Reads and returns a string [presentationhint]string from input stream.
+ */
+std::shared_ptr<sexp_string_t> sexp_input_stream_t::scan_string(void)
+{
+ auto s = std::make_shared<sexp_string_t>();
+ ;
+ s->parse(this);
+ return s;
+}
+
+/*
+ * sexp_input_stream_t::scan_list(void)
+ * Read and return a sexp_list_t from the input stream.
+ */
+std::shared_ptr<sexp_list_t> sexp_input_stream_t::scan_list(void)
+{
+ auto list = std::make_shared<sexp_list_t>();
+ list->parse(this);
+ return list;
+}
+
+/*
+ * sexp_input_stream_t::scan_object(void)
+ * Reads and returns a sexp_object_t from the given input stream.
+ */
+std::shared_ptr<sexp_object_t> sexp_input_stream_t::scan_object(void)
+{
+ std::shared_ptr<sexp_object_t> object;
+ skip_white_space();
+ if (next_char == '{' && byte_size != 6) {
+ set_byte_size(6)->skip_char('{');
+ object = scan_object();
+ skip_char('}');
+ } else {
+ if (next_char == '(')
+ object = scan_list();
+ else
+ object = scan_string();
+ }
+ return object;
+}
+
+/*
+ * sexp_input_stream_t::open_list(void)
+ */
+sexp_input_stream_t *sexp_input_stream_t::open_list(void)
+{
+ skip_char('(');
+ // gcc 4.8.5 generates wrong code in case of chaining like
+ // skip_char('(')->increase_depth(count)
+ increase_depth(count);
+ return this;
+}
+/*
+ * sexp_input_stream_t::close_list(void)
+ */
+sexp_input_stream_t *sexp_input_stream_t::close_list(void)
+{
+ skip_char(')');
+ decrease_depth();
+ return this;
+}
+
+} // namespace sexp