summaryrefslogtreecommitdiffstats
path: root/src/libzscanner/scanner.rl
diff options
context:
space:
mode:
Diffstat (limited to 'src/libzscanner/scanner.rl')
-rw-r--r--src/libzscanner/scanner.rl541
1 files changed, 541 insertions, 0 deletions
diff --git a/src/libzscanner/scanner.rl b/src/libzscanner/scanner.rl
new file mode 100644
index 0000000..4050b56
--- /dev/null
+++ b/src/libzscanner/scanner.rl
@@ -0,0 +1,541 @@
+/* Copyright (C) 2018 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <libgen.h>
+#include <math.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "libzscanner/scanner.h"
+#include "libzscanner/functions.h"
+#include "libknot/descriptor.h"
+
+/*! \brief Maximal length of rdata item. */
+#define MAX_ITEM_LENGTH 255
+
+/*! \brief Latitude value for equator (2^31). */
+#define LOC_LAT_ZERO (uint32_t)2147483648
+/*! \brief Longitude value for meridian (2^31). */
+#define LOC_LONG_ZERO (uint32_t)2147483648
+/*! \brief Zero level altitude value. */
+#define LOC_ALT_ZERO (uint32_t)10000000
+
+/*
+ * The following shorthands operate on a scanner context pointer that must be
+ * in scope under the name `s`. Each one expands to a brace block rather than
+ * a single statement, so `if (x) ERR(e); else ...` does not compile; wrap the
+ * use in braces in such places.
+ */
+
+/*! \brief Shorthand for setting warning data (stores the code, leaves the fatal flag untouched). */
+#define WARN(err_code) { s->error.code = err_code; }
+/*! \brief Shorthand for setting error data (stores the code and raises the fatal flag). */
+#define ERR(err_code) { WARN(err_code); s->error.fatal = true; }
+/*! \brief Shorthand for error reset (code set to ZS_OK, fatal flag cleared). */
+#define NOERR { WARN(ZS_OK); s->error.fatal = false; }
+
+/*!
+ * \brief Writes record type number to r_data.
+ *
+ * The value is stored in network byte order and the write position is
+ * advanced past the two bytes written.
+ *
+ * \param type Type number (host byte order).
+ * \param rdata_tail Position where to write type number to; on return it
+ *                   points just behind the written bytes.
+ */
+static inline void type_num(const uint16_t type, uint8_t **rdata_tail)
+{
+	// Copy byte-wise: the tail is an arbitrary offset into a byte buffer, so
+	// it need not be 2-byte aligned, and storing through a casted uint16_t
+	// pointer would also violate the strict aliasing rules.
+	const uint16_t net_type = htons(type);
+
+	memcpy(*rdata_tail, &net_type, sizeof(net_type));
+	*rdata_tail += sizeof(net_type);
+}
+
+/*!
+ * \brief Marks a record type in the per-window type bitmaps.
+ *
+ * The upper byte of the type selects the window, the lower byte selects the
+ * bit inside that window (most significant bit of each octet first). The
+ * used length of the window and the index of the last used window are
+ * raised when the new bit lies beyond them.
+ *
+ * \param type Type number.
+ * \param s Scanner context.
+ */
+static inline void window_add_bit(const uint16_t type, zs_scanner_t *s) {
+	const uint8_t window = type >> 8;
+	const uint8_t offset = type & 0xFF;
+	const uint8_t octet = offset >> 3;
+	const uint8_t mask = 0x80 >> (offset & 7);
+
+	s->windows[window].bitmap[octet] |= mask;
+
+	// Extend the used part of the window so that it covers the octet.
+	if (s->windows[window].length <= octet) {
+		s->windows[window].length = octet + 1;
+	}
+
+	// Remember the highest window touched so far.
+	if (window > s->last_window) {
+		s->last_window = window;
+	}
+}
+
+// Include scanner file (in Ragel).
+// This section names the state machine, pulls its definition in from
+// scanner_body.rl and has Ragel emit the generated machine data at this
+// place in the C output.
+%%{
+ machine zone_scanner;
+
+ include "scanner_body.rl";
+
+ write data;
+}%%
+
+/*!
+ * \brief Initializes the scanner context.
+ *
+ * The context is zeroed, the state machine is put into its start state and
+ * the origin is established by parsing a generated "$ORIGIN" directive.
+ *
+ * \param s Scanner context.
+ * \param origin Initial zone origin; NULL or an empty string means the root
+ *               zone. A trailing dot is appended if missing.
+ * \param rclass Default record class.
+ * \param ttl Default record TTL.
+ *
+ * \retval  0 on success.
+ * \retval -1 on error.
+ */
+__attribute__((visibility("default")))
+int zs_init(
+	zs_scanner_t *s,
+	const char *origin,
+	const uint16_t rclass,
+	const uint32_t ttl)
+{
+	if (s == NULL) {
+		return -1;
+	}
+
+	memset(s, 0, sizeof(*s));
+
+	// The start state of the machine is not zero.
+	s->cs = %%{ write start; }%%;
+
+	// No file is opened yet.
+	s->file.descriptor = -1;
+
+	// Fall back to the root zone if no origin was given.
+	if (origin == NULL || origin[0] == '\0') {
+		origin = ".";
+	}
+	const bool absolute = (origin[strlen(origin) - 1] == '.');
+
+	// Build the zone settings header, completing the trailing dot if needed.
+	char settings[1024];
+	int ret = snprintf(settings, sizeof(settings),
+	                   absolute ? "$ORIGIN %s\n" : "$ORIGIN %s.\n", origin);
+	if (ret <= 0 || (size_t)ret >= sizeof(settings)) {
+		ERR(ZS_ENOMEM);
+		return -1;
+	}
+
+	// Let the scanner itself process the header to set up the origin.
+	if (zs_set_input_string(s, settings, ret) != 0) {
+		return -1;
+	}
+	if (zs_parse_all(s) != 0) {
+		return -1;
+	}
+
+	// Scanner defaults.
+	s->path = strdup(".");
+	if (s->path == NULL) {
+		ERR(ZS_ENOMEM);
+		return -1;
+	}
+	s->default_class = rclass;
+	s->default_ttl = ttl;
+	s->line_counter = 1;
+
+	s->state = ZS_STATE_NONE;
+	s->process.automatic = false;
+
+	return 0;
+}
+
+/*!
+ * \brief Releases the current input and resets the input limits.
+ *
+ * If a file is opened, its content is unmapped or freed (depending on how it
+ * was loaded) and the descriptor is closed. A string input is not released
+ * here, only its limits are forgotten.
+ *
+ * \param s Scanner context.
+ * \param keep_filename If true, the stored file name is preserved for
+ *                      a possible trailing error report.
+ */
+static void input_deinit(
+	zs_scanner_t *s,
+	bool keep_filename)
+{
+	// Deinit the file input.
+	if (s->file.descriptor != -1) {
+		// Release the file content.
+		if (s->input.start != NULL) {
+			if (s->input.mmaped) {
+				munmap((void *)s->input.start,
+				       s->input.end - s->input.start);
+			} else {
+				free((void *)s->input.start);
+			}
+		}
+
+		// Close the opened file.
+		close(s->file.descriptor);
+		s->file.descriptor = -1;
+	}
+
+	// Keep file name for possible trailing error report.
+	if (!keep_filename) {
+		free(s->file.name);
+		s->file.name = NULL;
+	}
+
+	// Unset the input limits.
+	s->input.start = NULL;
+	s->input.current = NULL;
+	s->input.end = NULL;
+	s->input.eof = false;
+
+	// Forget how the released content was loaded, otherwise a later
+	// heap-allocated input would be passed to munmap() instead of free().
+	s->input.mmaped = false;
+}
+
+/*!
+ * \brief Releases all resources held by the scanner context.
+ *
+ * The context structure itself is not freed. Calling the function again on
+ * the same context is harmless.
+ *
+ * \param s Scanner context (NULL is ignored).
+ */
+__attribute__((visibility("default")))
+void zs_deinit(
+	zs_scanner_t *s)
+{
+	if (s == NULL) {
+		return;
+	}
+
+	input_deinit(s, false);
+
+	// Clear the pointer so that a repeated deinit does not free it twice.
+	free(s->path);
+	s->path = NULL;
+}
+
+/*!
+ * \brief Makes a memory block the current scanner input.
+ *
+ * Any previously set input is released first. The block is not copied, the
+ * scanner only remembers its limits.
+ *
+ * \param s Scanner context.
+ * \param input Start of the input block.
+ * \param size Length of the input block in bytes.
+ * \param final_block True if no more input follows this block; the stored
+ *                    file name is kept in that case.
+ *
+ * \retval  0 on success.
+ * \retval -1 on error.
+ */
+static int set_input_string(
+	zs_scanner_t *s,
+	const char *input,
+	size_t size,
+	bool final_block)
+{
+	if (s == NULL) {
+		return -1;
+	} else if (input == NULL) {
+		ERR(ZS_EINVAL);
+		return -1;
+	}
+
+	// Drop whatever input was active so far.
+	input_deinit(s, final_block);
+
+	// Point the scanner at the new block.
+	s->input.eof = final_block;
+	s->input.start = input;
+	s->input.end = &input[size];
+	s->input.current = s->input.start;
+
+	return 0;
+}
+
+/*!
+ * \brief Reads the whole content available on a descriptor into memory.
+ *
+ * Reading continues until read() reports the end of input, so short reads
+ * (usual for pipes and character devices) do not truncate the result.
+ *
+ * \param fd Descriptor opened for reading.
+ * \param bufsize Output: number of bytes stored in the returned buffer
+ *                (set on success only).
+ *
+ * \return Newly allocated buffer which the caller must free(), or NULL if
+ *         an allocation or read() failed.
+ */
+static char *read_file_to_buf(
+	int fd,
+	size_t *bufsize)
+{
+	size_t capacity = 8192;
+	size_t used = 0;
+
+	char *buf = malloc(capacity);
+	if (buf == NULL) {
+		return NULL;
+	}
+
+	for (;;) {
+		// Double the buffer when full so the next read has room.
+		if (used == capacity) {
+			if (capacity > SIZE_MAX / 2) {
+				free(buf);
+				return NULL;
+			}
+			char *grown = realloc(buf, 2 * capacity);
+			if (grown == NULL) {
+				free(buf);
+				return NULL;
+			}
+			buf = grown;
+			capacity *= 2;
+		}
+
+		ssize_t ret = read(fd, buf + used, capacity - used);
+		if (ret < 0) {
+			free(buf);
+			return NULL;
+		}
+		if (ret == 0) {
+			// End of input.
+			break;
+		}
+		used += (size_t)ret;
+	}
+
+	*bufsize = used;
+	return buf;
+}
+
+/*!
+ * \brief Sets a memory block as the scanner input.
+ *
+ * The scanner state is reset and the block is registered as a non-final
+ * input block. The block is not copied.
+ *
+ * \param s Scanner context.
+ * \param input Start of the input block.
+ * \param size Length of the input block in bytes.
+ *
+ * \retval  0 on success.
+ * \retval -1 on error (including a NULL context).
+ */
+__attribute__((visibility("default")))
+int zs_set_input_string(
+	zs_scanner_t *s,
+	const char *input,
+	size_t size)
+{
+	// Reject a missing context before touching it, like the rest of the API.
+	if (s == NULL) {
+		return -1;
+	}
+
+	s->state = ZS_STATE_NONE;
+
+	return set_input_string(s, input, size, false);
+}
+
+/*!
+ * \brief Opens a file and sets its content as the scanner input.
+ *
+ * A non-empty regular file is memory-mapped. Character devices, block
+ * devices and FIFOs are read into a heap buffer instead. Any other file
+ * type is refused. On success the directory of the file (if it can be
+ * resolved) becomes the scanner path and the file name is remembered.
+ *
+ * \param s Scanner context.
+ * \param file_name Name of the file to scan.
+ *
+ * \retval  0 on success.
+ * \retval -1 on error; the error code is stored in the context if s is
+ *            not NULL.
+ */
+__attribute__((visibility("default")))
+int zs_set_input_file(
+	zs_scanner_t *s,
+	const char *file_name)
+{
+	if (s == NULL) {
+		return -1;
+	}
+
+	if (file_name == NULL) {
+		ERR(ZS_EINVAL);
+		return -1;
+	}
+
+	// Deinit possibly opened file.
+	input_deinit(s, false);
+
+	// Try to open the file.
+	s->file.descriptor = open(file_name, O_RDONLY);
+	if (s->file.descriptor == -1) {
+		ERR(ZS_FILE_OPEN);
+		return -1;
+	}
+
+	char *start = NULL;
+	size_t size = 0;
+
+	// The scanner may be reused; never inherit the mapping flag of
+	// a previous input, it decides between munmap() and free() later.
+	s->input.mmaped = false;
+
+	// Check the input.
+	struct stat file_stat;
+	if (fstat(s->file.descriptor, &file_stat) == -1) {
+		ERR(ZS_FILE_INVALID);
+		input_deinit(s, false);
+		return -1;
+	} else if (S_ISCHR(file_stat.st_mode) ||
+	           S_ISBLK(file_stat.st_mode) ||
+	           S_ISFIFO(file_stat.st_mode)) {
+		// Workaround if cannot mmap, read to memory.
+		start = read_file_to_buf(s->file.descriptor, &size);
+		if (start == NULL) {
+			ERR(ZS_FILE_INVALID);
+			input_deinit(s, false);
+			return -1;
+		}
+	} else if (!S_ISREG(file_stat.st_mode)) { // Require regular file.
+		ERR(ZS_FILE_INVALID);
+		input_deinit(s, false);
+		return -1;
+	} else if (file_stat.st_size > 0) { // Mmap non-empty file.
+		start = mmap(0, file_stat.st_size, PROT_READ, MAP_SHARED,
+		             s->file.descriptor, 0);
+		if (start == MAP_FAILED) {
+			ERR(ZS_FILE_INVALID);
+			input_deinit(s, false);
+			return -1;
+		}
+
+		size = file_stat.st_size;
+		s->input.mmaped = true;
+
+		// Try to set the mapped memory advise to sequential.
+		(void)madvise(start, size, MADV_SEQUENTIAL);
+	}
+
+	// Set the scanner input limits.
+	s->input.start = start;
+	s->input.current = start;
+	s->input.end = start + size;
+
+	// Get absolute path of the zone file if possible.
+	char *full_name = realpath(file_name, NULL);
+	if (full_name != NULL) {
+		free(s->path);
+		s->path = strdup(dirname(full_name));
+		free(full_name);
+		if (s->path == NULL) {
+			ERR(ZS_ENOMEM);
+			input_deinit(s, false);
+			return -1;
+		}
+	}
+
+	s->file.name = strdup(file_name);
+	if (s->file.name == NULL) {
+		ERR(ZS_ENOMEM);
+		input_deinit(s, false);
+		return -1;
+	}
+
+	s->state = ZS_STATE_NONE;
+
+	return 0;
+}
+
+/*!
+ * \brief Registers the processing callbacks and their user data.
+ *
+ * \param s Scanner context.
+ * \param process_record Callback stored as the record handler (may be NULL).
+ * \param process_error Callback stored as the error handler (may be NULL).
+ * \param data Arbitrary pointer stored alongside the callbacks.
+ *
+ * \retval  0 on success.
+ * \retval -1 if the context is NULL.
+ */
+__attribute__((visibility("default")))
+int zs_set_processing(
+	zs_scanner_t *s,
+	void (*process_record)(zs_scanner_t *),
+	void (*process_error)(zs_scanner_t *),
+	void *data)
+{
+	if (s != NULL) {
+		s->process.record = process_record;
+		s->process.error = process_error;
+		s->process.data = data;
+		return 0;
+	}
+
+	return -1;
+}
+
+/*!
+ * \brief Tracks the handling of a backslash that ends an input block.
+ *
+ * When parse() finishes a block with the value WRAP_DETECTED, it feeds
+ * an auxiliary one-character "\" block to the scanner and switches to
+ * WRAP_PROCESS; otherwise it resets the value to WRAP_NONE.
+ * NOTE(review): WRAP_DETECTED is not assigned anywhere in the code visible
+ * here; presumably the actions in scanner_body.rl set it - confirm.
+ */
+typedef enum {
+ WRAP_NONE, // Initial state.
+ WRAP_DETECTED, // Input block end is a first '\' in rdata.
+ WRAP_PROCESS // Parsing of auxiliary block = "\".
+} wrap_t;
+
+/*!
+ * \brief Runs the generated state machine over the current input block.
+ *
+ * The machine state is loaded from the context into local variables, the
+ * Ragel-generated code is executed and the state is stored back. If the
+ * machine ends in its error state, or the final block ends inside
+ * a multiline record, an extra error is reported and the function returns
+ * WITHOUT storing the machine state back.
+ *
+ * The local variable names (p, pe, eof, cs, top, stack, rdata_tail,
+ * rdata_stop) are referenced by the generated code, do not rename them.
+ *
+ * \param s Scanner context.
+ * \param wrap Block-wrap state; see wrap_t. If it equals WRAP_DETECTED after
+ *             the block is processed, an auxiliary "\" block is parsed by
+ *             a recursive call.
+ */
+static void parse(
+ zs_scanner_t *s,
+ wrap_t *wrap)
+{
+ // Restore scanner input limits (Ragel internals).
+ const char *p = s->input.current;
+ const char *pe = s->input.end;
+ // The end-of-file pointer is set for the final block only.
+ const char *eof = s->input.eof ? pe : NULL;
+
+ // Restore state variables (Ragel internals).
+ int cs = s->cs;
+ int top = s->top;
+ int stack[ZS_RAGEL_STACK_SIZE];
+ memcpy(stack, s->stack, sizeof(stack));
+
+ // Next 2 variables are for better performance.
+ // Restoring r_data pointer to next free space.
+ uint8_t *rdata_tail = s->r_data + s->r_data_tail;
+ // Initialization of the last r_data byte.
+ uint8_t *rdata_stop = s->r_data + ZS_MAX_RDATA_LENGTH - 1;
+
+ // Write scanner body (in C).
+ %% write exec;
+
+ // Check if the scanner state machine is in an uncovered state.
+ bool extra_error = false;
+ if (cs == %%{ write error; }%%) {
+ ERR(ZS_UNCOVERED_STATE);
+ extra_error = true;
+ // Check for an unclosed multiline record.
+ } else if (s->input.eof && s->multiline) {
+ ERR(ZS_UNCLOSED_MULTILINE);
+ extra_error = true;
+ }
+
+ // Treat the extra error.
+ if (extra_error) {
+ s->error.counter++;
+ s->state = ZS_STATE_ERROR;
+
+ // Copy the error context just for the part of the current line.
+ // At most 50 characters starting at the current position are taken.
+ s->buffer_length = 0;
+ while (p < pe && *p != '\n' && s->buffer_length < 50) {
+ s->buffer[s->buffer_length++] = *p++;
+ }
+ // The terminating zero is counted in buffer_length as well.
+ s->buffer[s->buffer_length++] = 0;
+
+ // Execute the error callback.
+ // It is invoked in the automatic processing mode only.
+ if (s->process.automatic && s->process.error != NULL) {
+ s->process.error(s);
+ }
+
+ // Both errors are fatal; the machine state is intentionally not saved.
+ return;
+ }
+
+ // Storing scanner states.
+ s->cs = cs;
+ s->top = top;
+ memcpy(s->stack, stack, sizeof(stack));
+
+ // Store the current parser position.
+ s->input.current = p;
+
+ // Storing r_data pointer.
+ s->r_data_tail = rdata_tail - s->r_data;
+
+ // Resolve a backslash found at the very end of the block by parsing
+ // an auxiliary final block consisting of a single backslash.
+ if (*wrap == WRAP_DETECTED) {
+ if (set_input_string(s, "\\", 1, true) != 0) {
+ return;
+ }
+
+ *wrap = WRAP_PROCESS;
+ parse(s, wrap);
+ } else {
+ *wrap = WRAP_NONE;
+ }
+}
+
+/*!
+ * \brief Parses one item from the current input.
+ *
+ * The result of the step is reported through s->state. When the input block
+ * yields nothing more, a final newline block is parsed and the state becomes
+ * ZS_STATE_EOF unless that final step produced something.
+ *
+ * \param s Scanner context.
+ *
+ * \retval  0 on success (including end of input or a stopped scanner).
+ * \retval -1 on a NULL context, a pending fatal error or if the final block
+ *            cannot be set.
+ */
+__attribute__((visibility("default")))
+int zs_parse_record(
+	zs_scanner_t *s)
+{
+	if (s == NULL) {
+		return -1;
+	}
+
+	// Decide whether parsing may continue from the current state.
+	if (s->state == ZS_STATE_ERROR) {
+		// Only a non-fatal error can be followed by further parsing.
+		if (s->error.fatal) {
+			return -1;
+		}
+	} else if (s->state != ZS_STATE_NONE &&
+	           s->state != ZS_STATE_DATA &&
+	           s->state != ZS_STATE_INCLUDE) {
+		// Stop or end of file, nothing more to do.
+		return 0;
+	}
+
+	// The whole input has been consumed already.
+	if (s->input.current == s->input.end) {
+		s->state = ZS_STATE_EOF;
+		return 0;
+	}
+
+	// Try to parse another item.
+	s->state = ZS_STATE_NONE;
+	wrap_t wrap = WRAP_NONE;
+	parse(s, &wrap);
+	if (s->state != ZS_STATE_NONE) {
+		return 0;
+	}
+
+	// Nothing was parsed, so flush the scanner with the final block.
+	if (set_input_string(s, "\n", 1, true) != 0) {
+		return -1;
+	}
+	parse(s, &wrap);
+	if (s->state == ZS_STATE_NONE) {
+		s->state = ZS_STATE_EOF;
+	}
+
+	return 0;
+}
+
+/*!
+ * \brief Parses the whole current input in the automatic mode.
+ *
+ * The automatic processing flag is switched on and stays on afterwards.
+ * After the input block, a trailing newline block is parsed unless the
+ * scanner was stopped or a fatal error occurred.
+ *
+ * \param s Scanner context.
+ *
+ * \retval  0 if no error has been counted.
+ * \retval -1 on a NULL context, if the trailing block cannot be set or if
+ *            the error counter is nonzero.
+ */
+__attribute__((visibility("default")))
+int zs_parse_all(
+	zs_scanner_t *s)
+{
+	if (s == NULL) {
+		return -1;
+	}
+
+	s->process.automatic = true;
+
+	// Process the input block itself.
+	wrap_t wrap = WRAP_NONE;
+	parse(s, &wrap);
+
+	// Flush with a newline block unless parsing was stopped or has failed.
+	const bool finished = (s->state == ZS_STATE_STOP || s->error.fatal);
+	if (!finished) {
+		if (set_input_string(s, "\n", 1, true) != 0) {
+			return -1;
+		}
+		parse(s, &wrap);
+	}
+
+	// Any counted error makes the whole run unsuccessful.
+	return (s->error.counter > 0) ? -1 : 0;
+}