diff options
Diffstat (limited to '')
-rw-r--r-- | test/drive_data_scanner.cc | 310 |
1 files changed, 310 insertions, 0 deletions
diff --git a/test/drive_data_scanner.cc b/test/drive_data_scanner.cc new file mode 100644 index 0000000..a8679c6 --- /dev/null +++ b/test/drive_data_scanner.cc @@ -0,0 +1,310 @@ +/** + * Copyright (c) 2007-2012, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef __CYGWIN__ +# include <alloca.h> +#endif + +#include <fstream> +#include <iostream> + +#include <stdio.h> +#include <stdlib.h> + +#include "base/injector.hh" +#include "config.h" +#include "data_parser.hh" +#include "data_scanner.hh" +#include "elem_to_json.hh" +#include "log_format.hh" +#include "log_format_loader.hh" +#include "logfile.hh" +#include "pretty_printer.hh" +#include "shared_buffer.hh" +#include "view_curses.hh" + +const char* TMP_NAME = "scanned.tmp"; + +int +main(int argc, char* argv[]) +{ + int c, retval = EXIT_SUCCESS; + bool prompt = false, is_log = false, pretty_print = false; + bool scanner_details = false; + + { + static auto builtin_formats + = injector::get<std::vector<std::shared_ptr<log_format>>>(); + auto& root_formats = log_format::get_root_formats(); + + log_format::get_root_formats().insert(root_formats.begin(), + builtin_formats.begin(), + builtin_formats.end()); + builtin_formats.clear(); + } + + { + std::vector<ghc::filesystem::path> paths; + std::vector<lnav::console::user_message> errors; + + load_formats(paths, errors); + } + + while ((c = getopt(argc, argv, "pPls")) != -1) { + switch (c) { + case 'p': + prompt = true; + break; + + case 'P': + pretty_print = true; + break; + + case 'l': + is_log = true; + break; + + case 's': + scanner_details = true; + break; + + default: + retval = EXIT_FAILURE; + break; + } + } + + argc -= optind; + argv += optind; + + if (retval != EXIT_SUCCESS) { + } else if (argc < 1) { + fprintf(stderr, "error: expecting file name argument(s)\n"); + retval = EXIT_FAILURE; + } else { + for (int lpc = 0; lpc < argc; lpc++) { + std::unique_ptr<std::ifstream> in_ptr; + std::istream* in; + FILE* out; + + if (strcmp(argv[lpc], "-") == 0) { + in = &std::cin; + } else { + auto ifs = std::make_unique<std::ifstream>(argv[lpc]); + + if (!ifs->is_open()) { + fprintf(stderr, "error: unable to open file\n"); + retval = EXIT_FAILURE; + } else { + in_ptr = std::move(ifs); + in = in_ptr.get(); + } + } + + if ((out = fopen(TMP_NAME, "w")) == nullptr) { + fprintf(stderr, + "error: unable to temporary file for writing\n"); + retval = EXIT_FAILURE; + } else { + std::shared_ptr<log_format> format; + char* log_line; + bool found = false; + char cmd[2048]; + std::string line; + int rc; + + getline(*in, line); + if (strcmp(argv[lpc], "-") == 0) { + line = " " + line; + } + + log_line = (char*) alloca(line.length()); + strcpy(log_line, &line[13]); + auto sub_line = line.substr(13); + struct line_range body(0, sub_line.length()); + shared_buffer share_manager; + logline_value_vector ll_values; + auto& sbr = ll_values.lvv_sbr; + + sbr.share( + share_manager, (char*) sub_line.c_str(), sub_line.size()); + + auto& root_formats = log_format::get_root_formats(); + std::vector<std::shared_ptr<log_format>>::iterator iter; + std::vector<logline> index; + + if (is_log) { + logfile_open_options loo; + auto open_res = logfile::open(argv[lpc], loo); + auto lf = open_res.unwrap(); + ArenaAlloc::Alloc<char> allocator; + scan_batch_context sbc{allocator}; + for (iter = root_formats.begin(); + iter != root_formats.end() && !found; + ++iter) + { + line_info li = {{13}}; + + (*iter)->clear(); + if ((*iter)->scan(*lf, index, li, sbr, sbc) + == log_format::SCAN_MATCH) + { + format = (*iter)->specialized(); + found = true; + } + } + + if (!found) { + fprintf(stderr, "error: log sample does not match\n"); + return EXIT_FAILURE; + } + } + + string_attrs_t sa; + + if (format.get() != nullptr) { + format->annotate(0, sa, ll_values); + body = find_string_attr_range(sa, &SA_BODY); + } + + data_parser::TRACE_FILE = fopen("scanned.dpt", "w"); + + data_scanner ds(sub_line, body.lr_start); + + if (scanner_details) { + fprintf(out, + " %s\n", + ds.get_input().to_string().c_str()); + while (true) { + auto tok_res = ds.tokenize2(); + + if (!tok_res) { + break; + } + + fprintf(out, + "%4s %3d:%-3d ", + data_scanner::token2name(tok_res->tr_token), + tok_res->tr_capture.c_begin, + tok_res->tr_capture.c_end); + size_t cap_index = 0; + for (; cap_index < tok_res->tr_capture.c_end; + cap_index++) + { + if (cap_index == tok_res->tr_capture.c_begin) { + fputc('^', out); + } else if (cap_index + == (tok_res->tr_capture.c_end - 1)) + { + fputc('^', out); + } else if (cap_index > tok_res->tr_capture.c_begin) + { + fputc('-', out); + } else { + fputc(' ', out); + } + } + for (; cap_index < (int) ds.get_input().length(); + cap_index++) + { + fputc(' ', out); + } + + auto sub = tok_res->to_string(); + fprintf(out, " %s\n", sub.c_str()); + } + } + + ds.reset(); + data_parser dp(&ds); + std::string msg_format; + + dp.dp_msg_format = &msg_format; + dp.parse(); + dp.print(out, dp.dp_pairs); + fprintf( + out, "msg :%s\n", sub_line.c_str() + body.lr_start); + fprintf(out, "format :%s\n", msg_format.c_str()); + + if (pretty_print) { + data_scanner ds2(sub_line, body.lr_start); + pretty_printer pp(&ds2, sa); + attr_line_t pretty_out; + + pp.append_to(pretty_out); + fprintf(out, "\n--\n%s", pretty_out.get_string().c_str()); + } + + auto_mem<yajl_gen_t> gen(yajl_gen_free); + + gen = yajl_gen_alloc(nullptr); + yajl_gen_config(gen.in(), yajl_gen_beautify, true); + + elements_to_json(gen, dp, &dp.dp_pairs); + + const unsigned char* buf; + size_t len; + + yajl_gen_get_buf(gen, &buf, &len); + fwrite(buf, 1, len, out); + + fclose(out); + + sprintf(cmd, "diff -u %s %s", argv[lpc], TMP_NAME); + rc = system(cmd); + if (rc != 0) { + if (prompt) { + char resp[4]; + + printf("\nOriginal line:\n%s\n", + sub_line.c_str() + body.lr_start); + printf( + "Would you like to update the original file? " + "(y/N) "); + fflush(stdout); + log_perror(scanf("%3s", resp)); + if (strcasecmp(resp, "y") == 0) { + rename(TMP_NAME, argv[lpc]); + } else { + retval = EXIT_FAILURE; + } + } else { + fprintf(stderr, "error: mismatch\n"); + retval = EXIT_FAILURE; + } + } + + fclose(data_parser::TRACE_FILE); + data_parser::TRACE_FILE = nullptr; + } + } + } + + return retval; +} |