diff options
Diffstat (limited to '')
-rw-r--r-- | src/base/ansi_scrubber.cc | 388 |
1 files changed, 388 insertions, 0 deletions
diff --git a/src/base/ansi_scrubber.cc b/src/base/ansi_scrubber.cc new file mode 100644 index 0000000..26ae070 --- /dev/null +++ b/src/base/ansi_scrubber.cc @@ -0,0 +1,388 @@ +/** + * Copyright (c) 2013, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * @file ansi_scrubber.cc + */ + +#include <algorithm> + +#include "ansi_scrubber.hh" + +#include "base/opt_util.hh" +#include "config.h" +#include "pcrepp/pcre2pp.hh" +#include "scn/scn.h" +#include "view_curses.hh" + +static const lnav::pcre2pp::code& +ansi_regex() +{ + static const auto retval = lnav::pcre2pp::code::from_const( + "\x1b\\[([\\d=;\\?]*)([a-zA-Z])|(?:\\X\x08\\X)+"); + + return retval; +} + +size_t +erase_ansi_escapes(string_fragment input) +{ + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + + const auto& regex = ansi_regex(); + nonstd::optional<int> move_start; + size_t fill_index = 0; + + auto matcher = regex.capture_from(input).into(md); + while (true) { + auto match_res = matcher.matches(PCRE2_NO_UTF_CHECK); + + if (match_res.is<lnav::pcre2pp::matcher::not_found>()) { + break; + } + if (match_res.is<lnav::pcre2pp::matcher::error>()) { + log_error("ansi scrub regex failure"); + break; + } + + auto sf = md[0].value(); + auto bs_index_res = sf.codepoint_to_byte_index(1); + + if (move_start) { + auto move_len = sf.sf_begin - move_start.value(); + memmove(input.writable_data(fill_index), + input.data() + move_start.value(), + move_len); + fill_index += move_len; + } else { + fill_index = sf.sf_begin; + } + + if (sf.length() >= 3 && bs_index_res.isOk() + && sf[bs_index_res.unwrap()] == '\b') + { + static const auto OVERSTRIKE_RE + = lnav::pcre2pp::code::from_const(R"((\X)\x08(\X))"); + + auto loop_res = OVERSTRIKE_RE.capture_from(sf).for_each( + [&fill_index, &input](lnav::pcre2pp::match_data& over_md) { + auto lhs = over_md[1].value(); + if (lhs == "_") { + auto rhs = over_md[2].value(); + memmove(input.writable_data(fill_index), + rhs.data(), + rhs.length()); + fill_index += rhs.length(); + } else { + memmove(input.writable_data(fill_index), + lhs.data(), + lhs.length()); + fill_index += lhs.length(); + } + }); + } + move_start = md.remaining().sf_begin; + } + + memmove(input.writable_data(fill_index), + md.remaining().data(), + md.remaining().length()); + fill_index += md.remaining().length(); + + return fill_index; +} + +void +scrub_ansi_string(std::string& str, string_attrs_t* sa) +{ + static thread_local auto md = lnav::pcre2pp::match_data::unitialized(); + const auto& regex = ansi_regex(); + int64_t origin_offset = 0; + int last_origin_offset_end = 0; + + replace(str.begin(), str.end(), '\0', ' '); + auto matcher = regex.capture_from(str).into(md); + while (true) { + auto match_res = matcher.matches(PCRE2_NO_UTF_CHECK); + + if (match_res.is<lnav::pcre2pp::matcher::not_found>()) { + break; + } + if (match_res.is<lnav::pcre2pp::matcher::error>()) { + log_error("ansi scrub regex failure"); + break; + } + + const auto sf = md[0].value(); + auto bs_index_res = sf.codepoint_to_byte_index(1); + + if (sf.length() >= 3 && bs_index_res.isOk() + && sf[bs_index_res.unwrap()] == '\b') + { + ssize_t fill_index = sf.sf_begin; + line_range bold_range; + line_range ul_range; + auto sub_sf = sf; + + while (!sub_sf.empty()) { + auto lhs_opt = sub_sf.consume_codepoint(); + if (!lhs_opt) { + return; + } + auto lhs_pair = lhs_opt.value(); + auto mid_opt = lhs_pair.second.consume_codepoint(); + if (!mid_opt) { + return; + } + auto mid_pair = mid_opt.value(); + auto rhs_opt = mid_pair.second.consume_codepoint(); + if (!rhs_opt) { + return; + } + auto rhs_pair = rhs_opt.value(); + sub_sf = rhs_pair.second; + + if (lhs_pair.first == '_' || rhs_pair.first == '_') { + if (sa != nullptr && bold_range.is_valid()) { + sa->emplace_back(bold_range, + VC_STYLE.value(text_attrs{A_BOLD})); + bold_range.clear(); + } + if (ul_range.is_valid()) { + ul_range.lr_end += 1; + } else { + ul_range.lr_start = fill_index; + ul_range.lr_end = fill_index + 1; + } + auto cp = lhs_pair.first == '_' ? rhs_pair.first + : lhs_pair.first; + ww898::utf::utf8::write(cp, [&str, &fill_index](auto ch) { + str[fill_index++] = ch; + }); + } else { + if (sa != nullptr && ul_range.is_valid()) { + sa->emplace_back( + ul_range, VC_STYLE.value(text_attrs{A_UNDERLINE})); + ul_range.clear(); + } + if (bold_range.is_valid()) { + bold_range.lr_end += 1; + } else { + bold_range.lr_start = fill_index; + bold_range.lr_end = fill_index + 1; + } + try { + ww898::utf::utf8::write(lhs_pair.first, + [&str, &fill_index](auto ch) { + str[fill_index++] = ch; + }); + } catch (const std::runtime_error& e) { + log_error("invalid UTF-8 at %d", sf.sf_begin); + return; + } + } + } + + auto output_size = fill_index - sf.sf_begin; + auto erased_size = sf.length() - output_size; + + if (sa != nullptr) { +#if 0 + shift_string_attrs( + *sa, caps->c_begin + sf.length() / 3, -erased_size); +#endif + sa->emplace_back(line_range{last_origin_offset_end, + sf.sf_begin + (int) output_size}, + SA_ORIGIN_OFFSET.value(origin_offset)); + } + + if (sa != nullptr && ul_range.is_valid()) { + sa->emplace_back(ul_range, + VC_STYLE.value(text_attrs{A_UNDERLINE})); + ul_range.clear(); + } + if (sa != nullptr && bold_range.is_valid()) { + sa->emplace_back(bold_range, + VC_STYLE.value(text_attrs{A_BOLD})); + bold_range.clear(); + } + + str.erase(str.begin() + fill_index, str.begin() + sf.sf_end); + last_origin_offset_end = sf.sf_begin + output_size; + origin_offset += erased_size; + matcher.reload_input(str, last_origin_offset_end); + continue; + } + + auto seq = md[1].value(); + auto terminator = md[2].value(); + struct line_range lr; + bool has_attrs = false; + text_attrs attrs; + auto role = nonstd::optional<role_t>(); + size_t lpc; + + switch (terminator[0]) { + case 'm': + for (lpc = seq.sf_begin; + lpc != std::string::npos && lpc < (size_t) seq.sf_end;) + { + auto ansi_code_res = scn::scan_value<int>( + scn::string_view{&str[lpc], &str[seq.sf_end]}); + + if (ansi_code_res) { + auto ansi_code = ansi_code_res.value(); + if (90 <= ansi_code && ansi_code <= 97) { + ansi_code -= 60; + attrs.ta_attrs |= A_STANDOUT; + } + if (30 <= ansi_code && ansi_code <= 37) { + attrs.ta_fg_color = ansi_code - 30; + } + if (40 <= ansi_code && ansi_code <= 47) { + attrs.ta_bg_color = ansi_code - 40; + } + switch (ansi_code) { + case 1: + attrs.ta_attrs |= A_BOLD; + break; + + case 2: + attrs.ta_attrs |= A_DIM; + break; + + case 4: + attrs.ta_attrs |= A_UNDERLINE; + break; + + case 7: + attrs.ta_attrs |= A_REVERSE; + break; + } + } + lpc = str.find(';', lpc); + if (lpc != std::string::npos) { + lpc += 1; + } + } + has_attrs = true; + break; + + case 'C': { + auto spaces_res + = scn::scan_value<unsigned int>(seq.to_string_view()); + + if (spaces_res && spaces_res.value() > 0) { + str.insert((std::string::size_type) sf.sf_end, + spaces_res.value(), + ' '); + } + break; + } + + case 'H': { + unsigned int row = 0, spaces = 0; + + if (scn::scan(seq.to_string_view(), "{};{}", row, spaces) + && spaces > 1) + { + int ispaces = spaces - 1; + if (ispaces > sf.sf_begin) { + str.insert((unsigned long) sf.sf_end, + ispaces - sf.sf_begin, + ' '); + } + } + break; + } + + case 'O': { + auto role_res = scn::scan_value<int>(seq.to_string_view()); + + if (role_res) { + role_t role_tmp = (role_t) role_res.value(); + if (role_tmp > role_t::VCR_NONE + && role_tmp < role_t::VCR__MAX) + { + role = role_tmp; + has_attrs = true; + } + } + break; + } + } + str.erase(str.begin() + sf.sf_begin, str.begin() + sf.sf_end); + if (sa != nullptr) { + shift_string_attrs(*sa, sf.sf_begin, -sf.length()); + + if (has_attrs) { + for (auto rit = sa->rbegin(); rit != sa->rend(); rit++) { + if (rit->sa_range.lr_end != -1) { + continue; + } + rit->sa_range.lr_end = sf.sf_begin; + } + lr.lr_start = sf.sf_begin; + lr.lr_end = -1; + if (attrs.ta_attrs || attrs.ta_fg_color || attrs.ta_bg_color) { + sa->emplace_back(lr, VC_STYLE.value(attrs)); + } + role | [&lr, &sa](role_t r) { + sa->emplace_back(lr, VC_ROLE.value(r)); + }; + } + sa->emplace_back(line_range{last_origin_offset_end, sf.sf_begin}, + SA_ORIGIN_OFFSET.value(origin_offset)); + last_origin_offset_end = sf.sf_begin; + origin_offset += sf.length(); + } + + matcher.reload_input(str, sf.sf_begin); + } + + if (sa != nullptr && last_origin_offset_end > 0) { + sa->emplace_back(line_range{last_origin_offset_end, (int) str.size()}, + SA_ORIGIN_OFFSET.value(origin_offset)); + } +} + +void +add_ansi_vars(std::map<std::string, scoped_value_t>& vars) +{ + vars["ansi_csi"] = ANSI_CSI; + vars["ansi_norm"] = ANSI_NORM; + vars["ansi_bold"] = ANSI_BOLD_START; + vars["ansi_underline"] = ANSI_UNDERLINE_START; + vars["ansi_black"] = ANSI_COLOR(COLOR_BLACK); + vars["ansi_red"] = ANSI_COLOR(COLOR_RED); + vars["ansi_green"] = ANSI_COLOR(COLOR_GREEN); + vars["ansi_yellow"] = ANSI_COLOR(COLOR_YELLOW); + vars["ansi_blue"] = ANSI_COLOR(COLOR_BLUE); + vars["ansi_magenta"] = ANSI_COLOR(COLOR_MAGENTA); + vars["ansi_cyan"] = ANSI_COLOR(COLOR_CYAN); + vars["ansi_white"] = ANSI_COLOR(COLOR_WHITE); +} |