diff options
Diffstat (limited to '')
-rw-r--r-- | src/md2attr_line.cc | 635 |
1 files changed, 635 insertions, 0 deletions
diff --git a/src/md2attr_line.cc b/src/md2attr_line.cc new file mode 100644 index 0000000..a208616 --- /dev/null +++ b/src/md2attr_line.cc @@ -0,0 +1,635 @@ +/** + * Copyright (c) 2022, Timothy Stack + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * * Neither the name of Timothy Stack nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "md2attr_line.hh" + +#include "base/attr_line.builder.hh" +#include "base/itertools.hh" +#include "base/lnav_log.hh" +#include "pcrepp/pcre2pp.hh" +#include "pugixml/pugixml.hpp" +#include "readline_highlighters.hh" +#include "view_curses.hh" + +using namespace lnav::roles::literals; + +void +md2attr_line::flush_footnotes() +{ + if (this->ml_footnotes.empty()) { + return; + } + + auto& block_text = this->ml_blocks.back(); + auto longest_foot = this->ml_footnotes + | lnav::itertools::map(&attr_line_t::utf8_length_or_length) + | lnav::itertools::max(0); + size_t index = 1; + + block_text.append("\n"); + for (auto& foot : this->ml_footnotes) { + block_text.append(lnav::string::attrs::preformatted(" ")) + .append("\u258c"_footnote_border) + .append(lnav::roles::footnote_text( + index < 10 && this->ml_footnotes.size() >= 10 ? " " : "")) + .append(lnav::roles::footnote_text( + fmt::format(FMT_STRING("[{}] - "), index))) + .append(foot.pad_to(longest_foot)) + .append("\n"); + index += 1; + } + this->ml_footnotes.clear(); +} + +Result<void, std::string> +md2attr_line::enter_block(const md4cpp::event_handler::block& bl) +{ + if (this->ml_list_stack.empty() + && (bl.is<MD_BLOCK_H_DETAIL*>() || bl.is<block_hr>() + || bl.is<block_p>())) + { + this->flush_footnotes(); + } + + this->ml_blocks.resize(this->ml_blocks.size() + 1); + if (bl.is<MD_BLOCK_OL_DETAIL*>()) { + auto* ol_detail = bl.get<MD_BLOCK_OL_DETAIL*>(); + + this->ml_list_stack.emplace_back(*ol_detail); + } else if (bl.is<MD_BLOCK_UL_DETAIL*>()) { + this->ml_list_stack.emplace_back(bl.get<MD_BLOCK_UL_DETAIL*>()); + } else if (bl.is<MD_BLOCK_TABLE_DETAIL*>()) { + this->ml_tables.resize(this->ml_tables.size() + 1); + } else if (bl.is<block_tr>()) { + this->ml_tables.back().t_rows.resize( + this->ml_tables.back().t_rows.size() + 1); + } else if (bl.is<MD_BLOCK_CODE_DETAIL*>()) { + this->ml_code_depth += 1; + } + + return Ok(); +} + +Result<void, std::string> +md2attr_line::leave_block(const md4cpp::event_handler::block& bl) +{ + auto block_text = std::move(this->ml_blocks.back()); + this->ml_blocks.pop_back(); + + auto& last_block = this->ml_blocks.back(); + if (!endswith(block_text.get_string(), "\n")) { + block_text.append("\n"); + } + if (bl.is<MD_BLOCK_H_DETAIL*>()) { + auto* hbl = bl.get<MD_BLOCK_H_DETAIL*>(); + auto role = role_t::VCR_TEXT; + + switch (hbl->level) { + case 1: + role = role_t::VCR_H1; + break; + case 2: + role = role_t::VCR_H2; + break; + case 3: + role = role_t::VCR_H3; + break; + case 4: + role = role_t::VCR_H4; + break; + case 5: + role = role_t::VCR_H5; + break; + case 6: + role = role_t::VCR_H6; + break; + } + block_text.rtrim().with_attr_for_all(VC_ROLE.value(role)); + last_block.append("\n").append(block_text).append("\n"); + } else if (bl.is<block_hr>()) { + block_text = attr_line_t() + .append(lnav::roles::hr(repeat("\u2501", 70))) + .with_attr_for_all(SA_PREFORMATTED.value()); + last_block.append("\n").append(block_text).append("\n"); + } else if (bl.is<MD_BLOCK_UL_DETAIL*>() || bl.is<MD_BLOCK_OL_DETAIL*>()) { + this->ml_list_stack.pop_back(); + if (last_block.empty()) { + last_block.append("\n"); + } else { + if (!endswith(last_block.get_string(), "\n")) { + last_block.append("\n"); + } + if (this->ml_list_stack.empty() + && !endswith(last_block.get_string(), "\n\n")) + { + last_block.append("\n"); + } + } + last_block.append(block_text); + } else if (bl.is<MD_BLOCK_LI_DETAIL*>()) { + auto last_list_block = this->ml_list_stack.back(); + text_wrap_settings tws = {0, 60}; + + attr_line_builder alb(last_block); + { + auto prefix = alb.with_attr(SA_PREFORMATTED.value()); + + alb.append(" ") + .append(last_list_block.match( + [this, &tws](const MD_BLOCK_UL_DETAIL*) { + tws.tws_indent = 3; + return this->ml_list_stack.size() % 2 == 1 + ? "\u2022"_list_glyph + : "\u2014"_list_glyph; + }, + [this, &tws](MD_BLOCK_OL_DETAIL ol_detail) { + auto retval = lnav::roles::list_glyph( + fmt::format(FMT_STRING("{}{}"), + ol_detail.start, + ol_detail.mark_delimiter)); + tws.tws_indent = retval.first.length() + 2; + + this->ml_list_stack.pop_back(); + ol_detail.start += 1; + this->ml_list_stack.emplace_back(ol_detail); + return retval; + })) + .append(" "); + } + + alb.append(block_text, &tws); + } else if (bl.is<MD_BLOCK_CODE_DETAIL*>()) { + auto* code_detail = bl.get<MD_BLOCK_CODE_DETAIL*>(); + + this->ml_code_depth -= 1; + + auto lang_sf = string_fragment::from_bytes(code_detail->lang.text, + code_detail->lang.size); + if (lang_sf == "lnav") { + readline_lnav_highlighter(block_text, block_text.length()); + } else if (lang_sf == "sql" || lang_sf == "sqlite") { + readline_sqlite_highlighter(block_text, block_text.length()); + } else if (lang_sf == "shell" || lang_sf == "bash") { + readline_shlex_highlighter(block_text, block_text.length()); + } else if (lang_sf == "console" + || lang_sf.iequal( + string_fragment::from_const("shellsession"))) + { + static const auto SH_PROMPT + = lnav::pcre2pp::code::from_const(R"([^\$>#%]*[\$>#%]\s+)"); + + attr_line_t new_block_text; + attr_line_t cmd_block; + int prompt_size = 0; + + for (auto line : block_text.split_lines()) { + if (!cmd_block.empty() + && endswith(cmd_block.get_string(), "\\\n")) + { + cmd_block.append(line).append("\n"); + continue; + } + + if (!cmd_block.empty()) { + readline_shlex_highlighter_int( + cmd_block, + cmd_block.length(), + line_range{prompt_size, (int) cmd_block.length()}); + new_block_text.append(cmd_block); + cmd_block.clear(); + } + + auto sh_find_res + = SH_PROMPT.find_in(line.get_string()).ignore_error(); + + if (sh_find_res) { + prompt_size = sh_find_res->f_all.length(); + line.with_attr(string_attr{ + line_range{0, prompt_size}, + VC_ROLE.value(role_t::VCR_LIST_GLYPH), + }); + cmd_block.append(line).append("\n"); + } else { + line.with_attr_for_all(VC_ROLE.value(role_t::VCR_COMMENT)); + new_block_text.append(line).append("\n"); + } + } + block_text = new_block_text; + } + + auto code_lines = block_text.rtrim().split_lines(); + auto max_width = code_lines + | lnav::itertools::map(&attr_line_t::utf8_length_or_length) + | lnav::itertools::max(0); + attr_line_t padded_text; + + for (auto& line : code_lines) { + line.pad_to(std::max(max_width + 4, ssize_t{40})) + .with_attr_for_all(VC_ROLE.value(role_t::VCR_QUOTED_CODE)); + padded_text.append(lnav::string::attrs::preformatted(" ")) + .append("\u258c"_code_border) + .append(line) + .append("\n"); + } + if (!padded_text.empty()) { + padded_text.with_attr_for_all(SA_PREFORMATTED.value()); + last_block.append("\n").append(padded_text); + } + } else if (bl.is<block_quote>()) { + text_wrap_settings tws = {0, 60}; + attr_line_t wrapped_text; + + wrapped_text.append(block_text.rtrim(), &tws); + auto quoted_lines = wrapped_text.split_lines(); + auto max_width = quoted_lines + | lnav::itertools::map(&attr_line_t::utf8_length_or_length) + | lnav::itertools::max(0); + attr_line_t padded_text; + + for (auto& line : quoted_lines) { + line.pad_to(max_width + 1) + .with_attr_for_all(VC_ROLE.value(role_t::VCR_QUOTED_TEXT)); + padded_text.append(" ") + .append("\u258c"_quote_border) + .append(line) + .append("\n"); + } + if (!padded_text.empty()) { + padded_text.with_attr_for_all(SA_PREFORMATTED.value()); + last_block.append("\n").append(padded_text); + } + } else if (bl.is<MD_BLOCK_TABLE_DETAIL*>()) { + auto* table_detail = bl.get<MD_BLOCK_TABLE_DETAIL*>(); + auto tab = std::move(this->ml_tables.back()); + this->ml_tables.pop_back(); + std::vector<ssize_t> max_col_sizes; + + block_text.clear(); + block_text.append("\n"); + max_col_sizes.resize(table_detail->col_count); + for (size_t lpc = 0; lpc < table_detail->col_count; lpc++) { + if (lpc < tab.t_headers.size()) { + max_col_sizes[lpc] = tab.t_headers[lpc].utf8_length_or_length(); + tab.t_headers[lpc].with_attr_for_all( + VC_ROLE.value(role_t::VCR_TABLE_HEADER)); + } + } + for (const auto& row : tab.t_rows) { + for (size_t lpc = 0; lpc < table_detail->col_count; lpc++) { + if (lpc >= row.r_columns.size()) { + continue; + } + auto col_len = row.r_columns[lpc].utf8_length_or_length(); + if (col_len > max_col_sizes[lpc]) { + max_col_sizes[lpc] = col_len; + } + } + } + auto col_sizes + = max_col_sizes | lnav::itertools::map([](const auto& elem) { + return std::min(elem, ssize_t{50}); + }); + auto full_width = col_sizes | lnav::itertools::sum(); + text_wrap_settings tws = {0, 50}; + std::vector<cell_lines> cells; + size_t max_cell_lines = 0; + for (size_t lpc = 0; lpc < tab.t_headers.size(); lpc++) { + tws.with_width(col_sizes[lpc]); + + attr_line_t td_block; + td_block.append(tab.t_headers[lpc], &tws); + cells.emplace_back(td_block.rtrim().split_lines()); + if (cells.back().cl_lines.size() > max_cell_lines) { + max_cell_lines = cells.back().cl_lines.size(); + } + } + for (size_t line_index = 0; line_index < max_cell_lines; line_index++) { + size_t col = 0; + for (const auto& cell : cells) { + block_text.append(" "); + if (line_index < cell.cl_lines.size()) { + block_text.append(cell.cl_lines[line_index]); + block_text.append( + col_sizes[col] + - cell.cl_lines[line_index].utf8_length_or_length(), + ' '); + } else { + block_text.append(col_sizes[col], ' '); + } + col += 1; + } + block_text.append("\n") + .append(lnav::roles::table_border( + repeat("\u2550", full_width + col_sizes.size()))) + .append("\n"); + } + for (const auto& row : tab.t_rows) { + cells.clear(); + max_cell_lines = 0; + for (size_t lpc = 0; lpc < row.r_columns.size(); lpc++) { + tws.with_width(col_sizes[lpc]); + + attr_line_t td_block; + td_block.append(row.r_columns[lpc], &tws); + cells.emplace_back(td_block.rtrim().split_lines()); + if (cells.back().cl_lines.size() > max_cell_lines) { + max_cell_lines = cells.back().cl_lines.size(); + } + } + for (size_t line_index = 0; line_index < max_cell_lines; + line_index++) + { + size_t col = 0; + for (const auto& cell : cells) { + block_text.append(" "); + if (line_index < cell.cl_lines.size()) { + block_text.append(cell.cl_lines[line_index]); + if (col < col_sizes.size() - 1) { + block_text.append( + col_sizes[col] + - cell.cl_lines[line_index] + .utf8_length_or_length(), + ' '); + } + } else if (col < col_sizes.size() - 1) { + block_text.append(col_sizes[col], ' '); + } + col += 1; + } + block_text.append("\n"); + } + } + if (!block_text.empty()) { + block_text.with_attr_for_all(SA_PREFORMATTED.value()); + last_block.append(block_text); + } + } else if (bl.is<block_th>()) { + this->ml_tables.back().t_headers.push_back(block_text); + } else if (bl.is<MD_BLOCK_TD_DETAIL*>()) { + this->ml_tables.back().t_rows.back().r_columns.push_back(block_text); + } else { + if (bl.is<block_html>()) { + if (startswith(block_text.get_string(), "<!--")) { + return Ok(); + } + } + + text_wrap_settings tws = {0, this->ml_blocks.size() == 1 ? 70 : 10000}; + + if (!last_block.empty()) { + last_block.append("\n"); + } + last_block.append(block_text, &tws); + } + if (bl.is<block_doc>()) { + this->flush_footnotes(); + } + return Ok(); +} + +Result<void, std::string> +md2attr_line::enter_span(const md4cpp::event_handler::span& sp) +{ + auto& last_block = this->ml_blocks.back(); + this->ml_span_starts.push_back(last_block.length()); + if (sp.is<span_code>()) { + last_block.append(" "); + this->ml_code_depth += 1; + } + return Ok(); +} + +Result<void, std::string> +md2attr_line::leave_span(const md4cpp::event_handler::span& sp) +{ + auto& last_block = this->ml_blocks.back(); + if (sp.is<span_code>()) { + this->ml_code_depth -= 1; + last_block.append(" "); + line_range lr{ + static_cast<int>(this->ml_span_starts.back()), + static_cast<int>(last_block.length()), + }; + last_block.with_attr({ + lr, + VC_ROLE.value(role_t::VCR_QUOTED_CODE), + }); + last_block.with_attr({ + lr, + SA_PREFORMATTED.value(), + }); + } else if (sp.is<span_em>()) { + line_range lr{ + static_cast<int>(this->ml_span_starts.back()), + static_cast<int>(last_block.length()), + }; +#if defined(A_ITALIC) + last_block.with_attr({ + lr, + VC_STYLE.value(text_attrs{(int32_t) A_ITALIC}), + }); +#endif + } else if (sp.is<span_strong>()) { + line_range lr{ + static_cast<int>(this->ml_span_starts.back()), + static_cast<int>(last_block.length()), + }; + last_block.with_attr({ + lr, + VC_STYLE.value(text_attrs{A_BOLD}), + }); + } else if (sp.is<span_u>()) { + line_range lr{ + static_cast<int>(this->ml_span_starts.back()), + static_cast<int>(last_block.length()), + }; + last_block.with_attr({ + lr, + VC_STYLE.value(text_attrs{A_UNDERLINE}), + }); + } else if (sp.is<MD_SPAN_A_DETAIL*>()) { + auto* a_detail = sp.get<MD_SPAN_A_DETAIL*>(); + auto href_str = std::string(a_detail->href.text, a_detail->href.size); + + this->append_url_footnote(href_str); + } else if (sp.is<MD_SPAN_IMG_DETAIL*>()) { + auto* img_detail = sp.get<MD_SPAN_IMG_DETAIL*>(); + auto src_str = std::string(img_detail->src.text, img_detail->src.size); + + this->append_url_footnote(src_str); + } + this->ml_span_starts.pop_back(); + return Ok(); +} + +Result<void, std::string> +md2attr_line::text(MD_TEXTTYPE tt, const string_fragment& sf) +{ + static const auto& entity_map = md4cpp::get_xml_entity_map(); + static const auto& vc = view_colors::singleton(); + + auto& last_block = this->ml_blocks.back(); + + switch (tt) { + case MD_TEXT_BR: + last_block.append("\n"); + break; + case MD_TEXT_SOFTBR: { + if (!last_block.empty() && !isspace(last_block.get_string().back())) + { + last_block.append(" "); + } + break; + } + case MD_TEXT_ENTITY: { + auto xe_iter = entity_map.xem_entities.find(sf.to_string()); + + if (xe_iter != entity_map.xem_entities.end()) { + last_block.append(xe_iter->second.xe_chars); + } + break; + } + case MD_TEXT_HTML: { + last_block.append(sf); + if (sf.startswith("<span ")) { + this->ml_html_span_starts.push_back(last_block.length() + - sf.length()); + } else if (sf == "</span>" && !this->ml_html_span_starts.empty()) { + std::string html_span = last_block.get_string().substr( + this->ml_html_span_starts.back()); + + pugi::xml_document doc; + + auto load_res = doc.load_string(html_span.c_str()); + if (load_res) { + auto span = doc.child("span"); + if (span) { + auto styled_span = attr_line_t(span.text().get()); + + auto span_class = span.attribute("class"); + if (span_class) { + auto cl_iter + = vc.vc_class_to_role.find(span_class.value()); + + if (cl_iter == vc.vc_class_to_role.end()) { + log_error("unknown span class: %s", + span_class.value()); + } else { + styled_span.with_attr_for_all(cl_iter->second); + } + } + last_block.erase(this->ml_html_span_starts.back()); + last_block.append(styled_span); + } + } else { + log_error("failed to parse: %s", load_res.description()); + } + this->ml_html_span_starts.pop_back(); + } + break; + } + default: { + static const auto REPL_RE = lnav::pcre2pp::code::from_const( + R"(-{2,3}|:[^:\s]*(?:::[^:\s]*)*:)"); + static const auto& emojis = md4cpp::get_emoji_map(); + + if (this->ml_code_depth > 0) { + last_block.append(sf); + return Ok(); + } + + std::string span_text; + + auto loop_res = REPL_RE.capture_from(sf).for_each( + [&span_text](lnav::pcre2pp::match_data& md) { + span_text += md.leading(); + + auto matched = *md[0]; + + if (matched == "--") { + span_text.append("\u2013"); + } else if (matched == "---") { + span_text.append("\u2014"); + } else if (matched.startswith(":")) { + auto em_iter = emojis.em_shortname2emoji.find( + matched.to_string()); + if (em_iter == emojis.em_shortname2emoji.end()) { + span_text += matched; + } else { + span_text.append(em_iter->second.get().e_value); + } + } + }); + + if (loop_res.isOk()) { + span_text += loop_res.unwrap(); + } else { + log_error("span replacement regex failed: %d", + loop_res.unwrapErr().e_error_code); + } + + text_wrap_settings tws + = {0, this->ml_blocks.size() == 1 ? 70 : 10000}; + + last_block.append(span_text, &tws); + break; + } + } + return Ok(); +} + +void +md2attr_line::append_url_footnote(std::string href_str) +{ + if (startswith(href_str, "#")) { + return; + } + + auto& last_block = this->ml_blocks.back(); + last_block.appendf(FMT_STRING("[{}]"), this->ml_footnotes.size() + 1); + last_block.with_attr(string_attr{ + line_range{ + (int) this->ml_span_starts.back(), + (int) last_block.length(), + }, + VC_STYLE.value(text_attrs{A_UNDERLINE}), + }); + if (this->ml_source_path && href_str.find(':') == std::string::npos) { + auto link_path = ghc::filesystem::absolute( + this->ml_source_path.value().parent_path() / href_str); + + href_str = fmt::format(FMT_STRING("file://{}"), link_path.string()); + } + + auto href + = attr_line_t().append(lnav::roles::hyperlink(href_str)).append(" "); + href.with_attr_for_all(VC_ROLE.value(role_t::VCR_FOOTNOTE_TEXT)); + href.with_attr_for_all(SA_PREFORMATTED.value()); + this->ml_footnotes.emplace_back(href); +} |