diff options
Diffstat (limited to '')
-rw-r--r-- | src/document.sections.cc | 541 |
1 files changed, 461 insertions, 80 deletions
diff --git a/src/document.sections.cc b/src/document.sections.cc index 04eb516..57820fb 100644 --- a/src/document.sections.cc +++ b/src/document.sections.cc @@ -27,6 +27,7 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include <algorithm> #include <utility> #include <vector> @@ -50,6 +51,7 @@ hier_node::lookup_child(section_key_t key) const if (iter != this->hn_named_children.end()) { return iter->second; } + return nullptr; }, [this](size_t index) -> hier_node* { @@ -60,6 +62,117 @@ hier_node::lookup_child(section_key_t key) const })); } +nonstd::optional<size_t> +hier_node::child_index(const hier_node* hn) const +{ + size_t retval = 0; + + for (const auto& child : this->hn_children) { + if (child.get() == hn) { + return retval; + } + retval += 1; + } + + return nonstd::nullopt; +} + +nonstd::optional<hier_node::child_neighbors_result> +hier_node::child_neighbors(const lnav::document::hier_node* hn, + file_off_t offset) const +{ + auto index_opt = this->child_index(hn); + if (!index_opt) { + return nonstd::nullopt; + } + + hier_node::child_neighbors_result retval; + + if (index_opt.value() == 0) { + if (this->hn_parent != nullptr) { + auto parent_neighbors_opt + = this->hn_parent->child_neighbors(this, offset); + + if (parent_neighbors_opt) { + retval.cnr_previous = parent_neighbors_opt->cnr_previous; + } + } else { + retval.cnr_previous = hn; + } + } else { + const auto* prev_hn = this->hn_children[index_opt.value() - 1].get(); + + if (hn->hn_line_number == 0 + || (hn->hn_line_number - prev_hn->hn_line_number) > 1) + { + retval.cnr_previous = prev_hn; + } else if (this->hn_parent != nullptr) { + auto parent_neighbors_opt + = this->hn_parent->child_neighbors(this, offset); + + if (parent_neighbors_opt) { + retval.cnr_previous = parent_neighbors_opt->cnr_previous; + } + } + } + + if (index_opt.value() == this->hn_children.size() - 1) { + if (this->hn_parent != nullptr) { + auto parent_neighbors_opt + = this->hn_parent->child_neighbors(this, offset); + + if (parent_neighbors_opt) { + retval.cnr_next = parent_neighbors_opt->cnr_next; + } + } else if (!hn->hn_children.empty()) { + for (const auto& child : hn->hn_children) { + if (child->hn_start > offset) { + retval.cnr_next = child.get(); + break; + } + } + } + } else { + const auto* next_hn = this->hn_children[index_opt.value() + 1].get(); + + if (next_hn->hn_start > offset + && (hn->hn_line_number == 0 + || (next_hn->hn_line_number - hn->hn_line_number) > 1)) + { + retval.cnr_next = next_hn; + } else if (this->hn_parent != nullptr) { + auto parent_neighbors_opt + = this->hn_parent->child_neighbors(this, offset); + + if (parent_neighbors_opt) { + retval.cnr_next = parent_neighbors_opt->cnr_next; + } + } + } + + return retval; +} + +nonstd::optional<hier_node::child_neighbors_result> +hier_node::line_neighbors(size_t ln) const +{ + if (this->hn_children.empty()) { + return nonstd::nullopt; + } + + hier_node::child_neighbors_result retval; + + for (const auto& child : this->hn_children) { + if (child->hn_line_number > ln) { + retval.cnr_next = child.get(); + break; + } + retval.cnr_previous = child.get(); + } + + return retval; +} + nonstd::optional<const hier_node*> hier_node::lookup_path(const hier_node* root, const std::vector<section_key_t>& path) @@ -81,15 +194,33 @@ hier_node::lookup_path(const hier_node* root, return retval; } +std::vector<section_key_t> +metadata::path_for_range(size_t start, size_t stop) +{ + std::vector<section_key_t> retval; + + this->m_sections_tree.visit_overlapping( + start, stop, [&retval](const lnav::document::section_interval_t& iv) { + retval.emplace_back(iv.value); + }); + return retval; +} + struct metadata_builder { std::vector<section_interval_t> mb_intervals; + std::vector<section_type_interval_t> mb_type_intervals; std::unique_ptr<hier_node> mb_root_node; + std::set<size_t> mb_indents; + text_format_t mb_text_format{text_format_t::TF_UNKNOWN}; metadata to_metadata() && { return { std::move(this->mb_intervals), std::move(this->mb_root_node), + std::move(this->mb_type_intervals), + std::move(this->mb_indents), + this->mb_text_format, }; } }; @@ -169,16 +300,18 @@ discover_metadata_int(const attr_line_t& al, metadata_builder& mb) new_open_intervals.emplace_back(std::move(oi)); } } - auto* parent_node = new_open_intervals.empty() - ? root_node.get() - : new_open_intervals.back().oi_node.get(); - new_open_intervals.emplace_back(role_num, - hdr_attr.sa_range.lr_start, - al.get_substring(hdr_attr.sa_range)); - new_open_intervals.back().oi_node->hn_parent = parent_node; - new_open_intervals.back().oi_node->hn_start - = hdr_attr.sa_range.lr_start; - + if (!hdr_attr.sa_range.empty()) { + auto* parent_node = new_open_intervals.empty() + ? root_node.get() + : new_open_intervals.back().oi_node.get(); + new_open_intervals.emplace_back( + role_num, + hdr_attr.sa_range.lr_start, + al.get_substring(hdr_attr.sa_range)); + new_open_intervals.back().oi_node->hn_parent = parent_node; + new_open_intervals.back().oi_node->hn_start + = hdr_attr.sa_range.lr_start; + } open_intervals = std::move(new_open_intervals); } @@ -210,6 +343,18 @@ discover_metadata_int(const attr_line_t& al, metadata_builder& mb) interval.stop += stop_off_iter->sa_value.get<int64_t>(); } } + for (auto& interval : mb.mb_type_intervals) { + auto start_off_iter = find_string_attr_containing( + orig_attrs, &SA_ORIGIN_OFFSET, interval.start); + if (start_off_iter != orig_attrs.end()) { + interval.start += start_off_iter->sa_value.get<int64_t>(); + } + auto stop_off_iter = find_string_attr_containing( + orig_attrs, &SA_ORIGIN_OFFSET, interval.stop - 1); + if (stop_off_iter != orig_attrs.end()) { + interval.stop += stop_off_iter->sa_value.get<int64_t>(); + } + } hier_node::depth_first(root_node.get(), [&orig_attrs](hier_node* node) { auto off_opt @@ -220,6 +365,16 @@ discover_metadata_int(const attr_line_t& al, metadata_builder& mb) } }); + hier_node::depth_first( + mb.mb_root_node.get(), [&orig_attrs](hier_node* node) { + auto off_opt = get_string_attr( + orig_attrs, &SA_ORIGIN_OFFSET, node->hn_start); + + if (off_opt) { + node->hn_start += off_opt.value()->sa_value.get<int64_t>(); + } + }); + if (!root_node->hn_children.empty() || !root_node->hn_named_children.empty()) { @@ -239,8 +394,8 @@ discover_metadata(const attr_line_t& al) class structure_walker { public: - explicit structure_walker(attr_line_t& al, line_range lr) - : sw_line(al), sw_range(lr), + explicit structure_walker(attr_line_t& al, line_range lr, text_format_t tf) + : sw_line(al), sw_range(lr), sw_text_format(tf), sw_scanner(string_fragment::from_str_range( al.get_string(), lr.lr_start, lr.lr_end)) { @@ -248,25 +403,62 @@ public: this->sw_hier_nodes.push_back(std::make_unique<hier_node>()); } + bool is_structured_text() const + { + switch (this->sw_text_format) { + case text_format_t::TF_JSON: + case text_format_t::TF_YAML: + case text_format_t::TF_TOML: + case text_format_t::TF_LOG: + case text_format_t::TF_UNKNOWN: + return true; + default: + return false; + } + } + metadata walk() { metadata_builder mb; - size_t garbage_count = 0; - while (garbage_count < 1000) { - auto tokenize_res = this->sw_scanner.tokenize2(); + mb.mb_text_format = this->sw_text_format; + while (true) { + auto tokenize_res + = this->sw_scanner.tokenize2(this->sw_text_format); if (!tokenize_res) { break; } auto dt = tokenize_res->tr_token; - element el(tokenize_res->tr_token, tokenize_res->tr_capture); + element el(dt, tokenize_res->tr_capture); + const auto& inner_cap = tokenize_res->tr_inner_capture; + +#if 0 + printf("tok %s %s\n", + data_scanner::token2name(dt), + tokenize_res->to_string().c_str()); +#endif + if (dt != DT_WHITE) { + this->sw_at_start = false; + } switch (dt) { case DT_XML_DECL_TAG: case DT_XML_EMPTY_TAG: this->sw_values.emplace_back(el); break; + case DT_COMMENT: + this->sw_type_intervals.emplace_back( + el.e_capture.c_begin, + el.e_capture.c_end, + section_types_t::comment); + this->sw_line.get_attrs().emplace_back( + line_range{ + this->sw_range.lr_start + el.e_capture.c_begin, + this->sw_range.lr_start + el.e_capture.c_end, + }, + VC_ROLE.value(role_t::VCR_COMMENT)); + break; case DT_XML_OPEN_TAG: this->flush_values(); this->sw_interval_state.back().is_start @@ -274,22 +466,32 @@ public: this->sw_interval_state.back().is_line_number = this->sw_line_number; this->sw_interval_state.back().is_name - = tokenize_res->to_string(); + = tokenize_res->to_string_fragment() + .to_unquoted_string(); this->sw_depth += 1; this->sw_interval_state.resize(this->sw_depth + 1); this->sw_hier_nodes.push_back( std::make_unique<hier_node>()); + this->sw_container_tokens.push_back(to_closer(dt)); break; case DT_XML_CLOSE_TAG: { auto term = this->flush_values(); if (this->sw_depth > 0) { - if (term) { - this->append_child_node(term); - } - this->sw_interval_state.pop_back(); - this->sw_hier_stage - = std::move(this->sw_hier_nodes.back()); - this->sw_hier_nodes.pop_back(); + auto found = false; + do { + if (this->sw_container_tokens.back() == dt) { + found = true; + } + if (term) { + this->append_child_node(term); + term = nonstd::nullopt; + } + this->sw_interval_state.pop_back(); + this->sw_hier_stage + = std::move(this->sw_hier_nodes.back()); + this->sw_hier_nodes.pop_back(); + this->sw_container_tokens.pop_back(); + } while (!found); } this->append_child_node(el.e_capture); if (this->sw_depth > 0) { @@ -301,77 +503,196 @@ public: case DT_H1: { this->sw_line.get_attrs().emplace_back( line_range{ - this->sw_range.lr_start + el.e_capture.c_begin + 1, - this->sw_range.lr_start + el.e_capture.c_end - 1, + this->sw_range.lr_start + inner_cap.c_begin, + this->sw_range.lr_start + inner_cap.c_end, + }, + VC_ROLE.value(role_t::VCR_H1)); + this->sw_line_number += 1; + break; + } + case DT_DIFF_FILE_HEADER: { + auto sf = this->sw_scanner.to_string_fragment(inner_cap); + auto split_res = sf.split_pair(string_fragment::tag1{'\n'}); + auto file1 = split_res->first.consume_n(4).value(); + auto file2 = split_res->second.consume_n(4).value(); + if ((file1 == "/dev/null" || file1.startswith("a/")) + && file2.startswith("b/")) + { + if (file1 != "/dev/null") { + file1 = file1.consume_n(2).value(); + } + file2 = file2.consume_n(2).value(); + } + this->sw_line.get_attrs().emplace_back( + line_range{ + this->sw_range.lr_start + + tokenize_res->tr_capture.c_begin, + this->sw_range.lr_start + + tokenize_res->tr_capture.c_begin, }, VC_ROLE.value(role_t::VCR_H1)); + if (file1 == "/dev/null" || file1 == file2) { + this->sw_line.get_attrs().emplace_back( + line_range{ + this->sw_range.lr_start + file2.sf_begin, + this->sw_range.lr_start + file2.sf_end, + }, + VC_ROLE.value(role_t::VCR_H1)); + } else { + this->sw_line.get_attrs().emplace_back( + line_range{ + this->sw_range.lr_start + inner_cap.c_begin, + this->sw_range.lr_start + inner_cap.c_end, + }, + VC_ROLE.value(role_t::VCR_H1)); + } this->sw_line_number += 2; break; } + case DT_DIFF_HUNK_HEADING: { + this->sw_line.get_attrs().emplace_back( + line_range{ + this->sw_range.lr_start + + tokenize_res->tr_capture.c_begin, + this->sw_range.lr_start + + tokenize_res->tr_capture.c_begin, + }, + VC_ROLE.value(role_t::VCR_H2)); + this->sw_line.get_attrs().emplace_back( + line_range{ + this->sw_range.lr_start + inner_cap.c_begin, + this->sw_range.lr_start + inner_cap.c_end, + }, + VC_ROLE.value(role_t::VCR_H2)); + this->sw_line_number += 1; + break; + } case DT_LCURLY: case DT_LSQUARE: case DT_LPAREN: { - this->flush_values(); - // this->append_child_node(term); - this->sw_depth += 1; - this->sw_interval_state.back().is_start - = el.e_capture.c_begin; - this->sw_interval_state.back().is_line_number - = this->sw_line_number; - this->sw_interval_state.resize(this->sw_depth + 1); - this->sw_hier_nodes.push_back( - std::make_unique<hier_node>()); + if (this->is_structured_text()) { + this->flush_values(); + // this->append_child_node(term); + this->sw_depth += 1; + this->sw_interval_state.back().is_start + = el.e_capture.c_begin; + this->sw_interval_state.back().is_line_number + = this->sw_line_number; + this->sw_interval_state.resize(this->sw_depth + 1); + this->sw_hier_nodes.push_back( + std::make_unique<hier_node>()); + this->sw_container_tokens.push_back(to_closer(dt)); + } else { + this->sw_values.emplace_back(el); + } break; } case DT_RCURLY: case DT_RSQUARE: - case DT_RPAREN: { - auto term = this->flush_values(); - if (this->sw_depth > 0) { - this->append_child_node(term); - this->sw_depth -= 1; - this->sw_interval_state.pop_back(); - this->sw_hier_stage - = std::move(this->sw_hier_nodes.back()); - this->sw_hier_nodes.pop_back(); - if (this->sw_interval_state.back().is_start) { - data_scanner::capture_t obj_cap = { - static_cast<int>(this->sw_interval_state.back() - .is_start.value()), - el.e_capture.c_end, - }; - - auto sf - = this->sw_scanner.to_string_fragment(obj_cap); - if (!sf.find('\n')) { - this->sw_hier_stage->hn_named_children.clear(); - this->sw_hier_stage->hn_children.clear(); - while (!this->sw_intervals.empty() - && this->sw_intervals.back().start - > obj_cap.c_begin) - { - this->sw_intervals.pop_back(); + case DT_RPAREN: + if (this->is_structured_text() + && !this->sw_container_tokens.empty() + && std::find(this->sw_container_tokens.begin(), + this->sw_container_tokens.end(), + dt) + != this->sw_container_tokens.end()) + { + auto term = this->flush_values(); + if (this->sw_depth > 0) { + auto found = false; + do { + if (this->sw_container_tokens.back() == dt) { + found = true; } - } + this->append_child_node(term); + term = nonstd::nullopt; + this->sw_depth -= 1; + this->sw_interval_state.pop_back(); + this->sw_hier_stage + = std::move(this->sw_hier_nodes.back()); + this->sw_hier_nodes.pop_back(); + if (this->sw_interval_state.back().is_start) { + data_scanner::capture_t obj_cap = { + static_cast<int>( + this->sw_interval_state.back() + .is_start.value()), + el.e_capture.c_end, + }; + + auto sf + = this->sw_scanner.to_string_fragment( + obj_cap); + if (!sf.find('\n')) { + this->sw_hier_stage->hn_named_children + .clear(); + this->sw_hier_stage->hn_children + .clear(); + while ( + !this->sw_intervals.empty() + && this->sw_intervals.back().start + > obj_cap.c_begin) + { + this->sw_intervals.pop_back(); + } + } + } + this->sw_container_tokens.pop_back(); + } while (!found); } } this->sw_values.emplace_back(el); break; - } case DT_COMMA: - if (this->sw_depth > 0) { - auto term = this->flush_values(); - this->append_child_node(term); + if (this->is_structured_text()) { + if (this->sw_depth > 0) { + auto term = this->flush_values(); + this->append_child_node(term); + } + } else { + this->sw_values.emplace_back(el); } break; case DT_LINE: this->sw_line_number += 1; + this->sw_at_start = true; break; case DT_WHITE: + if (this->sw_at_start) { + size_t indent_size = 0; + + for (auto ch : tokenize_res->to_string_fragment()) { + if (ch == '\t') { + do { + indent_size += 1; + } while (indent_size % 8); + } else { + indent_size += 1; + } + } + this->sw_indents.insert(indent_size); + this->sw_at_start = false; + } + break; + case DT_ZERO_WIDTH_SPACE: break; default: - if (dt == DT_GARBAGE) { - garbage_count += 1; + if (dt == DT_QUOTED_STRING) { + auto quoted_sf = tokenize_res->to_string_fragment(); + + if (quoted_sf.find('\n')) { + this->sw_type_intervals.emplace_back( + el.e_capture.c_begin, + el.e_capture.c_end, + section_types_t::multiline_string); + this->sw_line.get_attrs().emplace_back( + line_range{ + this->sw_range.lr_start + + el.e_capture.c_begin, + this->sw_range.lr_start + + el.e_capture.c_end, + }, + VC_ROLE.value(role_t::VCR_STRING)); + } } this->sw_values.emplace_back(el); break; @@ -394,8 +715,31 @@ public: this->sw_hier_stage->hn_parent = nullptr; } + if (!this->sw_indents.empty()) { + auto low_indent_iter = this->sw_indents.begin(); + + if (*low_indent_iter == 1) { + // adding guides for small indents is noisy, drop for now + this->sw_indents.clear(); + } else { + auto lcm = *low_indent_iter; + + for (auto indent_iter = this->sw_indents.begin(); + indent_iter != this->sw_indents.end();) + { + if ((*indent_iter % lcm) == 0) { + ++indent_iter; + } else { + indent_iter = this->sw_indents.erase(indent_iter); + } + } + } + } + mb.mb_root_node = std::move(this->sw_hier_stage); mb.mb_intervals = std::move(this->sw_intervals); + mb.mb_type_intervals = std::move(this->sw_type_intervals); + mb.mb_indents = std::move(this->sw_indents); discover_metadata_int(this->sw_line, mb); @@ -447,7 +791,7 @@ private: this->sw_interval_state.back().is_name = this->sw_scanner .to_string_fragment(last_key.value()) - .to_string(); + .to_unquoted_string(); if (!this->sw_interval_state.back().is_name.empty()) { this->sw_interval_state.back().is_start = static_cast<ssize_t>( @@ -487,18 +831,22 @@ private: auto new_key = ivstate.is_name.empty() ? lnav::document::section_key_t{top_node->hn_children.size()} : lnav::document::section_key_t{ivstate.is_name}; - this->sw_intervals.emplace_back(iv_start, iv_stop, new_key); auto* retval = new_node.get(); new_node->hn_parent = top_node; - new_node->hn_start = this->sw_intervals.back().start; + new_node->hn_start = iv_start; new_node->hn_line_number = ivstate.is_line_number; - if (!ivstate.is_name.empty()) { - top_node->hn_named_children.insert({ - ivstate.is_name, - retval, - }); + if (this->sw_depth == 1 + || new_node->hn_line_number != top_node->hn_line_number) + { + this->sw_intervals.emplace_back(iv_start, iv_stop, new_key); + if (!ivstate.is_name.empty()) { + top_node->hn_named_children.insert({ + ivstate.is_name, + retval, + }); + } + top_node->hn_children.emplace_back(std::move(new_node)); } - top_node->hn_children.emplace_back(std::move(new_node)); ivstate.is_start = nonstd::nullopt; ivstate.is_line_number = 0; ivstate.is_name.clear(); @@ -506,20 +854,25 @@ private: attr_line_t& sw_line; line_range sw_range; + text_format_t sw_text_format; data_scanner sw_scanner; int sw_depth{0}; size_t sw_line_number{0}; + bool sw_at_start{true}; + std::set<size_t> sw_indents; std::vector<element> sw_values{}; + std::vector<data_token_t> sw_container_tokens; std::vector<interval_state> sw_interval_state; std::vector<lnav::document::section_interval_t> sw_intervals; + std::vector<lnav::document::section_type_interval_t> sw_type_intervals; std::vector<std::unique_ptr<lnav::document::hier_node>> sw_hier_nodes; std::unique_ptr<lnav::document::hier_node> sw_hier_stage; }; metadata -discover_structure(attr_line_t& al, struct line_range lr) +discover_structure(attr_line_t& al, struct line_range lr, text_format_t tf) { - return structure_walker(al, lr).walk(); + return structure_walker(al, lr, tf).walk(); } std::vector<breadcrumb::possibility> @@ -542,3 +895,31 @@ metadata::possibility_provider(const std::vector<section_key_t>& path) } // namespace document } // namespace lnav + +namespace fmt { +auto +formatter<lnav::document::section_key_t>::format( + const lnav::document::section_key_t& key, + fmt::format_context& ctx) -> decltype(ctx.out()) const +{ + return key.match( + [this, &ctx](const std::string& str) { + return formatter<string_view>::format(str, ctx); + }, + [&ctx](size_t index) { + return format_to(ctx.out(), FMT_STRING("{}"), index); + }); +} + +auto +formatter<std::vector<lnav::document::section_key_t>>::format( + const std::vector<lnav::document::section_key_t>& path, + fmt::format_context& ctx) -> decltype(ctx.out()) const +{ + for (const auto& part : path) { + format_to(ctx.out(), FMT_STRING("\uff1a")); + format_to(ctx.out(), FMT_STRING("{}"), part); + } + return ctx.out(); +} +} // namespace fmt |