summary refs log tree commit diff stats
path: root/src/document.sections.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/document.sections.cc 541
1 files changed, 461 insertions, 80 deletions
diff --git a/src/document.sections.cc b/src/document.sections.cc
index 04eb516..57820fb 100644
--- a/src/document.sections.cc
+++ b/src/document.sections.cc
@@ -27,6 +27,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
+#include <algorithm>
#include <utility>
#include <vector>
@@ -50,6 +51,7 @@ hier_node::lookup_child(section_key_t key) const
if (iter != this->hn_named_children.end()) {
return iter->second;
}
+
return nullptr;
},
[this](size_t index) -> hier_node* {
@@ -60,6 +62,117 @@ hier_node::lookup_child(section_key_t key) const
}));
}
+/// Locate a node among this node's direct children.
+///
+/// @param hn The node to look for; children are compared by pointer
+///           identity, not by content.
+/// @return The zero-based position of `hn` within `hn_children`, or
+///         `nonstd::nullopt` when `hn` is not one of the direct children.
+nonstd::optional<size_t>
+hier_node::child_index(const hier_node* hn) const
+{
+    const auto& children = this->hn_children;
+
+    for (size_t pos = 0; pos < children.size(); ++pos) {
+        if (children[pos].get() == hn) {
+            return pos;
+        }
+    }
+
+    return nonstd::nullopt;
+}
+
+/// Find the nodes to report as "previous" and "next" relative to one of
+/// this node's direct children.
+///
+/// A sibling is used as the neighbor when `hn` has no line number (zero)
+/// or when the sibling is more than one line away from `hn`; a "next"
+/// sibling must additionally start after `offset`.  Otherwise, and when
+/// `hn` is the first/last child, the answer is taken from this node's own
+/// neighbors as reported by the parent.
+///
+/// @param hn     The child whose neighbors are wanted.
+/// @param offset Only nodes whose `hn_start` is greater than this value
+///               are accepted as the "next" neighbor when picked from
+///               siblings or from `hn`'s own children.
+/// @return `nonstd::nullopt` when `hn` is not a direct child of this node;
+///         otherwise a result whose `cnr_previous` / `cnr_next` members
+///         are filled in where a neighbor was found.
+nonstd::optional<hier_node::child_neighbors_result>
+hier_node::child_neighbors(const lnav::document::hier_node* hn,
+                           file_off_t offset) const
+{
+    const auto index_opt = this->child_index(hn);
+    if (!index_opt) {
+        return nonstd::nullopt;
+    }
+    const auto index = index_opt.value();
+
+    // Both the "previous" and the "next" half below may need the parent's
+    // view of this node.  The query is const and takes the same arguments
+    // each time, so it is computed at most once per call; asking twice at
+    // every level would make the recursion exponential in tree depth.
+    // Only invoked after checking that hn_parent is not null.
+    nonstd::optional<hier_node::child_neighbors_result> parent_neighbors_opt;
+    auto parent_queried = false;
+    const auto parent_neighbors = [&]()
+        -> const nonstd::optional<hier_node::child_neighbors_result>& {
+        if (!parent_queried) {
+            parent_neighbors_opt
+                = this->hn_parent->child_neighbors(this, offset);
+            parent_queried = true;
+        }
+        return parent_neighbors_opt;
+    };
+
+    hier_node::child_neighbors_result retval;
+
+    // --- previous neighbor ---
+    if (index == 0) {
+        if (this->hn_parent != nullptr) {
+            const auto& from_parent = parent_neighbors();
+
+            if (from_parent) {
+                retval.cnr_previous = from_parent->cnr_previous;
+            }
+        } else {
+            // first child of a parentless node: report the node itself
+            retval.cnr_previous = hn;
+        }
+    } else {
+        const auto* prev_hn = this->hn_children[index - 1].get();
+
+        if (hn->hn_line_number == 0
+            || (hn->hn_line_number - prev_hn->hn_line_number) > 1)
+        {
+            retval.cnr_previous = prev_hn;
+        } else if (this->hn_parent != nullptr) {
+            const auto& from_parent = parent_neighbors();
+
+            if (from_parent) {
+                retval.cnr_previous = from_parent->cnr_previous;
+            }
+        }
+    }
+
+    // --- next neighbor ---
+    if (index == this->hn_children.size() - 1) {
+        if (this->hn_parent != nullptr) {
+            const auto& from_parent = parent_neighbors();
+
+            if (from_parent) {
+                retval.cnr_next = from_parent->cnr_next;
+            }
+        } else if (!hn->hn_children.empty()) {
+            // last child of a parentless node: descend into hn and take
+            // its first child that starts after the offset
+            for (const auto& child : hn->hn_children) {
+                if (child->hn_start > offset) {
+                    retval.cnr_next = child.get();
+                    break;
+                }
+            }
+        }
+    } else {
+        const auto* next_hn = this->hn_children[index + 1].get();
+
+        if (next_hn->hn_start > offset
+            && (hn->hn_line_number == 0
+                || (next_hn->hn_line_number - hn->hn_line_number) > 1))
+        {
+            retval.cnr_next = next_hn;
+        } else if (this->hn_parent != nullptr) {
+            const auto& from_parent = parent_neighbors();
+
+            if (from_parent) {
+                retval.cnr_next = from_parent->cnr_next;
+            }
+        }
+    }
+
+    return retval;
+}
+
+/// Find the direct children that bracket a line number.
+///
+/// Children are scanned in stored order.  `cnr_next` is the first child
+/// whose `hn_line_number` is greater than `ln`; `cnr_previous` is the
+/// child scanned immediately before it (or the last child when no child
+/// lies beyond `ln`).  Either member is left unset when there is no such
+/// child.
+///
+/// @param ln The line number to bracket.
+/// @return `nonstd::nullopt` when this node has no children, otherwise
+///         the neighbors described above.
+nonstd::optional<hier_node::child_neighbors_result>
+hier_node::line_neighbors(size_t ln) const
+{
+    if (this->hn_children.empty()) {
+        return nonstd::nullopt;
+    }
+
+    hier_node::child_neighbors_result retval;
+    auto iter = this->hn_children.begin();
+    const auto last = this->hn_children.end();
+
+    // every child at or before `ln` becomes the candidate "previous"
+    while (iter != last && (*iter)->hn_line_number <= ln) {
+        retval.cnr_previous = iter->get();
+        ++iter;
+    }
+    if (iter != last) {
+        retval.cnr_next = iter->get();
+    }
+
+    return retval;
+}
+
nonstd::optional<const hier_node*>
hier_node::lookup_path(const hier_node* root,
const std::vector<section_key_t>& path)
@@ -81,15 +194,33 @@ hier_node::lookup_path(const hier_node* root,
return retval;
}
+/// Collect the keys of every section interval that overlaps a range.
+///
+/// @param start Start of the range handed to the sections tree.
+/// @param stop  End of the range handed to the sections tree.
+/// @return The `value` of each interval visited by
+///         `m_sections_tree.visit_overlapping()`, in visit order.
+std::vector<section_key_t>
+metadata::path_for_range(size_t start, size_t stop)
+{
+    std::vector<section_key_t> retval;
+    auto collect_key
+        = [&retval](const lnav::document::section_interval_t& iv) {
+              retval.emplace_back(iv.value);
+          };
+
+    this->m_sections_tree.visit_overlapping(start, stop, collect_key);
+
+    return retval;
+}
+
struct metadata_builder {  // scratch holder for the pieces that to_metadata() turns into a metadata value
std::vector<section_interval_t> mb_intervals;
+ std::vector<section_type_interval_t> mb_type_intervals;  // typed spans; structure_walker::walk() fills these from comment and multi-line quoted-string tokens
std::unique_ptr<hier_node> mb_root_node;
+ std::set<size_t> mb_indents;  // indent widths collected by structure_walker::walk()
+ text_format_t mb_text_format{text_format_t::TF_UNKNOWN};  // stays TF_UNKNOWN unless a producer sets it; walk() copies the walker's format here
metadata to_metadata() &&  // rvalue-qualified: the builder's containers are moved out
{
return {  // NOTE(review): initializer order presumably has to match metadata's member order -- confirm against the header
std::move(this->mb_intervals),
std::move(this->mb_root_node),
+ std::move(this->mb_type_intervals),
+ std::move(this->mb_indents),
+ this->mb_text_format,  // passed as-is rather than moved
};
}
};
@@ -169,16 +300,18 @@ discover_metadata_int(const attr_line_t& al, metadata_builder& mb)
new_open_intervals.emplace_back(std::move(oi));
}
}
- auto* parent_node = new_open_intervals.empty()
- ? root_node.get()
- : new_open_intervals.back().oi_node.get();
- new_open_intervals.emplace_back(role_num,
- hdr_attr.sa_range.lr_start,
- al.get_substring(hdr_attr.sa_range));
- new_open_intervals.back().oi_node->hn_parent = parent_node;
- new_open_intervals.back().oi_node->hn_start
- = hdr_attr.sa_range.lr_start;
-
+ if (!hdr_attr.sa_range.empty()) {
+ auto* parent_node = new_open_intervals.empty()
+ ? root_node.get()
+ : new_open_intervals.back().oi_node.get();
+ new_open_intervals.emplace_back(
+ role_num,
+ hdr_attr.sa_range.lr_start,
+ al.get_substring(hdr_attr.sa_range));
+ new_open_intervals.back().oi_node->hn_parent = parent_node;
+ new_open_intervals.back().oi_node->hn_start
+ = hdr_attr.sa_range.lr_start;
+ }
open_intervals = std::move(new_open_intervals);
}
@@ -210,6 +343,18 @@ discover_metadata_int(const attr_line_t& al, metadata_builder& mb)
interval.stop += stop_off_iter->sa_value.get<int64_t>();
}
}
+ for (auto& interval : mb.mb_type_intervals) {
+ auto start_off_iter = find_string_attr_containing(
+ orig_attrs, &SA_ORIGIN_OFFSET, interval.start);
+ if (start_off_iter != orig_attrs.end()) {
+ interval.start += start_off_iter->sa_value.get<int64_t>();
+ }
+ auto stop_off_iter = find_string_attr_containing(
+ orig_attrs, &SA_ORIGIN_OFFSET, interval.stop - 1);
+ if (stop_off_iter != orig_attrs.end()) {
+ interval.stop += stop_off_iter->sa_value.get<int64_t>();
+ }
+ }
hier_node::depth_first(root_node.get(), [&orig_attrs](hier_node* node) {
auto off_opt
@@ -220,6 +365,16 @@ discover_metadata_int(const attr_line_t& al, metadata_builder& mb)
}
});
+ hier_node::depth_first(
+ mb.mb_root_node.get(), [&orig_attrs](hier_node* node) {
+ auto off_opt = get_string_attr(
+ orig_attrs, &SA_ORIGIN_OFFSET, node->hn_start);
+
+ if (off_opt) {
+ node->hn_start += off_opt.value()->sa_value.get<int64_t>();
+ }
+ });
+
if (!root_node->hn_children.empty()
|| !root_node->hn_named_children.empty())
{
@@ -239,8 +394,8 @@ discover_metadata(const attr_line_t& al)
class structure_walker {
public:
- explicit structure_walker(attr_line_t& al, line_range lr)
- : sw_line(al), sw_range(lr),
+ explicit structure_walker(attr_line_t& al, line_range lr, text_format_t tf)
+ : sw_line(al), sw_range(lr), sw_text_format(tf),
sw_scanner(string_fragment::from_str_range(
al.get_string(), lr.lr_start, lr.lr_end))
{
@@ -248,25 +403,62 @@ public:
this->sw_hier_nodes.push_back(std::make_unique<hier_node>());
}
+    /// Whether walk() should treat bracket and comma tokens as container
+    /// structure for the walker's text format, rather than collecting
+    /// them as plain values.
+    ///
+    /// @return true for TF_JSON, TF_YAML, TF_TOML, TF_LOG and TF_UNKNOWN;
+    ///         false for every other format.
+    bool is_structured_text() const
+    {
+        const auto tf = this->sw_text_format;
+
+        return tf == text_format_t::TF_JSON || tf == text_format_t::TF_YAML
+            || tf == text_format_t::TF_TOML || tf == text_format_t::TF_LOG
+            || tf == text_format_t::TF_UNKNOWN;
+    }
+ }
+
metadata walk()
{
metadata_builder mb;
- size_t garbage_count = 0;
- while (garbage_count < 1000) {
- auto tokenize_res = this->sw_scanner.tokenize2();
+ mb.mb_text_format = this->sw_text_format;
+ while (true) {
+ auto tokenize_res
+ = this->sw_scanner.tokenize2(this->sw_text_format);
if (!tokenize_res) {
break;
}
auto dt = tokenize_res->tr_token;
- element el(tokenize_res->tr_token, tokenize_res->tr_capture);
+ element el(dt, tokenize_res->tr_capture);
+ const auto& inner_cap = tokenize_res->tr_inner_capture;
+
+#if 0
+ printf("tok %s %s\n",
+ data_scanner::token2name(dt),
+ tokenize_res->to_string().c_str());
+#endif
+ if (dt != DT_WHITE) {
+ this->sw_at_start = false;
+ }
switch (dt) {
case DT_XML_DECL_TAG:
case DT_XML_EMPTY_TAG:
this->sw_values.emplace_back(el);
break;
+ case DT_COMMENT:
+ this->sw_type_intervals.emplace_back(
+ el.e_capture.c_begin,
+ el.e_capture.c_end,
+ section_types_t::comment);
+ this->sw_line.get_attrs().emplace_back(
+ line_range{
+ this->sw_range.lr_start + el.e_capture.c_begin,
+ this->sw_range.lr_start + el.e_capture.c_end,
+ },
+ VC_ROLE.value(role_t::VCR_COMMENT));
+ break;
case DT_XML_OPEN_TAG:
this->flush_values();
this->sw_interval_state.back().is_start
@@ -274,22 +466,32 @@ public:
this->sw_interval_state.back().is_line_number
= this->sw_line_number;
this->sw_interval_state.back().is_name
- = tokenize_res->to_string();
+ = tokenize_res->to_string_fragment()
+ .to_unquoted_string();
this->sw_depth += 1;
this->sw_interval_state.resize(this->sw_depth + 1);
this->sw_hier_nodes.push_back(
std::make_unique<hier_node>());
+ this->sw_container_tokens.push_back(to_closer(dt));
break;
case DT_XML_CLOSE_TAG: {
auto term = this->flush_values();
if (this->sw_depth > 0) {
- if (term) {
- this->append_child_node(term);
- }
- this->sw_interval_state.pop_back();
- this->sw_hier_stage
- = std::move(this->sw_hier_nodes.back());
- this->sw_hier_nodes.pop_back();
+ auto found = false;
+ do {
+ if (this->sw_container_tokens.back() == dt) {
+ found = true;
+ }
+ if (term) {
+ this->append_child_node(term);
+ term = nonstd::nullopt;
+ }
+ this->sw_interval_state.pop_back();
+ this->sw_hier_stage
+ = std::move(this->sw_hier_nodes.back());
+ this->sw_hier_nodes.pop_back();
+ this->sw_container_tokens.pop_back();
+ } while (!found);
}
this->append_child_node(el.e_capture);
if (this->sw_depth > 0) {
@@ -301,77 +503,196 @@ public:
case DT_H1: {
this->sw_line.get_attrs().emplace_back(
line_range{
- this->sw_range.lr_start + el.e_capture.c_begin + 1,
- this->sw_range.lr_start + el.e_capture.c_end - 1,
+ this->sw_range.lr_start + inner_cap.c_begin,
+ this->sw_range.lr_start + inner_cap.c_end,
+ },
+ VC_ROLE.value(role_t::VCR_H1));
+ this->sw_line_number += 1;
+ break;
+ }
+ case DT_DIFF_FILE_HEADER: {
+ auto sf = this->sw_scanner.to_string_fragment(inner_cap);
+ auto split_res = sf.split_pair(string_fragment::tag1{'\n'});
+ auto file1 = split_res->first.consume_n(4).value();
+ auto file2 = split_res->second.consume_n(4).value();
+ if ((file1 == "/dev/null" || file1.startswith("a/"))
+ && file2.startswith("b/"))
+ {
+ if (file1 != "/dev/null") {
+ file1 = file1.consume_n(2).value();
+ }
+ file2 = file2.consume_n(2).value();
+ }
+ this->sw_line.get_attrs().emplace_back(
+ line_range{
+ this->sw_range.lr_start
+ + tokenize_res->tr_capture.c_begin,
+ this->sw_range.lr_start
+ + tokenize_res->tr_capture.c_begin,
},
VC_ROLE.value(role_t::VCR_H1));
+ if (file1 == "/dev/null" || file1 == file2) {
+ this->sw_line.get_attrs().emplace_back(
+ line_range{
+ this->sw_range.lr_start + file2.sf_begin,
+ this->sw_range.lr_start + file2.sf_end,
+ },
+ VC_ROLE.value(role_t::VCR_H1));
+ } else {
+ this->sw_line.get_attrs().emplace_back(
+ line_range{
+ this->sw_range.lr_start + inner_cap.c_begin,
+ this->sw_range.lr_start + inner_cap.c_end,
+ },
+ VC_ROLE.value(role_t::VCR_H1));
+ }
this->sw_line_number += 2;
break;
}
+ case DT_DIFF_HUNK_HEADING: {
+ this->sw_line.get_attrs().emplace_back(
+ line_range{
+ this->sw_range.lr_start
+ + tokenize_res->tr_capture.c_begin,
+ this->sw_range.lr_start
+ + tokenize_res->tr_capture.c_begin,
+ },
+ VC_ROLE.value(role_t::VCR_H2));
+ this->sw_line.get_attrs().emplace_back(
+ line_range{
+ this->sw_range.lr_start + inner_cap.c_begin,
+ this->sw_range.lr_start + inner_cap.c_end,
+ },
+ VC_ROLE.value(role_t::VCR_H2));
+ this->sw_line_number += 1;
+ break;
+ }
case DT_LCURLY:
case DT_LSQUARE:
case DT_LPAREN: {
- this->flush_values();
- // this->append_child_node(term);
- this->sw_depth += 1;
- this->sw_interval_state.back().is_start
- = el.e_capture.c_begin;
- this->sw_interval_state.back().is_line_number
- = this->sw_line_number;
- this->sw_interval_state.resize(this->sw_depth + 1);
- this->sw_hier_nodes.push_back(
- std::make_unique<hier_node>());
+ if (this->is_structured_text()) {
+ this->flush_values();
+ // this->append_child_node(term);
+ this->sw_depth += 1;
+ this->sw_interval_state.back().is_start
+ = el.e_capture.c_begin;
+ this->sw_interval_state.back().is_line_number
+ = this->sw_line_number;
+ this->sw_interval_state.resize(this->sw_depth + 1);
+ this->sw_hier_nodes.push_back(
+ std::make_unique<hier_node>());
+ this->sw_container_tokens.push_back(to_closer(dt));
+ } else {
+ this->sw_values.emplace_back(el);
+ }
break;
}
case DT_RCURLY:
case DT_RSQUARE:
- case DT_RPAREN: {
- auto term = this->flush_values();
- if (this->sw_depth > 0) {
- this->append_child_node(term);
- this->sw_depth -= 1;
- this->sw_interval_state.pop_back();
- this->sw_hier_stage
- = std::move(this->sw_hier_nodes.back());
- this->sw_hier_nodes.pop_back();
- if (this->sw_interval_state.back().is_start) {
- data_scanner::capture_t obj_cap = {
- static_cast<int>(this->sw_interval_state.back()
- .is_start.value()),
- el.e_capture.c_end,
- };
-
- auto sf
- = this->sw_scanner.to_string_fragment(obj_cap);
- if (!sf.find('\n')) {
- this->sw_hier_stage->hn_named_children.clear();
- this->sw_hier_stage->hn_children.clear();
- while (!this->sw_intervals.empty()
- && this->sw_intervals.back().start
- > obj_cap.c_begin)
- {
- this->sw_intervals.pop_back();
+ case DT_RPAREN:
+ if (this->is_structured_text()
+ && !this->sw_container_tokens.empty()
+ && std::find(this->sw_container_tokens.begin(),
+ this->sw_container_tokens.end(),
+ dt)
+ != this->sw_container_tokens.end())
+ {
+ auto term = this->flush_values();
+ if (this->sw_depth > 0) {
+ auto found = false;
+ do {
+ if (this->sw_container_tokens.back() == dt) {
+ found = true;
}
- }
+ this->append_child_node(term);
+ term = nonstd::nullopt;
+ this->sw_depth -= 1;
+ this->sw_interval_state.pop_back();
+ this->sw_hier_stage
+ = std::move(this->sw_hier_nodes.back());
+ this->sw_hier_nodes.pop_back();
+ if (this->sw_interval_state.back().is_start) {
+ data_scanner::capture_t obj_cap = {
+ static_cast<int>(
+ this->sw_interval_state.back()
+ .is_start.value()),
+ el.e_capture.c_end,
+ };
+
+ auto sf
+ = this->sw_scanner.to_string_fragment(
+ obj_cap);
+ if (!sf.find('\n')) {
+ this->sw_hier_stage->hn_named_children
+ .clear();
+ this->sw_hier_stage->hn_children
+ .clear();
+ while (
+ !this->sw_intervals.empty()
+ && this->sw_intervals.back().start
+ > obj_cap.c_begin)
+ {
+ this->sw_intervals.pop_back();
+ }
+ }
+ }
+ this->sw_container_tokens.pop_back();
+ } while (!found);
}
}
this->sw_values.emplace_back(el);
break;
- }
case DT_COMMA:
- if (this->sw_depth > 0) {
- auto term = this->flush_values();
- this->append_child_node(term);
+ if (this->is_structured_text()) {
+ if (this->sw_depth > 0) {
+ auto term = this->flush_values();
+ this->append_child_node(term);
+ }
+ } else {
+ this->sw_values.emplace_back(el);
}
break;
case DT_LINE:
this->sw_line_number += 1;
+ this->sw_at_start = true;
break;
case DT_WHITE:
+ if (this->sw_at_start) {
+ size_t indent_size = 0;
+
+ for (auto ch : tokenize_res->to_string_fragment()) {
+ if (ch == '\t') {
+ do {
+ indent_size += 1;
+ } while (indent_size % 8);
+ } else {
+ indent_size += 1;
+ }
+ }
+ this->sw_indents.insert(indent_size);
+ this->sw_at_start = false;
+ }
+ break;
+ case DT_ZERO_WIDTH_SPACE:
break;
default:
- if (dt == DT_GARBAGE) {
- garbage_count += 1;
+ if (dt == DT_QUOTED_STRING) {
+ auto quoted_sf = tokenize_res->to_string_fragment();
+
+ if (quoted_sf.find('\n')) {
+ this->sw_type_intervals.emplace_back(
+ el.e_capture.c_begin,
+ el.e_capture.c_end,
+ section_types_t::multiline_string);
+ this->sw_line.get_attrs().emplace_back(
+ line_range{
+ this->sw_range.lr_start
+ + el.e_capture.c_begin,
+ this->sw_range.lr_start
+ + el.e_capture.c_end,
+ },
+ VC_ROLE.value(role_t::VCR_STRING));
+ }
}
this->sw_values.emplace_back(el);
break;
@@ -394,8 +715,31 @@ public:
this->sw_hier_stage->hn_parent = nullptr;
}
+ if (!this->sw_indents.empty()) {
+ auto low_indent_iter = this->sw_indents.begin();
+
+ if (*low_indent_iter == 1) {
+ // adding guides for small indents is noisy, drop for now
+ this->sw_indents.clear();
+ } else {
+ auto lcm = *low_indent_iter;
+
+ for (auto indent_iter = this->sw_indents.begin();
+ indent_iter != this->sw_indents.end();)
+ {
+ if ((*indent_iter % lcm) == 0) {
+ ++indent_iter;
+ } else {
+ indent_iter = this->sw_indents.erase(indent_iter);
+ }
+ }
+ }
+ }
+
mb.mb_root_node = std::move(this->sw_hier_stage);
mb.mb_intervals = std::move(this->sw_intervals);
+ mb.mb_type_intervals = std::move(this->sw_type_intervals);
+ mb.mb_indents = std::move(this->sw_indents);
discover_metadata_int(this->sw_line, mb);
@@ -447,7 +791,7 @@ private:
this->sw_interval_state.back().is_name
= this->sw_scanner
.to_string_fragment(last_key.value())
- .to_string();
+ .to_unquoted_string();
if (!this->sw_interval_state.back().is_name.empty()) {
this->sw_interval_state.back().is_start
= static_cast<ssize_t>(
@@ -487,18 +831,22 @@ private:
auto new_key = ivstate.is_name.empty()
? lnav::document::section_key_t{top_node->hn_children.size()}
: lnav::document::section_key_t{ivstate.is_name};
- this->sw_intervals.emplace_back(iv_start, iv_stop, new_key);
auto* retval = new_node.get();
new_node->hn_parent = top_node;
- new_node->hn_start = this->sw_intervals.back().start;
+ new_node->hn_start = iv_start;
new_node->hn_line_number = ivstate.is_line_number;
- if (!ivstate.is_name.empty()) {
- top_node->hn_named_children.insert({
- ivstate.is_name,
- retval,
- });
+ if (this->sw_depth == 1
+ || new_node->hn_line_number != top_node->hn_line_number)
+ {
+ this->sw_intervals.emplace_back(iv_start, iv_stop, new_key);
+ if (!ivstate.is_name.empty()) {
+ top_node->hn_named_children.insert({
+ ivstate.is_name,
+ retval,
+ });
+ }
+ top_node->hn_children.emplace_back(std::move(new_node));
}
- top_node->hn_children.emplace_back(std::move(new_node));
ivstate.is_start = nonstd::nullopt;
ivstate.is_line_number = 0;
ivstate.is_name.clear();
@@ -506,20 +854,25 @@ private:
attr_line_t& sw_line;
line_range sw_range;
+ text_format_t sw_text_format;
data_scanner sw_scanner;
int sw_depth{0};
size_t sw_line_number{0};
+ bool sw_at_start{true};
+ std::set<size_t> sw_indents;
std::vector<element> sw_values{};
+ std::vector<data_token_t> sw_container_tokens;
std::vector<interval_state> sw_interval_state;
std::vector<lnav::document::section_interval_t> sw_intervals;
+ std::vector<lnav::document::section_type_interval_t> sw_type_intervals;
std::vector<std::unique_ptr<lnav::document::hier_node>> sw_hier_nodes;
std::unique_ptr<lnav::document::hier_node> sw_hier_stage;
};
metadata
-discover_structure(attr_line_t& al, struct line_range lr)
+discover_structure(attr_line_t& al, struct line_range lr, text_format_t tf)  // tf: text format handed through to the walker
{
- return structure_walker(al, lr).walk();
+ return structure_walker(al, lr, tf).walk();  // builds a walker over range lr of al and returns the metadata produced by walk()
}
std::vector<breadcrumb::possibility>
@@ -542,3 +895,31 @@ metadata::possibility_provider(const std::vector<section_key_t>& path)
} // namespace document
} // namespace lnav
+
+namespace fmt {
+auto  // writes one section key to the format context and returns the resulting output iterator
+formatter<lnav::document::section_key_t>::format(
+ const lnav::document::section_key_t& key,
+ fmt::format_context& ctx) -> decltype(ctx.out()) const
+{
+ return key.match(  // one handler per kind of value the key can hold
+ [this, &ctx](const std::string& str) {  // string key: delegate to the string_view formatter
+ return formatter<string_view>::format(str, ctx);
+ },
+ [&ctx](size_t index) {  // numeric key: print the index with the default "{}" format
+ return format_to(ctx.out(), FMT_STRING("{}"), index);
+ });
+}
+
+/// Write a section path to the format context.
+///
+/// Every component is preceded by "\uff1a" (U+FF1A, fullwidth colon),
+/// including the first one, and is then written with the default "{}"
+/// format.  An empty path writes nothing.
+///
+/// @param path The path components, outermost first.
+/// @param ctx  The format context to write into.
+/// @return The context's output iterator after all components are written.
+auto
+formatter<std::vector<lnav::document::section_key_t>>::format(
+    const std::vector<lnav::document::section_key_t>& path,
+    fmt::format_context& ctx) -> decltype(ctx.out()) const
+{
+    for (size_t part_index = 0; part_index < path.size(); ++part_index) {
+        format_to(ctx.out(), FMT_STRING("\uff1a"));
+        format_to(ctx.out(), FMT_STRING("{}"), path[part_index]);
+    }
+
+    return ctx.out();
+}
+} // namespace fmt