summaryrefslogtreecommitdiffstats
path: root/src/text_format.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/text_format.cc145
1 files changed, 126 insertions, 19 deletions
diff --git a/src/text_format.cc b/src/text_format.cc
index 0b72786..dc63cf6 100644
--- a/src/text_format.cc
+++ b/src/text_format.cc
@@ -29,8 +29,11 @@
* @file text_format.cc
*/
+#include <set>
+
#include "text_format.hh"
+#include "base/lnav_log.hh"
#include "config.h"
#include "pcrepp/pcre2pp.hh"
#include "yajl/api/yajl_parse.h"
@@ -39,37 +42,66 @@ text_format_t
detect_text_format(string_fragment sf,
nonstd::optional<ghc::filesystem::path> path)
{
- static const auto GZ_EXT = ghc::filesystem::path(".gz");
- static const auto BZ2_EXT = ghc::filesystem::path(".bz2");
+ static const std::set<ghc::filesystem::path> FILTER_EXTS = { // compression suffixes peeled off before the extension checks below
+ ".bz2",
+ ".gz",
+ ".lzma",
+ ".xz",
+ ".zst",
+ };
+ static const auto C_EXTS = std::set<ghc::filesystem::path>{ // extensions reported as TF_C_LIKE
+ ".h",
+ ".hh",
+ ".hpp",
+ ".c",
+ ".cc",
+ ".cpp",
+ ".tpp",
+ };
+ static const auto PY_EXT = ghc::filesystem::path(".py");
+ static const auto RS_EXT = ghc::filesystem::path(".rs");
+ static const auto JAVA_EXT = ghc::filesystem::path(".java");
+ static const auto TOML_EXT = ghc::filesystem::path(".toml");
+ static const auto XML_EXT = ghc::filesystem::path(".xml");
+ static const auto YAML_EXT = ghc::filesystem::path(".yaml");
+ static const auto YML_EXT = ghc::filesystem::path(".yml");
+ static const auto MAKEFILE_STEM = ghc::filesystem::path("Makefile"); // compared against the stem, not the extension
static const auto MD_EXT = ghc::filesystem::path(".md");
static const auto MARKDOWN_EXT = ghc::filesystem::path(".markdown");
+ static const auto SH_EXT = ghc::filesystem::path(".sh");
+
+ static const auto DIFF_MATCHERS = lnav::pcre2pp::code::from_const(
+ R"(^--- .*\n\+\+\+ .*\n)", PCRE2_MULTILINE); // a "--- " line immediately followed by a "+++ " line
static const auto MAN_MATCHERS = lnav::pcre2pp::code::from_const(
R"(^[A-Za-z][A-Za-z\-_\+0-9]+\(\d\)\s+)", PCRE2_MULTILINE);
- // XXX This is a pretty crude way of detecting format...
+ // XXX This is a pretty crude way of
+ // detecting format...
static const auto PYTHON_MATCHERS = lnav::pcre2pp::code::from_const(
"(?:"
- "^\\s*def\\s+\\w+\\([^)]*\\):[^\\n]*$|"
+ "^\\s*def\\s+\\w+\\([^)]*\\):"
+ "[^\\n]*$|"
"^\\s*try:[^\\n]*$"
")",
PCRE2_MULTILINE);
- static const auto RUST_MATCHERS
- = lnav::pcre2pp::code::from_const(R"(
+ static const auto RUST_MATCHERS = lnav::pcre2pp::code::from_const(
+ R"(
(?:
^\s*use\s+[\w+:\{\}]+;$|
-^\s*(?:pub)?\s+(?:const|enum|fn)\s+\w+.*$|
+^\s*(?:pub enum|pub const|(?:pub )?fn)\s+\w+.*$|
^\s*impl\s+\w+.*$
)
)",
- PCRE2_MULTILINE);
+ PCRE2_MULTILINE);
static const auto JAVA_MATCHERS = lnav::pcre2pp::code::from_const(
"(?:"
"^package\\s+|"
"^import\\s+|"
- "^\\s*(?:public)?\\s*class\\s*(\\w+\\s+)*\\s*{"
+ "^\\s*(?:public)?\\s*"
+ "class\\s*(\\w+\\s+)*\\s*{"
")",
PCRE2_MULTILINE);
@@ -77,15 +109,18 @@ detect_text_format(string_fragment sf,
"(?:"
"^#\\s*include\\s+|"
"^#\\s*define\\s+|"
- "^\\s*if\\s+\\([^)]+\\)[^\\n]*$|"
- "^\\s*(?:\\w+\\s+)*class \\w+ {"
+ "^\\s*if\\s+\\([^)]+\\)[^\\n]"
+ "*$|"
+ "^\\s*(?:\\w+\\s+)*class "
+ "\\w+ {"
")",
PCRE2_MULTILINE);
static const auto SQL_MATCHERS = lnav::pcre2pp::code::from_const(
"(?:"
"select\\s+.+\\s+from\\s+|"
- "insert\\s+into\\s+.+\\s+values"
+ "insert\\s+into\\s+.+\\s+"
+ "values"
")",
PCRE2_MULTILINE | PCRE2_CASELESS);
@@ -96,19 +131,55 @@ detect_text_format(string_fragment sf,
")",
PCRE2_MULTILINE | PCRE2_CASELESS);
- text_format_t retval = text_format_t::TF_UNKNOWN;
+ static const auto SH_MATCHERS
+ = lnav::pcre2pp::code::from_const("^#!.+sh\\b", PCRE2_MULTILINE); // a "#!" line with a word ending in "sh", e.g. "#!/bin/bash"
if (path) {
- if (path->extension() == GZ_EXT) {
- path = path->stem();
- }
- if (path->extension() == BZ2_EXT) {
+ while (FILTER_EXTS.count(path->extension()) > 0) { // peel stacked suffixes, e.g. "a.py.gz.bz2" -> "a.py"
path = path->stem();
}
- if (path->extension() == MD_EXT || path->extension() == MARKDOWN_EXT) {
+ auto stem = path->stem();
+ auto ext = path->extension();
+ if (ext == MD_EXT || ext == MARKDOWN_EXT) {
return text_format_t::TF_MARKDOWN;
}
+
+ if (C_EXTS.count(ext) > 0) {
+ return text_format_t::TF_C_LIKE;
+ }
+
+ if (ext == PY_EXT) {
+ return text_format_t::TF_PYTHON;
+ }
+
+ if (ext == RS_EXT) {
+ return text_format_t::TF_RUST;
+ }
+
+ if (ext == TOML_EXT) {
+ return text_format_t::TF_TOML;
+ }
+
+ if (ext == JAVA_EXT) {
+ return text_format_t::TF_JAVA;
+ }
+
+ if (ext == YAML_EXT || ext == YML_EXT) {
+ return text_format_t::TF_YAML;
+ }
+
+ if (ext == XML_EXT) {
+ return text_format_t::TF_XML;
+ }
+
+ if (stem == MAKEFILE_STEM) {
+ return text_format_t::TF_MAKEFILE;
+ }
+
+ if (ext == SH_EXT) { // ".sh" is an extension: the stem of "run.sh" is "run", so a stem comparison never matched
+ return text_format_t::TF_SHELL_SCRIPT;
+ }
}
{
@@ -120,6 +191,14 @@ detect_text_format(string_fragment sf,
}
}
+ if (DIFF_MATCHERS.find_in(sf).ignore_error()) {
+ return text_format_t::TF_DIFF;
+ }
+
+ if (SH_MATCHERS.find_in(sf).ignore_error()) {
+ return text_format_t::TF_SHELL_SCRIPT;
+ }
+
if (MAN_MATCHERS.find_in(sf).ignore_error()) {
return text_format_t::TF_MAN;
}
@@ -148,5 +227,33 @@ detect_text_format(string_fragment sf,
return text_format_t::TF_XML;
}
- return retval;
+ return text_format_t::TF_UNKNOWN;
+}
+
+nonstd::optional<text_format_meta_t>
+extract_text_meta(string_fragment sf, text_format_t tf) // pulls format-specific metadata out of sf; nullopt when nothing is extracted
+{
+ static const auto MAN_NAME = lnav::pcre2pp::code::from_const(
+ R"(^([A-Za-z][A-Za-z\-_\+0-9]+\(\d\))\s+)", PCRE2_MULTILINE); // line-start token such as "ls(1)"; group 1 excludes the trailing whitespace
+
+ switch (tf) {
+ case text_format_t::TF_MAN: { // the only format handled by this switch
+ static thread_local auto md
+ = lnav::pcre2pp::match_data::unitialized(); // one match_data per thread, reused across calls
+
+ auto find_res
+ = MAN_NAME.capture_from(sf).into(md).matches().ignore_error();
+
+ if (find_res) {
+ return text_format_meta_t{
+ md.to_string(), // NOTE(review): confirm this yields capture group 1 (the name) rather than the whole match with its trailing whitespace
+ };
+ }
+ break;
+ }
+ default: // every other format yields no metadata
+ break;
+ }
+
+ return nonstd::nullopt;
}