diff options
Diffstat (limited to '')
-rw-r--r-- | src/shlex.cc | 312 |
1 files changed, 261 insertions, 51 deletions
diff --git a/src/shlex.cc b/src/shlex.cc index 8da44bb..d33924e 100644 --- a/src/shlex.cc +++ b/src/shlex.cc @@ -34,42 +34,70 @@ #endif #include "config.h" +#include "pcrepp/pcre2pp.hh" #include "shlex.hh" -bool -shlex::tokenize(string_fragment& cap_out, shlex_token_t& token_out) +using namespace lnav::roles::literals; + +std::string +shlex::escape(std::string s) +{ + static const auto SH_CHARS = lnav::pcre2pp::code::from_const("'"); + + return SH_CHARS.replace(s, "\\'"); +} + +attr_line_t +shlex::to_attr_line(const shlex::tokenize_error_t& te) const +{ + return attr_line_t() + .append(string_fragment::from_bytes(this->s_str, this->s_len)) + .append("\n") + .pad_to(te.te_source.sf_begin) + .append("^"_snippet_border); +} + +Result<shlex::tokenize_result_t, shlex::tokenize_error_t> +shlex::tokenize() { + tokenize_result_t retval; + + retval.tr_frag.sf_string = this->s_str; while (this->s_index < this->s_len) { switch (this->s_str[this->s_index]) { case '\\': - cap_out.sf_begin = this->s_index; + retval.tr_frag.sf_begin = this->s_index; if (this->s_index + 1 < this->s_len) { - token_out = shlex_token_t::ST_ESCAPE; + retval.tr_token = shlex_token_t::escape; this->s_index += 2; - cap_out.sf_end = this->s_index; + retval.tr_frag.sf_end = this->s_index; } else { this->s_index += 1; - cap_out.sf_end = this->s_index; - token_out = shlex_token_t::ST_ERROR; + retval.tr_frag.sf_end = this->s_index; + + return Err(tokenize_error_t{ + "invalid escape", + retval.tr_frag, + }); } - return true; + return Ok(retval); case '\"': if (!this->s_ignore_quotes) { switch (this->s_state) { case state_t::STATE_NORMAL: - cap_out.sf_begin = this->s_index; + retval.tr_frag.sf_begin = this->s_index; this->s_index += 1; - cap_out.sf_end = this->s_index; - token_out = shlex_token_t::ST_DOUBLE_QUOTE_START; + retval.tr_frag.sf_end = this->s_index; + retval.tr_token = shlex_token_t::double_quote_start; this->s_state = state_t::STATE_IN_DOUBLE_QUOTE; - return true; + return Ok(retval); case state_t::STATE_IN_DOUBLE_QUOTE: - cap_out.sf_begin = this->s_index; + retval.tr_frag.sf_begin = this->s_index; this->s_index += 1; - cap_out.sf_end = this->s_index; - token_out = shlex_token_t::ST_DOUBLE_QUOTE_END; + retval.tr_frag.sf_end = this->s_index; + retval.tr_token = shlex_token_t::double_quote_end; this->s_state = state_t::STATE_NORMAL; - return true; + return Ok(retval); default: break; } @@ -79,19 +107,19 @@ shlex::tokenize(string_fragment& cap_out, shlex_token_t& token_out) if (!this->s_ignore_quotes) { switch (this->s_state) { case state_t::STATE_NORMAL: - cap_out.sf_begin = this->s_index; + retval.tr_frag.sf_begin = this->s_index; this->s_index += 1; - cap_out.sf_end = this->s_index; - token_out = shlex_token_t::ST_SINGLE_QUOTE_START; + retval.tr_frag.sf_end = this->s_index; + retval.tr_token = shlex_token_t::single_quote_start; this->s_state = state_t::STATE_IN_SINGLE_QUOTE; - return true; + return Ok(retval); case state_t::STATE_IN_SINGLE_QUOTE: - cap_out.sf_begin = this->s_index; + retval.tr_frag.sf_begin = this->s_index; this->s_index += 1; - cap_out.sf_end = this->s_index; - token_out = shlex_token_t::ST_SINGLE_QUOTE_END; + retval.tr_frag.sf_end = this->s_index; + retval.tr_token = shlex_token_t::single_quote_end; this->s_state = state_t::STATE_NORMAL; - return true; + return Ok(retval); default: break; } @@ -100,9 +128,10 @@ shlex::tokenize(string_fragment& cap_out, shlex_token_t& token_out) case '$': switch (this->s_state) { case state_t::STATE_NORMAL: - case state_t::STATE_IN_DOUBLE_QUOTE: - this->scan_variable_ref(cap_out, token_out); - return true; + case state_t::STATE_IN_DOUBLE_QUOTE: { + auto rc = TRY(this->scan_variable_ref()); + return Ok(rc); + } default: break; } @@ -110,7 +139,7 @@ shlex::tokenize(string_fragment& cap_out, shlex_token_t& token_out) case '~': switch (this->s_state) { case state_t::STATE_NORMAL: - cap_out.sf_begin = this->s_index; + retval.tr_frag.sf_begin = this->s_index; this->s_index += 1; while (this->s_index < this->s_len && (isalnum(this->s_str[this->s_index]) @@ -119,9 +148,9 @@ shlex::tokenize(string_fragment& cap_out, shlex_token_t& token_out) { this->s_index += 1; } - cap_out.sf_end = this->s_index; - token_out = shlex_token_t::ST_TILDE; - return true; + retval.tr_frag.sf_end = this->s_index; + retval.tr_token = shlex_token_t::tilde; + return Ok(retval); default: break; } @@ -130,13 +159,15 @@ shlex::tokenize(string_fragment& cap_out, shlex_token_t& token_out) case '\t': switch (this->s_state) { case state_t::STATE_NORMAL: - cap_out.sf_begin = this->s_index; - while (isspace(this->s_str[this->s_index])) { + retval.tr_frag.sf_begin = this->s_index; + while (this->s_index < this->s_len + && isspace(this->s_str[this->s_index])) + { this->s_index += 1; } - cap_out.sf_end = this->s_index; - token_out = shlex_token_t::ST_WHITESPACE; - return true; + retval.tr_frag.sf_end = this->s_index; + retval.tr_token = shlex_token_t::whitespace; + return Ok(retval); default: break; } @@ -148,29 +179,47 @@ shlex::tokenize(string_fragment& cap_out, shlex_token_t& token_out) this->s_index += 1; } - return false; + if (this->s_state != state_t::STATE_NORMAL) { + retval.tr_frag.sf_begin = this->s_index; + retval.tr_frag.sf_end = this->s_len; + return Err(tokenize_error_t{ + "non-terminated string", + retval.tr_frag, + }); + } + + retval.tr_frag.sf_begin = this->s_len; + retval.tr_frag.sf_end = this->s_len; + retval.tr_token = shlex_token_t::eof; + return Ok(retval); } -void -shlex::scan_variable_ref(string_fragment& cap_out, shlex_token_t& token_out) +Result<shlex::tokenize_result_t, shlex::tokenize_error_t> +shlex::scan_variable_ref() { - cap_out.sf_begin = this->s_index; + tokenize_result_t retval; + + retval.tr_frag.sf_string = this->s_str; + + retval.tr_frag.sf_begin = this->s_index; this->s_index += 1; if (this->s_index >= this->s_len) { - cap_out.sf_end = this->s_index; - token_out = shlex_token_t::ST_ERROR; - return; + retval.tr_frag.sf_end = this->s_index; + return Err(tokenize_error_t{ + "invalid variable reference", + retval.tr_frag, + }); } if (this->s_str[this->s_index] == '{') { - token_out = shlex_token_t::ST_QUOTED_VARIABLE_REF; + retval.tr_token = shlex_token_t::quoted_variable_ref; this->s_index += 1; } else { - token_out = shlex_token_t::ST_VARIABLE_REF; + retval.tr_token = shlex_token_t::variable_ref; } while (this->s_index < this->s_len) { - if (token_out == shlex_token_t::ST_VARIABLE_REF) { + if (retval.tr_token == shlex_token_t::variable_ref) { if (isalnum(this->s_str[this->s_index]) || this->s_str[this->s_index] == '#' || this->s_str[this->s_index] == '_') @@ -188,14 +237,19 @@ shlex::scan_variable_ref(string_fragment& cap_out, shlex_token_t& token_out) } } - cap_out.sf_end = this->s_index; - if (token_out == shlex_token_t::ST_QUOTED_VARIABLE_REF + retval.tr_frag.sf_end = this->s_index; + if (retval.tr_token == shlex_token_t::quoted_variable_ref && this->s_str[this->s_index - 1] != '}') { - cap_out.sf_begin += 1; - cap_out.sf_end = cap_out.sf_begin + 1; - token_out = shlex_token_t::ST_ERROR; + retval.tr_frag.sf_begin += 1; + retval.tr_frag.sf_end = retval.tr_frag.sf_begin + 1; + return Err(tokenize_error_t{ + "missing closing curly-brace in variable reference", + retval.tr_frag, + }); } + + return Ok(retval); } void @@ -216,3 +270,159 @@ shlex::resolve_home_dir(std::string& result, string_fragment cap) const } } } + +bool +shlex::eval(std::string& result, const scoped_resolver& vars) +{ + result.clear(); + + int last_index = 0; + bool done = false; + + while (!done) { + auto tokenize_res = this->tokenize(); + if (tokenize_res.isErr()) { + return false; + } + auto token = tokenize_res.unwrap(); + + result.append(&this->s_str[last_index], + token.tr_frag.sf_begin - last_index); + switch (token.tr_token) { + case shlex_token_t::eof: + done = true; + break; + case shlex_token_t::escape: + result.append(1, this->s_str[token.tr_frag.sf_begin + 1]); + break; + case shlex_token_t::whitespace: + result.append(&this->s_str[token.tr_frag.sf_begin], + token.tr_frag.length()); + break; + case shlex_token_t::variable_ref: + case shlex_token_t::quoted_variable_ref: { + int extra = token.tr_token == shlex_token_t::variable_ref ? 0 + : 1; + const std::string var_name( + &this->s_str[token.tr_frag.sf_begin + 1 + extra], + token.tr_frag.length() - 1 - extra * 2); + auto local_var = vars.find(var_name); + const char* var_value = getenv(var_name.c_str()); + + if (local_var != vars.end()) { + result.append(fmt::to_string(local_var->second)); + } else if (var_value != nullptr) { + result.append(var_value); + } + break; + } + case shlex_token_t::tilde: + this->resolve_home_dir(result, token.tr_frag); + break; + case shlex_token_t::double_quote_start: + case shlex_token_t::double_quote_end: + result.append("\""); + break; + case shlex_token_t::single_quote_start: + case shlex_token_t::single_quote_end: + result.append("'"); + break; + default: + break; + } + last_index = token.tr_frag.sf_end; + } + + result.append(&this->s_str[last_index], this->s_len - last_index); + + return true; +} + +Result<std::vector<shlex::split_element_t>, shlex::tokenize_error_t> +shlex::split(const scoped_resolver& vars) +{ + std::vector<split_element_t> retval; + int last_index = 0; + bool start_new = true; + bool done = false; + + while (this->s_index < this->s_len && isspace(this->s_str[this->s_index])) { + this->s_index += 1; + } + if (this->s_index == this->s_len) { + return Ok(retval); + } + while (!done) { + auto tokenize_res = TRY(this->tokenize()); + + if (start_new) { + if (last_index < this->s_len) { + retval.emplace_back(split_element_t{ + string_fragment::from_byte_range( + this->s_str, last_index, tokenize_res.tr_frag.sf_begin), + "", + }); + } + start_new = false; + } else if (tokenize_res.tr_token != shlex_token_t::whitespace) { + retval.back().se_origin.sf_end = tokenize_res.tr_frag.sf_end; + } else { + retval.back().se_origin.sf_end = tokenize_res.tr_frag.sf_begin; + } + retval.back().se_value.append( + &this->s_str[last_index], + tokenize_res.tr_frag.sf_begin - last_index); + switch (tokenize_res.tr_token) { + case shlex_token_t::eof: + done = true; + break; + case shlex_token_t::escape: + retval.back().se_value.append( + 1, this->s_str[tokenize_res.tr_frag.sf_begin + 1]); + break; + case shlex_token_t::whitespace: + start_new = true; + break; + case shlex_token_t::variable_ref: + case shlex_token_t::quoted_variable_ref: { + int extra = tokenize_res.tr_token == shlex_token_t::variable_ref + ? 0 + : 1; + std::string var_name( + &this->s_str[tokenize_res.tr_frag.sf_begin + 1 + extra], + tokenize_res.tr_frag.length() - 1 - extra * 2); + auto local_var = vars.find(var_name); + const char* var_value = getenv(var_name.c_str()); + + if (local_var != vars.end()) { + retval.back().se_value.append( + fmt::to_string(local_var->second)); + } else if (var_value != nullptr) { + retval.back().se_value.append(var_value); + } + break; + } + case shlex_token_t::tilde: + this->resolve_home_dir(retval.back().se_value, + tokenize_res.tr_frag); + break; + default: + break; + } + last_index = tokenize_res.tr_frag.sf_end; + } + + if (last_index < this->s_len) { + if (start_new || retval.empty()) { + retval.emplace_back(split_element_t{ + string_fragment::from_byte_range( + this->s_str, last_index, this->s_len), + "", + }); + } + retval.back().se_value.append(&this->s_str[last_index], + this->s_len - last_index); + } + + return Ok(retval); +} |