summaryrefslogtreecommitdiffstats
path: root/src/rgw/rgw_es_query.cc
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/rgw/rgw_es_query.cc
parentInitial commit. (diff)
downloadceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/rgw/rgw_es_query.cc')
-rw-r--r--src/rgw/rgw_es_query.cc694
1 files changed, 694 insertions, 0 deletions
diff --git a/src/rgw/rgw_es_query.cc b/src/rgw/rgw_es_query.cc
new file mode 100644
index 000000000..57a51634d
--- /dev/null
+++ b/src/rgw/rgw_es_query.cc
@@ -0,0 +1,694 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab ft=cpp
+
+#include <list>
+#include <map>
+#include <string>
+#include <iostream>
+#include <boost/algorithm/string.hpp>
+
+#include "common/ceph_json.h"
+#include "rgw_common.h"
+#include "rgw_es_query.h"
+
+
+#define dout_context g_ceph_context
+#define dout_subsys ceph_subsys_rgw
+
+bool pop_front(list<string>& l, string *s)
+{
+ if (l.empty()) {
+ return false;
+ }
+ *s = l.front();
+ l.pop_front();
+ return true;
+}
+
+map<string, int> operator_map = {
+ { "or", 1 },
+ { "and", 2 },
+ { "<", 3 },
+ { "<=", 3 },
+ { "==", 3 },
+ { "!=", 3 },
+ { ">=", 3 },
+ { ">", 3 },
+};
+
+bool is_operator(const string& s)
+{
+ return (operator_map.find(s) != operator_map.end());
+}
+
+int operand_value(const string& op)
+{
+ auto i = operator_map.find(op);
+ if (i == operator_map.end()) {
+ return 0;
+ }
+
+ return i->second;
+}
+
+int check_precedence(const string& op1, const string& op2)
+{
+ return operand_value(op1) - operand_value(op2);
+}
+
+static bool infix_to_prefix(list<string>& source, list<string> *out)
+{
+ list<string> operator_stack;
+ list<string> operand_stack;
+
+ operator_stack.push_front("(");
+ source.push_back(")");
+
+ for (string& entity : source) {
+ if (entity == "(") {
+ operator_stack.push_front(entity);
+ } else if (entity == ")") {
+ string popped_operator;
+ if (!pop_front(operator_stack, &popped_operator)) {
+ return false;
+ }
+
+ while (popped_operator != "(") {
+ operand_stack.push_front(popped_operator);
+ if (!pop_front(operator_stack, &popped_operator)) {
+ return false;
+ }
+ }
+
+ } else if (is_operator(entity)) {
+ string popped_operator;
+ if (!pop_front(operator_stack, &popped_operator)) {
+ return false;
+ }
+
+ int precedence = check_precedence(popped_operator, entity);
+
+ while (precedence >= 0) {
+ operand_stack.push_front(popped_operator);
+ if (!pop_front(operator_stack, &popped_operator)) {
+ return false;
+ }
+ precedence = check_precedence(popped_operator, entity);
+ }
+
+ operator_stack.push_front(popped_operator);
+ operator_stack.push_front(entity);
+ } else {
+ operand_stack.push_front(entity);
+ }
+
+ }
+
+ if (!operator_stack.empty()) {
+ return false;
+ }
+
+ out->swap(operand_stack);
+ return true;
+}
+
+class ESQueryNode {
+protected:
+ ESQueryCompiler *compiler;
+public:
+ ESQueryNode(ESQueryCompiler *_compiler) : compiler(_compiler) {}
+ virtual ~ESQueryNode() {}
+
+ virtual bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) = 0;
+
+ virtual void dump(Formatter *f) const = 0;
+};
+
+static bool alloc_node(ESQueryCompiler *compiler, ESQueryStack *s, ESQueryNode **pnode, string *perr);
+
+class ESQueryNode_Bool : public ESQueryNode {
+ string op;
+ ESQueryNode *first{nullptr};
+ ESQueryNode *second{nullptr};
+public:
+ explicit ESQueryNode_Bool(ESQueryCompiler *compiler) : ESQueryNode(compiler) {}
+ ESQueryNode_Bool(ESQueryCompiler *compiler, const string& _op, ESQueryNode *_first, ESQueryNode *_second) :ESQueryNode(compiler), op(_op), first(_first), second(_second) {}
+ bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
+ bool valid = s->pop(&op);
+ if (!valid) {
+ *perr = "incorrect expression";
+ return false;
+ }
+ valid = alloc_node(compiler, s, &first, perr) &&
+ alloc_node(compiler, s, &second, perr);
+ if (!valid) {
+ return false;
+ }
+ *pnode = this;
+ return true;
+ }
+ virtual ~ESQueryNode_Bool() {
+ delete first;
+ delete second;
+ }
+
+ void dump(Formatter *f) const override {
+ f->open_object_section("bool");
+ const char *section = (op == "and" ? "must" : "should");
+ f->open_array_section(section);
+ encode_json("entry", *first, f);
+ encode_json("entry", *second, f);
+ f->close_section();
+ f->close_section();
+ }
+
+};
+
+class ESQueryNodeLeafVal {
+public:
+ ESQueryNodeLeafVal() = default;
+ virtual ~ESQueryNodeLeafVal() {}
+
+ virtual bool init(const string& str_val, string *perr) = 0;
+ virtual void encode_json(const string& field, Formatter *f) const = 0;
+};
+
+class ESQueryNodeLeafVal_Str : public ESQueryNodeLeafVal {
+ string val;
+public:
+ ESQueryNodeLeafVal_Str() {}
+ bool init(const string& str_val, string *perr) override {
+ val = str_val;
+ return true;
+ }
+ void encode_json(const string& field, Formatter *f) const override {
+ ::encode_json(field.c_str(), val.c_str(), f);
+ }
+};
+
+class ESQueryNodeLeafVal_Int : public ESQueryNodeLeafVal {
+ int64_t val{0};
+public:
+ ESQueryNodeLeafVal_Int() {}
+ bool init(const string& str_val, string *perr) override {
+ string err;
+ val = strict_strtoll(str_val.c_str(), 10, &err);
+ if (!err.empty()) {
+ *perr = string("failed to parse integer: ") + err;
+ return false;
+ }
+ return true;
+ }
+ void encode_json(const string& field, Formatter *f) const override {
+ ::encode_json(field.c_str(), val, f);
+ }
+};
+
+class ESQueryNodeLeafVal_Date : public ESQueryNodeLeafVal {
+ ceph::real_time val;
+public:
+ ESQueryNodeLeafVal_Date() {}
+ bool init(const string& str_val, string *perr) override {
+ if (parse_time(str_val.c_str(), &val) < 0) {
+ *perr = string("failed to parse date: ") + str_val;
+ return false;
+ }
+ return true;
+ }
+ void encode_json(const string& field, Formatter *f) const override {
+ string s;
+ rgw_to_iso8601(val, &s);
+ ::encode_json(field.c_str(), s, f);
+ }
+};
+
+class ESQueryNode_Op : public ESQueryNode {
+protected:
+ string op;
+ string field;
+ string str_val;
+ ESQueryNodeLeafVal *val{nullptr};
+ ESEntityTypeMap::EntityType entity_type{ESEntityTypeMap::ES_ENTITY_NONE};
+ bool allow_restricted{false};
+
+ bool val_from_str(string *perr) {
+ switch (entity_type) {
+ case ESEntityTypeMap::ES_ENTITY_DATE:
+ val = new ESQueryNodeLeafVal_Date;
+ break;
+ case ESEntityTypeMap::ES_ENTITY_INT:
+ val = new ESQueryNodeLeafVal_Int;
+ break;
+ default:
+ val = new ESQueryNodeLeafVal_Str;
+ }
+ return val->init(str_val, perr);
+ }
+ bool do_init(ESQueryNode **pnode, string *perr) {
+ field = compiler->unalias_field(field);
+ ESQueryNode *effective_node;
+ if (!handle_nested(&effective_node, perr)) {
+ return false;
+ }
+ if (!val_from_str(perr)) {
+ return false;
+ }
+ *pnode = effective_node;
+ return true;
+ }
+
+public:
+ ESQueryNode_Op(ESQueryCompiler *compiler) : ESQueryNode(compiler) {}
+ ~ESQueryNode_Op() {
+ delete val;
+ }
+ virtual bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
+ bool valid = s->pop(&op) &&
+ s->pop(&str_val) &&
+ s->pop(&field);
+ if (!valid) {
+ *perr = "invalid expression";
+ return false;
+ }
+ return do_init(pnode, perr);
+ }
+ bool handle_nested(ESQueryNode **pnode, string *perr);
+
+ void set_allow_restricted(bool allow) {
+ allow_restricted = allow;
+ }
+
+ virtual void dump(Formatter *f) const override = 0;
+};
+
+class ESQueryNode_Op_Equal : public ESQueryNode_Op {
+public:
+ explicit ESQueryNode_Op_Equal(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {}
+ ESQueryNode_Op_Equal(ESQueryCompiler *compiler, const string& f, const string& v) : ESQueryNode_Op(compiler) {
+ op = "==";
+ field = f;
+ str_val = v;
+ }
+
+ bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
+ if (op.empty()) {
+ return ESQueryNode_Op::init(s, pnode, perr);
+ }
+ return do_init(pnode, perr);
+ }
+
+ virtual void dump(Formatter *f) const override {
+ f->open_object_section("term");
+ val->encode_json(field, f);
+ f->close_section();
+ }
+};
+
+class ESQueryNode_Op_NotEqual : public ESQueryNode_Op {
+public:
+ explicit ESQueryNode_Op_NotEqual(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {}
+ ESQueryNode_Op_NotEqual(ESQueryCompiler *compiler, const string& f, const string& v) : ESQueryNode_Op(compiler) {
+ op = "!=";
+ field = f;
+ str_val = v;
+ }
+
+ bool init(ESQueryStack *s, ESQueryNode **pnode, string *perr) override {
+ if (op.empty()) {
+ return ESQueryNode_Op::init(s, pnode, perr);
+ }
+ return do_init(pnode, perr);
+ }
+
+ virtual void dump(Formatter *f) const override {
+ f->open_object_section("bool");
+ f->open_object_section("must_not");
+ f->open_object_section("term");
+ val->encode_json(field, f);
+ f->close_section();
+ f->close_section();
+ f->close_section();
+ }
+};
+
+class ESQueryNode_Op_Range : public ESQueryNode_Op {
+ string range_str;
+public:
+ ESQueryNode_Op_Range(ESQueryCompiler *compiler, const string& rs) : ESQueryNode_Op(compiler), range_str(rs) {}
+
+ virtual void dump(Formatter *f) const override {
+ f->open_object_section("range");
+ f->open_object_section(field.c_str());
+ val->encode_json(range_str, f);
+ f->close_section();
+ f->close_section();
+ }
+};
+
+class ESQueryNode_Op_Nested_Parent : public ESQueryNode_Op {
+public:
+ ESQueryNode_Op_Nested_Parent(ESQueryCompiler *compiler) : ESQueryNode_Op(compiler) {}
+
+ virtual string get_custom_leaf_field_name() = 0;
+};
+
+template <class T>
+class ESQueryNode_Op_Nested : public ESQueryNode_Op_Nested_Parent {
+ string name;
+ ESQueryNode *next;
+public:
+ ESQueryNode_Op_Nested(ESQueryCompiler *compiler, const string& _name, ESQueryNode *_next) : ESQueryNode_Op_Nested_Parent(compiler),
+ name(_name), next(_next) {}
+ ~ESQueryNode_Op_Nested() {
+ delete next;
+ }
+
+ virtual void dump(Formatter *f) const override {
+ f->open_object_section("nested");
+ string s = string("meta.custom-") + type_str();
+ encode_json("path", s.c_str(), f);
+ f->open_object_section("query");
+ f->open_object_section("bool");
+ f->open_array_section("must");
+ f->open_object_section("entry");
+ f->open_object_section("match");
+ string n = s + ".name";
+ encode_json(n.c_str(), name.c_str(), f);
+ f->close_section();
+ f->close_section();
+ encode_json("entry", *next, f);
+ f->close_section();
+ f->close_section();
+ f->close_section();
+ f->close_section();
+ }
+
+ string type_str() const;
+ string get_custom_leaf_field_name() override {
+ return string("meta.custom-") + type_str() + ".value";
+ }
+};
+
+template<>
+string ESQueryNode_Op_Nested<string>::type_str() const {
+ return "string";
+}
+
+template<>
+string ESQueryNode_Op_Nested<int64_t>::type_str() const {
+ return "int";
+}
+
+template<>
+string ESQueryNode_Op_Nested<ceph::real_time>::type_str() const {
+ return "date";
+}
+
+bool ESQueryNode_Op::handle_nested(ESQueryNode **pnode, string *perr)
+{
+ string field_name = field;
+ const string& custom_prefix = compiler->get_custom_prefix();
+ if (!boost::algorithm::starts_with(field_name, custom_prefix)) {
+ *pnode = this;
+ auto m = compiler->get_generic_type_map();
+ if (m) {
+ bool found = m->find(field_name, &entity_type) &&
+ (allow_restricted || !compiler->is_restricted(field_name));
+ if (!found) {
+ *perr = string("unexpected generic field '") + field_name + "'";
+ }
+ return found;
+ }
+ *perr = "query parser does not support generic types";
+ return false;
+ }
+
+ field_name = field_name.substr(custom_prefix.size());
+ auto m = compiler->get_custom_type_map();
+ if (m) {
+ m->find(field_name, &entity_type);
+ /* ignoring returned bool, for now just treat it as string */
+ }
+
+ ESQueryNode_Op_Nested_Parent *new_node;
+ switch (entity_type) {
+ case ESEntityTypeMap::ES_ENTITY_INT:
+ new_node = new ESQueryNode_Op_Nested<int64_t>(compiler, field_name, this);
+ break;
+ case ESEntityTypeMap::ES_ENTITY_DATE:
+ new_node = new ESQueryNode_Op_Nested<ceph::real_time>(compiler, field_name, this);
+ break;
+ default:
+ new_node = new ESQueryNode_Op_Nested<string>(compiler, field_name, this);
+ }
+
+ field = new_node->get_custom_leaf_field_name();
+ *pnode = new_node;
+
+ return true;
+}
+
+static bool is_bool_op(const string& str)
+{
+ return (str == "or" || str == "and");
+}
+
+static bool alloc_node(ESQueryCompiler *compiler, ESQueryStack *s, ESQueryNode **pnode, string *perr)
+{
+ string op;
+ bool valid = s->peek(&op);
+ if (!valid) {
+ *perr = "incorrect expression";
+ return false;
+ }
+
+ ESQueryNode *node;
+
+ if (is_bool_op(op)) {
+ node = new ESQueryNode_Bool(compiler);
+ } else if (op == "==") {
+ node = new ESQueryNode_Op_Equal(compiler);
+ } else if (op == "!=") {
+ node = new ESQueryNode_Op_NotEqual(compiler);
+ } else {
+ static map<string, string> range_op_map = {
+ { "<", "lt"},
+ { "<=", "lte"},
+ { ">=", "gte"},
+ { ">", "gt"},
+ };
+
+ auto iter = range_op_map.find(op);
+ if (iter == range_op_map.end()) {
+ *perr = string("invalid operator: ") + op;
+ return false;
+ }
+
+ node = new ESQueryNode_Op_Range(compiler, iter->second);
+ }
+
+ if (!node->init(s, pnode, perr)) {
+ delete node;
+ return false;
+ }
+ return true;
+}
+
+
+bool is_key_char(char c)
+{
+ switch (c) {
+ case '(':
+ case ')':
+ case '<':
+ case '>':
+ case '!':
+ case '@':
+ case ',':
+ case ';':
+ case ':':
+ case '\\':
+ case '"':
+ case '/':
+ case '[':
+ case ']':
+ case '?':
+ case '=':
+ case '{':
+ case '}':
+ case ' ':
+ case '\t':
+ return false;
+ };
+ return (isascii(c) > 0);
+}
+
+static bool is_op_char(char c)
+{
+ switch (c) {
+ case '!':
+ case '<':
+ case '=':
+ case '>':
+ return true;
+ };
+ return false;
+}
+
+static bool is_val_char(char c)
+{
+ if (isspace(c)) {
+ return false;
+ }
+ return (c != ')');
+}
+
+void ESInfixQueryParser::skip_whitespace(const char *str, int size, int& pos) {
+ while (pos < size && isspace(str[pos])) {
+ ++pos;
+ }
+}
+
+bool ESInfixQueryParser::get_next_token(bool (*filter)(char)) {
+ skip_whitespace(str, size, pos);
+ int token_start = pos;
+ while (pos < size && filter(str[pos])) {
+ ++pos;
+ }
+ if (pos == token_start) {
+ return false;
+ }
+ string token = string(str + token_start, pos - token_start);
+ args.push_back(token);
+ return true;
+}
+
+bool ESInfixQueryParser::parse_condition() {
+ /*
+ * condition: <key> <operator> <val>
+ *
+ * whereas key: needs to conform to http header field restrictions
+ * operator: one of the following: < <= == != >= >
+ * val: ascii, terminated by either space or ')' (or end of string)
+ */
+
+ /* parse key */
+ bool valid = get_next_token(is_key_char) &&
+ get_next_token(is_op_char) &&
+ get_next_token(is_val_char);
+
+ if (!valid) {
+ return false;
+ }
+
+ return true;
+}
+
+bool ESInfixQueryParser::parse_and_or() {
+ skip_whitespace(str, size, pos);
+ if (pos + 3 <= size && strncmp(str + pos, "and", 3) == 0) {
+ pos += 3;
+ args.push_back("and");
+ return true;
+ }
+
+ if (pos + 2 <= size && strncmp(str + pos, "or", 2) == 0) {
+ pos += 2;
+ args.push_back("or");
+ return true;
+ }
+
+ return false;
+}
+
+bool ESInfixQueryParser::parse_specific_char(const char *pchar) {
+ skip_whitespace(str, size, pos);
+ if (pos >= size) {
+ return false;
+ }
+ if (str[pos] != *pchar) {
+ return false;
+ }
+
+ args.push_back(pchar);
+ ++pos;
+ return true;
+}
+
+bool ESInfixQueryParser::parse_open_bracket() {
+ return parse_specific_char("(");
+}
+
+bool ESInfixQueryParser::parse_close_bracket() {
+ return parse_specific_char(")");
+}
+
+bool ESInfixQueryParser::parse(list<string> *result) {
+ /*
+ * expression: [(]<condition>[[and/or]<condition>][)][and/or]...
+ */
+
+ while (pos < size) {
+ parse_open_bracket();
+ if (!parse_condition()) {
+ return false;
+ }
+ parse_close_bracket();
+ parse_and_or();
+ }
+
+ result->swap(args);
+
+ return true;
+}
+
+bool ESQueryCompiler::convert(list<string>& infix, string *perr) {
+ list<string> prefix;
+ if (!infix_to_prefix(infix, &prefix)) {
+ *perr = "invalid query";
+ return false;
+ }
+ stack.assign(prefix);
+ if (!alloc_node(this, &stack, &query_root, perr)) {
+ return false;
+ }
+ if (!stack.done()) {
+ *perr = "invalid query";
+ return false;
+ }
+ return true;
+}
+
+ESQueryCompiler::~ESQueryCompiler() {
+ delete query_root;
+}
+
+bool ESQueryCompiler::compile(string *perr) {
+ list<string> infix;
+ if (!parser.parse(&infix)) {
+ *perr = "failed to parse query";
+ return false;
+ }
+
+ if (!convert(infix, perr)) {
+ return false;
+ }
+
+ for (auto& c : eq_conds) {
+ ESQueryNode_Op_Equal *eq_node = new ESQueryNode_Op_Equal(this, c.first, c.second);
+ eq_node->set_allow_restricted(true); /* can access restricted fields */
+ ESQueryNode *effective_node;
+ if (!eq_node->init(nullptr, &effective_node, perr)) {
+ delete eq_node;
+ return false;
+ }
+ query_root = new ESQueryNode_Bool(this, "and", effective_node, query_root);
+ }
+
+ return true;
+}
+
+void ESQueryCompiler::dump(Formatter *f) const {
+ encode_json("query", *query_root, f);
+}
+