diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 12:24:36 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-13 12:24:36 +0000 |
commit | 06eaf7232e9a920468c0f8d74dcf2fe8b555501c (patch) | |
tree | e2c7b5777f728320e5b5542b6213fd3591ba51e2 /sql/json_table.cc | |
parent | Initial commit. (diff) | |
download | mariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.tar.xz mariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.zip |
Adding upstream version 1:10.11.6.upstream/1%10.11.6
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'sql/json_table.cc')
-rw-r--r-- | sql/json_table.cc | 1478 |
1 files changed, 1478 insertions, 0 deletions
diff --git a/sql/json_table.cc b/sql/json_table.cc new file mode 100644 index 00000000..4f3cfb6b --- /dev/null +++ b/sql/json_table.cc @@ -0,0 +1,1478 @@ +/* + Copyright (c) 2020, MariaDB Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA +*/ + +#include "mariadb.h" +#include "sql_priv.h" +#include "sql_class.h" /* TMP_TABLE_PARAM */ +#include "table.h" +#include "sql_type_json.h" +#include "item_jsonfunc.h" +#include "json_table.h" +#include "sql_show.h" +#include "sql_select.h" +#include "create_tmp_table.h" +#include "sql_parse.h" + +#define HA_ERR_JSON_TABLE (HA_ERR_LAST+1) + +/* + Allocating memory and *also* using it (reading and + writing from it) because some build instructions cause + compiler to optimize out stack_used_up. Since alloca() + here depends on stack_used_up, it doesnt get executed + correctly and causes json_debug_nonembedded to fail + ( --error ER_STACK_OVERRUN_NEED_MORE does not occur). +*/ +#define ALLOCATE_MEM_ON_STACK(A) do \ + { \ + uchar *array= (uchar*)alloca(A); \ + array[0]= 1; \ + array[0]++; \ + array[0] ? array[0]++ : array[0]--; \ + } while(0) + +class table_function_handlerton +{ +public: + handlerton m_hton; + table_function_handlerton() + { + bzero(&m_hton, sizeof(m_hton)); + m_hton.tablefile_extensions= hton_no_exts; + m_hton.slot= HA_SLOT_UNDEF; + } +}; + + +static table_function_handlerton table_function_hton; + +/* + @brief + Collect a set of tables that a given table function cannot have + references to. + + @param + table_func The table function we are connecting info for + join_list The nested join to be processed + disallowed_tables Collect the tables here. + + @detail + According to the SQL standard, a table function can refer to any table + that's "preceding" it in the FROM clause. + + The other limitation we would like to enforce is that the inner side of + an outer join cannot refer to the outer side. An example: + + SELECT * from JSON_TABLE(t1.col, ...) left join t1 on ... + + This function implements both of the above restrictions. + + Basic idea: the "join_list" contains the tables in the order that's a + reverse of the order they were specified in the query. + If we walk the join_list, we will encounter: + 1. First, the tables that table function cannot refer to (collect them in a + bitmap) + 2. Then, the table function itself (put it in the bitmap, too, as self- + references are not allowed, and stop the walk) + 3. Tables that the table function CAN refer to (we don't walk these as + we've stopped on step #2). + + The above can be applied recursively for nested joins (this covers NATURAL + JOIN, and JOIN ... USING constructs). + + Enforcing the "refer to only preceding tables" rule means that outer side + of LEFT JOIN cannot refer to the inner side. + + Handing RIGHT JOINs: There are no RIGHT JOINs in the join_list data + structures. They were converted to LEFT JOINs (see calls to st_select_lex:: + convert_right_join). This conversion changes the order of tables, but + we are ok with operating on the tables "in the left join order". + + @return + 0 - Continue + 1 - Finish the process, success + -1 - Finish the process, failure +*/ + +static +int get_disallowed_table_deps_for_list(MEM_ROOT *mem_root, + TABLE_LIST *table_func, + List<TABLE_LIST> *join_list, + List<TABLE_LIST> *disallowed_tables) +{ + TABLE_LIST *table; + NESTED_JOIN *nested_join; + List_iterator<TABLE_LIST> li(*join_list); + + DBUG_EXECUTE_IF("json_check_min_stack_requirement", + { + long arbitrary_var; + long stack_used_up= (available_stack_size(current_thd->thread_stack, &arbitrary_var)); + ALLOCATE_MEM_ON_STACK(my_thread_stack_size-stack_used_up-STACK_MIN_SIZE); + }); + if (check_stack_overrun(current_thd, STACK_MIN_SIZE , NULL)) + return 1; + + while ((table= li++)) + { + if ((nested_join= table->nested_join)) + { + int res; + if ((res= get_disallowed_table_deps_for_list(mem_root, table_func, + &nested_join->join_list, + disallowed_tables))) + return res; + } + else + { + if (disallowed_tables->push_back(table, mem_root)) + return -1; + if (table == table_func) + { + // This is the JSON_TABLE(...) that are we're computing dependencies + // for. + return 1; // Finish the processing + } + } + } + return 0; // Continue +} + + +/* + @brief + Given a join and a table function in it (specified by its table_func_bit), + produce a bitmap of tables that the table function can NOT have references + to. + + @detail + See get_disallowed_table_deps_for_list + + @return + NULL - Out of memory + Other - A list of tables that the function cannot have references to. May + be empty. +*/ + +static +List<TABLE_LIST>* get_disallowed_table_deps(MEM_ROOT *mem_root, + SELECT_LEX *select, + TABLE_LIST *table_func) +{ + List<TABLE_LIST> *disallowed_tables; + + if (!(disallowed_tables = new (mem_root) List<TABLE_LIST>)) + return NULL; + + int res= get_disallowed_table_deps_for_list(mem_root, table_func, + select->join_list, + disallowed_tables); + + // The collection process must have finished + DBUG_ASSERT(res != 0); + + if (res == -1) + return NULL; // Out of memory + + return disallowed_tables; +} + + +/* + A table that produces output rows for JSON_TABLE(). +*/ + +class ha_json_table: public handler +{ + Table_function_json_table *m_jt; + + String *m_js; // The JSON document we're reading + String m_tmps; // Buffer for the above + + int fill_column_values(THD *thd, uchar * buf, uchar *pos); + +public: + ha_json_table(TABLE_SHARE *share_arg, Table_function_json_table *jt): + handler(&table_function_hton.m_hton, share_arg), m_jt(jt) + { + /* + set the mark_trx_read_write_done to avoid the + handler::mark_trx_read_write_internal() call. + It relies on &ha_thd()->ha_data[ht->slot].ha_info[0] to be set. + But we don't set the ha_data for the ha_json_table, and + that call makes no sence for ha_json_table. + */ + mark_trx_read_write_done= 1; + + /* See ha_json_table::position for format definition */ + ref_length= m_jt->m_columns.elements * 4; + } + ~ha_json_table() {} + handler *clone(const char *name, MEM_ROOT *mem_root) override { return NULL; } + /* Rows also use a fixed-size format */ + enum row_type get_row_type() const override { return ROW_TYPE_FIXED; } + const char *table_type() const override + { + return "JSON_TABLE function"; + } + ulonglong table_flags() const override + { + return (HA_FAST_KEY_READ | /*HA_NO_BLOBS |*/ HA_NULL_IN_KEY | + HA_CAN_SQL_HANDLER | + HA_REC_NOT_IN_SEQ | HA_NO_TRANSACTIONS | + HA_HAS_RECORDS); + } + ulong index_flags(uint inx, uint part, bool all_parts) const override + { + return HA_ONLY_WHOLE_INDEX | HA_KEY_SCAN_NOT_ROR; + } + ha_rows records() override { return HA_POS_ERROR; } + + int open(const char *name, int mode, uint test_if_locked) override + { return 0; } + int close(void) override { return 0; } + int rnd_init(bool scan) override; + int rnd_next(uchar *buf) override; + int rnd_pos(uchar * buf, uchar *pos) override; + void position(const uchar *record) override; + int info(uint) override; + int extra(enum ha_extra_function operation) override { return 0; } + THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, + enum thr_lock_type lock_type) override + { return NULL; } + int create(const char *name, TABLE *form, HA_CREATE_INFO *create_info) + override { return 1; } + /* Give no message. */ + bool get_error_message(int error, String *buf) override + { + buf->length(0); + return TRUE; + } +}; + + +/* + Helper class that creates the temporary table that + represents the table function in the query. +*/ + +class Create_json_table final: public Create_tmp_table +{ +public: + Create_json_table() : + Create_tmp_table((ORDER*) 0, 0, 0, 0, 0) + {} + virtual ~Create_json_table() {}; + TABLE *start(THD *thd, + TMP_TABLE_PARAM *param, + Table_function_json_table *jt, + const LEX_CSTRING *table_alias); + bool choose_engine(THD *thd, TABLE *table, TMP_TABLE_PARAM *param) override + { + return 0; // Engine already choosen + } + bool add_json_table_fields(THD *thd, TABLE *table, + Table_function_json_table *jt); + bool finalize(THD *thd, TABLE *table, TMP_TABLE_PARAM *param, + Table_function_json_table *jt); +}; + + +/* + @brief + Start scanning the JSON document in [str ... end] + + @detail + Note: non-root nested paths are set to scan one JSON node (that is, a + "subdocument"). +*/ + +void Json_table_nested_path::scan_start(CHARSET_INFO *i_cs, + const uchar *str, const uchar *end) +{ + json_get_path_start(&m_engine, i_cs, str, end, &m_cur_path); + m_cur_nested= NULL; + m_null= false; + m_ordinality_counter= 0; +} + + +/* + @brief + Find the next JSON element that matches the search path. +*/ + +int Json_table_nested_path::scan_next() +{ + bool no_records_found= false; + if (m_cur_nested) + { + for (;;) + { + if (m_cur_nested->scan_next() == 0) + return 0; + if (!(m_cur_nested= m_cur_nested->m_next_nested)) + break; +handle_new_nested: + m_cur_nested->scan_start(m_engine.s.cs, m_engine.value_begin, + m_engine.s.str_end); + } + if (no_records_found) + return 0; + } + + DBUG_ASSERT(!m_cur_nested); + + while (!json_get_path_next(&m_engine, &m_cur_path)) + { + if (json_path_compare(&m_path, &m_cur_path, m_engine.value_type, + NULL)) + continue; + /* path found. */ + ++m_ordinality_counter; + + if (!m_nested) + return 0; + + m_cur_nested= m_nested; + no_records_found= true; + goto handle_new_nested; + } + + m_null= true; + return 1; +} + + +int ha_json_table::rnd_init(bool scan) +{ + Json_table_nested_path &p= m_jt->m_nested_path; + DBUG_ENTER("ha_json_table::rnd_init"); + + if ((m_js= m_jt->m_json->val_str(&m_tmps))) + { + p.scan_start(m_js->charset(), + (const uchar *) m_js->ptr(), (const uchar *) m_js->end()); + } + + DBUG_RETURN(0); +} + + +/* + @brief + Store JSON value in an SQL field, doing necessary special conversions + for JSON's null, true, and false. +*/ + +static void store_json_in_field(Field *f, const json_engine_t *je) +{ + switch (je->value_type) + { + case JSON_VALUE_NULL: + f->set_null(); + return; + + case JSON_VALUE_TRUE: + case JSON_VALUE_FALSE: + { + Item_result rt= f->result_type(); + if (rt == INT_RESULT || rt == DECIMAL_RESULT || rt == REAL_RESULT) + { + f->store(je->value_type == JSON_VALUE_TRUE, false); + return; + } + break; + } + default: + break; + }; + f->store((const char *) je->value, (uint32) je->value_len, je->s.cs); +} + + +static int store_json_in_json(Field *f, json_engine_t *je) +{ + const uchar *from= je->value_begin; + const uchar *to; + + if (json_value_scalar(je)) + to= je->value_end; + else + { + int error; + if ((error= json_skip_level(je))) + return error; + to= je->s.c_str; + } + f->store((const char *) from, (uint32) (to - from), je->s.cs); + return 0; +} + + +bool Json_table_nested_path::check_error(const char *str) +{ + if (m_engine.s.error) + { + report_json_error_ex(str, &m_engine, "JSON_TABLE", 0, + Sql_condition::WARN_LEVEL_ERROR); + return true; // Error + } + return false; // Ok +} + + +int ha_json_table::rnd_next(uchar *buf) +{ + if (!m_js) + return HA_ERR_END_OF_FILE; + + /* + Step 1: Move the root nested path to the next record (this implies moving + its child nested paths accordingly) + */ + if (m_jt->m_nested_path.scan_next()) + { + if (m_jt->m_nested_path.check_error(m_js->ptr())) + { + /* + We already reported an error, so returning an + error code that just doesn't produce extra + messages. + */ + return HA_ERR_JSON_TABLE; + } + return HA_ERR_END_OF_FILE; + } + + /* + Step 2: Read values for all columns (the columns refer to nested paths + they are in). + */ + return fill_column_values(table->in_use, buf, NULL) ? HA_ERR_JSON_TABLE : 0; +} + + +/* + @brief + Fill values of table columns, taking data either from Json_nested_path + objects, or from the rowid value + + @param pos NULL means the data should be read from Json_nested_path + objects. + Non-null value is a pointer to previously saved rowid (see + ha_json_table::position() for description) +*/ + +int ha_json_table::fill_column_values(THD *thd, uchar * buf, uchar *pos) +{ + MY_BITMAP *orig_map= dbug_tmp_use_all_columns(table, &table->write_set); + int error= 0; + Counting_error_handler er_handler; + Field **f= table->field; + Json_table_column *jc; + List_iterator_fast<Json_table_column> jc_i(m_jt->m_columns); + my_ptrdiff_t ptrdiff= buf - table->record[0]; + Abort_on_warning_instant_set ao_set(table->in_use, FALSE); + enum_check_fields cf_orig= table->in_use->count_cuted_fields; + + table->in_use->count_cuted_fields= CHECK_FIELD_ERROR_FOR_NULL; + + thd->push_internal_handler(&er_handler); + + while (!error && (jc= jc_i++)) + { + bool is_null_value; + uint int_pos= 0; /* just to make compilers happy. */ + + if (!bitmap_is_set(table->read_set, (*f)->field_index)) + { + /* + If the RESPONSE_ERROR is set for the column, we have + to unpack it even if it's not in the read_set - to check + for possible errors. + */ + if (jc->m_on_empty.m_response != Json_table_column::RESPONSE_ERROR && + jc->m_on_error.m_response != Json_table_column::RESPONSE_ERROR) + goto cont_loop; + } + + (*f)->move_field_offset(ptrdiff); + + /* + Read the NULL flag: + - if we are reading from a rowid value, 0 means SQL NULL. + - if scanning json document, read it from the nested path + */ + if (pos) + is_null_value= !(int_pos= uint4korr(pos)); + else + is_null_value= jc->m_nest->m_null; + + if (is_null_value) + { + (*f)->set_null(); + } + else + { + (*f)->set_notnull(); + switch (jc->m_column_type) + { + case Json_table_column::FOR_ORDINALITY: + { + /* + Read the cardinality counter: + - read it from nested path when scanning the json document + - or, read it from rowid when in rnd_pos() call + */ + longlong counter= pos? int_pos: jc->m_nest->m_ordinality_counter; + (*f)->store(counter, TRUE); + break; + } + case Json_table_column::PATH: + case Json_table_column::EXISTS_PATH: + { + json_engine_t je; + json_path_step_t *cur_step; + int array_counters[JSON_DEPTH_LIMIT]; + int not_found; + const uchar* node_start; + const uchar* node_end; + + /* + Get the JSON context node that we will need to evaluate PATH or + EXISTS against: + - when scanning the json document, read it from nested path + - when in rnd_pos call, the rowid has the start offset. + */ + if (pos) + { + node_start= (const uchar *) (m_js->ptr() + (int_pos-1)); + node_end= (const uchar *) m_js->end(); + } + else + { + node_start= jc->m_nest->get_value(); + node_end= jc->m_nest->get_value_end(); + } + + json_scan_start(&je, m_js->charset(), node_start, node_end); + + cur_step= jc->m_path.steps; + not_found= json_find_path(&je, &jc->m_path, &cur_step, array_counters) || + json_read_value(&je); + + if (jc->m_column_type == Json_table_column::EXISTS_PATH) + { + (*f)->store(!not_found); + } + else /*PATH*/ + { + if (not_found) + { + error= jc->m_on_empty.respond(jc, *f, ER_JSON_TABLE_ERROR_ON_FIELD); + } + else + { + if (jc->m_format_json) + { + if (!(error= store_json_in_json(*f, &je))) + error= er_handler.errors; + } + else if (!(error= !json_value_scalar(&je))) + { + store_json_in_field(*f, &je); + error= er_handler.errors; + } + + if (error) + { + error= jc->m_on_error.respond(jc, *f, + ER_JSON_TABLE_SCALAR_EXPECTED); + er_handler.errors= 0; + } + else + { + /* + If the path contains wildcards, check if there are + more matches for it in json and report an error if so. + */ + if (jc->m_path.types_used & + (JSON_PATH_WILD | JSON_PATH_DOUBLE_WILD | + JSON_PATH_ARRAY_RANGE) && + (json_scan_next(&je) || + !json_find_path(&je, &jc->m_path, &cur_step, + array_counters))) + { + error= jc->m_on_error.respond(jc, *f, + ER_JSON_TABLE_MULTIPLE_MATCHES); + } + } + } + } + break; + } + }; + } + (*f)->move_field_offset(-ptrdiff); + +cont_loop: + f++; + if (pos) + pos+= 4; + } + + dbug_tmp_restore_column_map(&table->write_set, orig_map); + thd->pop_internal_handler(); + thd->count_cuted_fields= cf_orig; + return error; +} + + +int ha_json_table::rnd_pos(uchar * buf, uchar *pos) +{ + return fill_column_values(table->in_use, buf, pos) ? HA_ERR_JSON_TABLE : 0; +} + + +/* + The reference has 4 bytes for every column of the JSON_TABLE. + There it keeps 0 for the NULL values, ordinality index for + the ORDINALITY columns and the offset of the field's data in + the JSON for other column types. +*/ +void ha_json_table::position(const uchar *record) +{ + uchar *c_ref= ref; + Json_table_column *jc; + List_iterator_fast<Json_table_column> jc_i(m_jt->m_columns); + + while ((jc= jc_i++)) + { + if (jc->m_nest->m_null) + { + int4store(c_ref, 0); + } + else + { + switch (jc->m_column_type) + { + case Json_table_column::FOR_ORDINALITY: + int4store(c_ref, jc->m_nest->m_ordinality_counter); + break; + case Json_table_column::PATH: + case Json_table_column::EXISTS_PATH: + { + size_t pos= jc->m_nest->get_value() - + (const uchar *) m_js->ptr() + 1; + int4store(c_ref, pos); + break; + } + }; + } + c_ref+= 4; + } +} + + +int ha_json_table::info(uint) +{ + /* + We don't want 0 or 1 in stats.records. + Though this value shouldn't matter as the optimizer + supposed to use Table_function_json_table::get_estimates + to obtain this data. + */ + stats.records= 4; + return 0; +} + + +/** + Create a json table according to a field list. + + @param thd thread handle + @param param a description used as input to create the table + @param jt json_table specificaion + @param table_alias alias +*/ + +TABLE *Create_json_table::start(THD *thd, + TMP_TABLE_PARAM *param, + Table_function_json_table *jt, + const LEX_CSTRING *table_alias) +{ + TABLE *table; + TABLE_SHARE *share; + DBUG_ENTER("Create_json_table::start"); + + param->tmp_name= "json"; + if (!(table= Create_tmp_table::start(thd, param, table_alias))) + DBUG_RETURN(0); + share= table->s; + share->not_usable_by_query_cache= FALSE; + share->db_plugin= NULL; + if (!(table->file= new (&table->mem_root) ha_json_table(share, jt))) + DBUG_RETURN(NULL); + table->file->init(); + DBUG_RETURN(table); +} + + +bool Create_json_table::finalize(THD *thd, TABLE *table, + TMP_TABLE_PARAM *param, + Table_function_json_table *jt) +{ + DBUG_ENTER("Create_json_table::finalize"); + DBUG_ASSERT(table); + + if (Create_tmp_table::finalize(thd, table, param, 1, 0)) + DBUG_RETURN(true); + + table->db_stat= HA_OPEN_KEYFILE; + if (unlikely(table->file->ha_open(table, table->s->path.str, O_RDWR, + HA_OPEN_TMP_TABLE | HA_OPEN_INTERNAL_TABLE))) + DBUG_RETURN(true); + + table->set_created(); + table->s->max_rows= ~(ha_rows) 0; + param->end_write_records= HA_POS_ERROR; + DBUG_RETURN(0); +} + + +/* + @brief + Read the JSON_TABLE's field definitions from @jt and add the fields to + table @table. +*/ + +bool Create_json_table::add_json_table_fields(THD *thd, TABLE *table, + Table_function_json_table *jt) +{ + TABLE_SHARE *share= table->s; + Json_table_column *jc; + uint fieldnr= 0; + MEM_ROOT *mem_root_save= thd->mem_root; + List_iterator_fast<Json_table_column> jc_i(jt->m_columns); + Column_derived_attributes da(&my_charset_utf8mb4_general_ci); + DBUG_ENTER("add_json_table_fields"); + + thd->mem_root= &table->mem_root; + current_counter= other; + + while ((jc= jc_i++)) + { + Create_field *sql_f= jc->m_field; + List_iterator_fast<Json_table_column> it2(jt->m_columns); + Json_table_column *jc2; + /* + Initialize length from its original value (number of characters), + which was set in the parser. This is necessary if we're + executing a prepared statement for the second time. + */ + sql_f->length= sql_f->char_length; + + if (sql_f->prepare_stage1(thd, thd->mem_root, + COLUMN_DEFINITION_TABLE_FIELD, + &da)) + goto err_exit; + + while ((jc2= it2++) != jc) + { + if (lex_string_cmp(system_charset_info, + &sql_f->field_name, &jc2->m_field->field_name) == 0) + { + my_error(ER_DUP_FIELDNAME, MYF(0), sql_f->field_name.str); + goto err_exit; + } + } + it2.rewind(); + } + + jc_i.rewind(); + + while ((jc= jc_i++)) + { + Create_field *sql_f= jc->m_field; + Record_addr addr(!(sql_f->flags & NOT_NULL_FLAG)); + Bit_addr bit(addr.null()); + uint uneven_delta; + + sql_f->prepare_stage2(table->file, table->file->ha_table_flags()); + + if (!sql_f->charset) + sql_f->charset= &my_charset_utf8mb4_bin; + + Field *f= sql_f->type_handler()->make_table_field_from_def(share, + thd->mem_root, &sql_f->field_name, addr, bit, sql_f, sql_f->flags); + if (!f) + goto err_exit; + f->init(table); + uneven_delta= m_uneven_bit_length; + add_field(table, f, fieldnr++, 0); + m_uneven_bit[current_counter]+= (m_uneven_bit_length - uneven_delta); + } + + share->fields= fieldnr; + share->blob_fields= m_blob_count; + table->field[fieldnr]= 0; // End marker + share->blob_field[m_blob_count]= 0; // End marker + share->column_bitmap_size= bitmap_buffer_size(share->fields); + + thd->mem_root= mem_root_save; + + DBUG_RETURN(FALSE); +err_exit: + thd->mem_root= mem_root_save; + DBUG_RETURN(TRUE); +} + + +/* + @brief + Given a TABLE_LIST representing JSON_TABLE(...) syntax, create a temporary + table for it. + + @detail + The temporary table will have: + - fields whose names/datatypes are specified in JSON_TABLE(...) syntax + - a ha_json_table as the storage engine. + + The uses of the temporary table are: + - name resolution: the query may have references to the columns of + JSON_TABLE(...). A TABLE object will allow to resolve them. + - query execution: ha_json_table will produce JSON_TABLE's rows. +*/ + +TABLE *create_table_for_function(THD *thd, TABLE_LIST *sql_table) +{ + TMP_TABLE_PARAM tp; + TABLE *table; + uint field_count= sql_table->table_function->m_columns.elements+1; + + DBUG_ENTER("create_table_for_function"); + + tp.init(); + tp.table_charset= system_charset_info; + tp.field_count= field_count; + { + Create_json_table maker; + + if (!(table= maker.start(thd, &tp, + sql_table->table_function, &sql_table->alias)) || + maker.add_json_table_fields(thd, table, sql_table->table_function) || + maker.finalize(thd, table, &tp, sql_table->table_function)) + { + if (table) + free_tmp_table(thd, table); + DBUG_RETURN(NULL); + } + } + sql_table->schema_table_name.length= 0; + + my_bitmap_map* bitmaps= + (my_bitmap_map*) thd->alloc(bitmap_buffer_size(field_count)); + my_bitmap_init(&table->def_read_set, (my_bitmap_map*) bitmaps, field_count); + table->read_set= &table->def_read_set; + bitmap_clear_all(table->read_set); + table->alias_name_used= true; + table->next= thd->derived_tables; + thd->derived_tables= table; + table->s->tmp_table= INTERNAL_TMP_TABLE; + table->grant.privilege= SELECT_ACL; + + sql_table->table= table; + + DBUG_RETURN(table); +} + + +int Json_table_column::set(THD *thd, enum_type ctype, const LEX_CSTRING &path, + CHARSET_INFO *cs) +{ + set(ctype); + m_explicit_cs= cs; + if (json_path_setup(&m_path, thd->variables.collation_connection, + (const uchar *) path.str, (const uchar *)(path.str + path.length))) + { + report_path_error_ex(path.str, &m_path, "JSON_TABLE", 1, + Sql_condition::WARN_LEVEL_ERROR); + return 1; + } + + /* + This is done so the ::print function can just print the path string. + Can be removed if we redo that function to print the path using it's + anctual content. Not sure though if we should. + */ + m_path.s.c_str= (const uchar *) path.str; + + if (ctype == PATH) + m_format_json= m_field->type_handler() == &type_handler_long_blob_json; + + return 0; +} + + +int Json_table_column::set(THD *thd, enum_type ctype, const LEX_CSTRING &path, + const Lex_column_charset_collation_attrs_st &cl) +{ + if (cl.is_empty() || cl.is_contextually_typed_collate_default()) + return set(thd, ctype, path, nullptr); + + CHARSET_INFO *tmp; + if (!(tmp= cl.resolved_to_character_set(&my_charset_utf8mb4_general_ci))) + return 1; + return set(thd, ctype, path, tmp); +} + + +static int print_path(String *str, const json_path_t *p) +{ + return str->append('\'') || + str->append_for_single_quote((const char *) p->s.c_str, + p->s.str_end - p->s.c_str) || + str->append('\''); +} + + +/* + Print the string representation of the Json_table_column. + + @param thd - the thread + @param f - the remaining array of Field-s from the table + if the Json_table_column + @param str - the string where to print +*/ +int Json_table_column::print(THD *thd, Field **f, String *str) +{ + StringBuffer<MAX_FIELD_WIDTH> column_type(str->charset()); + + if (append_identifier(thd, str, &m_field->field_name) || + str->append(' ')) + return 1; + + switch (m_column_type) + { + case FOR_ORDINALITY: + if (str->append(STRING_WITH_LEN("FOR ORDINALITY"))) + return 1; + break; + case EXISTS_PATH: + case PATH: + { + static const LEX_CSTRING path= { STRING_WITH_LEN(" PATH ") }; + static const LEX_CSTRING exists_path= { STRING_WITH_LEN(" EXISTS PATH ") }; + + (*f)->sql_type(column_type); + + if (str->append(column_type) || + ((*f)->has_charset() && m_explicit_cs && + (str->append(STRING_WITH_LEN(" CHARSET ")) || + str->append(&m_explicit_cs->cs_name) || + (Charset(m_explicit_cs).can_have_collate_clause() && + (str->append(STRING_WITH_LEN(" COLLATE ")) || + str->append(&m_explicit_cs->coll_name))))) || + str->append(m_column_type == PATH ? &path : &exists_path) || + print_path(str, &m_path)) + return 1; + break; + } + }; + + if (m_on_empty.print("EMPTY", str) || + m_on_error.print("ERROR", str)) + return 1; + + return 0; +} + + +int Json_table_nested_path::set_path(THD *thd, const LEX_CSTRING &path) +{ + if (json_path_setup(&m_path, thd->variables.collation_connection, + (const uchar *) path.str, (const uchar *)(path.str + path.length))) + { + report_path_error_ex(path.str, &m_path, "JSON_TABLE", 1, + Sql_condition::WARN_LEVEL_ERROR); + return 1; + } + + /* + This is done so the ::print function can just print the path string. + Can be removed if we redo that function to print the path using its + actual content. Not sure though if we should. + */ + m_path.s.c_str= (const uchar *) path.str; + return 0; +} + + +/* + @brief + Perform the action of this response on field @f (emit an error, or set @f + to NULL, or set it to default value). + error_num supposed to have the error message with field_name and table_name + arguments. +*/ + +int Json_table_column::On_response::respond(Json_table_column *jc, Field *f, + uint error_num) +{ + switch (m_response) + { + case Json_table_column::RESPONSE_NOT_SPECIFIED: + case Json_table_column::RESPONSE_NULL: + f->set_null(); + break; + case Json_table_column::RESPONSE_ERROR: + f->set_null(); + my_error(error_num, MYF(0), f->field_name.str, f->table->alias.ptr()); + return 1; + case Json_table_column::RESPONSE_DEFAULT: + f->set_notnull(); + f->store(m_default.str, + m_default.length, jc->m_defaults_cs); + break; + } + return 0; +} + + +int Json_table_column::On_response::print(const char *name, String *str) const +{ + LEX_CSTRING resp; + const LEX_CSTRING *ds= NULL; + if (m_response == Json_table_column::RESPONSE_NOT_SPECIFIED) + return 0; + + switch (m_response) + { + case Json_table_column::RESPONSE_NULL: + lex_string_set3(&resp, STRING_WITH_LEN("NULL")); + break; + case Json_table_column::RESPONSE_ERROR: + lex_string_set3(&resp, STRING_WITH_LEN("ERROR")); + break; + case Json_table_column::RESPONSE_DEFAULT: + { + lex_string_set3(&resp, STRING_WITH_LEN("DEFAULT")); + ds= &m_default; + break; + } + default: + lex_string_set3(&resp, "", 0); + DBUG_ASSERT(FALSE); /* should never happen. */ + } + + return (str->append(' ') || str->append(resp) || + (ds && (str->append(STRING_WITH_LEN(" '")) || + str->append_for_single_quote(ds->str, ds->length) || + str->append('\''))) || + str->append(STRING_WITH_LEN(" ON ")) || + str->append(name, strlen(name))); +} + + +void Table_function_json_table::start_nested_path(Json_table_nested_path *np) +{ + np->m_parent= cur_parent; + *last_sibling_hook= np; + + // Make the newly added path the parent + cur_parent= np; + last_sibling_hook= &np->m_nested; +} + + +void Table_function_json_table::end_nested_path() +{ + last_sibling_hook= &cur_parent->m_next_nested; + cur_parent= cur_parent->m_parent; +} + + +/* + @brief Create a name resolution context for doing name resolution in table + function argument. + + @seealso + push_new_name_resolution_context +*/ + +bool push_table_function_arg_context(LEX *lex, MEM_ROOT *alloc) +{ + // Walk the context stack until we find a context that is used for resolving + // the SELECT's WHERE clause. + List_iterator<Name_resolution_context> it(lex->context_stack); + Name_resolution_context *ctx; + while ((ctx= it++)) + { + if (ctx->select_lex && ctx == &ctx->select_lex->context) + break; + } + DBUG_ASSERT(ctx); + + // Then, create a copy of it and return it. + Name_resolution_context *new_ctx= new (alloc) Name_resolution_context; + + // Note: not all fields of *ctx are initialized yet at this point. + // We will get all of the fields filled in Table_function_json_table::setup + // (search for the "Prepare the name resolution context" comment). + *new_ctx= *ctx; + return lex->push_context(new_ctx); +} + + +/* + @brief + Perform name-resolution phase tasks + + @detail + The only argument that needs name resolution is the first parameter which + has the JSON text: + + JSON_TABLE(json_doc, ... ) + + The argument may refer to other tables and uses special name resolution + rules (see get_disallowed_table_deps_for_list for details). This function + sets up Name_resolution_context object appropriately before calling + fix_fields for the argument. + + @return + false OK + true Fatal error +*/ + +bool Table_function_json_table::setup(THD *thd, TABLE_LIST *sql_table, + SELECT_LEX *s_lex) +{ + thd->where= "JSON_TABLE argument"; + + if (!m_context_setup_done) + { + m_context_setup_done= true; + // Prepare the name resolution context. First, copy the context that is + // used for name resolution of the WHERE clause + *m_context= s_lex->context; + + // Then, restrict it to only allow to refer to tables that come before the + // table function reference + if (!(m_context->ignored_tables= + get_disallowed_table_deps(thd->stmt_arena->mem_root, s_lex, + sql_table))) + return TRUE; // Error + } + + bool save_is_item_list_lookup; + save_is_item_list_lookup= s_lex->is_item_list_lookup; + s_lex->is_item_list_lookup= 0; + + // Do the same what setup_without_group() does: do not count the referred + // fields in non_agg_field_used: + const bool saved_non_agg_field_used= s_lex->non_agg_field_used(); + + bool res= m_json->fix_fields_if_needed_for_scalar(thd, &m_json); + + s_lex->is_item_list_lookup= save_is_item_list_lookup; + s_lex->set_non_agg_field_used(saved_non_agg_field_used); + + if (res) + return TRUE; // Error + + return FALSE; +} + +int Table_function_json_table::walk_items(Item_processor processor, + bool walk_subquery, void *argument) +{ + return m_json->walk(processor, walk_subquery, argument); +} + +void Table_function_json_table::get_estimates(ha_rows *out_rows, + double *scan_time, + double *startup_cost) +{ + *out_rows= 40; + *scan_time= 0.0; + *startup_cost= 0.0; +} + + +/* + Check if a column belongs to the nested path + or a path that nested into it. + It only supposed to be used in the Json_table_nested_path::print, and + since the nested path should have at least one field we + don't have to loop through the m_next_nested. +*/ +bool Json_table_nested_path::column_in_this_or_nested( + const Json_table_nested_path *p, const Json_table_column *jc) +{ + for (; p; p= p->m_nested) + { + if (jc->m_nest == p) + return TRUE; + } + return FALSE; +} + + +/* + Print the string representation of the Json_nested_path object. + Which is the COLUMNS(...) part of the JSON_TABLE definition. + + @param thd - the thread + @param f - the remaining part of the array of Field* objects + taken from the TABLE. + It's needed as Json_table_column objects + don't have links to the related Field-s. + @param str - the string where to print + @param it - the remaining part of the Json_table_column list + @param last_column - the last column taken from the list. +*/ + +int Json_table_nested_path::print(THD *thd, Field ***f, String *str, + List_iterator_fast<Json_table_column> &it, + Json_table_column **last_column) +{ + Json_table_nested_path *c_path= this; + Json_table_nested_path *c_nested= m_nested; + Json_table_column *jc= *last_column; + bool first_column= TRUE; + + if (str->append(STRING_WITH_LEN("COLUMNS ("))) + return 1; + + /* loop while jc belongs to the current or nested paths. */ + while(jc && + (jc->m_nest == c_path || column_in_this_or_nested(c_nested, jc))) + { + if (first_column) + first_column= FALSE; + else if (str->append(STRING_WITH_LEN(", "))) + return 1; + + if (jc->m_nest == c_path) + { + if (jc->print(thd, *f, str)) + return 1; + if ((jc= it++)) + ++(*f); + } + else + { + DBUG_ASSERT(column_in_this_or_nested(c_nested, jc)); + if (str->append(STRING_WITH_LEN("NESTED PATH ")) || + print_path(str, &jc->m_nest->m_path) || + str->append(' ') || + c_nested->print(thd, f, str, it, &jc)) + return 1; + c_nested= c_nested->m_next_nested; + } + } + + if (str->append(STRING_WITH_LEN(")"))) + return 1; + + *last_column= jc; + return 0; +} + + +/* + Print the SQL definition of the JSON_TABLE. + Used mostly as a part of the CREATE VIEW statement. + + @param thd - the thread + @param sql_table - the corresponding TABLE_LIST object + @param str - the string where to print + @param query_type - the query type +*/ +int Table_function_json_table::print(THD *thd, TABLE_LIST *sql_table, + String *str, enum_query_type query_type) +{ + List_iterator_fast<Json_table_column> jc_i(m_columns); + Json_table_column *jc= jc_i++; + Field **f_list= sql_table->table->field; + + DBUG_ENTER("Table_function_json_table::print"); + + if (str->append(STRING_WITH_LEN("JSON_TABLE("))) + DBUG_RETURN(TRUE); + + m_json->print(str, query_type); + + if (str->append(STRING_WITH_LEN(", ")) || + print_path(str, &m_nested_path.m_path) || + str->append(' ') || + m_nested_path.print(thd, &f_list, str, jc_i, &jc) || + str->append(')')) + DBUG_RETURN(TRUE); + + DBUG_RETURN(0); +} + + +void Table_function_json_table::fix_after_pullout(TABLE_LIST *sql_table, + st_select_lex *new_parent, bool merge) +{ + m_json->fix_after_pullout(new_parent, &m_json, merge); + sql_table->dep_tables= used_tables(); +} + + +/* + @brief + Recursively make all tables in the join_list also depend on deps. +*/ + +static void add_extra_deps(List<TABLE_LIST> *join_list, table_map deps) +{ + TABLE_LIST *table; + List_iterator<TABLE_LIST> li(*join_list); + + DBUG_EXECUTE_IF("json_check_min_stack_requirement", + { + long arbitrary_var; + long stack_used_up= (available_stack_size(current_thd->thread_stack, &arbitrary_var)); + ALLOCATE_MEM_ON_STACK(my_thread_stack_size-stack_used_up-STACK_MIN_SIZE); + }); + if (check_stack_overrun(current_thd, STACK_MIN_SIZE , NULL)) + return; + while ((table= li++)) + { + table->dep_tables |= deps; + NESTED_JOIN *nested_join; + if ((nested_join= table->nested_join)) + { + // set the deps inside, too + add_extra_deps(&nested_join->join_list, deps); + } + } +} + + +/* + @brief + Add table dependencies that are directly caused by table functions, also + add extra dependencies so that the join optimizer does not construct + "dead-end" join prefixes. + + @detail + There are two kinds of limitations on join order: + 1A. Outer joins require that inner tables follow outer. + 1B. Tables within a join nest must be present in the join order + "without interleaving". See check_interleaving_with_nj for details. + + 2. Table function argument may refer to *any* table that precedes the + current table in the query text. The table maybe outside of the current + nested join and/or inside another nested join. + + One may think that adding dependency according to #2 would be sufficient, + but this is not the case. + + @example + + select ... + from + t20 left join t21 on t20.a=t21.a + join + (t31 left join (t32 join + JSON_TABLE(t21.js, + '$' COLUMNS (ab INT PATH '$.a')) AS jt + ) on t31.a<3 + ) + + Here, jt's argument refers to t21. + + Table dependencies are: + t21 -> t20 + t32 -> t31 + jt -> t21 t31 (also indirectly depends on t20 through t21) + + This allows to construct a "dead-end" join prefix, like: + + t31, t32 + + Here, "no interleaving" rule requires the next table to be jt, but we + can't add it, because it depends on t21 which is not in the join prefix. + + @end example + + Dead-end join prefixes do not work with join prefix pruning done for + @@optimizer_prune_level: it is possible that all non-dead-end prefixes are + pruned away. + + The solution is as follows: if there is an outer join that contains + (directly on indirectly) a table function JT which has a reference JREF + outside of the outer join: + + left join ( T_I ... json_table(JREF, ...) as JT ...) + + then make *all* tables T_I also dependent on outside references in JREF. + This way, the optimizer will put table T_I into the join prefix only when + JT can be put there as well, and "dead-end" prefixes will not be built. + + @param join_list List of tables to process. Initial invocation should + supply the JOIN's top-level table list. + @param nest_tables Bitmap of all tables in the join list. + + @return Bitmap of all outside references that tables in join_list have +*/ + +table_map add_table_function_dependencies(List<TABLE_LIST> *join_list, + table_map nest_tables) +{ + TABLE_LIST *table; + table_map res= 0; + List_iterator<TABLE_LIST> li(*join_list); + + DBUG_EXECUTE_IF("json_check_min_stack_requirement", + { + long arbitrary_var; + long stack_used_up= (available_stack_size(current_thd->thread_stack, &arbitrary_var)); + ALLOCATE_MEM_ON_STACK(my_thread_stack_size-stack_used_up-STACK_MIN_SIZE); + }); + if ((res=check_stack_overrun(current_thd, STACK_MIN_SIZE , NULL))) + return res; + + // Recursively compute extra dependencies + while ((table= li++)) + { + NESTED_JOIN *nested_join; + if ((nested_join= table->nested_join)) + { + res |= add_table_function_dependencies(&nested_join->join_list, + nested_join->used_tables); + } + else if (table->table_function) + { + table->dep_tables |= table->table_function->used_tables(); + res |= table->dep_tables; + } + } + res= res & ~nest_tables & ~PSEUDO_TABLE_BITS; + // Then, make all "peers" have them: + if (res) + add_extra_deps(join_list, res); + + return res; +} + + |