summaryrefslogtreecommitdiffstats
path: root/sql/json_table.h
blob: 74c2bfc217d372faeef057d0ddccf5c375b6179b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
#ifndef JSON_TABLE_INCLUDED
#define JSON_TABLE_INCLUDED

/* Copyright (c) 2020, MariaDB Corporation. All rights reserved.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; version 2 of the License.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1335  USA */


#include <json_lib.h>

class Json_table_column;

/*
  The Json_table_nested_path represents the 'current nesting' level
  for a set of JSON_TABLE columns.
  Each column (Json_table_column instance) is linked with corresponding
  'nested path' object and gets its piece of JSON to parse during the computation
  phase.
  The root 'nested_path' is always present as a part of Table_function_json_table,
  then other 'nested_paths' can be created and linked into a tree structure when new
  'NESTED PATH' is met. The nested 'nested_paths' are linked with 'm_nested', the same-level
  'nested_paths' are linked with 'm_next_nested'.
  So for instance
    JSON_TABLE( '...', '$[*]'
       COLUMNS( a INT PATH '$.a' ,
          NESTED PATH '$.b[*]' COLUMNS (b INT PATH '$',
                                        NESTED PATH '$.c[*]' COLUMNS(x INT PATH '$')),
          NESTED PATH '$.n[*]' COLUMNS (z INT PATH '$'))
  results in 4 'nested_path' created:
                 root          nested_b       nested_c     nested_n
  m_path           '$[*]'         '$.b[*]'        '$.c[*]'     '$.n[*]
  m_nested          &nested_b     &nested_c       NULL         NULL
  n_next_nested     NULL          &nested_n       NULL         NULL

  and 4 columns created:
              a          b            x            z
  m_nest    &root      &nested_b    &nested_c    &nested_n
*/

class Json_table_nested_path : public Sql_alloc
{
public:
  json_path_t m_path;  /* The JSON Path to get the rows from */
  bool m_null; // TRUE <=> producing a NULL-complemented row.

  /*** Construction interface ***/
  Json_table_nested_path():
    m_null(TRUE), m_nested(NULL), m_next_nested(NULL)
  {}

  int set_path(THD *thd, const LEX_CSTRING &path);

  /*** Methods for performing a scan ***/
  void scan_start(CHARSET_INFO *i_cs, const uchar *str, const uchar *end);
  int scan_next();
  bool check_error(const char *str);

  /*** Members for getting the values we've scanned to ***/
  const uchar *get_value() { return m_engine.value_begin; }
  const uchar *get_value_end() { return m_engine.s.str_end; }

  /* Counts the rows produced. Used by FOR ORDINALITY columns */
  longlong m_ordinality_counter;

  int print(THD *thd, Field ***f, String *str,
            List_iterator_fast<Json_table_column> &it,
            Json_table_column **last_column);
private:
  /* The head of the list of nested NESTED PATH statements. */
  Json_table_nested_path *m_nested;

  /* in the above list items are linked with the */
  Json_table_nested_path *m_next_nested;

  /*** Members describing NESTED PATH structure ***/
  /* Parent nested path. The "root" path has this NULL */
  Json_table_nested_path *m_parent;

  /*** Members describing current JSON Path scan state ***/
  /* The JSON Parser and JSON Path evaluator */
  json_engine_t m_engine;

  /* The path the parser is currently pointing to */
  json_path_t m_cur_path;

  /* The child NESTED PATH we're currently scanning */
  Json_table_nested_path *m_cur_nested;

  static bool column_in_this_or_nested(const Json_table_nested_path *p,
                                       const Json_table_column *jc);
  friend class Table_function_json_table;
};


/*
  @brief
    Describes the column definition in JSON_TABLE(...) syntax.

  @detail
    Has methods for printing/handling errors but otherwise it's a static
    object.
*/

class Json_table_column : public Sql_alloc
{
public:
  enum enum_type
  {
    FOR_ORDINALITY,
    PATH,
    EXISTS_PATH
  };

  enum enum_on_type
  {
    ON_EMPTY,
    ON_ERROR
  };

  enum enum_on_response
  {
    RESPONSE_NOT_SPECIFIED,
    RESPONSE_ERROR,
    RESPONSE_NULL,
    RESPONSE_DEFAULT
  };

  struct On_response
  {
  public:
    Json_table_column::enum_on_response m_response;
    LEX_CSTRING m_default;
    int respond(Json_table_column *jc, Field *f, uint error_num);
    int print(const char *name, String *str) const;
    bool specified() const { return m_response != RESPONSE_NOT_SPECIFIED; }
  };

  enum_type m_column_type;
  bool m_format_json;
  json_path_t m_path;
  On_response m_on_error;
  On_response m_on_empty;
  Create_field *m_field;
  Json_table_nested_path *m_nest;
  CHARSET_INFO *m_explicit_cs;
  CHARSET_INFO *m_defaults_cs;

  void set(enum_type ctype)
  {
    m_column_type= ctype;
  }
  int set(THD *thd, enum_type ctype, const LEX_CSTRING &path, CHARSET_INFO *cs);
  int set(THD *thd, enum_type ctype, const LEX_CSTRING &path,
          const Lex_column_charset_collation_attrs_st &cl);
  Json_table_column(Create_field *f, Json_table_nested_path *nest) :
    m_field(f), m_nest(nest), m_explicit_cs(NULL)
  {
    m_on_error.m_response= RESPONSE_NOT_SPECIFIED;
    m_on_empty.m_response= RESPONSE_NOT_SPECIFIED;
  }
  int print(THD *tnd, Field **f, String *str);
};


/*
  Class represents the table function, the function
  that returns the table as a result so supposed to appear
  in the FROM list of the SELECT statement.
  At the moment there is only one such function JSON_TABLE,
  so the class named after it, but should be refactored
  into the hierarchy root if we create more of that functions.

  As the parser finds the table function in the list it
  creates an instance of Table_function_json_table storing it
  into the TABLE_LIST::table_function.
  Then the ha_json_table instance is created based on it in
  the create_table_for_function().

  == Replication: whether JSON_TABLE is deterministic ==

  In sql_yacc.yy, we set BINLOG_STMT_UNSAFE_SYSTEM_FUNCTION whenever
  JSON_TABLE is used. The reasoning behind this is as follows:

  In the current MariaDB code, evaluation of JSON_TABLE is deterministic,
  that is, for a given input string JSON_TABLE will always produce the same
  set of rows in the same order.  However one can think of JSON documents
  that one can consider indentical which will produce different output.
  In order to be feature-proof and withstand changes like:
  - sorting JSON object members by name (like MySQL does)
  - changing the way duplicate object members are handled
  we mark the function as SBR-unsafe.
  (If there is ever an issue with this, marking the function as SBR-safe
   is a non-intrusive change we will always be able to make)
*/

class Table_function_json_table : public Sql_alloc
{
public:
  /*** Basic properties of the original JSON_TABLE(...) ***/
  Item *m_json; /* The JSON value to be parsed. */

  /* The COLUMNS(...) part representation. */
  Json_table_nested_path m_nested_path;

  /* The list of table column definitions. */
  List<Json_table_column> m_columns;

  /*** Name resolution functions ***/
  bool setup(THD *thd, TABLE_LIST *sql_table, SELECT_LEX *s_lex);

  int walk_items(Item_processor processor, bool walk_subquery,
                 void *argument);

  /*** Functions for interaction with the Query Optimizer ***/
  void fix_after_pullout(TABLE_LIST *sql_table,
                         st_select_lex *new_parent, bool merge);
  void update_used_tables() { m_json->update_used_tables(); }

  table_map used_tables() const { return m_json->used_tables(); }
  bool join_cache_allowed() const
  {
    /*
      Can use join cache when we have an outside reference.
      If there's dependency on any other table or randomness,
      cannot use it.
    */
    return !(used_tables() & ~OUTER_REF_TABLE_BIT);
  }
  void get_estimates(ha_rows *out_rows,
                     double *scan_time, double *startup_cost);

  int print(THD *thd, TABLE_LIST *sql_table,
            String *str, enum_query_type query_type);

  /*** Construction interface to be used from the parser ***/
  Table_function_json_table(Item *json):
    m_json(json),
    m_context_setup_done(false)
  {
    cur_parent= &m_nested_path;
    last_sibling_hook= &m_nested_path.m_nested;
  }

  void start_nested_path(Json_table_nested_path *np);
  void end_nested_path();
  Json_table_nested_path *get_cur_nested_path() { return cur_parent; }
  void set_name_resolution_context(Name_resolution_context *arg)
  {
    m_context= arg;
  }

  /* SQL Parser: current column in JSON_TABLE (...) syntax */
  Json_table_column *m_cur_json_table_column;

  /* SQL Parser: charset of the current text literal */
  CHARSET_INFO *m_text_literal_cs;

private:
  /* Context to be used for resolving the first argument. */
  Name_resolution_context *m_context;

  bool m_context_setup_done;

  /* Current NESTED PATH level being parsed */
  Json_table_nested_path *cur_parent;

  /*
    Pointer to the list tail where we add the next NESTED PATH.
    It points to the cur_parnt->m_nested for the first nested
    and prev_nested->m_next_nested for the coesequent ones.
  */
  Json_table_nested_path **last_sibling_hook;
};

bool push_table_function_arg_context(LEX *lex, MEM_ROOT *alloc);

TABLE *create_table_for_function(THD *thd, TABLE_LIST *sql_table);

table_map add_table_function_dependencies(List<TABLE_LIST> *join_list,
                                          table_map nest_tables);

#endif /* JSON_TABLE_INCLUDED */