Edit on GitHub

sqlglot.executor.python

  1import ast
  2import collections
  3import itertools
  4import math
  5
  6from sqlglot import exp, generator, planner, tokens
  7from sqlglot.dialects.dialect import Dialect, inline_array_sql
  8from sqlglot.errors import ExecuteError
  9from sqlglot.executor.context import Context
 10from sqlglot.executor.env import ENV
 11from sqlglot.executor.table import RowReader, Table
 12from sqlglot.helper import csv_reader, ensure_list, subclasses
 13
 14
class PythonExecutor:
    """Execute a sqlglot query plan purely in Python.

    Each plan step (scan, join, aggregate, sort, set operation) is evaluated
    against in-memory ``Table`` objects. SQL expressions are compiled to
    Python bytecode using the ``Python`` dialect generator defined below and
    evaluated inside a ``Context``.
    """

    def __init__(self, env=None, tables=None):
        # identify=True quotes identifiers so the generated Python can index
        # the evaluation scope by name; comments are irrelevant to execution.
        self.generator = Python().generator(identify=True, comments=False)
        # Evaluation environment: built-in helpers (ENV) plus caller overrides.
        self.env = {**ENV, **(env or {})}
        self.tables = tables or {}

    def execute(self, plan):
        """Run every step of *plan* bottom-up and return the root step's Table.

        Raises:
            ExecuteError: wrapping any exception raised while running a step.
        """
        finished = set()
        queue = set(plan.leaves)
        contexts = {}

        while queue:
            node = queue.pop()
            try:
                # Merge the tables produced by all of this node's dependencies
                # into a single context the step can read from.
                context = self.context(
                    {
                        name: table
                        for dep in node.dependencies
                        for name, table in contexts[dep].tables.items()
                    }
                )

                if isinstance(node, planner.Scan):
                    contexts[node] = self.scan(node, context)
                elif isinstance(node, planner.Aggregate):
                    contexts[node] = self.aggregate(node, context)
                elif isinstance(node, planner.Join):
                    contexts[node] = self.join(node, context)
                elif isinstance(node, planner.Sort):
                    contexts[node] = self.sort(node, context)
                elif isinstance(node, planner.SetOperation):
                    contexts[node] = self.set_operation(node, context)
                else:
                    raise NotImplementedError

                finished.add(node)

                # Schedule dependents whose inputs are now all available.
                for dep in node.dependents:
                    if all(d in contexts for d in dep.dependencies):
                        queue.add(dep)

                # Release contexts no unfinished dependent still needs.
                for dep in node.dependencies:
                    if all(d in finished for d in dep.dependents):
                        contexts.pop(dep)
            except Exception as e:
                raise ExecuteError(f"Step '{node.id}' failed: {e}") from e

        root = plan.root
        return contexts[root].tables[root.name]

    def generate(self, expression):
        """Convert a SQL expression into literal Python code and compile it into bytecode."""
        if not expression:
            return None

        sql = self.generator.generate(expression)
        # The generated source doubles as the "filename" shown in tracebacks.
        return compile(sql, sql, "eval", optimize=2)

    def generate_tuple(self, expressions):
        """Convert an array of SQL expressions into tuple of Python byte code."""
        if not expressions:
            return tuple()
        return tuple(self.generate(expression) for expression in expressions)

    def context(self, tables):
        # Build an evaluation Context over *tables* sharing this executor's env.
        return Context(tables, env=self.env)

    def table(self, expressions):
        """Create an empty Table whose columns are named after *expressions*."""
        return Table(
            expression.alias_or_name if isinstance(expression, exp.Expression) else expression
            for expression in expressions
        )

    def scan(self, step, context):
        """Read rows for a Scan step from a known table, a CSV, or a static row."""
        source = step.source

        if source and isinstance(source, exp.Expression):
            source = source.name or source.alias

        if source is None:
            # No FROM clause: evaluate projections against a single empty row.
            context, table_iter = self.static()
        elif source in context:
            # Pass-through: no projections/condition means the table is reused as-is.
            if not step.projections and not step.condition:
                return self.context({step.name: context.tables[source]})
            table_iter = context.table_iter(source)
        elif isinstance(step.source, exp.Table) and isinstance(step.source.this, exp.ReadCSV):
            # scan_csv is a generator whose first yield is the context itself.
            table_iter = self.scan_csv(step)
            context = next(table_iter)
        else:
            context, table_iter = self.scan_table(step)

        return self.context({step.name: self._project_and_filter(context, step, table_iter)})

    def _project_and_filter(self, context, step, table_iter):
        """Apply the step's condition, projections, and limit while draining *table_iter*."""
        sink = self.table(step.projections if step.projections else context.columns)
        condition = self.generate(step.condition)
        projections = self.generate_tuple(step.projections)

        for reader in table_iter:
            if len(sink) >= step.limit:
                break

            if condition and not context.eval(condition):
                continue

            if projections:
                sink.append(context.eval_tuple(projections))
            else:
                sink.append(reader.row)

        return sink

    def static(self):
        # A single empty row, used for SELECTs without a FROM clause.
        return self.context({}), [RowReader(())]

    def scan_table(self, step):
        """Look up the step's source in the registered tables and iterate it."""
        table = self.tables.find(step.source)
        context = self.context({step.source.alias_or_name: table})
        return context, iter(table)

    def scan_csv(self, step):
        """Generator over a CSV source: yields the context first, then a reader per row."""
        alias = step.source.alias
        source = step.source.this

        with csv_reader(source) as reader:
            columns = next(reader)
            table = Table(columns)
            context = self.context({alias: table})
            yield context
            types = []
            for row in reader:
                # Infer column types once, from the first data row.
                if not types:
                    for v in row:
                        try:
                            types.append(type(ast.literal_eval(v)))
                        except (ValueError, SyntaxError):
                            types.append(str)

                # We can't cast empty values ('') to non-string types, so we convert them to None instead
                context.set_row(
                    tuple(None if (t is not str and v == "") else t(v) for t, v in zip(types, row))
                )
                yield context.table.reader

    def join(self, step, context):
        """Join the step's source table with each joined table in sequence."""
        source = step.source_name

        source_table = context.tables[source]
        source_context = self.context({source: source_table})
        # Track which column slice of the combined rows belongs to each table.
        column_ranges = {source: range(0, len(source_table.columns))}

        for name, join in step.joins.items():
            table = context.tables[name]
            start = max(r.stop for r in column_ranges.values())
            column_ranges[name] = range(start, len(table.columns) + start)
            join_context = self.context({name: table})

            # Equi-joins use a hash join; anything else falls back to nested loops.
            if join.get("source_key"):
                table = self.hash_join(join, source_context, join_context)
            else:
                table = self.nested_loop_join(join, source_context, join_context)

            # Re-expose every joined table as a column-range view of the combined rows.
            source_context = self.context(
                {
                    name: Table(table.columns, table.rows, column_range)
                    for name, column_range in column_ranges.items()
                }
            )
            condition = self.generate(join["condition"])
            if condition:
                source_context.filter(condition)

        if not step.condition and not step.projections:
            return source_context

        sink = self._project_and_filter(
            source_context,
            step,
            (reader for reader, _ in iter(source_context)),
        )

        if step.projections:
            return self.context({step.name: sink})
        else:
            # No projections: keep per-table views over the filtered rows.
            return self.context(
                {
                    name: Table(table.columns, sink.rows, table.column_range)
                    for name, table in source_context.tables.items()
                }
            )

    def nested_loop_join(self, _join, source_context, join_context):
        """Cartesian product of the two sides; conditions are applied by the caller."""
        table = Table(source_context.columns + join_context.columns)

        for reader_a, _ in source_context:
            for reader_b, _ in join_context:
                table.append(reader_a.row + reader_b.row)

        return table

    def hash_join(self, join, source_context, join_context):
        """Equi-join by bucketing both sides on their key tuples.

        Handles LEFT/RIGHT outer joins by padding the missing side with NULLs.
        """
        source_key = self.generate_tuple(join["source_key"])
        join_key = self.generate_tuple(join["join_key"])
        left = join.get("side") == "LEFT"
        right = join.get("side") == "RIGHT"

        # key -> ([source rows], [join rows])
        results = collections.defaultdict(lambda: ([], []))

        for reader, ctx in source_context:
            results[ctx.eval_tuple(source_key)][0].append(reader.row)
        for reader, ctx in join_context:
            results[ctx.eval_tuple(join_key)][1].append(reader.row)

        table = Table(source_context.columns + join_context.columns)
        nulls = [(None,) * len(join_context.columns if left else source_context.columns)]

        for a_group, b_group in results.values():
            if left:
                b_group = b_group or nulls
            elif right:
                a_group = a_group or nulls

            for a_row, b_row in itertools.product(a_group, b_group):
                table.append(a_row + b_row)

        return table

    def aggregate(self, step, context):
        """Evaluate GROUP BY aggregations by sorting on the group key and scanning runs."""
        group_by = self.generate_tuple(step.group.values())
        aggregations = self.generate_tuple(step.aggregations)
        operands = self.generate_tuple(step.operands)

        if operands:
            # Pre-compute aggregation operands and append them as extra columns.
            operand_table = Table(self.table(step.operands).columns)

            for reader, ctx in context:
                operand_table.append(ctx.eval_tuple(operands))

            for i, (a, b) in enumerate(zip(context.table.rows, operand_table.rows)):
                context.table.rows[i] = a + b

            width = len(context.columns)
            context.add_columns(*operand_table.columns)

            operand_table = Table(
                context.columns,
                context.table.rows,
                range(width, width + len(operand_table.columns)),
            )

            # The None key exposes the operand columns as the anonymous table.
            context = self.context(
                {
                    None: operand_table,
                    **context.tables,
                }
            )

        # Sorting by the group key makes each group a contiguous run of rows.
        context.sort(group_by)

        group = None
        start = 0
        end = 1
        length = len(context.table)
        table = self.table(list(step.group) + step.aggregations)

        def add_row():
            table.append(group + context.eval_tuple(aggregations))

        if length:
            for i in range(length):
                context.set_index(i)
                key = context.eval_tuple(group_by)
                group = key if group is None else group
                end += 1
                # NOTE(review): start/end track a half-open window over the sorted
                # rows; the -2/-1 offsets account for `end` being pre-incremented.
                if key != group:
                    context.set_range(start, end - 2)
                    add_row()
                    group = key
                    start = end - 2
                if len(table.rows) >= step.limit:
                    break
                if i == length - 1:
                    context.set_range(start, end - 1)
                    add_row()
        elif step.limit > 0 and not group_by:
            # Aggregation over an empty table with no GROUP BY yields one row.
            context.set_range(0, 0)
            table.append(context.eval_tuple(aggregations))

        context = self.context({step.name: table, **{name: table for name in context.tables}})

        # HAVING conditions and post-aggregation projections run as a scan.
        if step.projections or step.condition:
            return self.scan(step, context)
        return context

    def sort(self, step, context):
        """Sort rows by the step's key, then apply the limit and projections."""
        projections = self.generate_tuple(step.projections)
        projection_columns = [p.alias_or_name for p in step.projections]
        all_columns = list(context.columns) + projection_columns
        sink = self.table(all_columns)
        for reader, ctx in context:
            # Keep the original row alongside projected values so sort keys
            # can reference either.
            sink.append(reader.row + ctx.eval_tuple(projections))

        sort_ctx = self.context(
            {
                None: sink,
                **{table: sink for table in context.tables},
            }
        )
        sort_ctx.sort(self.generate_tuple(step.key))

        if not math.isinf(step.limit):
            sort_ctx.table.rows = sort_ctx.table.rows[0 : step.limit]

        # Strip the original columns, keeping only the projected ones.
        output = Table(
            projection_columns,
            rows=[r[len(context.columns) : len(all_columns)] for r in sort_ctx.table.rows],
        )
        return self.context({step.name: output})

    def set_operation(self, step, context):
        """Evaluate UNION / INTERSECT / EXCEPT between the step's two inputs."""
        left = context.tables[step.left]
        right = context.tables[step.right]

        sink = self.table(left.columns)

        if issubclass(step.op, exp.Intersect):
            sink.rows = list(set(left.rows).intersection(set(right.rows)))
        elif issubclass(step.op, exp.Except):
            sink.rows = list(set(left.rows).difference(set(right.rows)))
        elif issubclass(step.op, exp.Union) and step.distinct:
            sink.rows = list(set(left.rows).union(set(right.rows)))
        else:
            sink.rows = left.rows + right.rows

        if not math.isinf(step.limit):
            sink.rows = sink.rows[0 : step.limit]

        return self.context({step.name: sink})
353
354
355def _ordered_py(self, expression):
356    this = self.sql(expression, "this")
357    desc = "True" if expression.args.get("desc") else "False"
358    nulls_first = "True" if expression.args.get("nulls_first") else "False"
359    return f"ORDERED({this}, {desc}, {nulls_first})"
360
361
362def _rename(self, e):
363    try:
364        values = list(e.args.values())
365
366        if len(values) == 1:
367            values = values[0]
368            if not isinstance(values, list):
369                return self.func(e.key, values)
370            return self.func(e.key, *values)
371
372        if isinstance(e, exp.Func) and e.is_var_len_args:
373            *head, tail = values
374            return self.func(e.key, *head, *ensure_list(tail))
375
376        return self.func(e.key, *values)
377    except Exception as ex:
378        raise Exception(f"Could not rename {repr(e)}") from ex
379
380
381def _case_sql(self, expression):
382    this = self.sql(expression, "this")
383    chain = self.sql(expression, "default") or "None"
384
385    for e in reversed(expression.args["ifs"]):
386        true = self.sql(e, "true")
387        condition = self.sql(e, "this")
388        condition = f"{this} = ({condition})" if this else condition
389        chain = f"{true} if {condition} else ({chain})"
390
391    return chain
392
393
def _lambda_sql(self, e: exp.Lambda) -> str:
    """Compile a SQL lambda into a Python lambda expression."""
    # Parameter names of the lambda, matched case-insensitively.
    params = {arg.name.lower() for arg in e.expressions}

    def _to_var(node):
        # Identifiers naming lambda parameters become plain variables so they
        # are rendered as Python locals rather than column references.
        if isinstance(node, exp.Identifier) and node.name.lower() in params:
            return exp.var(node.name)
        return node

    e = e.transform(_to_var).assert_is(exp.Lambda)

    return f"lambda {self.expressions(e, flat=True)}: {self.sql(e, 'this')}"
404
405
def _div_sql(self: generator.Generator, e: exp.Div) -> str:
    """Compile a division, handling SAFE_DIVIDE and typed (integer) division."""
    numerator = self.sql(e, "this")
    denominator = self.sql(e, "expression")

    # SAFE division returns NULL on a zero denominator: `x or None` maps 0 -> None.
    suffix = " or None" if e.args.get("safe") else ""
    result = f"DIV({numerator}, {denominator}{suffix})"

    # Typed division truncates the quotient to an integer.
    return f"int({result})" if e.args.get("typed") else result
418
419
class Python(Dialect):
    """Dialect that generates Python source code instead of SQL.

    The strings it produces are compiled to bytecode by ``PythonExecutor`` and
    evaluated against the helper functions registered in the executor env.
    """

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\"]

    class Generator(generator.Generator):
        TRANSFORMS = {
            # Binary operators and all known functions default to generic
            # `NAME(arg, ...)` calls resolved by the runtime environment.
            **{klass: _rename for klass in subclasses(exp.__name__, exp.Binary)},
            **{klass: _rename for klass in exp.ALL_FUNCTIONS},
            exp.Case: _case_sql,
            exp.Alias: lambda self, e: self.sql(e.this),
            exp.Array: inline_array_sql,
            exp.And: lambda self, e: self.binary(e, "and"),
            exp.Between: _rename,
            exp.Boolean: lambda self, e: "True" if e.this else "False",
            exp.Cast: lambda self, e: f"CAST({self.sql(e.this)}, exp.DataType.Type.{e.args['to']})",
            # Columns index the evaluation scope by table, then by column name.
            exp.Column: lambda self,
            e: f"scope[{self.sql(e, 'table') or None}][{self.sql(e.this)}]",
            exp.Concat: lambda self, e: self.func(
                "SAFECONCAT" if e.args.get("safe") else "CONCAT", *e.expressions
            ),
            exp.Distinct: lambda self, e: f"set({self.sql(e, 'this')})",
            exp.Div: _div_sql,
            exp.Extract: lambda self,
            e: f"EXTRACT('{e.name.lower()}', {self.sql(e, 'expression')})",
            # IN renders as Python set membership.
            exp.In: lambda self,
            e: f"{self.sql(e, 'this')} in {{{self.expressions(e, flat=True)}}}",
            exp.Interval: lambda self, e: f"INTERVAL({self.sql(e.this)}, '{self.sql(e.unit)}')",
            # Literal comparisons use ==; NULL (None) comparisons use identity.
            exp.Is: lambda self, e: (
                self.binary(e, "==") if isinstance(e.this, exp.Literal) else self.binary(e, "is")
            ),
            exp.JSONPath: lambda self, e: f"[{','.join(self.sql(p) for p in e.expressions[1:])}]",
            exp.JSONPathKey: lambda self, e: f"'{self.sql(e.this)}'",
            exp.JSONPathSubscript: lambda self, e: f"'{e.this}'",
            exp.Lambda: _lambda_sql,
            exp.Not: lambda self, e: f"not {self.sql(e.this)}",
            exp.Null: lambda *_: "None",
            exp.Or: lambda self, e: self.binary(e, "or"),
            exp.Ordered: _ordered_py,
            # `SELECT *` / COUNT(*) render as the constant 1.
            exp.Star: lambda *_: "1",
        }
class PythonExecutor:
 16class PythonExecutor:
 17    def __init__(self, env=None, tables=None):
 18        self.generator = Python().generator(identify=True, comments=False)
 19        self.env = {**ENV, **(env or {})}
 20        self.tables = tables or {}
 21
 22    def execute(self, plan):
 23        finished = set()
 24        queue = set(plan.leaves)
 25        contexts = {}
 26
 27        while queue:
 28            node = queue.pop()
 29            try:
 30                context = self.context(
 31                    {
 32                        name: table
 33                        for dep in node.dependencies
 34                        for name, table in contexts[dep].tables.items()
 35                    }
 36                )
 37
 38                if isinstance(node, planner.Scan):
 39                    contexts[node] = self.scan(node, context)
 40                elif isinstance(node, planner.Aggregate):
 41                    contexts[node] = self.aggregate(node, context)
 42                elif isinstance(node, planner.Join):
 43                    contexts[node] = self.join(node, context)
 44                elif isinstance(node, planner.Sort):
 45                    contexts[node] = self.sort(node, context)
 46                elif isinstance(node, planner.SetOperation):
 47                    contexts[node] = self.set_operation(node, context)
 48                else:
 49                    raise NotImplementedError
 50
 51                finished.add(node)
 52
 53                for dep in node.dependents:
 54                    if all(d in contexts for d in dep.dependencies):
 55                        queue.add(dep)
 56
 57                for dep in node.dependencies:
 58                    if all(d in finished for d in dep.dependents):
 59                        contexts.pop(dep)
 60            except Exception as e:
 61                raise ExecuteError(f"Step '{node.id}' failed: {e}") from e
 62
 63        root = plan.root
 64        return contexts[root].tables[root.name]
 65
 66    def generate(self, expression):
 67        """Convert a SQL expression into literal Python code and compile it into bytecode."""
 68        if not expression:
 69            return None
 70
 71        sql = self.generator.generate(expression)
 72        return compile(sql, sql, "eval", optimize=2)
 73
 74    def generate_tuple(self, expressions):
 75        """Convert an array of SQL expressions into tuple of Python byte code."""
 76        if not expressions:
 77            return tuple()
 78        return tuple(self.generate(expression) for expression in expressions)
 79
 80    def context(self, tables):
 81        return Context(tables, env=self.env)
 82
 83    def table(self, expressions):
 84        return Table(
 85            expression.alias_or_name if isinstance(expression, exp.Expression) else expression
 86            for expression in expressions
 87        )
 88
 89    def scan(self, step, context):
 90        source = step.source
 91
 92        if source and isinstance(source, exp.Expression):
 93            source = source.name or source.alias
 94
 95        if source is None:
 96            context, table_iter = self.static()
 97        elif source in context:
 98            if not step.projections and not step.condition:
 99                return self.context({step.name: context.tables[source]})
100            table_iter = context.table_iter(source)
101        elif isinstance(step.source, exp.Table) and isinstance(step.source.this, exp.ReadCSV):
102            table_iter = self.scan_csv(step)
103            context = next(table_iter)
104        else:
105            context, table_iter = self.scan_table(step)
106
107        return self.context({step.name: self._project_and_filter(context, step, table_iter)})
108
109    def _project_and_filter(self, context, step, table_iter):
110        sink = self.table(step.projections if step.projections else context.columns)
111        condition = self.generate(step.condition)
112        projections = self.generate_tuple(step.projections)
113
114        for reader in table_iter:
115            if len(sink) >= step.limit:
116                break
117
118            if condition and not context.eval(condition):
119                continue
120
121            if projections:
122                sink.append(context.eval_tuple(projections))
123            else:
124                sink.append(reader.row)
125
126        return sink
127
128    def static(self):
129        return self.context({}), [RowReader(())]
130
131    def scan_table(self, step):
132        table = self.tables.find(step.source)
133        context = self.context({step.source.alias_or_name: table})
134        return context, iter(table)
135
136    def scan_csv(self, step):
137        alias = step.source.alias
138        source = step.source.this
139
140        with csv_reader(source) as reader:
141            columns = next(reader)
142            table = Table(columns)
143            context = self.context({alias: table})
144            yield context
145            types = []
146            for row in reader:
147                if not types:
148                    for v in row:
149                        try:
150                            types.append(type(ast.literal_eval(v)))
151                        except (ValueError, SyntaxError):
152                            types.append(str)
153
154                # We can't cast empty values ('') to non-string types, so we convert them to None instead
155                context.set_row(
156                    tuple(None if (t is not str and v == "") else t(v) for t, v in zip(types, row))
157                )
158                yield context.table.reader
159
160    def join(self, step, context):
161        source = step.source_name
162
163        source_table = context.tables[source]
164        source_context = self.context({source: source_table})
165        column_ranges = {source: range(0, len(source_table.columns))}
166
167        for name, join in step.joins.items():
168            table = context.tables[name]
169            start = max(r.stop for r in column_ranges.values())
170            column_ranges[name] = range(start, len(table.columns) + start)
171            join_context = self.context({name: table})
172
173            if join.get("source_key"):
174                table = self.hash_join(join, source_context, join_context)
175            else:
176                table = self.nested_loop_join(join, source_context, join_context)
177
178            source_context = self.context(
179                {
180                    name: Table(table.columns, table.rows, column_range)
181                    for name, column_range in column_ranges.items()
182                }
183            )
184            condition = self.generate(join["condition"])
185            if condition:
186                source_context.filter(condition)
187
188        if not step.condition and not step.projections:
189            return source_context
190
191        sink = self._project_and_filter(
192            source_context,
193            step,
194            (reader for reader, _ in iter(source_context)),
195        )
196
197        if step.projections:
198            return self.context({step.name: sink})
199        else:
200            return self.context(
201                {
202                    name: Table(table.columns, sink.rows, table.column_range)
203                    for name, table in source_context.tables.items()
204                }
205            )
206
207    def nested_loop_join(self, _join, source_context, join_context):
208        table = Table(source_context.columns + join_context.columns)
209
210        for reader_a, _ in source_context:
211            for reader_b, _ in join_context:
212                table.append(reader_a.row + reader_b.row)
213
214        return table
215
216    def hash_join(self, join, source_context, join_context):
217        source_key = self.generate_tuple(join["source_key"])
218        join_key = self.generate_tuple(join["join_key"])
219        left = join.get("side") == "LEFT"
220        right = join.get("side") == "RIGHT"
221
222        results = collections.defaultdict(lambda: ([], []))
223
224        for reader, ctx in source_context:
225            results[ctx.eval_tuple(source_key)][0].append(reader.row)
226        for reader, ctx in join_context:
227            results[ctx.eval_tuple(join_key)][1].append(reader.row)
228
229        table = Table(source_context.columns + join_context.columns)
230        nulls = [(None,) * len(join_context.columns if left else source_context.columns)]
231
232        for a_group, b_group in results.values():
233            if left:
234                b_group = b_group or nulls
235            elif right:
236                a_group = a_group or nulls
237
238            for a_row, b_row in itertools.product(a_group, b_group):
239                table.append(a_row + b_row)
240
241        return table
242
243    def aggregate(self, step, context):
244        group_by = self.generate_tuple(step.group.values())
245        aggregations = self.generate_tuple(step.aggregations)
246        operands = self.generate_tuple(step.operands)
247
248        if operands:
249            operand_table = Table(self.table(step.operands).columns)
250
251            for reader, ctx in context:
252                operand_table.append(ctx.eval_tuple(operands))
253
254            for i, (a, b) in enumerate(zip(context.table.rows, operand_table.rows)):
255                context.table.rows[i] = a + b
256
257            width = len(context.columns)
258            context.add_columns(*operand_table.columns)
259
260            operand_table = Table(
261                context.columns,
262                context.table.rows,
263                range(width, width + len(operand_table.columns)),
264            )
265
266            context = self.context(
267                {
268                    None: operand_table,
269                    **context.tables,
270                }
271            )
272
273        context.sort(group_by)
274
275        group = None
276        start = 0
277        end = 1
278        length = len(context.table)
279        table = self.table(list(step.group) + step.aggregations)
280
281        def add_row():
282            table.append(group + context.eval_tuple(aggregations))
283
284        if length:
285            for i in range(length):
286                context.set_index(i)
287                key = context.eval_tuple(group_by)
288                group = key if group is None else group
289                end += 1
290                if key != group:
291                    context.set_range(start, end - 2)
292                    add_row()
293                    group = key
294                    start = end - 2
295                if len(table.rows) >= step.limit:
296                    break
297                if i == length - 1:
298                    context.set_range(start, end - 1)
299                    add_row()
300        elif step.limit > 0 and not group_by:
301            context.set_range(0, 0)
302            table.append(context.eval_tuple(aggregations))
303
304        context = self.context({step.name: table, **{name: table for name in context.tables}})
305
306        if step.projections or step.condition:
307            return self.scan(step, context)
308        return context
309
310    def sort(self, step, context):
311        projections = self.generate_tuple(step.projections)
312        projection_columns = [p.alias_or_name for p in step.projections]
313        all_columns = list(context.columns) + projection_columns
314        sink = self.table(all_columns)
315        for reader, ctx in context:
316            sink.append(reader.row + ctx.eval_tuple(projections))
317
318        sort_ctx = self.context(
319            {
320                None: sink,
321                **{table: sink for table in context.tables},
322            }
323        )
324        sort_ctx.sort(self.generate_tuple(step.key))
325
326        if not math.isinf(step.limit):
327            sort_ctx.table.rows = sort_ctx.table.rows[0 : step.limit]
328
329        output = Table(
330            projection_columns,
331            rows=[r[len(context.columns) : len(all_columns)] for r in sort_ctx.table.rows],
332        )
333        return self.context({step.name: output})
334
335    def set_operation(self, step, context):
336        left = context.tables[step.left]
337        right = context.tables[step.right]
338
339        sink = self.table(left.columns)
340
341        if issubclass(step.op, exp.Intersect):
342            sink.rows = list(set(left.rows).intersection(set(right.rows)))
343        elif issubclass(step.op, exp.Except):
344            sink.rows = list(set(left.rows).difference(set(right.rows)))
345        elif issubclass(step.op, exp.Union) and step.distinct:
346            sink.rows = list(set(left.rows).union(set(right.rows)))
347        else:
348            sink.rows = left.rows + right.rows
349
350        if not math.isinf(step.limit):
351            sink.rows = sink.rows[0 : step.limit]
352
353        return self.context({step.name: sink})
PythonExecutor(env=None, tables=None)
17    def __init__(self, env=None, tables=None):
18        self.generator = Python().generator(identify=True, comments=False)
19        self.env = {**ENV, **(env or {})}
20        self.tables = tables or {}
generator
env
tables
def execute(self, plan):
    """Run every step of `plan` in dependency order and return the root table.

    Steps become runnable once all of their dependencies have produced a
    context; finished dependency contexts are dropped as soon as no
    remaining dependent needs them, keeping memory bounded.
    """
    finished = set()
    queue = set(plan.leaves)
    contexts = {}

    # Dispatch table; entries are tried in this exact order, mirroring
    # the original isinstance chain.
    handlers = (
        (planner.Scan, self.scan),
        (planner.Aggregate, self.aggregate),
        (planner.Join, self.join),
        (planner.Sort, self.sort),
        (planner.SetOperation, self.set_operation),
    )

    while queue:
        node = queue.pop()
        try:
            # Merge the tables produced by every dependency into one context.
            tables = {}
            for dep in node.dependencies:
                tables.update(contexts[dep].tables)
            context = self.context(tables)

            for step_type, handler in handlers:
                if isinstance(node, step_type):
                    contexts[node] = handler(node, context)
                    break
            else:
                raise NotImplementedError

            finished.add(node)

            # Schedule dependents whose inputs are now all available.
            for dep in node.dependents:
                if all(d in contexts for d in dep.dependencies):
                    queue.add(dep)

            # Release contexts no longer needed by any unfinished dependent.
            for dep in node.dependencies:
                if all(d in finished for d in dep.dependents):
                    contexts.pop(dep)
        except Exception as e:
            raise ExecuteError(f"Step '{node.id}' failed: {e}") from e

    root = plan.root
    return contexts[root].tables[root.name]
def generate(self, expression):
    """Convert a SQL expression into literal Python code and compile it into bytecode."""
    if not expression:
        return None

    python_source = self.generator.generate(expression)
    # The generated source doubles as the "filename" so it shows up in tracebacks.
    return compile(python_source, python_source, "eval", optimize=2)

Convert a SQL expression into literal Python code and compile it into bytecode.

def generate_tuple(self, expressions):
    """Convert an array of SQL expressions into tuple of Python byte code."""
    return tuple(self.generate(e) for e in expressions) if expressions else ()

Convert an array of SQL expressions into tuple of Python byte code.

def context(self, tables):
    # Wrap the given name -> Table mapping in an execution Context that
    # shares this executor's evaluation environment (self.env).
    return Context(tables, env=self.env)
def table(self, expressions):
    """Build an empty Table whose column names are derived from `expressions`."""

    def column_name(item):
        # SQL expressions contribute their alias (or name); anything else
        # (e.g. a plain string) is used as-is.
        if isinstance(item, exp.Expression):
            return item.alias_or_name
        return item

    return Table(column_name(item) for item in expressions)
def scan(self, step, context):
    """Execute a Scan step: choose a row source, then project/filter it.

    The source may be a table already present in `context`, a CSV file,
    a table resolved from `self.tables`, or nothing at all (a static scan
    over a single empty row, e.g. `SELECT 1`).
    """
    source = step.source

    if source and isinstance(source, exp.Expression):
        source = source.name or source.alias

    if source is None:
        # No FROM clause: evaluate projections against one empty row.
        context, table_iter = self.static()
    elif source in context:
        if not step.projections and not step.condition:
            # Pure pass-through scan; reuse the existing table unchanged.
            return self.context({step.name: context.tables[source]})
        table_iter = context.table_iter(source)
    elif isinstance(step.source, exp.Table) and isinstance(step.source.this, exp.ReadCSV):
        # scan_csv is a generator whose first yield is its context; prime
        # it here so subsequent iteration yields row readers only.
        table_iter = self.scan_csv(step)
        context = next(table_iter)
    else:
        context, table_iter = self.scan_table(step)

    return self.context({step.name: self._project_and_filter(context, step, table_iter)})
def static(self):
    """Return a (context, row iterator) pair for queries with no FROM clause."""
    empty_context = self.context({})
    # A single empty row lets projections like `SELECT 1` evaluate once.
    return empty_context, [RowReader(())]
def scan_table(self, step):
    """Resolve the step's source against the registered tables.

    Returns a (context, row iterator) pair over the resolved table.
    """
    resolved = self.tables.find(step.source)
    return self.context({step.source.alias_or_name: resolved}), iter(resolved)
def scan_csv(self, step):
    """Stream rows from a CSV file.

    Generator protocol: the FIRST yield is the execution context (so the
    caller can capture it before consuming rows); every later yield is a
    row reader positioned on the next CSV row. Column types are inferred
    from the first data row via ast.literal_eval.
    """
    alias = step.source.alias
    source = step.source.this

    with csv_reader(source) as reader:
        columns = next(reader)  # header row supplies the column names
        table = Table(columns)
        context = self.context({alias: table})
        yield context
        types = []
        for row in reader:
            if not types:
                # Infer one Python type per column from the first data row;
                # values that fail literal_eval stay strings.
                for v in row:
                    try:
                        types.append(type(ast.literal_eval(v)))
                    except (ValueError, SyntaxError):
                        types.append(str)

            # We can't cast empty values ('') to non-string types, so we convert them to None instead
            context.set_row(
                tuple(None if (t is not str and v == "") else t(v) for t, v in zip(types, row))
            )
            yield context.table.reader
def join(self, step, context):
    """Execute a Join step by folding each joined table into the source context.

    `column_ranges` records where each table's columns live inside the
    widening combined row, so per-table views can be rebuilt after every
    individual join.
    """
    source = step.source_name

    source_table = context.tables[source]
    source_context = self.context({source: source_table})
    # name -> range of column positions that table occupies in the joined row
    column_ranges = {source: range(0, len(source_table.columns))}

    for name, join in step.joins.items():
        table = context.tables[name]
        # New columns start right after the widest range seen so far.
        start = max(r.stop for r in column_ranges.values())
        column_ranges[name] = range(start, len(table.columns) + start)
        join_context = self.context({name: table})

        # Hash join requires an equi-join key; otherwise fall back to a
        # nested-loop cross product (condition applied below).
        if join.get("source_key"):
            table = self.hash_join(join, source_context, join_context)
        else:
            table = self.nested_loop_join(join, source_context, join_context)

        # Re-expose every table as a column-range view over the joined rows.
        source_context = self.context(
            {
                name: Table(table.columns, table.rows, column_range)
                for name, column_range in column_ranges.items()
            }
        )
        condition = self.generate(join["condition"])
        if condition:
            source_context.filter(condition)

    if not step.condition and not step.projections:
        return source_context

    sink = self._project_and_filter(
        source_context,
        step,
        (reader for reader, _ in iter(source_context)),
    )

    if step.projections:
        return self.context({step.name: sink})
    else:
        # No projections: keep the per-table column-range views, but over
        # the filtered rows.
        return self.context(
            {
                name: Table(table.columns, sink.rows, table.column_range)
                for name, table in source_context.tables.items()
            }
        )
def nested_loop_join(self, _join, source_context, join_context):
    """Cross-join every source row with every join row.

    The ON condition (if any) is applied afterwards by the caller.
    """
    result = Table(source_context.columns + join_context.columns)

    # Readers are live cursors over their contexts, so rows must be
    # combined inside the loops rather than collected up front.
    for left_reader, _ in source_context:
        for right_reader, _ in join_context:
            result.append(left_reader.row + right_reader.row)

    return result
def hash_join(self, join, source_context, join_context):
    """Equi-join two contexts by bucketing rows on their join-key values.

    LEFT/RIGHT outer joins are supported by padding the missing side of a
    bucket with a single all-NULL row.
    """
    source_key = self.generate_tuple(join["source_key"])
    join_key = self.generate_tuple(join["join_key"])
    side = join.get("side")
    is_left = side == "LEFT"
    is_right = side == "RIGHT"

    # key -> ([rows from the source side], [rows from the join side])
    buckets = collections.defaultdict(lambda: ([], []))

    for reader, ctx in source_context:
        buckets[ctx.eval_tuple(source_key)][0].append(reader.row)
    for reader, ctx in join_context:
        buckets[ctx.eval_tuple(join_key)][1].append(reader.row)

    result = Table(source_context.columns + join_context.columns)
    # Width of the NULL padding depends on which side may be missing.
    pad_width = len(join_context.columns if is_left else source_context.columns)
    null_rows = [(None,) * pad_width]

    for source_rows, join_rows in buckets.values():
        if is_left:
            join_rows = join_rows or null_rows
        elif is_right:
            source_rows = source_rows or null_rows

        for left_row, right_row in itertools.product(source_rows, join_rows):
            result.append(left_row + right_row)

    return result
def aggregate(self, step, context):
    """Execute an Aggregate step.

    Operand expressions are materialized as extra columns, rows are sorted
    by the group-by key so that groups form consecutive runs, and one
    aggregated row is emitted per run.
    """
    group_by = self.generate_tuple(step.group.values())
    aggregations = self.generate_tuple(step.aggregations)
    operands = self.generate_tuple(step.operands)

    if operands:
        # Pre-compute aggregation operands (e.g. the `x + 1` in SUM(x + 1))
        # and append them to every row as additional columns.
        operand_table = Table(self.table(step.operands).columns)

        for reader, ctx in context:
            operand_table.append(ctx.eval_tuple(operands))

        for i, (a, b) in enumerate(zip(context.table.rows, operand_table.rows)):
            context.table.rows[i] = a + b

        width = len(context.columns)
        context.add_columns(*operand_table.columns)

        # A column-range view over just the operand columns, registered
        # under the anonymous (None) table name.
        operand_table = Table(
            context.columns,
            context.table.rows,
            range(width, width + len(operand_table.columns)),
        )

        context = self.context(
            {
                None: operand_table,
                **context.tables,
            }
        )

    # Sorting clusters equal group keys into consecutive runs.
    context.sort(group_by)

    group = None
    start = 0
    end = 1
    length = len(context.table)
    table = self.table(list(step.group) + step.aggregations)

    def add_row():
        # Evaluate the aggregations over the current row range and emit
        # the finished group's output row.
        table.append(group + context.eval_tuple(aggregations))

    if length:
        for i in range(length):
            context.set_index(i)
            key = context.eval_tuple(group_by)
            group = key if group is None else group
            end += 1
            if key != group:
                # Row i starts a new group: flush rows [start, end - 2]
                # (everything before i), then open the new group at i.
                context.set_range(start, end - 2)
                add_row()
                group = key
                start = end - 2
            if len(table.rows) >= step.limit:
                break
            if i == length - 1:
                # Last row: flush the final, still-open group.
                context.set_range(start, end - 1)
                add_row()
    elif step.limit > 0 and not group_by:
        # Empty input with no GROUP BY still yields one row
        # (e.g. COUNT(*) over an empty table is 0).
        context.set_range(0, 0)
        table.append(context.eval_tuple(aggregations))

    context = self.context({step.name: table, **{name: table for name in context.tables}})

    # Trailing projections/filters are handled by re-scanning the result.
    if step.projections or step.condition:
        return self.scan(step, context)
    return context
def sort(self, step, context):
    """Execute a Sort step.

    Projections are evaluated and appended to each row (so sort keys can
    reference either original or projected columns), rows are sorted and
    limited, then only the projected columns are kept in the output.
    """
    projections = self.generate_tuple(step.projections)
    projection_columns = [p.alias_or_name for p in step.projections]
    all_columns = list(context.columns) + projection_columns
    sink = self.table(all_columns)
    for reader, ctx in context:
        # Each sink row = original row + evaluated projection values.
        sink.append(reader.row + ctx.eval_tuple(projections))

    sort_ctx = self.context(
        {
            None: sink,  # anonymous table for unqualified column references
            **{table: sink for table in context.tables},
        }
    )
    sort_ctx.sort(self.generate_tuple(step.key))

    # step.limit is math.inf when no LIMIT clause was given.
    if not math.isinf(step.limit):
        sort_ctx.table.rows = sort_ctx.table.rows[0 : step.limit]

    # Slice off the original columns, keeping only the projected values.
    output = Table(
        projection_columns,
        rows=[r[len(context.columns) : len(all_columns)] for r in sort_ctx.table.rows],
    )
    return self.context({step.name: output})
def set_operation(self, step, context):
    """Execute a set-operation step (UNION / INTERSECT / EXCEPT) over two tables."""
    left = context.tables[step.left]
    right = context.tables[step.right]

    sink = self.table(left.columns)

    # Intersect/Except are checked before Union since they may subclass it;
    # the set-based branches also deduplicate, giving DISTINCT semantics.
    if issubclass(step.op, exp.Intersect):
        sink.rows = list(set(left.rows).intersection(set(right.rows)))
    elif issubclass(step.op, exp.Except):
        sink.rows = list(set(left.rows).difference(set(right.rows)))
    elif issubclass(step.op, exp.Union) and step.distinct:
        sink.rows = list(set(left.rows).union(set(right.rows)))
    else:
        # UNION ALL: plain concatenation, duplicates preserved.
        sink.rows = left.rows + right.rows

    # step.limit is math.inf when no LIMIT clause was given.
    if not math.isinf(step.limit):
        sink.rows = sink.rows[0 : step.limit]

    return self.context({step.name: sink})
class Python(sqlglot.dialects.dialect.Dialect):
421class Python(Dialect):
422    class Tokenizer(tokens.Tokenizer):
423        STRING_ESCAPES = ["\\"]
424
425    class Generator(generator.Generator):
426        TRANSFORMS = {
427            **{klass: _rename for klass in subclasses(exp.__name__, exp.Binary)},
428            **{klass: _rename for klass in exp.ALL_FUNCTIONS},
429            exp.Case: _case_sql,
430            exp.Alias: lambda self, e: self.sql(e.this),
431            exp.Array: inline_array_sql,
432            exp.And: lambda self, e: self.binary(e, "and"),
433            exp.Between: _rename,
434            exp.Boolean: lambda self, e: "True" if e.this else "False",
435            exp.Cast: lambda self, e: f"CAST({self.sql(e.this)}, exp.DataType.Type.{e.args['to']})",
436            exp.Column: lambda self,
437            e: f"scope[{self.sql(e, 'table') or None}][{self.sql(e.this)}]",
438            exp.Concat: lambda self, e: self.func(
439                "SAFECONCAT" if e.args.get("safe") else "CONCAT", *e.expressions
440            ),
441            exp.Distinct: lambda self, e: f"set({self.sql(e, 'this')})",
442            exp.Div: _div_sql,
443            exp.Extract: lambda self,
444            e: f"EXTRACT('{e.name.lower()}', {self.sql(e, 'expression')})",
445            exp.In: lambda self,
446            e: f"{self.sql(e, 'this')} in {{{self.expressions(e, flat=True)}}}",
447            exp.Interval: lambda self, e: f"INTERVAL({self.sql(e.this)}, '{self.sql(e.unit)}')",
448            exp.Is: lambda self, e: (
449                self.binary(e, "==") if isinstance(e.this, exp.Literal) else self.binary(e, "is")
450            ),
451            exp.JSONPath: lambda self, e: f"[{','.join(self.sql(p) for p in e.expressions[1:])}]",
452            exp.JSONPathKey: lambda self, e: f"'{self.sql(e.this)}'",
453            exp.JSONPathSubscript: lambda self, e: f"'{e.this}'",
454            exp.Lambda: _lambda_sql,
455            exp.Not: lambda self, e: f"not {self.sql(e.this)}",
456            exp.Null: lambda *_: "None",
457            exp.Or: lambda self, e: self.binary(e, "or"),
458            exp.Ordered: _ordered_py,
459            exp.Star: lambda *_: "1",
460        }
UNESCAPED_SEQUENCES: Dict[str, str] = {'\\a': '\x07', '\\b': '\x08', '\\f': '\x0c', '\\n': '\n', '\\r': '\r', '\\t': '\t', '\\v': '\x0b', '\\\\': '\\'}

Mapping of an escaped sequence (e.g. `\\n`) to its unescaped version (a literal newline).

tokenizer_class = <class 'Python.Tokenizer'>
parser_class = <class 'sqlglot.parser.Parser'>
generator_class = <class 'Python.Generator'>
TIME_TRIE: Dict = {}
FORMAT_TRIE: Dict = {}
INVERSE_TIME_MAPPING: Dict[str, str] = {}
INVERSE_TIME_TRIE: Dict = {}
ESCAPED_SEQUENCES: Dict[str, str] = {'\x07': '\\a', '\x08': '\\b', '\x0c': '\\f', '\n': '\\n', '\r': '\\r', '\t': '\\t', '\x0b': '\\v', '\\': '\\\\'}
QUOTE_START = "'"
QUOTE_END = "'"
IDENTIFIER_START = '"'
IDENTIFIER_END = '"'
BIT_START: Optional[str] = None
BIT_END: Optional[str] = None
HEX_START: Optional[str] = None
HEX_END: Optional[str] = None
BYTE_START: Optional[str] = None
BYTE_END: Optional[str] = None
UNICODE_START: Optional[str] = None
UNICODE_END: Optional[str] = None
class Python.Tokenizer(sqlglot.tokens.Tokenizer):
class Tokenizer(tokens.Tokenizer):
    # Backslash is the only escape character recognized inside string literals.
    STRING_ESCAPES = ["\\"]
STRING_ESCAPES = ['\\']
class Python.Generator(sqlglot.generator.Generator):
425    class Generator(generator.Generator):
426        TRANSFORMS = {
427            **{klass: _rename for klass in subclasses(exp.__name__, exp.Binary)},
428            **{klass: _rename for klass in exp.ALL_FUNCTIONS},
429            exp.Case: _case_sql,
430            exp.Alias: lambda self, e: self.sql(e.this),
431            exp.Array: inline_array_sql,
432            exp.And: lambda self, e: self.binary(e, "and"),
433            exp.Between: _rename,
434            exp.Boolean: lambda self, e: "True" if e.this else "False",
435            exp.Cast: lambda self, e: f"CAST({self.sql(e.this)}, exp.DataType.Type.{e.args['to']})",
436            exp.Column: lambda self,
437            e: f"scope[{self.sql(e, 'table') or None}][{self.sql(e.this)}]",
438            exp.Concat: lambda self, e: self.func(
439                "SAFECONCAT" if e.args.get("safe") else "CONCAT", *e.expressions
440            ),
441            exp.Distinct: lambda self, e: f"set({self.sql(e, 'this')})",
442            exp.Div: _div_sql,
443            exp.Extract: lambda self,
444            e: f"EXTRACT('{e.name.lower()}', {self.sql(e, 'expression')})",
445            exp.In: lambda self,
446            e: f"{self.sql(e, 'this')} in {{{self.expressions(e, flat=True)}}}",
447            exp.Interval: lambda self, e: f"INTERVAL({self.sql(e.this)}, '{self.sql(e.unit)}')",
448            exp.Is: lambda self, e: (
449                self.binary(e, "==") if isinstance(e.this, exp.Literal) else self.binary(e, "is")
450            ),
451            exp.JSONPath: lambda self, e: f"[{','.join(self.sql(p) for p in e.expressions[1:])}]",
452            exp.JSONPathKey: lambda self, e: f"'{self.sql(e.this)}'",
453            exp.JSONPathSubscript: lambda self, e: f"'{e.this}'",
454            exp.Lambda: _lambda_sql,
455            exp.Not: lambda self, e: f"not {self.sql(e.this)}",
456            exp.Null: lambda *_: "None",
457            exp.Or: lambda self, e: self.binary(e, "or"),
458            exp.Ordered: _ordered_py,
459            exp.Star: lambda *_: "1",
460        }

Generator converts a given syntax tree to the corresponding SQL string.

Arguments:
  • pretty: Whether to format the produced SQL string. Default: False.
  • identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
  • normalize: Whether to normalize identifiers to lowercase. Default: False.
  • pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
  • indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
  • normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
  • unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
  • max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
  • leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
  • max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
  • comments: Whether to preserve comments in the output SQL code. Default: True
TRANSFORMS = {<class 'sqlglot.expressions.Add'>: <function _rename>, <class 'sqlglot.expressions.And'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.ArrayContains'>: <function _rename>, <class 'sqlglot.expressions.ArrayContainsAll'>: <function _rename>, <class 'sqlglot.expressions.ArrayOverlaps'>: <function _rename>, <class 'sqlglot.expressions.Binary'>: <function _rename>, <class 'sqlglot.expressions.BitwiseAnd'>: <function _rename>, <class 'sqlglot.expressions.BitwiseLeftShift'>: <function _rename>, <class 'sqlglot.expressions.BitwiseOr'>: <function _rename>, <class 'sqlglot.expressions.BitwiseRightShift'>: <function _rename>, <class 'sqlglot.expressions.BitwiseXor'>: <function _rename>, <class 'sqlglot.expressions.Collate'>: <function _rename>, <class 'sqlglot.expressions.Connector'>: <function _rename>, <class 'sqlglot.expressions.Corr'>: <function _rename>, <class 'sqlglot.expressions.CovarPop'>: <function _rename>, <class 'sqlglot.expressions.CovarSamp'>: <function _rename>, <class 'sqlglot.expressions.DPipe'>: <function _rename>, <class 'sqlglot.expressions.Distance'>: <function _rename>, <class 'sqlglot.expressions.Div'>: <function _div_sql>, <class 'sqlglot.expressions.Dot'>: <function _rename>, <class 'sqlglot.expressions.EQ'>: <function _rename>, <class 'sqlglot.expressions.Escape'>: <function _rename>, <class 'sqlglot.expressions.GT'>: <function _rename>, <class 'sqlglot.expressions.GTE'>: <function _rename>, <class 'sqlglot.expressions.Glob'>: <function _rename>, <class 'sqlglot.expressions.ILike'>: <function _rename>, <class 'sqlglot.expressions.ILikeAny'>: <function _rename>, <class 'sqlglot.expressions.IntDiv'>: <function _rename>, <class 'sqlglot.expressions.Is'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.JSONArrayContains'>: <function _rename>, <class 'sqlglot.expressions.JSONBContains'>: <function _rename>, <class 'sqlglot.expressions.JSONBExtract'>: <function _rename>, <class 
'sqlglot.expressions.JSONBExtractScalar'>: <function _rename>, <class 'sqlglot.expressions.JSONExtract'>: <function _rename>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function _rename>, <class 'sqlglot.expressions.Kwarg'>: <function _rename>, <class 'sqlglot.expressions.LT'>: <function _rename>, <class 'sqlglot.expressions.LTE'>: <function _rename>, <class 'sqlglot.expressions.Like'>: <function _rename>, <class 'sqlglot.expressions.LikeAny'>: <function _rename>, <class 'sqlglot.expressions.Mod'>: <function _rename>, <class 'sqlglot.expressions.Mul'>: <function _rename>, <class 'sqlglot.expressions.NEQ'>: <function _rename>, <class 'sqlglot.expressions.NullSafeEQ'>: <function _rename>, <class 'sqlglot.expressions.NullSafeNEQ'>: <function _rename>, <class 'sqlglot.expressions.Operator'>: <function _rename>, <class 'sqlglot.expressions.Or'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Overlaps'>: <function _rename>, <class 'sqlglot.expressions.Pow'>: <function _rename>, <class 'sqlglot.expressions.PropertyEQ'>: <function _rename>, <class 'sqlglot.expressions.RegexpILike'>: <function _rename>, <class 'sqlglot.expressions.RegexpLike'>: <function _rename>, <class 'sqlglot.expressions.SimilarTo'>: <function _rename>, <class 'sqlglot.expressions.Slice'>: <function _rename>, <class 'sqlglot.expressions.Sub'>: <function _rename>, <class 'sqlglot.expressions.Xor'>: <function _rename>, <class 'sqlglot.expressions.Abs'>: <function _rename>, <class 'sqlglot.expressions.AddMonths'>: <function _rename>, <class 'sqlglot.expressions.AnonymousAggFunc'>: <function _rename>, <class 'sqlglot.expressions.AnyValue'>: <function _rename>, <class 'sqlglot.expressions.ApproxDistinct'>: <function _rename>, <class 'sqlglot.expressions.ApproxQuantile'>: <function _rename>, <class 'sqlglot.expressions.ApproxTopK'>: <function _rename>, <class 'sqlglot.expressions.ArgMax'>: <function _rename>, <class 'sqlglot.expressions.ArgMin'>: <function _rename>, <class 
'sqlglot.expressions.Array'>: <function inline_array_sql>, <class 'sqlglot.expressions.ArrayAgg'>: <function _rename>, <class 'sqlglot.expressions.ArrayAll'>: <function _rename>, <class 'sqlglot.expressions.ArrayAny'>: <function _rename>, <class 'sqlglot.expressions.ArrayConcat'>: <function _rename>, <class 'sqlglot.expressions.ArrayConstructCompact'>: <function _rename>, <class 'sqlglot.expressions.ArrayFilter'>: <function _rename>, <class 'sqlglot.expressions.ArraySize'>: <function _rename>, <class 'sqlglot.expressions.ArraySort'>: <function _rename>, <class 'sqlglot.expressions.ArraySum'>: <function _rename>, <class 'sqlglot.expressions.ArrayToString'>: <function _rename>, <class 'sqlglot.expressions.ArrayUnionAgg'>: <function _rename>, <class 'sqlglot.expressions.ArrayUniqueAgg'>: <function _rename>, <class 'sqlglot.expressions.Avg'>: <function _rename>, <class 'sqlglot.expressions.Case'>: <function _case_sql>, <class 'sqlglot.expressions.Cast'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.CastToStrType'>: <function _rename>, <class 'sqlglot.expressions.Cbrt'>: <function _rename>, <class 'sqlglot.expressions.Ceil'>: <function _rename>, <class 'sqlglot.expressions.Chr'>: <function _rename>, <class 'sqlglot.expressions.Coalesce'>: <function _rename>, <class 'sqlglot.expressions.CombinedAggFunc'>: <function _rename>, <class 'sqlglot.expressions.CombinedParameterizedAgg'>: <function _rename>, <class 'sqlglot.expressions.Concat'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.ConcatWs'>: <function _rename>, <class 'sqlglot.expressions.ConnectByRoot'>: <function _rename>, <class 'sqlglot.expressions.Convert'>: <function _rename>, <class 'sqlglot.expressions.Count'>: <function _rename>, <class 'sqlglot.expressions.CountIf'>: <function _rename>, <class 'sqlglot.expressions.CurrentDate'>: <function _rename>, <class 'sqlglot.expressions.CurrentDatetime'>: <function _rename>, <class 'sqlglot.expressions.CurrentTime'>: <function 
_rename>, <class 'sqlglot.expressions.CurrentTimestamp'>: <function _rename>, <class 'sqlglot.expressions.CurrentUser'>: <function _rename>, <class 'sqlglot.expressions.Date'>: <function _rename>, <class 'sqlglot.expressions.DateAdd'>: <function _rename>, <class 'sqlglot.expressions.DateDiff'>: <function _rename>, <class 'sqlglot.expressions.DateFromParts'>: <function _rename>, <class 'sqlglot.expressions.DateStrToDate'>: <function _rename>, <class 'sqlglot.expressions.DateSub'>: <function _rename>, <class 'sqlglot.expressions.DateToDateStr'>: <function _rename>, <class 'sqlglot.expressions.DateToDi'>: <function _rename>, <class 'sqlglot.expressions.DateTrunc'>: <function _rename>, <class 'sqlglot.expressions.DatetimeAdd'>: <function _rename>, <class 'sqlglot.expressions.DatetimeDiff'>: <function _rename>, <class 'sqlglot.expressions.DatetimeSub'>: <function _rename>, <class 'sqlglot.expressions.DatetimeTrunc'>: <function _rename>, <class 'sqlglot.expressions.Day'>: <function _rename>, <class 'sqlglot.expressions.DayOfMonth'>: <function _rename>, <class 'sqlglot.expressions.DayOfWeek'>: <function _rename>, <class 'sqlglot.expressions.DayOfYear'>: <function _rename>, <class 'sqlglot.expressions.Decode'>: <function _rename>, <class 'sqlglot.expressions.DiToDate'>: <function _rename>, <class 'sqlglot.expressions.Encode'>: <function _rename>, <class 'sqlglot.expressions.Exp'>: <function _rename>, <class 'sqlglot.expressions.Explode'>: <function _rename>, <class 'sqlglot.expressions.ExplodeOuter'>: <function _rename>, <class 'sqlglot.expressions.Extract'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.First'>: <function _rename>, <class 'sqlglot.expressions.FirstValue'>: <function _rename>, <class 'sqlglot.expressions.Flatten'>: <function _rename>, <class 'sqlglot.expressions.Floor'>: <function _rename>, <class 'sqlglot.expressions.FromBase'>: <function _rename>, <class 'sqlglot.expressions.FromBase64'>: <function _rename>, <class 
'sqlglot.expressions.GenerateDateArray'>: <function _rename>, <class 'sqlglot.expressions.GenerateSeries'>: <function _rename>, <class 'sqlglot.expressions.Greatest'>: <function _rename>, <class 'sqlglot.expressions.GroupConcat'>: <function _rename>, <class 'sqlglot.expressions.Hex'>: <function _rename>, <class 'sqlglot.expressions.Hll'>: <function _rename>, <class 'sqlglot.expressions.If'>: <function _rename>, <class 'sqlglot.expressions.Initcap'>: <function _rename>, <class 'sqlglot.expressions.IsInf'>: <function _rename>, <class 'sqlglot.expressions.IsNan'>: <function _rename>, <class 'sqlglot.expressions.JSONArray'>: <function _rename>, <class 'sqlglot.expressions.JSONArrayAgg'>: <function _rename>, <class 'sqlglot.expressions.JSONFormat'>: <function _rename>, <class 'sqlglot.expressions.JSONObject'>: <function _rename>, <class 'sqlglot.expressions.JSONObjectAgg'>: <function _rename>, <class 'sqlglot.expressions.JSONTable'>: <function _rename>, <class 'sqlglot.expressions.Lag'>: <function _rename>, <class 'sqlglot.expressions.Last'>: <function _rename>, <class 'sqlglot.expressions.LastDay'>: <function _rename>, <class 'sqlglot.expressions.LastValue'>: <function _rename>, <class 'sqlglot.expressions.Lead'>: <function _rename>, <class 'sqlglot.expressions.Least'>: <function _rename>, <class 'sqlglot.expressions.Left'>: <function _rename>, <class 'sqlglot.expressions.Length'>: <function _rename>, <class 'sqlglot.expressions.Levenshtein'>: <function _rename>, <class 'sqlglot.expressions.Ln'>: <function _rename>, <class 'sqlglot.expressions.Log'>: <function _rename>, <class 'sqlglot.expressions.LogicalAnd'>: <function _rename>, <class 'sqlglot.expressions.LogicalOr'>: <function _rename>, <class 'sqlglot.expressions.Lower'>: <function _rename>, <class 'sqlglot.expressions.LowerHex'>: <function _rename>, <class 'sqlglot.expressions.MD5'>: <function _rename>, <class 'sqlglot.expressions.MD5Digest'>: <function _rename>, <class 'sqlglot.expressions.Map'>: <function 
_rename>, <class 'sqlglot.expressions.MapFromEntries'>: <function _rename>, <class 'sqlglot.expressions.MatchAgainst'>: <function _rename>, <class 'sqlglot.expressions.Max'>: <function _rename>, <class 'sqlglot.expressions.Min'>: <function _rename>, <class 'sqlglot.expressions.Month'>: <function _rename>, <class 'sqlglot.expressions.MonthsBetween'>: <function _rename>, <class 'sqlglot.expressions.NextValueFor'>: <function _rename>, <class 'sqlglot.expressions.NthValue'>: <function _rename>, <class 'sqlglot.expressions.Nullif'>: <function _rename>, <class 'sqlglot.expressions.NumberToStr'>: <function _rename>, <class 'sqlglot.expressions.Nvl2'>: <function _rename>, <class 'sqlglot.expressions.OpenJSON'>: <function _rename>, <class 'sqlglot.expressions.ParameterizedAgg'>: <function _rename>, <class 'sqlglot.expressions.ParseJSON'>: <function _rename>, <class 'sqlglot.expressions.PercentileCont'>: <function _rename>, <class 'sqlglot.expressions.PercentileDisc'>: <function _rename>, <class 'sqlglot.expressions.Posexplode'>: <function _rename>, <class 'sqlglot.expressions.PosexplodeOuter'>: <function _rename>, <class 'sqlglot.expressions.Predict'>: <function _rename>, <class 'sqlglot.expressions.Quantile'>: <function _rename>, <class 'sqlglot.expressions.Quarter'>: <function _rename>, <class 'sqlglot.expressions.Rand'>: <function _rename>, <class 'sqlglot.expressions.Randn'>: <function _rename>, <class 'sqlglot.expressions.RangeN'>: <function _rename>, <class 'sqlglot.expressions.ReadCSV'>: <function _rename>, <class 'sqlglot.expressions.Reduce'>: <function _rename>, <class 'sqlglot.expressions.RegexpExtract'>: <function _rename>, <class 'sqlglot.expressions.RegexpReplace'>: <function _rename>, <class 'sqlglot.expressions.RegexpSplit'>: <function _rename>, <class 'sqlglot.expressions.Repeat'>: <function _rename>, <class 'sqlglot.expressions.Right'>: <function _rename>, <class 'sqlglot.expressions.Round'>: <function _rename>, <class 'sqlglot.expressions.RowNumber'>: 
<function _rename>, <class 'sqlglot.expressions.SHA'>: <function _rename>, <class 'sqlglot.expressions.SHA2'>: <function _rename>, <class 'sqlglot.expressions.SafeDivide'>: <function _rename>, <class 'sqlglot.expressions.Sign'>: <function _rename>, <class 'sqlglot.expressions.SortArray'>: <function _rename>, <class 'sqlglot.expressions.Split'>: <function _rename>, <class 'sqlglot.expressions.Sqrt'>: <function _rename>, <class 'sqlglot.expressions.StandardHash'>: <function _rename>, <class 'sqlglot.expressions.StarMap'>: <function _rename>, <class 'sqlglot.expressions.StartsWith'>: <function _rename>, <class 'sqlglot.expressions.Stddev'>: <function _rename>, <class 'sqlglot.expressions.StddevPop'>: <function _rename>, <class 'sqlglot.expressions.StddevSamp'>: <function _rename>, <class 'sqlglot.expressions.StrPosition'>: <function _rename>, <class 'sqlglot.expressions.StrToDate'>: <function _rename>, <class 'sqlglot.expressions.StrToMap'>: <function _rename>, <class 'sqlglot.expressions.StrToTime'>: <function _rename>, <class 'sqlglot.expressions.StrToUnix'>: <function _rename>, <class 'sqlglot.expressions.StringToArray'>: <function _rename>, <class 'sqlglot.expressions.Struct'>: <function _rename>, <class 'sqlglot.expressions.StructExtract'>: <function _rename>, <class 'sqlglot.expressions.Stuff'>: <function _rename>, <class 'sqlglot.expressions.Substring'>: <function _rename>, <class 'sqlglot.expressions.Sum'>: <function _rename>, <class 'sqlglot.expressions.TimeAdd'>: <function _rename>, <class 'sqlglot.expressions.TimeDiff'>: <function _rename>, <class 'sqlglot.expressions.TimeFromParts'>: <function _rename>, <class 'sqlglot.expressions.TimeStrToDate'>: <function _rename>, <class 'sqlglot.expressions.TimeStrToTime'>: <function _rename>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function _rename>, <class 'sqlglot.expressions.TimeSub'>: <function _rename>, <class 'sqlglot.expressions.TimeToStr'>: <function _rename>, <class 'sqlglot.expressions.TimeToTimeStr'>: 
<function _rename>, <class 'sqlglot.expressions.TimeToUnix'>: <function _rename>, <class 'sqlglot.expressions.TimeTrunc'>: <function _rename>, <class 'sqlglot.expressions.Timestamp'>: <function _rename>, <class 'sqlglot.expressions.TimestampAdd'>: <function _rename>, <class 'sqlglot.expressions.TimestampDiff'>: <function _rename>, <class 'sqlglot.expressions.TimestampFromParts'>: <function _rename>, <class 'sqlglot.expressions.TimestampSub'>: <function _rename>, <class 'sqlglot.expressions.TimestampTrunc'>: <function _rename>, <class 'sqlglot.expressions.ToArray'>: <function _rename>, <class 'sqlglot.expressions.ToBase64'>: <function _rename>, <class 'sqlglot.expressions.ToChar'>: <function _rename>, <class 'sqlglot.expressions.ToDays'>: <function _rename>, <class 'sqlglot.expressions.ToMap'>: <function _rename>, <class 'sqlglot.expressions.ToNumber'>: <function _rename>, <class 'sqlglot.expressions.Transform'>: <function _rename>, <class 'sqlglot.expressions.Trim'>: <function _rename>, <class 'sqlglot.expressions.Try'>: <function _rename>, <class 'sqlglot.expressions.TryCast'>: <function _rename>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function _rename>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function _rename>, <class 'sqlglot.expressions.TsOrDsDiff'>: <function _rename>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _rename>, <class 'sqlglot.expressions.TsOrDsToDateStr'>: <function _rename>, <class 'sqlglot.expressions.TsOrDsToTime'>: <function _rename>, <class 'sqlglot.expressions.TsOrDsToTimestamp'>: <function _rename>, <class 'sqlglot.expressions.Unhex'>: <function _rename>, <class 'sqlglot.expressions.UnixDate'>: <function _rename>, <class 'sqlglot.expressions.UnixToStr'>: <function _rename>, <class 'sqlglot.expressions.UnixToTime'>: <function _rename>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function _rename>, <class 'sqlglot.expressions.Upper'>: <function _rename>, <class 'sqlglot.expressions.VarMap'>: <function _rename>, <class 
'sqlglot.expressions.Variance'>: <function _rename>, <class 'sqlglot.expressions.VariancePop'>: <function _rename>, <class 'sqlglot.expressions.Week'>: <function _rename>, <class 'sqlglot.expressions.WeekOfYear'>: <function _rename>, <class 'sqlglot.expressions.When'>: <function _rename>, <class 'sqlglot.expressions.XMLTable'>: <function _rename>, <class 'sqlglot.expressions.Year'>: <function _rename>, <class 'sqlglot.expressions.Alias'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Between'>: <function _rename>, <class 'sqlglot.expressions.Boolean'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Column'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Distinct'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.In'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Interval'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.JSONPath'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.JSONPathKey'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.JSONPathSubscript'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Lambda'>: <function _lambda_sql>, <class 'sqlglot.expressions.Not'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Null'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Ordered'>: <function _ordered_py>, <class 'sqlglot.expressions.Star'>: <function Python.Generator.<lambda>>}
SELECT_KINDS: Tuple[str, ...] = ()
TRY_SUPPORTED = False
AFTER_HAVING_MODIFIER_TRANSFORMS = {'qualify': <function Generator.<lambda>>, 'windows': <function Generator.<lambda>>}
Inherited Members
sqlglot.generator.Generator
Generator
NULL_ORDERING_SUPPORTED
IGNORE_NULLS_IN_FUNC
LOCKING_READS_SUPPORTED
EXPLICIT_UNION
WRAP_DERIVED_VALUES
CREATE_FUNCTION_RETURN_AS
MATCHED_BY_SOURCE
SINGLE_STRING_INTERVAL
INTERVAL_ALLOWS_PLURAL_FORM
LIMIT_FETCH
LIMIT_ONLY_LITERALS
RENAME_TABLE_WITH_DB
GROUPINGS_SEP
INDEX_ON
JOIN_HINTS
TABLE_HINTS
QUERY_HINTS
QUERY_HINT_SEP
IS_BOOL_ALLOWED
DUPLICATE_KEY_UPDATE_WITH_SET
LIMIT_IS_TOP
RETURNING_END
COLUMN_JOIN_MARKS_SUPPORTED
EXTRACT_ALLOWS_QUOTES
TZ_TO_WITH_TIME_ZONE
NVL2_SUPPORTED
VALUES_AS_TABLE
ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
UNNEST_WITH_ORDINALITY
AGGREGATE_FILTER_SUPPORTED
SEMI_ANTI_JOIN_WITH_SIDE
COMPUTED_COLUMN_WITH_TYPE
SUPPORTS_TABLE_COPY
TABLESAMPLE_REQUIRES_PARENS
TABLESAMPLE_SIZE_IS_ROWS
TABLESAMPLE_KEYWORDS
TABLESAMPLE_WITH_METHOD
TABLESAMPLE_SEED_KEYWORD
COLLATE_IS_FUNC
DATA_TYPE_SPECIFIERS_ALLOWED
ENSURE_BOOLS
CTE_RECURSIVE_KEYWORD_REQUIRED
SUPPORTS_SINGLE_ARG_CONCAT
LAST_DAY_SUPPORTS_DATE_PART
SUPPORTS_TABLE_ALIAS_COLUMNS
UNPIVOT_ALIASES_ARE_IDENTIFIERS
JSON_KEY_VALUE_PAIR_SEP
INSERT_OVERWRITE
SUPPORTS_SELECT_INTO
SUPPORTS_UNLOGGED_TABLES
SUPPORTS_CREATE_TABLE_LIKE
LIKE_PROPERTY_INSIDE_SCHEMA
MULTI_ARG_DISTINCT
JSON_TYPE_REQUIRED_FOR_EXTRACTION
JSON_PATH_BRACKETED_KEY_SUPPORTED
JSON_PATH_SINGLE_QUOTE_ESCAPE
SUPPORTED_JSON_PATH_PARTS
CAN_IMPLEMENT_ARRAY_ANY
SUPPORTS_TO_NUMBER
OUTER_UNION_MODIFIERS
COPY_PARAMS_ARE_WRAPPED
COPY_PARAMS_EQ_REQUIRED
COPY_HAS_INTO_KEYWORD
STAR_EXCEPT
HEX_FUNC
WITH_PROPERTIES_PREFIX
TYPE_MAPPING
TIME_PART_SINGULARS
TOKEN_MAPPING
STRUCT_DELIMITER
PARAMETER_TOKEN
NAMED_PLACEHOLDER_TOKEN
PROPERTIES_LOCATION
RESERVED_KEYWORDS
WITH_SEPARATED_COMMENTS
EXCLUDE_COMMENTS
UNWRAPPED_INTERVAL_VALUES
PARAMETERIZABLE_TEXT_TYPES
EXPRESSIONS_WITHOUT_NESTED_CTES
SENTINEL_LINE_BREAK
pretty
identify
normalize
pad
unsupported_level
max_unsupported
leading_comma
max_text_width
comments
dialect
normalize_functions
unsupported_messages
generate
preprocess
unsupported
sep
seg
pad_comment
maybe_comment
wrap
no_identify
normalize_func
indent
sql
uncache_sql
cache_sql
characterset_sql
column_parts
column_sql
columnposition_sql
columndef_sql
columnconstraint_sql
computedcolumnconstraint_sql
autoincrementcolumnconstraint_sql
compresscolumnconstraint_sql
generatedasidentitycolumnconstraint_sql
generatedasrowcolumnconstraint_sql
periodforsystemtimeconstraint_sql
notnullcolumnconstraint_sql
transformcolumnconstraint_sql
primarykeycolumnconstraint_sql
uniquecolumnconstraint_sql
createable_sql
create_sql
sequenceproperties_sql
clone_sql
describe_sql
heredoc_sql
prepend_ctes
with_sql
cte_sql
tablealias_sql
bitstring_sql
hexstring_sql
bytestring_sql
unicodestring_sql
rawstring_sql
datatypeparam_sql
datatype_sql
directory_sql
delete_sql
drop_sql
except_sql
except_op
fetch_sql
filter_sql
hint_sql
indexparameters_sql
index_sql
identifier_sql
hex_sql
lowerhex_sql
inputoutputformat_sql
national_sql
partition_sql
properties_sql
root_properties
properties
with_properties
locate_properties
property_name
property_sql
likeproperty_sql
fallbackproperty_sql
journalproperty_sql
freespaceproperty_sql
checksumproperty_sql
mergeblockratioproperty_sql
datablocksizeproperty_sql
blockcompressionproperty_sql
isolatedloadingproperty_sql
partitionboundspec_sql
partitionedofproperty_sql
lockingproperty_sql
withdataproperty_sql
withsystemversioningproperty_sql
insert_sql
intersect_sql
intersect_op
introducer_sql
kill_sql
pseudotype_sql
objectidentifier_sql
onconflict_sql
returning_sql
rowformatdelimitedproperty_sql
withtablehint_sql
indextablehint_sql
historicaldata_sql
table_parts
table_sql
tablesample_sql
pivot_sql
version_sql
tuple_sql
update_sql
values_sql
var_sql
into_sql
from_sql
group_sql
having_sql
connect_sql
prior_sql
join_sql
lambda_sql
lateral_op
lateral_sql
limit_sql
offset_sql
setitem_sql
set_sql
pragma_sql
lock_sql
literal_sql
escape_str
loaddata_sql
null_sql
boolean_sql
order_sql
withfill_sql
cluster_sql
distribute_sql
sort_sql
ordered_sql
matchrecognizemeasure_sql
matchrecognize_sql
query_modifiers
queryoption_sql
offset_limit_modifiers
after_limit_modifiers
select_sql
schema_sql
schema_columns_sql
star_sql
parameter_sql
sessionparameter_sql
placeholder_sql
subquery_sql
qualify_sql
set_operations
union_sql
union_op
unnest_sql
prewhere_sql
where_sql
window_sql
partition_by_sql
windowspec_sql
withingroup_sql
between_sql
bracket_offset_expressions
bracket_sql
all_sql
any_sql
exists_sql
case_sql
constraint_sql
nextvaluefor_sql
extract_sql
trim_sql
convert_concat_args
concat_sql
concatws_sql
check_sql
foreignkey_sql
primarykey_sql
if_sql
matchagainst_sql
jsonkeyvalue_sql
jsonpath_sql
json_path_part
formatjson_sql
jsonobject_sql
jsonobjectagg_sql
jsonarray_sql
jsonarrayagg_sql
jsoncolumndef_sql
jsonschema_sql
jsontable_sql
openjsoncolumndef_sql
openjson_sql
in_sql
in_unnest_op
interval_sql
return_sql
reference_sql
anonymous_sql
paren_sql
neg_sql
not_sql
alias_sql
pivotalias_sql
aliases_sql
atindex_sql
attimezone_sql
fromtimezone_sql
add_sql
and_sql
or_sql
xor_sql
connector_sql
bitwiseand_sql
bitwiseleftshift_sql
bitwisenot_sql
bitwiseor_sql
bitwiserightshift_sql
bitwisexor_sql
cast_sql
currentdate_sql
currenttimestamp_sql
collate_sql
command_sql
comment_sql
mergetreettlaction_sql
mergetreettl_sql
transaction_sql
commit_sql
rollback_sql
altercolumn_sql
alterdiststyle_sql
altersortkey_sql
renametable_sql
renamecolumn_sql
alterset_sql
altertable_sql
add_column_sql
droppartition_sql
addconstraint_sql
distinct_sql
ignorenulls_sql
respectnulls_sql
havingmax_sql
intdiv_sql
dpipe_sql
div_sql
overlaps_sql
distance_sql
dot_sql
eq_sql
propertyeq_sql
escape_sql
glob_sql
gt_sql
gte_sql
ilike_sql
ilikeany_sql
is_sql
like_sql
likeany_sql
similarto_sql
lt_sql
lte_sql
mod_sql
mul_sql
neq_sql
nullsafeeq_sql
nullsafeneq_sql
slice_sql
sub_sql
trycast_sql
try_sql
log_sql
use_sql
binary
function_fallback_sql
func
format_args
too_wide
format_time
expressions
op_expressions
naked_property
tag_sql
token_sql
userdefinedfunction_sql
joinhint_sql
kwarg_sql
when_sql
merge_sql
tochar_sql
tonumber_sql
dictproperty_sql
dictrange_sql
dictsubproperty_sql
oncluster_sql
clusteredbyproperty_sql
anyvalue_sql
querytransform_sql
indexconstraintoption_sql
checkcolumnconstraint_sql
indexcolumnconstraint_sql
nvl2_sql
comprehension_sql
columnprefix_sql
opclass_sql
predict_sql
forin_sql
refresh_sql
operator_sql
toarray_sql
tsordstotime_sql
tsordstotimestamp_sql
tsordstodate_sql
unixdate_sql
lastday_sql
dateadd_sql
arrayany_sql
generateseries_sql
struct_sql
partitionrange_sql
truncatetable_sql
convert_sql
copyparameter_sql
credentials_sql
copy_sql
semicolon_sql
datadeletionproperty_sql
maskingpolicycolumnconstraint_sql