sqlglot.executor.python
import ast
import collections
import itertools
import math

from sqlglot import exp, generator, planner, tokens
from sqlglot.dialects.dialect import Dialect, inline_array_sql
from sqlglot.errors import ExecuteError
from sqlglot.executor.context import Context
from sqlglot.executor.env import ENV
from sqlglot.executor.table import RowReader, Table
from sqlglot.helper import csv_reader, subclasses


class PythonExecutor:
    """Execute a query plan step by step using Python code generated from SQL expressions."""

    def __init__(self, env=None, tables=None):
        """Store the code generator, the evaluation environment and the known tables.

        Args:
            env: optional mapping whose entries are layered on top of ``ENV``.
            tables: optional table container consulted by ``scan_table``.
        """
        self.generator = Python().generator(identify=True, comments=False)
        # Caller-supplied names win over the defaults in ENV.
        self.env = {**ENV, **(env or {})}
        self.tables = tables or {}

    def execute(self, plan):
        """Run every step of ``plan`` and return the table produced by its root step.

        Steps are taken from a work set seeded with ``plan.leaves``. A step is
        queued once all of its dependencies have produced a context, and a
        dependency's context is discarded once all of its dependents finished.

        Raises:
            ExecuteError: if running a step raises; the original error is chained.
        """
        # Checked in order with isinstance, first match wins.
        handlers = (
            (planner.Scan, self.scan),
            (planner.Aggregate, self.aggregate),
            (planner.Join, self.join),
            (planner.Sort, self.sort),
            (planner.SetOperation, self.set_operation),
        )
        completed = set()
        pending = set(plan.leaves)
        contexts = {}

        while pending:
            node = pending.pop()
            try:
                # Merge the output tables of every dependency into one context.
                inputs = {}
                for upstream in node.dependencies:
                    for name, table in contexts[upstream].tables.items():
                        inputs[name] = table
                context = self.context(inputs)

                for step_type, run in handlers:
                    if isinstance(node, step_type):
                        contexts[node] = run(node, context)
                        break
                else:
                    raise NotImplementedError

                completed.add(node)

                # Queue dependents whose inputs are now all available.
                for dependent in node.dependents:
                    if all(d in contexts for d in dependent.dependencies):
                        pending.add(dependent)

                # Drop inputs that no unfinished step still needs.
                for dependency in node.dependencies:
                    if all(d in completed for d in dependency.dependents):
                        contexts.pop(dependency)
            except Exception as e:
                raise ExecuteError(f"Step '{node.id}' failed: {e}") from e

        root = plan.root
        return contexts[root].tables[root.name]

    def generate(self, expression):
        """Convert a SQL expression into literal Python code and compile it into bytecode.

        Returns ``None`` when ``expression`` is falsy.
        """
        if expression:
            source = self.generator.generate(expression)
            # The generated source doubles as the code object's filename.
            return compile(source, source, "eval", optimize=2)
        return None

    def generate_tuple(self, expressions):
        """Convert an array of SQL expressions into tuple of Python byte code."""
        if expressions:
            return tuple(map(self.generate, expressions))
        return ()

    def context(self, tables):
        """Wrap ``tables`` in a ``Context`` that evaluates against this executor's env."""
        return Context(tables, env=self.env)

    def table(self, expressions):
        """Build an empty ``Table`` whose columns are named after ``expressions``.

        Expression items contribute their ``alias_or_name``; any other item is
        used as the column name directly.
        """

        def column_name(item):
            return item.alias_or_name if isinstance(item, exp.Expression) else item

        return Table(map(column_name, expressions))

    def scan(self, step, context):
        """Run a scan step and return a context holding its output under ``step.name``.

        The rows come from one of four places, tried in this order: a single
        empty static row when the step has no source, a table already present
        in ``context``, a CSV file (``exp.ReadCSV``), or ``self.tables``.
        """
        origin = step.source
        source = origin
        if origin and isinstance(origin, exp.Expression):
            source = origin.name or origin.alias

        if source is None:
            context, table_iter = self.static()
        elif source in context:
            if not (step.projections or step.condition):
                # Nothing to compute: expose the existing table under the step name.
                return self.context({step.name: context.tables[source]})
            table_iter = context.table_iter(source)
        elif isinstance(step.source, exp.Table) and isinstance(step.source.this, exp.ReadCSV):
            table_iter = self.scan_csv(step)
            # scan_csv yields its context first, then one reader per row.
            context = next(table_iter)
        else:
            context, table_iter = self.scan_table(step)

        output = self._project_and_filter(context, step, table_iter)
        return self.context({step.name: output})

    def _project_and_filter(self, context, step, table_iter):
        """Collect rows from ``table_iter`` into a new table.

        Rows failing ``step.condition`` are skipped, ``step.projections`` are
        evaluated when present (otherwise the raw row is kept), and collection
        stops once the sink holds ``step.limit`` rows.
        """
        sink = self.table(step.projections or context.columns)
        condition = self.generate(step.condition)
        projections = self.generate_tuple(step.projections)

        for reader in table_iter:
            if len(sink) >= step.limit:
                break

            if not condition or context.eval(condition):
                sink.append(context.eval_tuple(projections) if projections else reader.row)

        return sink

    def static(self):
        """Return an empty context and a one-element row source holding an empty row."""
        return self.context({}), [RowReader(())]

    def scan_table(self, step):
        """Look ``step.source`` up in ``self.tables`` and return ``(context, row iterator)``."""
        table = self.tables.find(step.source)
        context = self.context({step.source.alias_or_name: table})
        return context, iter(table)

    def scan_csv(self, step):
        """Stream the rows of the CSV file described by ``step.source``.

        This is a generator: the first item yielded is the ``Context`` built
        for the file (its columns come from the header row); every following
        item is the context table's reader after the next row has been set.

        Column types are inferred from the first data row with
        ``ast.literal_eval``; values that cannot be parsed as a Python literal
        make the column a ``str`` column.
        """
        alias = step.source.alias
        source = step.source.this

        with csv_reader(source) as reader:
            columns = next(reader)
            table = Table(columns)
            context = self.context({alias: table})
            yield context
            types = []

            for row in reader:
                if not types:
                    for v in row:
                        try:
                            types.append(type(ast.literal_eval(v)))
                        except (ValueError, SyntaxError):
                            types.append(str)

                # An empty cell cannot be cast to a non-string type (int("")
                # raises ValueError), so it is loaded as None instead.
                context.set_row(
                    tuple(None if (t is not str and v == "") else t(v) for t, v in zip(types, row))
                )
                yield context.table.reader

    def join(self, step, context):
        """Join the step's source table with every table listed in ``step.joins``.

        Each join widens the combined rows; every participating table is then
        exposed as a view over the combined rows through its own column range.
        """
        source = step.name
        source_table = context.tables[source]
        source_context = self.context({source: source_table})
        column_ranges = {source: range(0, len(source_table.columns))}

        for join_name, join in step.joins.items():
            join_table = context.tables[join_name]
            # The new table's columns start right after the widest range so far.
            offset = max(r.stop for r in column_ranges.values())
            column_ranges[join_name] = range(offset, len(join_table.columns) + offset)
            join_context = self.context({join_name: join_table})

            algorithm = self.hash_join if join.get("source_key") else self.nested_loop_join
            combined = algorithm(join, source_context, join_context)

            views = {}
            for view_name, column_range in column_ranges.items():
                views[view_name] = Table(combined.columns, combined.rows, column_range)
            source_context = self.context(views)

            condition = self.generate(join["condition"])
            if condition:
                source_context.filter(condition)

        if not (step.condition or step.projections):
            return source_context

        sink = self._project_and_filter(
            source_context,
            step,
            (reader for reader, _ in iter(source_context)),
        )

        if step.projections:
            return self.context({step.name: sink})

        # Only filtered: keep one view per table over the surviving rows.
        return self.context(
            {
                view_name: Table(view.columns, sink.rows, view.column_range)
                for view_name, view in source_context.tables.items()
            }
        )

    def nested_loop_join(self, _join, source_context, join_context):
        """Return the cross product of the two contexts' rows as a new table."""
        table = Table(source_context.columns + join_context.columns)

        for source_reader, _ in source_context:
            for join_reader, _ in join_context:
                table.append(source_reader.row + join_reader.row)

        return table

    def hash_join(self, join, source_context, join_context):
        """Join two contexts by grouping their rows on the evaluated join keys.

        For a ``LEFT`` join, source rows without a partner are padded with
        ``None`` values; for a ``RIGHT`` join the join-side rows are.
        """
        source_key = self.generate_tuple(join["source_key"])
        join_key = self.generate_tuple(join["join_key"])
        side = join.get("side")
        left = side == "LEFT"
        right = side == "RIGHT"

        # key -> (rows from the source side, rows from the join side)
        buckets = collections.defaultdict(lambda: ([], []))

        for reader, ctx in source_context:
            buckets[ctx.eval_tuple(source_key)][0].append(reader.row)
        for reader, ctx in join_context:
            buckets[ctx.eval_tuple(join_key)][1].append(reader.row)

        table = Table(source_context.columns + join_context.columns)
        padded_context = join_context if left else source_context
        nulls = [(None,) * len(padded_context.columns)]

        for source_rows, join_rows in buckets.values():
            if left:
                join_rows = join_rows or nulls
            elif right:
                source_rows = source_rows or nulls

            for source_row, join_row in itertools.product(source_rows, join_rows):
                table.append(source_row + join_row)

        return table

    def aggregate(self, step, context):
        """Group the context's rows by ``step.group`` and evaluate ``step.aggregations``.

        Operand expressions are evaluated per row first and appended to the
        rows as extra columns. The rows are then sorted by the group key so
        every group is a contiguous run, and one output row is emitted per run.
        """
        group_by = self.generate_tuple(step.group.values())
        aggregations = self.generate_tuple(step.aggregations)
        operands = self.generate_tuple(step.operands)

        if operands:
            operand_table = Table(self.table(step.operands).columns)

            for _reader, ctx in context:
                operand_table.append(ctx.eval_tuple(operands))

            # Widen each input row with its evaluated operand values.
            for index, (row, extra) in enumerate(zip(context.table.rows, operand_table.rows)):
                context.table.rows[index] = row + extra

            width = len(context.columns)
            context.add_columns(*operand_table.columns)

            operand_range = range(width, width + len(operand_table.columns))
            operand_table = Table(context.columns, context.table.rows, operand_range)

            tables = {None: operand_table}
            tables.update(context.tables)
            context = self.context(tables)

        context.sort(group_by)

        group = None
        start = 0
        length = len(context.table)
        table = self.table(list(step.group) + step.aggregations)

        if length:
            last = length - 1
            for i in range(length):
                context.set_index(i)
                key = context.eval_tuple(group_by)
                if group is None:
                    group = key

                if key != group:
                    # Row i opens a new group: emit the run collected since start.
                    context.set_range(start, i)
                    table.append(group + context.eval_tuple(aggregations))
                    group = key
                    start = i

                if len(table.rows) >= step.limit:
                    break

                if i == last:
                    context.set_range(start, i + 1)
                    table.append(group + context.eval_tuple(aggregations))
        elif step.limit > 0 and not group_by:
            # No input rows and no grouping: still emit a single aggregate row.
            context.set_range(0, 0)
            table.append(context.eval_tuple(aggregations))

        outputs = {step.name: table}
        for name in context.tables:
            outputs[name] = table
        context = self.context(outputs)

        if step.projections or step.condition:
            return self.scan(step, context)
        return context

    def sort(self, step, context):
        """Order the context's rows by ``step.key`` and return the projected columns.

        The projections are appended to every input row so the sort key can
        refer to both; after sorting (and truncating to a finite
        ``step.limit``) only the projection columns are kept.
        """
        projections = self.generate_tuple(step.projections)
        projection_columns = [projection.alias_or_name for projection in step.projections]
        all_columns = list(context.columns) + projection_columns

        sink = self.table(all_columns)
        for reader, ctx in context:
            sink.append(reader.row + ctx.eval_tuple(projections))

        sort_tables = {None: sink}
        sort_tables.update(dict.fromkeys(context.tables, sink))
        sort_ctx = self.context(sort_tables)
        sort_ctx.sort(self.generate_tuple(step.key))

        if not math.isinf(step.limit):
            sort_ctx.table.rows = sort_ctx.table.rows[: step.limit]

        first = len(context.columns)
        stop = len(all_columns)
        output = Table(
            projection_columns,
            rows=[row[first:stop] for row in sort_ctx.table.rows],
        )
        return self.context({step.name: output})

    def set_operation(self, step, context):
        """Combine the rows of ``step.left`` and ``step.right`` according to ``step.op``.

        ``Intersect``, ``Except`` and distinct ``Union`` go through Python sets;
        anything else concatenates the two row lists.
        """
        left = context.tables[step.left]
        right = context.tables[step.right]

        sink = self.table(left.columns)

        if issubclass(step.op, exp.Intersect):
            rows = list(set(left.rows).intersection(set(right.rows)))
        elif issubclass(step.op, exp.Except):
            rows = list(set(left.rows).difference(set(right.rows)))
        elif issubclass(step.op, exp.Union) and step.distinct:
            rows = list(set(left.rows).union(set(right.rows)))
        else:
            rows = left.rows + right.rows

        sink.rows = rows
        return self.context({step.name: sink})


def _ordered_py(self, expression):
    """Render an ``Ordered`` expression as an ``ORDERED(this, desc, nulls_first)`` call."""
    this = self.sql(expression, "this")
    desc = str(bool(expression.args.get("desc")))
    nulls_first = str(bool(expression.args.get("nulls_first")))
    return f"ORDERED({this}, {desc}, {nulls_first})"


def _rename(self, e):
    """Render ``e`` as a call to a function named after ``e.key``.

    A single list-valued arg is spread into positional arguments, and for
    var-len functions the trailing list arg is spread after the leading args.

    Raises:
        Exception: if the call cannot be rendered; the cause is chained.
    """
    try:
        values = list(e.args.values())

        if len(values) == 1:
            (only,) = values
            call_args = only if isinstance(only, list) else [only]
        elif isinstance(e, exp.Func) and e.is_var_len_args:
            *head, tail = values
            call_args = [*head, *tail]
        else:
            call_args = values

        return self.func(e.key, *call_args)
    except Exception as ex:
        raise Exception(f"Could not rename {repr(e)}") from ex


def _case_sql(self, expression):
    """Render a CASE expression as a chain of Python conditional expressions.

    The branches are folded from last to first so the first WHEN ends up
    outermost; a missing default renders as ``None``.
    """
    this = self.sql(expression, "this")
    chain = self.sql(expression, "default") or "None"

    for branch in reversed(expression.args["ifs"]):
        true = self.sql(branch, "true")
        condition = self.sql(branch, "this")
        if this:
            # Simple CASE (CASE x WHEN v ...): compare the operand with the
            # branch value. This must be `==`; a single `=` is an assignment
            # and does not compile as an expression.
            condition = f"{this} == ({condition})"
        chain = f"{true} if {condition} else ({chain})"

    return chain


def _lambda_sql(self, e: exp.Lambda) -> str:
    """Render a SQL lambda as a Python ``lambda`` expression.

    Identifiers that name one of the lambda's parameters are replaced with
    vars, so they are rendered through ``exp.var`` rather than as identifiers.
    """
    names = {parameter.name.lower() for parameter in e.expressions}

    def to_var(node):
        if isinstance(node, exp.Identifier) and node.name.lower() in names:
            return exp.var(node.name)
        return node

    e = e.transform(to_var)

    return f"lambda {self.expressions(e, flat=True)}: {self.sql(e, 'this')}"


class Python(Dialect):
    """Dialect whose generator renders SQL expressions as Python source text."""

    class Tokenizer(tokens.Tokenizer):
        STRING_ESCAPES = ["\\"]

    class Generator(generator.Generator):
        TRANSFORMS = {
            # Defaults: every binary operator and function goes through _rename.
            **dict.fromkeys(subclasses(exp.__name__, exp.Binary), _rename),
            **dict.fromkeys(exp.ALL_FUNCTIONS, _rename),
            # Explicit entries below replace a default registered for the same key.
            exp.Case: _case_sql,
            exp.Alias: lambda self, e: self.sql(e.this),
            exp.Array: inline_array_sql,
            exp.And: lambda self, e: self.binary(e, "and"),
            exp.Between: _rename,
            exp.Boolean: lambda self, e: "True" if e.this else "False",
            exp.Cast: lambda self, e: f"CAST({self.sql(e.this)}, exp.DataType.Type.{e.args['to']})",
            exp.Column: lambda self, e: f"scope[{self.sql(e, 'table') or None}][{self.sql(e.this)}]",
            exp.Distinct: lambda self, e: f"set({self.sql(e, 'this')})",
            exp.Extract: lambda self, e: f"EXTRACT('{e.name.lower()}', {self.sql(e, 'expression')})",
            exp.In: lambda self, e: f"{self.sql(e, 'this')} in {{{self.expressions(e, flat=True)}}}",
            exp.Interval: lambda self, e: f"INTERVAL({self.sql(e.this)}, '{self.sql(e.unit)}')",
            exp.Is: lambda self, e: self.binary(
                e, "==" if isinstance(e.this, exp.Literal) else "is"
            ),
            exp.Lambda: _lambda_sql,
            exp.Not: lambda self, e: f"not {self.sql(e.this)}",
            exp.Null: lambda *_: "None",
            exp.Or: lambda self, e: self.binary(e, "or"),
            exp.Ordered: _ordered_py,
            exp.Star: lambda *_: "1",
        }
class
PythonExecutor:
class PythonExecutor:
    """Execute a query plan step by step using Python code generated from SQL expressions."""

    def __init__(self, env=None, tables=None):
        """Store the code generator, the evaluation environment and the known tables.

        Args:
            env: optional mapping whose entries are layered on top of ``ENV``.
            tables: optional table container consulted by ``scan_table``.
        """
        self.generator = Python().generator(identify=True, comments=False)
        # Caller-supplied names win over the defaults in ENV.
        self.env = {**ENV, **(env or {})}
        self.tables = tables or {}

    def execute(self, plan):
        """Run every step of ``plan`` and return the table produced by its root step.

        Steps are taken from a work set seeded with ``plan.leaves``. A step is
        queued once all of its dependencies have produced a context, and a
        dependency's context is discarded once all of its dependents finished.

        Raises:
            ExecuteError: if running a step raises; the original error is chained.
        """
        # Checked in order with isinstance, first match wins.
        handlers = (
            (planner.Scan, self.scan),
            (planner.Aggregate, self.aggregate),
            (planner.Join, self.join),
            (planner.Sort, self.sort),
            (planner.SetOperation, self.set_operation),
        )
        completed = set()
        pending = set(plan.leaves)
        contexts = {}

        while pending:
            node = pending.pop()
            try:
                # Merge the output tables of every dependency into one context.
                inputs = {}
                for upstream in node.dependencies:
                    for name, table in contexts[upstream].tables.items():
                        inputs[name] = table
                context = self.context(inputs)

                for step_type, run in handlers:
                    if isinstance(node, step_type):
                        contexts[node] = run(node, context)
                        break
                else:
                    raise NotImplementedError

                completed.add(node)

                # Queue dependents whose inputs are now all available.
                for dependent in node.dependents:
                    if all(d in contexts for d in dependent.dependencies):
                        pending.add(dependent)

                # Drop inputs that no unfinished step still needs.
                for dependency in node.dependencies:
                    if all(d in completed for d in dependency.dependents):
                        contexts.pop(dependency)
            except Exception as e:
                raise ExecuteError(f"Step '{node.id}' failed: {e}") from e

        root = plan.root
        return contexts[root].tables[root.name]

    def generate(self, expression):
        """Convert a SQL expression into literal Python code and compile it into bytecode.

        Returns ``None`` when ``expression`` is falsy.
        """
        if expression:
            source = self.generator.generate(expression)
            # The generated source doubles as the code object's filename.
            return compile(source, source, "eval", optimize=2)
        return None

    def generate_tuple(self, expressions):
        """Convert an array of SQL expressions into tuple of Python byte code."""
        if expressions:
            return tuple(map(self.generate, expressions))
        return ()

    def context(self, tables):
        """Wrap ``tables`` in a ``Context`` that evaluates against this executor's env."""
        return Context(tables, env=self.env)

    def table(self, expressions):
        """Build an empty ``Table`` whose columns are named after ``expressions``.

        Expression items contribute their ``alias_or_name``; any other item is
        used as the column name directly.
        """

        def column_name(item):
            return item.alias_or_name if isinstance(item, exp.Expression) else item

        return Table(map(column_name, expressions))

    def scan(self, step, context):
        """Run a scan step and return a context holding its output under ``step.name``.

        The rows come from one of four places, tried in this order: a single
        empty static row when the step has no source, a table already present
        in ``context``, a CSV file (``exp.ReadCSV``), or ``self.tables``.
        """
        origin = step.source
        source = origin
        if origin and isinstance(origin, exp.Expression):
            source = origin.name or origin.alias

        if source is None:
            context, table_iter = self.static()
        elif source in context:
            if not (step.projections or step.condition):
                # Nothing to compute: expose the existing table under the step name.
                return self.context({step.name: context.tables[source]})
            table_iter = context.table_iter(source)
        elif isinstance(step.source, exp.Table) and isinstance(step.source.this, exp.ReadCSV):
            table_iter = self.scan_csv(step)
            # scan_csv yields its context first, then one reader per row.
            context = next(table_iter)
        else:
            context, table_iter = self.scan_table(step)

        output = self._project_and_filter(context, step, table_iter)
        return self.context({step.name: output})

    def _project_and_filter(self, context, step, table_iter):
        """Collect rows from ``table_iter`` into a new table.

        Rows failing ``step.condition`` are skipped, ``step.projections`` are
        evaluated when present (otherwise the raw row is kept), and collection
        stops once the sink holds ``step.limit`` rows.
        """
        sink = self.table(step.projections or context.columns)
        condition = self.generate(step.condition)
        projections = self.generate_tuple(step.projections)

        for reader in table_iter:
            if len(sink) >= step.limit:
                break

            if not condition or context.eval(condition):
                sink.append(context.eval_tuple(projections) if projections else reader.row)

        return sink

    def static(self):
        """Return an empty context and a one-element row source holding an empty row."""
        return self.context({}), [RowReader(())]

    def scan_table(self, step):
        """Look ``step.source`` up in ``self.tables`` and return ``(context, row iterator)``."""
        table = self.tables.find(step.source)
        context = self.context({step.source.alias_or_name: table})
        return context, iter(table)

    def scan_csv(self, step):
        """Stream the rows of the CSV file described by ``step.source``.

        This is a generator: the first item yielded is the ``Context`` built
        for the file (its columns come from the header row); every following
        item is the context table's reader after the next row has been set.

        Column types are inferred from the first data row with
        ``ast.literal_eval``; values that cannot be parsed as a Python literal
        make the column a ``str`` column.
        """
        alias = step.source.alias
        source = step.source.this

        with csv_reader(source) as reader:
            columns = next(reader)
            table = Table(columns)
            context = self.context({alias: table})
            yield context
            types = []

            for row in reader:
                if not types:
                    for v in row:
                        try:
                            types.append(type(ast.literal_eval(v)))
                        except (ValueError, SyntaxError):
                            types.append(str)

                # An empty cell cannot be cast to a non-string type (int("")
                # raises ValueError), so it is loaded as None instead.
                context.set_row(
                    tuple(None if (t is not str and v == "") else t(v) for t, v in zip(types, row))
                )
                yield context.table.reader

    def join(self, step, context):
        """Join the step's source table with every table listed in ``step.joins``.

        Each join widens the combined rows; every participating table is then
        exposed as a view over the combined rows through its own column range.
        """
        source = step.name
        source_table = context.tables[source]
        source_context = self.context({source: source_table})
        column_ranges = {source: range(0, len(source_table.columns))}

        for join_name, join in step.joins.items():
            join_table = context.tables[join_name]
            # The new table's columns start right after the widest range so far.
            offset = max(r.stop for r in column_ranges.values())
            column_ranges[join_name] = range(offset, len(join_table.columns) + offset)
            join_context = self.context({join_name: join_table})

            algorithm = self.hash_join if join.get("source_key") else self.nested_loop_join
            combined = algorithm(join, source_context, join_context)

            views = {}
            for view_name, column_range in column_ranges.items():
                views[view_name] = Table(combined.columns, combined.rows, column_range)
            source_context = self.context(views)

            condition = self.generate(join["condition"])
            if condition:
                source_context.filter(condition)

        if not (step.condition or step.projections):
            return source_context

        sink = self._project_and_filter(
            source_context,
            step,
            (reader for reader, _ in iter(source_context)),
        )

        if step.projections:
            return self.context({step.name: sink})

        # Only filtered: keep one view per table over the surviving rows.
        return self.context(
            {
                view_name: Table(view.columns, sink.rows, view.column_range)
                for view_name, view in source_context.tables.items()
            }
        )

    def nested_loop_join(self, _join, source_context, join_context):
        """Return the cross product of the two contexts' rows as a new table."""
        table = Table(source_context.columns + join_context.columns)

        for source_reader, _ in source_context:
            for join_reader, _ in join_context:
                table.append(source_reader.row + join_reader.row)

        return table

    def hash_join(self, join, source_context, join_context):
        """Join two contexts by grouping their rows on the evaluated join keys.

        For a ``LEFT`` join, source rows without a partner are padded with
        ``None`` values; for a ``RIGHT`` join the join-side rows are.
        """
        source_key = self.generate_tuple(join["source_key"])
        join_key = self.generate_tuple(join["join_key"])
        side = join.get("side")
        left = side == "LEFT"
        right = side == "RIGHT"

        # key -> (rows from the source side, rows from the join side)
        buckets = collections.defaultdict(lambda: ([], []))

        for reader, ctx in source_context:
            buckets[ctx.eval_tuple(source_key)][0].append(reader.row)
        for reader, ctx in join_context:
            buckets[ctx.eval_tuple(join_key)][1].append(reader.row)

        table = Table(source_context.columns + join_context.columns)
        padded_context = join_context if left else source_context
        nulls = [(None,) * len(padded_context.columns)]

        for source_rows, join_rows in buckets.values():
            if left:
                join_rows = join_rows or nulls
            elif right:
                source_rows = source_rows or nulls

            for source_row, join_row in itertools.product(source_rows, join_rows):
                table.append(source_row + join_row)

        return table

    def aggregate(self, step, context):
        """Group the context's rows by ``step.group`` and evaluate ``step.aggregations``.

        Operand expressions are evaluated per row first and appended to the
        rows as extra columns. The rows are then sorted by the group key so
        every group is a contiguous run, and one output row is emitted per run.
        """
        group_by = self.generate_tuple(step.group.values())
        aggregations = self.generate_tuple(step.aggregations)
        operands = self.generate_tuple(step.operands)

        if operands:
            operand_table = Table(self.table(step.operands).columns)

            for _reader, ctx in context:
                operand_table.append(ctx.eval_tuple(operands))

            # Widen each input row with its evaluated operand values.
            for index, (row, extra) in enumerate(zip(context.table.rows, operand_table.rows)):
                context.table.rows[index] = row + extra

            width = len(context.columns)
            context.add_columns(*operand_table.columns)

            operand_range = range(width, width + len(operand_table.columns))
            operand_table = Table(context.columns, context.table.rows, operand_range)

            tables = {None: operand_table}
            tables.update(context.tables)
            context = self.context(tables)

        context.sort(group_by)

        group = None
        start = 0
        length = len(context.table)
        table = self.table(list(step.group) + step.aggregations)

        if length:
            last = length - 1
            for i in range(length):
                context.set_index(i)
                key = context.eval_tuple(group_by)
                if group is None:
                    group = key

                if key != group:
                    # Row i opens a new group: emit the run collected since start.
                    context.set_range(start, i)
                    table.append(group + context.eval_tuple(aggregations))
                    group = key
                    start = i

                if len(table.rows) >= step.limit:
                    break

                if i == last:
                    context.set_range(start, i + 1)
                    table.append(group + context.eval_tuple(aggregations))
        elif step.limit > 0 and not group_by:
            # No input rows and no grouping: still emit a single aggregate row.
            context.set_range(0, 0)
            table.append(context.eval_tuple(aggregations))

        outputs = {step.name: table}
        for name in context.tables:
            outputs[name] = table
        context = self.context(outputs)

        if step.projections or step.condition:
            return self.scan(step, context)
        return context

    def sort(self, step, context):
        """Order the context's rows by ``step.key`` and return the projected columns.

        The projections are appended to every input row so the sort key can
        refer to both; after sorting (and truncating to a finite
        ``step.limit``) only the projection columns are kept.
        """
        projections = self.generate_tuple(step.projections)
        projection_columns = [projection.alias_or_name for projection in step.projections]
        all_columns = list(context.columns) + projection_columns

        sink = self.table(all_columns)
        for reader, ctx in context:
            sink.append(reader.row + ctx.eval_tuple(projections))

        sort_tables = {None: sink}
        sort_tables.update(dict.fromkeys(context.tables, sink))
        sort_ctx = self.context(sort_tables)
        sort_ctx.sort(self.generate_tuple(step.key))

        if not math.isinf(step.limit):
            sort_ctx.table.rows = sort_ctx.table.rows[: step.limit]

        first = len(context.columns)
        stop = len(all_columns)
        output = Table(
            projection_columns,
            rows=[row[first:stop] for row in sort_ctx.table.rows],
        )
        return self.context({step.name: output})

    def set_operation(self, step, context):
        """Combine the rows of ``step.left`` and ``step.right`` according to ``step.op``.

        ``Intersect``, ``Except`` and distinct ``Union`` go through Python sets;
        anything else concatenates the two row lists.
        """
        left = context.tables[step.left]
        right = context.tables[step.right]

        sink = self.table(left.columns)

        if issubclass(step.op, exp.Intersect):
            rows = list(set(left.rows).intersection(set(right.rows)))
        elif issubclass(step.op, exp.Except):
            rows = list(set(left.rows).difference(set(right.rows)))
        elif issubclass(step.op, exp.Union) and step.distinct:
            rows = list(set(left.rows).union(set(right.rows)))
        else:
            rows = left.rows + right.rows

        sink.rows = rows
        return self.context({step.name: sink})
def
execute(self, plan):
def execute(self, plan):
    """Run every step of ``plan`` and return the table produced by its root step.

    Steps are taken from a work set seeded with ``plan.leaves``. A step is
    queued once all of its dependencies have produced a context, and a
    dependency's context is discarded once all of its dependents finished.

    Raises:
        ExecuteError: if running a step raises; the original error is chained.
    """
    # Checked in order with isinstance, first match wins.
    handlers = (
        (planner.Scan, self.scan),
        (planner.Aggregate, self.aggregate),
        (planner.Join, self.join),
        (planner.Sort, self.sort),
        (planner.SetOperation, self.set_operation),
    )
    completed = set()
    pending = set(plan.leaves)
    contexts = {}

    while pending:
        node = pending.pop()
        try:
            # Merge the output tables of every dependency into one context.
            inputs = {}
            for upstream in node.dependencies:
                for name, table in contexts[upstream].tables.items():
                    inputs[name] = table
            context = self.context(inputs)

            for step_type, run in handlers:
                if isinstance(node, step_type):
                    contexts[node] = run(node, context)
                    break
            else:
                raise NotImplementedError

            completed.add(node)

            # Queue dependents whose inputs are now all available.
            for dependent in node.dependents:
                if all(d in contexts for d in dependent.dependencies):
                    pending.add(dependent)

            # Drop inputs that no unfinished step still needs.
            for dependency in node.dependencies:
                if all(d in completed for d in dependency.dependents):
                    contexts.pop(dependency)
        except Exception as e:
            raise ExecuteError(f"Step '{node.id}' failed: {e}") from e

    root = plan.root
    return contexts[root].tables[root.name]
def
generate(self, expression):
def generate(self, expression):
    """Convert a SQL expression into literal Python code and compile it into bytecode.

    Returns ``None`` when ``expression`` is falsy.
    """
    if expression:
        source = self.generator.generate(expression)
        # The generated source doubles as the code object's filename.
        return compile(source, source, "eval", optimize=2)
    return None
Convert a SQL expression into literal Python code and compile it into bytecode.
def
generate_tuple(self, expressions):
def generate_tuple(self, expressions):
    """Convert an array of SQL expressions into tuple of Python byte code.

    A falsy ``expressions`` (``None`` or empty) gives an empty tuple.
    """
    if expressions:
        return tuple(map(self.generate, expressions))
    return ()
Convert an array of SQL expressions into tuple of Python byte code.
def
scan(self, step, context):
def scan(self, step, context):
    """Run a scan step and return a context holding its output under ``step.name``.

    The rows come from one of four places, tried in this order: a single
    empty static row when the step has no source, a table already present
    in ``context``, a CSV file (``exp.ReadCSV``), or ``self.tables``.
    """
    origin = step.source
    source = origin
    if origin and isinstance(origin, exp.Expression):
        source = origin.name or origin.alias

    if source is None:
        context, table_iter = self.static()
    elif source in context:
        if not (step.projections or step.condition):
            # Nothing to compute: expose the existing table under the step name.
            return self.context({step.name: context.tables[source]})
        table_iter = context.table_iter(source)
    elif isinstance(step.source, exp.Table) and isinstance(step.source.this, exp.ReadCSV):
        table_iter = self.scan_csv(step)
        # scan_csv yields its context first, then one reader per row.
        context = next(table_iter)
    else:
        context, table_iter = self.scan_table(step)

    output = self._project_and_filter(context, step, table_iter)
    return self.context({step.name: output})
def
scan_csv(self, step):
def scan_csv(self, step):
    """Stream the rows of the CSV file described by ``step.source``.

    This is a generator: the first item yielded is the ``Context`` built
    for the file (its columns come from the header row); every following
    item is the context table's reader after the next row has been set.

    Column types are inferred from the first data row with
    ``ast.literal_eval``; values that cannot be parsed as a Python literal
    make the column a ``str`` column.
    """
    alias = step.source.alias
    source = step.source.this

    with csv_reader(source) as reader:
        columns = next(reader)
        table = Table(columns)
        context = self.context({alias: table})
        yield context
        types = []

        for row in reader:
            if not types:
                for v in row:
                    try:
                        types.append(type(ast.literal_eval(v)))
                    except (ValueError, SyntaxError):
                        types.append(str)

            # An empty cell cannot be cast to a non-string type (int("")
            # raises ValueError), so it is loaded as None instead.
            context.set_row(
                tuple(None if (t is not str and v == "") else t(v) for t, v in zip(types, row))
            )
            yield context.table.reader
def
join(self, step, context):
def join(self, step, context):
    """Join the step's source table with every table listed in ``step.joins``.

    Each join widens the combined rows; every participating table is then
    exposed as a view over the combined rows through its own column range.
    """
    source = step.name
    source_table = context.tables[source]
    source_context = self.context({source: source_table})
    column_ranges = {source: range(0, len(source_table.columns))}

    for join_name, join in step.joins.items():
        join_table = context.tables[join_name]
        # The new table's columns start right after the widest range so far.
        offset = max(r.stop for r in column_ranges.values())
        column_ranges[join_name] = range(offset, len(join_table.columns) + offset)
        join_context = self.context({join_name: join_table})

        algorithm = self.hash_join if join.get("source_key") else self.nested_loop_join
        combined = algorithm(join, source_context, join_context)

        views = {}
        for view_name, column_range in column_ranges.items():
            views[view_name] = Table(combined.columns, combined.rows, column_range)
        source_context = self.context(views)

        condition = self.generate(join["condition"])
        if condition:
            source_context.filter(condition)

    if not (step.condition or step.projections):
        return source_context

    sink = self._project_and_filter(
        source_context,
        step,
        (reader for reader, _ in iter(source_context)),
    )

    if step.projections:
        return self.context({step.name: sink})

    # Only filtered: keep one view per table over the surviving rows.
    return self.context(
        {
            view_name: Table(view.columns, sink.rows, view.column_range)
            for view_name, view in source_context.tables.items()
        }
    )
def
hash_join(self, join, source_context, join_context):
def hash_join(self, join, source_context, join_context):
    """Join two contexts by grouping their rows on the evaluated join keys.

    For a ``LEFT`` join, source rows without a partner are padded with
    ``None`` values; for a ``RIGHT`` join the join-side rows are.
    """
    source_key = self.generate_tuple(join["source_key"])
    join_key = self.generate_tuple(join["join_key"])
    side = join.get("side")
    left = side == "LEFT"
    right = side == "RIGHT"

    # key -> (rows from the source side, rows from the join side)
    buckets = collections.defaultdict(lambda: ([], []))

    for reader, ctx in source_context:
        buckets[ctx.eval_tuple(source_key)][0].append(reader.row)
    for reader, ctx in join_context:
        buckets[ctx.eval_tuple(join_key)][1].append(reader.row)

    table = Table(source_context.columns + join_context.columns)
    padded_context = join_context if left else source_context
    nulls = [(None,) * len(padded_context.columns)]

    for source_rows, join_rows in buckets.values():
        if left:
            join_rows = join_rows or nulls
        elif right:
            source_rows = source_rows or nulls

        for source_row, join_row in itertools.product(source_rows, join_rows):
            table.append(source_row + join_row)

    return table
def
aggregate(self, step, context):
def aggregate(self, step, context):
    """Group the context's rows by ``step.group`` and evaluate ``step.aggregations``.

    Operand expressions are evaluated per row first and appended to the
    rows as extra columns. The rows are then sorted by the group key so
    every group is a contiguous run, and one output row is emitted per run.
    """
    group_by = self.generate_tuple(step.group.values())
    aggregations = self.generate_tuple(step.aggregations)
    operands = self.generate_tuple(step.operands)

    if operands:
        operand_table = Table(self.table(step.operands).columns)

        for _reader, ctx in context:
            operand_table.append(ctx.eval_tuple(operands))

        # Widen each input row with its evaluated operand values.
        for index, (row, extra) in enumerate(zip(context.table.rows, operand_table.rows)):
            context.table.rows[index] = row + extra

        width = len(context.columns)
        context.add_columns(*operand_table.columns)

        operand_range = range(width, width + len(operand_table.columns))
        operand_table = Table(context.columns, context.table.rows, operand_range)

        tables = {None: operand_table}
        tables.update(context.tables)
        context = self.context(tables)

    context.sort(group_by)

    group = None
    start = 0
    length = len(context.table)
    table = self.table(list(step.group) + step.aggregations)

    if length:
        last = length - 1
        for i in range(length):
            context.set_index(i)
            key = context.eval_tuple(group_by)
            if group is None:
                group = key

            if key != group:
                # Row i opens a new group: emit the run collected since start.
                context.set_range(start, i)
                table.append(group + context.eval_tuple(aggregations))
                group = key
                start = i

            if len(table.rows) >= step.limit:
                break

            if i == last:
                context.set_range(start, i + 1)
                table.append(group + context.eval_tuple(aggregations))
    elif step.limit > 0 and not group_by:
        # No input rows and no grouping: still emit a single aggregate row.
        context.set_range(0, 0)
        table.append(context.eval_tuple(aggregations))

    outputs = {step.name: table}
    for name in context.tables:
        outputs[name] = table
    context = self.context(outputs)

    if step.projections or step.condition:
        return self.scan(step, context)
    return context
def
sort(self, step, context):
def sort(self, step, context):
    """Order the context's rows by ``step.key`` and return the projected columns.

    The projections are appended to every input row so the sort key can
    refer to both; after sorting (and truncating to a finite
    ``step.limit``) only the projection columns are kept.
    """
    projections = self.generate_tuple(step.projections)
    projection_columns = [projection.alias_or_name for projection in step.projections]
    all_columns = list(context.columns) + projection_columns

    sink = self.table(all_columns)
    for reader, ctx in context:
        sink.append(reader.row + ctx.eval_tuple(projections))

    sort_tables = {None: sink}
    sort_tables.update(dict.fromkeys(context.tables, sink))
    sort_ctx = self.context(sort_tables)
    sort_ctx.sort(self.generate_tuple(step.key))

    if not math.isinf(step.limit):
        sort_ctx.table.rows = sort_ctx.table.rows[: step.limit]

    first = len(context.columns)
    stop = len(all_columns)
    output = Table(
        projection_columns,
        rows=[row[first:stop] for row in sort_ctx.table.rows],
    )
    return self.context({step.name: output})
def
set_operation(self, step, context):
def set_operation(self, step, context):
    """Combine the rows of ``step.left`` and ``step.right`` according to ``step.op``.

    ``Intersect``, ``Except`` and distinct ``Union`` go through Python sets;
    anything else concatenates the two row lists.
    """
    left = context.tables[step.left]
    right = context.tables[step.right]

    sink = self.table(left.columns)

    if issubclass(step.op, exp.Intersect):
        rows = list(set(left.rows).intersection(set(right.rows)))
    elif issubclass(step.op, exp.Except):
        rows = list(set(left.rows).difference(set(right.rows)))
    elif issubclass(step.op, exp.Union) and step.distinct:
        rows = list(set(left.rows).union(set(right.rows)))
    else:
        rows = left.rows + right.rows

    sink.rows = rows
    return self.context({step.name: sink})
class Python(Dialect):
    """Dialect whose generator renders SQL expressions as Python source text."""

    class Tokenizer(tokens.Tokenizer):
        # A single backslash is the only string escape character.
        STRING_ESCAPES = ["\\"]

    class Generator(generator.Generator):
        # Maps an expression class to a callable ``(generator, expression)``
        # that returns the generated text. The two bulk entries come first so
        # that the explicit entries after them take precedence for classes
        # appearing in both.
        TRANSFORMS = {
            **dict.fromkeys(subclasses(exp.__name__, exp.Binary), _rename),
            **dict.fromkeys(exp.ALL_FUNCTIONS, _rename),
            exp.Case: _case_sql,
            # An alias generates the same text as the aliased expression.
            exp.Alias: lambda self, e: self.sql(e.this),
            exp.Array: inline_array_sql,
            exp.And: lambda self, e: self.binary(e, "and"),
            exp.Between: _rename,
            exp.Boolean: lambda self, e: str(bool(e.this)),
            exp.Cast: lambda self, e: (
                f"CAST({self.sql(e.this)}, exp.DataType.Type.{e.args['to']})"
            ),
            # Columns become lookups of the form ``scope[table][column]``;
            # a missing table qualifier is rendered as ``None``.
            exp.Column: lambda self, e: (
                f"scope[{self.sql(e, 'table') or None}][{self.sql(e.this)}]"
            ),
            exp.Distinct: lambda self, e: f"set({self.sql(e, 'this')})",
            exp.Extract: lambda self, e: (
                f"EXTRACT('{e.name.lower()}', {self.sql(e, 'expression')})"
            ),
            # The candidate values are rendered inside ``{...}`` braces.
            exp.In: lambda self, e: (
                f"{self.sql(e, 'this')} in {{{self.expressions(e, flat=True)}}}"
            ),
            exp.Interval: lambda self, e: f"INTERVAL({self.sql(e.this)}, '{self.sql(e.unit)}')",
            # ``==`` when the left operand is a literal, ``is`` otherwise.
            exp.Is: lambda self, e: self.binary(
                e, "==" if isinstance(e.this, exp.Literal) else "is"
            ),
            exp.Lambda: _lambda_sql,
            exp.Not: lambda self, e: f"not {self.sql(e.this)}",
            exp.Null: lambda *_: "None",
            exp.Or: lambda self, e: self.binary(e, "or"),
            exp.Ordered: _ordered_py,
            # A star is rendered as the constant 1.
            exp.Star: lambda *_: "1",
        }
tokenizer_class =
<class 'sqlglot.executor.python.Python.Tokenizer'>
parser_class =
<class 'sqlglot.parser.Parser'>
generator_class =
<class 'sqlglot.executor.python.Python.Generator'>
Inherited Members
- sqlglot.dialects.dialect.Dialect
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- RESOLVES_IDENTIFIERS_AS_UPPERCASE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- get_or_raise
- format_time
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- parser
- generator
Inherited Members
class Generator(generator.Generator):
    # Expression class -> callable ``(generator, expression)`` returning the
    # generated text. The bulk ``_rename`` entries are listed first; every
    # explicit entry below them wins when a class appears in both.
    TRANSFORMS = {
        **dict.fromkeys(subclasses(exp.__name__, exp.Binary), _rename),
        **dict.fromkeys(exp.ALL_FUNCTIONS, _rename),
        exp.Case: _case_sql,
        # Aliases are transparent: only the aliased expression is generated.
        exp.Alias: lambda self, e: self.sql(e.this),
        exp.Array: inline_array_sql,
        exp.And: lambda self, e: self.binary(e, "and"),
        exp.Between: _rename,
        exp.Boolean: lambda self, e: str(bool(e.this)),
        exp.Cast: lambda self, e: (
            f"CAST({self.sql(e.this)}, exp.DataType.Type.{e.args['to']})"
        ),
        # Column references index into ``scope`` by table, then by column;
        # an absent table qualifier is rendered as ``None``.
        exp.Column: lambda self, e: (
            f"scope[{self.sql(e, 'table') or None}][{self.sql(e.this)}]"
        ),
        exp.Distinct: lambda self, e: f"set({self.sql(e, 'this')})",
        exp.Extract: lambda self, e: (
            f"EXTRACT('{e.name.lower()}', {self.sql(e, 'expression')})"
        ),
        # Candidate values are emitted between ``{`` and ``}``.
        exp.In: lambda self, e: (
            f"{self.sql(e, 'this')} in {{{self.expressions(e, flat=True)}}}"
        ),
        exp.Interval: lambda self, e: f"INTERVAL({self.sql(e.this)}, '{self.sql(e.unit)}')",
        # Literal left operands compare with ``==``; everything else uses ``is``.
        exp.Is: lambda self, e: self.binary(
            e, "==" if isinstance(e.this, exp.Literal) else "is"
        ),
        exp.Lambda: _lambda_sql,
        exp.Not: lambda self, e: f"not {self.sql(e.this)}",
        exp.Null: lambda *_: "None",
        exp.Or: lambda self, e: self.binary(e, "or"),
        exp.Ordered: _ordered_py,
        # ``*`` is generated as the constant 1.
        exp.Star: lambda *_: "1",
    }
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
TRANSFORMS =
{<class 'sqlglot.expressions.Add'>: <function _rename>, <class 'sqlglot.expressions.And'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.ArrayContained'>: <function _rename>, <class 'sqlglot.expressions.ArrayContains'>: <function _rename>, <class 'sqlglot.expressions.ArrayOverlaps'>: <function _rename>, <class 'sqlglot.expressions.Binary'>: <function _rename>, <class 'sqlglot.expressions.BitwiseAnd'>: <function _rename>, <class 'sqlglot.expressions.BitwiseLeftShift'>: <function _rename>, <class 'sqlglot.expressions.BitwiseOr'>: <function _rename>, <class 'sqlglot.expressions.BitwiseRightShift'>: <function _rename>, <class 'sqlglot.expressions.BitwiseXor'>: <function _rename>, <class 'sqlglot.expressions.Collate'>: <function _rename>, <class 'sqlglot.expressions.Connector'>: <function _rename>, <class 'sqlglot.expressions.DPipe'>: <function _rename>, <class 'sqlglot.expressions.Distance'>: <function _rename>, <class 'sqlglot.expressions.Div'>: <function _rename>, <class 'sqlglot.expressions.Dot'>: <function _rename>, <class 'sqlglot.expressions.EQ'>: <function _rename>, <class 'sqlglot.expressions.Escape'>: <function _rename>, <class 'sqlglot.expressions.GT'>: <function _rename>, <class 'sqlglot.expressions.GTE'>: <function _rename>, <class 'sqlglot.expressions.Glob'>: <function _rename>, <class 'sqlglot.expressions.ILike'>: <function _rename>, <class 'sqlglot.expressions.ILikeAny'>: <function _rename>, <class 'sqlglot.expressions.IntDiv'>: <function _rename>, <class 'sqlglot.expressions.Is'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.JSONArrayContains'>: <function _rename>, <class 'sqlglot.expressions.JSONBContains'>: <function _rename>, <class 'sqlglot.expressions.JSONBExtract'>: <function _rename>, <class 'sqlglot.expressions.JSONBExtractScalar'>: <function _rename>, <class 'sqlglot.expressions.JSONExtract'>: <function _rename>, <class 'sqlglot.expressions.JSONExtractScalar'>: <function _rename>, <class 
'sqlglot.expressions.Kwarg'>: <function _rename>, <class 'sqlglot.expressions.LT'>: <function _rename>, <class 'sqlglot.expressions.LTE'>: <function _rename>, <class 'sqlglot.expressions.Like'>: <function _rename>, <class 'sqlglot.expressions.LikeAny'>: <function _rename>, <class 'sqlglot.expressions.Mod'>: <function _rename>, <class 'sqlglot.expressions.Mul'>: <function _rename>, <class 'sqlglot.expressions.NEQ'>: <function _rename>, <class 'sqlglot.expressions.NullSafeEQ'>: <function _rename>, <class 'sqlglot.expressions.NullSafeNEQ'>: <function _rename>, <class 'sqlglot.expressions.Or'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Overlaps'>: <function _rename>, <class 'sqlglot.expressions.Pow'>: <function _rename>, <class 'sqlglot.expressions.RegexpLike'>: <function _rename>, <class 'sqlglot.expressions.SafeDPipe'>: <function _rename>, <class 'sqlglot.expressions.SimilarTo'>: <function _rename>, <class 'sqlglot.expressions.Slice'>: <function _rename>, <class 'sqlglot.expressions.Sub'>: <function _rename>, <class 'sqlglot.expressions.Xor'>: <function _rename>, <class 'sqlglot.expressions.Abs'>: <function _rename>, <class 'sqlglot.expressions.AnyValue'>: <function _rename>, <class 'sqlglot.expressions.ApproxDistinct'>: <function _rename>, <class 'sqlglot.expressions.ApproxQuantile'>: <function _rename>, <class 'sqlglot.expressions.Array'>: <function inline_array_sql>, <class 'sqlglot.expressions.ArrayAgg'>: <function _rename>, <class 'sqlglot.expressions.ArrayAll'>: <function _rename>, <class 'sqlglot.expressions.ArrayAny'>: <function _rename>, <class 'sqlglot.expressions.ArrayConcat'>: <function _rename>, <class 'sqlglot.expressions.ArrayFilter'>: <function _rename>, <class 'sqlglot.expressions.ArrayJoin'>: <function _rename>, <class 'sqlglot.expressions.ArraySize'>: <function _rename>, <class 'sqlglot.expressions.ArraySort'>: <function _rename>, <class 'sqlglot.expressions.ArraySum'>: <function _rename>, <class 
'sqlglot.expressions.ArrayUnionAgg'>: <function _rename>, <class 'sqlglot.expressions.Avg'>: <function _rename>, <class 'sqlglot.expressions.Case'>: <function _case_sql>, <class 'sqlglot.expressions.Cast'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.CastToStrType'>: <function _rename>, <class 'sqlglot.expressions.Ceil'>: <function _rename>, <class 'sqlglot.expressions.Coalesce'>: <function _rename>, <class 'sqlglot.expressions.Concat'>: <function _rename>, <class 'sqlglot.expressions.ConcatWs'>: <function _rename>, <class 'sqlglot.expressions.Count'>: <function _rename>, <class 'sqlglot.expressions.CountIf'>: <function _rename>, <class 'sqlglot.expressions.CurrentDate'>: <function _rename>, <class 'sqlglot.expressions.CurrentDatetime'>: <function _rename>, <class 'sqlglot.expressions.CurrentTime'>: <function _rename>, <class 'sqlglot.expressions.CurrentTimestamp'>: <function _rename>, <class 'sqlglot.expressions.CurrentUser'>: <function _rename>, <class 'sqlglot.expressions.Date'>: <function _rename>, <class 'sqlglot.expressions.DateAdd'>: <function _rename>, <class 'sqlglot.expressions.DateDiff'>: <function _rename>, <class 'sqlglot.expressions.DateFromParts'>: <function _rename>, <class 'sqlglot.expressions.DateStrToDate'>: <function _rename>, <class 'sqlglot.expressions.DateSub'>: <function _rename>, <class 'sqlglot.expressions.DateToDateStr'>: <function _rename>, <class 'sqlglot.expressions.DateToDi'>: <function _rename>, <class 'sqlglot.expressions.DateTrunc'>: <function _rename>, <class 'sqlglot.expressions.DatetimeAdd'>: <function _rename>, <class 'sqlglot.expressions.DatetimeDiff'>: <function _rename>, <class 'sqlglot.expressions.DatetimeSub'>: <function _rename>, <class 'sqlglot.expressions.DatetimeTrunc'>: <function _rename>, <class 'sqlglot.expressions.Day'>: <function _rename>, <class 'sqlglot.expressions.DayOfMonth'>: <function _rename>, <class 'sqlglot.expressions.DayOfWeek'>: <function _rename>, <class 
'sqlglot.expressions.DayOfYear'>: <function _rename>, <class 'sqlglot.expressions.Decode'>: <function _rename>, <class 'sqlglot.expressions.DiToDate'>: <function _rename>, <class 'sqlglot.expressions.Encode'>: <function _rename>, <class 'sqlglot.expressions.Exp'>: <function _rename>, <class 'sqlglot.expressions.Explode'>: <function _rename>, <class 'sqlglot.expressions.Extract'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Floor'>: <function _rename>, <class 'sqlglot.expressions.FromBase'>: <function _rename>, <class 'sqlglot.expressions.FromBase64'>: <function _rename>, <class 'sqlglot.expressions.GenerateSeries'>: <function _rename>, <class 'sqlglot.expressions.Greatest'>: <function _rename>, <class 'sqlglot.expressions.GroupConcat'>: <function _rename>, <class 'sqlglot.expressions.Hex'>: <function _rename>, <class 'sqlglot.expressions.Hll'>: <function _rename>, <class 'sqlglot.expressions.If'>: <function _rename>, <class 'sqlglot.expressions.Initcap'>: <function _rename>, <class 'sqlglot.expressions.JSONFormat'>: <function _rename>, <class 'sqlglot.expressions.JSONObject'>: <function _rename>, <class 'sqlglot.expressions.LastDateOfMonth'>: <function _rename>, <class 'sqlglot.expressions.Least'>: <function _rename>, <class 'sqlglot.expressions.Left'>: <function _rename>, <class 'sqlglot.expressions.Length'>: <function _rename>, <class 'sqlglot.expressions.Levenshtein'>: <function _rename>, <class 'sqlglot.expressions.Ln'>: <function _rename>, <class 'sqlglot.expressions.Log'>: <function _rename>, <class 'sqlglot.expressions.Log10'>: <function _rename>, <class 'sqlglot.expressions.Log2'>: <function _rename>, <class 'sqlglot.expressions.LogicalAnd'>: <function _rename>, <class 'sqlglot.expressions.LogicalOr'>: <function _rename>, <class 'sqlglot.expressions.Lower'>: <function _rename>, <class 'sqlglot.expressions.MD5'>: <function _rename>, <class 'sqlglot.expressions.MD5Digest'>: <function _rename>, <class 'sqlglot.expressions.Map'>: <function 
_rename>, <class 'sqlglot.expressions.MapFromEntries'>: <function _rename>, <class 'sqlglot.expressions.MatchAgainst'>: <function _rename>, <class 'sqlglot.expressions.Max'>: <function _rename>, <class 'sqlglot.expressions.Min'>: <function _rename>, <class 'sqlglot.expressions.Month'>: <function _rename>, <class 'sqlglot.expressions.MonthsBetween'>: <function _rename>, <class 'sqlglot.expressions.NextValueFor'>: <function _rename>, <class 'sqlglot.expressions.NumberToStr'>: <function _rename>, <class 'sqlglot.expressions.Nvl2'>: <function _rename>, <class 'sqlglot.expressions.OpenJSON'>: <function _rename>, <class 'sqlglot.expressions.ParameterizedAgg'>: <function _rename>, <class 'sqlglot.expressions.PercentileCont'>: <function _rename>, <class 'sqlglot.expressions.PercentileDisc'>: <function _rename>, <class 'sqlglot.expressions.Posexplode'>: <function _rename>, <class 'sqlglot.expressions.Quantile'>: <function _rename>, <class 'sqlglot.expressions.RangeN'>: <function _rename>, <class 'sqlglot.expressions.ReadCSV'>: <function _rename>, <class 'sqlglot.expressions.Reduce'>: <function _rename>, <class 'sqlglot.expressions.RegexpExtract'>: <function _rename>, <class 'sqlglot.expressions.RegexpILike'>: <function _rename>, <class 'sqlglot.expressions.RegexpReplace'>: <function _rename>, <class 'sqlglot.expressions.RegexpSplit'>: <function _rename>, <class 'sqlglot.expressions.Repeat'>: <function _rename>, <class 'sqlglot.expressions.Right'>: <function _rename>, <class 'sqlglot.expressions.Round'>: <function _rename>, <class 'sqlglot.expressions.RowNumber'>: <function _rename>, <class 'sqlglot.expressions.SHA'>: <function _rename>, <class 'sqlglot.expressions.SHA2'>: <function _rename>, <class 'sqlglot.expressions.SafeConcat'>: <function _rename>, <class 'sqlglot.expressions.SafeDivide'>: <function _rename>, <class 'sqlglot.expressions.SetAgg'>: <function _rename>, <class 'sqlglot.expressions.SortArray'>: <function _rename>, <class 'sqlglot.expressions.Split'>: 
<function _rename>, <class 'sqlglot.expressions.Sqrt'>: <function _rename>, <class 'sqlglot.expressions.StandardHash'>: <function _rename>, <class 'sqlglot.expressions.StarMap'>: <function _rename>, <class 'sqlglot.expressions.Stddev'>: <function _rename>, <class 'sqlglot.expressions.StddevPop'>: <function _rename>, <class 'sqlglot.expressions.StddevSamp'>: <function _rename>, <class 'sqlglot.expressions.StrPosition'>: <function _rename>, <class 'sqlglot.expressions.StrToDate'>: <function _rename>, <class 'sqlglot.expressions.StrToTime'>: <function _rename>, <class 'sqlglot.expressions.StrToUnix'>: <function _rename>, <class 'sqlglot.expressions.Struct'>: <function _rename>, <class 'sqlglot.expressions.StructExtract'>: <function _rename>, <class 'sqlglot.expressions.Substring'>: <function _rename>, <class 'sqlglot.expressions.Sum'>: <function _rename>, <class 'sqlglot.expressions.TimeAdd'>: <function _rename>, <class 'sqlglot.expressions.TimeDiff'>: <function _rename>, <class 'sqlglot.expressions.TimeStrToDate'>: <function _rename>, <class 'sqlglot.expressions.TimeStrToTime'>: <function _rename>, <class 'sqlglot.expressions.TimeStrToUnix'>: <function _rename>, <class 'sqlglot.expressions.TimeSub'>: <function _rename>, <class 'sqlglot.expressions.TimeToStr'>: <function _rename>, <class 'sqlglot.expressions.TimeToTimeStr'>: <function _rename>, <class 'sqlglot.expressions.TimeToUnix'>: <function _rename>, <class 'sqlglot.expressions.TimeTrunc'>: <function _rename>, <class 'sqlglot.expressions.TimestampAdd'>: <function _rename>, <class 'sqlglot.expressions.TimestampDiff'>: <function _rename>, <class 'sqlglot.expressions.TimestampSub'>: <function _rename>, <class 'sqlglot.expressions.TimestampTrunc'>: <function _rename>, <class 'sqlglot.expressions.ToBase64'>: <function _rename>, <class 'sqlglot.expressions.ToChar'>: <function _rename>, <class 'sqlglot.expressions.Transform'>: <function _rename>, <class 'sqlglot.expressions.Trim'>: <function _rename>, <class 
'sqlglot.expressions.TryCast'>: <function _rename>, <class 'sqlglot.expressions.TsOrDiToDi'>: <function _rename>, <class 'sqlglot.expressions.TsOrDsAdd'>: <function _rename>, <class 'sqlglot.expressions.TsOrDsToDate'>: <function _rename>, <class 'sqlglot.expressions.TsOrDsToDateStr'>: <function _rename>, <class 'sqlglot.expressions.Unhex'>: <function _rename>, <class 'sqlglot.expressions.UnixToStr'>: <function _rename>, <class 'sqlglot.expressions.UnixToTime'>: <function _rename>, <class 'sqlglot.expressions.UnixToTimeStr'>: <function _rename>, <class 'sqlglot.expressions.Upper'>: <function _rename>, <class 'sqlglot.expressions.VarMap'>: <function _rename>, <class 'sqlglot.expressions.Variance'>: <function _rename>, <class 'sqlglot.expressions.VariancePop'>: <function _rename>, <class 'sqlglot.expressions.Week'>: <function _rename>, <class 'sqlglot.expressions.WeekOfYear'>: <function _rename>, <class 'sqlglot.expressions.When'>: <function _rename>, <class 'sqlglot.expressions.XMLTable'>: <function _rename>, <class 'sqlglot.expressions.Year'>: <function _rename>, <class 'sqlglot.expressions.Alias'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Between'>: <function _rename>, <class 'sqlglot.expressions.Boolean'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Column'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Distinct'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.In'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Interval'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Lambda'>: <function _lambda_sql>, <class 'sqlglot.expressions.Not'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Null'>: <function Python.Generator.<lambda>>, <class 'sqlglot.expressions.Ordered'>: <function _ordered_py>, <class 'sqlglot.expressions.Star'>: <function Python.Generator.<lambda>>}
@classmethod
def
can_identify(text: str, identify: str | bool = 'safe') -> bool:
248 @classmethod 249 def can_identify(cls, text: str, identify: str | bool = "safe") -> bool: 250 """Checks if text can be identified given an identify option. 251 252 Args: 253 text: The text to check. 254 identify: 255 "always" or `True`: Always returns true. 256 "safe": True if the identifier is case-insensitive. 257 258 Returns: 259 Whether or not the given text can be identified. 260 """ 261 if identify is True or identify == "always": 262 return True 263 264 if identify == "safe": 265 return not cls.case_sensitive(text) 266 267 return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or `True`: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- LOCKING_READS_SUPPORTED
- EXPLICIT_UNION
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SIZE_IS_PERCENT
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- JOIN_HINTS
- TABLE_HINTS
- QUERY_HINTS
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- COLUMN_JOIN_MARKS_SUPPORTED
- TYPE_MAPPING
- STAR_MAPPING
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- PROPERTIES_LOCATION
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- SENTINEL_LINE_BREAK
- INDEX_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- IDENTIFIERS_CAN_START_WITH_DIGIT
- STRICT_STRING_CONCAT
- NORMALIZE_FUNCTIONS
- NULL_ORDERING
- ESCAPE_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- normalize_functions
- unsupported_messages
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypesize_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_having_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- safebracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- cast_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql