sqlglot.helper
from __future__ import annotations

import inspect
import logging
import re
import sys
import typing as t
from collections.abc import Collection
from contextlib import contextmanager
from copy import copy
from enum import Enum

if t.TYPE_CHECKING:
    from sqlglot import exp
    from sqlglot.expressions import Expression

    T = t.TypeVar("T")
    E = t.TypeVar("E", bound=Expression)

# Matches the empty position before every upper-case letter that is not at the start.
CAMEL_CASE_PATTERN = re.compile("(?<!^)(?=[A-Z])")
PYTHON_VERSION = sys.version_info[:2]
logger = logging.getLogger("sqlglot")


class AutoName(Enum):
    """
    Base class for enums whose `auto()` members take their own name as value.

    `Enum` calls `_generate_next_value_` to compute the value of each `auto()` member;
    returning `name` unchanged makes the value identical to the member's name.
    """

    def _generate_next_value_(name, _start, _count, _last_values):  # type: ignore
        return name


def seq_get(seq: t.Sequence[T], index: int) -> t.Optional[T]:
    """Returns the value in `seq` at position `index`, or `None` if `index` is out of bounds."""
    try:
        item = seq[index]
    except IndexError:
        return None
    return item


@t.overload
def ensure_list(value: t.Collection[T]) -> t.List[T]:
    ...


@t.overload
def ensure_list(value: T) -> t.List[T]:
    ...


def ensure_list(value):
    """
    Ensures that a value is a list, otherwise casts or wraps it into one.

    Args:
        value: the value of interest.

    Returns:
        An empty list if the value is `None`, a new list with the same items if it's a list
        or a tuple, or else the value wrapped in a single-element list.
    """
    if isinstance(value, (list, tuple)):
        return list(value)
    return [] if value is None else [value]


@t.overload
def ensure_collection(value: t.Collection[T]) -> t.Collection[T]:
    ...


@t.overload
def ensure_collection(value: T) -> t.Collection[T]:
    ...


def ensure_collection(value):
    """
    Ensures that a value is a collection (excluding `str` and `bytes`), otherwise wraps it into a list.

    Args:
        value: the value of interest.

    Returns:
        An empty list if the value is `None`, the value itself if it's a collection, or else
        the value wrapped in a list.
    """
    if value is None:
        return []
    if isinstance(value, (str, bytes)) or not isinstance(value, Collection):
        return [value]
    return value


def csv(*args, sep: str = ", ") -> str:
    """
    Formats any number of string arguments as CSV.

    Args:
        args: the string arguments to format. Falsy arguments (e.g. `""` or `None`) are skipped.
        sep: the argument separator.

    Returns:
        The arguments formatted as a CSV string.
    """
    return sep.join(filter(None, args))


def subclasses(
    module_name: str,
    classes: t.Type | t.Tuple[t.Type, ...],
    exclude: t.Type | t.Tuple[t.Type, ...] = (),
) -> t.List[t.Type]:
    """
    Returns all subclasses for a collection of classes, possibly excluding some of them.

    Args:
        module_name: the name of the module to search for subclasses in. It must already be
            imported, because it is looked up in `sys.modules`.
        classes: class(es) we want to find the subclasses of.
        exclude: class(es) we want to exclude from the returned list.

    Returns:
        The target subclasses.
    """
    # A single class is not a container, so wrap it before using `in` below.
    excluded = (exclude,) if inspect.isclass(exclude) else exclude

    def is_target(obj: t.Any) -> bool:
        return inspect.isclass(obj) and issubclass(obj, classes) and obj not in excluded

    return [obj for _, obj in inspect.getmembers(sys.modules[module_name], is_target)]


def apply_index_offset(expressions: t.List[t.Optional[E]], offset: int) -> t.List[t.Optional[E]]:
    """
    Applies an offset to a given integer literal expression.

    Args:
        expressions: the expression the offset will be applied to, wrapped in a list.
        offset: the offset that will be applied.

    Returns:
        A copy of the expression with the offset applied to it, wrapped in a list. If `offset`
        is falsy, if `expressions` does not contain exactly one item, or if that item is not
        an integer literal, `expressions` is returned unaffected.
    """
    if offset and len(expressions) == 1:
        candidate = expressions[0]

        if candidate and candidate.is_int:
            # Work on a copy so the caller's expression is left untouched.
            shifted = candidate.copy()
            logger.warning("Applying array index offset (%s)", offset)
            shifted.args["this"] = str(int(shifted.this) + offset)  # type: ignore
            return [shifted]

    return expressions


def camel_to_snake_case(name: str) -> str:
    """
    Converts `name` from camelCase to snake_case and returns the result.

    Note that the result is upper-cased, e.g. `fooBar` becomes `FOO_BAR`.
    """
    underscored = CAMEL_CASE_PATTERN.sub("_", name)
    return underscored.upper()


def while_changing(
    expression: t.Optional[Expression], func: t.Callable[[t.Optional[Expression]], E]
) -> E:
    """
    Applies a transformation to a given expression until a fix point is reached.

    The fix point is detected by comparing the hash of the expression before and after
    each application of `func`.

    Args:
        expression: the expression to be transformed.
        func: the transformation to be applied.

    Returns:
        The transformed expression.
    """
    previous = hash(expression)
    expression = func(expression)

    while hash(expression) != previous:
        previous = hash(expression)
        expression = func(expression)

    return expression


def tsort(dag: t.Dict[T, t.List[T]]) -> t.List[T]:
    """
    Sorts a given directed acyclic graph in topological order.

    Args:
        dag: the graph to be sorted, mapping each node to the list of nodes it depends on.
            Nodes that only appear as dependencies are included in the result as well.

    Returns:
        A list that contains all of the graph's nodes in topological order, i.e. every node
        appears after all of its dependencies.

    Raises:
        ValueError: if the graph contains a cycle.
    """
    result: t.List[T] = []
    # Mirrors `result` so the "already emitted" check is O(1) instead of a list scan.
    done: t.Set[T] = set()

    def visit(node: T, in_progress: t.Set[T]) -> None:
        if node in done:
            return
        if node in in_progress:
            raise ValueError("Cycle error")

        in_progress.add(node)

        for dep in dag.get(node, []):
            visit(dep, in_progress)

        in_progress.remove(node)
        done.add(node)
        result.append(node)

    for node in dag:
        visit(node, set())

    return result


def open_file(file_name: str) -> t.TextIO:
    """
    Open a file that may be compressed as gzip and return it as a UTF-8 text stream.

    The file is opened with `newline=""`, so line endings are handed to the caller untranslated.

    Args:
        file_name: the path of the file to open.

    Returns:
        A text file object. The caller is responsible for closing it.
    """
    # Sniff the two-byte gzip magic number rather than trusting the file extension.
    with open(file_name, "rb") as f:
        gzipped = f.read(2) == b"\x1f\x8b"

    if gzipped:
        import gzip

        # Decode explicitly as UTF-8 so compressed and plain files are read the same way,
        # regardless of the platform's locale encoding.
        return gzip.open(file_name, "rt", encoding="utf-8", newline="")

    return open(file_name, encoding="utf-8", newline="")


@contextmanager
def csv_reader(read_csv: exp.ReadCSV) -> t.Any:
    """
    Returns a csv reader given the expression `READ_CSV(name, ['delimiter', '|', ...])`.

    Args:
        read_csv: a `ReadCSV` function call

    Yields:
        A python csv reader. The underlying file is closed when the context exits.
    """
    # Aliased because this module defines its own `csv` function.
    import csv as csv_

    delimiter = ","
    # Options arrive as a flat [key, value, key, value, ...] sequence; zipping the iterator
    # with itself walks it in pairs.
    option_names = iter(arg.name for arg in read_csv.expressions)
    for key, value in zip(option_names, option_names):
        if key == "delimiter":
            delimiter = value

    # Open the file only once the options are parsed, so a failure above cannot leak it.
    with open_file(read_csv.name) as file:
        yield csv_.reader(file, delimiter=delimiter)


def find_new_name(taken: t.Collection[str], base: str) -> str:
    """
    Searches for a new name.

    Args:
        taken: a collection of taken names.
        base: base name to alter.

    Returns:
        `base` itself if it's not taken, otherwise the first available name of the form
        `{base}_{i}`, with `i` starting at 2.
    """
    if base not in taken:
        return base

    suffix = 2
    while f"{base}_{suffix}" in taken:
        suffix += 1

    return f"{base}_{suffix}"


def object_to_dict(obj: t.Any, **kwargs) -> t.Dict:
    """
    Returns a dictionary created from an object's attributes.

    Each attribute value is shallow-copied; `kwargs` are added on top and override
    attributes with the same name.
    """
    attributes = {name: copy(attr) for name, attr in vars(obj).copy().items()}
    attributes.update(kwargs)
    return attributes


def split_num_words(
    value: str, sep: str, min_num_words: int, fill_from_start: bool = True
) -> t.List[t.Optional[str]]:
    """
    Perform a split on a value and return N words as a result with `None` used for words that don't exist.

    Args:
        value: the value to be split.
        sep: the value to use to split on.
        min_num_words: the minimum number of words that are going to be in the result.
        fill_from_start: indicates whether the `None` values should be inserted at the start
            or at the end of the list.

    Examples:
        >>> split_num_words("db.table", ".", 3)
        [None, 'db', 'table']
        >>> split_num_words("db.table", ".", 3, fill_from_start=False)
        ['db', 'table', None]
        >>> split_num_words("db.table", ".", 1)
        ['db', 'table']

    Returns:
        The list of words returned by `split`, possibly augmented by a number of `None` values.
    """
    words = value.split(sep)
    # A non-positive count yields an empty list, so no padding is added.
    padding = [None] * (min_num_words - len(words))
    return padding + words if fill_from_start else words + padding


def is_iterable(value: t.Any) -> bool:
    """
    Checks if the value is an iterable, excluding the types `str` and `bytes`.

    Examples:
        >>> is_iterable([1,2])
        True
        >>> is_iterable("test")
        False

    Args:
        value: the value to check if it is an iterable.

    Returns:
        A `bool` value indicating if it is an iterable.
    """
    if isinstance(value, (str, bytes)):
        return False
    return hasattr(value, "__iter__")


def flatten(values: t.Iterable[t.Iterable[t.Any] | t.Any]) -> t.Iterator[t.Any]:
    """
    Flattens an iterable that can contain both iterable and non-iterable elements. Objects of
    type `str` and `bytes` are not regarded as iterables.

    Examples:
        >>> list(flatten([[1, 2], 3, {4}, (5, "bla")]))
        [1, 2, 3, 4, 5, 'bla']
        >>> list(flatten([1, 2, 3]))
        [1, 2, 3]

    Args:
        values: the value to be flattened.

    Yields:
        Non-iterable elements in `values`.
    """
    for item in values:
        if not is_iterable(item):
            yield item
            continue

        yield from flatten(item)


def count_params(function: t.Callable) -> int:
    """
    Returns the number of formal parameters expected by a function, without counting "self"
    and "cls", in case of instance and class methods, respectively.
    """
    positional = function.__code__.co_argcount
    if inspect.ismethod(function):
        # A bound method's code object still counts the implicit first argument.
        return positional - 1
    return positional


def dict_depth(d: t.Dict) -> int:
    """
    Get the nesting depth of a dictionary.

    Only the first value of each dictionary is inspected.

    For example:
        >>> dict_depth(None)
        0
        >>> dict_depth({})
        1
        >>> dict_depth({"a": "b"})
        1
        >>> dict_depth({"a": {}})
        2
        >>> dict_depth({"a": {"b": {}}})
        3

    Args:
        d (dict): dictionary

    Returns:
        int: depth
    """
    try:
        first_value = next(iter(d.values()))
    except AttributeError:
        # d doesn't have attribute "values"
        return 0
    except StopIteration:
        # d.values() returns an empty sequence
        return 1

    return 1 + dict_depth(first_value)


def first(it: t.Iterable[T]) -> T:
    """Returns the first element from an iterable.

    Useful for sets.
    """
    return next(iter(it))
class AutoName(Enum):
    """
    Base class for enums whose `auto()` members take their own name as value.

    `Enum` calls `_generate_next_value_` to compute the value of each `auto()` member;
    returning `name` unchanged makes the value identical to the member's name.
    """

    def _generate_next_value_(name, _start, _count, _last_values):  # type: ignore
        return name
Base class for creating enum classes where the value produced by `auto()` is the string form of the corresponding member's name.
Inherited Members
- enum.Enum
- name
- value
def seq_get(seq: t.Sequence[T], index: int) -> t.Optional[T]:
    """
    Returns the value in `seq` at position `index`, or `None` if `index` is out of bounds.

    Negative indices are handled by the sequence itself, so `-1` refers to the last item.
    """
    try:
        item = seq[index]
    except IndexError:
        return None
    return item
Returns the value in `seq` at position `index`, or `None` if `index` is out of bounds.
def ensure_list(value):
    """
    Ensures that a value is a list, otherwise casts or wraps it into one.

    Args:
        value: the value of interest.

    Returns:
        An empty list if the value is `None`, a new list with the same items if it's a list
        or a tuple, or else the value wrapped in a single-element list.
    """
    if isinstance(value, (list, tuple)):
        return list(value)
    return [] if value is None else [value]
Ensures that a value is a list, otherwise casts or wraps it into one.
Arguments:
- value: the value of interest.
Returns:
The value cast as a list if it's a list or a tuple, or else the value wrapped in a list.
def ensure_collection(value):
    """
    Ensures that a value is a collection (excluding `str` and `bytes`), otherwise wraps it into a list.

    Args:
        value: the value of interest.

    Returns:
        An empty list if the value is `None`, the value itself if it's a collection, or else
        the value wrapped in a list.
    """
    if value is None:
        return []
    if isinstance(value, (str, bytes)) or not isinstance(value, Collection):
        return [value]
    return value
Ensures that a value is a collection (excluding `str` and `bytes`), otherwise wraps it into a list.
Arguments:
- value: the value of interest.
Returns:
The value if it's a collection, or else the value wrapped in a list.
def csv(*args, sep: str = ", ") -> str:
    """
    Formats any number of string arguments as CSV.

    Args:
        args: the string arguments to format. Falsy arguments (e.g. `""` or `None`) are skipped.
        sep: the argument separator.

    Returns:
        The arguments formatted as a CSV string.
    """
    return sep.join(filter(None, args))
Formats any number of string arguments as CSV.
Arguments:
- args: the string arguments to format.
- sep: the argument separator.
Returns:
The arguments formatted as a CSV string.
def subclasses(
    module_name: str,
    classes: t.Type | t.Tuple[t.Type, ...],
    exclude: t.Type | t.Tuple[t.Type, ...] = (),
) -> t.List[t.Type]:
    """
    Returns all subclasses for a collection of classes, possibly excluding some of them.

    Args:
        module_name: the name of the module to search for subclasses in. It must already be
            imported, because it is looked up in `sys.modules`.
        classes: class(es) we want to find the subclasses of.
        exclude: class(es) we want to exclude from the returned list.

    Returns:
        The target subclasses.
    """
    # A single class is not a container, so wrap it before using `in` below.
    excluded = (exclude,) if inspect.isclass(exclude) else exclude

    def is_target(obj: t.Any) -> bool:
        return inspect.isclass(obj) and issubclass(obj, classes) and obj not in excluded

    return [obj for _, obj in inspect.getmembers(sys.modules[module_name], is_target)]
Returns all subclasses for a collection of classes, possibly excluding some of them.
Arguments:
- module_name: the name of the module to search for subclasses in.
- classes: class(es) we want to find the subclasses of.
- exclude: class(es) we want to exclude from the returned list.
Returns:
The target subclasses.
def apply_index_offset(expressions: t.List[t.Optional[E]], offset: int) -> t.List[t.Optional[E]]:
    """
    Applies an offset to a given integer literal expression.

    Args:
        expressions: the expression the offset will be applied to, wrapped in a list.
        offset: the offset that will be applied.

    Returns:
        A copy of the expression with the offset applied to it, wrapped in a list. If `offset`
        is falsy, if `expressions` does not contain exactly one item, or if that item is not
        an integer literal, `expressions` is returned unaffected.
    """
    if offset and len(expressions) == 1:
        candidate = expressions[0]

        if candidate and candidate.is_int:
            # Work on a copy so the caller's expression is left untouched.
            shifted = candidate.copy()
            logger.warning("Applying array index offset (%s)", offset)
            shifted.args["this"] = str(int(shifted.this) + offset)  # type: ignore
            return [shifted]

    return expressions
Applies an offset to a given integer literal expression.
Arguments:
- expressions: the expression the offset will be applied to, wrapped in a list.
- offset: the offset that will be applied.
Returns:
The original expression with the offset applied to it, wrapped in a list. If the provided `expressions` argument does not contain exactly one expression, it is returned unaffected.
def camel_to_snake_case(name: str) -> str:
    """
    Converts `name` from camelCase to snake_case and returns the result.

    Note that the result is upper-cased, e.g. `fooBar` becomes `FOO_BAR`.
    """
    underscored = CAMEL_CASE_PATTERN.sub("_", name)
    return underscored.upper()
Converts `name` from camelCase to upper-cased snake_case (e.g. `fooBar` becomes `FOO_BAR`) and returns the result.
def while_changing(
    expression: t.Optional[Expression], func: t.Callable[[t.Optional[Expression]], E]
) -> E:
    """
    Applies a transformation to a given expression until a fix point is reached.

    The fix point is detected by comparing the hash of the expression before and after
    each application of `func`.

    Args:
        expression: the expression to be transformed.
        func: the transformation to be applied.

    Returns:
        The transformed expression.
    """
    previous = hash(expression)
    expression = func(expression)

    while hash(expression) != previous:
        previous = hash(expression)
        expression = func(expression)

    return expression
Applies a transformation to a given expression until a fix point is reached.
Arguments:
- expression: the expression to be transformed.
- func: the transformation to be applied.
Returns:
The transformed expression.
def tsort(dag: t.Dict[T, t.List[T]]) -> t.List[T]:
    """
    Sorts a given directed acyclic graph in topological order.

    Args:
        dag: the graph to be sorted, mapping each node to the list of nodes it depends on.
            Nodes that only appear as dependencies are included in the result as well.

    Returns:
        A list that contains all of the graph's nodes in topological order, i.e. every node
        appears after all of its dependencies.

    Raises:
        ValueError: if the graph contains a cycle.
    """
    result: t.List[T] = []
    # Mirrors `result` so the "already emitted" check is O(1) instead of a list scan.
    done: t.Set[T] = set()

    def visit(node: T, in_progress: t.Set[T]) -> None:
        if node in done:
            return
        if node in in_progress:
            raise ValueError("Cycle error")

        in_progress.add(node)

        for dep in dag.get(node, []):
            visit(dep, in_progress)

        in_progress.remove(node)
        done.add(node)
        result.append(node)

    for node in dag:
        visit(node, set())

    return result
Sorts a given directed acyclic graph in topological order.
Arguments:
- dag: the graph to be sorted.
Returns:
A list that contains all of the graph's nodes in topological order.
def open_file(file_name: str) -> t.TextIO:
    """
    Open a file that may be compressed as gzip and return it as a UTF-8 text stream.

    The file is opened with `newline=""`, so line endings are handed to the caller untranslated.

    Args:
        file_name: the path of the file to open.

    Returns:
        A text file object. The caller is responsible for closing it.
    """
    # Sniff the two-byte gzip magic number rather than trusting the file extension.
    with open(file_name, "rb") as f:
        gzipped = f.read(2) == b"\x1f\x8b"

    if gzipped:
        import gzip

        # Decode explicitly as UTF-8 so compressed and plain files are read the same way,
        # regardless of the platform's locale encoding.
        return gzip.open(file_name, "rt", encoding="utf-8", newline="")

    return open(file_name, encoding="utf-8", newline="")
Open a file that may be compressed as gzip and return it in universal newline mode.
@contextmanager
def csv_reader(read_csv: exp.ReadCSV) -> t.Any:
    """
    Returns a csv reader given the expression `READ_CSV(name, ['delimiter', '|', ...])`.

    Args:
        read_csv: a `ReadCSV` function call

    Yields:
        A python csv reader. The underlying file is closed when the context exits.
    """
    # Aliased so the stdlib module cannot be confused with any other `csv` name in scope.
    import csv as csv_

    delimiter = ","
    # Options arrive as a flat [key, value, key, value, ...] sequence; zipping the iterator
    # with itself walks it in pairs.
    option_names = iter(arg.name for arg in read_csv.expressions)
    for key, value in zip(option_names, option_names):
        if key == "delimiter":
            delimiter = value

    # Open the file only once the options are parsed, so a failure above cannot leak it.
    with open_file(read_csv.name) as file:
        yield csv_.reader(file, delimiter=delimiter)
Returns a csv reader given the expression `READ_CSV(name, ['delimiter', '|', ...])`.
Arguments:
- read_csv: a `ReadCSV` function call.
Yields:
A python csv reader.
def find_new_name(taken: t.Collection[str], base: str) -> str:
    """
    Searches for a new name.

    Args:
        taken: a collection of taken names.
        base: base name to alter.

    Returns:
        `base` itself if it's not taken, otherwise the first available name of the form
        `{base}_{i}`, with `i` starting at 2.
    """
    if base not in taken:
        return base

    suffix = 2
    while f"{base}_{suffix}" in taken:
        suffix += 1

    return f"{base}_{suffix}"
Searches for a new name.
Arguments:
- taken: a collection of taken names.
- base: base name to alter.
Returns:
The new, available name.
def object_to_dict(obj: t.Any, **kwargs) -> t.Dict:
    """
    Returns a dictionary created from an object's attributes.

    Each attribute value is shallow-copied; `kwargs` are added on top and override
    attributes with the same name.
    """
    attributes = {name: copy(attr) for name, attr in vars(obj).copy().items()}
    attributes.update(kwargs)
    return attributes
Returns a dictionary created from an object's attributes.
def split_num_words(
    value: str, sep: str, min_num_words: int, fill_from_start: bool = True
) -> t.List[t.Optional[str]]:
    """
    Perform a split on a value and return N words as a result with `None` used for words that don't exist.

    Args:
        value: the value to be split.
        sep: the value to use to split on.
        min_num_words: the minimum number of words that are going to be in the result.
        fill_from_start: indicates whether the `None` values should be inserted at the start
            or at the end of the list.

    Examples:
        >>> split_num_words("db.table", ".", 3)
        [None, 'db', 'table']
        >>> split_num_words("db.table", ".", 3, fill_from_start=False)
        ['db', 'table', None]
        >>> split_num_words("db.table", ".", 1)
        ['db', 'table']

    Returns:
        The list of words returned by `split`, possibly augmented by a number of `None` values.
    """
    words = value.split(sep)
    # A non-positive count yields an empty list, so no padding is added.
    padding = [None] * (min_num_words - len(words))
    return padding + words if fill_from_start else words + padding
Perform a split on a value and return N words as a result, with `None` used for words that don't exist.
Arguments:
- value: the value to be split.
- sep: the value to use to split on.
- min_num_words: the minimum number of words that are going to be in the result.
- fill_from_start: indicates whether the `None` values should be inserted at the start or at the end of the list.
Examples:
>>> split_num_words("db.table", ".", 3)
[None, 'db', 'table']
>>> split_num_words("db.table", ".", 3, fill_from_start=False)
['db', 'table', None]
>>> split_num_words("db.table", ".", 1)
['db', 'table']
Returns:
The list of words returned by `split`, possibly augmented by a number of `None` values.
def is_iterable(value: t.Any) -> bool:
    """
    Checks if the value is an iterable, excluding the types `str` and `bytes`.

    Examples:
        >>> is_iterable([1,2])
        True
        >>> is_iterable("test")
        False

    Args:
        value: the value to check if it is an iterable.

    Returns:
        A `bool` value indicating if it is an iterable.
    """
    if isinstance(value, (str, bytes)):
        return False
    return hasattr(value, "__iter__")
Checks if the value is an iterable, excluding the types `str` and `bytes`.
Examples:
>>> is_iterable([1,2])
True
>>> is_iterable("test")
False
Arguments:
- value: the value to check if it is an iterable.
Returns:
A `bool` value indicating if it is an iterable.
def flatten(values: t.Iterable[t.Iterable[t.Any] | t.Any]) -> t.Iterator[t.Any]:
    """
    Flattens an iterable that can contain both iterable and non-iterable elements. Objects of
    type `str` and `bytes` are not regarded as iterables.

    Examples:
        >>> list(flatten([[1, 2], 3, {4}, (5, "bla")]))
        [1, 2, 3, 4, 5, 'bla']
        >>> list(flatten([1, 2, 3]))
        [1, 2, 3]

    Args:
        values: the value to be flattened.

    Yields:
        Non-iterable elements in `values`.
    """
    for item in values:
        if not is_iterable(item):
            yield item
            continue

        # Nested iterables are flattened recursively, depth-first.
        yield from flatten(item)
Flattens an iterable that can contain both iterable and non-iterable elements. Objects of type `str` and `bytes` are not regarded as iterables.
Examples:
>>> list(flatten([[1, 2], 3, {4}, (5, "bla")]))
[1, 2, 3, 4, 5, 'bla']
>>> list(flatten([1, 2, 3]))
[1, 2, 3]
Arguments:
- values: the value to be flattened.
Yields:
Non-iterable elements in `values`.
def count_params(function: t.Callable) -> int:
    """
    Returns the number of formal parameters expected by a function, without counting "self"
    and "cls", in case of instance and class methods, respectively.

    The count is read from the function's code object (`co_argcount`).
    """
    positional = function.__code__.co_argcount
    if inspect.ismethod(function):
        # A bound method's code object still counts the implicit first argument.
        return positional - 1
    return positional
Returns the number of formal parameters expected by a function, without counting "self" and "cls", in case of instance and class methods, respectively.
def dict_depth(d: t.Dict) -> int:
    """
    Get the nesting depth of a dictionary.

    Only the first value of each dictionary is inspected.

    For example:
        >>> dict_depth(None)
        0
        >>> dict_depth({})
        1
        >>> dict_depth({"a": "b"})
        1
        >>> dict_depth({"a": {}})
        2
        >>> dict_depth({"a": {"b": {}}})
        3

    Args:
        d (dict): dictionary

    Returns:
        int: depth
    """
    try:
        first_value = next(iter(d.values()))
    except AttributeError:
        # d doesn't have attribute "values"
        return 0
    except StopIteration:
        # d.values() returns an empty sequence
        return 1

    return 1 + dict_depth(first_value)
Get the nesting depth of a dictionary.
For example:
>>> dict_depth(None)
0
>>> dict_depth({})
1
>>> dict_depth({"a": "b"})
1
>>> dict_depth({"a": {}})
2
>>> dict_depth({"a": {"b": {}}})
3
Arguments:
- d (dict): dictionary
Returns:
int: depth
def first(it: t.Iterable[T]) -> T:
    """Returns the first element from an iterable.

    Useful for sets, which cannot be indexed. An empty iterable raises `StopIteration`.
    """
    return next(iter(it))
Returns the first element from an iterable.
Useful for sets.