sqlglot.dialects.databricks
# Databricks dialect: a Spark subclass that overrides selected parser,
# generator and tokenizer tables.
from __future__ import annotations

from sqlglot import exp, transforms
from sqlglot.dialects.dialect import parse_date_delta
from sqlglot.dialects.spark import Spark
from sqlglot.dialects.tsql import generate_date_delta_with_unit_sql
from sqlglot.tokens import TokenType


class Databricks(Spark):
    class Parser(Spark.Parser):
        # NOTE(review): by its name this makes a single-argument LOG parse as
        # LN; the flag is consumed by the base parser, which is not shown here.
        LOG_DEFAULTS_TO_LN = True

        # Spark's function builders, with the date-delta functions rebuilt
        # through parse_date_delta.
        FUNCTIONS = {
            **Spark.Parser.FUNCTIONS,
            "DATEADD": parse_date_delta(exp.DateAdd),
            "DATE_ADD": parse_date_delta(exp.DateAdd),
            "DATEDIFF": parse_date_delta(exp.DateDiff),
        }

        # Spark's FACTOR table plus a COLON entry that maps to exp.JSONExtract.
        FACTOR = {
            **Spark.Parser.FACTOR,
            TokenType.COLON: exp.JSONExtract,
        }

    class Generator(Spark.Generator):
        # Spark's transforms with Databricks-specific overrides layered on top.
        TRANSFORMS = {
            **Spark.Generator.TRANSFORMS,
            exp.DateAdd: generate_date_delta_with_unit_sql,
            exp.DateDiff: generate_date_delta_with_unit_sql,
            # Mirrors the parser's COLON entry: emitted via self.binary with ":".
            exp.JSONExtract: lambda self, e: self.binary(e, ":"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        }

        # Same character the Tokenizer below registers as TokenType.PARAMETER.
        PARAMETER_TOKEN = "$"

    class Tokenizer(Spark.Tokenizer):
        # NOTE(review): empty list presumably disables the hex-string literal
        # forms inherited from Spark; confirm against the base tokenizer.
        HEX_STRINGS = []

        SINGLE_TOKENS = {
            **Spark.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }
# Databricks dialect: a Spark subclass that overrides selected parser,
# generator and tokenizer tables.
class Databricks(Spark):
    class Parser(Spark.Parser):
        # NOTE(review): by its name this makes a single-argument LOG parse as
        # LN; the flag is consumed by the base parser, which is not shown here.
        LOG_DEFAULTS_TO_LN = True

        # Spark's function builders, with the date-delta functions rebuilt
        # through parse_date_delta.
        FUNCTIONS = {
            **Spark.Parser.FUNCTIONS,
            "DATEADD": parse_date_delta(exp.DateAdd),
            "DATE_ADD": parse_date_delta(exp.DateAdd),
            "DATEDIFF": parse_date_delta(exp.DateDiff),
        }

        # Spark's FACTOR table plus a COLON entry that maps to exp.JSONExtract.
        FACTOR = {
            **Spark.Parser.FACTOR,
            TokenType.COLON: exp.JSONExtract,
        }

    class Generator(Spark.Generator):
        # Spark's transforms with Databricks-specific overrides layered on top.
        TRANSFORMS = {
            **Spark.Generator.TRANSFORMS,
            exp.DateAdd: generate_date_delta_with_unit_sql,
            exp.DateDiff: generate_date_delta_with_unit_sql,
            # Mirrors the parser's COLON entry: emitted via self.binary with ":".
            exp.JSONExtract: lambda self, e: self.binary(e, ":"),
            exp.Select: transforms.preprocess(
                [
                    transforms.eliminate_distinct_on,
                    transforms.unnest_to_explode,
                ]
            ),
            exp.ToChar: lambda self, e: self.function_fallback_sql(e),
        }

        # Same character the Tokenizer below registers as TokenType.PARAMETER.
        PARAMETER_TOKEN = "$"

    class Tokenizer(Spark.Tokenizer):
        # NOTE(review): empty list presumably disables the hex-string literal
        # forms inherited from Spark; confirm against the base tokenizer.
        HEX_STRINGS = []

        SINGLE_TOKENS = {
            **Spark.Tokenizer.SINGLE_TOKENS,
            "$": TokenType.PARAMETER,
        }
# Parser overrides for the Databricks dialect, layered on Spark.Parser.
class Parser(Spark.Parser):
    # NOTE(review): by its name this makes a single-argument LOG parse as LN;
    # the flag is consumed by the base parser, which is not shown here.
    LOG_DEFAULTS_TO_LN = True

    # Spark's function builders, with the date-delta functions rebuilt
    # through parse_date_delta.
    FUNCTIONS = {
        **Spark.Parser.FUNCTIONS,
        "DATEADD": parse_date_delta(exp.DateAdd),
        "DATE_ADD": parse_date_delta(exp.DateAdd),
        "DATEDIFF": parse_date_delta(exp.DateDiff),
    }

    # Spark's FACTOR table plus a COLON entry that maps to exp.JSONExtract.
    FACTOR = {
        **Spark.Parser.FACTOR,
        TokenType.COLON: exp.JSONExtract,
    }
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: Determines the amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
# Generator overrides for the Databricks dialect, layered on Spark.Generator.
class Generator(Spark.Generator):
    # Spark's transforms with Databricks-specific overrides layered on top.
    TRANSFORMS = {
        **Spark.Generator.TRANSFORMS,
        exp.DateAdd: generate_date_delta_with_unit_sql,
        exp.DateDiff: generate_date_delta_with_unit_sql,
        # JSONExtract is emitted via self.binary with ":" as the operator.
        exp.JSONExtract: lambda self, e: self.binary(e, ":"),
        # Select nodes are preprocessed by these two transforms, in this order.
        exp.Select: transforms.preprocess(
            [
                transforms.eliminate_distinct_on,
                transforms.unnest_to_explode,
            ]
        ),
        exp.ToChar: lambda self, e: self.function_fallback_sql(e),
    }

    PARAMETER_TOKEN = "$"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether or not to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether or not to normalize identifiers to lowercase. Default: False.
- pad: Determines the pad size in a formatted string. Default: 2.
- indent: Determines the indentation size in a formatted string. Default: 2.
- normalize_functions: Whether or not to normalize all function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Determines whether or not the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether or not to preserve comments in the output SQL code. Default: True
@classmethod
def can_identify(text: str, identify: str | bool = 'safe') -> bool:
@classmethod
def can_identify(cls, text: str, identify: str | bool = "safe") -> bool:
    """Checks if text can be identified given an identify option.

    Args:
        text: The text to check.
        identify:
            "always" or `True`: Always returns true.
            "safe": True if the identifier is case-insensitive.

    Returns:
        Whether or not the given text can be identified.
    """
    if identify is True or identify == "always":
        return True

    if identify == "safe":
        # "safe" defers to the class's own case-sensitivity check.
        return not cls.case_sensitive(text)

    # Any other value (e.g. False) never identifies.
    return False
Checks if text can be identified given an identify option.
Arguments:
- text: The text to check.
- identify: "always" or `True`: Always returns true. "safe": True if the identifier is case-insensitive.
Returns:
Whether or not the given text can be identified.
Inherited Members
- sqlglot.generator.Generator
- Generator
- generate
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- clone_sql
- describe_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- rawstring_sql
- datatypesize_sql
- directory_sql
- delete_sql
- drop_sql
- except_sql
- except_op
- fetch_sql
- filter_sql
- hint_sql
- index_sql
- identifier_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- insert_sql
- intersect_sql
- intersect_op
- introducer_sql
- pseudotype_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- table_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- group_sql
- having_sql
- join_sql
- lambda_sql
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognize_sql
- query_modifiers
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- union_sql
- union_op
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- safeconcat_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonobject_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- aliases_sql
- attimezone_sql
- add_sql
- and_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- renametable_sql
- altertable_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- intdiv_sql
- dpipe_sql
- safedpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- eq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- or_sql
- slice_sql
- sub_sql
- trycast_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- text_width
- format_time
- expressions
- op_expressions
- naked_property
- set_operation
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- oncluster_sql
# Tokenizer overrides for the Databricks dialect, layered on Spark.Tokenizer.
class Tokenizer(Spark.Tokenizer):
    # NOTE(review): empty list presumably disables the hex-string literal
    # forms inherited from Spark; confirm against the base tokenizer.
    HEX_STRINGS = []

    # Spark's single-character tokens plus "$" as TokenType.PARAMETER.
    SINGLE_TOKENS = {
        **Spark.Tokenizer.SINGLE_TOKENS,
        "$": TokenType.PARAMETER,
    }