sqlglot.dialects.bigquery
from __future__ import annotations

import logging
import re
import typing as t

from sqlglot import exp, generator, parser, tokens, transforms
from sqlglot._typing import E
from sqlglot.dialects.dialect import (
    Dialect,
    NormalizationStrategy,
    annotate_with_type_lambda,
    arg_max_or_min_no_count,
    binary_from_function,
    date_add_interval_sql,
    datestrtodate_sql,
    build_formatted_time,
    filter_array_using_unnest,
    if_sql,
    inline_array_unless_query,
    max_or_greatest,
    min_or_least,
    no_ilike_sql,
    build_date_delta_with_interval,
    regexp_replace_sql,
    rename_func,
    sha256_sql,
    timestrtotime_sql,
    ts_or_ds_add_cast,
    unit_to_var,
    strposition_sql,
    groupconcat_sql,
    space_sql,
)
from sqlglot.helper import seq_get, split_num_words
from sqlglot.tokens import TokenType
from sqlglot.generator import unsupported_args

if t.TYPE_CHECKING:
    from sqlglot._typing import Lit

    from sqlglot.optimizer.annotate_types import TypeAnnotator

logger = logging.getLogger("sqlglot")


JSON_EXTRACT_TYPE = t.Union[exp.JSONExtract, exp.JSONExtractScalar, exp.JSONExtractArray]

DQUOTES_ESCAPING_JSON_FUNCTIONS = ("JSON_QUERY", "JSON_VALUE", "JSON_QUERY_ARRAY")


def _derived_table_values_to_unnest(self: BigQuery.Generator, expression: exp.Values) -> str:
    if not expression.find_ancestor(exp.From, exp.Join):
        return self.values_sql(expression)

    structs = []
    alias = expression.args.get("alias")
    for tup in expression.find_all(exp.Tuple):
        field_aliases = (
            alias.columns
            if alias and alias.columns
            else (f"_c{i}" for i in range(len(tup.expressions)))
        )
        expressions = [
            exp.PropertyEQ(this=exp.to_identifier(name), expression=fld)
            for name, fld in zip(field_aliases, tup.expressions)
        ]
        structs.append(exp.Struct(expressions=expressions))

    # Due to `UNNEST_COLUMN_ONLY`, it is expected that the table alias be contained in the columns expression
    alias_name_only = exp.TableAlias(columns=[alias.this]) if alias else None
    return self.unnest_sql(
        exp.Unnest(expressions=[exp.array(*structs, copy=False)], alias=alias_name_only)
    )


def _returnsproperty_sql(self: BigQuery.Generator, expression: exp.ReturnsProperty) -> str:
    this = expression.this
    if isinstance(this, exp.Schema):
        this = f"{self.sql(this, 'this')} <{self.expressions(this)}>"
    else:
        this = self.sql(this)
    return f"RETURNS {this}"


def _create_sql(self: BigQuery.Generator, expression: exp.Create) -> str:
    returns = expression.find(exp.ReturnsProperty)
    if expression.kind == "FUNCTION" and returns and returns.args.get("is_table"):
        expression.set("kind", "TABLE FUNCTION")

        if isinstance(expression.expression, (exp.Subquery, exp.Literal)):
            expression.set("expression", expression.expression.this)

    return self.create_sql(expression)


# https://issuetracker.google.com/issues/162294746
# workaround for bigquery bug when grouping by an expression and then ordering
# WITH x AS (SELECT 1 y)
# SELECT y + 1 z
# FROM x
# GROUP BY x + 1
# ORDER by z
def _alias_ordered_group(expression: exp.Expression) -> exp.Expression:
    if isinstance(expression, exp.Select):
        group = expression.args.get("group")
        order = expression.args.get("order")

        if group and order:
            aliases = {
                select.this: select.args["alias"]
                for select in expression.selects
                if isinstance(select, exp.Alias)
            }

            for grouped in group.expressions:
                if grouped.is_int:
                    continue
                alias = aliases.get(grouped)
                if alias:
                    grouped.replace(exp.column(alias))

    return expression
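
# Illustrative sketch (not part of the original source): for the workaround above,
#   SELECT y + 1 AS z FROM x GROUP BY y + 1 ORDER BY z
# is rewritten by _alias_ordered_group into
#   SELECT y + 1 AS z FROM x GROUP BY z ORDER BY z
# i.e. a grouped expression that matches an aliased projection is replaced by its alias.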


def _pushdown_cte_column_names(expression: exp.Expression) -> exp.Expression:
    """BigQuery doesn't allow column names when defining a CTE, so we try to push them down."""
    if isinstance(expression, exp.CTE) and expression.alias_column_names:
        cte_query = expression.this

        if cte_query.is_star:
            logger.warning(
                "Can't push down CTE column names for star queries. Run the query through"
                " the optimizer or use 'qualify' to expand the star projections first."
            )
            return expression

        column_names = expression.alias_column_names
        expression.args["alias"].set("columns", None)

        for name, select in zip(column_names, cte_query.selects):
            to_replace = select

            if isinstance(select, exp.Alias):
                select = select.this

            # Inner aliases are shadowed by the CTE column names
            to_replace.replace(exp.alias_(select, name))

    return expression


def _build_parse_timestamp(args: t.List) -> exp.StrToTime:
    this = build_formatted_time(exp.StrToTime, "bigquery")([seq_get(args, 1), seq_get(args, 0)])
    this.set("zone", seq_get(args, 2))
    return this


def _build_timestamp(args: t.List) -> exp.Timestamp:
    timestamp = exp.Timestamp.from_arg_list(args)
    timestamp.set("with_tz", True)
    return timestamp


def _build_date(args: t.List) -> exp.Date | exp.DateFromParts:
    expr_type = exp.DateFromParts if len(args) == 3 else exp.Date
    return expr_type.from_arg_list(args)


def _build_to_hex(args: t.List) -> exp.Hex | exp.MD5:
    # TO_HEX(MD5(..)) is common in BigQuery, so it's parsed into MD5 to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.MD5(this=arg.this) if isinstance(arg, exp.MD5Digest) else exp.LowerHex(this=arg)


def _array_contains_sql(self: BigQuery.Generator, expression: exp.ArrayContains) -> str:
    return self.sql(
        exp.Exists(
            this=exp.select("1")
            .from_(exp.Unnest(expressions=[expression.left]).as_("_unnest", table=["_col"]))
            .where(exp.column("_col").eq(expression.right))
        )
    )


def _ts_or_ds_add_sql(self: BigQuery.Generator, expression: exp.TsOrDsAdd) -> str:
    return date_add_interval_sql("DATE", "ADD")(self, ts_or_ds_add_cast(expression))


def _ts_or_ds_diff_sql(self: BigQuery.Generator, expression: exp.TsOrDsDiff) -> str:
    expression.this.replace(exp.cast(expression.this, exp.DataType.Type.TIMESTAMP))
    expression.expression.replace(exp.cast(expression.expression, exp.DataType.Type.TIMESTAMP))
    unit = unit_to_var(expression)
    return self.func("DATE_DIFF", expression.this, expression.expression, unit)


def _unix_to_time_sql(self: BigQuery.Generator, expression: exp.UnixToTime) -> str:
    scale = expression.args.get("scale")
    timestamp = expression.this

    if scale in (None, exp.UnixToTime.SECONDS):
        return self.func("TIMESTAMP_SECONDS", timestamp)
    if scale == exp.UnixToTime.MILLIS:
        return self.func("TIMESTAMP_MILLIS", timestamp)
    if scale == exp.UnixToTime.MICROS:
        return self.func("TIMESTAMP_MICROS", timestamp)

    unix_seconds = exp.cast(
        exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT
    )
    return self.func("TIMESTAMP_SECONDS", unix_seconds)


def _build_time(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToTime(this=args[0])
    if len(args) == 2:
        return exp.Time.from_arg_list(args)
    return exp.TimeFromParts.from_arg_list(args)


def _build_datetime(args: t.List) -> exp.Func:
    if len(args) == 1:
        return exp.TsOrDsToDatetime.from_arg_list(args)
    if len(args) == 2:
        return exp.Datetime.from_arg_list(args)
    return exp.TimestampFromParts.from_arg_list(args)
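
# Sketch of the non-standard scale fallback in _unix_to_time_sql above (illustrative):
# a nanosecond-precision epoch value is first scaled down to whole seconds, e.g.
#   TIMESTAMP_SECONDS(CAST(col / POW(10, 9) AS INT64))
# since BigQuery only provides TIMESTAMP_SECONDS/_MILLIS/_MICROS natively.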


def _build_regexp_extract(
    expr_type: t.Type[E], default_group: t.Optional[exp.Expression] = None
) -> t.Callable[[t.List], E]:
    def _builder(args: t.List) -> E:
        try:
            group = re.compile(args[1].name).groups == 1
        except re.error:
            group = False

        # Default group is used for the transpilation of REGEXP_EXTRACT_ALL
        return expr_type(
            this=seq_get(args, 0),
            expression=seq_get(args, 1),
            position=seq_get(args, 2),
            occurrence=seq_get(args, 3),
            group=exp.Literal.number(1) if group else default_group,
        )

    return _builder


def _build_extract_json_with_default_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        if len(args) == 1:
            # The default value for the JSONPath is '$' i.e all of the data
            args.append(exp.Literal.string("$"))
        return parser.build_extract_json_with_path(expr_type)(args, dialect)

    return _builder


def _str_to_datetime_sql(
    self: BigQuery.Generator, expression: exp.StrToDate | exp.StrToTime
) -> str:
    this = self.sql(expression, "this")
    dtype = "DATE" if isinstance(expression, exp.StrToDate) else "TIMESTAMP"

    if expression.args.get("safe"):
        fmt = self.format_time(
            expression,
            self.dialect.INVERSE_FORMAT_MAPPING,
            self.dialect.INVERSE_FORMAT_TRIE,
        )
        return f"SAFE_CAST({this} AS {dtype} FORMAT {fmt})"

    fmt = self.format_time(expression)
    return self.func(f"PARSE_{dtype}", fmt, this, expression.args.get("zone"))


def _annotate_math_functions(self: TypeAnnotator, expression: E) -> E:
    """
    Many BigQuery math functions such as CEIL, FLOOR etc follow this return type convention:
    +---------+---------+---------+------------+---------+
    |  INPUT  |  INT64  | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    | OUTPUT  | FLOAT64 | NUMERIC | BIGNUMERIC | FLOAT64 |
    +---------+---------+---------+------------+---------+
    """
    self._annotate_args(expression)

    this: exp.Expression = expression.this

    self._set_type(
        expression,
        exp.DataType.Type.DOUBLE if this.is_type(*exp.DataType.INTEGER_TYPES) else this.type,
    )
    return expression


@unsupported_args("ins_cost", "del_cost", "sub_cost")
def _levenshtein_sql(self: BigQuery.Generator, expression: exp.Levenshtein) -> str:
    max_dist = expression.args.get("max_dist")
    if max_dist:
        max_dist = exp.Kwarg(this=exp.var("max_distance"), expression=max_dist)

    return self.func("EDIT_DISTANCE", expression.this, expression.expression, max_dist)


def _build_levenshtein(args: t.List) -> exp.Levenshtein:
    max_dist = seq_get(args, 2)
    return exp.Levenshtein(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        max_dist=max_dist.expression if max_dist else None,
    )


def _build_format_time(expr_type: t.Type[exp.Expression]) -> t.Callable[[t.List], exp.TimeToStr]:
    def _builder(args: t.List) -> exp.TimeToStr:
        return exp.TimeToStr(
            this=expr_type(this=seq_get(args, 1)),
            format=seq_get(args, 0),
            zone=seq_get(args, 2),
        )

    return _builder
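
# Example for _build_regexp_extract (illustrative): BigQuery's REGEXP_EXTRACT returns
# the text matched by the single capturing group when the pattern has exactly one, e.g.
#   REGEXP_EXTRACT('abc', 'ab(c)')  -- 'c', not 'abc'
# so `group` is pinned to 1, preserving these semantics on dialects whose equivalent
# function defaults to group 0.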


def _build_contains_substring(args: t.List) -> exp.Contains | exp.Anonymous:
    if len(args) == 3:
        return exp.Anonymous(this="CONTAINS_SUBSTR", expressions=args)

    # Lowercase the operands in case of transpilation, as exp.Contains
    # is case-sensitive on other dialects
    this = exp.Lower(this=seq_get(args, 0))
    expr = exp.Lower(this=seq_get(args, 1))

    return exp.Contains(this=this, expression=expr)


def _json_extract_sql(self: BigQuery.Generator, expression: JSON_EXTRACT_TYPE) -> str:
    name = (expression._meta and expression.meta.get("name")) or expression.sql_name()
    upper = name.upper()

    dquote_escaping = upper in DQUOTES_ESCAPING_JSON_FUNCTIONS

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = False

    sql = rename_func(upper)(self, expression)

    if dquote_escaping:
        self._quote_json_path_key_using_brackets = True

    return sql


def _annotate_concat(self: TypeAnnotator, expression: exp.Concat) -> exp.Concat:
    annotated = self._annotate_by_args(expression, "expressions")

    # Args must be BYTES or types that can be cast to STRING, return type is either BYTES or STRING
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#concat
    if not annotated.is_type(exp.DataType.Type.BINARY, exp.DataType.Type.UNKNOWN):
        annotated.type = exp.DataType.Type.VARCHAR

    return annotated


def _annotate_array(self: TypeAnnotator, expression: exp.Array) -> exp.Array:
    array_args = expression.expressions

    # BigQuery behaves as follows:
    #
    # SELECT t, TYPEOF(t) FROM (SELECT 'foo') AS t -- foo, STRUCT<STRING>
    # SELECT ARRAY(SELECT 'foo'), TYPEOF(ARRAY(SELECT 'foo')) -- foo, ARRAY<STRING>
    if (
        len(array_args) == 1
        and isinstance(select := array_args[0].unnest(), exp.Select)
        and (query_type := select.meta.get("query_type")) is not None
        and query_type.is_type(exp.DataType.Type.STRUCT)
        and len(query_type.expressions) == 1
        and isinstance(col_def := query_type.expressions[0], exp.ColumnDef)
        and (projection_type := col_def.kind) is not None
        and not projection_type.is_type(exp.DataType.Type.UNKNOWN)
    ):
        array_type = exp.DataType(
            this=exp.DataType.Type.ARRAY,
            expressions=[projection_type.copy()],
            nested=True,
        )
        return self._annotate_with_type(expression, array_type)

    return self._annotate_by_args(expression, "expressions", array=True)
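
# Minimal usage sketch relying only on sqlglot's public API (illustrative, not part
# of this module):
#
#   import sqlglot
#   sqlglot.transpile("SELECT TO_HEX(MD5('x'))", read="bigquery", write="duckdb")
#   # roughly -> ["SELECT MD5('x')"], thanks to the _build_to_hex normalization above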
"MI": "%M", 430 "SS": "%S", 431 "SSSSS": "%f", 432 "TZH": "%z", 433 } 434 435 # The _PARTITIONTIME and _PARTITIONDATE pseudo-columns are not returned by a SELECT * statement 436 # https://cloud.google.com/bigquery/docs/querying-partitioned-tables#query_an_ingestion-time_partitioned_table 437 PSEUDOCOLUMNS = {"_PARTITIONTIME", "_PARTITIONDATE"} 438 439 # All set operations require either a DISTINCT or ALL specifier 440 SET_OP_DISTINCT_BY_DEFAULT = dict.fromkeys((exp.Except, exp.Intersect, exp.Union), None) 441 442 # BigQuery maps Type.TIMESTAMP to DATETIME, so we need to amend the inferred types 443 TYPE_TO_EXPRESSIONS = { 444 **Dialect.TYPE_TO_EXPRESSIONS, 445 exp.DataType.Type.TIMESTAMPTZ: Dialect.TYPE_TO_EXPRESSIONS[exp.DataType.Type.TIMESTAMP], 446 } 447 TYPE_TO_EXPRESSIONS.pop(exp.DataType.Type.TIMESTAMP) 448 449 ANNOTATORS = { 450 **Dialect.ANNOTATORS, 451 **{ 452 expr_type: annotate_with_type_lambda(data_type) 453 for data_type, expressions in TYPE_TO_EXPRESSIONS.items() 454 for expr_type in expressions 455 }, 456 **{ 457 expr_type: lambda self, e: _annotate_math_functions(self, e) 458 for expr_type in (exp.Floor, exp.Ceil, exp.Log, exp.Ln, exp.Sqrt, exp.Exp, exp.Round) 459 }, 460 **{ 461 expr_type: lambda self, e: self._annotate_by_args(e, "this") 462 for expr_type in ( 463 exp.Left, 464 exp.Right, 465 exp.Lower, 466 exp.Upper, 467 exp.Pad, 468 exp.Trim, 469 exp.RegexpExtract, 470 exp.RegexpReplace, 471 exp.Repeat, 472 exp.Substring, 473 ) 474 }, 475 exp.Array: _annotate_array, 476 exp.ArrayConcat: lambda self, e: self._annotate_by_args(e, "this", "expressions"), 477 exp.Ascii: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 478 exp.BitwiseAndAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 479 exp.BitwiseOrAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 480 exp.BitwiseXorAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 481 exp.BitwiseCountAgg: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 482 exp.Concat: _annotate_concat, 483 exp.Corr: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 484 exp.CovarPop: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 485 exp.CovarSamp: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.DOUBLE), 486 exp.JSONArray: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.JSON), 487 exp.JSONExtractScalar: lambda self, e: self._annotate_with_type( 488 e, exp.DataType.Type.VARCHAR 489 ), 490 exp.JSONValueArray: lambda self, e: self._annotate_with_type( 491 e, exp.DataType.build("ARRAY<VARCHAR>") 492 ), 493 exp.JSONType: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.VARCHAR), 494 exp.Lag: lambda self, e: self._annotate_by_args(e, "this", "default"), 495 exp.SHA: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 496 exp.SHA2: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BINARY), 497 exp.Sign: lambda self, e: self._annotate_by_args(e, "this"), 498 exp.Split: lambda self, e: self._annotate_by_args(e, "this", array=True), 499 exp.TimestampFromParts: lambda self, e: self._annotate_with_type( 500 e, exp.DataType.Type.DATETIME 501 ), 502 exp.Unicode: lambda self, e: self._annotate_with_type(e, exp.DataType.Type.BIGINT), 503 } 504 505 def normalize_identifier(self, expression: E) -> E: 506 if ( 507 isinstance(expression, exp.Identifier) 508 and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE 509 ): 510 

    def normalize_identifier(self, expression: E) -> E:
        if (
            isinstance(expression, exp.Identifier)
            and self.normalization_strategy is NormalizationStrategy.CASE_INSENSITIVE
        ):
            parent = expression.parent
            while isinstance(parent, exp.Dot):
                parent = parent.parent

            # In BigQuery, CTEs are case-insensitive, but UDF and table names are case-sensitive
            # by default. The following check uses a heuristic to detect tables based on whether
            # they are qualified. This should generally be correct, because tables in BigQuery
            # must be qualified with at least a dataset, unless @@dataset_id is set.
            case_sensitive = (
                isinstance(parent, exp.UserDefinedFunction)
                or (
                    isinstance(parent, exp.Table)
                    and parent.db
                    and (parent.meta.get("quoted_table") or not parent.meta.get("maybe_column"))
                )
                or expression.meta.get("is_table")
            )
            if not case_sensitive:
                expression.set("this", expression.this.lower())

            return t.cast(E, expression)

        return super().normalize_identifier(expression)

    class Tokenizer(tokens.Tokenizer):
        QUOTES = ["'", '"', '"""', "'''"]
        COMMENTS = ["--", "#", ("/*", "*/")]
        IDENTIFIERS = ["`"]
        STRING_ESCAPES = ["\\"]

        HEX_STRINGS = [("0x", ""), ("0X", "")]

        BYTE_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("b", "B")
        ]

        RAW_STRINGS = [
            (prefix + q, q) for q in t.cast(t.List[str], QUOTES) for prefix in ("r", "R")
        ]

        NESTED_COMMENTS = False

        KEYWORDS = {
            **tokens.Tokenizer.KEYWORDS,
            "ANY TYPE": TokenType.VARIANT,
            "BEGIN": TokenType.COMMAND,
            "BEGIN TRANSACTION": TokenType.BEGIN,
            "BYTEINT": TokenType.INT,
            "BYTES": TokenType.BINARY,
            "CURRENT_DATETIME": TokenType.CURRENT_DATETIME,
            "DATETIME": TokenType.TIMESTAMP,
            "DECLARE": TokenType.DECLARE,
            "ELSEIF": TokenType.COMMAND,
            "EXCEPTION": TokenType.COMMAND,
            "EXPORT": TokenType.EXPORT,
            "FLOAT64": TokenType.DOUBLE,
            "FOR SYSTEM_TIME": TokenType.TIMESTAMP_SNAPSHOT,
            "MODEL": TokenType.MODEL,
            "NOT DETERMINISTIC": TokenType.VOLATILE,
            "RECORD": TokenType.STRUCT,
            "TIMESTAMP": TokenType.TIMESTAMPTZ,
        }
        KEYWORDS.pop("DIV")
        KEYWORDS.pop("VALUES")
        KEYWORDS.pop("/*+")
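
    # A few literal forms the Tokenizer above accepts (illustrative):
    #   r'''no \escapes here'''  -- RAW_STRINGS
    #   b"some bytes"            -- BYTE_STRINGS
    #   0xDEADBEEF               -- HEX_STRINGS; an INT64 literal in BigQuery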

    class Parser(parser.Parser):
        PREFIXED_PIVOT_COLUMNS = True
        LOG_DEFAULTS_TO_LN = True
        SUPPORTS_IMPLICIT_UNNEST = True
        JOINS_HAVE_EQUAL_PRECEDENCE = True

        # BigQuery does not allow ASC/DESC to be used as an identifier
        ID_VAR_TOKENS = parser.Parser.ID_VAR_TOKENS - {TokenType.ASC, TokenType.DESC}
        ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
        TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}
        COMMENT_TABLE_ALIAS_TOKENS = parser.Parser.COMMENT_TABLE_ALIAS_TOKENS - {
            TokenType.ASC,
            TokenType.DESC,
        }
        UPDATE_ALIAS_TOKENS = parser.Parser.UPDATE_ALIAS_TOKENS - {TokenType.ASC, TokenType.DESC}

        FUNCTIONS = {
            **parser.Parser.FUNCTIONS,
            "CONTAINS_SUBSTR": _build_contains_substring,
            "DATE": _build_date,
            "DATE_ADD": build_date_delta_with_interval(exp.DateAdd),
            "DATE_SUB": build_date_delta_with_interval(exp.DateSub),
            "DATE_TRUNC": lambda args: exp.DateTrunc(
                unit=seq_get(args, 1),
                this=seq_get(args, 0),
                zone=seq_get(args, 2),
            ),
            "DATETIME": _build_datetime,
            "DATETIME_ADD": build_date_delta_with_interval(exp.DatetimeAdd),
            "DATETIME_SUB": build_date_delta_with_interval(exp.DatetimeSub),
            "DIV": binary_from_function(exp.IntDiv),
            "EDIT_DISTANCE": _build_levenshtein,
            "FORMAT_DATE": _build_format_time(exp.TsOrDsToDate),
            "GENERATE_ARRAY": exp.GenerateSeries.from_arg_list,
            "JSON_EXTRACT_SCALAR": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_EXTRACT_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_QUERY": parser.build_extract_json_with_path(exp.JSONExtract),
            "JSON_QUERY_ARRAY": _build_extract_json_with_default_path(exp.JSONExtractArray),
            "JSON_VALUE": _build_extract_json_with_default_path(exp.JSONExtractScalar),
            "JSON_VALUE_ARRAY": _build_extract_json_with_default_path(exp.JSONValueArray),
            "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True),
            "MD5": exp.MD5Digest.from_arg_list,
            "TO_HEX": _build_to_hex,
            "PARSE_DATE": lambda args: build_formatted_time(exp.StrToDate, "bigquery")(
                [seq_get(args, 1), seq_get(args, 0)]
            ),
            "PARSE_TIMESTAMP": _build_parse_timestamp,
            "REGEXP_CONTAINS": exp.RegexpLike.from_arg_list,
            "REGEXP_EXTRACT": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract),
            "REGEXP_EXTRACT_ALL": _build_regexp_extract(
                exp.RegexpExtractAll, default_group=exp.Literal.number(0)
            ),
            "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)),
            "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)),
            "SPLIT": lambda args: exp.Split(
                # https://cloud.google.com/bigquery/docs/reference/standard-sql/string_functions#split
                this=seq_get(args, 0),
                expression=seq_get(args, 1) or exp.Literal.string(","),
            ),
            "STRPOS": exp.StrPosition.from_arg_list,
            "TIME": _build_time,
            "TIME_ADD": build_date_delta_with_interval(exp.TimeAdd),
            "TIME_SUB": build_date_delta_with_interval(exp.TimeSub),
            "TIMESTAMP": _build_timestamp,
            "TIMESTAMP_ADD": build_date_delta_with_interval(exp.TimestampAdd),
            "TIMESTAMP_SUB": build_date_delta_with_interval(exp.TimestampSub),
            "TIMESTAMP_MICROS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MICROS
            ),
            "TIMESTAMP_MILLIS": lambda args: exp.UnixToTime(
                this=seq_get(args, 0), scale=exp.UnixToTime.MILLIS
            ),
            "TIMESTAMP_SECONDS": lambda args: exp.UnixToTime(this=seq_get(args, 0)),
            "TO_JSON_STRING": exp.JSONFormat.from_arg_list,
            "FORMAT_DATETIME": _build_format_time(exp.TsOrDsToDatetime),
            "FORMAT_TIMESTAMP": _build_format_time(exp.TsOrDsToTimestamp),
        }

        FUNCTION_PARSERS = {
            **parser.Parser.FUNCTION_PARSERS,
            "ARRAY": lambda self: self.expression(exp.Array, expressions=[self._parse_statement()]),
            "JSON_ARRAY": lambda self: self.expression(
                exp.JSONArray, expressions=self._parse_csv(self._parse_bitwise)
            ),
            "MAKE_INTERVAL": lambda self: self._parse_make_interval(),
            "FEATURES_AT_TIME": lambda self: self._parse_features_at_time(),
        }
        FUNCTION_PARSERS.pop("TRIM")

        NO_PAREN_FUNCTIONS = {
            **parser.Parser.NO_PAREN_FUNCTIONS,
            TokenType.CURRENT_DATETIME: exp.CurrentDatetime,
        }

        NESTED_TYPE_TOKENS = {
            *parser.Parser.NESTED_TYPE_TOKENS,
            TokenType.TABLE,
        }

        PROPERTY_PARSERS = {
            **parser.Parser.PROPERTY_PARSERS,
            "NOT DETERMINISTIC": lambda self: self.expression(
                exp.StabilityProperty, this=exp.Literal.string("VOLATILE")
            ),
            "OPTIONS": lambda self: self._parse_with_property(),
        }

        CONSTRAINT_PARSERS = {
            **parser.Parser.CONSTRAINT_PARSERS,
            "OPTIONS": lambda self: exp.Properties(expressions=self._parse_with_property()),
        }
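
        # Note the swapped argument order handled by the builders above (illustrative):
        # PARSE_DATE('%Y%m%d', '20240101') takes the format first and the value second,
        # so the builder reorders them into sqlglot's canonical (value, format) shape
        # before constructing StrToDate.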

        RANGE_PARSERS = parser.Parser.RANGE_PARSERS.copy()
        RANGE_PARSERS.pop(TokenType.OVERLAPS)

        NULL_TOKENS = {TokenType.NULL, TokenType.UNKNOWN}

        DASHED_TABLE_PART_FOLLOW_TOKENS = {TokenType.DOT, TokenType.L_PAREN, TokenType.R_PAREN}

        STATEMENT_PARSERS = {
            **parser.Parser.STATEMENT_PARSERS,
            TokenType.ELSE: lambda self: self._parse_as_command(self._prev),
            TokenType.END: lambda self: self._parse_as_command(self._prev),
            TokenType.FOR: lambda self: self._parse_for_in(),
            TokenType.EXPORT: lambda self: self._parse_export_data(),
            TokenType.DECLARE: lambda self: self._parse_declare(),
        }

        BRACKET_OFFSETS = {
            "OFFSET": (0, False),
            "ORDINAL": (1, False),
            "SAFE_OFFSET": (0, True),
            "SAFE_ORDINAL": (1, True),
        }

        def _parse_for_in(self) -> exp.ForIn:
            this = self._parse_range()
            self._match_text_seq("DO")
            return self.expression(exp.ForIn, this=this, expression=self._parse_statement())

        def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
            this = super()._parse_table_part(schema=schema) or self._parse_number()

            # https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#table_names
            if isinstance(this, exp.Identifier):
                table_name = this.name
                while self._match(TokenType.DASH, advance=False) and self._next:
                    start = self._curr
                    while self._is_connected() and not self._match_set(
                        self.DASHED_TABLE_PART_FOLLOW_TOKENS, advance=False
                    ):
                        self._advance()

                    if start == self._curr:
                        break

                    table_name += self._find_sql(start, self._prev)

                this = exp.Identifier(
                    this=table_name, quoted=this.args.get("quoted")
                ).update_positions(this)
            elif isinstance(this, exp.Literal):
                table_name = this.name

                if self._is_connected() and self._parse_var(any_token=True):
                    table_name += self._prev.text

                this = exp.Identifier(this=table_name, quoted=True).update_positions(this)

            return this
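
        # Illustrative: in `SELECT * FROM my-project.mydataset.mytable`, the method above
        # stitches the dash-separated tokens back into the single identifier my-project,
        # which _parse_table_parts below then assigns to the table's catalog part.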

        def _parse_table_parts(
            self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
        ) -> exp.Table:
            table = super()._parse_table_parts(
                schema=schema, is_db_reference=is_db_reference, wildcard=True
            )

            # proj-1.db.tbl -- `1.` is tokenized as a float so we need to unravel it here
            if not table.catalog:
                if table.db:
                    previous_db = table.args["db"]
                    parts = table.db.split(".")
                    if len(parts) == 2 and not table.args["db"].quoted:
                        table.set(
                            "catalog", exp.Identifier(this=parts[0]).update_positions(previous_db)
                        )
                        table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db))
                else:
                    previous_this = table.this
                    parts = table.name.split(".")
                    if len(parts) == 2 and not table.this.quoted:
                        table.set(
                            "db", exp.Identifier(this=parts[0]).update_positions(previous_this)
                        )
                        table.set(
                            "this", exp.Identifier(this=parts[1]).update_positions(previous_this)
                        )

            if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts):
                alias = table.this
                catalog, db, this, *rest = (
                    exp.to_identifier(p, quoted=True)
                    for p in split_num_words(".".join(p.name for p in table.parts), ".", 3)
                )

                for part in (catalog, db, this):
                    if part:
                        part.update_positions(table.this)

                if rest and this:
                    this = exp.Dot.build([this, *rest])  # type: ignore

                table = exp.Table(
                    this=this, db=db, catalog=catalog, pivots=table.args.get("pivots")
                )
                table.meta["quoted_table"] = True
            else:
                alias = None

            # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or
            # dataset, so if the project identifier is omitted we need to fix the ast so that
            # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier.
            # Otherwise, we wouldn't correctly qualify a `Table` node that references these
            # views, because it would seem like the "catalog" part is set, when it'd actually
            # be the region/dataset. Merging the two identifiers into a single one is done to
            # avoid producing a 4-part Table reference, which would cause issues in the schema
            # module, when there are 3-part table names mixed with information schema views.
            #
            # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax
            table_parts = table.parts
            if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA":
                # We need to alias the table here to avoid breaking existing qualified columns.
                # This is expected to be safe, because if there's an actual alias coming up in
                # the token stream, it will overwrite this one. If there isn't one, we are only
                # exposing the name that can be used to reference the view explicitly (a no-op).
                exp.alias_(
                    table,
                    t.cast(exp.Identifier, alias or table_parts[-1]),
                    table=True,
                    copy=False,
                )

                info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}"
                new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions(
                    line=table_parts[-2].meta.get("line"),
                    col=table_parts[-1].meta.get("col"),
                    start=table_parts[-2].meta.get("start"),
                    end=table_parts[-1].meta.get("end"),
                )
                table.set("this", new_this)
                table.set("db", seq_get(table_parts, -3))
                table.set("catalog", seq_get(table_parts, -4))

            return table

        def _parse_column(self) -> t.Optional[exp.Expression]:
            column = super()._parse_column()
            if isinstance(column, exp.Column):
                parts = column.parts
                if any("." in p.name for p in parts):
                    catalog, db, table, this, *rest = (
                        exp.to_identifier(p, quoted=True)
                        for p in split_num_words(".".join(p.name for p in parts), ".", 4)
                    )

                    if rest and this:
                        this = exp.Dot.build([this, *rest])  # type: ignore

                    column = exp.Column(this=this, table=table, db=db, catalog=catalog)
                    column.meta["quoted_column"] = True

            return column

        @t.overload
        def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

        @t.overload
        def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

        def _parse_json_object(self, agg=False):
            json_object = super()._parse_json_object()
            array_kv_pair = seq_get(json_object.expressions, 0)

            # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation
            # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2
            if (
                array_kv_pair
                and isinstance(array_kv_pair.this, exp.Array)
                and isinstance(array_kv_pair.expression, exp.Array)
            ):
                keys = array_kv_pair.this.expressions
                values = array_kv_pair.expression.expressions

                json_object.set(
                    "expressions",
                    [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)],
                )

            return json_object

        def _parse_bracket(
            self, this: t.Optional[exp.Expression] = None
        ) -> t.Optional[exp.Expression]:
            bracket = super()._parse_bracket(this)

            if this is bracket:
                return bracket

            if isinstance(bracket, exp.Bracket):
                for expression in bracket.expressions:
                    name = expression.name.upper()

                    if name not in self.BRACKET_OFFSETS:
                        break

                    offset, safe = self.BRACKET_OFFSETS[name]
                    bracket.set("offset", offset)
                    bracket.set("safe", safe)
                    expression.replace(expression.expressions[0])

            return bracket

        def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
            unnest = super()._parse_unnest(with_alias=with_alias)

            if not unnest:
                return None

            unnest_expr = seq_get(unnest.expressions, 0)
            if unnest_expr:
                from sqlglot.optimizer.annotate_types import annotate_types

                unnest_expr = annotate_types(unnest_expr, dialect=self.dialect)

                # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields,
                # in contrast to other dialects such as DuckDB which flattens only the array by default
                if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any(
                    array_elem.is_type(exp.DataType.Type.STRUCT)
                    for array_elem in unnest_expr._type.expressions
                ):
                    unnest.set("explode_array", True)

            return unnest

        def _parse_make_interval(self) -> exp.MakeInterval:
            expr = exp.MakeInterval()

            for arg_key in expr.arg_types:
                value = self._parse_lambda()

                if not value:
                    break

                # Non-named arguments are filled sequentially, (optionally) followed by named arguments
                # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2)
                if isinstance(value, exp.Kwarg):
                    arg_key = value.this.name

                expr.set(arg_key, value)

                self._match(TokenType.COMMA)

            return expr

        def _parse_features_at_time(self) -> exp.FeaturesAtTime:
            expr = self.expression(
                exp.FeaturesAtTime,
                this=(self._match(TokenType.TABLE) and self._parse_table())
                or self._parse_select(nested=True),
            )

            while self._match(TokenType.COMMA):
                arg = self._parse_lambda()

                # Get the LHS of the Kwarg and set the arg to that value, e.g
                # "num_rows => 1" sets the expr's `num_rows` arg
                if arg:
                    expr.set(arg.this.name, arg)

            return expr

        def _parse_export_data(self) -> exp.Export:
            self._match_text_seq("DATA")

            return self.expression(
                exp.Export,
                connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(),
                options=self._parse_properties(),
                this=self._match_text_seq("AS") and self._parse_select(),
            )
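
        # BigQuery array subscript forms recognized by _parse_bracket above (illustrative):
        #   arr[OFFSET(0)]       -- 0-based; errors if out of bounds
        #   arr[ORDINAL(1)]      -- 1-based; errors if out of bounds
        #   arr[SAFE_OFFSET(9)]  -- 0-based; NULL instead of an error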

    class Generator(generator.Generator):
        INTERVAL_ALLOWS_PLURAL_FORM = False
        JOIN_HINTS = False
        QUERY_HINTS = False
        TABLE_HINTS = False
        LIMIT_FETCH = "LIMIT"
        RENAME_TABLE_WITH_DB = False
        NVL2_SUPPORTED = False
        UNNEST_WITH_ORDINALITY = False
        COLLATE_IS_FUNC = True
        LIMIT_ONLY_LITERALS = True
        SUPPORTS_TABLE_ALIAS_COLUMNS = False
        UNPIVOT_ALIASES_ARE_IDENTIFIERS = False
        JSON_KEY_VALUE_PAIR_SEP = ","
        NULL_ORDERING_SUPPORTED = False
        IGNORE_NULLS_IN_FUNC = True
        JSON_PATH_SINGLE_QUOTE_ESCAPE = True
        CAN_IMPLEMENT_ARRAY_ANY = True
        SUPPORTS_TO_NUMBER = False
        NAMED_PLACEHOLDER_TOKEN = "@"
        HEX_FUNC = "TO_HEX"
        WITH_PROPERTIES_PREFIX = "OPTIONS"
        SUPPORTS_EXPLODING_PROJECTIONS = False
        EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False
        SUPPORTS_UNIX_SECONDS = True

        TRANSFORMS = {
            **generator.Generator.TRANSFORMS,
            exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"),
            exp.ArgMax: arg_max_or_min_no_count("MAX_BY"),
            exp.ArgMin: arg_max_or_min_no_count("MIN_BY"),
            exp.Array: inline_array_unless_query,
            exp.ArrayContains: _array_contains_sql,
            exp.ArrayFilter: filter_array_using_unnest,
            exp.ArrayRemove: filter_array_using_unnest,
            exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]),
            exp.CollateProperty: lambda self, e: (
                f"DEFAULT COLLATE {self.sql(e, 'this')}"
                if e.args.get("default")
                else f"COLLATE {self.sql(e, 'this')}"
            ),
            exp.Commit: lambda *_: "COMMIT TRANSACTION",
            exp.CountIf: rename_func("COUNTIF"),
            exp.Create: _create_sql,
            exp.CTE: transforms.preprocess([_pushdown_cte_column_names]),
            exp.DateAdd: date_add_interval_sql("DATE", "ADD"),
            exp.DateDiff: lambda self, e: self.func(
                "DATE_DIFF", e.this, e.expression, unit_to_var(e)
            ),
            exp.DateFromParts: rename_func("DATE"),
            exp.DateStrToDate: datestrtodate_sql,
            exp.DateSub: date_add_interval_sql("DATE", "SUB"),
            exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"),
            exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"),
            exp.FromTimeZone: lambda self, e: self.func(
                "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'"
            ),
            exp.GenerateSeries: rename_func("GENERATE_ARRAY"),
            exp.GroupConcat: lambda self, e: groupconcat_sql(
                self, e, func_name="STRING_AGG", within_group=False
            ),
            exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))),
            exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"),
            exp.If: if_sql(false_value="NULL"),
            exp.ILike: no_ilike_sql,
            exp.IntDiv: rename_func("DIV"),
            exp.Int64: rename_func("INT64"),
            exp.JSONExtract: _json_extract_sql,
            exp.JSONExtractArray: _json_extract_sql,
            exp.JSONExtractScalar: _json_extract_sql,
            exp.JSONFormat: rename_func("TO_JSON_STRING"),
            exp.Levenshtein: _levenshtein_sql,
            exp.Max: max_or_greatest,
            exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)),
            exp.MD5Digest: rename_func("MD5"),
            exp.Min: min_or_least,
            exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}",
            exp.RegexpExtract: lambda self, e: self.func(
                "REGEXP_EXTRACT",
                e.this,
                e.expression,
                e.args.get("position"),
                e.args.get("occurrence"),
            ),
            exp.RegexpExtractAll: lambda self, e: self.func(
                "REGEXP_EXTRACT_ALL", e.this, e.expression
            ),
            exp.RegexpReplace: regexp_replace_sql,
            exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
            exp.ReturnsProperty: _returnsproperty_sql,
            exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
            exp.Select: transforms.preprocess(
                [
                    transforms.explode_projection_to_unnest(),
                    transforms.unqualify_unnest,
                    transforms.eliminate_distinct_on,
                    _alias_ordered_group,
                    transforms.eliminate_semi_and_anti_joins,
                ]
            ),
            exp.SHA: rename_func("SHA1"),
            exp.SHA2: sha256_sql,
            exp.Space: space_sql,
            exp.StabilityProperty: lambda self, e: (
                "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
            ),
            exp.String: rename_func("STRING"),
            exp.StrPosition: lambda self, e: (
                strposition_sql(
                    self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
                )
            ),
            exp.StrToDate: _str_to_datetime_sql,
            exp.StrToTime: _str_to_datetime_sql,
            exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
            exp.TimeFromParts: rename_func("TIME"),
            exp.TimestampFromParts: rename_func("DATETIME"),
            exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
            exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
            exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
            exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
            exp.TimeStrToTime: timestrtotime_sql,
            exp.Transaction: lambda *_: "BEGIN TRANSACTION",
            exp.TsOrDsAdd: _ts_or_ds_add_sql,
            exp.TsOrDsDiff: _ts_or_ds_diff_sql,
            exp.TsOrDsToTime: rename_func("TIME"),
            exp.TsOrDsToDatetime: rename_func("DATETIME"),
            exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
            exp.Unhex: rename_func("FROM_HEX"),
            exp.UnixDate: rename_func("UNIX_DATE"),
            exp.UnixToTime: _unix_to_time_sql,
            exp.Uuid: lambda *_: "GENERATE_UUID()",
            exp.Values: _derived_table_values_to_unnest,
            exp.VariancePop: rename_func("VAR_POP"),
            exp.SafeDivide: rename_func("SAFE_DIVIDE"),
        }
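
        # Round-trip sketches for a few TRANSFORMS above (illustrative):
        #   exp.ILike -> LOWER(this) LIKE LOWER(expression)  -- BigQuery has no ILIKE
        #   exp.MD5   -> TO_HEX(MD5(this))                   -- hex-string MD5 idiom
        #   exp.Uuid  -> GENERATE_UUID()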
rename_func("REGEXP_CONTAINS"), 1058 exp.ReturnsProperty: _returnsproperty_sql, 1059 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1060 exp.Select: transforms.preprocess( 1061 [ 1062 transforms.explode_projection_to_unnest(), 1063 transforms.unqualify_unnest, 1064 transforms.eliminate_distinct_on, 1065 _alias_ordered_group, 1066 transforms.eliminate_semi_and_anti_joins, 1067 ] 1068 ), 1069 exp.SHA: rename_func("SHA1"), 1070 exp.SHA2: sha256_sql, 1071 exp.Space: space_sql, 1072 exp.StabilityProperty: lambda self, e: ( 1073 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1074 ), 1075 exp.String: rename_func("STRING"), 1076 exp.StrPosition: lambda self, e: ( 1077 strposition_sql( 1078 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1079 ) 1080 ), 1081 exp.StrToDate: _str_to_datetime_sql, 1082 exp.StrToTime: _str_to_datetime_sql, 1083 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1084 exp.TimeFromParts: rename_func("TIME"), 1085 exp.TimestampFromParts: rename_func("DATETIME"), 1086 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1087 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1088 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1089 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1090 exp.TimeStrToTime: timestrtotime_sql, 1091 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1092 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1093 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1094 exp.TsOrDsToTime: rename_func("TIME"), 1095 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1096 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1097 exp.Unhex: rename_func("FROM_HEX"), 1098 exp.UnixDate: rename_func("UNIX_DATE"), 1099 exp.UnixToTime: _unix_to_time_sql, 1100 exp.Uuid: lambda *_: "GENERATE_UUID()", 1101 exp.Values: _derived_table_values_to_unnest, 1102 exp.VariancePop: rename_func("VAR_POP"), 1103 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1104 } 1105 1106 SUPPORTED_JSON_PATH_PARTS = { 1107 exp.JSONPathKey, 1108 exp.JSONPathRoot, 1109 exp.JSONPathSubscript, 1110 } 1111 1112 TYPE_MAPPING = { 1113 **generator.Generator.TYPE_MAPPING, 1114 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1115 exp.DataType.Type.BIGINT: "INT64", 1116 exp.DataType.Type.BINARY: "BYTES", 1117 exp.DataType.Type.BLOB: "BYTES", 1118 exp.DataType.Type.BOOLEAN: "BOOL", 1119 exp.DataType.Type.CHAR: "STRING", 1120 exp.DataType.Type.DECIMAL: "NUMERIC", 1121 exp.DataType.Type.DOUBLE: "FLOAT64", 1122 exp.DataType.Type.FLOAT: "FLOAT64", 1123 exp.DataType.Type.INT: "INT64", 1124 exp.DataType.Type.NCHAR: "STRING", 1125 exp.DataType.Type.NVARCHAR: "STRING", 1126 exp.DataType.Type.SMALLINT: "INT64", 1127 exp.DataType.Type.TEXT: "STRING", 1128 exp.DataType.Type.TIMESTAMP: "DATETIME", 1129 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1130 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1131 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1132 exp.DataType.Type.TINYINT: "INT64", 1133 exp.DataType.Type.ROWVERSION: "BYTES", 1134 exp.DataType.Type.UUID: "STRING", 1135 exp.DataType.Type.VARBINARY: "BYTES", 1136 exp.DataType.Type.VARCHAR: "STRING", 1137 exp.DataType.Type.VARIANT: "ANY TYPE", 1138 } 1139 1140 PROPERTIES_LOCATION = { 1141 **generator.Generator.PROPERTIES_LOCATION, 1142 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1143 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1144 } 1145 1146 # WINDOW comes after QUALIFY 1147 # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1148 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1149 
"qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1150 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1151 } 1152 1153 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1154 RESERVED_KEYWORDS = { 1155 "all", 1156 "and", 1157 "any", 1158 "array", 1159 "as", 1160 "asc", 1161 "assert_rows_modified", 1162 "at", 1163 "between", 1164 "by", 1165 "case", 1166 "cast", 1167 "collate", 1168 "contains", 1169 "create", 1170 "cross", 1171 "cube", 1172 "current", 1173 "default", 1174 "define", 1175 "desc", 1176 "distinct", 1177 "else", 1178 "end", 1179 "enum", 1180 "escape", 1181 "except", 1182 "exclude", 1183 "exists", 1184 "extract", 1185 "false", 1186 "fetch", 1187 "following", 1188 "for", 1189 "from", 1190 "full", 1191 "group", 1192 "grouping", 1193 "groups", 1194 "hash", 1195 "having", 1196 "if", 1197 "ignore", 1198 "in", 1199 "inner", 1200 "intersect", 1201 "interval", 1202 "into", 1203 "is", 1204 "join", 1205 "lateral", 1206 "left", 1207 "like", 1208 "limit", 1209 "lookup", 1210 "merge", 1211 "natural", 1212 "new", 1213 "no", 1214 "not", 1215 "null", 1216 "nulls", 1217 "of", 1218 "on", 1219 "or", 1220 "order", 1221 "outer", 1222 "over", 1223 "partition", 1224 "preceding", 1225 "proto", 1226 "qualify", 1227 "range", 1228 "recursive", 1229 "respect", 1230 "right", 1231 "rollup", 1232 "rows", 1233 "select", 1234 "set", 1235 "some", 1236 "struct", 1237 "tablesample", 1238 "then", 1239 "to", 1240 "treat", 1241 "true", 1242 "unbounded", 1243 "union", 1244 "unnest", 1245 "using", 1246 "when", 1247 "where", 1248 "window", 1249 "with", 1250 "within", 1251 } 1252 1253 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1254 unit = expression.unit 1255 unit_sql = unit.name if unit.is_string else self.sql(unit) 1256 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1257 1258 def mod_sql(self, expression: exp.Mod) -> str: 1259 this = expression.this 1260 expr = expression.expression 1261 return self.func( 1262 "MOD", 1263 this.unnest() if isinstance(this, exp.Paren) else this, 1264 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1265 ) 1266 1267 def column_parts(self, expression: exp.Column) -> str: 1268 if expression.meta.get("quoted_column"): 1269 # If a column reference is of the form `dataset.table`.name, we need 1270 # to preserve the quoted table path, otherwise the reference breaks 1271 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1272 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1273 return f"{table_path}.{self.sql(expression, 'this')}" 1274 1275 return super().column_parts(expression) 1276 1277 def table_parts(self, expression: exp.Table) -> str: 1278 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1279 # we need to make sure the correct quoting is used in each case. 

        def table_parts(self, expression: exp.Table) -> str:
            # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
            # we need to make sure the correct quoting is used in each case.
            #
            # For example, if there is a CTE x that clashes with a schema name, then the former will
            # return the table y in that schema, whereas the latter will return the CTE's y column:
            #
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
            # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
            if expression.meta.get("quoted_table"):
                table_parts = ".".join(p.name for p in expression.parts)
                return self.sql(exp.Identifier(this=table_parts, quoted=True))

            return super().table_parts(expression)

        def timetostr_sql(self, expression: exp.TimeToStr) -> str:
            this = expression.this
            if isinstance(this, exp.TsOrDsToDatetime):
                func_name = "FORMAT_DATETIME"
            elif isinstance(this, exp.TsOrDsToTimestamp):
                func_name = "FORMAT_TIMESTAMP"
            else:
                func_name = "FORMAT_DATE"

            time_expr = (
                this
                if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
                else expression
            )
            return self.func(
                func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
            )

        def eq_sql(self, expression: exp.EQ) -> str:
            # Operands of = cannot be NULL in BigQuery
            if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
                if not isinstance(expression.parent, exp.Update):
                    return "NULL"

            return self.binary(expression, "=")

        def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
            parent = expression.parent

            # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
            # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
            if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
                return self.func(
                    "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
                )

            return super().attimezone_sql(expression)

        def trycast_sql(self, expression: exp.TryCast) -> str:
            return self.cast_sql(expression, safe_prefix="SAFE_")

        def bracket_sql(self, expression: exp.Bracket) -> str:
            this = expression.this
            expressions = expression.expressions

            if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
                arg = expressions[0]
                if arg.type is None:
                    from sqlglot.optimizer.annotate_types import annotate_types

                    arg = annotate_types(arg, dialect=self.dialect)

                if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                    # BQ doesn't support bracket syntax with string values for structs
                    return f"{self.sql(this)}.{arg.name}"

            expressions_sql = self.expressions(expression, flat=True)
            offset = expression.args.get("offset")

            if offset == 0:
                expressions_sql = f"OFFSET({expressions_sql})"
            elif offset == 1:
                expressions_sql = f"ORDINAL({expressions_sql})"
            elif offset is not None:
                self.unsupported(f"Unsupported array offset: {offset}")

            if expression.args.get("safe"):
                expressions_sql = f"SAFE_{expressions_sql}"

            return f"{self.sql(this)}[{expressions_sql}]"

        def in_unnest_op(self, expression: exp.Unnest) -> str:
            return self.sql(expression)

        def version_sql(self, expression: exp.Version) -> str:
            if expression.name == "TIMESTAMP":
                expression.set("this", "SYSTEM_TIME")
            return super().version_sql(expression)
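
        # Illustrative: eq_sql above folds NULL comparisons at generation time, so
        # `SELECT NULL = 1` renders as `SELECT NULL` (operands of `=` cannot be NULL
        # in BigQuery), while `UPDATE t SET col = NULL` is left intact.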

        def contains_sql(self, expression: exp.Contains) -> str:
            this = expression.this
            expr = expression.expression

            if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower):
                this = this.this
                expr = expr.this

            return self.func("CONTAINS_SUBSTR", this, expr)

        def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
            this = expression.this

            # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
            # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
            # because they aren't literals and so the above syntax is invalid BigQuery.
            if isinstance(this, exp.Array):
                elem = seq_get(this.expressions, 0)
                if not (elem and elem.find(exp.Query)):
                    return f"{self.sql(expression, 'to')}{self.sql(this)}"

            return super().cast_sql(expression, safe_prefix=safe_prefix)

        def declareitem_sql(self, expression: exp.DeclareItem) -> str:
            variables = self.expressions(expression, "this")
            default = self.sql(expression, "default")
            default = f" DEFAULT {default}" if default else ""
            kind = self.sql(expression, "kind")
            kind = f" {kind}" if kind else ""

            return f"{variables}{kind}{default}"
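
# End-to-end sketch using sqlglot's public API (illustrative, not part of this module):
# trycast_sql and TYPE_MAPPING above turn a TRY_CAST into BigQuery's SAFE_CAST, e.g.
#
#   import sqlglot
#   sqlglot.transpile("SELECT TRY_CAST(x AS INT)", read="duckdb", write="bigquery")
#   # roughly -> ["SELECT SAFE_CAST(x AS INT64)"]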
765 table.set("db", exp.Identifier(this=parts[1]).update_positions(previous_db)) 766 else: 767 previous_this = table.this 768 parts = table.name.split(".") 769 if len(parts) == 2 and not table.this.quoted: 770 table.set( 771 "db", exp.Identifier(this=parts[0]).update_positions(previous_this) 772 ) 773 table.set( 774 "this", exp.Identifier(this=parts[1]).update_positions(previous_this) 775 ) 776 777 if isinstance(table.this, exp.Identifier) and any("." in p.name for p in table.parts): 778 alias = table.this 779 catalog, db, this, *rest = ( 780 exp.to_identifier(p, quoted=True) 781 for p in split_num_words(".".join(p.name for p in table.parts), ".", 3) 782 ) 783 784 for part in (catalog, db, this): 785 if part: 786 part.update_positions(table.this) 787 788 if rest and this: 789 this = exp.Dot.build([this, *rest]) # type: ignore 790 791 table = exp.Table( 792 this=this, db=db, catalog=catalog, pivots=table.args.get("pivots") 793 ) 794 table.meta["quoted_table"] = True 795 else: 796 alias = None 797 798 # The `INFORMATION_SCHEMA` views in BigQuery need to be qualified by a region or 799 # dataset, so if the project identifier is omitted we need to fix the ast so that 800 # the `INFORMATION_SCHEMA.X` bit is represented as a single (quoted) Identifier. 801 # Otherwise, we wouldn't correctly qualify a `Table` node that references these 802 # views, because it would seem like the "catalog" part is set, when it'd actually 803 # be the region/dataset. Merging the two identifiers into a single one is done to 804 # avoid producing a 4-part Table reference, which would cause issues in the schema 805 # module, when there are 3-part table names mixed with information schema views. 806 # 807 # See: https://cloud.google.com/bigquery/docs/information-schema-intro#syntax 808 table_parts = table.parts 809 if len(table_parts) > 1 and table_parts[-2].name.upper() == "INFORMATION_SCHEMA": 810 # We need to alias the table here to avoid breaking existing qualified columns. 811 # This is expected to be safe, because if there's an actual alias coming up in 812 # the token stream, it will overwrite this one. If there isn't one, we are only 813 # exposing the name that can be used to reference the view explicitly (a no-op). 814 exp.alias_( 815 table, 816 t.cast(exp.Identifier, alias or table_parts[-1]), 817 table=True, 818 copy=False, 819 ) 820 821 info_schema_view = f"{table_parts[-2].name}.{table_parts[-1].name}" 822 new_this = exp.Identifier(this=info_schema_view, quoted=True).update_positions( 823 line=table_parts[-2].meta.get("line"), 824 col=table_parts[-1].meta.get("col"), 825 start=table_parts[-2].meta.get("start"), 826 end=table_parts[-1].meta.get("end"), 827 ) 828 table.set("this", new_this) 829 table.set("db", seq_get(table_parts, -3)) 830 table.set("catalog", seq_get(table_parts, -4)) 831 832 return table 833 834 def _parse_column(self) -> t.Optional[exp.Expression]: 835 column = super()._parse_column() 836 if isinstance(column, exp.Column): 837 parts = column.parts 838 if any("." in p.name for p in parts): 839 catalog, db, table, this, *rest = ( 840 exp.to_identifier(p, quoted=True) 841 for p in split_num_words(".".join(p.name for p in parts), ".", 4) 842 ) 843 844 if rest and this: 845 this = exp.Dot.build([this, *rest]) # type: ignore 846 847 column = exp.Column(this=this, table=table, db=db, catalog=catalog) 848 column.meta["quoted_column"] = True 849 850 return column 851 852 @t.overload 853 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 
854 855 @t.overload 856 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 857 858 def _parse_json_object(self, agg=False): 859 json_object = super()._parse_json_object() 860 array_kv_pair = seq_get(json_object.expressions, 0) 861 862 # Converts BQ's "signature 2" of JSON_OBJECT into SQLGlot's canonical representation 863 # https://cloud.google.com/bigquery/docs/reference/standard-sql/json_functions#json_object_signature2 864 if ( 865 array_kv_pair 866 and isinstance(array_kv_pair.this, exp.Array) 867 and isinstance(array_kv_pair.expression, exp.Array) 868 ): 869 keys = array_kv_pair.this.expressions 870 values = array_kv_pair.expression.expressions 871 872 json_object.set( 873 "expressions", 874 [exp.JSONKeyValue(this=k, expression=v) for k, v in zip(keys, values)], 875 ) 876 877 return json_object 878 879 def _parse_bracket( 880 self, this: t.Optional[exp.Expression] = None 881 ) -> t.Optional[exp.Expression]: 882 bracket = super()._parse_bracket(this) 883 884 if this is bracket: 885 return bracket 886 887 if isinstance(bracket, exp.Bracket): 888 for expression in bracket.expressions: 889 name = expression.name.upper() 890 891 if name not in self.BRACKET_OFFSETS: 892 break 893 894 offset, safe = self.BRACKET_OFFSETS[name] 895 bracket.set("offset", offset) 896 bracket.set("safe", safe) 897 expression.replace(expression.expressions[0]) 898 899 return bracket 900 901 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 902 unnest = super()._parse_unnest(with_alias=with_alias) 903 904 if not unnest: 905 return None 906 907 unnest_expr = seq_get(unnest.expressions, 0) 908 if unnest_expr: 909 from sqlglot.optimizer.annotate_types import annotate_types 910 911 unnest_expr = annotate_types(unnest_expr, dialect=self.dialect) 912 913 # Unnesting a nested array (i.e array of structs) explodes the top-level struct fields, 914 # in contrast to other dialects such as DuckDB which flattens only the array by default 915 if unnest_expr.is_type(exp.DataType.Type.ARRAY) and any( 916 array_elem.is_type(exp.DataType.Type.STRUCT) 917 for array_elem in unnest_expr._type.expressions 918 ): 919 unnest.set("explode_array", True) 920 921 return unnest 922 923 def _parse_make_interval(self) -> exp.MakeInterval: 924 expr = exp.MakeInterval() 925 926 for arg_key in expr.arg_types: 927 value = self._parse_lambda() 928 929 if not value: 930 break 931 932 # Non-named arguments are filled sequentially, (optionally) followed by named arguments 933 # that can appear in any order e.g MAKE_INTERVAL(1, minute => 5, day => 2) 934 if isinstance(value, exp.Kwarg): 935 arg_key = value.this.name 936 937 expr.set(arg_key, value) 938 939 self._match(TokenType.COMMA) 940 941 return expr 942 943 def _parse_features_at_time(self) -> exp.FeaturesAtTime: 944 expr = self.expression( 945 exp.FeaturesAtTime, 946 this=(self._match(TokenType.TABLE) and self._parse_table()) 947 or self._parse_select(nested=True), 948 ) 949 950 while self._match(TokenType.COMMA): 951 arg = self._parse_lambda() 952 953 # Get the LHS of the Kwarg and set the arg to that value, e.g 954 # "num_rows => 1" sets the expr's `num_rows` arg 955 if arg: 956 expr.set(arg.this.name, arg) 957 958 return expr 959 960 def _parse_export_data(self) -> exp.Export: 961 self._match_text_seq("DATA") 962 963 return self.expression( 964 exp.Export, 965 connection=self._match_text_seq("WITH", "CONNECTION") and self._parse_table_parts(), 966 options=self._parse_properties(), 967 this=self._match_text_seq("AS") and self._parse_select(), 968 ) 
969 970 class Generator(generator.Generator): 971 INTERVAL_ALLOWS_PLURAL_FORM = False 972 JOIN_HINTS = False 973 QUERY_HINTS = False 974 TABLE_HINTS = False 975 LIMIT_FETCH = "LIMIT" 976 RENAME_TABLE_WITH_DB = False 977 NVL2_SUPPORTED = False 978 UNNEST_WITH_ORDINALITY = False 979 COLLATE_IS_FUNC = True 980 LIMIT_ONLY_LITERALS = True 981 SUPPORTS_TABLE_ALIAS_COLUMNS = False 982 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 983 JSON_KEY_VALUE_PAIR_SEP = "," 984 NULL_ORDERING_SUPPORTED = False 985 IGNORE_NULLS_IN_FUNC = True 986 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 987 CAN_IMPLEMENT_ARRAY_ANY = True 988 SUPPORTS_TO_NUMBER = False 989 NAMED_PLACEHOLDER_TOKEN = "@" 990 HEX_FUNC = "TO_HEX" 991 WITH_PROPERTIES_PREFIX = "OPTIONS" 992 SUPPORTS_EXPLODING_PROJECTIONS = False 993 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 994 SUPPORTS_UNIX_SECONDS = True 995 996 TRANSFORMS = { 997 **generator.Generator.TRANSFORMS, 998 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 999 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1000 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1001 exp.Array: inline_array_unless_query, 1002 exp.ArrayContains: _array_contains_sql, 1003 exp.ArrayFilter: filter_array_using_unnest, 1004 exp.ArrayRemove: filter_array_using_unnest, 1005 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1006 exp.CollateProperty: lambda self, e: ( 1007 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1008 if e.args.get("default") 1009 else f"COLLATE {self.sql(e, 'this')}" 1010 ), 1011 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1012 exp.CountIf: rename_func("COUNTIF"), 1013 exp.Create: _create_sql, 1014 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1015 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1016 exp.DateDiff: lambda self, e: self.func( 1017 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1018 ), 1019 exp.DateFromParts: rename_func("DATE"), 1020 exp.DateStrToDate: datestrtodate_sql, 1021 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1022 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1023 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1024 exp.FromTimeZone: lambda self, e: self.func( 1025 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1026 ), 1027 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1028 exp.GroupConcat: lambda self, e: groupconcat_sql( 1029 self, e, func_name="STRING_AGG", within_group=False 1030 ), 1031 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1032 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1033 exp.If: if_sql(false_value="NULL"), 1034 exp.ILike: no_ilike_sql, 1035 exp.IntDiv: rename_func("DIV"), 1036 exp.Int64: rename_func("INT64"), 1037 exp.JSONExtract: _json_extract_sql, 1038 exp.JSONExtractArray: _json_extract_sql, 1039 exp.JSONExtractScalar: _json_extract_sql, 1040 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1041 exp.Levenshtein: _levenshtein_sql, 1042 exp.Max: max_or_greatest, 1043 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1044 exp.MD5Digest: rename_func("MD5"), 1045 exp.Min: min_or_least, 1046 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1047 exp.RegexpExtract: lambda self, e: self.func( 1048 "REGEXP_EXTRACT", 1049 e.this, 1050 e.expression, 1051 e.args.get("position"), 1052 e.args.get("occurrence"), 1053 ), 1054 exp.RegexpExtractAll: lambda self, e: self.func( 1055 "REGEXP_EXTRACT_ALL", e.this, e.expression 
1056 ), 1057 exp.RegexpReplace: regexp_replace_sql, 1058 exp.RegexpLike: rename_func("REGEXP_CONTAINS"), 1059 exp.ReturnsProperty: _returnsproperty_sql, 1060 exp.Rollback: lambda *_: "ROLLBACK TRANSACTION", 1061 exp.Select: transforms.preprocess( 1062 [ 1063 transforms.explode_projection_to_unnest(), 1064 transforms.unqualify_unnest, 1065 transforms.eliminate_distinct_on, 1066 _alias_ordered_group, 1067 transforms.eliminate_semi_and_anti_joins, 1068 ] 1069 ), 1070 exp.SHA: rename_func("SHA1"), 1071 exp.SHA2: sha256_sql, 1072 exp.Space: space_sql, 1073 exp.StabilityProperty: lambda self, e: ( 1074 "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC" 1075 ), 1076 exp.String: rename_func("STRING"), 1077 exp.StrPosition: lambda self, e: ( 1078 strposition_sql( 1079 self, e, func_name="INSTR", supports_position=True, supports_occurrence=True 1080 ) 1081 ), 1082 exp.StrToDate: _str_to_datetime_sql, 1083 exp.StrToTime: _str_to_datetime_sql, 1084 exp.TimeAdd: date_add_interval_sql("TIME", "ADD"), 1085 exp.TimeFromParts: rename_func("TIME"), 1086 exp.TimestampFromParts: rename_func("DATETIME"), 1087 exp.TimeSub: date_add_interval_sql("TIME", "SUB"), 1088 exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"), 1089 exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"), 1090 exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"), 1091 exp.TimeStrToTime: timestrtotime_sql, 1092 exp.Transaction: lambda *_: "BEGIN TRANSACTION", 1093 exp.TsOrDsAdd: _ts_or_ds_add_sql, 1094 exp.TsOrDsDiff: _ts_or_ds_diff_sql, 1095 exp.TsOrDsToTime: rename_func("TIME"), 1096 exp.TsOrDsToDatetime: rename_func("DATETIME"), 1097 exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"), 1098 exp.Unhex: rename_func("FROM_HEX"), 1099 exp.UnixDate: rename_func("UNIX_DATE"), 1100 exp.UnixToTime: _unix_to_time_sql, 1101 exp.Uuid: lambda *_: "GENERATE_UUID()", 1102 exp.Values: _derived_table_values_to_unnest, 1103 exp.VariancePop: rename_func("VAR_POP"), 1104 exp.SafeDivide: rename_func("SAFE_DIVIDE"), 1105 } 1106 1107 SUPPORTED_JSON_PATH_PARTS = { 1108 exp.JSONPathKey, 1109 exp.JSONPathRoot, 1110 exp.JSONPathSubscript, 1111 } 1112 1113 TYPE_MAPPING = { 1114 **generator.Generator.TYPE_MAPPING, 1115 exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC", 1116 exp.DataType.Type.BIGINT: "INT64", 1117 exp.DataType.Type.BINARY: "BYTES", 1118 exp.DataType.Type.BLOB: "BYTES", 1119 exp.DataType.Type.BOOLEAN: "BOOL", 1120 exp.DataType.Type.CHAR: "STRING", 1121 exp.DataType.Type.DECIMAL: "NUMERIC", 1122 exp.DataType.Type.DOUBLE: "FLOAT64", 1123 exp.DataType.Type.FLOAT: "FLOAT64", 1124 exp.DataType.Type.INT: "INT64", 1125 exp.DataType.Type.NCHAR: "STRING", 1126 exp.DataType.Type.NVARCHAR: "STRING", 1127 exp.DataType.Type.SMALLINT: "INT64", 1128 exp.DataType.Type.TEXT: "STRING", 1129 exp.DataType.Type.TIMESTAMP: "DATETIME", 1130 exp.DataType.Type.TIMESTAMPNTZ: "DATETIME", 1131 exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP", 1132 exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP", 1133 exp.DataType.Type.TINYINT: "INT64", 1134 exp.DataType.Type.ROWVERSION: "BYTES", 1135 exp.DataType.Type.UUID: "STRING", 1136 exp.DataType.Type.VARBINARY: "BYTES", 1137 exp.DataType.Type.VARCHAR: "STRING", 1138 exp.DataType.Type.VARIANT: "ANY TYPE", 1139 } 1140 1141 PROPERTIES_LOCATION = { 1142 **generator.Generator.PROPERTIES_LOCATION, 1143 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1144 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1145 } 1146 1147 # WINDOW comes after QUALIFY 1148 # 
https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause 1149 AFTER_HAVING_MODIFIER_TRANSFORMS = { 1150 "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"], 1151 "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"], 1152 } 1153 1154 # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords 1155 RESERVED_KEYWORDS = { 1156 "all", 1157 "and", 1158 "any", 1159 "array", 1160 "as", 1161 "asc", 1162 "assert_rows_modified", 1163 "at", 1164 "between", 1165 "by", 1166 "case", 1167 "cast", 1168 "collate", 1169 "contains", 1170 "create", 1171 "cross", 1172 "cube", 1173 "current", 1174 "default", 1175 "define", 1176 "desc", 1177 "distinct", 1178 "else", 1179 "end", 1180 "enum", 1181 "escape", 1182 "except", 1183 "exclude", 1184 "exists", 1185 "extract", 1186 "false", 1187 "fetch", 1188 "following", 1189 "for", 1190 "from", 1191 "full", 1192 "group", 1193 "grouping", 1194 "groups", 1195 "hash", 1196 "having", 1197 "if", 1198 "ignore", 1199 "in", 1200 "inner", 1201 "intersect", 1202 "interval", 1203 "into", 1204 "is", 1205 "join", 1206 "lateral", 1207 "left", 1208 "like", 1209 "limit", 1210 "lookup", 1211 "merge", 1212 "natural", 1213 "new", 1214 "no", 1215 "not", 1216 "null", 1217 "nulls", 1218 "of", 1219 "on", 1220 "or", 1221 "order", 1222 "outer", 1223 "over", 1224 "partition", 1225 "preceding", 1226 "proto", 1227 "qualify", 1228 "range", 1229 "recursive", 1230 "respect", 1231 "right", 1232 "rollup", 1233 "rows", 1234 "select", 1235 "set", 1236 "some", 1237 "struct", 1238 "tablesample", 1239 "then", 1240 "to", 1241 "treat", 1242 "true", 1243 "unbounded", 1244 "union", 1245 "unnest", 1246 "using", 1247 "when", 1248 "where", 1249 "window", 1250 "with", 1251 "within", 1252 } 1253 1254 def datetrunc_sql(self, expression: exp.DateTrunc) -> str: 1255 unit = expression.unit 1256 unit_sql = unit.name if unit.is_string else self.sql(unit) 1257 return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone")) 1258 1259 def mod_sql(self, expression: exp.Mod) -> str: 1260 this = expression.this 1261 expr = expression.expression 1262 return self.func( 1263 "MOD", 1264 this.unnest() if isinstance(this, exp.Paren) else this, 1265 expr.unnest() if isinstance(expr, exp.Paren) else expr, 1266 ) 1267 1268 def column_parts(self, expression: exp.Column) -> str: 1269 if expression.meta.get("quoted_column"): 1270 # If a column reference is of the form `dataset.table`.name, we need 1271 # to preserve the quoted table path, otherwise the reference breaks 1272 table_parts = ".".join(p.name for p in expression.parts[:-1]) 1273 table_path = self.sql(exp.Identifier(this=table_parts, quoted=True)) 1274 return f"{table_path}.{self.sql(expression, 'this')}" 1275 1276 return super().column_parts(expression) 1277 1278 def table_parts(self, expression: exp.Table) -> str: 1279 # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so 1280 # we need to make sure the correct quoting is used in each case. 
1281 # 1282 # For example, if there is a CTE x that clashes with a schema name, then the former will 1283 # return the table y in that schema, whereas the latter will return the CTE's y column: 1284 # 1285 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y` -> cross join 1286 # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest 1287 if expression.meta.get("quoted_table"): 1288 table_parts = ".".join(p.name for p in expression.parts) 1289 return self.sql(exp.Identifier(this=table_parts, quoted=True)) 1290 1291 return super().table_parts(expression) 1292 1293 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1294 this = expression.this 1295 if isinstance(this, exp.TsOrDsToDatetime): 1296 func_name = "FORMAT_DATETIME" 1297 elif isinstance(this, exp.TsOrDsToTimestamp): 1298 func_name = "FORMAT_TIMESTAMP" 1299 else: 1300 func_name = "FORMAT_DATE" 1301 1302 time_expr = ( 1303 this 1304 if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate)) 1305 else expression 1306 ) 1307 return self.func( 1308 func_name, self.format_time(expression), time_expr.this, expression.args.get("zone") 1309 ) 1310 1311 def eq_sql(self, expression: exp.EQ) -> str: 1312 # Operands of = cannot be NULL in BigQuery 1313 if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null): 1314 if not isinstance(expression.parent, exp.Update): 1315 return "NULL" 1316 1317 return self.binary(expression, "=") 1318 1319 def attimezone_sql(self, expression: exp.AtTimeZone) -> str: 1320 parent = expression.parent 1321 1322 # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]). 1323 # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included. 1324 if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"): 1325 return self.func( 1326 "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone")) 1327 ) 1328 1329 return super().attimezone_sql(expression) 1330 1331 def trycast_sql(self, expression: exp.TryCast) -> str: 1332 return self.cast_sql(expression, safe_prefix="SAFE_") 1333 1334 def bracket_sql(self, expression: exp.Bracket) -> str: 1335 this = expression.this 1336 expressions = expression.expressions 1337 1338 if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT): 1339 arg = expressions[0] 1340 if arg.type is None: 1341 from sqlglot.optimizer.annotate_types import annotate_types 1342 1343 arg = annotate_types(arg, dialect=self.dialect) 1344 1345 if arg.type and arg.type.this in exp.DataType.TEXT_TYPES: 1346 # BQ doesn't support bracket syntax with string values for structs 1347 return f"{self.sql(this)}.{arg.name}" 1348 1349 expressions_sql = self.expressions(expression, flat=True) 1350 offset = expression.args.get("offset") 1351 1352 if offset == 0: 1353 expressions_sql = f"OFFSET({expressions_sql})" 1354 elif offset == 1: 1355 expressions_sql = f"ORDINAL({expressions_sql})" 1356 elif offset is not None: 1357 self.unsupported(f"Unsupported array offset: {offset}") 1358 1359 if expression.args.get("safe"): 1360 expressions_sql = f"SAFE_{expressions_sql}" 1361 1362 return f"{self.sql(this)}[{expressions_sql}]" 1363 1364 def in_unnest_op(self, expression: exp.Unnest) -> str: 1365 return self.sql(expression) 1366 1367 def version_sql(self, expression: exp.Version) -> str: 1368 if expression.name == "TIMESTAMP": 1369 expression.set("this", "SYSTEM_TIME") 1370 return super().version_sql(expression) 1371 1372 def contains_sql(self, 
expression: exp.Contains) -> str: 1373 this = expression.this 1374 expr = expression.expression 1375 1376 if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower): 1377 this = this.this 1378 expr = expr.this 1379 1380 return self.func("CONTAINS_SUBSTR", this, expr) 1381 1382 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1383 this = expression.this 1384 1385 # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3] 1386 # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions, 1387 # because they aren't literals and so the above syntax is invalid BigQuery. 1388 if isinstance(this, exp.Array): 1389 elem = seq_get(this.expressions, 0) 1390 if not (elem and elem.find(exp.Query)): 1391 return f"{self.sql(expression, 'to')}{self.sql(this)}" 1392 1393 return super().cast_sql(expression, safe_prefix=safe_prefix) 1394 1395 def declareitem_sql(self, expression: exp.DeclareItem) -> str: 1396 variables = self.expressions(expression, "this") 1397 default = self.sql(expression, "default") 1398 default = f" DEFAULT {default}" if default else "" 1399 kind = self.sql(expression, "kind") 1400 kind = f" {kind}" if kind else "" 1401 1402 return f"{variables}{kind}{default}"
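The module-level transforms above are easiest to see end to end. A minimal sketch (the commented output is the expected result at the time of writing, not a guarantee across versions): transpiling a derived-table VALUES clause into BigQuery exercises _derived_table_values_to_unnest.

import sqlglot

# BigQuery does not support VALUES in a FROM clause, so the dialect rewrites
# it to UNNEST over an array of structs (see _derived_table_values_to_unnest).
sql = "SELECT * FROM (VALUES (1, 'a')) AS t(x, y)"
print(sqlglot.transpile(sql, read="duckdb", write="bigquery")[0])
# expected: SELECT * FROM UNNEST([STRUCT(1 AS x, 'a' AS y)]) AS t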
First day of the week in DATE_TRUNC(week). Defaults to 0 (Monday). -1 would be Sunday.
Whether the base comes first in the LOG function.
Possible values: True, False, None (two arguments are not supported by LOG)
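Both settings can be read straight off the dialect; a minimal sketch, where the commented values are assumptions about the current BigQuery defaults:

from sqlglot.dialects.dialect import Dialect

bq = Dialect.get_or_raise("bigquery")
print(bq.WEEK_OFFSET)     # -1: DATE_TRUNC(week) weeks start on Sunday in BigQuery
print(bq.LOG_BASE_FIRST)  # False: BigQuery's LOG(value, base) takes the base second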
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id
FROM data
WHERE my_id = 1
GROUP BY my_id
HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" throughout the query, except in:
- BigQuery, which forwards the alias to the GROUP BY and HAVING clauses, i.e. it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1"
- Clickhouse, which forwards the alias across the whole query, i.e. it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
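A hedged sketch of the BigQuery behavior, running sqlglot's qualify pass over the query above (the exact output shape may vary by version):

import sqlglot
from sqlglot.optimizer.qualify import qualify

sql = """
WITH data AS (SELECT 1 AS id, 2 AS my_id)
SELECT id AS my_id FROM data
WHERE my_id = 1 GROUP BY my_id HAVING my_id = 1
"""
# Under BigQuery, my_id in GROUP BY/HAVING is expected to resolve to the
# projected alias (i.e. to data.id), while WHERE keeps pointing at data.my_id.
print(qualify(sqlglot.parse_one(sql, read="bigquery"), dialect="bigquery").sql("bigquery"))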
Whether the name of the function should be preserved inside the node's metadata. This can be useful for round-tripping deprecated and new functions that share an AST node, e.g. JSON_VALUE vs JSON_EXTRACT_SCALAR in BigQuery.
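For instance, JSON_VALUE and JSON_EXTRACT_SCALAR both parse into exp.JSONExtractScalar; a sketch of the round-trip this enables, assuming the original spelling is kept in the node's metadata:

import sqlglot

node = sqlglot.parse_one("SELECT JSON_VALUE(col, '$.a')", read="bigquery")
# The original function name should survive generation instead of being
# canonicalized to JSON_EXTRACT_SCALAR.
print(node.sql("bigquery"))  # expected: SELECT JSON_VALUE(col, '$.a')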
Whether hex strings such as x'CC' evaluate to an integer or a binary/blob type.
Specifies the strategy according to which identifiers should be normalized.
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Associates this dialect's time formats with their equivalent Python strftime formats.
Helper which is used for parsing the special syntax CAST(x AS DATE FORMAT 'yyyy').
If empty, the corresponding trie will be constructed off of TIME_MAPPING.
Columns that are auto-generated by the engine corresponding to this dialect.
For example, such columns may be excluded from SELECT * queries.
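A sketch, assuming the attribute is named PSEUDOCOLUMNS: for BigQuery this covers the engine-generated partition pseudocolumns.

from sqlglot.dialects.dialect import Dialect

# BigQuery exposes _PARTITIONTIME/_PARTITIONDATE on ingestion-time
# partitioned tables; such columns should not be expanded by SELECT *.
print(Dialect.get_or_raise("bigquery").PSEUDOCOLUMNS)
# expected: {'_PARTITIONTIME', '_PARTITIONDATE'}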
Whether a set operation uses DISTINCT by default. This is None when either DISTINCT or ALL must be explicitly specified.
def normalize_identifier(self, expression: E) -> E:
Transforms an identifier in a way that resembles how it'd be resolved by this dialect.
For example, an identifier like FoO would be resolved as foo in Postgres, because it
lowercases all unquoted identifiers. On the other hand, Snowflake uppercases them, so
it would resolve it as FOO. If it was quoted, it'd need to be treated as case-sensitive,
and so any normalization would be prohibited in order to avoid "breaking" the identifier.
There are also dialects like Spark, which are case-insensitive even when quotes are present, and dialects like MySQL, whose resolution rules match those employed by the underlying operating system; for example, identifiers may always be case-sensitive on Linux.
Finally, the normalization behavior of some engines can even be controlled through flags, like in Redshift's case, where users can explicitly set enable_case_sensitive_identifier.
SQLGlot aims to understand and handle all of these different behaviors gracefully, so that it can analyze queries in the optimizer and successfully capture their semantics.
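A short sketch of this contract, comparing three strategies (it assumes Dialect.get_or_raise returns a dialect instance and that the commented values reflect current defaults):

from sqlglot import exp
from sqlglot.dialects.dialect import Dialect

def normalize(dialect: str) -> str:
    ident = exp.to_identifier("FoO")  # unquoted identifier, no parent
    return Dialect.get_or_raise(dialect).normalize_identifier(ident).name

print(normalize("postgres"))   # foo: lowercases unquoted identifiers
print(normalize("snowflake"))  # FOO: uppercases unquoted identifiers
print(normalize("bigquery"))   # foo: case-insensitive, lowered for analysis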
Mapping of an escaped sequence (\n) to its unescaped version (a literal newline character).
class Tokenizer(tokens.Tokenizer):
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- SINGLE_TOKENS
- BIT_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- VAR_SINGLE_TOKENS
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
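A minimal tokenization sketch exercising the settings above (backtick identifiers, r'' raw strings, b'' byte strings, and # line comments):

import sqlglot

sql = "SELECT r'a+' AS x, b'bytes' FROM `proj.ds.t` # trailing note"
for token in sqlglot.tokenize(sql, read="bigquery"):
    print(token.token_type, repr(token.text))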
class Parser(parser.Parser):
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
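A sketch of one BigQuery-specific parse implemented above: dash-separated project names (see _parse_table_part) are folded into a single table identifier rather than being parsed as subtraction.

import sqlglot

e = sqlglot.parse_one("SELECT * FROM my-project.mydataset.mytable", read="bigquery")
print(e.sql("bigquery"))                  # expected: SELECT * FROM my-project.mydataset.mytable
print(e.find(sqlglot.exp.Table).catalog)  # expected: my-project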
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- COLON_PLACEHOLDER_TOKENS
- ARRAY_CONSTRUCTORS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- LAMBDAS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- IDENTIFY_PIVOT_STRINGS
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- JSON_EXTRACT_REQUIRES_JSON_EXPRESSION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
970 class Generator(generator.Generator): 971 INTERVAL_ALLOWS_PLURAL_FORM = False 972 JOIN_HINTS = False 973 QUERY_HINTS = False 974 TABLE_HINTS = False 975 LIMIT_FETCH = "LIMIT" 976 RENAME_TABLE_WITH_DB = False 977 NVL2_SUPPORTED = False 978 UNNEST_WITH_ORDINALITY = False 979 COLLATE_IS_FUNC = True 980 LIMIT_ONLY_LITERALS = True 981 SUPPORTS_TABLE_ALIAS_COLUMNS = False 982 UNPIVOT_ALIASES_ARE_IDENTIFIERS = False 983 JSON_KEY_VALUE_PAIR_SEP = "," 984 NULL_ORDERING_SUPPORTED = False 985 IGNORE_NULLS_IN_FUNC = True 986 JSON_PATH_SINGLE_QUOTE_ESCAPE = True 987 CAN_IMPLEMENT_ARRAY_ANY = True 988 SUPPORTS_TO_NUMBER = False 989 NAMED_PLACEHOLDER_TOKEN = "@" 990 HEX_FUNC = "TO_HEX" 991 WITH_PROPERTIES_PREFIX = "OPTIONS" 992 SUPPORTS_EXPLODING_PROJECTIONS = False 993 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 994 SUPPORTS_UNIX_SECONDS = True 995 996 TRANSFORMS = { 997 **generator.Generator.TRANSFORMS, 998 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 999 exp.ArgMax: arg_max_or_min_no_count("MAX_BY"), 1000 exp.ArgMin: arg_max_or_min_no_count("MIN_BY"), 1001 exp.Array: inline_array_unless_query, 1002 exp.ArrayContains: _array_contains_sql, 1003 exp.ArrayFilter: filter_array_using_unnest, 1004 exp.ArrayRemove: filter_array_using_unnest, 1005 exp.Cast: transforms.preprocess([transforms.remove_precision_parameterized_types]), 1006 exp.CollateProperty: lambda self, e: ( 1007 f"DEFAULT COLLATE {self.sql(e, 'this')}" 1008 if e.args.get("default") 1009 else f"COLLATE {self.sql(e, 'this')}" 1010 ), 1011 exp.Commit: lambda *_: "COMMIT TRANSACTION", 1012 exp.CountIf: rename_func("COUNTIF"), 1013 exp.Create: _create_sql, 1014 exp.CTE: transforms.preprocess([_pushdown_cte_column_names]), 1015 exp.DateAdd: date_add_interval_sql("DATE", "ADD"), 1016 exp.DateDiff: lambda self, e: self.func( 1017 "DATE_DIFF", e.this, e.expression, unit_to_var(e) 1018 ), 1019 exp.DateFromParts: rename_func("DATE"), 1020 exp.DateStrToDate: datestrtodate_sql, 1021 exp.DateSub: date_add_interval_sql("DATE", "SUB"), 1022 exp.DatetimeAdd: date_add_interval_sql("DATETIME", "ADD"), 1023 exp.DatetimeSub: date_add_interval_sql("DATETIME", "SUB"), 1024 exp.FromTimeZone: lambda self, e: self.func( 1025 "DATETIME", self.func("TIMESTAMP", e.this, e.args.get("zone")), "'UTC'" 1026 ), 1027 exp.GenerateSeries: rename_func("GENERATE_ARRAY"), 1028 exp.GroupConcat: lambda self, e: groupconcat_sql( 1029 self, e, func_name="STRING_AGG", within_group=False 1030 ), 1031 exp.Hex: lambda self, e: self.func("UPPER", self.func("TO_HEX", self.sql(e, "this"))), 1032 exp.HexString: lambda self, e: self.hexstring_sql(e, binary_function_repr="FROM_HEX"), 1033 exp.If: if_sql(false_value="NULL"), 1034 exp.ILike: no_ilike_sql, 1035 exp.IntDiv: rename_func("DIV"), 1036 exp.Int64: rename_func("INT64"), 1037 exp.JSONExtract: _json_extract_sql, 1038 exp.JSONExtractArray: _json_extract_sql, 1039 exp.JSONExtractScalar: _json_extract_sql, 1040 exp.JSONFormat: rename_func("TO_JSON_STRING"), 1041 exp.Levenshtein: _levenshtein_sql, 1042 exp.Max: max_or_greatest, 1043 exp.MD5: lambda self, e: self.func("TO_HEX", self.func("MD5", e.this)), 1044 exp.MD5Digest: rename_func("MD5"), 1045 exp.Min: min_or_least, 1046 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1047 exp.RegexpExtract: lambda self, e: self.func( 1048 "REGEXP_EXTRACT", 1049 e.this, 1050 e.expression, 1051 e.args.get("position"), 1052 e.args.get("occurrence"), 1053 ), 1054 exp.RegexpExtractAll: lambda self, e: self.func( 1055 "REGEXP_EXTRACT_ALL", e.this, e.expression 
        ),
        exp.RegexpReplace: regexp_replace_sql,
        exp.RegexpLike: rename_func("REGEXP_CONTAINS"),
        exp.ReturnsProperty: _returnsproperty_sql,
        exp.Rollback: lambda *_: "ROLLBACK TRANSACTION",
        exp.Select: transforms.preprocess(
            [
                transforms.explode_projection_to_unnest(),
                transforms.unqualify_unnest,
                transforms.eliminate_distinct_on,
                _alias_ordered_group,
                transforms.eliminate_semi_and_anti_joins,
            ]
        ),
        exp.SHA: rename_func("SHA1"),
        exp.SHA2: sha256_sql,
        exp.Space: space_sql,
        exp.StabilityProperty: lambda self, e: (
            "DETERMINISTIC" if e.name == "IMMUTABLE" else "NOT DETERMINISTIC"
        ),
        exp.String: rename_func("STRING"),
        exp.StrPosition: lambda self, e: (
            strposition_sql(
                self, e, func_name="INSTR", supports_position=True, supports_occurrence=True
            )
        ),
        exp.StrToDate: _str_to_datetime_sql,
        exp.StrToTime: _str_to_datetime_sql,
        exp.TimeAdd: date_add_interval_sql("TIME", "ADD"),
        exp.TimeFromParts: rename_func("TIME"),
        exp.TimestampFromParts: rename_func("DATETIME"),
        exp.TimeSub: date_add_interval_sql("TIME", "SUB"),
        exp.TimestampAdd: date_add_interval_sql("TIMESTAMP", "ADD"),
        exp.TimestampDiff: rename_func("TIMESTAMP_DIFF"),
        exp.TimestampSub: date_add_interval_sql("TIMESTAMP", "SUB"),
        exp.TimeStrToTime: timestrtotime_sql,
        exp.Transaction: lambda *_: "BEGIN TRANSACTION",
        exp.TsOrDsAdd: _ts_or_ds_add_sql,
        exp.TsOrDsDiff: _ts_or_ds_diff_sql,
        exp.TsOrDsToTime: rename_func("TIME"),
        exp.TsOrDsToDatetime: rename_func("DATETIME"),
        exp.TsOrDsToTimestamp: rename_func("TIMESTAMP"),
        exp.Unhex: rename_func("FROM_HEX"),
        exp.UnixDate: rename_func("UNIX_DATE"),
        exp.UnixToTime: _unix_to_time_sql,
        exp.Uuid: lambda *_: "GENERATE_UUID()",
        exp.Values: _derived_table_values_to_unnest,
        exp.VariancePop: rename_func("VAR_POP"),
        exp.SafeDivide: rename_func("SAFE_DIVIDE"),
    }

    SUPPORTED_JSON_PATH_PARTS = {
        exp.JSONPathKey,
        exp.JSONPathRoot,
        exp.JSONPathSubscript,
    }

    TYPE_MAPPING = {
        **generator.Generator.TYPE_MAPPING,
        exp.DataType.Type.BIGDECIMAL: "BIGNUMERIC",
        exp.DataType.Type.BIGINT: "INT64",
        exp.DataType.Type.BINARY: "BYTES",
        exp.DataType.Type.BLOB: "BYTES",
        exp.DataType.Type.BOOLEAN: "BOOL",
        exp.DataType.Type.CHAR: "STRING",
        exp.DataType.Type.DECIMAL: "NUMERIC",
        exp.DataType.Type.DOUBLE: "FLOAT64",
        exp.DataType.Type.FLOAT: "FLOAT64",
        exp.DataType.Type.INT: "INT64",
        exp.DataType.Type.NCHAR: "STRING",
        exp.DataType.Type.NVARCHAR: "STRING",
        exp.DataType.Type.SMALLINT: "INT64",
        exp.DataType.Type.TEXT: "STRING",
        exp.DataType.Type.TIMESTAMP: "DATETIME",
        exp.DataType.Type.TIMESTAMPNTZ: "DATETIME",
        exp.DataType.Type.TIMESTAMPTZ: "TIMESTAMP",
        exp.DataType.Type.TIMESTAMPLTZ: "TIMESTAMP",
        exp.DataType.Type.TINYINT: "INT64",
        exp.DataType.Type.ROWVERSION: "BYTES",
        exp.DataType.Type.UUID: "STRING",
        exp.DataType.Type.VARBINARY: "BYTES",
        exp.DataType.Type.VARCHAR: "STRING",
        exp.DataType.Type.VARIANT: "ANY TYPE",
    }

    PROPERTIES_LOCATION = {
        **generator.Generator.PROPERTIES_LOCATION,
        exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA,
        exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED,
    }

    # WINDOW comes after QUALIFY
    # https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#window_clause
    AFTER_HAVING_MODIFIER_TRANSFORMS = {
        "qualify": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["qualify"],
        "windows": generator.Generator.AFTER_HAVING_MODIFIER_TRANSFORMS["windows"],
    }

    # from: https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#reserved_keywords
    RESERVED_KEYWORDS = {
        "all", "and", "any", "array", "as", "asc", "assert_rows_modified", "at",
        "between", "by", "case", "cast", "collate", "contains", "create", "cross",
        "cube", "current", "default", "define", "desc", "distinct", "else", "end",
        "enum", "escape", "except", "exclude", "exists", "extract", "false", "fetch",
        "following", "for", "from", "full", "group", "grouping", "groups", "hash",
        "having", "if", "ignore", "in", "inner", "intersect", "interval", "into",
        "is", "join", "lateral", "left", "like", "limit", "lookup", "merge",
        "natural", "new", "no", "not", "null", "nulls", "of", "on",
        "or", "order", "outer", "over", "partition", "preceding", "proto", "qualify",
        "range", "recursive", "respect", "right", "rollup", "rows", "select", "set",
        "some", "struct", "tablesample", "then", "to", "treat", "true", "unbounded",
        "union", "unnest", "using", "when", "where", "window", "with", "within",
    }

    def datetrunc_sql(self, expression: exp.DateTrunc) -> str:
        unit = expression.unit
        unit_sql = unit.name if unit.is_string else self.sql(unit)
        return self.func("DATE_TRUNC", expression.this, unit_sql, expression.args.get("zone"))

    def mod_sql(self, expression: exp.Mod) -> str:
        this = expression.this
        expr = expression.expression
        return self.func(
            "MOD",
            this.unnest() if isinstance(this, exp.Paren) else this,
            expr.unnest() if isinstance(expr, exp.Paren) else expr,
        )

    def column_parts(self, expression: exp.Column) -> str:
        if expression.meta.get("quoted_column"):
            # If a column reference is of the form `dataset.table`.name, we need
            # to preserve the quoted table path, otherwise the reference breaks
            table_parts = ".".join(p.name for p in expression.parts[:-1])
            table_path = self.sql(exp.Identifier(this=table_parts, quoted=True))
            return f"{table_path}.{self.sql(expression, 'this')}"

        return super().column_parts(expression)

    def table_parts(self, expression: exp.Table) -> str:
        # Depending on the context, `x.y` may not resolve to the same data source as `x`.`y`, so
        # we need to make sure the correct quoting is used in each case.
        #
        # For example, if there is a CTE x that clashes with a schema name, then the former will
        # return the table y in that schema, whereas the latter will return the CTE's y column:
        #
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x.y`   -> cross join
        # - WITH x AS (SELECT [1, 2] AS y) SELECT * FROM x, `x`.`y` -> implicit unnest
        if expression.meta.get("quoted_table"):
            table_parts = ".".join(p.name for p in expression.parts)
            return self.sql(exp.Identifier(this=table_parts, quoted=True))

        return super().table_parts(expression)

    def timetostr_sql(self, expression: exp.TimeToStr) -> str:
        this = expression.this
        if isinstance(this, exp.TsOrDsToDatetime):
            func_name = "FORMAT_DATETIME"
        elif isinstance(this, exp.TsOrDsToTimestamp):
            func_name = "FORMAT_TIMESTAMP"
        else:
            func_name = "FORMAT_DATE"

        time_expr = (
            this
            if isinstance(this, (exp.TsOrDsToDatetime, exp.TsOrDsToTimestamp, exp.TsOrDsToDate))
            else expression
        )
        return self.func(
            func_name, self.format_time(expression), time_expr.this, expression.args.get("zone")
        )

    def eq_sql(self, expression: exp.EQ) -> str:
        # Operands of = cannot be NULL in BigQuery
        if isinstance(expression.left, exp.Null) or isinstance(expression.right, exp.Null):
            if not isinstance(expression.parent, exp.Update):
                return "NULL"

        return self.binary(expression, "=")

    def attimezone_sql(self, expression: exp.AtTimeZone) -> str:
        parent = expression.parent

        # BigQuery allows CAST(.. AS {STRING|TIMESTAMP} [FORMAT <fmt> [AT TIME ZONE <tz>]]).
        # Only the TIMESTAMP one should use the below conversion, when AT TIME ZONE is included.
        if not isinstance(parent, exp.Cast) or not parent.to.is_type("text"):
            return self.func(
                "TIMESTAMP", self.func("DATETIME", expression.this, expression.args.get("zone"))
            )

        return super().attimezone_sql(expression)

    def trycast_sql(self, expression: exp.TryCast) -> str:
        return self.cast_sql(expression, safe_prefix="SAFE_")

    def bracket_sql(self, expression: exp.Bracket) -> str:
        this = expression.this
        expressions = expression.expressions

        if len(expressions) == 1 and this and this.is_type(exp.DataType.Type.STRUCT):
            arg = expressions[0]
            if arg.type is None:
                from sqlglot.optimizer.annotate_types import annotate_types

                arg = annotate_types(arg, dialect=self.dialect)

            if arg.type and arg.type.this in exp.DataType.TEXT_TYPES:
                # BQ doesn't support bracket syntax with string values for structs
                return f"{self.sql(this)}.{arg.name}"

        expressions_sql = self.expressions(expression, flat=True)
        offset = expression.args.get("offset")

        if offset == 0:
            expressions_sql = f"OFFSET({expressions_sql})"
        elif offset == 1:
            expressions_sql = f"ORDINAL({expressions_sql})"
        elif offset is not None:
            self.unsupported(f"Unsupported array offset: {offset}")

        if expression.args.get("safe"):
            expressions_sql = f"SAFE_{expressions_sql}"

        return f"{self.sql(this)}[{expressions_sql}]"

    def in_unnest_op(self, expression: exp.Unnest) -> str:
        return self.sql(expression)

    def version_sql(self, expression: exp.Version) -> str:
        if expression.name == "TIMESTAMP":
            expression.set("this", "SYSTEM_TIME")
        return super().version_sql(expression)

    def contains_sql(self, expression: exp.Contains) -> str:
        this = expression.this
        expr = expression.expression

        if isinstance(this, exp.Lower) and isinstance(expr, exp.Lower):
            this = this.this
            expr = expr.this

        return self.func("CONTAINS_SUBSTR", this, expr)

    def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str:
        this = expression.this

        # This ensures that inline type-annotated ARRAY literals like ARRAY<INT64>[1, 2, 3]
        # are roundtripped unaffected. The inner check excludes ARRAY(SELECT ...) expressions,
        # because they aren't literals and so the above syntax is invalid BigQuery.
        if isinstance(this, exp.Array):
            elem = seq_get(this.expressions, 0)
            if not (elem and elem.find(exp.Query)):
                return f"{self.sql(expression, 'to')}{self.sql(this)}"

        return super().cast_sql(expression, safe_prefix=safe_prefix)

    def declareitem_sql(self, expression: exp.DeclareItem) -> str:
        variables = self.expressions(expression, "this")
        default = self.sql(expression, "default")
        default = f" DEFAULT {default}" if default else ""
        kind = self.sql(expression, "kind")
        kind = f" {kind}" if kind else ""

        return f"{variables}{kind}{default}"
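
The transform table, type mapping, and method overrides above are what the BigQuery generator consults when it turns an AST back into SQL. A minimal sketch of how a few of them surface through sqlglot's public API; the expected outputs in the comments follow from the rules shown here, though exact strings may vary slightly between sqlglot versions:

    import sqlglot

    # TYPE_MAPPING: generic types are renamed (VARCHAR -> STRING, INT -> INT64)
    print(sqlglot.transpile("SELECT CAST(x AS VARCHAR)", write="bigquery")[0])
    # SELECT CAST(x AS STRING)

    # trycast_sql: TRY_CAST is rendered with the SAFE_ prefix
    print(sqlglot.transpile("SELECT TRY_CAST(x AS INT)", write="bigquery")[0])
    # SELECT SAFE_CAST(x AS INT64)

    # eq_sql: comparing against NULL with `=` folds to NULL outside of UPDATE
    print(sqlglot.transpile("SELECT x = NULL", write="bigquery")[0])
    # SELECT NULL

    # TRANSFORMS: exp.RegexpLike is renamed to REGEXP_CONTAINS
    print(sqlglot.transpile("SELECT x RLIKE 'a.b'", read="mysql", write="bigquery")[0])
    # SELECT REGEXP_CONTAINS(x, 'a.b')
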
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the indentation of subqueries and filters under a WHERE clause. Default: 2.
- normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default: ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3.
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False.
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80.
- comments: Whether to preserve comments in the output SQL code. Default: True.
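
These options are forwarded by sqlglot's top-level helpers, so there is usually no need to construct a Generator by hand. A small usage sketch (the pretty-printed output is indicative):

    import sqlglot

    sql = "select col_a, sum(col_b) from tbl group by col_a"

    # pretty, identify, etc. are passed through to the dialect's Generator
    print(sqlglot.transpile(sql, write="bigquery", pretty=True, identify=True)[0])
    # SELECT
    #   `col_a`,
    #   SUM(`col_b`)
    # FROM `tbl`
    # GROUP BY
    #   `col_a`

    # normalize_functions controls function-name casing
    print(sqlglot.transpile(sql, write="bigquery", normalize_functions="lower")[0])
    # SELECT col_a, sum(col_b) FROM tbl GROUP BY col_a
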
Inherited Members
- sqlglot.generator.Generator
- Generator
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- SELECT_KINDS
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- SUPPORTS_MEDIAN
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ARRAY_SIZE_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- STRUCT_DELIMITER
- PARAMETER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- createable_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- datatype_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- unnest_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- log_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- show_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql