sqlglot.parser
from __future__ import annotations

import logging
import re
import typing as t
import itertools
from collections import defaultdict

from sqlglot import exp
from sqlglot.errors import ErrorLevel, ParseError, concat_messages, merge_errors
from sqlglot.helper import apply_index_offset, ensure_list, seq_get
from sqlglot.time import format_time
from sqlglot.tokens import Token, Tokenizer, TokenType
from sqlglot.trie import TrieResult, in_trie, new_trie

if t.TYPE_CHECKING:
    from sqlglot._typing import E, Lit
    from sqlglot.dialects.dialect import Dialect, DialectType

    T = t.TypeVar("T")
    TCeilFloor = t.TypeVar("TCeilFloor", exp.Ceil, exp.Floor)

logger = logging.getLogger("sqlglot")

OPTIONS_TYPE = t.Dict[str, t.Sequence[t.Union[t.Sequence[str], str]]]

# Used to detect alphabetical characters and +/- in timestamp literals
TIME_ZONE_RE: t.Pattern[str] = re.compile(r":.*?[a-zA-Z\+\-]")


def build_var_map(args: t.List) -> exp.StarMap | exp.VarMap:
    if len(args) == 1 and args[0].is_star:
        return exp.StarMap(this=args[0])

    keys = []
    values = []
    for i in range(0, len(args), 2):
        keys.append(args[i])
        values.append(args[i + 1])

    return exp.VarMap(keys=exp.array(*keys, copy=False), values=exp.array(*values, copy=False))


def build_like(args: t.List) -> exp.Escape | exp.Like:
    like = exp.Like(this=seq_get(args, 1), expression=seq_get(args, 0))
    return exp.Escape(this=like, expression=seq_get(args, 2)) if len(args) > 2 else like


def binary_range_parser(
    expr_type: t.Type[exp.Expression], reverse_args: bool = False
) -> t.Callable[[Parser, t.Optional[exp.Expression]], t.Optional[exp.Expression]]:
    def _parse_binary_range(
        self: Parser, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_bitwise()
        if reverse_args:
            this, expression = expression, this
        return self._parse_escape(self.expression(expr_type, this=this, expression=expression))

    return _parse_binary_range


def build_logarithm(args: t.List, dialect: Dialect) -> exp.Func:
    # Default argument order is base, expression
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    if expression:
        if not dialect.LOG_BASE_FIRST:
            this, expression = expression, this
        return exp.Log(this=this, expression=expression)

    return (exp.Ln if dialect.parser_class.LOG_DEFAULTS_TO_LN else exp.Log)(this=this)


def build_hex(args: t.List, dialect: Dialect) -> exp.Hex | exp.LowerHex:
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg) if dialect.HEX_LOWERCASE else exp.Hex(this=arg)


def build_lower(args: t.List) -> exp.Lower | exp.Hex:
    # LOWER(HEX(..)) can be simplified to LowerHex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.LowerHex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Lower(this=arg)


def build_upper(args: t.List) -> exp.Upper | exp.Hex:
    # UPPER(HEX(..)) can be simplified to Hex to simplify its transpilation
    arg = seq_get(args, 0)
    return exp.Hex(this=arg.this) if isinstance(arg, exp.Hex) else exp.Upper(this=arg)


def build_extract_json_with_path(expr_type: t.Type[E]) -> t.Callable[[t.List, Dialect], E]:
    def _builder(args: t.List, dialect: Dialect) -> E:
        expression = expr_type(
            this=seq_get(args, 0), expression=dialect.to_json_path(seq_get(args, 1))
        )
        if len(args) > 2 and expr_type is exp.JSONExtract:
            expression.set("expressions", args[2:])

        return expression

    return _builder
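
# Illustrative sketch, not from the original source: the builder returned by
# build_extract_json_with_path lets each dialect normalize the raw path argument
# into a structured JSON path node. Doctest-style, via the public API:
#
#     >>> import sqlglot
#     >>> sqlglot.parse_one("SELECT JSON_EXTRACT(doc, '$.a.b')", read="mysql")
#     ... # roughly: Select(expressions=[JSONExtract(this=Column(doc), expression=JSONPath(...))])
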
def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)


def build_pad(args: t.List, is_left: bool = True):
    return exp.Pad(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        fill_pattern=seq_get(args, 2),
        is_left=is_left,
    )


def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp


def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)


def build_trim(args: t.List, is_left: bool = True):
    return exp.Trim(
        this=seq_get(args, 0),
        expression=seq_get(args, 1),
        position="LEADING" if is_left else "TRAILING",
    )


def build_coalesce(
    args: t.List, is_nvl: t.Optional[bool] = None, is_null: t.Optional[bool] = None
) -> exp.Coalesce:
    return exp.Coalesce(this=seq_get(args, 0), expressions=args[1:], is_nvl=is_nvl, is_null=is_null)


def build_locate_strposition(args: t.List):
    return exp.StrPosition(
        this=seq_get(args, 1),
        substr=seq_get(args, 0),
        position=seq_get(args, 2),
    )


class _Parser(type):
    def __new__(cls, clsname, bases, attrs):
        klass = super().__new__(cls, clsname, bases, attrs)

        klass.SHOW_TRIE = new_trie(key.split(" ") for key in klass.SHOW_PARSERS)
        klass.SET_TRIE = new_trie(key.split(" ") for key in klass.SET_PARSERS)

        return klass


class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """
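    # Illustrative usage sketch, not from the original source: a Parser is normally
    # obtained through a Dialect, but the base class also works with the base
    # Tokenizer, assuming the default (base) dialect:
    #
    #     >>> from sqlglot.tokens import Tokenizer
    #     >>> sql = "SELECT a FROM b"
    #     >>> expressions = Parser().parse(Tokenizer().tokenize(sql), sql)
    #     >>> expressions[0].sql()
    #     'SELECT a FROM b'
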
    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }
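    # Illustrative sketch, not from the original source: the parser upper-cases each
    # function name and dispatches to the matching builder in FUNCTIONS; builders
    # declared with a `dialect` parameter also receive the active dialect. E.g.:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT LOG10(x)").expressions[0]
    #     ... # roughly: Log(this=Literal(this=10, is_string=False), expression=Column(this=x))
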
    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }
    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }
    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }
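    # Illustrative sketch, not from the original source: LAMBDAS handles the `->`
    # and `=>` arrows inside higher-order functions, in dialects that support them
    # (e.g. DuckDB):
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT list_transform([1, 2], x -> x + 1)", read="duckdb")
    #     ... # the second argument parses as exp.Lambda(this=x + 1, expressions=[x])
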
    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }
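    # Illustrative sketch, not from the original source: EXPRESSION_PARSERS is what
    # parse_into consults when asked for a specific node type, e.g.:
    #
    #     >>> import sqlglot
    #     >>> from sqlglot import exp
    #     >>> sqlglot.parse_one("a.b.c", into=exp.Table)
    #     ... # roughly: Table(catalog=Identifier(a), db=Identifier(b), this=Identifier(c))
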
    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }
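    # Illustrative sketch, not from the original source: STRING_PARSERS and
    # NUMERIC_PARSERS map literal tokens straight to AST nodes, e.g.:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT x'1F'", read="mysql").expressions[0]
    #     ... # roughly: HexString(this='1F')
    #     >>> sqlglot.parse_one("SELECT N'abc'", read="tsql").expressions[0]
    #     ... # roughly: National(this='abc')
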
    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }
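    # Illustrative sketch, not from the original source: these parsers implement
    # pipe syntax, where each `|>` operator transforms the query built so far,
    # e.g. in BigQuery:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("FROM t |> WHERE x > 0 |> SELECT x", read="bigquery")
    #     ... # roughly equivalent to SELECT x FROM t WHERE x > 0
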
    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }
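    # Illustrative sketch, not from the original source: PROPERTY_PARSERS handles
    # the key/value clauses that can trail DDL statements, e.g.:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CREATE TABLE t (x INT) ENGINE=InnoDB", read="mysql")
    #     ... # roughly: Create(..., properties=Properties(expressions=[EngineProperty(...)]))
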
    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }
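    # Illustrative sketch, not from the original source: CONSTRAINT_PARSERS covers
    # column- and schema-level constraints, e.g.:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("CREATE TABLE t (id INT PRIMARY KEY, name TEXT NOT NULL)")
    #     ... # each ColumnDef carries constraints such as PrimaryKeyColumnConstraint
    #     ... # and NotNullColumnConstraint
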
    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If there are no parentheses after the keyword, we parse it as an identifier instead
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)
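    # Illustrative sketch, not from the original source: NO_PAREN_FUNCTION_PARSERS
    # handles keyword-style constructs that read like functions but take no
    # parentheses, e.g.:
    #
    #     >>> import sqlglot
    #     >>> sqlglot.parse_one("SELECT CASE WHEN x > 0 THEN 1 ELSE 0 END FROM t")
    #     ... # the projection parses as exp.Case(ifs=[exp.If(...)], default=...)
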
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1234 "GAP_FILL": lambda self: self._parse_gap_fill(), 1235 "JSON_OBJECT": lambda self: self._parse_json_object(), 1236 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1237 "JSON_TABLE": lambda self: self._parse_json_table(), 1238 "MATCH": lambda self: self._parse_match_against(), 1239 "NORMALIZE": lambda self: self._parse_normalize(), 1240 "OPENJSON": lambda self: self._parse_open_json(), 1241 "OVERLAY": lambda self: self._parse_overlay(), 1242 "POSITION": lambda self: self._parse_position(), 1243 "PREDICT": lambda self: self._parse_predict(), 1244 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1245 "STRING_AGG": lambda self: self._parse_string_agg(), 1246 "SUBSTRING": lambda self: self._parse_substring(), 1247 "TRIM": lambda self: self._parse_trim(), 1248 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1249 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1250 "XMLELEMENT": lambda self: self.expression( 1251 exp.XMLElement, 1252 this=self._match_text_seq("NAME") and self._parse_id_var(), 1253 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1254 ), 1255 "XMLTABLE": lambda self: self._parse_xml_table(), 1256 } 1257 1258 QUERY_MODIFIER_PARSERS = { 1259 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1260 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1261 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1262 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1263 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1264 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1265 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1266 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1267 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1268 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1269 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1270 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1271 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1272 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1273 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1274 TokenType.CLUSTER_BY: lambda self: ( 1275 "cluster", 1276 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1277 ), 1278 TokenType.DISTRIBUTE_BY: lambda self: ( 1279 "distribute", 1280 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1281 ), 1282 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1283 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1284 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1285 } 1286 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1287 1288 SET_PARSERS = { 1289 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1290 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1291 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1292 "TRANSACTION": lambda self: self._parse_set_transaction(), 1293 } 1294 1295 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1296 1297 TYPE_LITERAL_PARSERS = { 1298 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1299 } 1300 1301 TYPE_CONVERTERS: 
    TYPE_CONVERTERS: t.Dict[exp.DataType.Type, t.Callable[[exp.DataType], exp.DataType]] = {}

    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}
    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS

    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True
    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
    SUPPORTS_IMPLICIT_UNNEST = False

    # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS
    INTERVAL_SPANS = True

    # Whether a PARTITION clause can follow a table reference
    SUPPORTS_PARTITION_SELECTION = False

    # Whether the `name AS expr` schema/column constraint requires parentheses around `expr`
    WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True

    # Whether the 'AS' keyword is optional in the CTE definition syntax
    OPTIONAL_ALIAS_TOKEN_CTE = True

    # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword
    ALTER_RENAME_REQUIRES_COLUMN = True

    # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree.
    # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is
    # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such
    # as BigQuery, where all joins have the same precedence.
    JOINS_HAVE_EQUAL_PRECEDENCE = False

    # Whether TIMESTAMP <literal> can produce a zone-aware timestamp
    ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False

    # Whether map literals support arbitrary expressions as keys.
    # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB).
    # When False, keys are typically restricted to identifiers.
    MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False

    # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this
    # is true for Snowflake but not for BigQuery, which can also process strings
    JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False

    __slots__ = (
        "error_level",
        "error_message_context",
        "max_errors",
        "dialect",
        "sql",
        "errors",
        "_tokens",
        "_index",
        "_curr",
        "_next",
        "_prev",
        "_prev_comments",
        "_pipe_cte_counter",
    )

    # Autofilled
    SHOW_TRIE: t.Dict = {}
    SET_TRIE: t.Dict = {}

    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()

    def reset(self):
        self.sql = ""
        self.errors = []
        self._tokens = []
        self._index = 0
        self._curr = None
        self._next = None
        self._prev = None
        self._prev_comments = None
        self._pipe_cte_counter = 0
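    # Illustrative sketch, not from the original source: `parse` (below) returns one
    # syntax tree per statement, with semicolons splitting the token stream into
    # chunks. Via the public API:
    #
    #     >>> import sqlglot
    #     >>> len(sqlglot.parse("SELECT 1; SELECT 2"))
    #     2
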
1608 """ 1609 return self._parse( 1610 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1611 ) 1612 1613 def parse_into( 1614 self, 1615 expression_types: exp.IntoType, 1616 raw_tokens: t.List[Token], 1617 sql: t.Optional[str] = None, 1618 ) -> t.List[t.Optional[exp.Expression]]: 1619 """ 1620 Parses a list of tokens into a given Expression type. If a collection of Expression 1621 types is given instead, this method will try to parse the token list into each one 1622 of them, stopping at the first for which the parsing succeeds. 1623 1624 Args: 1625 expression_types: The expression type(s) to try and parse the token list into. 1626 raw_tokens: The list of tokens. 1627 sql: The original SQL string, used to produce helpful debug messages. 1628 1629 Returns: 1630 The target Expression. 1631 """ 1632 errors = [] 1633 for expression_type in ensure_list(expression_types): 1634 parser = self.EXPRESSION_PARSERS.get(expression_type) 1635 if not parser: 1636 raise TypeError(f"No parser registered for {expression_type}") 1637 1638 try: 1639 return self._parse(parser, raw_tokens, sql) 1640 except ParseError as e: 1641 e.errors[0]["into_expression"] = expression_type 1642 errors.append(e) 1643 1644 raise ParseError( 1645 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1646 errors=merge_errors(errors), 1647 ) from errors[-1] 1648 1649 def _parse( 1650 self, 1651 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1652 raw_tokens: t.List[Token], 1653 sql: t.Optional[str] = None, 1654 ) -> t.List[t.Optional[exp.Expression]]: 1655 self.reset() 1656 self.sql = sql or "" 1657 1658 total = len(raw_tokens) 1659 chunks: t.List[t.List[Token]] = [[]] 1660 1661 for i, token in enumerate(raw_tokens): 1662 if token.token_type == TokenType.SEMICOLON: 1663 if token.comments: 1664 chunks.append([token]) 1665 1666 if i < total - 1: 1667 chunks.append([]) 1668 else: 1669 chunks[-1].append(token) 1670 1671 expressions = [] 1672 1673 for tokens in chunks: 1674 self._index = -1 1675 self._tokens = tokens 1676 self._advance() 1677 1678 expressions.append(parse_method(self)) 1679 1680 if self._index < len(self._tokens): 1681 self.raise_error("Invalid expression / Unexpected token") 1682 1683 self.check_errors() 1684 1685 return expressions 1686 1687 def check_errors(self) -> None: 1688 """Logs or raises any found errors, depending on the chosen error level setting.""" 1689 if self.error_level == ErrorLevel.WARN: 1690 for error in self.errors: 1691 logger.error(str(error)) 1692 elif self.error_level == ErrorLevel.RAISE and self.errors: 1693 raise ParseError( 1694 concat_messages(self.errors, self.max_errors), 1695 errors=merge_errors(self.errors), 1696 ) 1697 1698 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1699 """ 1700 Appends an error in the list of recorded errors or raises it, depending on the chosen 1701 error level setting. 1702 """ 1703 token = token or self._curr or self._prev or Token.string("") 1704 start = token.start 1705 end = token.end + 1 1706 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1707 highlight = self.sql[start:end] 1708 end_context = self.sql[end : end + self.error_message_context] 1709 1710 error = ParseError.new( 1711 f"{message}. 
Line {token.line}, Col: {token.col}.\n"
1712 f" {start_context}\033[4m{highlight}\033[0m{end_context}",
1713 description=message,
1714 line=token.line,
1715 col=token.col,
1716 start_context=start_context,
1717 highlight=highlight,
1718 end_context=end_context,
1719 )
1720
1721 if self.error_level == ErrorLevel.IMMEDIATE:
1722 raise error
1723
1724 self.errors.append(error)
1725
1726 def expression(
1727 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
1728 ) -> E:
1729 """
1730 Creates a new, validated Expression.
1731
1732 Args:
1733 exp_class: The expression class to instantiate.
1734 comments: An optional list of comments to attach to the expression.
1735 kwargs: The arguments to set for the expression along with their respective values.
1736
1737 Returns:
1738 The target expression.
1739 """
1740 instance = exp_class(**kwargs)
1741 instance.add_comments(comments) if comments else self._add_comments(instance)
1742 return self.validate_expression(instance)
1743
1744 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None:
1745 if expression and self._prev_comments:
1746 expression.add_comments(self._prev_comments)
1747 self._prev_comments = None
1748
1749 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
1750 """
1751 Validates an Expression, making sure that all its mandatory arguments are set.
1752
1753 Args:
1754 expression: The expression to validate.
1755 args: An optional list of items that was used to instantiate the expression, if it's a Func.
1756
1757 Returns:
1758 The validated expression.
1759 """
1760 if self.error_level != ErrorLevel.IGNORE:
1761 for error_message in expression.error_messages(args):
1762 self.raise_error(error_message)
1763
1764 return expression
1765
1766 def _find_sql(self, start: Token, end: Token) -> str:
1767 return self.sql[start.start : end.end + 1]
1768
1769 def _is_connected(self) -> bool:
1770 return self._prev and self._curr and self._prev.end + 1 == self._curr.start
1771
1772 def _advance(self, times: int = 1) -> None:
1773 self._index += times
1774 self._curr = seq_get(self._tokens, self._index)
1775 self._next = seq_get(self._tokens, self._index + 1)
1776
1777 if self._index > 0:
1778 self._prev = self._tokens[self._index - 1]
1779 self._prev_comments = self._prev.comments
1780 else:
1781 self._prev = None
1782 self._prev_comments = None
1783
1784 def _retreat(self, index: int) -> None:
1785 if index != self._index:
1786 self._advance(index - self._index)
1787
1788 def _warn_unsupported(self) -> None:
1789 if len(self._tokens) <= 1:
1790 return
1791
1792 # We use _find_sql because self.sql may comprise multiple chunks, and we're only
1793 # interested in emitting a warning for the one being currently processed.
1794 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context]
1795
1796 logger.warning(
1797 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'."
1798 )
1799
1800 def _parse_command(self) -> exp.Command:
1801 self._warn_unsupported()
1802 return self.expression(
1803 exp.Command,
1804 comments=self._prev_comments,
1805 this=self._prev.text.upper(),
1806 expression=self._parse_string(),
1807 )
1808
1809 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]:
1810 """
1811 Attempts to backtrack if a parse function that contains a try/catch internally raises an error.
1812 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to
1813 solve this by setting & resetting the parser state accordingly.
1814 """
1815 index = self._index
1816 error_level = self.error_level
1817
1818 self.error_level = ErrorLevel.IMMEDIATE
1819 try:
1820 this = parse_method()
1821 except ParseError:
1822 this = None
1823 finally:
1824 if not this or retreat:
1825 self._retreat(index)
1826 self.error_level = error_level
1827
1828 return this
1829
1830 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression:
1831 start = self._prev
1832 exists = self._parse_exists() if allow_exists else None
1833
1834 self._match(TokenType.ON)
1835
1836 materialized = self._match_text_seq("MATERIALIZED")
1837 kind = self._match_set(self.CREATABLES) and self._prev
1838 if not kind:
1839 return self._parse_as_command(start)
1840
1841 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE):
1842 this = self._parse_user_defined_function(kind=kind.token_type)
1843 elif kind.token_type == TokenType.TABLE:
1844 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS)
1845 elif kind.token_type == TokenType.COLUMN:
1846 this = self._parse_column()
1847 else:
1848 this = self._parse_id_var()
1849
1850 self._match(TokenType.IS)
1851
1852 return self.expression(
1853 exp.Comment,
1854 this=this,
1855 kind=kind.text,
1856 expression=self._parse_string(),
1857 exists=exists,
1858 materialized=materialized,
1859 )
1860
1861 def _parse_to_table(
1862 self,
1863 ) -> exp.ToTableProperty:
1864 table = self._parse_table_parts(schema=True)
1865 return self.expression(exp.ToTableProperty, this=table)
1866
1867 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl
1868 def _parse_ttl(self) -> exp.Expression:
1869 def _parse_ttl_action() -> t.Optional[exp.Expression]:
1870 this = self._parse_bitwise()
1871
1872 if self._match_text_seq("DELETE"):
1873 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True)
1874 if self._match_text_seq("RECOMPRESS"):
1875 return self.expression(
1876 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise()
1877 )
1878 if self._match_text_seq("TO", "DISK"):
1879 return self.expression(
1880 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string()
1881 )
1882 if self._match_text_seq("TO", "VOLUME"):
1883 return self.expression(
1884 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string()
1885 )
1886
1887 return this
1888
1889 expressions = self._parse_csv(_parse_ttl_action)
1890 where = self._parse_where()
1891 group = self._parse_group()
1892
1893 aggregates = None
1894 if group and self._match(TokenType.SET):
1895 aggregates = self._parse_csv(self._parse_set_item)
1896
1897 return self.expression(
1898 exp.MergeTreeTTL,
1899 expressions=expressions,
1900 where=where,
1901 group=group,
1902 aggregates=aggregates,
1903 )
1904
1905 def _parse_statement(self) -> t.Optional[exp.Expression]:
1906 if self._curr is None:
1907 return None
1908
1909 if self._match_set(self.STATEMENT_PARSERS):
1910 comments = self._prev_comments
1911 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self)
1912 stmt.add_comments(comments, prepend=True)
1913 return stmt
1914
1915 if self._match_set(self.dialect.tokenizer_class.COMMANDS):
1916 return self._parse_command()
1917
1918 expression = self._parse_expression()
1919 expression = self._parse_set_operations(expression) if expression else self._parse_select()
1920 return
self._parse_query_modifiers(expression) 1921 1922 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1923 start = self._prev 1924 temporary = self._match(TokenType.TEMPORARY) 1925 materialized = self._match_text_seq("MATERIALIZED") 1926 1927 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1928 if not kind: 1929 return self._parse_as_command(start) 1930 1931 concurrently = self._match_text_seq("CONCURRENTLY") 1932 if_exists = exists or self._parse_exists() 1933 1934 if kind == "COLUMN": 1935 this = self._parse_column() 1936 else: 1937 this = self._parse_table_parts( 1938 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1939 ) 1940 1941 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1942 1943 if self._match(TokenType.L_PAREN, advance=False): 1944 expressions = self._parse_wrapped_csv(self._parse_types) 1945 else: 1946 expressions = None 1947 1948 return self.expression( 1949 exp.Drop, 1950 exists=if_exists, 1951 this=this, 1952 expressions=expressions, 1953 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1954 temporary=temporary, 1955 materialized=materialized, 1956 cascade=self._match_text_seq("CASCADE"), 1957 constraints=self._match_text_seq("CONSTRAINTS"), 1958 purge=self._match_text_seq("PURGE"), 1959 cluster=cluster, 1960 concurrently=concurrently, 1961 ) 1962 1963 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1964 return ( 1965 self._match_text_seq("IF") 1966 and (not not_ or self._match(TokenType.NOT)) 1967 and self._match(TokenType.EXISTS) 1968 ) 1969 1970 def _parse_create(self) -> exp.Create | exp.Command: 1971 # Note: this can't be None because we've matched a statement parser 1972 start = self._prev 1973 1974 replace = ( 1975 start.token_type == TokenType.REPLACE 1976 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1977 or self._match_pair(TokenType.OR, TokenType.ALTER) 1978 ) 1979 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1980 1981 unique = self._match(TokenType.UNIQUE) 1982 1983 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1984 clustered = True 1985 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1986 "COLUMNSTORE" 1987 ): 1988 clustered = False 1989 else: 1990 clustered = None 1991 1992 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1993 self._advance() 1994 1995 properties = None 1996 create_token = self._match_set(self.CREATABLES) and self._prev 1997 1998 if not create_token: 1999 # exp.Properties.Location.POST_CREATE 2000 properties = self._parse_properties() 2001 create_token = self._match_set(self.CREATABLES) and self._prev 2002 2003 if not properties or not create_token: 2004 return self._parse_as_command(start) 2005 2006 concurrently = self._match_text_seq("CONCURRENTLY") 2007 exists = self._parse_exists(not_=True) 2008 this = None 2009 expression: t.Optional[exp.Expression] = None 2010 indexes = None 2011 no_schema_binding = None 2012 begin = None 2013 end = None 2014 clone = None 2015 2016 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2017 nonlocal properties 2018 if properties and temp_props: 2019 properties.expressions.extend(temp_props.expressions) 2020 elif temp_props: 2021 properties = temp_props 2022 2023 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2024 this = self._parse_user_defined_function(kind=create_token.token_type) 2025 2026 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type 
signature)
2027 extend_props(self._parse_properties())
2028
2029 expression = self._match(TokenType.ALIAS) and self._parse_heredoc()
2030 extend_props(self._parse_properties())
2031
2032 if not expression:
2033 if self._match(TokenType.COMMAND):
2034 expression = self._parse_as_command(self._prev)
2035 else:
2036 begin = self._match(TokenType.BEGIN)
2037 return_ = self._match_text_seq("RETURN")
2038
2039 if self._match(TokenType.STRING, advance=False):
2040 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property
2041 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement
2042 expression = self._parse_string()
2043 extend_props(self._parse_properties())
2044 else:
2045 expression = self._parse_user_defined_function_expression()
2046
2047 end = self._match_text_seq("END")
2048
2049 if return_:
2050 expression = self.expression(exp.Return, this=expression)
2051 elif create_token.token_type == TokenType.INDEX:
2052 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c)
2053 if not self._match(TokenType.ON):
2054 index = self._parse_id_var()
2055 anonymous = False
2056 else:
2057 index = None
2058 anonymous = True
2059
2060 this = self._parse_index(index=index, anonymous=anonymous)
2061 elif create_token.token_type in self.DB_CREATABLES:
2062 table_parts = self._parse_table_parts(
2063 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA
2064 )
2065
2066 # exp.Properties.Location.POST_NAME
2067 self._match(TokenType.COMMA)
2068 extend_props(self._parse_properties(before=True))
2069
2070 this = self._parse_schema(this=table_parts)
2071
2072 # exp.Properties.Location.POST_SCHEMA and POST_WITH
2073 extend_props(self._parse_properties())
2074
2075 has_alias = self._match(TokenType.ALIAS)
2076 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False):
2077 # exp.Properties.Location.POST_ALIAS
2078 extend_props(self._parse_properties())
2079
2080 if create_token.token_type == TokenType.SEQUENCE:
2081 expression = self._parse_types()
2082 extend_props(self._parse_properties())
2083 else:
2084 expression = self._parse_ddl_select()
2085
2086 # Some dialects also support using a table as an alias instead of a SELECT.
2087 # Here we fall back to this as an alternative.
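# (Illustrative, assumed example of such a statement: CREATE TABLE t1 AS t2,
# where a bare table name follows AS instead of a query; in that case no DDL
# SELECT is parsed, and the bare name is consumed just below via
# self._try_parse(self._parse_table_parts).)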
2088 if not expression and has_alias: 2089 expression = self._try_parse(self._parse_table_parts) 2090 2091 if create_token.token_type == TokenType.TABLE: 2092 # exp.Properties.Location.POST_EXPRESSION 2093 extend_props(self._parse_properties()) 2094 2095 indexes = [] 2096 while True: 2097 index = self._parse_index() 2098 2099 # exp.Properties.Location.POST_INDEX 2100 extend_props(self._parse_properties()) 2101 if not index: 2102 break 2103 else: 2104 self._match(TokenType.COMMA) 2105 indexes.append(index) 2106 elif create_token.token_type == TokenType.VIEW: 2107 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2108 no_schema_binding = True 2109 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2110 extend_props(self._parse_properties()) 2111 2112 shallow = self._match_text_seq("SHALLOW") 2113 2114 if self._match_texts(self.CLONE_KEYWORDS): 2115 copy = self._prev.text.lower() == "copy" 2116 clone = self.expression( 2117 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2118 ) 2119 2120 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2121 return self._parse_as_command(start) 2122 2123 create_kind_text = create_token.text.upper() 2124 return self.expression( 2125 exp.Create, 2126 this=this, 2127 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2128 replace=replace, 2129 refresh=refresh, 2130 unique=unique, 2131 expression=expression, 2132 exists=exists, 2133 properties=properties, 2134 indexes=indexes, 2135 no_schema_binding=no_schema_binding, 2136 begin=begin, 2137 end=end, 2138 clone=clone, 2139 concurrently=concurrently, 2140 clustered=clustered, 2141 ) 2142 2143 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2144 seq = exp.SequenceProperties() 2145 2146 options = [] 2147 index = self._index 2148 2149 while self._curr: 2150 self._match(TokenType.COMMA) 2151 if self._match_text_seq("INCREMENT"): 2152 self._match_text_seq("BY") 2153 self._match_text_seq("=") 2154 seq.set("increment", self._parse_term()) 2155 elif self._match_text_seq("MINVALUE"): 2156 seq.set("minvalue", self._parse_term()) 2157 elif self._match_text_seq("MAXVALUE"): 2158 seq.set("maxvalue", self._parse_term()) 2159 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2160 self._match_text_seq("=") 2161 seq.set("start", self._parse_term()) 2162 elif self._match_text_seq("CACHE"): 2163 # T-SQL allows empty CACHE which is initialized dynamically 2164 seq.set("cache", self._parse_number() or True) 2165 elif self._match_text_seq("OWNED", "BY"): 2166 # "OWNED BY NONE" is the default 2167 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2168 else: 2169 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2170 if opt: 2171 options.append(opt) 2172 else: 2173 break 2174 2175 seq.set("options", options if options else None) 2176 return None if self._index == index else seq 2177 2178 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2179 # only used for teradata currently 2180 self._match(TokenType.COMMA) 2181 2182 kwargs = { 2183 "no": self._match_text_seq("NO"), 2184 "dual": self._match_text_seq("DUAL"), 2185 "before": self._match_text_seq("BEFORE"), 2186 "default": self._match_text_seq("DEFAULT"), 2187 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2188 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2189 "after": self._match_text_seq("AFTER"), 2190 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2191 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2192 } 2193 2194 if self._match_texts(self.PROPERTY_PARSERS): 2195 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2196 try: 2197 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2198 except TypeError: 2199 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2200 2201 return None 2202 2203 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2204 return self._parse_wrapped_csv(self._parse_property) 2205 2206 def _parse_property(self) -> t.Optional[exp.Expression]: 2207 if self._match_texts(self.PROPERTY_PARSERS): 2208 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2209 2210 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2211 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2212 2213 if self._match_text_seq("COMPOUND", "SORTKEY"): 2214 return self._parse_sortkey(compound=True) 2215 2216 if self._match_text_seq("SQL", "SECURITY"): 2217 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2218 2219 index = self._index 2220 key = self._parse_column() 2221 2222 if not self._match(TokenType.EQ): 2223 self._retreat(index) 2224 return self._parse_sequence_properties() 2225 2226 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2227 if isinstance(key, exp.Column): 2228 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2229 2230 value = self._parse_bitwise() or self._parse_var(any_token=True) 2231 2232 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2233 if isinstance(value, exp.Column): 2234 value = exp.var(value.name) 2235 2236 return self.expression(exp.Property, this=key, value=value) 2237 2238 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2239 if self._match_text_seq("BY"): 2240 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2241 2242 self._match(TokenType.ALIAS) 2243 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2244 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2245 2246 return self.expression( 2247 exp.FileFormatProperty, 2248 this=( 2249 self.expression( 2250 exp.InputOutputFormat, 2251 input_format=input_format, 2252 output_format=output_format, 2253 ) 2254 if input_format or output_format 2255 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2256 ), 2257 hive_format=True, 2258 ) 2259 2260 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2261 field = self._parse_field() 2262 if isinstance(field, exp.Identifier) and not field.quoted: 2263 field = exp.var(field) 2264 2265 return field 2266 2267 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2268 self._match(TokenType.EQ) 2269 self._match(TokenType.ALIAS) 2270 2271 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2272 2273 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2274 properties = [] 2275 while True: 2276 if before: 2277 prop = self._parse_property_before() 2278 else: 2279 prop = self._parse_property() 2280 if not prop: 2281 break 2282 for p in ensure_list(prop): 2283 properties.append(p) 2284 2285 if properties: 2286 return self.expression(exp.Properties, expressions=properties) 2287 
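# (Illustrative note on the accumulation above, assuming a generic clause such
# as key1 = 'a' key2 = 'b': each assignment is parsed by the key/value fallback
# in _parse_property into an exp.Property node, and the collected nodes are
# wrapped in a single exp.Properties; when nothing matches, we fall through to
# the None return below so callers can tell that no properties clause exists.)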
2288 return None 2289 2290 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2291 return self.expression( 2292 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2293 ) 2294 2295 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2296 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2297 security_specifier = self._prev.text.upper() 2298 return self.expression(exp.SecurityProperty, this=security_specifier) 2299 return None 2300 2301 def _parse_settings_property(self) -> exp.SettingsProperty: 2302 return self.expression( 2303 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2304 ) 2305 2306 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2307 if self._index >= 2: 2308 pre_volatile_token = self._tokens[self._index - 2] 2309 else: 2310 pre_volatile_token = None 2311 2312 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2313 return exp.VolatileProperty() 2314 2315 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2316 2317 def _parse_retention_period(self) -> exp.Var: 2318 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2319 number = self._parse_number() 2320 number_str = f"{number} " if number else "" 2321 unit = self._parse_var(any_token=True) 2322 return exp.var(f"{number_str}{unit}") 2323 2324 def _parse_system_versioning_property( 2325 self, with_: bool = False 2326 ) -> exp.WithSystemVersioningProperty: 2327 self._match(TokenType.EQ) 2328 prop = self.expression( 2329 exp.WithSystemVersioningProperty, 2330 **{ # type: ignore 2331 "on": True, 2332 "with": with_, 2333 }, 2334 ) 2335 2336 if self._match_text_seq("OFF"): 2337 prop.set("on", False) 2338 return prop 2339 2340 self._match(TokenType.ON) 2341 if self._match(TokenType.L_PAREN): 2342 while self._curr and not self._match(TokenType.R_PAREN): 2343 if self._match_text_seq("HISTORY_TABLE", "="): 2344 prop.set("this", self._parse_table_parts()) 2345 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2346 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2347 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2348 prop.set("retention_period", self._parse_retention_period()) 2349 2350 self._match(TokenType.COMMA) 2351 2352 return prop 2353 2354 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2355 self._match(TokenType.EQ) 2356 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2357 prop = self.expression(exp.DataDeletionProperty, on=on) 2358 2359 if self._match(TokenType.L_PAREN): 2360 while self._curr and not self._match(TokenType.R_PAREN): 2361 if self._match_text_seq("FILTER_COLUMN", "="): 2362 prop.set("filter_column", self._parse_column()) 2363 elif self._match_text_seq("RETENTION_PERIOD", "="): 2364 prop.set("retention_period", self._parse_retention_period()) 2365 2366 self._match(TokenType.COMMA) 2367 2368 return prop 2369 2370 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2371 kind = "HASH" 2372 expressions: t.Optional[t.List[exp.Expression]] = None 2373 if self._match_text_seq("BY", "HASH"): 2374 expressions = self._parse_wrapped_csv(self._parse_id_var) 2375 elif self._match_text_seq("BY", "RANDOM"): 2376 kind = "RANDOM" 2377 2378 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2379 buckets: t.Optional[exp.Expression] = None 2380 if self._match_text_seq("BUCKETS") and not 
self._match_text_seq("AUTO"): 2381 buckets = self._parse_number() 2382 2383 return self.expression( 2384 exp.DistributedByProperty, 2385 expressions=expressions, 2386 kind=kind, 2387 buckets=buckets, 2388 order=self._parse_order(), 2389 ) 2390 2391 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2392 self._match_text_seq("KEY") 2393 expressions = self._parse_wrapped_id_vars() 2394 return self.expression(expr_type, expressions=expressions) 2395 2396 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2397 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2398 prop = self._parse_system_versioning_property(with_=True) 2399 self._match_r_paren() 2400 return prop 2401 2402 if self._match(TokenType.L_PAREN, advance=False): 2403 return self._parse_wrapped_properties() 2404 2405 if self._match_text_seq("JOURNAL"): 2406 return self._parse_withjournaltable() 2407 2408 if self._match_texts(self.VIEW_ATTRIBUTES): 2409 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2410 2411 if self._match_text_seq("DATA"): 2412 return self._parse_withdata(no=False) 2413 elif self._match_text_seq("NO", "DATA"): 2414 return self._parse_withdata(no=True) 2415 2416 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2417 return self._parse_serde_properties(with_=True) 2418 2419 if self._match(TokenType.SCHEMA): 2420 return self.expression( 2421 exp.WithSchemaBindingProperty, 2422 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2423 ) 2424 2425 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2426 return self.expression( 2427 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2428 ) 2429 2430 if not self._next: 2431 return None 2432 2433 return self._parse_withisolatedloading() 2434 2435 def _parse_procedure_option(self) -> exp.Expression | None: 2436 if self._match_text_seq("EXECUTE", "AS"): 2437 return self.expression( 2438 exp.ExecuteAsProperty, 2439 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2440 or self._parse_string(), 2441 ) 2442 2443 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2444 2445 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2446 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2447 self._match(TokenType.EQ) 2448 2449 user = self._parse_id_var() 2450 self._match(TokenType.PARAMETER) 2451 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2452 2453 if not user or not host: 2454 return None 2455 2456 return exp.DefinerProperty(this=f"{user}@{host}") 2457 2458 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2459 self._match(TokenType.TABLE) 2460 self._match(TokenType.EQ) 2461 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2462 2463 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2464 return self.expression(exp.LogProperty, no=no) 2465 2466 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2467 return self.expression(exp.JournalProperty, **kwargs) 2468 2469 def _parse_checksum(self) -> exp.ChecksumProperty: 2470 self._match(TokenType.EQ) 2471 2472 on = None 2473 if self._match(TokenType.ON): 2474 on = True 2475 elif self._match_text_seq("OFF"): 2476 on = False 2477 2478 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2479 2480 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2481 return self.expression( 2482 
exp.Cluster, 2483 expressions=( 2484 self._parse_wrapped_csv(self._parse_ordered) 2485 if wrapped 2486 else self._parse_csv(self._parse_ordered) 2487 ), 2488 ) 2489 2490 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2491 self._match_text_seq("BY") 2492 2493 self._match_l_paren() 2494 expressions = self._parse_csv(self._parse_column) 2495 self._match_r_paren() 2496 2497 if self._match_text_seq("SORTED", "BY"): 2498 self._match_l_paren() 2499 sorted_by = self._parse_csv(self._parse_ordered) 2500 self._match_r_paren() 2501 else: 2502 sorted_by = None 2503 2504 self._match(TokenType.INTO) 2505 buckets = self._parse_number() 2506 self._match_text_seq("BUCKETS") 2507 2508 return self.expression( 2509 exp.ClusteredByProperty, 2510 expressions=expressions, 2511 sorted_by=sorted_by, 2512 buckets=buckets, 2513 ) 2514 2515 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2516 if not self._match_text_seq("GRANTS"): 2517 self._retreat(self._index - 1) 2518 return None 2519 2520 return self.expression(exp.CopyGrantsProperty) 2521 2522 def _parse_freespace(self) -> exp.FreespaceProperty: 2523 self._match(TokenType.EQ) 2524 return self.expression( 2525 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2526 ) 2527 2528 def _parse_mergeblockratio( 2529 self, no: bool = False, default: bool = False 2530 ) -> exp.MergeBlockRatioProperty: 2531 if self._match(TokenType.EQ): 2532 return self.expression( 2533 exp.MergeBlockRatioProperty, 2534 this=self._parse_number(), 2535 percent=self._match(TokenType.PERCENT), 2536 ) 2537 2538 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2539 2540 def _parse_datablocksize( 2541 self, 2542 default: t.Optional[bool] = None, 2543 minimum: t.Optional[bool] = None, 2544 maximum: t.Optional[bool] = None, 2545 ) -> exp.DataBlocksizeProperty: 2546 self._match(TokenType.EQ) 2547 size = self._parse_number() 2548 2549 units = None 2550 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2551 units = self._prev.text 2552 2553 return self.expression( 2554 exp.DataBlocksizeProperty, 2555 size=size, 2556 units=units, 2557 default=default, 2558 minimum=minimum, 2559 maximum=maximum, 2560 ) 2561 2562 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2563 self._match(TokenType.EQ) 2564 always = self._match_text_seq("ALWAYS") 2565 manual = self._match_text_seq("MANUAL") 2566 never = self._match_text_seq("NEVER") 2567 default = self._match_text_seq("DEFAULT") 2568 2569 autotemp = None 2570 if self._match_text_seq("AUTOTEMP"): 2571 autotemp = self._parse_schema() 2572 2573 return self.expression( 2574 exp.BlockCompressionProperty, 2575 always=always, 2576 manual=manual, 2577 never=never, 2578 default=default, 2579 autotemp=autotemp, 2580 ) 2581 2582 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2583 index = self._index 2584 no = self._match_text_seq("NO") 2585 concurrent = self._match_text_seq("CONCURRENT") 2586 2587 if not self._match_text_seq("ISOLATED", "LOADING"): 2588 self._retreat(index) 2589 return None 2590 2591 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2592 return self.expression( 2593 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2594 ) 2595 2596 def _parse_locking(self) -> exp.LockingProperty: 2597 if self._match(TokenType.TABLE): 2598 kind = "TABLE" 2599 elif self._match(TokenType.VIEW): 2600 kind = "VIEW" 2601 elif self._match(TokenType.ROW): 2602 kind 
= "ROW" 2603 elif self._match_text_seq("DATABASE"): 2604 kind = "DATABASE" 2605 else: 2606 kind = None 2607 2608 if kind in ("DATABASE", "TABLE", "VIEW"): 2609 this = self._parse_table_parts() 2610 else: 2611 this = None 2612 2613 if self._match(TokenType.FOR): 2614 for_or_in = "FOR" 2615 elif self._match(TokenType.IN): 2616 for_or_in = "IN" 2617 else: 2618 for_or_in = None 2619 2620 if self._match_text_seq("ACCESS"): 2621 lock_type = "ACCESS" 2622 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2623 lock_type = "EXCLUSIVE" 2624 elif self._match_text_seq("SHARE"): 2625 lock_type = "SHARE" 2626 elif self._match_text_seq("READ"): 2627 lock_type = "READ" 2628 elif self._match_text_seq("WRITE"): 2629 lock_type = "WRITE" 2630 elif self._match_text_seq("CHECKSUM"): 2631 lock_type = "CHECKSUM" 2632 else: 2633 lock_type = None 2634 2635 override = self._match_text_seq("OVERRIDE") 2636 2637 return self.expression( 2638 exp.LockingProperty, 2639 this=this, 2640 kind=kind, 2641 for_or_in=for_or_in, 2642 lock_type=lock_type, 2643 override=override, 2644 ) 2645 2646 def _parse_partition_by(self) -> t.List[exp.Expression]: 2647 if self._match(TokenType.PARTITION_BY): 2648 return self._parse_csv(self._parse_assignment) 2649 return [] 2650 2651 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2652 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2653 if self._match_text_seq("MINVALUE"): 2654 return exp.var("MINVALUE") 2655 if self._match_text_seq("MAXVALUE"): 2656 return exp.var("MAXVALUE") 2657 return self._parse_bitwise() 2658 2659 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2660 expression = None 2661 from_expressions = None 2662 to_expressions = None 2663 2664 if self._match(TokenType.IN): 2665 this = self._parse_wrapped_csv(self._parse_bitwise) 2666 elif self._match(TokenType.FROM): 2667 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2668 self._match_text_seq("TO") 2669 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2670 elif self._match_text_seq("WITH", "(", "MODULUS"): 2671 this = self._parse_number() 2672 self._match_text_seq(",", "REMAINDER") 2673 expression = self._parse_number() 2674 self._match_r_paren() 2675 else: 2676 self.raise_error("Failed to parse partition bound spec.") 2677 2678 return self.expression( 2679 exp.PartitionBoundSpec, 2680 this=this, 2681 expression=expression, 2682 from_expressions=from_expressions, 2683 to_expressions=to_expressions, 2684 ) 2685 2686 # https://www.postgresql.org/docs/current/sql-createtable.html 2687 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2688 if not self._match_text_seq("OF"): 2689 self._retreat(self._index - 1) 2690 return None 2691 2692 this = self._parse_table(schema=True) 2693 2694 if self._match(TokenType.DEFAULT): 2695 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2696 elif self._match_text_seq("FOR", "VALUES"): 2697 expression = self._parse_partition_bound_spec() 2698 else: 2699 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2700 2701 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2702 2703 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2704 self._match(TokenType.EQ) 2705 return self.expression( 2706 exp.PartitionedByProperty, 2707 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2708 ) 2709 2710 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2711 if self._match_text_seq("AND", 
"STATISTICS"): 2712 statistics = True 2713 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2714 statistics = False 2715 else: 2716 statistics = None 2717 2718 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2719 2720 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2721 if self._match_text_seq("SQL"): 2722 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2723 return None 2724 2725 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2726 if self._match_text_seq("SQL", "DATA"): 2727 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2728 return None 2729 2730 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2731 if self._match_text_seq("PRIMARY", "INDEX"): 2732 return exp.NoPrimaryIndexProperty() 2733 if self._match_text_seq("SQL"): 2734 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2735 return None 2736 2737 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2738 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2739 return exp.OnCommitProperty() 2740 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2741 return exp.OnCommitProperty(delete=True) 2742 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2743 2744 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2745 if self._match_text_seq("SQL", "DATA"): 2746 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2747 return None 2748 2749 def _parse_distkey(self) -> exp.DistKeyProperty: 2750 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2751 2752 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2753 table = self._parse_table(schema=True) 2754 2755 options = [] 2756 while self._match_texts(("INCLUDING", "EXCLUDING")): 2757 this = self._prev.text.upper() 2758 2759 id_var = self._parse_id_var() 2760 if not id_var: 2761 return None 2762 2763 options.append( 2764 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2765 ) 2766 2767 return self.expression(exp.LikeProperty, this=table, expressions=options) 2768 2769 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2770 return self.expression( 2771 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2772 ) 2773 2774 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2775 self._match(TokenType.EQ) 2776 return self.expression( 2777 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2778 ) 2779 2780 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2781 self._match_text_seq("WITH", "CONNECTION") 2782 return self.expression( 2783 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2784 ) 2785 2786 def _parse_returns(self) -> exp.ReturnsProperty: 2787 value: t.Optional[exp.Expression] 2788 null = None 2789 is_table = self._match(TokenType.TABLE) 2790 2791 if is_table: 2792 if self._match(TokenType.LT): 2793 value = self.expression( 2794 exp.Schema, 2795 this="TABLE", 2796 expressions=self._parse_csv(self._parse_struct_types), 2797 ) 2798 if not self._match(TokenType.GT): 2799 self.raise_error("Expecting >") 2800 else: 2801 value = self._parse_schema(exp.var("TABLE")) 2802 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2803 null = True 2804 value = None 2805 else: 2806 value = self._parse_types() 2807 2808 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2809 2810 def _parse_describe(self) -> exp.Describe: 2811 kind = self._match_set(self.CREATABLES) and self._prev.text 2812 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2813 if self._match(TokenType.DOT): 2814 style = None 2815 self._retreat(self._index - 2) 2816 2817 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2818 2819 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2820 this = self._parse_statement() 2821 else: 2822 this = self._parse_table(schema=True) 2823 2824 properties = self._parse_properties() 2825 expressions = properties.expressions if properties else None 2826 partition = self._parse_partition() 2827 return self.expression( 2828 exp.Describe, 2829 this=this, 2830 style=style, 2831 kind=kind, 2832 expressions=expressions, 2833 partition=partition, 2834 format=format, 2835 ) 2836 2837 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2838 kind = self._prev.text.upper() 2839 expressions = [] 2840 2841 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2842 if self._match(TokenType.WHEN): 2843 expression = self._parse_disjunction() 2844 self._match(TokenType.THEN) 2845 else: 2846 expression = None 2847 2848 else_ = self._match(TokenType.ELSE) 2849 2850 if not self._match(TokenType.INTO): 2851 return None 2852 2853 return self.expression( 2854 exp.ConditionalInsert, 2855 this=self.expression( 2856 exp.Insert, 2857 this=self._parse_table(schema=True), 2858 expression=self._parse_derived_table_values(), 2859 ), 2860 expression=expression, 2861 else_=else_, 2862 ) 2863 2864 expression = parse_conditional_insert() 2865 while expression is not None: 2866 expressions.append(expression) 2867 expression = parse_conditional_insert() 2868 2869 return self.expression( 2870 exp.MultitableInserts, 2871 kind=kind, 2872 comments=comments, 2873 expressions=expressions, 2874 source=self._parse_table(), 2875 ) 2876 2877 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2878 comments = [] 2879 hint = self._parse_hint() 2880 overwrite = self._match(TokenType.OVERWRITE) 2881 ignore = self._match(TokenType.IGNORE) 2882 local = self._match_text_seq("LOCAL") 2883 alternative = None 2884 is_function = None 2885 2886 if self._match_text_seq("DIRECTORY"): 2887 this: t.Optional[exp.Expression] = self.expression( 2888 exp.Directory, 2889 this=self._parse_var_or_string(), 2890 local=local, 2891 row_format=self._parse_row_format(match_row=True), 2892 ) 2893 else: 2894 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2895 comments += ensure_list(self._prev_comments) 2896 return self._parse_multitable_inserts(comments) 2897 2898 if self._match(TokenType.OR): 2899 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2900 2901 self._match(TokenType.INTO) 2902 comments += ensure_list(self._prev_comments) 2903 self._match(TokenType.TABLE) 2904 is_function = self._match(TokenType.FUNCTION) 2905 2906 this = ( 2907 self._parse_table(schema=True, parse_partition=True) 2908 if not is_function 2909 else self._parse_function() 2910 ) 2911 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2912 this.set("alias", self._parse_table_alias()) 2913 2914 returning = self._parse_returning() 2915 2916 return self.expression( 2917 exp.Insert, 2918 comments=comments, 2919 hint=hint, 2920 is_function=is_function, 2921 this=this, 
2922 stored=self._match_text_seq("STORED") and self._parse_stored(), 2923 by_name=self._match_text_seq("BY", "NAME"), 2924 exists=self._parse_exists(), 2925 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2926 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2927 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2928 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2929 conflict=self._parse_on_conflict(), 2930 returning=returning or self._parse_returning(), 2931 overwrite=overwrite, 2932 alternative=alternative, 2933 ignore=ignore, 2934 source=self._match(TokenType.TABLE) and self._parse_table(), 2935 ) 2936 2937 def _parse_kill(self) -> exp.Kill: 2938 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2939 2940 return self.expression( 2941 exp.Kill, 2942 this=self._parse_primary(), 2943 kind=kind, 2944 ) 2945 2946 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2947 conflict = self._match_text_seq("ON", "CONFLICT") 2948 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2949 2950 if not conflict and not duplicate: 2951 return None 2952 2953 conflict_keys = None 2954 constraint = None 2955 2956 if conflict: 2957 if self._match_text_seq("ON", "CONSTRAINT"): 2958 constraint = self._parse_id_var() 2959 elif self._match(TokenType.L_PAREN): 2960 conflict_keys = self._parse_csv(self._parse_id_var) 2961 self._match_r_paren() 2962 2963 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2964 if self._prev.token_type == TokenType.UPDATE: 2965 self._match(TokenType.SET) 2966 expressions = self._parse_csv(self._parse_equality) 2967 else: 2968 expressions = None 2969 2970 return self.expression( 2971 exp.OnConflict, 2972 duplicate=duplicate, 2973 expressions=expressions, 2974 action=action, 2975 conflict_keys=conflict_keys, 2976 constraint=constraint, 2977 where=self._parse_where(), 2978 ) 2979 2980 def _parse_returning(self) -> t.Optional[exp.Returning]: 2981 if not self._match(TokenType.RETURNING): 2982 return None 2983 return self.expression( 2984 exp.Returning, 2985 expressions=self._parse_csv(self._parse_expression), 2986 into=self._match(TokenType.INTO) and self._parse_table_part(), 2987 ) 2988 2989 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2990 if not self._match(TokenType.FORMAT): 2991 return None 2992 return self._parse_row_format() 2993 2994 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2995 index = self._index 2996 with_ = with_ or self._match_text_seq("WITH") 2997 2998 if not self._match(TokenType.SERDE_PROPERTIES): 2999 self._retreat(index) 3000 return None 3001 return self.expression( 3002 exp.SerdeProperties, 3003 **{ # type: ignore 3004 "expressions": self._parse_wrapped_properties(), 3005 "with": with_, 3006 }, 3007 ) 3008 3009 def _parse_row_format( 3010 self, match_row: bool = False 3011 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3012 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3013 return None 3014 3015 if self._match_text_seq("SERDE"): 3016 this = self._parse_string() 3017 3018 serde_properties = self._parse_serde_properties() 3019 3020 return self.expression( 3021 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3022 ) 3023 3024 self._match_text_seq("DELIMITED") 3025 3026 kwargs = {} 3027 3028 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3029 kwargs["fields"] = self._parse_string() 3030 if self._match_text_seq("ESCAPED", "BY"): 3031 kwargs["escaped"] = self._parse_string() 3032 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3033 kwargs["collection_items"] = self._parse_string() 3034 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3035 kwargs["map_keys"] = self._parse_string() 3036 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3037 kwargs["lines"] = self._parse_string() 3038 if self._match_text_seq("NULL", "DEFINED", "AS"): 3039 kwargs["null"] = self._parse_string() 3040 3041 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3042 3043 def _parse_load(self) -> exp.LoadData | exp.Command: 3044 if self._match_text_seq("DATA"): 3045 local = self._match_text_seq("LOCAL") 3046 self._match_text_seq("INPATH") 3047 inpath = self._parse_string() 3048 overwrite = self._match(TokenType.OVERWRITE) 3049 self._match_pair(TokenType.INTO, TokenType.TABLE) 3050 3051 return self.expression( 3052 exp.LoadData, 3053 this=self._parse_table(schema=True), 3054 local=local, 3055 overwrite=overwrite, 3056 inpath=inpath, 3057 partition=self._parse_partition(), 3058 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3059 serde=self._match_text_seq("SERDE") and self._parse_string(), 3060 ) 3061 return self._parse_as_command(self._prev) 3062 3063 def _parse_delete(self) -> exp.Delete: 3064 # This handles MySQL's "Multiple-Table Syntax" 3065 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3066 tables = None 3067 if not self._match(TokenType.FROM, advance=False): 3068 tables = self._parse_csv(self._parse_table) or None 3069 3070 returning = self._parse_returning() 3071 3072 return self.expression( 3073 exp.Delete, 3074 tables=tables, 3075 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3076 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3077 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3078 where=self._parse_where(), 3079 returning=returning or self._parse_returning(), 3080 limit=self._parse_limit(), 3081 ) 3082 3083 def _parse_update(self) -> exp.Update: 3084 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3085 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3086 returning = self._parse_returning() 3087 return self.expression( 3088 exp.Update, 3089 **{ # type: ignore 3090 "this": this, 3091 "expressions": expressions, 3092 "from": self._parse_from(joins=True), 3093 "where": self._parse_where(), 3094 "returning": returning or self._parse_returning(), 3095 "order": self._parse_order(), 3096 "limit": self._parse_limit(), 3097 }, 3098 ) 3099 3100 def _parse_use(self) -> exp.Use: 3101 return self.expression( 3102 exp.Use, 3103 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3104 this=self._parse_table(schema=False), 3105 ) 3106 3107 def _parse_uncache(self) -> exp.Uncache: 3108 if not self._match(TokenType.TABLE): 3109 self.raise_error("Expecting TABLE after UNCACHE") 3110 3111 return self.expression( 3112 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3113 ) 3114 3115 def _parse_cache(self) -> exp.Cache: 3116 lazy = self._match_text_seq("LAZY") 3117 self._match(TokenType.TABLE) 3118 table = self._parse_table(schema=True) 3119 3120 options = [] 3121 if self._match_text_seq("OPTIONS"): 3122 self._match_l_paren() 3123 k = 
self._parse_string()
3124 self._match(TokenType.EQ)
3125 v = self._parse_string()
3126 options = [k, v]
3127 self._match_r_paren()
3128
3129 self._match(TokenType.ALIAS)
3130 return self.expression(
3131 exp.Cache,
3132 this=table,
3133 lazy=lazy,
3134 options=options,
3135 expression=self._parse_select(nested=True),
3136 )
3137
3138 def _parse_partition(self) -> t.Optional[exp.Partition]:
3139 if not self._match_texts(self.PARTITION_KEYWORDS):
3140 return None
3141
3142 return self.expression(
3143 exp.Partition,
3144 subpartition=self._prev.text.upper() == "SUBPARTITION",
3145 expressions=self._parse_wrapped_csv(self._parse_assignment),
3146 )
3147
3148 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]:
3149 def _parse_value_expression() -> t.Optional[exp.Expression]:
3150 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT):
3151 return exp.var(self._prev.text.upper())
3152 return self._parse_expression()
3153
3154 if self._match(TokenType.L_PAREN):
3155 expressions = self._parse_csv(_parse_value_expression)
3156 self._match_r_paren()
3157 return self.expression(exp.Tuple, expressions=expressions)
3158
3159 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows.
3160 expression = self._parse_expression()
3161 if expression:
3162 return self.expression(exp.Tuple, expressions=[expression])
3163 return None
3164
3165 def _parse_projections(self) -> t.List[exp.Expression]:
3166 return self._parse_expressions()
3167
3168 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
3169 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
3170 this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
3171 is_unpivot=self._prev.token_type == TokenType.UNPIVOT
3172 )
3173 elif self._match(TokenType.FROM):
3174 from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
3175 # Support parentheses for duckdb FROM-first syntax
3176 select = self._parse_select()
3177 if select:
3178 select.set("from", from_)
3179 this = select
3180 else:
3181 this = exp.select("*").from_(t.cast(exp.From, from_))
3182 else:
3183 this = (
3184 self._parse_table(consume_pipe=True)
3185 if table
3186 else self._parse_select(nested=True, parse_set_operation=False)
3187 )
3188
3189 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
3190 # in case a modifier (e.g.
join) is following 3191 if table and isinstance(this, exp.Values) and this.alias: 3192 alias = this.args["alias"].pop() 3193 this = exp.Table(this=this, alias=alias) 3194 3195 this = self._parse_query_modifiers(self._parse_set_operations(this)) 3196 3197 return this 3198 3199 def _parse_select( 3200 self, 3201 nested: bool = False, 3202 table: bool = False, 3203 parse_subquery_alias: bool = True, 3204 parse_set_operation: bool = True, 3205 consume_pipe: bool = True, 3206 ) -> t.Optional[exp.Expression]: 3207 query = self._parse_select_query( 3208 nested=nested, 3209 table=table, 3210 parse_subquery_alias=parse_subquery_alias, 3211 parse_set_operation=parse_set_operation, 3212 ) 3213 3214 if ( 3215 consume_pipe 3216 and self._match(TokenType.PIPE_GT, advance=False) 3217 and isinstance(query, exp.Query) 3218 ): 3219 query = self._parse_pipe_syntax_query(query) 3220 query = query.subquery(copy=False) if query and table else query 3221 3222 return query 3223 3224 def _parse_select_query( 3225 self, 3226 nested: bool = False, 3227 table: bool = False, 3228 parse_subquery_alias: bool = True, 3229 parse_set_operation: bool = True, 3230 ) -> t.Optional[exp.Expression]: 3231 cte = self._parse_with() 3232 3233 if cte: 3234 this = self._parse_statement() 3235 3236 if not this: 3237 self.raise_error("Failed to parse any statement following CTE") 3238 return cte 3239 3240 if "with" in this.arg_types: 3241 this.set("with", cte) 3242 else: 3243 self.raise_error(f"{this.key} does not support CTE") 3244 this = cte 3245 3246 return this 3247 3248 # duckdb supports leading with FROM x 3249 from_ = ( 3250 self._parse_from(consume_pipe=True) 3251 if self._match(TokenType.FROM, advance=False) 3252 else None 3253 ) 3254 3255 if self._match(TokenType.SELECT): 3256 comments = self._prev_comments 3257 3258 hint = self._parse_hint() 3259 3260 if self._next and not self._next.token_type == TokenType.DOT: 3261 all_ = self._match(TokenType.ALL) 3262 distinct = self._match_set(self.DISTINCT_TOKENS) 3263 else: 3264 all_, distinct = None, None 3265 3266 kind = ( 3267 self._match(TokenType.ALIAS) 3268 and self._match_texts(("STRUCT", "VALUE")) 3269 and self._prev.text.upper() 3270 ) 3271 3272 if distinct: 3273 distinct = self.expression( 3274 exp.Distinct, 3275 on=self._parse_value(values=False) if self._match(TokenType.ON) else None, 3276 ) 3277 3278 if all_ and distinct: 3279 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT") 3280 3281 operation_modifiers = [] 3282 while self._curr and self._match_texts(self.OPERATION_MODIFIERS): 3283 operation_modifiers.append(exp.var(self._prev.text.upper())) 3284 3285 limit = self._parse_limit(top=True) 3286 projections = self._parse_projections() 3287 3288 this = self.expression( 3289 exp.Select, 3290 kind=kind, 3291 hint=hint, 3292 distinct=distinct, 3293 expressions=projections, 3294 limit=limit, 3295 operation_modifiers=operation_modifiers or None, 3296 ) 3297 this.comments = comments 3298 3299 into = self._parse_into() 3300 if into: 3301 this.set("into", into) 3302 3303 if not from_: 3304 from_ = self._parse_from() 3305 3306 if from_: 3307 this.set("from", from_) 3308 3309 this = self._parse_query_modifiers(this) 3310 elif (table or nested) and self._match(TokenType.L_PAREN): 3311 this = self._parse_wrapped_select(table=table) 3312 3313 # We return early here so that the UNION isn't attached to the subquery by the 3314 # following call to _parse_set_operations, but instead becomes the parent node 3315 self._match_r_paren() 3316 return self._parse_subquery(this, 
3224     def _parse_select_query(
3225         self,
3226         nested: bool = False,
3227         table: bool = False,
3228         parse_subquery_alias: bool = True,
3229         parse_set_operation: bool = True,
3230     ) -> t.Optional[exp.Expression]:
3231         cte = self._parse_with()
3232
3233         if cte:
3234             this = self._parse_statement()
3235
3236             if not this:
3237                 self.raise_error("Failed to parse any statement following CTE")
3238                 return cte
3239
3240             if "with" in this.arg_types:
3241                 this.set("with", cte)
3242             else:
3243                 self.raise_error(f"{this.key} does not support CTE")
3244                 this = cte
3245
3246             return this
3247
3248         # DuckDB supports a leading FROM clause, e.g. FROM x
3249         from_ = (
3250             self._parse_from(consume_pipe=True)
3251             if self._match(TokenType.FROM, advance=False)
3252             else None
3253         )
3254
3255         if self._match(TokenType.SELECT):
3256             comments = self._prev_comments
3257
3258             hint = self._parse_hint()
3259
3260             if self._next and not self._next.token_type == TokenType.DOT:
3261                 all_ = self._match(TokenType.ALL)
3262                 distinct = self._match_set(self.DISTINCT_TOKENS)
3263             else:
3264                 all_, distinct = None, None
3265
3266             kind = (
3267                 self._match(TokenType.ALIAS)
3268                 and self._match_texts(("STRUCT", "VALUE"))
3269                 and self._prev.text.upper()
3270             )
3271
3272             if distinct:
3273                 distinct = self.expression(
3274                     exp.Distinct,
3275                     on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
3276                 )
3277
3278             if all_ and distinct:
3279                 self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")
3280
3281             operation_modifiers = []
3282             while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
3283                 operation_modifiers.append(exp.var(self._prev.text.upper()))
3284
3285             limit = self._parse_limit(top=True)
3286             projections = self._parse_projections()
3287
3288             this = self.expression(
3289                 exp.Select,
3290                 kind=kind,
3291                 hint=hint,
3292                 distinct=distinct,
3293                 expressions=projections,
3294                 limit=limit,
3295                 operation_modifiers=operation_modifiers or None,
3296             )
3297             this.comments = comments
3298
3299             into = self._parse_into()
3300             if into:
3301                 this.set("into", into)
3302
3303             if not from_:
3304                 from_ = self._parse_from()
3305
3306             if from_:
3307                 this.set("from", from_)
3308
3309             this = self._parse_query_modifiers(this)
3310         elif (table or nested) and self._match(TokenType.L_PAREN):
3311             this = self._parse_wrapped_select(table=table)
3312
3313             # We return early here so that the UNION isn't attached to the subquery by the
3314             # following call to _parse_set_operations, but instead becomes the parent node
3315             self._match_r_paren()
3316             return self._parse_subquery(this, parse_alias=parse_subquery_alias)
3317         elif self._match(TokenType.VALUES, advance=False):
3318             this = self._parse_derived_table_values()
3319         elif from_:
3320             this = exp.select("*").from_(from_.this, copy=False)
3321         elif self._match(TokenType.SUMMARIZE):
3322             table = self._match(TokenType.TABLE)
3323             this = self._parse_select() or self._parse_string() or self._parse_table()
3324             return self.expression(exp.Summarize, this=this, table=table)
3325         elif self._match(TokenType.DESCRIBE):
3326             this = self._parse_describe()
3327         elif self._match_text_seq("STREAM"):
3328             this = self._parse_function()
3329             if this:
3330                 this = self.expression(exp.Stream, this=this)
3331             else:
3332                 self._retreat(self._index - 1)
3333         else:
3334             this = None
3335
3336         return self._parse_set_operations(this) if parse_set_operation else this
3337
3338     def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
3339         self._match_text_seq("SEARCH")
3340
3341         kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()
3342
3343         if not kind:
3344             return None
3345
3346         self._match_text_seq("FIRST", "BY")
3347
3348         return self.expression(
3349             exp.RecursiveWithSearch,
3350             kind=kind,
3351             this=self._parse_id_var(),
3352             expression=self._match_text_seq("SET") and self._parse_id_var(),
3353             using=self._match_text_seq("USING") and self._parse_id_var(),
3354         )
3355
3356     def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
3357         if not skip_with_token and not self._match(TokenType.WITH):
3358             return None
3359
3360         comments = self._prev_comments
3361         recursive = self._match(TokenType.RECURSIVE)
3362
3363         last_comments = None
3364         expressions = []
3365         while True:
3366             cte = self._parse_cte()
3367             if isinstance(cte, exp.CTE):
3368                 expressions.append(cte)
3369                 if last_comments:
3370                     cte.add_comments(last_comments)
3371
3372             if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
3373                 break
3374             else:
3375                 self._match(TokenType.WITH)
3376
3377             last_comments = self._prev_comments
3378
3379         return self.expression(
3380             exp.With,
3381             comments=comments,
3382             expressions=expressions,
3383             recursive=recursive,
3384             search=self._parse_recursive_with_search(),
3385         )
3386
3387     def _parse_cte(self) -> t.Optional[exp.CTE]:
3388         index = self._index
3389
3390         alias = self._parse_table_alias(self.ID_VAR_TOKENS)
3391         if not alias or not alias.this:
3392             self.raise_error("Expected CTE to have alias")
3393
3394         if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
3395             self._retreat(index)
3396             return None
3397
3398         comments = self._prev_comments
3399
3400         if self._match_text_seq("NOT", "MATERIALIZED"):
3401             materialized = False
3402         elif self._match_text_seq("MATERIALIZED"):
3403             materialized = True
3404         else:
3405             materialized = None
3406
3407         cte = self.expression(
3408             exp.CTE,
3409             this=self._parse_wrapped(self._parse_statement),
3410             alias=alias,
3411             materialized=materialized,
3412             comments=comments,
3413         )
3414
3415         values = cte.this
3416         if isinstance(values, exp.Values):
3417             if values.alias:
3418                 cte.set("this", exp.select("*").from_(values))
3419             else:
3420                 cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))
3421
3422         return cte
3423
3424     def _parse_table_alias(
3425         self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
3426     ) -> t.Optional[exp.TableAlias]:
3427         # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
3428         # so this section tries to parse the clause
version and if it fails, it treats the token 3429 # as an identifier (alias) 3430 if self._can_parse_limit_or_offset(): 3431 return None 3432 3433 any_token = self._match(TokenType.ALIAS) 3434 alias = ( 3435 self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 3436 or self._parse_string_as_identifier() 3437 ) 3438 3439 index = self._index 3440 if self._match(TokenType.L_PAREN): 3441 columns = self._parse_csv(self._parse_function_parameter) 3442 self._match_r_paren() if columns else self._retreat(index) 3443 else: 3444 columns = None 3445 3446 if not alias and not columns: 3447 return None 3448 3449 table_alias = self.expression(exp.TableAlias, this=alias, columns=columns) 3450 3451 # We bubble up comments from the Identifier to the TableAlias 3452 if isinstance(alias, exp.Identifier): 3453 table_alias.add_comments(alias.pop_comments()) 3454 3455 return table_alias 3456 3457 def _parse_subquery( 3458 self, this: t.Optional[exp.Expression], parse_alias: bool = True 3459 ) -> t.Optional[exp.Subquery]: 3460 if not this: 3461 return None 3462 3463 return self.expression( 3464 exp.Subquery, 3465 this=this, 3466 pivots=self._parse_pivots(), 3467 alias=self._parse_table_alias() if parse_alias else None, 3468 sample=self._parse_table_sample(), 3469 ) 3470 3471 def _implicit_unnests_to_explicit(self, this: E) -> E: 3472 from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm 3473 3474 refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name} 3475 for i, join in enumerate(this.args.get("joins") or []): 3476 table = join.this 3477 normalized_table = table.copy() 3478 normalized_table.meta["maybe_column"] = True 3479 normalized_table = _norm(normalized_table, dialect=self.dialect) 3480 3481 if isinstance(table, exp.Table) and not join.args.get("on"): 3482 if normalized_table.parts[0].name in refs: 3483 table_as_column = table.to_column() 3484 unnest = exp.Unnest(expressions=[table_as_column]) 3485 3486 # Table.to_column creates a parent Alias node that we want to convert to 3487 # a TableAlias and attach to the Unnest, so it matches the parser's output 3488 if isinstance(table.args.get("alias"), exp.TableAlias): 3489 table_as_column.replace(table_as_column.this) 3490 exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False) 3491 3492 table.replace(unnest) 3493 3494 refs.add(normalized_table.alias_or_name) 3495 3496 return this 3497 3498 def _parse_query_modifiers( 3499 self, this: t.Optional[exp.Expression] 3500 ) -> t.Optional[exp.Expression]: 3501 if isinstance(this, self.MODIFIABLES): 3502 for join in self._parse_joins(): 3503 this.append("joins", join) 3504 for lateral in iter(self._parse_lateral, None): 3505 this.append("laterals", lateral) 3506 3507 while True: 3508 if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False): 3509 parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type] 3510 key, expression = parser(self) 3511 3512 if expression: 3513 this.set(key, expression) 3514 if key == "limit": 3515 offset = expression.args.pop("offset", None) 3516 3517 if offset: 3518 offset = exp.Offset(expression=offset) 3519 this.set("offset", offset) 3520 3521 limit_by_expressions = expression.expressions 3522 expression.set("expressions", None) 3523 offset.set("expressions", limit_by_expressions) 3524 continue 3525 break 3526 3527 if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"): 3528 this = self._implicit_unnests_to_explicit(this) 3529 3530 return this 3531 3532 def 
_parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]: 3533 start = self._curr 3534 while self._curr: 3535 self._advance() 3536 3537 end = self._tokens[self._index - 1] 3538 return exp.Hint(expressions=[self._find_sql(start, end)]) 3539 3540 def _parse_hint_function_call(self) -> t.Optional[exp.Expression]: 3541 return self._parse_function_call() 3542 3543 def _parse_hint_body(self) -> t.Optional[exp.Hint]: 3544 start_index = self._index 3545 should_fallback_to_string = False 3546 3547 hints = [] 3548 try: 3549 for hint in iter( 3550 lambda: self._parse_csv( 3551 lambda: self._parse_hint_function_call() or self._parse_var(upper=True), 3552 ), 3553 [], 3554 ): 3555 hints.extend(hint) 3556 except ParseError: 3557 should_fallback_to_string = True 3558 3559 if should_fallback_to_string or self._curr: 3560 self._retreat(start_index) 3561 return self._parse_hint_fallback_to_string() 3562 3563 return self.expression(exp.Hint, expressions=hints) 3564 3565 def _parse_hint(self) -> t.Optional[exp.Hint]: 3566 if self._match(TokenType.HINT) and self._prev_comments: 3567 return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect) 3568 3569 return None 3570 3571 def _parse_into(self) -> t.Optional[exp.Into]: 3572 if not self._match(TokenType.INTO): 3573 return None 3574 3575 temp = self._match(TokenType.TEMPORARY) 3576 unlogged = self._match_text_seq("UNLOGGED") 3577 self._match(TokenType.TABLE) 3578 3579 return self.expression( 3580 exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged 3581 ) 3582 3583 def _parse_from( 3584 self, 3585 joins: bool = False, 3586 skip_from_token: bool = False, 3587 consume_pipe: bool = False, 3588 ) -> t.Optional[exp.From]: 3589 if not skip_from_token and not self._match(TokenType.FROM): 3590 return None 3591 3592 return self.expression( 3593 exp.From, 3594 comments=self._prev_comments, 3595 this=self._parse_table(joins=joins, consume_pipe=consume_pipe), 3596 ) 3597 3598 def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure: 3599 return self.expression( 3600 exp.MatchRecognizeMeasure, 3601 window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(), 3602 this=self._parse_expression(), 3603 ) 3604 3605 def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]: 3606 if not self._match(TokenType.MATCH_RECOGNIZE): 3607 return None 3608 3609 self._match_l_paren() 3610 3611 partition = self._parse_partition_by() 3612 order = self._parse_order() 3613 3614 measures = ( 3615 self._parse_csv(self._parse_match_recognize_measure) 3616 if self._match_text_seq("MEASURES") 3617 else None 3618 ) 3619 3620 if self._match_text_seq("ONE", "ROW", "PER", "MATCH"): 3621 rows = exp.var("ONE ROW PER MATCH") 3622 elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"): 3623 text = "ALL ROWS PER MATCH" 3624 if self._match_text_seq("SHOW", "EMPTY", "MATCHES"): 3625 text += " SHOW EMPTY MATCHES" 3626 elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"): 3627 text += " OMIT EMPTY MATCHES" 3628 elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"): 3629 text += " WITH UNMATCHED ROWS" 3630 rows = exp.var(text) 3631 else: 3632 rows = None 3633 3634 if self._match_text_seq("AFTER", "MATCH", "SKIP"): 3635 text = "AFTER MATCH SKIP" 3636 if self._match_text_seq("PAST", "LAST", "ROW"): 3637 text += " PAST LAST ROW" 3638 elif self._match_text_seq("TO", "NEXT", "ROW"): 3639 text += " TO NEXT ROW" 3640 elif self._match_text_seq("TO", "FIRST"): 3641 text += f" TO FIRST {self._advance_any().text}" # type: 
ignore 3642 elif self._match_text_seq("TO", "LAST"): 3643 text += f" TO LAST {self._advance_any().text}" # type: ignore 3644 after = exp.var(text) 3645 else: 3646 after = None 3647 3648 if self._match_text_seq("PATTERN"): 3649 self._match_l_paren() 3650 3651 if not self._curr: 3652 self.raise_error("Expecting )", self._curr) 3653 3654 paren = 1 3655 start = self._curr 3656 3657 while self._curr and paren > 0: 3658 if self._curr.token_type == TokenType.L_PAREN: 3659 paren += 1 3660 if self._curr.token_type == TokenType.R_PAREN: 3661 paren -= 1 3662 3663 end = self._prev 3664 self._advance() 3665 3666 if paren > 0: 3667 self.raise_error("Expecting )", self._curr) 3668 3669 pattern = exp.var(self._find_sql(start, end)) 3670 else: 3671 pattern = None 3672 3673 define = ( 3674 self._parse_csv(self._parse_name_as_expression) 3675 if self._match_text_seq("DEFINE") 3676 else None 3677 ) 3678 3679 self._match_r_paren() 3680 3681 return self.expression( 3682 exp.MatchRecognize, 3683 partition_by=partition, 3684 order=order, 3685 measures=measures, 3686 rows=rows, 3687 after=after, 3688 pattern=pattern, 3689 define=define, 3690 alias=self._parse_table_alias(), 3691 ) 3692 3693 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 3694 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY) 3695 if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY): 3696 cross_apply = False 3697 3698 if cross_apply is not None: 3699 this = self._parse_select(table=True) 3700 view = None 3701 outer = None 3702 elif self._match(TokenType.LATERAL): 3703 this = self._parse_select(table=True) 3704 view = self._match(TokenType.VIEW) 3705 outer = self._match(TokenType.OUTER) 3706 else: 3707 return None 3708 3709 if not this: 3710 this = ( 3711 self._parse_unnest() 3712 or self._parse_function() 3713 or self._parse_id_var(any_token=False) 3714 ) 3715 3716 while self._match(TokenType.DOT): 3717 this = exp.Dot( 3718 this=this, 3719 expression=self._parse_function() or self._parse_id_var(any_token=False), 3720 ) 3721 3722 ordinality: t.Optional[bool] = None 3723 3724 if view: 3725 table = self._parse_id_var(any_token=False) 3726 columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else [] 3727 table_alias: t.Optional[exp.TableAlias] = self.expression( 3728 exp.TableAlias, this=table, columns=columns 3729 ) 3730 elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias: 3731 # We move the alias from the lateral's child node to the lateral itself 3732 table_alias = this.args["alias"].pop() 3733 else: 3734 ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 3735 table_alias = self._parse_table_alias() 3736 3737 return self.expression( 3738 exp.Lateral, 3739 this=this, 3740 view=view, 3741 outer=outer, 3742 alias=table_alias, 3743 cross_apply=cross_apply, 3744 ordinality=ordinality, 3745 ) 3746 3747 def _parse_join_parts( 3748 self, 3749 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 3750 return ( 3751 self._match_set(self.JOIN_METHODS) and self._prev, 3752 self._match_set(self.JOIN_SIDES) and self._prev, 3753 self._match_set(self.JOIN_KINDS) and self._prev, 3754 ) 3755 3756 def _parse_using_identifiers(self) -> t.List[exp.Expression]: 3757 def _parse_column_as_identifier() -> t.Optional[exp.Expression]: 3758 this = self._parse_column() 3759 if isinstance(this, exp.Column): 3760 return this.this 3761 return this 3762 3763 return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True) 3764 3765 def _parse_join( 3766 self, 
skip_join_token: bool = False, parse_bracket: bool = False 3767 ) -> t.Optional[exp.Join]: 3768 if self._match(TokenType.COMMA): 3769 table = self._try_parse(self._parse_table) 3770 cross_join = self.expression(exp.Join, this=table) if table else None 3771 3772 if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE: 3773 cross_join.set("kind", "CROSS") 3774 3775 return cross_join 3776 3777 index = self._index 3778 method, side, kind = self._parse_join_parts() 3779 hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None 3780 join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN) 3781 join_comments = self._prev_comments 3782 3783 if not skip_join_token and not join: 3784 self._retreat(index) 3785 kind = None 3786 method = None 3787 side = None 3788 3789 outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False) 3790 cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False) 3791 3792 if not skip_join_token and not join and not outer_apply and not cross_apply: 3793 return None 3794 3795 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)} 3796 if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA): 3797 kwargs["expressions"] = self._parse_csv( 3798 lambda: self._parse_table(parse_bracket=parse_bracket) 3799 ) 3800 3801 if method: 3802 kwargs["method"] = method.text 3803 if side: 3804 kwargs["side"] = side.text 3805 if kind: 3806 kwargs["kind"] = kind.text 3807 if hint: 3808 kwargs["hint"] = hint 3809 3810 if self._match(TokenType.MATCH_CONDITION): 3811 kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison) 3812 3813 if self._match(TokenType.ON): 3814 kwargs["on"] = self._parse_assignment() 3815 elif self._match(TokenType.USING): 3816 kwargs["using"] = self._parse_using_identifiers() 3817 elif ( 3818 not (outer_apply or cross_apply) 3819 and not isinstance(kwargs["this"], exp.Unnest) 3820 and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY)) 3821 ): 3822 index = self._index 3823 joins: t.Optional[list] = list(self._parse_joins()) 3824 3825 if joins and self._match(TokenType.ON): 3826 kwargs["on"] = self._parse_assignment() 3827 elif joins and self._match(TokenType.USING): 3828 kwargs["using"] = self._parse_using_identifiers() 3829 else: 3830 joins = None 3831 self._retreat(index) 3832 3833 kwargs["this"].set("joins", joins if joins else None) 3834 3835 kwargs["pivots"] = self._parse_pivots() 3836 3837 comments = [c for token in (method, side, kind) if token for c in token.comments] 3838 comments = (join_comments or []) + comments 3839 return self.expression(exp.Join, comments=comments, **kwargs) 3840 3841 def _parse_opclass(self) -> t.Optional[exp.Expression]: 3842 this = self._parse_assignment() 3843 3844 if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False): 3845 return this 3846 3847 if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False): 3848 return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts()) 3849 3850 return this 3851 3852 def _parse_index_params(self) -> exp.IndexParameters: 3853 using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None 3854 3855 if self._match(TokenType.L_PAREN, advance=False): 3856 columns = self._parse_wrapped_csv(self._parse_with_operator) 3857 else: 3858 columns = None 3859 3860 include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None 3861 partition_by = self._parse_partition_by() 3862 
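# --- Editor's note (illustrative, not original source): the clauses parsed below
# follow Postgres's CREATE INDEX grammar, e.g.
#   CREATE INDEX idx ON t USING btree (col) INCLUDE (x) WITH (fillfactor = 70) WHERE col > 0
# where WITH (...) fills with_storage and a trailing WHERE yields a partial index.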
with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties() 3863 tablespace = ( 3864 self._parse_var(any_token=True) 3865 if self._match_text_seq("USING", "INDEX", "TABLESPACE") 3866 else None 3867 ) 3868 where = self._parse_where() 3869 3870 on = self._parse_field() if self._match(TokenType.ON) else None 3871 3872 return self.expression( 3873 exp.IndexParameters, 3874 using=using, 3875 columns=columns, 3876 include=include, 3877 partition_by=partition_by, 3878 where=where, 3879 with_storage=with_storage, 3880 tablespace=tablespace, 3881 on=on, 3882 ) 3883 3884 def _parse_index( 3885 self, index: t.Optional[exp.Expression] = None, anonymous: bool = False 3886 ) -> t.Optional[exp.Index]: 3887 if index or anonymous: 3888 unique = None 3889 primary = None 3890 amp = None 3891 3892 self._match(TokenType.ON) 3893 self._match(TokenType.TABLE) # hive 3894 table = self._parse_table_parts(schema=True) 3895 else: 3896 unique = self._match(TokenType.UNIQUE) 3897 primary = self._match_text_seq("PRIMARY") 3898 amp = self._match_text_seq("AMP") 3899 3900 if not self._match(TokenType.INDEX): 3901 return None 3902 3903 index = self._parse_id_var() 3904 table = None 3905 3906 params = self._parse_index_params() 3907 3908 return self.expression( 3909 exp.Index, 3910 this=index, 3911 table=table, 3912 unique=unique, 3913 primary=primary, 3914 amp=amp, 3915 params=params, 3916 ) 3917 3918 def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]: 3919 hints: t.List[exp.Expression] = [] 3920 if self._match_pair(TokenType.WITH, TokenType.L_PAREN): 3921 # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16 3922 hints.append( 3923 self.expression( 3924 exp.WithTableHint, 3925 expressions=self._parse_csv( 3926 lambda: self._parse_function() or self._parse_var(any_token=True) 3927 ), 3928 ) 3929 ) 3930 self._match_r_paren() 3931 else: 3932 # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html 3933 while self._match_set(self.TABLE_INDEX_HINT_TOKENS): 3934 hint = exp.IndexTableHint(this=self._prev.text.upper()) 3935 3936 self._match_set((TokenType.INDEX, TokenType.KEY)) 3937 if self._match(TokenType.FOR): 3938 hint.set("target", self._advance_any() and self._prev.text.upper()) 3939 3940 hint.set("expressions", self._parse_wrapped_id_vars()) 3941 hints.append(hint) 3942 3943 return hints or None 3944 3945 def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]: 3946 return ( 3947 (not schema and self._parse_function(optional_parens=False)) 3948 or self._parse_id_var(any_token=False) 3949 or self._parse_string_as_identifier() 3950 or self._parse_placeholder() 3951 ) 3952 3953 def _parse_table_parts( 3954 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 3955 ) -> exp.Table: 3956 catalog = None 3957 db = None 3958 table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema) 3959 3960 while self._match(TokenType.DOT): 3961 if catalog: 3962 # This allows nesting the table in arbitrarily many dot expressions if needed 3963 table = self.expression( 3964 exp.Dot, this=table, expression=self._parse_table_part(schema=schema) 3965 ) 3966 else: 3967 catalog = db 3968 db = table 3969 # "" used for tsql FROM a..b case 3970 table = self._parse_table_part(schema=schema) or "" 3971 3972 if ( 3973 wildcard 3974 and self._is_connected() 3975 and (isinstance(table, exp.Identifier) or not table) 3976 and self._match(TokenType.STAR) 3977 ): 3978 if isinstance(table, exp.Identifier): 
3979 table.args["this"] += "*" 3980 else: 3981 table = exp.Identifier(this="*") 3982 3983 # We bubble up comments from the Identifier to the Table 3984 comments = table.pop_comments() if isinstance(table, exp.Expression) else None 3985 3986 if is_db_reference: 3987 catalog = db 3988 db = table 3989 table = None 3990 3991 if not table and not is_db_reference: 3992 self.raise_error(f"Expected table name but got {self._curr}") 3993 if not db and is_db_reference: 3994 self.raise_error(f"Expected database name but got {self._curr}") 3995 3996 table = self.expression( 3997 exp.Table, 3998 comments=comments, 3999 this=table, 4000 db=db, 4001 catalog=catalog, 4002 ) 4003 4004 changes = self._parse_changes() 4005 if changes: 4006 table.set("changes", changes) 4007 4008 at_before = self._parse_historical_data() 4009 if at_before: 4010 table.set("when", at_before) 4011 4012 pivots = self._parse_pivots() 4013 if pivots: 4014 table.set("pivots", pivots) 4015 4016 return table 4017 4018 def _parse_table( 4019 self, 4020 schema: bool = False, 4021 joins: bool = False, 4022 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 4023 parse_bracket: bool = False, 4024 is_db_reference: bool = False, 4025 parse_partition: bool = False, 4026 consume_pipe: bool = False, 4027 ) -> t.Optional[exp.Expression]: 4028 lateral = self._parse_lateral() 4029 if lateral: 4030 return lateral 4031 4032 unnest = self._parse_unnest() 4033 if unnest: 4034 return unnest 4035 4036 values = self._parse_derived_table_values() 4037 if values: 4038 return values 4039 4040 subquery = self._parse_select(table=True, consume_pipe=consume_pipe) 4041 if subquery: 4042 if not subquery.args.get("pivots"): 4043 subquery.set("pivots", self._parse_pivots()) 4044 return subquery 4045 4046 bracket = parse_bracket and self._parse_bracket(None) 4047 bracket = self.expression(exp.Table, this=bracket) if bracket else None 4048 4049 rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv( 4050 self._parse_table 4051 ) 4052 rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None 4053 4054 only = self._match(TokenType.ONLY) 4055 4056 this = t.cast( 4057 exp.Expression, 4058 bracket 4059 or rows_from 4060 or self._parse_bracket( 4061 self._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 4062 ), 4063 ) 4064 4065 if only: 4066 this.set("only", only) 4067 4068 # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context 4069 self._match_text_seq("*") 4070 4071 parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION 4072 if parse_partition and self._match(TokenType.PARTITION, advance=False): 4073 this.set("partition", self._parse_partition()) 4074 4075 if schema: 4076 return self._parse_schema(this=this) 4077 4078 version = self._parse_version() 4079 4080 if version: 4081 this.set("version", version) 4082 4083 if self.dialect.ALIAS_POST_TABLESAMPLE: 4084 this.set("sample", self._parse_table_sample()) 4085 4086 alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS) 4087 if alias: 4088 this.set("alias", alias) 4089 4090 if isinstance(this, exp.Table) and self._match_text_seq("AT"): 4091 return self.expression( 4092 exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var() 4093 ) 4094 4095 this.set("hints", self._parse_table_hints()) 4096 4097 if not this.args.get("pivots"): 4098 this.set("pivots", self._parse_pivots()) 4099 4100 if not self.dialect.ALIAS_POST_TABLESAMPLE: 4101 this.set("sample", 
self._parse_table_sample()) 4102 4103 if joins: 4104 for join in self._parse_joins(): 4105 this.append("joins", join) 4106 4107 if self._match_pair(TokenType.WITH, TokenType.ORDINALITY): 4108 this.set("ordinality", True) 4109 this.set("alias", self._parse_table_alias()) 4110 4111 return this 4112 4113 def _parse_version(self) -> t.Optional[exp.Version]: 4114 if self._match(TokenType.TIMESTAMP_SNAPSHOT): 4115 this = "TIMESTAMP" 4116 elif self._match(TokenType.VERSION_SNAPSHOT): 4117 this = "VERSION" 4118 else: 4119 return None 4120 4121 if self._match_set((TokenType.FROM, TokenType.BETWEEN)): 4122 kind = self._prev.text.upper() 4123 start = self._parse_bitwise() 4124 self._match_texts(("TO", "AND")) 4125 end = self._parse_bitwise() 4126 expression: t.Optional[exp.Expression] = self.expression( 4127 exp.Tuple, expressions=[start, end] 4128 ) 4129 elif self._match_text_seq("CONTAINED", "IN"): 4130 kind = "CONTAINED IN" 4131 expression = self.expression( 4132 exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise) 4133 ) 4134 elif self._match(TokenType.ALL): 4135 kind = "ALL" 4136 expression = None 4137 else: 4138 self._match_text_seq("AS", "OF") 4139 kind = "AS OF" 4140 expression = self._parse_type() 4141 4142 return self.expression(exp.Version, this=this, expression=expression, kind=kind) 4143 4144 def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]: 4145 # https://docs.snowflake.com/en/sql-reference/constructs/at-before 4146 index = self._index 4147 historical_data = None 4148 if self._match_texts(self.HISTORICAL_DATA_PREFIX): 4149 this = self._prev.text.upper() 4150 kind = ( 4151 self._match(TokenType.L_PAREN) 4152 and self._match_texts(self.HISTORICAL_DATA_KIND) 4153 and self._prev.text.upper() 4154 ) 4155 expression = self._match(TokenType.FARROW) and self._parse_bitwise() 4156 4157 if expression: 4158 self._match_r_paren() 4159 historical_data = self.expression( 4160 exp.HistoricalData, this=this, kind=kind, expression=expression 4161 ) 4162 else: 4163 self._retreat(index) 4164 4165 return historical_data 4166 4167 def _parse_changes(self) -> t.Optional[exp.Changes]: 4168 if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"): 4169 return None 4170 4171 information = self._parse_var(any_token=True) 4172 self._match_r_paren() 4173 4174 return self.expression( 4175 exp.Changes, 4176 information=information, 4177 at_before=self._parse_historical_data(), 4178 end=self._parse_historical_data(), 4179 ) 4180 4181 def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]: 4182 if not self._match(TokenType.UNNEST): 4183 return None 4184 4185 expressions = self._parse_wrapped_csv(self._parse_equality) 4186 offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY) 4187 4188 alias = self._parse_table_alias() if with_alias else None 4189 4190 if alias: 4191 if self.dialect.UNNEST_COLUMN_ONLY: 4192 if alias.args.get("columns"): 4193 self.raise_error("Unexpected extra column alias in unnest.") 4194 4195 alias.set("columns", [alias.this]) 4196 alias.set("this", None) 4197 4198 columns = alias.args.get("columns") or [] 4199 if offset and len(expressions) < len(columns): 4200 offset = columns.pop() 4201 4202 if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET): 4203 self._match(TokenType.ALIAS) 4204 offset = self._parse_id_var( 4205 any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS 4206 ) or exp.to_identifier("offset") 4207 4208 return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset) 4209 
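# --- Editor's sketch (not part of sqlglot.parser): how the UNNEST handling above
# surfaces through the public API. BigQuery sets UNNEST_COLUMN_ONLY, so the table
# alias is rewritten into a column alias, and "WITH OFFSET AS pos" lands in the
# "offset" arg as an identifier (assuming a current sqlglot build).
import sqlglot
from sqlglot import exp

sql = "SELECT x, pos FROM t, UNNEST(arr) AS x WITH OFFSET AS pos"
unnest = sqlglot.parse_one(sql, read="bigquery").find(exp.Unnest)
print(unnest.args.get("alias"))   # TableAlias carrying the column alias x
print(unnest.args.get("offset"))  # Identifier pos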
4210     def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
4211         is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
4212         if not is_derived and not (
4213             # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
4214             self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
4215         ):
4216             return None
4217
4218         expressions = self._parse_csv(self._parse_value)
4219         alias = self._parse_table_alias()
4220
4221         if is_derived:
4222             self._match_r_paren()
4223
4224         return self.expression(
4225             exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
4226         )
4227
4228     def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
4229         if not self._match(TokenType.TABLE_SAMPLE) and not (
4230             as_modifier and self._match_text_seq("USING", "SAMPLE")
4231         ):
4232             return None
4233
4234         bucket_numerator = None
4235         bucket_denominator = None
4236         bucket_field = None
4237         percent = None
4238         size = None
4239         seed = None
4240
4241         method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
4242         matched_l_paren = self._match(TokenType.L_PAREN)
4243
4244         if self.TABLESAMPLE_CSV:
4245             num = None
4246             expressions = self._parse_csv(self._parse_primary)
4247         else:
4248             expressions = None
4249             num = (
4250                 self._parse_factor()
4251                 if self._match(TokenType.NUMBER, advance=False)
4252                 else self._parse_primary() or self._parse_placeholder()
4253             )
4254
4255         if self._match_text_seq("BUCKET"):
4256             bucket_numerator = self._parse_number()
4257             self._match_text_seq("OUT", "OF")
4258             bucket_denominator = self._parse_number()
4259             self._match(TokenType.ON)
4260             bucket_field = self._parse_field()
4261         elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
4262             percent = num
4263         elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
4264             size = num
4265         else:
4266             percent = num
4267
4268         if matched_l_paren:
4269             self._match_r_paren()
4270
4271         if self._match(TokenType.L_PAREN):
4272             method = self._parse_var(upper=True)
4273             seed = self._match(TokenType.COMMA) and self._parse_number()
4274             self._match_r_paren()
4275         elif self._match_texts(("SEED", "REPEATABLE")):
4276             seed = self._parse_wrapped(self._parse_number)
4277
4278         if not method and self.DEFAULT_SAMPLING_METHOD:
4279             method = exp.var(self.DEFAULT_SAMPLING_METHOD)
4280
4281         return self.expression(
4282             exp.TableSample,
4283             expressions=expressions,
4284             method=method,
4285             bucket_numerator=bucket_numerator,
4286             bucket_denominator=bucket_denominator,
4287             bucket_field=bucket_field,
4288             percent=percent,
4289             size=size,
4290             seed=seed,
4291         )
4292
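# --- Editor's sketch (not part of sqlglot.parser): in _parse_table_sample above,
# a bare number becomes "percent" or "size" depending on the dialect flag
# TABLESAMPLE_SIZE_IS_PERCENT, while Hive's bucket form fills the bucket_* args.
# Assumes the stock Hive dialect tokenizes TABLESAMPLE as usual.
import sqlglot
from sqlglot import exp

sample = sqlglot.parse_one(
    "SELECT * FROM t TABLESAMPLE (BUCKET 3 OUT OF 16 ON rand())", read="hive"
).find(exp.TableSample)
print(sample.args.get("bucket_numerator"))    # numeric literal 3
print(sample.args.get("bucket_denominator"))  # numeric literal 16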
4293     def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
4294         return list(iter(self._parse_pivot, None)) or None
4295
4296     def _parse_joins(self) -> t.Iterator[exp.Join]:
4297         return iter(self._parse_join, None)
4298
4299     def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
4300         if not self._match(TokenType.INTO):
4301             return None
4302
4303         return self.expression(
4304             exp.UnpivotColumns,
4305             this=self._match_text_seq("NAME") and self._parse_column(),
4306             expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
4307         )
4308
4309     # https://duckdb.org/docs/sql/statements/pivot
4310     def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
4311         def _parse_on() -> t.Optional[exp.Expression]:
4312             this = self._parse_bitwise()
4313
4314             if self._match(TokenType.IN):
4315                 # PIVOT ... ON col IN (row_val1, row_val2)
4316                 return self._parse_in(this)
4317             if self._match(TokenType.ALIAS, advance=False):
4318                 # UNPIVOT ... ON (col1, col2, col3) AS row_val
4319                 return self._parse_alias(this)
4320
4321             return this
4322
4323         this = self._parse_table()
4324         expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
4325         into = self._parse_unpivot_columns()
4326         using = self._match(TokenType.USING) and self._parse_csv(
4327             lambda: self._parse_alias(self._parse_function())
4328         )
4329         group = self._parse_group()
4330
4331         return self.expression(
4332             exp.Pivot,
4333             this=this,
4334             expressions=expressions,
4335             using=using,
4336             group=group,
4337             unpivot=is_unpivot,
4338             into=into,
4339         )
4340
4341     def _parse_pivot_in(self) -> exp.In:
4342         def _parse_aliased_expression() -> t.Optional[exp.Expression]:
4343             this = self._parse_select_or_expression()
4344
4345             self._match(TokenType.ALIAS)
4346             alias = self._parse_bitwise()
4347             if alias:
4348                 if isinstance(alias, exp.Column) and not alias.db:
4349                     alias = alias.this
4350                 return self.expression(exp.PivotAlias, this=this, alias=alias)
4351
4352             return this
4353
4354         value = self._parse_column()
4355
4356         if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
4357             self.raise_error("Expecting IN (")
4358
4359         if self._match(TokenType.ANY):
4360             exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
4361         else:
4362             exprs = self._parse_csv(_parse_aliased_expression)
4363
4364         self._match_r_paren()
4365         return self.expression(exp.In, this=value, expressions=exprs)
4366
4367     def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
4368         func = self._parse_function()
4369         if not func:
4370             self.raise_error("Expecting an aggregation function in PIVOT")
4371
4372         return self._parse_alias(func)
4373
4374     def _parse_pivot(self) -> t.Optional[exp.Pivot]:
4375         index = self._index
4376         include_nulls = None
4377
4378         if self._match(TokenType.PIVOT):
4379             unpivot = False
4380         elif self._match(TokenType.UNPIVOT):
4381             unpivot = True
4382
4383             # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
4384             if self._match_text_seq("INCLUDE", "NULLS"):
4385                 include_nulls = True
4386             elif self._match_text_seq("EXCLUDE", "NULLS"):
4387                 include_nulls = False
4388         else:
4389             return None
4390
4391         expressions = []
4392
4393         if not self._match(TokenType.L_PAREN):
4394             self._retreat(index)
4395             return None
4396
4397         if unpivot:
4398             expressions = self._parse_csv(self._parse_column)
4399         else:
4400             expressions = self._parse_csv(self._parse_pivot_aggregation)
4401
4402         if not expressions:
4403             self.raise_error("Failed to parse PIVOT's aggregation list")
4404
4405         if not self._match(TokenType.FOR):
4406             self.raise_error("Expecting FOR")
4407
4408         fields = []
4409         while True:
4410             field = self._try_parse(self._parse_pivot_in)
4411             if not field:
4412                 break
4413             fields.append(field)
4414
4415         default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
4416             self._parse_bitwise
4417         )
4418
4419         group = self._parse_group()
4420
4421         self._match_r_paren()
4422
4423         pivot = self.expression(
4424             exp.Pivot,
4425             expressions=expressions,
4426             fields=fields,
4427             unpivot=unpivot,
4428             include_nulls=include_nulls,
4429             default_on_null=default_on_null,
4430             group=group,
4431         )
4432
4433         if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
4434             pivot.set("alias", self._parse_table_alias())
4435
4436         if not unpivot:
4437             names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))
4438
4439             columns: t.List[exp.Expression] = []
4440             all_fields = []
4441             for pivot_field in pivot.fields:
4442                 pivot_field_expressions = pivot_field.expressions
4443
4444                 # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
4445                 if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
4446                     continue
4447
4448                 all_fields.append(
4449                     [
4450                         fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
4451                         for fld in pivot_field_expressions
4452                     ]
4453                 )
4454
4455             if all_fields:
4456                 if names:
4457                     all_fields.append(names)
4458
4459                 # Generate all possible combinations of the pivot columns
4460                 # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
4461                 # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
4462                 for fld_parts_tuple in itertools.product(*all_fields):
4463                     fld_parts = list(fld_parts_tuple)
4464
4465                     if names and self.PREFIXED_PIVOT_COLUMNS:
4466                         # Move the "name" to the front of the list
4467                         fld_parts.insert(0, fld_parts.pop(-1))
4468
4469                     columns.append(exp.to_identifier("_".join(fld_parts)))
4470
4471             pivot.set("columns", columns)
4472
4473         return pivot
4474
4475     def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
4476         return [agg.alias for agg in aggregations if agg.alias]
4477
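# --- Editor's sketch (not part of sqlglot.parser): the column-name generation
# above is a plain Cartesian product; the comment's example can be re-run in
# isolation with hypothetical field values (these lists are not parser state).
import itertools

# FOR year IN (2000, 2010) FOR country IN ('NL', 'US') ... AS total
all_fields = [["2000", "2010"], ["NL", "US"], ["total"]]
print(["_".join(parts) for parts in itertools.product(*all_fields)])
# -> ['2000_NL_total', '2000_US_total', '2010_NL_total', '2010_US_total']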
4478     def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
4479         if not skip_where_token and not self._match(TokenType.PREWHERE):
4480             return None
4481
4482         return self.expression(
4483             exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
4484         )
4485
4486     def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
4487         if not skip_where_token and not self._match(TokenType.WHERE):
4488             return None
4489
4490         return self.expression(
4491             exp.Where, comments=self._prev_comments, this=self._parse_assignment()
4492         )
4493
4494     def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
4495         if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
4496             return None
4497         comments = self._prev_comments
4498
4499         elements: t.Dict[str, t.Any] = defaultdict(list)
4500
4501         if self._match(TokenType.ALL):
4502             elements["all"] = True
4503         elif self._match(TokenType.DISTINCT):
4504             elements["all"] = False
4505
4506         if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
4507             return self.expression(exp.Group, comments=comments, **elements)  # type: ignore
4508
4509         while True:
4510             index = self._index
4511
4512             elements["expressions"].extend(
4513                 self._parse_csv(
4514                     lambda: None
4515                     if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
4516                     else self._parse_assignment()
4517                 )
4518             )
4519
4520             before_with_index = self._index
4521             with_prefix = self._match(TokenType.WITH)
4522
4523             if self._match(TokenType.ROLLUP):
4524                 elements["rollup"].append(
4525                     self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
4526                 )
4527             elif self._match(TokenType.CUBE):
4528                 elements["cube"].append(
4529                     self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
4530                 )
4531             elif self._match(TokenType.GROUPING_SETS):
4532                 elements["grouping_sets"].append(
4533                     self.expression(
4534                         exp.GroupingSets,
4535                         expressions=self._parse_wrapped_csv(self._parse_grouping_set),
4536                     )
4537                 )
4538             elif self._match_text_seq("TOTALS"):
4539                 elements["totals"] = True  # type: ignore
4540
4541             if
before_with_index <= self._index <= before_with_index + 1: 4542 self._retreat(before_with_index) 4543 break 4544 4545 if index == self._index: 4546 break 4547 4548 return self.expression(exp.Group, comments=comments, **elements) # type: ignore 4549 4550 def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E: 4551 return self.expression( 4552 kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column) 4553 ) 4554 4555 def _parse_grouping_set(self) -> t.Optional[exp.Expression]: 4556 if self._match(TokenType.L_PAREN): 4557 grouping_set = self._parse_csv(self._parse_column) 4558 self._match_r_paren() 4559 return self.expression(exp.Tuple, expressions=grouping_set) 4560 4561 return self._parse_column() 4562 4563 def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]: 4564 if not skip_having_token and not self._match(TokenType.HAVING): 4565 return None 4566 return self.expression( 4567 exp.Having, comments=self._prev_comments, this=self._parse_assignment() 4568 ) 4569 4570 def _parse_qualify(self) -> t.Optional[exp.Qualify]: 4571 if not self._match(TokenType.QUALIFY): 4572 return None 4573 return self.expression(exp.Qualify, this=self._parse_assignment()) 4574 4575 def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]: 4576 self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression( 4577 exp.Prior, this=self._parse_bitwise() 4578 ) 4579 connect = self._parse_assignment() 4580 self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR") 4581 return connect 4582 4583 def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]: 4584 if skip_start_token: 4585 start = None 4586 elif self._match(TokenType.START_WITH): 4587 start = self._parse_assignment() 4588 else: 4589 return None 4590 4591 self._match(TokenType.CONNECT_BY) 4592 nocycle = self._match_text_seq("NOCYCLE") 4593 connect = self._parse_connect_with_prior() 4594 4595 if not start and self._match(TokenType.START_WITH): 4596 start = self._parse_assignment() 4597 4598 return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle) 4599 4600 def _parse_name_as_expression(self) -> t.Optional[exp.Expression]: 4601 this = self._parse_id_var(any_token=True) 4602 if self._match(TokenType.ALIAS): 4603 this = self.expression(exp.Alias, alias=this, this=self._parse_assignment()) 4604 return this 4605 4606 def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]: 4607 if self._match_text_seq("INTERPOLATE"): 4608 return self._parse_wrapped_csv(self._parse_name_as_expression) 4609 return None 4610 4611 def _parse_order( 4612 self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False 4613 ) -> t.Optional[exp.Expression]: 4614 siblings = None 4615 if not skip_order_token and not self._match(TokenType.ORDER_BY): 4616 if not self._match(TokenType.ORDER_SIBLINGS_BY): 4617 return this 4618 4619 siblings = True 4620 4621 return self.expression( 4622 exp.Order, 4623 comments=self._prev_comments, 4624 this=this, 4625 expressions=self._parse_csv(self._parse_ordered), 4626 siblings=siblings, 4627 ) 4628 4629 def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]: 4630 if not self._match(token): 4631 return None 4632 return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered)) 4633 4634 def _parse_ordered( 4635 self, parse_method: t.Optional[t.Callable] = None 4636 ) -> t.Optional[exp.Ordered]: 4637 this = parse_method() if parse_method else 
self._parse_assignment() 4638 if not this: 4639 return None 4640 4641 if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL: 4642 this = exp.var("ALL") 4643 4644 asc = self._match(TokenType.ASC) 4645 desc = self._match(TokenType.DESC) or (asc and False) 4646 4647 is_nulls_first = self._match_text_seq("NULLS", "FIRST") 4648 is_nulls_last = self._match_text_seq("NULLS", "LAST") 4649 4650 nulls_first = is_nulls_first or False 4651 explicitly_null_ordered = is_nulls_first or is_nulls_last 4652 4653 if ( 4654 not explicitly_null_ordered 4655 and ( 4656 (not desc and self.dialect.NULL_ORDERING == "nulls_are_small") 4657 or (desc and self.dialect.NULL_ORDERING != "nulls_are_small") 4658 ) 4659 and self.dialect.NULL_ORDERING != "nulls_are_last" 4660 ): 4661 nulls_first = True 4662 4663 if self._match_text_seq("WITH", "FILL"): 4664 with_fill = self.expression( 4665 exp.WithFill, 4666 **{ # type: ignore 4667 "from": self._match(TokenType.FROM) and self._parse_bitwise(), 4668 "to": self._match_text_seq("TO") and self._parse_bitwise(), 4669 "step": self._match_text_seq("STEP") and self._parse_bitwise(), 4670 "interpolate": self._parse_interpolate(), 4671 }, 4672 ) 4673 else: 4674 with_fill = None 4675 4676 return self.expression( 4677 exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill 4678 ) 4679 4680 def _parse_limit_options(self) -> exp.LimitOptions: 4681 percent = self._match(TokenType.PERCENT) 4682 rows = self._match_set((TokenType.ROW, TokenType.ROWS)) 4683 self._match_text_seq("ONLY") 4684 with_ties = self._match_text_seq("WITH", "TIES") 4685 return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties) 4686 4687 def _parse_limit( 4688 self, 4689 this: t.Optional[exp.Expression] = None, 4690 top: bool = False, 4691 skip_limit_token: bool = False, 4692 ) -> t.Optional[exp.Expression]: 4693 if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT): 4694 comments = self._prev_comments 4695 if top: 4696 limit_paren = self._match(TokenType.L_PAREN) 4697 expression = self._parse_term() if limit_paren else self._parse_number() 4698 4699 if limit_paren: 4700 self._match_r_paren() 4701 4702 limit_options = self._parse_limit_options() 4703 else: 4704 limit_options = None 4705 expression = self._parse_term() 4706 4707 if self._match(TokenType.COMMA): 4708 offset = expression 4709 expression = self._parse_term() 4710 else: 4711 offset = None 4712 4713 limit_exp = self.expression( 4714 exp.Limit, 4715 this=this, 4716 expression=expression, 4717 offset=offset, 4718 comments=comments, 4719 limit_options=limit_options, 4720 expressions=self._parse_limit_by(), 4721 ) 4722 4723 return limit_exp 4724 4725 if self._match(TokenType.FETCH): 4726 direction = self._match_set((TokenType.FIRST, TokenType.NEXT)) 4727 direction = self._prev.text.upper() if direction else "FIRST" 4728 4729 count = self._parse_field(tokens=self.FETCH_TOKENS) 4730 4731 return self.expression( 4732 exp.Fetch, 4733 direction=direction, 4734 count=count, 4735 limit_options=self._parse_limit_options(), 4736 ) 4737 4738 return this 4739 4740 def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4741 if not self._match(TokenType.OFFSET): 4742 return this 4743 4744 count = self._parse_term() 4745 self._match_set((TokenType.ROW, TokenType.ROWS)) 4746 4747 return self.expression( 4748 exp.Offset, this=this, expression=count, expressions=self._parse_limit_by() 4749 ) 4750 4751 def _can_parse_limit_or_offset(self) -> 
bool: 4752 if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False): 4753 return False 4754 4755 index = self._index 4756 result = bool( 4757 self._try_parse(self._parse_limit, retreat=True) 4758 or self._try_parse(self._parse_offset, retreat=True) 4759 ) 4760 self._retreat(index) 4761 return result 4762 4763 def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]: 4764 return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise) 4765 4766 def _parse_locks(self) -> t.List[exp.Lock]: 4767 locks = [] 4768 while True: 4769 update, key = None, None 4770 if self._match_text_seq("FOR", "UPDATE"): 4771 update = True 4772 elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq( 4773 "LOCK", "IN", "SHARE", "MODE" 4774 ): 4775 update = False 4776 elif self._match_text_seq("FOR", "KEY", "SHARE"): 4777 update, key = False, True 4778 elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"): 4779 update, key = True, True 4780 else: 4781 break 4782 4783 expressions = None 4784 if self._match_text_seq("OF"): 4785 expressions = self._parse_csv(lambda: self._parse_table(schema=True)) 4786 4787 wait: t.Optional[bool | exp.Expression] = None 4788 if self._match_text_seq("NOWAIT"): 4789 wait = True 4790 elif self._match_text_seq("WAIT"): 4791 wait = self._parse_primary() 4792 elif self._match_text_seq("SKIP", "LOCKED"): 4793 wait = False 4794 4795 locks.append( 4796 self.expression( 4797 exp.Lock, update=update, expressions=expressions, wait=wait, key=key 4798 ) 4799 ) 4800 4801 return locks 4802 4803 def parse_set_operation( 4804 self, this: t.Optional[exp.Expression], consume_pipe: bool = False 4805 ) -> t.Optional[exp.Expression]: 4806 start = self._index 4807 _, side_token, kind_token = self._parse_join_parts() 4808 4809 side = side_token.text if side_token else None 4810 kind = kind_token.text if kind_token else None 4811 4812 if not self._match_set(self.SET_OPERATIONS): 4813 self._retreat(start) 4814 return None 4815 4816 token_type = self._prev.token_type 4817 4818 if token_type == TokenType.UNION: 4819 operation: t.Type[exp.SetOperation] = exp.Union 4820 elif token_type == TokenType.EXCEPT: 4821 operation = exp.Except 4822 else: 4823 operation = exp.Intersect 4824 4825 comments = self._prev.comments 4826 4827 if self._match(TokenType.DISTINCT): 4828 distinct: t.Optional[bool] = True 4829 elif self._match(TokenType.ALL): 4830 distinct = False 4831 else: 4832 distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation] 4833 if distinct is None: 4834 self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}") 4835 4836 by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq( 4837 "STRICT", "CORRESPONDING" 4838 ) 4839 if self._match_text_seq("CORRESPONDING"): 4840 by_name = True 4841 if not side and not kind: 4842 kind = "INNER" 4843 4844 on_column_list = None 4845 if by_name and self._match_texts(("ON", "BY")): 4846 on_column_list = self._parse_wrapped_csv(self._parse_column) 4847 4848 expression = self._parse_select( 4849 nested=True, parse_set_operation=False, consume_pipe=consume_pipe 4850 ) 4851 4852 return self.expression( 4853 operation, 4854 comments=comments, 4855 this=this, 4856 distinct=distinct, 4857 by_name=by_name, 4858 expression=expression, 4859 side=side, 4860 kind=kind, 4861 on=on_column_list, 4862 ) 4863 4864 def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4865 while this: 4866 setop = self.parse_set_operation(this) 4867 if not setop: 4868 break 4869 this = setop 4870 4871 
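# --- Editor's note (illustrative, not original source): the loop above makes
# chained set operations left-deep, e.g. SELECT a UNION SELECT b UNION ALL SELECT c
# parses as Union(Union(a, b), c). The block below then hoists trailing modifiers
# (the SET_OP_MODIFIERS, e.g. ORDER BY / LIMIT) off the right-most operand and
# onto the set operation itself for dialects with MODIFIERS_ATTACHED_TO_SET_OP.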
if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP: 4872 expression = this.expression 4873 4874 if expression: 4875 for arg in self.SET_OP_MODIFIERS: 4876 expr = expression.args.get(arg) 4877 if expr: 4878 this.set(arg, expr.pop()) 4879 4880 return this 4881 4882 def _parse_expression(self) -> t.Optional[exp.Expression]: 4883 return self._parse_alias(self._parse_assignment()) 4884 4885 def _parse_assignment(self) -> t.Optional[exp.Expression]: 4886 this = self._parse_disjunction() 4887 if not this and self._next and self._next.token_type in self.ASSIGNMENT: 4888 # This allows us to parse <non-identifier token> := <expr> 4889 this = exp.column( 4890 t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text) 4891 ) 4892 4893 while self._match_set(self.ASSIGNMENT): 4894 if isinstance(this, exp.Column) and len(this.parts) == 1: 4895 this = this.this 4896 4897 this = self.expression( 4898 self.ASSIGNMENT[self._prev.token_type], 4899 this=this, 4900 comments=self._prev_comments, 4901 expression=self._parse_assignment(), 4902 ) 4903 4904 return this 4905 4906 def _parse_disjunction(self) -> t.Optional[exp.Expression]: 4907 return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION) 4908 4909 def _parse_conjunction(self) -> t.Optional[exp.Expression]: 4910 return self._parse_tokens(self._parse_equality, self.CONJUNCTION) 4911 4912 def _parse_equality(self) -> t.Optional[exp.Expression]: 4913 return self._parse_tokens(self._parse_comparison, self.EQUALITY) 4914 4915 def _parse_comparison(self) -> t.Optional[exp.Expression]: 4916 return self._parse_tokens(self._parse_range, self.COMPARISON) 4917 4918 def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4919 this = this or self._parse_bitwise() 4920 negate = self._match(TokenType.NOT) 4921 4922 if self._match_set(self.RANGE_PARSERS): 4923 expression = self.RANGE_PARSERS[self._prev.token_type](self, this) 4924 if not expression: 4925 return this 4926 4927 this = expression 4928 elif self._match(TokenType.ISNULL): 4929 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4930 4931 # Postgres supports ISNULL and NOTNULL for conditions. 
4932 # https://blog.andreiavram.ro/postgresql-null-composite-type/ 4933 if self._match(TokenType.NOTNULL): 4934 this = self.expression(exp.Is, this=this, expression=exp.Null()) 4935 this = self.expression(exp.Not, this=this) 4936 4937 if negate: 4938 this = self._negate_range(this) 4939 4940 if self._match(TokenType.IS): 4941 this = self._parse_is(this) 4942 4943 return this 4944 4945 def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 4946 if not this: 4947 return this 4948 4949 return self.expression(exp.Not, this=this) 4950 4951 def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 4952 index = self._index - 1 4953 negate = self._match(TokenType.NOT) 4954 4955 if self._match_text_seq("DISTINCT", "FROM"): 4956 klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ 4957 return self.expression(klass, this=this, expression=self._parse_bitwise()) 4958 4959 if self._match(TokenType.JSON): 4960 kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper() 4961 4962 if self._match_text_seq("WITH"): 4963 _with = True 4964 elif self._match_text_seq("WITHOUT"): 4965 _with = False 4966 else: 4967 _with = None 4968 4969 unique = self._match(TokenType.UNIQUE) 4970 self._match_text_seq("KEYS") 4971 expression: t.Optional[exp.Expression] = self.expression( 4972 exp.JSON, **{"this": kind, "with": _with, "unique": unique} 4973 ) 4974 else: 4975 expression = self._parse_primary() or self._parse_null() 4976 if not expression: 4977 self._retreat(index) 4978 return None 4979 4980 this = self.expression(exp.Is, this=this, expression=expression) 4981 return self.expression(exp.Not, this=this) if negate else this 4982 4983 def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In: 4984 unnest = self._parse_unnest(with_alias=False) 4985 if unnest: 4986 this = self.expression(exp.In, this=this, unnest=unnest) 4987 elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)): 4988 matched_l_paren = self._prev.token_type == TokenType.L_PAREN 4989 expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias)) 4990 4991 if len(expressions) == 1 and isinstance(expressions[0], exp.Query): 4992 this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False)) 4993 else: 4994 this = self.expression(exp.In, this=this, expressions=expressions) 4995 4996 if matched_l_paren: 4997 self._match_r_paren(this) 4998 elif not self._match(TokenType.R_BRACKET, expression=this): 4999 self.raise_error("Expecting ]") 5000 else: 5001 this = self.expression(exp.In, this=this, field=self._parse_column()) 5002 5003 return this 5004 5005 def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between: 5006 symmetric = None 5007 if self._match_text_seq("SYMMETRIC"): 5008 symmetric = True 5009 elif self._match_text_seq("ASYMMETRIC"): 5010 symmetric = False 5011 5012 low = self._parse_bitwise() 5013 self._match(TokenType.AND) 5014 high = self._parse_bitwise() 5015 5016 return self.expression( 5017 exp.Between, 5018 this=this, 5019 low=low, 5020 high=high, 5021 symmetric=symmetric, 5022 ) 5023 5024 def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5025 if not self._match(TokenType.ESCAPE): 5026 return this 5027 return self.expression(exp.Escape, this=this, expression=self._parse_string()) 5028 5029 def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]: 5030 index = self._index 5031 5032 if not 
self._match(TokenType.INTERVAL) and match_interval:
5033             return None
5034
5035         if self._match(TokenType.STRING, advance=False):
5036             this = self._parse_primary()
5037         else:
5038             this = self._parse_term()
5039
5040         if not this or (
5041             isinstance(this, exp.Column)
5042             and not this.table
5043             and not this.this.quoted
5044             and this.name.upper() == "IS"
5045         ):
5046             self._retreat(index)
5047             return None
5048
5049         unit = self._parse_function() or (
5050             not self._match(TokenType.ALIAS, advance=False)
5051             and self._parse_var(any_token=True, upper=True)
5052         )
5053
5054         # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
5055         # each INTERVAL expression into this canonical form so it's easy to transpile
5056         if this and this.is_number:
5057             this = exp.Literal.string(this.to_py())
5058         elif this and this.is_string:
5059             parts = exp.INTERVAL_STRING_RE.findall(this.name)
5060             if parts and unit:
5061                 # Unconsume the eagerly-parsed unit, since the real unit was part of the string
5062                 unit = None
5063                 self._retreat(self._index - 1)
5064
5065             if len(parts) == 1:
5066                 this = exp.Literal.string(parts[0][0])
5067                 unit = self.expression(exp.Var, this=parts[0][1].upper())
5068         if self.INTERVAL_SPANS and self._match_text_seq("TO"):
5069             unit = self.expression(
5070                 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
5071             )
5072
5073         interval = self.expression(exp.Interval, this=this, unit=unit)
5074
5075         index = self._index
5076         self._match(TokenType.PLUS)
5077
5078         # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
5079         if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
5080             return self.expression(
5081                 exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
5082             )
5083
5084         self._retreat(index)
5085         return interval
5086
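# --- Editor's sketch (not part of sqlglot.parser): the canonicalization above
# stringifies numeric quantities and upper-cases units, so INTERVAL 5 day and
# INTERVAL '5' DAY yield the same tree; adjacent interval literals fold into an
# exp.Add chain. Assumes the standard parse_one / find helpers.
import sqlglot
from sqlglot import exp

interval = sqlglot.parse_one("SELECT INTERVAL 5 day").find(exp.Interval)
print(interval.this)          # the string literal '5'
print(interval.text("unit"))  # DAY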
isinstance(ident, exp.Identifier): 5139 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5140 5141 return this 5142 5143 def _parse_factor(self) -> t.Optional[exp.Expression]: 5144 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5145 this = parse_method() 5146 5147 while self._match_set(self.FACTOR): 5148 klass = self.FACTOR[self._prev.token_type] 5149 comments = self._prev_comments 5150 expression = parse_method() 5151 5152 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5153 self._retreat(self._index - 1) 5154 return this 5155 5156 this = self.expression(klass, this=this, comments=comments, expression=expression) 5157 5158 if isinstance(this, exp.Div): 5159 this.args["typed"] = self.dialect.TYPED_DIVISION 5160 this.args["safe"] = self.dialect.SAFE_DIVISION 5161 5162 return this 5163 5164 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5165 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5166 5167 def _parse_unary(self) -> t.Optional[exp.Expression]: 5168 if self._match_set(self.UNARY_PARSERS): 5169 return self.UNARY_PARSERS[self._prev.token_type](self) 5170 return self._parse_at_time_zone(self._parse_type()) 5171 5172 def _parse_type( 5173 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5174 ) -> t.Optional[exp.Expression]: 5175 interval = parse_interval and self._parse_interval() 5176 if interval: 5177 return interval 5178 5179 index = self._index 5180 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5181 5182 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5183 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5184 if isinstance(data_type, exp.Cast): 5185 # This constructor can contain ops directly after it, for instance struct unnesting: 5186 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5187 return self._parse_column_ops(data_type) 5188 5189 if data_type: 5190 index2 = self._index 5191 this = self._parse_primary() 5192 5193 if isinstance(this, exp.Literal): 5194 literal = this.name 5195 this = self._parse_column_ops(this) 5196 5197 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5198 if parser: 5199 return parser(self, this, data_type) 5200 5201 if ( 5202 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5203 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5204 and TIME_ZONE_RE.search(literal) 5205 ): 5206 data_type = exp.DataType.build("TIMESTAMPTZ") 5207 5208 return self.expression(exp.Cast, this=this, to=data_type) 5209 5210 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5211 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5212 # 5213 # If the index difference here is greater than 1, that means the parser itself must have 5214 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5215 # 5216 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5217 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5218 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5219 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5220 # 5221 # In these cases, we don't really want to return the converted type, but instead retreat 5222 # and try to parse a Column or Identifier in the section below.
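#
# A concrete sketch of that bookkeeping (token counts are illustrative, not taken
# from the source): parsing DECIMAL(38, 0) consumes the six tokens
# DECIMAL ( 38 , 0 ), so index2 - index == 6, whereas a bare DECIMAL consumes a
# single token (index2 - index == 1) even when a TYPE_CONVERTERS callable has
# already filled in the (38, 0) expressions on the resulting DataType.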
5223 if data_type.expressions and index2 - index > 1: 5224 self._retreat(index2) 5225 return self._parse_column_ops(data_type) 5226 5227 self._retreat(index) 5228 5229 if fallback_to_identifier: 5230 return self._parse_id_var() 5231 5232 this = self._parse_column() 5233 return this and self._parse_column_ops(this) 5234 5235 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5236 this = self._parse_type() 5237 if not this: 5238 return None 5239 5240 if isinstance(this, exp.Column) and not this.table: 5241 this = exp.var(this.name.upper()) 5242 5243 return self.expression( 5244 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5245 ) 5246 5247 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5248 type_name = identifier.name 5249 5250 while self._match(TokenType.DOT): 5251 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5252 5253 return exp.DataType.build(type_name, udt=True) 5254 5255 def _parse_types( 5256 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5257 ) -> t.Optional[exp.Expression]: 5258 index = self._index 5259 5260 this: t.Optional[exp.Expression] = None 5261 prefix = self._match_text_seq("SYSUDTLIB", ".") 5262 5263 if not self._match_set(self.TYPE_TOKENS): 5264 identifier = allow_identifiers and self._parse_id_var( 5265 any_token=False, tokens=(TokenType.VAR,) 5266 ) 5267 if isinstance(identifier, exp.Identifier): 5268 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5269 5270 if len(tokens) != 1: 5271 self.raise_error("Unexpected identifier", self._prev) 5272 5273 if tokens[0].token_type in self.TYPE_TOKENS: 5274 self._prev = tokens[0] 5275 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5276 this = self._parse_user_defined_type(identifier) 5277 else: 5278 self._retreat(self._index - 1) 5279 return None 5280 else: 5281 return None 5282 5283 type_token = self._prev.token_type 5284 5285 if type_token == TokenType.PSEUDO_TYPE: 5286 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5287 5288 if type_token == TokenType.OBJECT_IDENTIFIER: 5289 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5290 5291 # https://materialize.com/docs/sql/types/map/ 5292 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5293 key_type = self._parse_types( 5294 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5295 ) 5296 if not self._match(TokenType.FARROW): 5297 self._retreat(index) 5298 return None 5299 5300 value_type = self._parse_types( 5301 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5302 ) 5303 if not self._match(TokenType.R_BRACKET): 5304 self._retreat(index) 5305 return None 5306 5307 return exp.DataType( 5308 this=exp.DataType.Type.MAP, 5309 expressions=[key_type, value_type], 5310 nested=True, 5311 prefix=prefix, 5312 ) 5313 5314 nested = type_token in self.NESTED_TYPE_TOKENS 5315 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5316 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5317 expressions = None 5318 maybe_func = False 5319 5320 if self._match(TokenType.L_PAREN): 5321 if is_struct: 5322 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5323 elif nested: 5324 expressions = self._parse_csv( 5325 lambda: self._parse_types( 5326 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5327 ) 5328 ) 5329 if type_token == TokenType.NULLABLE and len(expressions) == 
1: 5330 this = expressions[0] 5331 this.set("nullable", True) 5332 self._match_r_paren() 5333 return this 5334 elif type_token in self.ENUM_TYPE_TOKENS: 5335 expressions = self._parse_csv(self._parse_equality) 5336 elif is_aggregate: 5337 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5338 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5339 ) 5340 if not func_or_ident: 5341 return None 5342 expressions = [func_or_ident] 5343 if self._match(TokenType.COMMA): 5344 expressions.extend( 5345 self._parse_csv( 5346 lambda: self._parse_types( 5347 check_func=check_func, 5348 schema=schema, 5349 allow_identifiers=allow_identifiers, 5350 ) 5351 ) 5352 ) 5353 else: 5354 expressions = self._parse_csv(self._parse_type_size) 5355 5356 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5357 if type_token == TokenType.VECTOR and len(expressions) == 2: 5358 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5359 5360 if not expressions or not self._match(TokenType.R_PAREN): 5361 self._retreat(index) 5362 return None 5363 5364 maybe_func = True 5365 5366 values: t.Optional[t.List[exp.Expression]] = None 5367 5368 if nested and self._match(TokenType.LT): 5369 if is_struct: 5370 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5371 else: 5372 expressions = self._parse_csv( 5373 lambda: self._parse_types( 5374 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5375 ) 5376 ) 5377 5378 if not self._match(TokenType.GT): 5379 self.raise_error("Expecting >") 5380 5381 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5382 values = self._parse_csv(self._parse_assignment) 5383 if not values and is_struct: 5384 values = None 5385 self._retreat(self._index - 1) 5386 else: 5387 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5388 5389 if type_token in self.TIMESTAMPS: 5390 if self._match_text_seq("WITH", "TIME", "ZONE"): 5391 maybe_func = False 5392 tz_type = ( 5393 exp.DataType.Type.TIMETZ 5394 if type_token in self.TIMES 5395 else exp.DataType.Type.TIMESTAMPTZ 5396 ) 5397 this = exp.DataType(this=tz_type, expressions=expressions) 5398 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5399 maybe_func = False 5400 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5401 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5402 maybe_func = False 5403 elif type_token == TokenType.INTERVAL: 5404 unit = self._parse_var(upper=True) 5405 if unit: 5406 if self._match_text_seq("TO"): 5407 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5408 5409 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5410 else: 5411 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5412 elif type_token == TokenType.VOID: 5413 this = exp.DataType(this=exp.DataType.Type.NULL) 5414 5415 if maybe_func and check_func: 5416 index2 = self._index 5417 peek = self._parse_string() 5418 5419 if not peek: 5420 self._retreat(index) 5421 return None 5422 5423 self._retreat(index2) 5424 5425 if not this: 5426 if self._match_text_seq("UNSIGNED"): 5427 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5428 if not unsigned_type_token: 5429 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5430 5431 type_token = unsigned_type_token or type_token 5432 5433 this = exp.DataType( 5434 this=exp.DataType.Type[type_token.value], 5435 expressions=expressions, 
5436 nested=nested, 5437 prefix=prefix, 5438 ) 5439 5440 # Empty arrays/structs are allowed 5441 if values is not None: 5442 cls = exp.Struct if is_struct else exp.Array 5443 this = exp.cast(cls(expressions=values), this, copy=False) 5444 5445 elif expressions: 5446 this.set("expressions", expressions) 5447 5448 # https://materialize.com/docs/sql/types/list/#type-name 5449 while self._match(TokenType.LIST): 5450 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5451 5452 index = self._index 5453 5454 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5455 matched_array = self._match(TokenType.ARRAY) 5456 5457 while self._curr: 5458 datatype_token = self._prev.token_type 5459 matched_l_bracket = self._match(TokenType.L_BRACKET) 5460 5461 if (not matched_l_bracket and not matched_array) or ( 5462 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5463 ): 5464 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5465 # not to be confused with the fixed size array parsing 5466 break 5467 5468 matched_array = False 5469 values = self._parse_csv(self._parse_assignment) or None 5470 if ( 5471 values 5472 and not schema 5473 and ( 5474 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5475 ) 5476 ): 5477 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5478 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5479 self._retreat(index) 5480 break 5481 5482 this = exp.DataType( 5483 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5484 ) 5485 self._match(TokenType.R_BRACKET) 5486 5487 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5488 converter = self.TYPE_CONVERTERS.get(this.this) 5489 if converter: 5490 this = converter(t.cast(exp.DataType, this)) 5491 5492 return this 5493 5494 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5495 index = self._index 5496 5497 if ( 5498 self._curr 5499 and self._next 5500 and self._curr.token_type in self.TYPE_TOKENS 5501 and self._next.token_type in self.TYPE_TOKENS 5502 ): 5503 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5504 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5505 this = self._parse_id_var() 5506 else: 5507 this = ( 5508 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5509 or self._parse_id_var() 5510 ) 5511 5512 self._match(TokenType.COLON) 5513 5514 if ( 5515 type_required 5516 and not isinstance(this, exp.DataType) 5517 and not self._match_set(self.TYPE_TOKENS, advance=False) 5518 ): 5519 self._retreat(index) 5520 return self._parse_types() 5521 5522 return self._parse_column_def(this) 5523 5524 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5525 if not self._match_text_seq("AT", "TIME", "ZONE"): 5526 return this 5527 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5528 5529 def _parse_column(self) -> t.Optional[exp.Expression]: 5530 this = self._parse_column_reference() 5531 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5532 5533 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5534 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5535 5536 return column 5537 5538 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5539 this = self._parse_field() 5540 if ( 5541 not this 5542 and self._match(TokenType.VALUES, advance=False) 5543 and self.VALUES_FOLLOWED_BY_PAREN 5544 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5545 ): 5546 this = self._parse_id_var() 5547 5548 if isinstance(this, exp.Identifier): 5549 # We bubble up comments from the Identifier to the Column 5550 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5551 5552 return this 5553 5554 def _parse_colon_as_variant_extract( 5555 self, this: t.Optional[exp.Expression] 5556 ) -> t.Optional[exp.Expression]: 5557 casts = [] 5558 json_path = [] 5559 escape = None 5560 5561 while self._match(TokenType.COLON): 5562 start_index = self._index 5563 5564 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5565 path = self._parse_column_ops( 5566 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5567 ) 5568 5569 # The cast :: operator has a lower precedence than the extraction operator :, so 5570 # we rearrange the AST appropriately to avoid casting the JSON path 5571 while isinstance(path, exp.Cast): 5572 casts.append(path.to) 5573 path = path.this 5574 5575 if casts: 5576 dcolon_offset = next( 5577 i 5578 for i, t in enumerate(self._tokens[start_index:]) 5579 if t.token_type == TokenType.DCOLON 5580 ) 5581 end_token = self._tokens[start_index + dcolon_offset - 1] 5582 else: 5583 end_token = self._prev 5584 5585 if path: 5586 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5587 # it'll roundtrip to a string literal in GET_PATH 5588 if isinstance(path, exp.Identifier) and path.quoted: 5589 escape = True 5590 5591 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5592 5593 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5594 # Databricks transforms it back to the colon/dot notation 5595 if json_path: 5596 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5597 5598 if json_path_expr: 5599 json_path_expr.set("escape", escape) 5600 5601 this = self.expression( 5602 exp.JSONExtract, 5603 this=this, 5604 expression=json_path_expr, 5605 variant_extract=True, 5606 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5607 ) 5608 5609 while casts: 5610 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5611 5612 return this 5613 5614 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5615 return self._parse_types() 5616 5617 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5618 this = self._parse_bracket(this) 5619 5620 while self._match_set(self.COLUMN_OPERATORS): 5621 op_token = self._prev.token_type 5622 op = self.COLUMN_OPERATORS.get(op_token) 5623 5624 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5625 field = self._parse_dcolon() 5626 if not field: 5627 self.raise_error("Expected type") 5628 elif op and self._curr: 5629 field = self._parse_column_reference() or self._parse_bracket() 5630 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5631 field = self._parse_column_ops(field) 5632 else: 5633 field = self._parse_field(any_token=True, anonymous_func=True) 5634 5635 # Function calls can be qualified, e.g., x.y.FOO() 5636 # This converts the final AST to a series of Dots leading to the function call 5637 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5638 if isinstance(field, (exp.Func, exp.Window)) and this: 5639 this = this.transform( 5640 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5641 ) 5642 5643 if op: 5644 this = op(self, this, field) 5645 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5646 this = self.expression( 5647 exp.Column, 5648 comments=this.comments, 5649 this=field, 5650 table=this.this, 5651 db=this.args.get("table"), 5652 catalog=this.args.get("db"), 5653 ) 5654 elif isinstance(field, exp.Window): 5655 # Move the exp.Dot's to the window's function 5656 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5657 field.set("this", window_func) 5658 this = field 5659 else: 5660 this = self.expression(exp.Dot, this=this, expression=field) 5661 5662 if field and field.comments: 5663 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5664 5665 this = self._parse_bracket(this) 5666 5667 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5668 5669 def _parse_paren(self) -> t.Optional[exp.Expression]: 5670 if not self._match(TokenType.L_PAREN): 5671 return None 5672 5673 comments = self._prev_comments 5674 query = self._parse_select() 5675 5676 if query: 5677 expressions = [query] 5678 else: 5679 expressions = self._parse_expressions() 5680 5681 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5682 5683 if not this and self._match(TokenType.R_PAREN, advance=False): 5684 this = self.expression(exp.Tuple) 5685 elif 
isinstance(this, exp.UNWRAPPED_QUERIES): 5686 this = self._parse_subquery(this=this, parse_alias=False) 5687 elif isinstance(this, exp.Subquery): 5688 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5689 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5690 this = self.expression(exp.Tuple, expressions=expressions) 5691 else: 5692 this = self.expression(exp.Paren, this=this) 5693 5694 if this: 5695 this.add_comments(comments) 5696 5697 self._match_r_paren(expression=this) 5698 return this 5699 5700 def _parse_primary(self) -> t.Optional[exp.Expression]: 5701 if self._match_set(self.PRIMARY_PARSERS): 5702 token_type = self._prev.token_type 5703 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5704 5705 if token_type == TokenType.STRING: 5706 expressions = [primary] 5707 while self._match(TokenType.STRING): 5708 expressions.append(exp.Literal.string(self._prev.text)) 5709 5710 if len(expressions) > 1: 5711 return self.expression(exp.Concat, expressions=expressions) 5712 5713 return primary 5714 5715 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5716 return exp.Literal.number(f"0.{self._prev.text}") 5717 5718 return self._parse_paren() 5719 5720 def _parse_field( 5721 self, 5722 any_token: bool = False, 5723 tokens: t.Optional[t.Collection[TokenType]] = None, 5724 anonymous_func: bool = False, 5725 ) -> t.Optional[exp.Expression]: 5726 if anonymous_func: 5727 field = ( 5728 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5729 or self._parse_primary() 5730 ) 5731 else: 5732 field = self._parse_primary() or self._parse_function( 5733 anonymous=anonymous_func, any_token=any_token 5734 ) 5735 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5736 5737 def _parse_function( 5738 self, 5739 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5740 anonymous: bool = False, 5741 optional_parens: bool = True, 5742 any_token: bool = False, 5743 ) -> t.Optional[exp.Expression]: 5744 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5745 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5746 fn_syntax = False 5747 if ( 5748 self._match(TokenType.L_BRACE, advance=False) 5749 and self._next 5750 and self._next.text.upper() == "FN" 5751 ): 5752 self._advance(2) 5753 fn_syntax = True 5754 5755 func = self._parse_function_call( 5756 functions=functions, 5757 anonymous=anonymous, 5758 optional_parens=optional_parens, 5759 any_token=any_token, 5760 ) 5761 5762 if fn_syntax: 5763 self._match(TokenType.R_BRACE) 5764 5765 return func 5766 5767 def _parse_function_call( 5768 self, 5769 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5770 anonymous: bool = False, 5771 optional_parens: bool = True, 5772 any_token: bool = False, 5773 ) -> t.Optional[exp.Expression]: 5774 if not self._curr: 5775 return None 5776 5777 comments = self._curr.comments 5778 prev = self._prev 5779 token = self._curr 5780 token_type = self._curr.token_type 5781 this = self._curr.text 5782 upper = this.upper() 5783 5784 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5785 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5786 self._advance() 5787 return self._parse_window(parser(self)) 5788 5789 if not self._next or self._next.token_type != TokenType.L_PAREN: 5790 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5791 self._advance() 5792 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5793 5794 return 
None 5795 5796 if any_token: 5797 if token_type in self.RESERVED_TOKENS: 5798 return None 5799 elif token_type not in self.FUNC_TOKENS: 5800 return None 5801 5802 self._advance(2) 5803 5804 parser = self.FUNCTION_PARSERS.get(upper) 5805 if parser and not anonymous: 5806 this = parser(self) 5807 else: 5808 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5809 5810 if subquery_predicate: 5811 expr = None 5812 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5813 expr = self._parse_select() 5814 self._match_r_paren() 5815 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5816 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5817 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5818 self._advance(-1) 5819 expr = self._parse_bitwise() 5820 5821 if expr: 5822 return self.expression(subquery_predicate, comments=comments, this=expr) 5823 5824 if functions is None: 5825 functions = self.FUNCTIONS 5826 5827 function = functions.get(upper) 5828 known_function = function and not anonymous 5829 5830 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5831 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5832 5833 post_func_comments = self._curr and self._curr.comments 5834 if known_function and post_func_comments: 5835 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5836 # call we'll construct it as exp.Anonymous, even if it's "known" 5837 if any( 5838 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5839 for comment in post_func_comments 5840 ): 5841 known_function = False 5842 5843 if alias and known_function: 5844 args = self._kv_to_prop_eq(args) 5845 5846 if known_function: 5847 func_builder = t.cast(t.Callable, function) 5848 5849 if "dialect" in func_builder.__code__.co_varnames: 5850 func = func_builder(args, dialect=self.dialect) 5851 else: 5852 func = func_builder(args) 5853 5854 func = self.validate_expression(func, args) 5855 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5856 func.meta["name"] = this 5857 5858 this = func 5859 else: 5860 if token_type == TokenType.IDENTIFIER: 5861 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5862 5863 this = self.expression(exp.Anonymous, this=this, expressions=args) 5864 this = this.update_positions(token) 5865 5866 if isinstance(this, exp.Expression): 5867 this.add_comments(comments) 5868 5869 self._match_r_paren(this) 5870 return self._parse_window(this) 5871 5872 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5873 return expression 5874 5875 def _kv_to_prop_eq( 5876 self, expressions: t.List[exp.Expression], parse_map: bool = False 5877 ) -> t.List[exp.Expression]: 5878 transformed = [] 5879 5880 for index, e in enumerate(expressions): 5881 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5882 if isinstance(e, exp.Alias): 5883 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5884 5885 if not isinstance(e, exp.PropertyEQ): 5886 e = self.expression( 5887 exp.PropertyEQ, 5888 this=e.this if parse_map else exp.to_identifier(e.this.name), 5889 expression=e.expression, 5890 ) 5891 5892 if isinstance(e.this, exp.Column): 5893 e.this.replace(e.this.this) 5894 else: 5895 e = self._to_prop_eq(e, index) 5896 5897 transformed.append(e) 5898 5899 return transformed 5900 5901 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5902 return self._parse_statement() 5903 5904 def 
_parse_function_parameter(self) -> t.Optional[exp.Expression]: 5905 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5906 5907 def _parse_user_defined_function( 5908 self, kind: t.Optional[TokenType] = None 5909 ) -> t.Optional[exp.Expression]: 5910 this = self._parse_table_parts(schema=True) 5911 5912 if not self._match(TokenType.L_PAREN): 5913 return this 5914 5915 expressions = self._parse_csv(self._parse_function_parameter) 5916 self._match_r_paren() 5917 return self.expression( 5918 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5919 ) 5920 5921 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5922 literal = self._parse_primary() 5923 if literal: 5924 return self.expression(exp.Introducer, this=token.text, expression=literal) 5925 5926 return self._identifier_expression(token) 5927 5928 def _parse_session_parameter(self) -> exp.SessionParameter: 5929 kind = None 5930 this = self._parse_id_var() or self._parse_primary() 5931 5932 if this and self._match(TokenType.DOT): 5933 kind = this.name 5934 this = self._parse_var() or self._parse_primary() 5935 5936 return self.expression(exp.SessionParameter, this=this, kind=kind) 5937 5938 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5939 return self._parse_id_var() 5940 5941 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5942 index = self._index 5943 5944 if self._match(TokenType.L_PAREN): 5945 expressions = t.cast( 5946 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5947 ) 5948 5949 if not self._match(TokenType.R_PAREN): 5950 self._retreat(index) 5951 else: 5952 expressions = [self._parse_lambda_arg()] 5953 5954 if self._match_set(self.LAMBDAS): 5955 return self.LAMBDAS[self._prev.token_type](self, expressions) 5956 5957 self._retreat(index) 5958 5959 this: t.Optional[exp.Expression] 5960 5961 if self._match(TokenType.DISTINCT): 5962 this = self.expression( 5963 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5964 ) 5965 else: 5966 this = self._parse_select_or_expression(alias=alias) 5967 5968 return self._parse_limit( 5969 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5970 ) 5971 5972 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5973 index = self._index 5974 if not self._match(TokenType.L_PAREN): 5975 return this 5976 5977 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5978 # expr can be of both types 5979 if self._match_set(self.SELECT_START_TOKENS): 5980 self._retreat(index) 5981 return this 5982 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5983 self._match_r_paren() 5984 return self.expression(exp.Schema, this=this, expressions=args) 5985 5986 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5987 return self._parse_column_def(self._parse_field(any_token=True)) 5988 5989 def _parse_column_def( 5990 self, this: t.Optional[exp.Expression], computed_column: bool = True 5991 ) -> t.Optional[exp.Expression]: 5992 # column defs are not really columns, they're identifiers 5993 if isinstance(this, exp.Column): 5994 this = this.this 5995 5996 if not computed_column: 5997 self._match(TokenType.ALIAS) 5998 5999 kind = self._parse_types(schema=True) 6000 6001 if self._match_text_seq("FOR", "ORDINALITY"): 6002 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6003 6004 constraints: t.List[exp.Expression] = [] 6005 6006 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6007 ("ALIAS", "MATERIALIZED") 6008 ): 6009 persisted = self._prev.text.upper() == "MATERIALIZED" 6010 constraint_kind = exp.ComputedColumnConstraint( 6011 this=self._parse_assignment(), 6012 persisted=persisted or self._match_text_seq("PERSISTED"), 6013 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6014 ) 6015 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6016 elif ( 6017 kind 6018 and self._match(TokenType.ALIAS, advance=False) 6019 and ( 6020 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6021 or (self._next and self._next.token_type == TokenType.L_PAREN) 6022 ) 6023 ): 6024 self._advance() 6025 constraints.append( 6026 self.expression( 6027 exp.ColumnConstraint, 6028 kind=exp.ComputedColumnConstraint( 6029 this=self._parse_disjunction(), 6030 persisted=self._match_texts(("STORED", "VIRTUAL")) 6031 and self._prev.text.upper() == "STORED", 6032 ), 6033 ) 6034 ) 6035 6036 while True: 6037 constraint = self._parse_column_constraint() 6038 if not constraint: 6039 break 6040 constraints.append(constraint) 6041 6042 if not kind and not constraints: 6043 return this 6044 6045 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6046 6047 def _parse_auto_increment( 6048 self, 6049 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6050 start = None 6051 increment = None 6052 order = None 6053 6054 if self._match(TokenType.L_PAREN, advance=False): 6055 args = self._parse_wrapped_csv(self._parse_bitwise) 6056 start = seq_get(args, 0) 6057 increment = seq_get(args, 1) 6058 elif self._match_text_seq("START"): 6059 start = self._parse_bitwise() 6060 self._match_text_seq("INCREMENT") 6061 increment = self._parse_bitwise() 6062 if self._match_text_seq("ORDER"): 6063 order = True 6064 elif self._match_text_seq("NOORDER"): 6065 order = False 6066 6067 if start and increment: 6068 return exp.GeneratedAsIdentityColumnConstraint( 6069 start=start, increment=increment, this=False, order=order 6070 ) 6071 6072 return exp.AutoIncrementColumnConstraint() 6073 6074 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6075 if not self._match_text_seq("REFRESH"): 6076 self._retreat(self._index - 1) 6077 return None 6078 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6079 6080 def _parse_compress(self) -> exp.CompressColumnConstraint: 6081 if 
self._match(TokenType.L_PAREN, advance=False): 6082 return self.expression( 6083 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6084 ) 6085 6086 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6087 6088 def _parse_generated_as_identity( 6089 self, 6090 ) -> ( 6091 exp.GeneratedAsIdentityColumnConstraint 6092 | exp.ComputedColumnConstraint 6093 | exp.GeneratedAsRowColumnConstraint 6094 ): 6095 if self._match_text_seq("BY", "DEFAULT"): 6096 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6097 this = self.expression( 6098 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6099 ) 6100 else: 6101 self._match_text_seq("ALWAYS") 6102 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6103 6104 self._match(TokenType.ALIAS) 6105 6106 if self._match_text_seq("ROW"): 6107 start = self._match_text_seq("START") 6108 if not start: 6109 self._match(TokenType.END) 6110 hidden = self._match_text_seq("HIDDEN") 6111 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6112 6113 identity = self._match_text_seq("IDENTITY") 6114 6115 if self._match(TokenType.L_PAREN): 6116 if self._match(TokenType.START_WITH): 6117 this.set("start", self._parse_bitwise()) 6118 if self._match_text_seq("INCREMENT", "BY"): 6119 this.set("increment", self._parse_bitwise()) 6120 if self._match_text_seq("MINVALUE"): 6121 this.set("minvalue", self._parse_bitwise()) 6122 if self._match_text_seq("MAXVALUE"): 6123 this.set("maxvalue", self._parse_bitwise()) 6124 6125 if self._match_text_seq("CYCLE"): 6126 this.set("cycle", True) 6127 elif self._match_text_seq("NO", "CYCLE"): 6128 this.set("cycle", False) 6129 6130 if not identity: 6131 this.set("expression", self._parse_range()) 6132 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6133 args = self._parse_csv(self._parse_bitwise) 6134 this.set("start", seq_get(args, 0)) 6135 this.set("increment", seq_get(args, 1)) 6136 6137 self._match_r_paren() 6138 6139 return this 6140 6141 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6142 self._match_text_seq("LENGTH") 6143 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6144 6145 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6146 if self._match_text_seq("NULL"): 6147 return self.expression(exp.NotNullColumnConstraint) 6148 if self._match_text_seq("CASESPECIFIC"): 6149 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6150 if self._match_text_seq("FOR", "REPLICATION"): 6151 return self.expression(exp.NotForReplicationColumnConstraint) 6152 6153 # Unconsume the `NOT` token 6154 self._retreat(self._index - 1) 6155 return None 6156 6157 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6158 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6159 6160 procedure_option_follows = ( 6161 self._match(TokenType.WITH, advance=False) 6162 and self._next 6163 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6164 ) 6165 6166 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6167 return self.expression( 6168 exp.ColumnConstraint, 6169 this=this, 6170 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6171 ) 6172 6173 return this 6174 6175 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6176 if not self._match(TokenType.CONSTRAINT): 6177 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6178 6179 return self.expression( 6180 exp.Constraint, 6181 this=self._parse_id_var(), 6182 expressions=self._parse_unnamed_constraints(), 6183 ) 6184 6185 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6186 constraints = [] 6187 while True: 6188 constraint = self._parse_unnamed_constraint() or self._parse_function() 6189 if not constraint: 6190 break 6191 constraints.append(constraint) 6192 6193 return constraints 6194 6195 def _parse_unnamed_constraint( 6196 self, constraints: t.Optional[t.Collection[str]] = None 6197 ) -> t.Optional[exp.Expression]: 6198 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6199 constraints or self.CONSTRAINT_PARSERS 6200 ): 6201 return None 6202 6203 constraint = self._prev.text.upper() 6204 if constraint not in self.CONSTRAINT_PARSERS: 6205 self.raise_error(f"No parser found for schema constraint {constraint}.") 6206 6207 return self.CONSTRAINT_PARSERS[constraint](self) 6208 6209 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6210 return self._parse_id_var(any_token=False) 6211 6212 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6213 self._match_texts(("KEY", "INDEX")) 6214 return self.expression( 6215 exp.UniqueColumnConstraint, 6216 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6217 this=self._parse_schema(self._parse_unique_key()), 6218 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6219 on_conflict=self._parse_on_conflict(), 6220 options=self._parse_key_constraint_options(), 6221 ) 6222 6223 def _parse_key_constraint_options(self) -> t.List[str]: 6224 options = [] 6225 while True: 6226 if not self._curr: 6227 break 6228 6229 if self._match(TokenType.ON): 6230 action = None 6231 on = self._advance_any() and self._prev.text 6232 6233 if self._match_text_seq("NO", "ACTION"): 6234 action = "NO ACTION" 6235 elif self._match_text_seq("CASCADE"): 6236 action = "CASCADE" 6237 elif self._match_text_seq("RESTRICT"): 6238 action = "RESTRICT" 6239 elif self._match_pair(TokenType.SET, TokenType.NULL): 6240 action = "SET NULL" 6241 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6242 action = "SET DEFAULT" 6243 else: 6244 self.raise_error("Invalid key constraint") 6245 6246 options.append(f"ON {on} {action}") 6247 else: 6248 var = self._parse_var_from_options( 6249 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6250 ) 6251 if not var: 6252 break 6253 options.append(var.name) 6254 6255 return options 6256 6257 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6258 if match and not self._match(TokenType.REFERENCES): 6259 return None 6260 6261 expressions = None 6262 this = self._parse_table(schema=True) 6263 options = self._parse_key_constraint_options() 6264 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6265 6266 def _parse_foreign_key(self) -> exp.ForeignKey: 6267 expressions = ( 6268 self._parse_wrapped_id_vars() 6269 if not self._match(TokenType.REFERENCES, advance=False) 6270 else None 6271 ) 6272 reference = self._parse_references() 6273 on_options = {} 6274 6275 while self._match(TokenType.ON): 6276 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6277 self.raise_error("Expected DELETE or UPDATE") 6278 6279 kind = self._prev.text.lower() 6280 6281 if self._match_text_seq("NO", "ACTION"): 6282 action = "NO ACTION" 6283 elif self._match(TokenType.SET): 6284 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6285 action = "SET " + self._prev.text.upper() 6286 else: 6287 self._advance() 6288 action = self._prev.text.upper() 6289 6290 on_options[kind] = action 6291 6292 return self.expression( 6293 exp.ForeignKey, 6294 expressions=expressions, 6295 reference=reference, 6296 options=self._parse_key_constraint_options(), 6297 **on_options, # type: ignore 6298 ) 6299 6300 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6301 return self._parse_ordered() or self._parse_field() 6302 6303 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6304 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6305 self._retreat(self._index - 1) 6306 return None 6307 6308 id_vars = self._parse_wrapped_id_vars() 6309 return self.expression( 6310 exp.PeriodForSystemTimeConstraint, 6311 this=seq_get(id_vars, 0), 6312 expression=seq_get(id_vars, 1), 6313 ) 6314 6315 def _parse_primary_key( 6316 self, wrapped_optional: bool = False, in_props: bool = False 6317 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6318 desc = ( 6319 self._match_set((TokenType.ASC, TokenType.DESC)) 6320 and self._prev.token_type == TokenType.DESC 6321 ) 6322 6323 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6324 return self.expression( 6325 exp.PrimaryKeyColumnConstraint, 6326 desc=desc, 6327 options=self._parse_key_constraint_options(), 6328 ) 6329 6330 expressions = self._parse_wrapped_csv( 6331 self._parse_primary_key_part, optional=wrapped_optional 6332 ) 6333 6334 return self.expression( 6335 exp.PrimaryKey, 6336 expressions=expressions, 6337 include=self._parse_index_params(), 6338 options=self._parse_key_constraint_options(), 6339 ) 6340 6341 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6342 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6343 6344 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6345 """ 6346 Parses a datetime column in ODBC format. We parse the column into the corresponding 6347 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6348 same as we did for `DATE('yyyy-mm-dd')`. 
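For instance (an illustrative sketch, assuming the d/t/ts keys of the
ODBC_DATETIME_LITERALS mapping): `{d '2024-01-31'}` is parsed like
`DATE('2024-01-31')`, `{t '12:34:56'}` like `TIME('12:34:56')`, and
`{ts '2024-01-31 12:34:56'}` like `TIMESTAMP('2024-01-31 12:34:56')`.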
6349 6350 Reference: 6351 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6352 """ 6353 self._match(TokenType.VAR) 6354 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6355 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6356 if not self._match(TokenType.R_BRACE): 6357 self.raise_error("Expected }") 6358 return expression 6359 6360 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6361 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6362 return this 6363 6364 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6365 map_token = seq_get(self._tokens, self._index - 2) 6366 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6367 else: 6368 parse_map = False 6369 6370 bracket_kind = self._prev.token_type 6371 if ( 6372 bracket_kind == TokenType.L_BRACE 6373 and self._curr 6374 and self._curr.token_type == TokenType.VAR 6375 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6376 ): 6377 return self._parse_odbc_datetime_literal() 6378 6379 expressions = self._parse_csv( 6380 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6381 ) 6382 6383 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6384 self.raise_error("Expected ]") 6385 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6386 self.raise_error("Expected }") 6387 6388 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6389 if bracket_kind == TokenType.L_BRACE: 6390 this = self.expression( 6391 exp.Struct, 6392 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6393 ) 6394 elif not this: 6395 this = build_array_constructor( 6396 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6397 ) 6398 else: 6399 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6400 if constructor_type: 6401 return build_array_constructor( 6402 constructor_type, 6403 args=expressions, 6404 bracket_kind=bracket_kind, 6405 dialect=self.dialect, 6406 ) 6407 6408 expressions = apply_index_offset( 6409 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6410 ) 6411 this = self.expression( 6412 exp.Bracket, 6413 this=this, 6414 expressions=expressions, 6415 comments=this.pop_comments(), 6416 ) 6417 6418 self._add_comments(this) 6419 return self._parse_bracket(this) 6420 6421 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6422 if self._match(TokenType.COLON): 6423 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6424 return this 6425 6426 def _parse_case(self) -> t.Optional[exp.Expression]: 6427 ifs = [] 6428 default = None 6429 6430 comments = self._prev_comments 6431 expression = self._parse_assignment() 6432 6433 while self._match(TokenType.WHEN): 6434 this = self._parse_assignment() 6435 self._match(TokenType.THEN) 6436 then = self._parse_assignment() 6437 ifs.append(self.expression(exp.If, this=this, true=then)) 6438 6439 if self._match(TokenType.ELSE): 6440 default = self._parse_assignment() 6441 6442 if not self._match(TokenType.END): 6443 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6444 default = exp.column("interval") 6445 else: 6446 self.raise_error("Expected END after CASE", self._prev) 6447 6448 return self.expression( 6449 exp.Case, comments=comments, this=expression, ifs=ifs, 
default=default 6450 ) 6451 6452 def _parse_if(self) -> t.Optional[exp.Expression]: 6453 if self._match(TokenType.L_PAREN): 6454 args = self._parse_csv( 6455 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6456 ) 6457 this = self.validate_expression(exp.If.from_arg_list(args), args) 6458 self._match_r_paren() 6459 else: 6460 index = self._index - 1 6461 6462 if self.NO_PAREN_IF_COMMANDS and index == 0: 6463 return self._parse_as_command(self._prev) 6464 6465 condition = self._parse_assignment() 6466 6467 if not condition: 6468 self._retreat(index) 6469 return None 6470 6471 self._match(TokenType.THEN) 6472 true = self._parse_assignment() 6473 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6474 self._match(TokenType.END) 6475 this = self.expression(exp.If, this=condition, true=true, false=false) 6476 6477 return this 6478 6479 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6480 if not self._match_text_seq("VALUE", "FOR"): 6481 self._retreat(self._index - 1) 6482 return None 6483 6484 return self.expression( 6485 exp.NextValueFor, 6486 this=self._parse_column(), 6487 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6488 ) 6489 6490 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6491 this = self._parse_function() or self._parse_var_or_string(upper=True) 6492 6493 if self._match(TokenType.FROM): 6494 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6495 6496 if not self._match(TokenType.COMMA): 6497 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6498 6499 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6500 6501 def _parse_gap_fill(self) -> exp.GapFill: 6502 self._match(TokenType.TABLE) 6503 this = self._parse_table() 6504 6505 self._match(TokenType.COMMA) 6506 args = [this, *self._parse_csv(self._parse_lambda)] 6507 6508 gap_fill = exp.GapFill.from_arg_list(args) 6509 return self.validate_expression(gap_fill, args) 6510 6511 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6512 this = self._parse_assignment() 6513 6514 if not self._match(TokenType.ALIAS): 6515 if self._match(TokenType.COMMA): 6516 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6517 6518 self.raise_error("Expected AS after CAST") 6519 6520 fmt = None 6521 to = self._parse_types() 6522 6523 default = self._match(TokenType.DEFAULT) 6524 if default: 6525 default = self._parse_bitwise() 6526 self._match_text_seq("ON", "CONVERSION", "ERROR") 6527 6528 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6529 fmt_string = self._parse_string() 6530 fmt = self._parse_at_time_zone(fmt_string) 6531 6532 if not to: 6533 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6534 if to.this in exp.DataType.TEMPORAL_TYPES: 6535 this = self.expression( 6536 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6537 this=this, 6538 format=exp.Literal.string( 6539 format_time( 6540 fmt_string.this if fmt_string else "", 6541 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6542 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6543 ) 6544 ), 6545 safe=safe, 6546 ) 6547 6548 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6549 this.set("zone", fmt.args["zone"]) 6550 return this 6551 elif not to: 6552 self.raise_error("Expected TYPE after CAST") 6553 elif isinstance(to, exp.Identifier): 6554 to = exp.DataType.build(to.name, udt=True) 6555 
elif to.this == exp.DataType.Type.CHAR: 6556 if self._match(TokenType.CHARACTER_SET): 6557 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6558 6559 return self.build_cast( 6560 strict=strict, 6561 this=this, 6562 to=to, 6563 format=fmt, 6564 safe=safe, 6565 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6566 default=default, 6567 ) 6568 6569 def _parse_string_agg(self) -> exp.GroupConcat: 6570 if self._match(TokenType.DISTINCT): 6571 args: t.List[t.Optional[exp.Expression]] = [ 6572 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6573 ] 6574 if self._match(TokenType.COMMA): 6575 args.extend(self._parse_csv(self._parse_assignment)) 6576 else: 6577 args = self._parse_csv(self._parse_assignment) # type: ignore 6578 6579 if self._match_text_seq("ON", "OVERFLOW"): 6580 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6581 if self._match_text_seq("ERROR"): 6582 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6583 else: 6584 self._match_text_seq("TRUNCATE") 6585 on_overflow = self.expression( 6586 exp.OverflowTruncateBehavior, 6587 this=self._parse_string(), 6588 with_count=( 6589 self._match_text_seq("WITH", "COUNT") 6590 or not self._match_text_seq("WITHOUT", "COUNT") 6591 ), 6592 ) 6593 else: 6594 on_overflow = None 6595 6596 index = self._index 6597 if not self._match(TokenType.R_PAREN) and args: 6598 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6599 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6600 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6601 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6602 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6603 6604 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6605 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6606 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
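#
# Illustrative example: Trino's LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) and
# Postgres' STRING_AGG(x, ',' ORDER BY y) should both produce a GroupConcat whose
# `this` carries the Order node, so generating MySQL's
# GROUP_CONCAT(x ORDER BY y SEPARATOR ',') needs no WITHIN GROUP special case.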
6607 if not self._match_text_seq("WITHIN", "GROUP"): 6608 self._retreat(index) 6609 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6610 6611 # The corresponding match_r_paren will be called in parse_function (caller) 6612 self._match_l_paren() 6613 6614 return self.expression( 6615 exp.GroupConcat, 6616 this=self._parse_order(this=seq_get(args, 0)), 6617 separator=seq_get(args, 1), 6618 on_overflow=on_overflow, 6619 ) 6620 6621 def _parse_convert( 6622 self, strict: bool, safe: t.Optional[bool] = None 6623 ) -> t.Optional[exp.Expression]: 6624 this = self._parse_bitwise() 6625 6626 if self._match(TokenType.USING): 6627 to: t.Optional[exp.Expression] = self.expression( 6628 exp.CharacterSet, this=self._parse_var() 6629 ) 6630 elif self._match(TokenType.COMMA): 6631 to = self._parse_types() 6632 else: 6633 to = None 6634 6635 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6636 6637 def _parse_xml_table(self) -> exp.XMLTable: 6638 namespaces = None 6639 passing = None 6640 columns = None 6641 6642 if self._match_text_seq("XMLNAMESPACES", "("): 6643 namespaces = self._parse_xml_namespace() 6644 self._match_text_seq(")", ",") 6645 6646 this = self._parse_string() 6647 6648 if self._match_text_seq("PASSING"): 6649 # The BY VALUE keywords are optional and are provided for semantic clarity 6650 self._match_text_seq("BY", "VALUE") 6651 passing = self._parse_csv(self._parse_column) 6652 6653 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6654 6655 if self._match_text_seq("COLUMNS"): 6656 columns = self._parse_csv(self._parse_field_def) 6657 6658 return self.expression( 6659 exp.XMLTable, 6660 this=this, 6661 namespaces=namespaces, 6662 passing=passing, 6663 columns=columns, 6664 by_ref=by_ref, 6665 ) 6666 6667 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6668 namespaces = [] 6669 6670 while True: 6671 if self._match(TokenType.DEFAULT): 6672 uri = self._parse_string() 6673 else: 6674 uri = self._parse_alias(self._parse_string()) 6675 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6676 if not self._match(TokenType.COMMA): 6677 break 6678 6679 return namespaces 6680 6681 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6682 args = self._parse_csv(self._parse_assignment) 6683 6684 if len(args) < 3: 6685 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6686 6687 return self.expression(exp.DecodeCase, expressions=args) 6688 6689 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6690 self._match_text_seq("KEY") 6691 key = self._parse_column() 6692 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6693 self._match_text_seq("VALUE") 6694 value = self._parse_bitwise() 6695 6696 if not key and not value: 6697 return None 6698 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6699 6700 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6701 if not this or not self._match_text_seq("FORMAT", "JSON"): 6702 return this 6703 6704 return self.expression(exp.FormatJson, this=this) 6705 6706 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6707 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
JSON_EXISTS) 6708 if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR: 6709 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6710 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6711 else: 6712 error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS) 6713 empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS) 6714 6715 null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS) 6716 6717 if not empty and not error and not null: 6718 return None 6719 6720 return self.expression( 6721 exp.OnCondition, 6722 empty=empty, 6723 error=error, 6724 null=null, 6725 ) 6726 6727 def _parse_on_handling( 6728 self, on: str, *values: str 6729 ) -> t.Optional[str] | t.Optional[exp.Expression]: 6730 # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL) 6731 for value in values: 6732 if self._match_text_seq(value, "ON", on): 6733 return f"{value} ON {on}" 6734 6735 index = self._index 6736 if self._match(TokenType.DEFAULT): 6737 default_value = self._parse_bitwise() 6738 if self._match_text_seq("ON", on): 6739 return default_value 6740 6741 self._retreat(index) 6742 6743 return None 6744 6745 @t.overload 6746 def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ... 6747 6748 @t.overload 6749 def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ... 6750 6751 def _parse_json_object(self, agg=False): 6752 star = self._parse_star() 6753 expressions = ( 6754 [star] 6755 if star 6756 else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value())) 6757 ) 6758 null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT") 6759 6760 unique_keys = None 6761 if self._match_text_seq("WITH", "UNIQUE"): 6762 unique_keys = True 6763 elif self._match_text_seq("WITHOUT", "UNIQUE"): 6764 unique_keys = False 6765 6766 self._match_text_seq("KEYS") 6767 6768 return_type = self._match_text_seq("RETURNING") and self._parse_format_json( 6769 self._parse_type() 6770 ) 6771 encoding = self._match_text_seq("ENCODING") and self._parse_var() 6772 6773 return self.expression( 6774 exp.JSONObjectAgg if agg else exp.JSONObject, 6775 expressions=expressions, 6776 null_handling=null_handling, 6777 unique_keys=unique_keys, 6778 return_type=return_type, 6779 encoding=encoding, 6780 ) 6781 6782 # Note: this is currently incomplete; it only implements the "JSON_value_column" part 6783 def _parse_json_column_def(self) -> exp.JSONColumnDef: 6784 if not self._match_text_seq("NESTED"): 6785 this = self._parse_id_var() 6786 kind = self._parse_types(allow_identifiers=False) 6787 nested = None 6788 else: 6789 this = None 6790 kind = None 6791 nested = True 6792 6793 path = self._match_text_seq("PATH") and self._parse_string() 6794 nested_schema = nested and self._parse_json_schema() 6795 6796 return self.expression( 6797 exp.JSONColumnDef, 6798 this=this, 6799 kind=kind, 6800 path=path, 6801 nested_schema=nested_schema, 6802 ) 6803 6804 def _parse_json_schema(self) -> exp.JSONSchema: 6805 self._match_text_seq("COLUMNS") 6806 return self.expression( 6807 exp.JSONSchema, 6808 expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True), 6809 ) 6810 6811 def _parse_json_table(self) -> exp.JSONTable: 6812 this = self._parse_format_json(self._parse_bitwise()) 6813 path = self._match(TokenType.COMMA) and self._parse_string() 6814 error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL") 6815 empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL") 6816 schema =
    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)
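    # Illustrative note for _parse_position above: both argument conventions
    # normalize to exp.StrPosition. A rough sketch, assuming the public sqlglot
    # API (rendered output may vary by version and dialect):
    #
    #   import sqlglot
    #   sqlglot.parse_one("SELECT POSITION('a' IN 'abc')").sql()
    #   # roughly "SELECT STRPOS('abc', 'a')"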
    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
            if self._match(TokenType.FOR):
                if len(args) == 1:
                    args.append(exp.Literal.number(1))
                args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
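    # Illustrative SQL shapes accepted by _parse_substring and _parse_trim above
    # (examples only; not executed):
    #
    #   SUBSTRING('w3resource', 4, 3)         -- comma form
    #   SUBSTRING('w3resource' FROM 4 FOR 3)  -- Postgres keyword form
    #   TRIM(LEADING 'x' FROM 'xxabc')        -- position + pattern
    #   TRIM('  abc  ')                       -- bare form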
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # The SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER;
        # some dialects choose to implement it and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        #   SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        #   SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity:
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # BigQuery allows selecting from a named window, e.g. WINDOW x AS (PARTITION BY ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window

    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }
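    # Illustrative sketch: a full OVER clause exercising _parse_window and
    # _parse_window_spec above, assuming the public sqlglot API:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "SELECT SUM(x) OVER (PARTITION BY y ORDER BY z "
    #       "ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) FROM t"
    #   )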
    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses),
        # so this section tries to parse the clause version and, if it fails, treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this
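    # Illustrative note for _parse_tokens above: it folds repeated operators into
    # a left-nested tree, so with AND mapped to exp.And in `expressions`, an input
    # like "a AND b AND c" parses as And(this=And(this=a, expression=b), expression=c).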
    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)

    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_column_def_with_exists(self):
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        if self._prev.text.upper() != "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression
    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )
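    # Illustrative ALTER COLUMN shapes dispatched by _parse_alter_table_alter
    # above (examples only; not executed):
    #
    #   ALTER TABLE t ALTER COLUMN c SET DEFAULT 0
    #   ALTER TABLE t ALTER COLUMN c DROP NOT NULL
    #   ALTER TABLE t ALTER COLUMN c SET DATA TYPE TEXT USING c::TEXT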
self._match_texts(("ALL", "EVEN", "AUTO")): 7482 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7483 7484 self._match_text_seq("KEY", "DISTKEY") 7485 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7486 7487 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7488 if compound: 7489 self._match_text_seq("SORTKEY") 7490 7491 if self._match(TokenType.L_PAREN, advance=False): 7492 return self.expression( 7493 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7494 ) 7495 7496 self._match_texts(("AUTO", "NONE")) 7497 return self.expression( 7498 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7499 ) 7500 7501 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7502 index = self._index - 1 7503 7504 partition_exists = self._parse_exists() 7505 if self._match(TokenType.PARTITION, advance=False): 7506 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7507 7508 self._retreat(index) 7509 return self._parse_csv(self._parse_drop_column) 7510 7511 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7512 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7513 exists = self._parse_exists() 7514 old_column = self._parse_column() 7515 to = self._match_text_seq("TO") 7516 new_column = self._parse_column() 7517 7518 if old_column is None or to is None or new_column is None: 7519 return None 7520 7521 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7522 7523 self._match_text_seq("TO") 7524 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7525 7526 def _parse_alter_table_set(self) -> exp.AlterSet: 7527 alter_set = self.expression(exp.AlterSet) 7528 7529 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7530 "TABLE", "PROPERTIES" 7531 ): 7532 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7533 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7534 alter_set.set("expressions", [self._parse_assignment()]) 7535 elif self._match_texts(("LOGGED", "UNLOGGED")): 7536 alter_set.set("option", exp.var(self._prev.text.upper())) 7537 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7538 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7539 elif self._match_text_seq("LOCATION"): 7540 alter_set.set("location", self._parse_field()) 7541 elif self._match_text_seq("ACCESS", "METHOD"): 7542 alter_set.set("access_method", self._parse_field()) 7543 elif self._match_text_seq("TABLESPACE"): 7544 alter_set.set("tablespace", self._parse_field()) 7545 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7546 alter_set.set("file_format", [self._parse_field()]) 7547 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7548 alter_set.set("file_format", self._parse_wrapped_options()) 7549 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7550 alter_set.set("copy_options", self._parse_wrapped_options()) 7551 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7552 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7553 else: 7554 if self._match_text_seq("SERDE"): 7555 alter_set.set("serde", self._parse_field()) 7556 7557 properties = self._parse_wrapped(self._parse_properties, optional=True) 7558 alter_set.set("expressions", [properties]) 7559 7560 return alter_set 
    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)
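    # Illustrative note for _parse_alter above: fully parsed statements become
    # exp.Alter, while anything with an unconsumed tail falls back to exp.Command,
    # which preserves the raw SQL text instead of failing. A rough sketch,
    # assuming the public sqlglot API:
    #
    #   import sqlglot
    #   sqlglot.parse_one("ALTER TABLE t ADD COLUMN c INT")  # roughly -> exp.Alter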
    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
                this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expression keywords before falling back to parsing a table
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind: https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None
    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
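    # Illustrative sketch for _parse_merge above, assuming the public sqlglot API:
    #
    #   import sqlglot
    #   sqlglot.parse_one(
    #       "MERGE INTO tgt USING src ON tgt.id = src.id "
    #       "WHEN MATCHED THEN UPDATE SET tgt.v = src.v "
    #       "WHEN NOT MATCHED THEN INSERT (id, v) VALUES (src.id, src.v)"
    #   )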
    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)
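    # Illustrative note for _parse_var_from_options above: the options mapping
    # keys a leading keyword to its allowed continuations. With a hypothetical
    #
    #   {"MATCH": ("FULL", "PARTIAL", "SIMPLE"), "NOT": (("FOR", "REPLICATION"),)}
    #
    # the input "MATCH FULL" is consumed into a single exp.var("MATCH FULL").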
    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None
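    # Illustrative heredoc shapes handled by _parse_heredoc above
    # (Postgres-style dollar quoting; examples only):
    #
    #   $$SELECT 1$$          -- anonymous tag
    #   $tag$SELECT 1$tag$    -- named tag, captured as tag_text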
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
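    # Illustrative note for _match_text_seq above: matching is all-or-nothing,
    # so a partial match retreats to the starting index. Given input "WITH ORDINALITY":
    #
    #   self._match_text_seq("WITH", "TIES")        # None; no tokens consumed
    #   self._match_text_seq("WITH", "ORDINALITY")  # True; both tokens consumed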
    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with the TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until comma (end of this privilege), ON
        # (end of the privilege list), or L_PAREN (start of a column list) is met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)
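    # Illustrative shape of a Snowflake-style COPY statement that the helpers
    # above compose (example only; not executed):
    #
    #   COPY INTO t FROM @stage/data.csv
    #   STORAGE_INTEGRATION = my_integration
    #   FILE_FORMAT = (TYPE = CSV)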
    def _parse_grant(self) -> exp.Grant | exp.Command:
        start = self._prev

        privileges = self._parse_csv(self._parse_grant_privilege)

        self._match(TokenType.ON)
        kind = self._match_set(self.CREATABLES) and self._prev.text.upper()

        # Attempt to parse the securable, e.g. MySQL allows names
        # such as "foo.*", "*.*" which are not easily parseable yet
        securable = self._try_parse(self._parse_table_parts)

        if not securable or not self._match_text_seq("TO"):
            return self._parse_as_command(start)

        principals = self._parse_csv(self._parse_grant_principal)

        grant_option = self._match_text_seq("WITH", "GRANT", "OPTION")

        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Grant,
            privileges=privileges,
            kind=kind,
            securable=securable,
            principals=principals,
            grant_option=grant_option,
        )

    def _parse_overlay(self) -> exp.Overlay:
        return self.expression(
            exp.Overlay,
            **{  # type: ignore
                "this": self._parse_bitwise(),
                "expression": self._match_text_seq("PLACING") and self._parse_bitwise(),
                "from": self._match_text_seq("FROM") and self._parse_bitwise(),
                "for": self._match_text_seq("FOR") and self._parse_bitwise(),
            },
        )

    def _parse_format_name(self) -> exp.Property:
        # Note: Although not specified in the docs, Snowflake does accept a string/identifier
        # for FILE_FORMAT = <format_name>
        return self.expression(
            exp.Property,
            this=exp.var("FORMAT_NAME"),
            value=self._parse_string() or self._parse_table_parts(),
        )

    def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc:
        args: t.List[exp.Expression] = []

        if self._match(TokenType.DISTINCT):
            args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()]))
            self._match(TokenType.COMMA)

        args.extend(self._parse_csv(self._parse_assignment))

        return self.expression(
            expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2)
        )

    def _identifier_expression(
        self, token: t.Optional[Token] = None, **kwargs: t.Any
    ) -> exp.Identifier:
        token = token or self._prev
        expression = self.expression(exp.Identifier, this=token.text, **kwargs)
        expression.update_positions(token)
        return expression

    def _build_pipe_cte(
        self,
        query: exp.Query,
        expressions: t.List[exp.Expression],
        alias_cte: t.Optional[exp.TableAlias] = None,
    ) -> exp.Select:
        new_cte: t.Optional[t.Union[str, exp.TableAlias]]
        if alias_cte:
            new_cte = alias_cte
        else:
            self._pipe_cte_counter += 1
            new_cte = f"__tmp{self._pipe_cte_counter}"

        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False)
        if ctes:
            new_select.set("with", ctes)

        return new_select.with_(new_cte, as_=query, copy=False)

    def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select:
        select = self._parse_select(consume_pipe=False)
        if not select:
            return query

        return self._build_pipe_cte(
            query=query.select(*select.expressions, append=False), expressions=[exp.Star()]
        )
            query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False)

        return query

    def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()
        if self._match_text_seq("GROUP", "AND", advance=False):
            return this

        this = self._parse_alias(this)

        if self._match_set((TokenType.ASC, TokenType.DESC), advance=False):
            return self._parse_ordered(lambda: this)

        return this

    def _parse_pipe_syntax_aggregate_group_order_by(
        self, query: exp.Select, group_by_exists: bool = True
    ) -> exp.Select:
        expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields)
        aggregates_or_groups, orders = [], []
        for element in expr:
            if isinstance(element, exp.Ordered):
                this = element.this
                if isinstance(this, exp.Alias):
                    element.set("this", this.args["alias"])
                orders.append(element)
            else:
                this = element
            aggregates_or_groups.append(this)

        if group_by_exists:
            query.select(*aggregates_or_groups, copy=False).group_by(
                *[projection.args.get("alias", projection) for projection in aggregates_or_groups],
                copy=False,
            )
        else:
            query.select(*aggregates_or_groups, append=False, copy=False)

        if orders:
            return query.order_by(*orders, append=False, copy=False)

        return query

    def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("AGGREGATE")
        query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False)

        if self._match(TokenType.GROUP_BY) or (
            self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY)
        ):
            query = self._parse_pipe_syntax_aggregate_group_order_by(query)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]:
        first_setop = self.parse_set_operation(this=query)
        if not first_setop:
            return None

        def _parse_and_unwrap_query() -> t.Optional[exp.Select]:
            expr = self._parse_paren()
            return expr.assert_is(exp.Subquery).unnest() if expr else None

        first_setop.this.pop()

        setops = [
            first_setop.expression.pop().assert_is(exp.Subquery).unnest(),
            *self._parse_csv(_parse_and_unwrap_query),
        ]

        query = self._build_pipe_cte(query=query, expressions=[exp.Star()])
        with_ = query.args.get("with")
        ctes = with_.pop() if with_ else None

        if isinstance(first_setop, exp.Union):
            query = query.union(*setops, copy=False, **first_setop.args)
        elif isinstance(first_setop, exp.Except):
            query = query.except_(*setops, copy=False, **first_setop.args)
        else:
            query = query.intersect(*setops, copy=False, **first_setop.args)

        query.set("with", ctes)

        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]:
        join = self._parse_join()
        if not join:
            return None

        if isinstance(query, exp.Select):
            return query.join(join, copy=False)

        return query

    def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select:
        pivots = self._parse_pivots()
        if not pivots:
            return query

        from_ = query.args.get("from")
        if from_:
            from_.this.set("pivots", pivots)
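
        # Note: the pivots are attached to the FROM table itself, so the CTE built by the
        # return below already selects from the pivoted relation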
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select:
        self._match_text_seq("EXTEND")
        query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False)
        return self._build_pipe_cte(query=query, expressions=[exp.Star()])

    def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select:
        sample = self._parse_table_sample()

        with_ = query.args.get("with")
        if with_:
            with_.expressions[-1].this.set("sample", sample)
        else:
            query.set("sample", sample)

        return query

    def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]:
        if isinstance(query, exp.Subquery):
            query = exp.select("*").from_(query, copy=False)

        if not query.args.get("from"):
            query = exp.select("*").from_(query.subquery(copy=False), copy=False)

        while self._match(TokenType.PIPE_GT):
            start = self._curr
            parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper())
            if not parser:
                # The set operators (UNION, etc.) and the JOIN operator have a few common starting
                # keywords, making it tricky to disambiguate them without lookahead. The approach
                # here is to try and parse a set operation and if that fails, then try to parse a
                # join operator. If that fails as well, then the operator is not supported.
                parsed_query = self._parse_pipe_syntax_set_operator(query)
                parsed_query = parsed_query or self._parse_pipe_syntax_join(query)
                if not parsed_query:
                    self._retreat(start)
                    self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.")
                    break
                query = parsed_query
            else:
                query = parser(self, query)

        return query

    def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]:
        vars = self._parse_csv(self._parse_id_var)
        if not vars:
            return None

        return self.expression(
            exp.DeclareItem,
            this=vars,
            kind=self._parse_types(),
            default=self._match(TokenType.DEFAULT) and self._parse_bitwise(),
        )

    def _parse_declare(self) -> exp.Declare | exp.Command:
        start = self._prev
        expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem))

        if not expressions or self._curr:
            return self._parse_as_command(start)

        return self.expression(exp.Declare, expressions=expressions)

    def build_cast(self, strict: bool, **kwargs) -> exp.Cast:
        exp_class = exp.Cast if strict else exp.TryCast

        if exp_class == exp.TryCast:
            kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING

        return self.expression(exp_class, **kwargs)
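
# A minimal usage sketch for the pipe syntax machinery above (an assumption: it requires a
# sqlglot build whose BigQuery dialect enables the `|>` operator; `parse_one` and `read=`
# are the public parsing API):
#
#   import sqlglot
#   ast = sqlglot.parse_one("SELECT * FROM t |> WHERE x > 1 |> AGGREGATE SUM(x) AS s", read="bigquery")
#
# Operators such as AGGREGATE, EXTEND, SELECT, AS and the set operators are folded into
# CTE-wrapped SELECTs via `_build_pipe_cte` (which generates the `__tmp<N>` CTE names),
# while WHERE, LIMIT and ORDER BY mutate the current query in place.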

def build_mod(args: t.List) -> exp.Mod:
    this = seq_get(args, 0)
    expression = seq_get(args, 1)

    # Wrap the operands if they are binary nodes, e.g. MOD(a + 1, 7) -> (a + 1) % 7
    this = exp.Paren(this=this) if isinstance(this, exp.Binary) else this
    expression = exp.Paren(this=expression) if isinstance(expression, exp.Binary) else expression

    return exp.Mod(this=this, expression=expression)
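
# Worked example (illustrative): for MOD(a + 1, 7), `this` is the binary node `a + 1`, so it
# is wrapped in exp.Paren and the tree generates as "(a + 1) % 7" rather than "a + 1 % 7",
# preserving evaluation order under the higher precedence of `%`.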

def build_array_constructor(
    exp_class: t.Type[E], args: t.List, bracket_kind: TokenType, dialect: Dialect
) -> exp.Expression:
    array_exp = exp_class(expressions=args)

    if exp_class == exp.Array and dialect.HAS_DISTINCT_ARRAY_CONSTRUCTORS:
        array_exp.set("bracket_notation", bracket_kind == TokenType.L_BRACKET)

    return array_exp
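
# Sketch of the intent (names as above): in a dialect where HAS_DISTINCT_ARRAY_CONSTRUCTORS
# is True, ARRAY[1, 2] and ARRAY(1, 2) are distinct constructs, so the builder records which
# bracketing produced the node:
#
#   build_array_constructor(exp.Array, args, TokenType.L_BRACKET, dialect)
#   # -> exp.Array(expressions=args) with bracket_notation=True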

def build_convert_timezone(
    args: t.List, default_source_tz: t.Optional[str] = None
) -> t.Union[exp.ConvertTimezone, exp.Anonymous]:
    if len(args) == 2:
        source_tz = exp.Literal.string(default_source_tz) if default_source_tz else None
        return exp.ConvertTimezone(
            source_tz=source_tz, target_tz=seq_get(args, 0), timestamp=seq_get(args, 1)
        )

    return exp.ConvertTimezone.from_arg_list(args)
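
# Sketch: with the two-argument form CONVERT_TIMEZONE(target_tz, timestamp), the source
# timezone is filled from `default_source_tz` when one is configured (the "UTC" value below
# is an illustrative assumption, not a fixed default):
#
#   build_convert_timezone(args, default_source_tz="UTC")
#   # -> exp.ConvertTimezone(source_tz='UTC', target_tz=args[0], timestamp=args[1])
#
# With three arguments, the call falls through to exp.ConvertTimezone.from_arg_list(args).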

class Parser(metaclass=_Parser):
    """
    Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.

    Args:
        error_level: The desired error level.
            Default: ErrorLevel.IMMEDIATE
        error_message_context: The amount of context to capture from a query string when displaying
            the error message (in number of characters).
            Default: 100
        max_errors: Maximum number of error messages to include in a raised ParseError.
            This is only relevant if error_level is ErrorLevel.RAISE.
            Default: 3
    """

    FUNCTIONS: t.Dict[str, t.Callable] = {
        **{name: func.from_arg_list for name, func in exp.FUNCTION_BY_NAME.items()},
        **dict.fromkeys(("COALESCE", "IFNULL", "NVL"), build_coalesce),
        "ARRAY": lambda args, dialect: exp.Array(expressions=args),
        "ARRAYAGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "ARRAY_AGG": lambda args, dialect: exp.ArrayAgg(
            this=seq_get(args, 0), nulls_excluded=dialect.ARRAY_AGG_INCLUDES_NULLS is None or None
        ),
        "CHAR": lambda args: exp.Chr(expressions=args),
        "CHR": lambda args: exp.Chr(expressions=args),
        "COUNT": lambda args: exp.Count(this=seq_get(args, 0), expressions=args[1:], big_int=True),
        "CONCAT": lambda args, dialect: exp.Concat(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONCAT_WS": lambda args, dialect: exp.ConcatWs(
            expressions=args,
            safe=not dialect.STRICT_STRING_CONCAT,
            coalesce=dialect.CONCAT_COALESCE,
        ),
        "CONVERT_TIMEZONE": build_convert_timezone,
        "DATE_TO_DATE_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "GENERATE_DATE_ARRAY": lambda args: exp.GenerateDateArray(
            start=seq_get(args, 0),
            end=seq_get(args, 1),
            step=seq_get(args, 2) or exp.Interval(this=exp.Literal.string(1), unit=exp.var("DAY")),
        ),
        "GLOB": lambda args: exp.Glob(this=seq_get(args, 1), expression=seq_get(args, 0)),
        "HEX": build_hex,
        "JSON_EXTRACT": build_extract_json_with_path(exp.JSONExtract),
        "JSON_EXTRACT_SCALAR": build_extract_json_with_path(exp.JSONExtractScalar),
        "JSON_EXTRACT_PATH_TEXT": build_extract_json_with_path(exp.JSONExtractScalar),
        "LIKE": build_like,
        "LOG": build_logarithm,
        "LOG2": lambda args: exp.Log(this=exp.Literal.number(2), expression=seq_get(args, 0)),
        "LOG10": lambda args: exp.Log(this=exp.Literal.number(10), expression=seq_get(args, 0)),
        "LOWER": build_lower,
        "LPAD": lambda args: build_pad(args),
        "LEFTPAD": lambda args: build_pad(args),
        "LTRIM": lambda args: build_trim(args),
        "MOD": build_mod,
        "RIGHTPAD": lambda args: build_pad(args, is_left=False),
        "RPAD": lambda args: build_pad(args, is_left=False),
        "RTRIM": lambda args: build_trim(args, is_left=False),
        "SCOPE_RESOLUTION": lambda args: exp.ScopeResolution(expression=seq_get(args, 0))
        if len(args) != 2
        else exp.ScopeResolution(this=seq_get(args, 0), expression=seq_get(args, 1)),
        "STRPOS": exp.StrPosition.from_arg_list,
        "CHARINDEX": lambda args: build_locate_strposition(args),
        "INSTR": exp.StrPosition.from_arg_list,
        "LOCATE": lambda args: build_locate_strposition(args),
        "TIME_TO_TIME_STR": lambda args: exp.Cast(
            this=seq_get(args, 0),
            to=exp.DataType(this=exp.DataType.Type.TEXT),
        ),
        "TO_HEX": build_hex,
        "TS_OR_DS_TO_DATE_STR": lambda args: exp.Substring(
            this=exp.Cast(
                this=seq_get(args, 0),
                to=exp.DataType(this=exp.DataType.Type.TEXT),
            ),
            start=exp.Literal.number(1),
            length=exp.Literal.number(10),
        ),
        "UNNEST": lambda args: exp.Unnest(expressions=ensure_list(seq_get(args, 0))),
        "UPPER": build_upper,
        "VAR_MAP": build_var_map,
    }

    NO_PAREN_FUNCTIONS = {
        TokenType.CURRENT_DATE: exp.CurrentDate,
        TokenType.CURRENT_DATETIME: exp.CurrentDate,
        TokenType.CURRENT_TIME: exp.CurrentTime,
        TokenType.CURRENT_TIMESTAMP: exp.CurrentTimestamp,
        TokenType.CURRENT_USER: exp.CurrentUser,
    }

    STRUCT_TYPE_TOKENS = {
        TokenType.NESTED,
        TokenType.OBJECT,
        TokenType.STRUCT,
        TokenType.UNION,
    }

    NESTED_TYPE_TOKENS = {
        TokenType.ARRAY,
        TokenType.LIST,
        TokenType.LOWCARDINALITY,
        TokenType.MAP,
        TokenType.NULLABLE,
        TokenType.RANGE,
        *STRUCT_TYPE_TOKENS,
    }

    ENUM_TYPE_TOKENS = {
        TokenType.DYNAMIC,
        TokenType.ENUM,
        TokenType.ENUM8,
        TokenType.ENUM16,
    }

    AGGREGATE_TYPE_TOKENS = {
        TokenType.AGGREGATEFUNCTION,
        TokenType.SIMPLEAGGREGATEFUNCTION,
    }

    TYPE_TOKENS = {
        TokenType.BIT,
        TokenType.BOOLEAN,
        TokenType.TINYINT,
        TokenType.UTINYINT,
        TokenType.SMALLINT,
        TokenType.USMALLINT,
        TokenType.INT,
        TokenType.UINT,
        TokenType.BIGINT,
        TokenType.UBIGINT,
        TokenType.INT128,
        TokenType.UINT128,
        TokenType.INT256,
        TokenType.UINT256,
        TokenType.MEDIUMINT,
        TokenType.UMEDIUMINT,
        TokenType.FIXEDSTRING,
        TokenType.FLOAT,
        TokenType.DOUBLE,
        TokenType.UDOUBLE,
        TokenType.CHAR,
        TokenType.NCHAR,
        TokenType.VARCHAR,
        TokenType.NVARCHAR,
        TokenType.BPCHAR,
        TokenType.TEXT,
        TokenType.MEDIUMTEXT,
        TokenType.LONGTEXT,
        TokenType.BLOB,
        TokenType.MEDIUMBLOB,
        TokenType.LONGBLOB,
        TokenType.BINARY,
        TokenType.VARBINARY,
        TokenType.JSON,
        TokenType.JSONB,
        TokenType.INTERVAL,
        TokenType.TINYBLOB,
        TokenType.TINYTEXT,
        TokenType.TIME,
        TokenType.TIMETZ,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMP_S,
        TokenType.TIMESTAMP_MS,
        TokenType.TIMESTAMP_NS,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        TokenType.TIMESTAMPNTZ,
        TokenType.DATETIME,
        TokenType.DATETIME2,
        TokenType.DATETIME64,
        TokenType.SMALLDATETIME,
        TokenType.DATE,
        TokenType.DATE32,
        TokenType.INT4RANGE,
        TokenType.INT4MULTIRANGE,
        TokenType.INT8RANGE,
        TokenType.INT8MULTIRANGE,
        TokenType.NUMRANGE,
        TokenType.NUMMULTIRANGE,
        TokenType.TSRANGE,
        TokenType.TSMULTIRANGE,
        TokenType.TSTZRANGE,
        TokenType.TSTZMULTIRANGE,
        TokenType.DATERANGE,
        TokenType.DATEMULTIRANGE,
        TokenType.DECIMAL,
        TokenType.DECIMAL32,
        TokenType.DECIMAL64,
        TokenType.DECIMAL128,
        TokenType.DECIMAL256,
        TokenType.UDECIMAL,
        TokenType.BIGDECIMAL,
        TokenType.UUID,
        TokenType.GEOGRAPHY,
        TokenType.GEOGRAPHYPOINT,
        TokenType.GEOMETRY,
        TokenType.POINT,
        TokenType.RING,
        TokenType.LINESTRING,
        TokenType.MULTILINESTRING,
        TokenType.POLYGON,
        TokenType.MULTIPOLYGON,
        TokenType.HLLSKETCH,
        TokenType.HSTORE,
        TokenType.PSEUDO_TYPE,
        TokenType.SUPER,
        TokenType.SERIAL,
        TokenType.SMALLSERIAL,
        TokenType.BIGSERIAL,
        TokenType.XML,
        TokenType.YEAR,
        TokenType.USERDEFINED,
        TokenType.MONEY,
        TokenType.SMALLMONEY,
        TokenType.ROWVERSION,
        TokenType.IMAGE,
        TokenType.VARIANT,
        TokenType.VECTOR,
        TokenType.VOID,
        TokenType.OBJECT,
        TokenType.OBJECT_IDENTIFIER,
        TokenType.INET,
        TokenType.IPADDRESS,
        TokenType.IPPREFIX,
        TokenType.IPV4,
        TokenType.IPV6,
        TokenType.UNKNOWN,
        TokenType.NOTHING,
        TokenType.NULL,
        TokenType.NAME,
        TokenType.TDIGEST,
        TokenType.DYNAMIC,
        *ENUM_TYPE_TOKENS,
        *NESTED_TYPE_TOKENS,
        *AGGREGATE_TYPE_TOKENS,
    }

    SIGNED_TO_UNSIGNED_TYPE_TOKEN = {
        TokenType.BIGINT: TokenType.UBIGINT,
        TokenType.INT: TokenType.UINT,
        TokenType.MEDIUMINT: TokenType.UMEDIUMINT,
        TokenType.SMALLINT: TokenType.USMALLINT,
        TokenType.TINYINT: TokenType.UTINYINT,
        TokenType.DECIMAL: TokenType.UDECIMAL,
        TokenType.DOUBLE: TokenType.UDOUBLE,
    }

    SUBQUERY_PREDICATES = {
        TokenType.ANY: exp.Any,
        TokenType.ALL: exp.All,
        TokenType.EXISTS: exp.Exists,
        TokenType.SOME: exp.Any,
    }

    RESERVED_TOKENS = {
        *Tokenizer.SINGLE_TOKENS.values(),
        TokenType.SELECT,
    } - {TokenType.IDENTIFIER}

    DB_CREATABLES = {
        TokenType.DATABASE,
        TokenType.DICTIONARY,
        TokenType.FILE_FORMAT,
        TokenType.MODEL,
        TokenType.NAMESPACE,
        TokenType.SCHEMA,
        TokenType.SEMANTIC_VIEW,
        TokenType.SEQUENCE,
        TokenType.SINK,
        TokenType.SOURCE,
        TokenType.STAGE,
        TokenType.STORAGE_INTEGRATION,
        TokenType.STREAMLIT,
        TokenType.TABLE,
        TokenType.TAG,
        TokenType.VIEW,
        TokenType.WAREHOUSE,
    }

    CREATABLES = {
        TokenType.COLUMN,
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.FUNCTION,
        TokenType.INDEX,
        TokenType.PROCEDURE,
        *DB_CREATABLES,
    }

    ALTERABLES = {
        TokenType.INDEX,
        TokenType.TABLE,
        TokenType.VIEW,
    }

    # Tokens that can represent identifiers
    ID_VAR_TOKENS = {
        TokenType.ALL,
        TokenType.ATTACH,
        TokenType.VAR,
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASC,
        TokenType.ASOF,
        TokenType.AUTO_INCREMENT,
        TokenType.BEGIN,
        TokenType.BPCHAR,
        TokenType.CACHE,
        TokenType.CASE,
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.COMMENT,
        TokenType.COMMIT,
        TokenType.CONSTRAINT,
        TokenType.COPY,
        TokenType.CUBE,
        TokenType.CURRENT_SCHEMA,
        TokenType.DEFAULT,
        TokenType.DELETE,
        TokenType.DESC,
        TokenType.DESCRIBE,
        TokenType.DETACH,
        TokenType.DICTIONARY,
        TokenType.DIV,
        TokenType.END,
        TokenType.EXECUTE,
        TokenType.EXPORT,
        TokenType.ESCAPE,
        TokenType.FALSE,
        TokenType.FIRST,
        TokenType.FILTER,
        TokenType.FINAL,
        TokenType.FORMAT,
        TokenType.FULL,
        TokenType.GET,
        TokenType.IDENTIFIER,
        TokenType.IS,
        TokenType.ISNULL,
        TokenType.INTERVAL,
        TokenType.KEEP,
        TokenType.KILL,
        TokenType.LEFT,
        TokenType.LIMIT,
        TokenType.LOAD,
        TokenType.MERGE,
        TokenType.NATURAL,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.OPERATOR,
        TokenType.ORDINALITY,
        TokenType.OVERLAPS,
        TokenType.OVERWRITE,
        TokenType.PARTITION,
        TokenType.PERCENT,
        TokenType.PIVOT,
        TokenType.PRAGMA,
        TokenType.PUT,
        TokenType.RANGE,
        TokenType.RECURSIVE,
        TokenType.REFERENCES,
        TokenType.REFRESH,
        TokenType.RENAME,
        TokenType.REPLACE,
        TokenType.RIGHT,
        TokenType.ROLLUP,
        TokenType.ROW,
        TokenType.ROWS,
        TokenType.SEMI,
        TokenType.SET,
        TokenType.SETTINGS,
        TokenType.SHOW,
        TokenType.TEMPORARY,
        TokenType.TOP,
        TokenType.TRUE,
        TokenType.TRUNCATE,
        TokenType.UNIQUE,
        TokenType.UNNEST,
        TokenType.UNPIVOT,
        TokenType.UPDATE,
        TokenType.USE,
        TokenType.VOLATILE,
        TokenType.WINDOW,
        *CREATABLES,
        *SUBQUERY_PREDICATES,
        *TYPE_TOKENS,
        *NO_PAREN_FUNCTIONS,
    }
    ID_VAR_TOKENS.remove(TokenType.UNION)

    TABLE_ALIAS_TOKENS = ID_VAR_TOKENS - {
        TokenType.ANTI,
        TokenType.APPLY,
        TokenType.ASOF,
        TokenType.FULL,
        TokenType.LEFT,
        TokenType.LOCK,
        TokenType.NATURAL,
        TokenType.RIGHT,
        TokenType.SEMI,
        TokenType.WINDOW,
    }

    ALIAS_TOKENS = ID_VAR_TOKENS

    COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS

    ARRAY_CONSTRUCTORS = {
        "ARRAY": exp.Array,
        "LIST": exp.List,
    }

    COMMENT_TABLE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.IS}

    UPDATE_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - {TokenType.SET}

    TRIM_TYPES = {"LEADING", "TRAILING", "BOTH"}

    FUNC_TOKENS = {
        TokenType.COLLATE,
        TokenType.COMMAND,
        TokenType.CURRENT_DATE,
        TokenType.CURRENT_DATETIME,
        TokenType.CURRENT_SCHEMA,
        TokenType.CURRENT_TIMESTAMP,
        TokenType.CURRENT_TIME,
        TokenType.CURRENT_USER,
        TokenType.FILTER,
        TokenType.FIRST,
        TokenType.FORMAT,
        TokenType.GET,
        TokenType.GLOB,
        TokenType.IDENTIFIER,
        TokenType.INDEX,
        TokenType.ISNULL,
        TokenType.ILIKE,
        TokenType.INSERT,
        TokenType.LIKE,
        TokenType.MERGE,
        TokenType.NEXT,
        TokenType.OFFSET,
        TokenType.PRIMARY_KEY,
        TokenType.RANGE,
        TokenType.REPLACE,
        TokenType.RLIKE,
        TokenType.ROW,
        TokenType.UNNEST,
        TokenType.VAR,
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.SEQUENCE,
        TokenType.DATE,
        TokenType.DATETIME,
        TokenType.TABLE,
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPTZ,
        TokenType.TRUNCATE,
        TokenType.WINDOW,
        TokenType.XOR,
        *TYPE_TOKENS,
        *SUBQUERY_PREDICATES,
    }

    CONJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.AND: exp.And,
    }

    ASSIGNMENT: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.COLON_EQ: exp.PropertyEQ,
    }

    DISJUNCTION: t.Dict[TokenType, t.Type[exp.Expression]] = {
        TokenType.OR: exp.Or,
    }

    EQUALITY = {
        TokenType.EQ: exp.EQ,
        TokenType.NEQ: exp.NEQ,
        TokenType.NULLSAFE_EQ: exp.NullSafeEQ,
    }

    COMPARISON = {
        TokenType.GT: exp.GT,
        TokenType.GTE: exp.GTE,
        TokenType.LT: exp.LT,
        TokenType.LTE: exp.LTE,
    }

    BITWISE = {
        TokenType.AMP: exp.BitwiseAnd,
        TokenType.CARET: exp.BitwiseXor,
        TokenType.PIPE: exp.BitwiseOr,
    }

    TERM = {
        TokenType.DASH: exp.Sub,
        TokenType.PLUS: exp.Add,
        TokenType.MOD: exp.Mod,
        TokenType.COLLATE: exp.Collate,
    }

    FACTOR = {
        TokenType.DIV: exp.IntDiv,
        TokenType.LR_ARROW: exp.Distance,
        TokenType.SLASH: exp.Div,
        TokenType.STAR: exp.Mul,
    }

    EXPONENT: t.Dict[TokenType, t.Type[exp.Expression]] = {}

    TIMES = {
        TokenType.TIME,
        TokenType.TIMETZ,
    }

    TIMESTAMPS = {
        TokenType.TIMESTAMP,
        TokenType.TIMESTAMPNTZ,
        TokenType.TIMESTAMPTZ,
        TokenType.TIMESTAMPLTZ,
        *TIMES,
    }

    SET_OPERATIONS = {
        TokenType.UNION,
        TokenType.INTERSECT,
        TokenType.EXCEPT,
    }

    JOIN_METHODS = {
        TokenType.ASOF,
        TokenType.NATURAL,
        TokenType.POSITIONAL,
    }

    JOIN_SIDES = {
        TokenType.LEFT,
        TokenType.RIGHT,
        TokenType.FULL,
    }

    JOIN_KINDS = {
        TokenType.ANTI,
        TokenType.CROSS,
        TokenType.INNER,
        TokenType.OUTER,
        TokenType.SEMI,
        TokenType.STRAIGHT_JOIN,
    }

    JOIN_HINTS: t.Set[str] = set()

    LAMBDAS = {
        TokenType.ARROW: lambda self, expressions: self.expression(
            exp.Lambda,
            this=self._replace_lambda(
                self._parse_assignment(),
                expressions,
            ),
            expressions=expressions,
        ),
        TokenType.FARROW: lambda self, expressions: self.expression(
            exp.Kwarg,
            this=exp.var(expressions[0].name),
            expression=self._parse_assignment(),
        ),
    }

    COLUMN_OPERATORS = {
        TokenType.DOT: None,
        TokenType.DOTCOLON: lambda self, this, to: self.expression(
            exp.JSONCast,
            this=this,
            to=to,
        ),
        TokenType.DCOLON: lambda self, this, to: self.build_cast(
            strict=self.STRICT_CAST, this=this, to=to
        ),
        TokenType.ARROW: lambda self, this, path: self.expression(
            exp.JSONExtract,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.DARROW: lambda self, this, path: self.expression(
            exp.JSONExtractScalar,
            this=this,
            expression=self.dialect.to_json_path(path),
            only_json_types=self.JSON_ARROWS_REQUIRE_JSON_TYPE,
        ),
        TokenType.HASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtract,
            this=this,
            expression=path,
        ),
        TokenType.DHASH_ARROW: lambda self, this, path: self.expression(
            exp.JSONBExtractScalar,
            this=this,
            expression=path,
        ),
        TokenType.PLACEHOLDER: lambda self, this, key: self.expression(
            exp.JSONBContains,
            this=this,
            expression=key,
        ),
    }

    EXPRESSION_PARSERS = {
        exp.Cluster: lambda self: self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY),
        exp.Column: lambda self: self._parse_column(),
        exp.Condition: lambda self: self._parse_assignment(),
        exp.DataType: lambda self: self._parse_types(allow_identifiers=False, schema=True),
        exp.Expression: lambda self: self._parse_expression(),
        exp.From: lambda self: self._parse_from(joins=True),
        exp.Group: lambda self: self._parse_group(),
        exp.Having: lambda self: self._parse_having(),
        exp.Hint: lambda self: self._parse_hint_body(),
        exp.Identifier: lambda self: self._parse_id_var(),
        exp.Join: lambda self: self._parse_join(),
        exp.Lambda: lambda self: self._parse_lambda(),
        exp.Lateral: lambda self: self._parse_lateral(),
        exp.Limit: lambda self: self._parse_limit(),
        exp.Offset: lambda self: self._parse_offset(),
        exp.Order: lambda self: self._parse_order(),
        exp.Ordered: lambda self: self._parse_ordered(),
        exp.Properties: lambda self: self._parse_properties(),
        exp.PartitionedByProperty: lambda self: self._parse_partitioned_by(),
        exp.Qualify: lambda self: self._parse_qualify(),
        exp.Returning: lambda self: self._parse_returning(),
        exp.Select: lambda self: self._parse_select(),
        exp.Sort: lambda self: self._parse_sort(exp.Sort, TokenType.SORT_BY),
        exp.Table: lambda self: self._parse_table_parts(),
        exp.TableAlias: lambda self: self._parse_table_alias(),
        exp.Tuple: lambda self: self._parse_value(values=False),
        exp.Whens: lambda self: self._parse_when_matched(),
        exp.Where: lambda self: self._parse_where(),
        exp.Window: lambda self: self._parse_named_window(),
        exp.With: lambda self: self._parse_with(),
        "JOIN_TYPE": lambda self: self._parse_join_parts(),
    }

    STATEMENT_PARSERS = {
        TokenType.ALTER: lambda self: self._parse_alter(),
        TokenType.ANALYZE: lambda self: self._parse_analyze(),
        TokenType.BEGIN: lambda self: self._parse_transaction(),
        TokenType.CACHE: lambda self: self._parse_cache(),
        TokenType.COMMENT: lambda self: self._parse_comment(),
        TokenType.COMMIT: lambda self: self._parse_commit_or_rollback(),
        TokenType.COPY: lambda self: self._parse_copy(),
        TokenType.CREATE: lambda self: self._parse_create(),
        TokenType.DELETE: lambda self: self._parse_delete(),
        TokenType.DESC: lambda self: self._parse_describe(),
        TokenType.DESCRIBE: lambda self: self._parse_describe(),
        TokenType.DROP: lambda self: self._parse_drop(),
        TokenType.GRANT: lambda self: self._parse_grant(),
        TokenType.INSERT: lambda self: self._parse_insert(),
        TokenType.KILL: lambda self: self._parse_kill(),
        TokenType.LOAD: lambda self: self._parse_load(),
        TokenType.MERGE: lambda self: self._parse_merge(),
        TokenType.PIVOT: lambda self: self._parse_simplified_pivot(),
        TokenType.PRAGMA: lambda self: self.expression(exp.Pragma, this=self._parse_expression()),
        TokenType.REFRESH: lambda self: self._parse_refresh(),
        TokenType.ROLLBACK: lambda self: self._parse_commit_or_rollback(),
        TokenType.SET: lambda self: self._parse_set(),
        TokenType.TRUNCATE: lambda self: self._parse_truncate_table(),
        TokenType.UNCACHE: lambda self: self._parse_uncache(),
        TokenType.UNPIVOT: lambda self: self._parse_simplified_pivot(is_unpivot=True),
        TokenType.UPDATE: lambda self: self._parse_update(),
        TokenType.USE: lambda self: self._parse_use(),
        TokenType.SEMICOLON: lambda self: exp.Semicolon(),
    }

    UNARY_PARSERS = {
        TokenType.PLUS: lambda self: self._parse_unary(),  # Unary + is handled as a no-op
        TokenType.NOT: lambda self: self.expression(exp.Not, this=self._parse_equality()),
        TokenType.TILDA: lambda self: self.expression(exp.BitwiseNot, this=self._parse_unary()),
        TokenType.DASH: lambda self: self.expression(exp.Neg, this=self._parse_unary()),
        TokenType.PIPE_SLASH: lambda self: self.expression(exp.Sqrt, this=self._parse_unary()),
        TokenType.DPIPE_SLASH: lambda self: self.expression(exp.Cbrt, this=self._parse_unary()),
    }

    STRING_PARSERS = {
        TokenType.HEREDOC_STRING: lambda self, token: self.expression(
            exp.RawString, this=token.text
        ),
        TokenType.NATIONAL_STRING: lambda self, token: self.expression(
            exp.National, this=token.text
        ),
        TokenType.RAW_STRING: lambda self, token: self.expression(exp.RawString, this=token.text),
        TokenType.STRING: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=True
        ),
        TokenType.UNICODE_STRING: lambda self, token: self.expression(
            exp.UnicodeString,
            this=token.text,
            escape=self._match_text_seq("UESCAPE") and self._parse_string(),
        ),
    }

    NUMERIC_PARSERS = {
        TokenType.BIT_STRING: lambda self, token: self.expression(exp.BitString, this=token.text),
        TokenType.BYTE_STRING: lambda self, token: self.expression(exp.ByteString, this=token.text),
        TokenType.HEX_STRING: lambda self, token: self.expression(
            exp.HexString,
            this=token.text,
            is_integer=self.dialect.HEX_STRING_IS_INTEGER_TYPE or None,
        ),
        TokenType.NUMBER: lambda self, token: self.expression(
            exp.Literal, this=token.text, is_string=False
        ),
    }

    PRIMARY_PARSERS = {
        **STRING_PARSERS,
        **NUMERIC_PARSERS,
        TokenType.INTRODUCER: lambda self, token: self._parse_introducer(token),
        TokenType.NULL: lambda self, _: self.expression(exp.Null),
        TokenType.TRUE: lambda self, _: self.expression(exp.Boolean, this=True),
        TokenType.FALSE: lambda self, _: self.expression(exp.Boolean, this=False),
        TokenType.SESSION_PARAMETER: lambda self, _: self._parse_session_parameter(),
        TokenType.STAR: lambda self, _: self._parse_star_ops(),
    }

    PLACEHOLDER_PARSERS = {
        TokenType.PLACEHOLDER: lambda self: self.expression(exp.Placeholder),
        TokenType.PARAMETER: lambda self: self._parse_parameter(),
        TokenType.COLON: lambda self: (
            self.expression(exp.Placeholder, this=self._prev.text)
            if self._match_set(self.COLON_PLACEHOLDER_TOKENS)
            else None
        ),
    }

    RANGE_PARSERS = {
        TokenType.AT_GT: binary_range_parser(exp.ArrayContainsAll),
        TokenType.BETWEEN: lambda self, this: self._parse_between(this),
        TokenType.GLOB: binary_range_parser(exp.Glob),
        TokenType.ILIKE: binary_range_parser(exp.ILike),
        TokenType.IN: lambda self, this: self._parse_in(this),
        TokenType.IRLIKE: binary_range_parser(exp.RegexpILike),
        TokenType.IS: lambda self, this: self._parse_is(this),
        TokenType.LIKE: binary_range_parser(exp.Like),
        TokenType.LT_AT: binary_range_parser(exp.ArrayContainsAll, reverse_args=True),
        TokenType.OVERLAPS: binary_range_parser(exp.Overlaps),
        TokenType.RLIKE: binary_range_parser(exp.RegexpLike),
        TokenType.SIMILAR_TO: binary_range_parser(exp.SimilarTo),
        TokenType.FOR: lambda self, this: self._parse_comprehension(this),
    }

    PIPE_SYNTAX_TRANSFORM_PARSERS = {
        "AGGREGATE": lambda self, query: self._parse_pipe_syntax_aggregate(query),
        "AS": lambda self, query: self._build_pipe_cte(
            query, [exp.Star()], self._parse_table_alias()
        ),
        "EXTEND": lambda self, query: self._parse_pipe_syntax_extend(query),
        "LIMIT": lambda self, query: self._parse_pipe_syntax_limit(query),
        "ORDER BY": lambda self, query: query.order_by(
            self._parse_order(), append=False, copy=False
        ),
        "PIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "SELECT": lambda self, query: self._parse_pipe_syntax_select(query),
        "TABLESAMPLE": lambda self, query: self._parse_pipe_syntax_tablesample(query),
        "UNPIVOT": lambda self, query: self._parse_pipe_syntax_pivot(query),
        "WHERE": lambda self, query: query.where(self._parse_where(), copy=False),
    }

    PROPERTY_PARSERS: t.Dict[str, t.Callable] = {
        "ALLOWED_VALUES": lambda self: self.expression(
            exp.AllowedValuesProperty, expressions=self._parse_csv(self._parse_primary)
        ),
        "ALGORITHM": lambda self: self._parse_property_assignment(exp.AlgorithmProperty),
        "AUTO": lambda self: self._parse_auto_property(),
        "AUTO_INCREMENT": lambda self: self._parse_property_assignment(exp.AutoIncrementProperty),
        "BACKUP": lambda self: self.expression(
            exp.BackupProperty, this=self._parse_var(any_token=True)
        ),
        "BLOCKCOMPRESSION": lambda self: self._parse_blockcompression(),
        "CHARSET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHARACTER SET": lambda self, **kwargs: self._parse_character_set(**kwargs),
        "CHECKSUM": lambda self: self._parse_checksum(),
        "CLUSTER BY": lambda self: self._parse_cluster(),
        "CLUSTERED": lambda self: self._parse_clustered_by(),
        "COLLATE": lambda self, **kwargs: self._parse_property_assignment(
            exp.CollateProperty, **kwargs
        ),
        "COMMENT": lambda self: self._parse_property_assignment(exp.SchemaCommentProperty),
        "CONTAINS": lambda self: self._parse_contains_property(),
        "COPY": lambda self: self._parse_copy_property(),
        "DATABLOCKSIZE": lambda self, **kwargs: self._parse_datablocksize(**kwargs),
        "DATA_DELETION": lambda self: self._parse_data_deletion_property(),
        "DEFINER": lambda self: self._parse_definer(),
        "DETERMINISTIC": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "DISTRIBUTED": lambda self: self._parse_distributed_property(),
        "DUPLICATE": lambda self: self._parse_composite_key_property(exp.DuplicateKeyProperty),
        "DYNAMIC": lambda self: self.expression(exp.DynamicProperty),
        "DISTKEY": lambda self: self._parse_distkey(),
        "DISTSTYLE": lambda self: self._parse_property_assignment(exp.DistStyleProperty),
        "EMPTY": lambda self: self.expression(exp.EmptyProperty),
        "ENGINE": lambda self: self._parse_property_assignment(exp.EngineProperty),
        "ENVIRONMENT": lambda self: self.expression(
            exp.EnviromentProperty, expressions=self._parse_wrapped_csv(self._parse_assignment)
        ),
        "EXECUTE": lambda self: self._parse_property_assignment(exp.ExecuteAsProperty),
        "EXTERNAL": lambda self: self.expression(exp.ExternalProperty),
        "FALLBACK": lambda self, **kwargs: self._parse_fallback(**kwargs),
        "FORMAT": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "FREESPACE": lambda self: self._parse_freespace(),
        "GLOBAL": lambda self: self.expression(exp.GlobalProperty),
        "HEAP": lambda self: self.expression(exp.HeapProperty),
        "ICEBERG": lambda self: self.expression(exp.IcebergProperty),
        "IMMUTABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("IMMUTABLE")
        ),
        "INHERITS": lambda self: self.expression(
            exp.InheritsProperty, expressions=self._parse_wrapped_csv(self._parse_table)
        ),
        "INPUT": lambda self: self.expression(exp.InputModelProperty, this=self._parse_schema()),
        "JOURNAL": lambda self, **kwargs: self._parse_journal(**kwargs),
        "LANGUAGE": lambda self: self._parse_property_assignment(exp.LanguageProperty),
        "LAYOUT": lambda self: self._parse_dict_property(this="LAYOUT"),
        "LIFETIME": lambda self: self._parse_dict_range(this="LIFETIME"),
        "LIKE": lambda self: self._parse_create_like(),
        "LOCATION": lambda self: self._parse_property_assignment(exp.LocationProperty),
        "LOCK": lambda self: self._parse_locking(),
        "LOCKING": lambda self: self._parse_locking(),
        "LOG": lambda self, **kwargs: self._parse_log(**kwargs),
        "MATERIALIZED": lambda self: self.expression(exp.MaterializedProperty),
        "MERGEBLOCKRATIO": lambda self, **kwargs: self._parse_mergeblockratio(**kwargs),
        "MODIFIES": lambda self: self._parse_modifies_property(),
        "MULTISET": lambda self: self.expression(exp.SetProperty, multi=True),
        "NO": lambda self: self._parse_no_property(),
        "ON": lambda self: self._parse_on_property(),
        "ORDER BY": lambda self: self._parse_order(skip_order_token=True),
        "OUTPUT": lambda self: self.expression(exp.OutputModelProperty, this=self._parse_schema()),
        "PARTITION": lambda self: self._parse_partitioned_of(),
        "PARTITION BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED BY": lambda self: self._parse_partitioned_by(),
        "PARTITIONED_BY": lambda self: self._parse_partitioned_by(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(in_props=True),
        "RANGE": lambda self: self._parse_dict_range(this="RANGE"),
        "READS": lambda self: self._parse_reads_property(),
        "REMOTE": lambda self: self._parse_remote_with_connection(),
        "RETURNS": lambda self: self._parse_returns(),
        "STRICT": lambda self: self.expression(exp.StrictProperty),
        "STREAMING": lambda self: self.expression(exp.StreamingTableProperty),
        "ROW": lambda self: self._parse_row(),
        "ROW_FORMAT": lambda self: self._parse_property_assignment(exp.RowFormatProperty),
        "SAMPLE": lambda self: self.expression(
            exp.SampleProperty, this=self._match_text_seq("BY") and self._parse_bitwise()
        ),
        "SECURE": lambda self: self.expression(exp.SecureProperty),
        "SECURITY": lambda self: self._parse_security(),
        "SET": lambda self: self.expression(exp.SetProperty, multi=False),
        "SETTINGS": lambda self: self._parse_settings_property(),
        "SHARING": lambda self: self._parse_property_assignment(exp.SharingProperty),
        "SORTKEY": lambda self: self._parse_sortkey(),
        "SOURCE": lambda self: self._parse_dict_property(this="SOURCE"),
        "STABLE": lambda self: self.expression(
            exp.StabilityProperty, this=exp.Literal.string("STABLE")
        ),
        "STORED": lambda self: self._parse_stored(),
        "SYSTEM_VERSIONING": lambda self: self._parse_system_versioning_property(),
        "TBLPROPERTIES": lambda self: self._parse_wrapped_properties(),
        "TEMP": lambda self: self.expression(exp.TemporaryProperty),
        "TEMPORARY": lambda self: self.expression(exp.TemporaryProperty),
        "TO": lambda self: self._parse_to_table(),
        "TRANSIENT": lambda self: self.expression(exp.TransientProperty),
        "TRANSFORM": lambda self: self.expression(
            exp.TransformModelProperty, expressions=self._parse_wrapped_csv(self._parse_expression)
        ),
        "TTL": lambda self: self._parse_ttl(),
        "USING": lambda self: self._parse_property_assignment(exp.FileFormatProperty),
        "UNLOGGED": lambda self: self.expression(exp.UnloggedProperty),
        "VOLATILE": lambda self: self._parse_volatile_property(),
        "WITH": lambda self: self._parse_with_property(),
    }

    CONSTRAINT_PARSERS = {
        "AUTOINCREMENT": lambda self: self._parse_auto_increment(),
        "AUTO_INCREMENT": lambda self: self._parse_auto_increment(),
        "CASESPECIFIC": lambda self: self.expression(exp.CaseSpecificColumnConstraint, not_=False),
        "CHARACTER SET": lambda self: self.expression(
            exp.CharacterSetColumnConstraint, this=self._parse_var_or_string()
        ),
        "CHECK": lambda self: self.expression(
            exp.CheckColumnConstraint,
            this=self._parse_wrapped(self._parse_assignment),
            enforced=self._match_text_seq("ENFORCED"),
        ),
        "COLLATE": lambda self: self.expression(
            exp.CollateColumnConstraint,
            this=self._parse_identifier() or self._parse_column(),
        ),
        "COMMENT": lambda self: self.expression(
            exp.CommentColumnConstraint, this=self._parse_string()
        ),
        "COMPRESS": lambda self: self._parse_compress(),
        "CLUSTERED": lambda self: self.expression(
            exp.ClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "NONCLUSTERED": lambda self: self.expression(
            exp.NonClusteredColumnConstraint, this=self._parse_wrapped_csv(self._parse_ordered)
        ),
        "DEFAULT": lambda self: self.expression(
            exp.DefaultColumnConstraint, this=self._parse_bitwise()
        ),
        "ENCODE": lambda self: self.expression(exp.EncodeColumnConstraint, this=self._parse_var()),
        "EPHEMERAL": lambda self: self.expression(
            exp.EphemeralColumnConstraint, this=self._parse_bitwise()
        ),
        "EXCLUDE": lambda self: self.expression(
            exp.ExcludeColumnConstraint, this=self._parse_index_params()
        ),
        "FOREIGN KEY": lambda self: self._parse_foreign_key(),
        "FORMAT": lambda self: self.expression(
            exp.DateFormatColumnConstraint, this=self._parse_var_or_string()
        ),
        "GENERATED": lambda self: self._parse_generated_as_identity(),
        "IDENTITY": lambda self: self._parse_auto_increment(),
        "INLINE": lambda self: self._parse_inline(),
        "LIKE": lambda self: self._parse_create_like(),
        "NOT": lambda self: self._parse_not_constraint(),
        "NULL": lambda self: self.expression(exp.NotNullColumnConstraint, allow_null=True),
        "ON": lambda self: (
            self._match(TokenType.UPDATE)
            and self.expression(exp.OnUpdateColumnConstraint, this=self._parse_function())
        )
        or self.expression(exp.OnProperty, this=self._parse_id_var()),
        "PATH": lambda self: self.expression(exp.PathColumnConstraint, this=self._parse_string()),
        "PERIOD": lambda self: self._parse_period_for_system_time(),
        "PRIMARY KEY": lambda self: self._parse_primary_key(),
        "REFERENCES": lambda self: self._parse_references(match=False),
        "TITLE": lambda self: self.expression(
            exp.TitleColumnConstraint, this=self._parse_var_or_string()
        ),
        "TTL": lambda self: self.expression(exp.MergeTreeTTL, expressions=[self._parse_bitwise()]),
        "UNIQUE": lambda self: self._parse_unique(),
        "UPPERCASE": lambda self: self.expression(exp.UppercaseColumnConstraint),
        "WATERMARK": lambda self: self.expression(
            exp.WatermarkColumnConstraint,
            this=self._match(TokenType.FOR) and self._parse_column(),
            expression=self._match(TokenType.ALIAS) and self._parse_disjunction(),
        ),
        "WITH": lambda self: self.expression(
            exp.Properties, expressions=self._parse_wrapped_properties()
        ),
        "BUCKET": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
        "TRUNCATE": lambda self: self._parse_partitioned_by_bucket_or_truncate(),
    }

    def _parse_partitioned_by_bucket_or_truncate(self) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.L_PAREN, advance=False):
            # Partitioning by bucket or truncate follows the syntax:
            # PARTITION BY (BUCKET(..) | TRUNCATE(..))
            # If we don't have a parenthesis after each keyword, we should instead parse this as an identifier
            self._retreat(self._index - 1)
            return None

        klass = (
            exp.PartitionedByBucket
            if self._prev.text.upper() == "BUCKET"
            else exp.PartitionByTruncate
        )

        args = self._parse_wrapped_csv(lambda: self._parse_primary() or self._parse_column())
        this, expression = seq_get(args, 0), seq_get(args, 1)

        if isinstance(this, exp.Literal):
            # Check for Iceberg partition transforms (bucket / truncate) and ensure their arguments are in the right order
            # - For Hive, it's `bucket(<num buckets>, <col name>)` or `truncate(<num_chars>, <col_name>)`
            # - For Trino, it's reversed - `bucket(<col name>, <num buckets>)` or `truncate(<col_name>, <num_chars>)`
            # Both variants are canonicalized to the latter, i.e. `bucket(<col name>, <num buckets>)`
            #
            # Hive ref: https://docs.aws.amazon.com/athena/latest/ug/querying-iceberg-creating-tables.html#querying-iceberg-partitioning
            # Trino ref: https://docs.aws.amazon.com/athena/latest/ug/create-table-as.html#ctas-table-properties
            this, expression = expression, this

        return self.expression(klass, this=this, expression=expression)

    ALTER_PARSERS = {
        "ADD": lambda self: self._parse_alter_table_add(),
        "AS": lambda self: self._parse_select(),
        "ALTER": lambda self: self._parse_alter_table_alter(),
        "CLUSTER BY": lambda self: self._parse_cluster(wrapped=True),
        "DELETE": lambda self: self.expression(exp.Delete, where=self._parse_where()),
        "DROP": lambda self: self._parse_alter_table_drop(),
        "RENAME": lambda self: self._parse_alter_table_rename(),
        "SET": lambda self: self._parse_alter_table_set(),
        "SWAP": lambda self: self.expression(
            exp.SwapTable, this=self._match(TokenType.WITH) and self._parse_table(schema=True)
        ),
    }

    ALTER_ALTER_PARSERS = {
        "DISTKEY": lambda self: self._parse_alter_diststyle(),
        "DISTSTYLE": lambda self: self._parse_alter_diststyle(),
        "SORTKEY": lambda self: self._parse_alter_sortkey(),
        "COMPOUND": lambda self: self._parse_alter_sortkey(compound=True),
    }

    SCHEMA_UNNAMED_CONSTRAINTS = {
        "CHECK",
        "EXCLUDE",
        "FOREIGN KEY",
        "LIKE",
        "PERIOD",
        "PRIMARY KEY",
        "UNIQUE",
        "WATERMARK",
        "BUCKET",
        "TRUNCATE",
    }

    NO_PAREN_FUNCTION_PARSERS = {
        "ANY": lambda self: self.expression(exp.Any, this=self._parse_bitwise()),
        "CASE": lambda self: self._parse_case(),
        "CONNECT_BY_ROOT": lambda self: self.expression(
            exp.ConnectByRoot, this=self._parse_column()
        ),
        "IF": lambda self: self._parse_if(),
    }

    INVALID_FUNC_NAME_TOKENS = {
        TokenType.IDENTIFIER,
        TokenType.STRING,
    }

    FUNCTIONS_WITH_ALIASED_ARGS = {"STRUCT"}

    KEY_VALUE_DEFINITIONS = (exp.Alias, exp.EQ, exp.PropertyEQ, exp.Slice)

    FUNCTION_PARSERS = {
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMax) for name in exp.ArgMax.sql_names()
        },
        **{
            name: lambda self: self._parse_max_min_by(exp.ArgMin) for name in exp.ArgMin.sql_names()
        },
        "CAST": lambda self: self._parse_cast(self.STRICT_CAST),
        "CEIL": lambda self: self._parse_ceil_floor(exp.Ceil),
        "CONVERT": lambda self: self._parse_convert(self.STRICT_CAST),
        "DECODE": lambda self: self._parse_decode(),
        "EXTRACT": lambda self: self._parse_extract(),
"FLOOR": lambda self: self._parse_ceil_floor(exp.Floor), 1235 "GAP_FILL": lambda self: self._parse_gap_fill(), 1236 "JSON_OBJECT": lambda self: self._parse_json_object(), 1237 "JSON_OBJECTAGG": lambda self: self._parse_json_object(agg=True), 1238 "JSON_TABLE": lambda self: self._parse_json_table(), 1239 "MATCH": lambda self: self._parse_match_against(), 1240 "NORMALIZE": lambda self: self._parse_normalize(), 1241 "OPENJSON": lambda self: self._parse_open_json(), 1242 "OVERLAY": lambda self: self._parse_overlay(), 1243 "POSITION": lambda self: self._parse_position(), 1244 "PREDICT": lambda self: self._parse_predict(), 1245 "SAFE_CAST": lambda self: self._parse_cast(False, safe=True), 1246 "STRING_AGG": lambda self: self._parse_string_agg(), 1247 "SUBSTRING": lambda self: self._parse_substring(), 1248 "TRIM": lambda self: self._parse_trim(), 1249 "TRY_CAST": lambda self: self._parse_cast(False, safe=True), 1250 "TRY_CONVERT": lambda self: self._parse_convert(False, safe=True), 1251 "XMLELEMENT": lambda self: self.expression( 1252 exp.XMLElement, 1253 this=self._match_text_seq("NAME") and self._parse_id_var(), 1254 expressions=self._match(TokenType.COMMA) and self._parse_csv(self._parse_expression), 1255 ), 1256 "XMLTABLE": lambda self: self._parse_xml_table(), 1257 } 1258 1259 QUERY_MODIFIER_PARSERS = { 1260 TokenType.MATCH_RECOGNIZE: lambda self: ("match", self._parse_match_recognize()), 1261 TokenType.PREWHERE: lambda self: ("prewhere", self._parse_prewhere()), 1262 TokenType.WHERE: lambda self: ("where", self._parse_where()), 1263 TokenType.GROUP_BY: lambda self: ("group", self._parse_group()), 1264 TokenType.HAVING: lambda self: ("having", self._parse_having()), 1265 TokenType.QUALIFY: lambda self: ("qualify", self._parse_qualify()), 1266 TokenType.WINDOW: lambda self: ("windows", self._parse_window_clause()), 1267 TokenType.ORDER_BY: lambda self: ("order", self._parse_order()), 1268 TokenType.LIMIT: lambda self: ("limit", self._parse_limit()), 1269 TokenType.FETCH: lambda self: ("limit", self._parse_limit()), 1270 TokenType.OFFSET: lambda self: ("offset", self._parse_offset()), 1271 TokenType.FOR: lambda self: ("locks", self._parse_locks()), 1272 TokenType.LOCK: lambda self: ("locks", self._parse_locks()), 1273 TokenType.TABLE_SAMPLE: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1274 TokenType.USING: lambda self: ("sample", self._parse_table_sample(as_modifier=True)), 1275 TokenType.CLUSTER_BY: lambda self: ( 1276 "cluster", 1277 self._parse_sort(exp.Cluster, TokenType.CLUSTER_BY), 1278 ), 1279 TokenType.DISTRIBUTE_BY: lambda self: ( 1280 "distribute", 1281 self._parse_sort(exp.Distribute, TokenType.DISTRIBUTE_BY), 1282 ), 1283 TokenType.SORT_BY: lambda self: ("sort", self._parse_sort(exp.Sort, TokenType.SORT_BY)), 1284 TokenType.CONNECT_BY: lambda self: ("connect", self._parse_connect(skip_start_token=True)), 1285 TokenType.START_WITH: lambda self: ("connect", self._parse_connect()), 1286 } 1287 QUERY_MODIFIER_TOKENS = set(QUERY_MODIFIER_PARSERS) 1288 1289 SET_PARSERS = { 1290 "GLOBAL": lambda self: self._parse_set_item_assignment("GLOBAL"), 1291 "LOCAL": lambda self: self._parse_set_item_assignment("LOCAL"), 1292 "SESSION": lambda self: self._parse_set_item_assignment("SESSION"), 1293 "TRANSACTION": lambda self: self._parse_set_transaction(), 1294 } 1295 1296 SHOW_PARSERS: t.Dict[str, t.Callable] = {} 1297 1298 TYPE_LITERAL_PARSERS = { 1299 exp.DataType.Type.JSON: lambda self, this, _: self.expression(exp.ParseJSON, this=this), 1300 } 1301 1302 TYPE_CONVERTERS: 
    DDL_SELECT_TOKENS = {TokenType.SELECT, TokenType.WITH, TokenType.L_PAREN}

    PRE_VOLATILE_TOKENS = {TokenType.CREATE, TokenType.REPLACE, TokenType.UNIQUE}

    TRANSACTION_KIND = {"DEFERRED", "IMMEDIATE", "EXCLUSIVE"}
    TRANSACTION_CHARACTERISTICS: OPTIONS_TYPE = {
        "ISOLATION": (
            ("LEVEL", "REPEATABLE", "READ"),
            ("LEVEL", "READ", "COMMITTED"),
            ("LEVEL", "READ", "UNCOMMITTED"),
            ("LEVEL", "SERIALIZABLE"),
        ),
        "READ": ("WRITE", "ONLY"),
    }

    CONFLICT_ACTIONS: OPTIONS_TYPE = dict.fromkeys(
        ("ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK", "UPDATE"), tuple()
    )
    CONFLICT_ACTIONS["DO"] = ("NOTHING", "UPDATE")

    CREATE_SEQUENCE: OPTIONS_TYPE = {
        "SCALE": ("EXTEND", "NOEXTEND"),
        "SHARD": ("EXTEND", "NOEXTEND"),
        "NO": ("CYCLE", "CACHE", "MAXVALUE", "MINVALUE"),
        **dict.fromkeys(
            (
                "SESSION",
                "GLOBAL",
                "KEEP",
                "NOKEEP",
                "ORDER",
                "NOORDER",
                "NOCACHE",
                "CYCLE",
                "NOCYCLE",
                "NOMINVALUE",
                "NOMAXVALUE",
                "NOSCALE",
                "NOSHARD",
            ),
            tuple(),
        ),
    }

    ISOLATED_LOADING_OPTIONS: OPTIONS_TYPE = {"FOR": ("ALL", "INSERT", "NONE")}

    USABLES: OPTIONS_TYPE = dict.fromkeys(
        ("ROLE", "WAREHOUSE", "DATABASE", "SCHEMA", "CATALOG"), tuple()
    )

    CAST_ACTIONS: OPTIONS_TYPE = dict.fromkeys(("RENAME", "ADD"), ("FIELDS",))

    SCHEMA_BINDING_OPTIONS: OPTIONS_TYPE = {
        "TYPE": ("EVOLUTION",),
        **dict.fromkeys(("BINDING", "COMPENSATION", "EVOLUTION"), tuple()),
    }

    PROCEDURE_OPTIONS: OPTIONS_TYPE = {}

    EXECUTE_AS_OPTIONS: OPTIONS_TYPE = dict.fromkeys(("CALLER", "SELF", "OWNER"), tuple())

    KEY_CONSTRAINT_OPTIONS: OPTIONS_TYPE = {
        "NOT": ("ENFORCED",),
        "MATCH": (
            "FULL",
            "PARTIAL",
            "SIMPLE",
        ),
        "INITIALLY": ("DEFERRED", "IMMEDIATE"),
        "USING": (
            "BTREE",
            "HASH",
        ),
        **dict.fromkeys(("DEFERRABLE", "NORELY", "RELY"), tuple()),
    }

    WINDOW_EXCLUDE_OPTIONS: OPTIONS_TYPE = {
        "NO": ("OTHERS",),
        "CURRENT": ("ROW",),
        **dict.fromkeys(("GROUP", "TIES"), tuple()),
    }

    INSERT_ALTERNATIVES = {"ABORT", "FAIL", "IGNORE", "REPLACE", "ROLLBACK"}

    CLONE_KEYWORDS = {"CLONE", "COPY"}
    HISTORICAL_DATA_PREFIX = {"AT", "BEFORE", "END"}
    HISTORICAL_DATA_KIND = {"OFFSET", "STATEMENT", "STREAM", "TIMESTAMP", "VERSION"}

    OPCLASS_FOLLOW_KEYWORDS = {"ASC", "DESC", "NULLS", "WITH"}

    OPTYPE_FOLLOW_TOKENS = {TokenType.COMMA, TokenType.R_PAREN}

    TABLE_INDEX_HINT_TOKENS = {TokenType.FORCE, TokenType.IGNORE, TokenType.USE}

    VIEW_ATTRIBUTES = {"ENCRYPTION", "SCHEMABINDING", "VIEW_METADATA"}

    WINDOW_ALIAS_TOKENS = ID_VAR_TOKENS - {TokenType.ROWS}
    WINDOW_BEFORE_PAREN_TOKENS = {TokenType.OVER}
    WINDOW_SIDES = {"FOLLOWING", "PRECEDING"}

    JSON_KEY_VALUE_SEPARATOR_TOKENS = {TokenType.COLON, TokenType.COMMA, TokenType.IS}

    FETCH_TOKENS = ID_VAR_TOKENS - {TokenType.ROW, TokenType.ROWS, TokenType.PERCENT}

    ADD_CONSTRAINT_TOKENS = {
        TokenType.CONSTRAINT,
        TokenType.FOREIGN_KEY,
        TokenType.INDEX,
        TokenType.KEY,
        TokenType.PRIMARY_KEY,
        TokenType.UNIQUE,
    }

    DISTINCT_TOKENS = {TokenType.DISTINCT}

    NULL_TOKENS = {TokenType.NULL}

    UNNEST_OFFSET_ALIAS_TOKENS = TABLE_ALIAS_TOKENS - SET_OPERATIONS
    SELECT_START_TOKENS = {TokenType.L_PAREN, TokenType.WITH, TokenType.SELECT}

    COPY_INTO_VARLEN_OPTIONS = {"FILE_FORMAT", "COPY_OPTIONS", "FORMAT_OPTIONS", "CREDENTIAL"}

    IS_JSON_PREDICATE_KIND = {"VALUE", "SCALAR", "ARRAY", "OBJECT"}

    ODBC_DATETIME_LITERALS = {
        "d": exp.Date,
        "t": exp.Time,
        "ts": exp.Timestamp,
    }

    ON_CONDITION_TOKENS = {"ERROR", "NULL", "TRUE", "FALSE", "EMPTY"}

    PRIVILEGE_FOLLOW_TOKENS = {TokenType.ON, TokenType.COMMA, TokenType.L_PAREN}

    # The style options for the DESCRIBE statement
    DESCRIBE_STYLES = {"ANALYZE", "EXTENDED", "FORMATTED", "HISTORY"}

    # The style options for the ANALYZE statement
    ANALYZE_STYLES = {
        "BUFFER_USAGE_LIMIT",
        "FULL",
        "LOCAL",
        "NO_WRITE_TO_BINLOG",
        "SAMPLE",
        "SKIP_LOCKED",
        "VERBOSE",
    }

    ANALYZE_EXPRESSION_PARSERS = {
        "ALL": lambda self: self._parse_analyze_columns(),
        "COMPUTE": lambda self: self._parse_analyze_statistics(),
        "DELETE": lambda self: self._parse_analyze_delete(),
        "DROP": lambda self: self._parse_analyze_histogram(),
        "ESTIMATE": lambda self: self._parse_analyze_statistics(),
        "LIST": lambda self: self._parse_analyze_list(),
        "PREDICATE": lambda self: self._parse_analyze_columns(),
        "UPDATE": lambda self: self._parse_analyze_histogram(),
        "VALIDATE": lambda self: self._parse_analyze_validate(),
    }

    PARTITION_KEYWORDS = {"PARTITION", "SUBPARTITION"}

    AMBIGUOUS_ALIAS_TOKENS = (TokenType.LIMIT, TokenType.OFFSET)

    OPERATION_MODIFIERS: t.Set[str] = set()

    RECURSIVE_CTE_SEARCH_KIND = {"BREADTH", "DEPTH", "CYCLE"}

    MODIFIABLES = (exp.Query, exp.Table, exp.TableFromRows)

    STRICT_CAST = True

    PREFIXED_PIVOT_COLUMNS = False
    IDENTIFY_PIVOT_STRINGS = False

    LOG_DEFAULTS_TO_LN = False

    # Whether the table sample clause expects CSV syntax
    TABLESAMPLE_CSV = False

    # The default method used for table sampling
    DEFAULT_SAMPLING_METHOD: t.Optional[str] = None

    # Whether the SET command needs a delimiter (e.g. "=") for assignments
    SET_REQUIRES_ASSIGNMENT_DELIMITER = True

    # Whether the TRIM function expects the characters to trim as its first argument
    TRIM_PATTERN_FIRST = False

    # Whether string aliases are supported, e.g. `SELECT COUNT(*) 'count'`
    STRING_ALIASES = False

    # Whether query modifiers such as LIMIT are attached to the UNION node (vs its right operand)
    MODIFIERS_ATTACHED_TO_SET_OP = True
    SET_OP_MODIFIERS = {"order", "limit", "offset"}

    # Whether to parse IF statements that aren't followed by a left parenthesis as commands
    NO_PAREN_IF_COMMANDS = True

    # Whether the -> and ->> operators expect documents of type JSON (e.g. Postgres)
    JSON_ARROWS_REQUIRE_JSON_TYPE = False

    # Whether the `:` operator is used to extract a value from a VARIANT column
    COLON_IS_VARIANT_EXTRACT = False

    # Whether or not a VALUES keyword needs to be followed by '(' to form a VALUES clause.
    # If this is True and '(' is not found, the keyword will be treated as an identifier
    VALUES_FOLLOWED_BY_PAREN = True

    # Whether implicit unnesting is supported, e.g. SELECT 1 FROM y.z AS z, z.a (Redshift)
1516 1517 # Whether or not interval spans are supported, e.g. INTERVAL 1 YEAR TO MONTHS 1518 INTERVAL_SPANS = True 1519 1520 # Whether a PARTITION clause can follow a table reference 1521 SUPPORTS_PARTITION_SELECTION = False 1522 1523 # Whether the `name AS expr` schema/column constraint requires parentheses around `expr` 1524 WRAPPED_TRANSFORM_COLUMN_CONSTRAINT = True 1525 1526 # Whether the 'AS' keyword is optional in the CTE definition syntax 1527 OPTIONAL_ALIAS_TOKEN_CTE = True 1528 1529 # Whether renaming a column with an ALTER statement requires the presence of the COLUMN keyword 1530 ALTER_RENAME_REQUIRES_COLUMN = True 1531 1532 # Whether all join types have the same precedence, i.e., they "naturally" produce a left-deep tree. 1533 # In standard SQL, joins that use the JOIN keyword take higher precedence than comma-joins. That is 1534 # to say, JOIN operators happen before comma operators. This is not the case in some dialects, such 1535 # as BigQuery, where all joins have the same precedence. 1536 JOINS_HAVE_EQUAL_PRECEDENCE = False 1537 1538 # Whether TIMESTAMP <literal> can produce a zone-aware timestamp 1539 ZONE_AWARE_TIMESTAMP_CONSTRUCTOR = False 1540 1541 # Whether map literals support arbitrary expressions as keys. 1542 # When True, allows complex keys like arrays or literals: {[1, 2]: 3}, {1: 2} (e.g. DuckDB). 1543 # When False, keys are typically restricted to identifiers. 1544 MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS = False 1545 1546 # Whether JSON_EXTRACT requires a JSON expression as the first argument, e.g. this 1547 # is true for Snowflake but not for BigQuery, which can also process strings 1548 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = False 1549 1550 __slots__ = ( 1551 "error_level", 1552 "error_message_context", 1553 "max_errors", 1554 "dialect", 1555 "sql", 1556 "errors", 1557 "_tokens", 1558 "_index", 1559 "_curr", 1560 "_next", 1561 "_prev", 1562 "_prev_comments", 1563 "_pipe_cte_counter", 1564 ) 1565 1566 # Autofilled 1567 SHOW_TRIE: t.Dict = {} 1568 SET_TRIE: t.Dict = {} 1569 1570 def __init__( 1571 self, 1572 error_level: t.Optional[ErrorLevel] = None, 1573 error_message_context: int = 100, 1574 max_errors: int = 3, 1575 dialect: DialectType = None, 1576 ): 1577 from sqlglot.dialects import Dialect 1578 1579 self.error_level = error_level or ErrorLevel.IMMEDIATE 1580 self.error_message_context = error_message_context 1581 self.max_errors = max_errors 1582 self.dialect = Dialect.get_or_raise(dialect) 1583 self.reset() 1584 1585 def reset(self): 1586 self.sql = "" 1587 self.errors = [] 1588 self._tokens = [] 1589 self._index = 0 1590 self._curr = None 1591 self._next = None 1592 self._prev = None 1593 self._prev_comments = None 1594 self._pipe_cte_counter = 0 1595 1596 def parse( 1597 self, raw_tokens: t.List[Token], sql: t.Optional[str] = None 1598 ) -> t.List[t.Optional[exp.Expression]]: 1599 """ 1600 Parses a list of tokens and returns a list of syntax trees, one tree 1601 per parsed SQL statement. 1602 1603 Args: 1604 raw_tokens: The list of tokens. 1605 sql: The original SQL string, used to produce helpful debug messages. 1606 1607 Returns: 1608 The list of the produced syntax trees. 1609 """ 1610 return self._parse( 1611 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1612 )
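    # parse() is usually reached via sqlglot.parse/parse_one, which pick the
    # dialect's tokenizer and parser, but it can also be driven directly. A
    # sketch using the default dialect:
    #
    #     >>> from sqlglot import tokenize
    #     >>> from sqlglot.parser import Parser
    #     >>> trees = Parser().parse(tokenize("SELECT a FROM t; SELECT b FROM u"))
    #     >>> len(trees)  # one syntax tree per semicolon-separated statement
    #     2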
1609 """ 1610 return self._parse( 1611 parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql 1612 ) 1613 1614 def parse_into( 1615 self, 1616 expression_types: exp.IntoType, 1617 raw_tokens: t.List[Token], 1618 sql: t.Optional[str] = None, 1619 ) -> t.List[t.Optional[exp.Expression]]: 1620 """ 1621 Parses a list of tokens into a given Expression type. If a collection of Expression 1622 types is given instead, this method will try to parse the token list into each one 1623 of them, stopping at the first for which the parsing succeeds. 1624 1625 Args: 1626 expression_types: The expression type(s) to try and parse the token list into. 1627 raw_tokens: The list of tokens. 1628 sql: The original SQL string, used to produce helpful debug messages. 1629 1630 Returns: 1631 The target Expression. 1632 """ 1633 errors = [] 1634 for expression_type in ensure_list(expression_types): 1635 parser = self.EXPRESSION_PARSERS.get(expression_type) 1636 if not parser: 1637 raise TypeError(f"No parser registered for {expression_type}") 1638 1639 try: 1640 return self._parse(parser, raw_tokens, sql) 1641 except ParseError as e: 1642 e.errors[0]["into_expression"] = expression_type 1643 errors.append(e) 1644 1645 raise ParseError( 1646 f"Failed to parse '{sql or raw_tokens}' into {expression_types}", 1647 errors=merge_errors(errors), 1648 ) from errors[-1] 1649 1650 def _parse( 1651 self, 1652 parse_method: t.Callable[[Parser], t.Optional[exp.Expression]], 1653 raw_tokens: t.List[Token], 1654 sql: t.Optional[str] = None, 1655 ) -> t.List[t.Optional[exp.Expression]]: 1656 self.reset() 1657 self.sql = sql or "" 1658 1659 total = len(raw_tokens) 1660 chunks: t.List[t.List[Token]] = [[]] 1661 1662 for i, token in enumerate(raw_tokens): 1663 if token.token_type == TokenType.SEMICOLON: 1664 if token.comments: 1665 chunks.append([token]) 1666 1667 if i < total - 1: 1668 chunks.append([]) 1669 else: 1670 chunks[-1].append(token) 1671 1672 expressions = [] 1673 1674 for tokens in chunks: 1675 self._index = -1 1676 self._tokens = tokens 1677 self._advance() 1678 1679 expressions.append(parse_method(self)) 1680 1681 if self._index < len(self._tokens): 1682 self.raise_error("Invalid expression / Unexpected token") 1683 1684 self.check_errors() 1685 1686 return expressions 1687 1688 def check_errors(self) -> None: 1689 """Logs or raises any found errors, depending on the chosen error level setting.""" 1690 if self.error_level == ErrorLevel.WARN: 1691 for error in self.errors: 1692 logger.error(str(error)) 1693 elif self.error_level == ErrorLevel.RAISE and self.errors: 1694 raise ParseError( 1695 concat_messages(self.errors, self.max_errors), 1696 errors=merge_errors(self.errors), 1697 ) 1698 1699 def raise_error(self, message: str, token: t.Optional[Token] = None) -> None: 1700 """ 1701 Appends an error in the list of recorded errors or raises it, depending on the chosen 1702 error level setting. 1703 """ 1704 token = token or self._curr or self._prev or Token.string("") 1705 start = token.start 1706 end = token.end + 1 1707 start_context = self.sql[max(start - self.error_message_context, 0) : start] 1708 highlight = self.sql[start:end] 1709 end_context = self.sql[end : end + self.error_message_context] 1710 1711 error = ParseError.new( 1712 f"{message}. 
1726 1727 def expression( 1728 self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs 1729 ) -> E: 1730 """ 1731 Creates a new, validated Expression. 1732 1733 Args: 1734 exp_class: The expression class to instantiate. 1735 comments: An optional list of comments to attach to the expression. 1736 kwargs: The arguments to set for the expression along with their respective values. 1737 1738 Returns: 1739 The target expression. 1740 """ 1741 instance = exp_class(**kwargs) 1742 instance.add_comments(comments) if comments else self._add_comments(instance) 1743 return self.validate_expression(instance) 1744 1745 def _add_comments(self, expression: t.Optional[exp.Expression]) -> None: 1746 if expression and self._prev_comments: 1747 expression.add_comments(self._prev_comments) 1748 self._prev_comments = None 1749 1750 def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E: 1751 """ 1752 Validates an Expression, making sure that all its mandatory arguments are set. 1753 1754 Args: 1755 expression: The expression to validate. 1756 args: An optional list of items that was used to instantiate the expression, if it's a Func. 1757 1758 Returns: 1759 The validated expression. 1760 """ 1761 if self.error_level != ErrorLevel.IGNORE: 1762 for error_message in expression.error_messages(args): 1763 self.raise_error(error_message) 1764 1765 return expression 1766 1767 def _find_sql(self, start: Token, end: Token) -> str: 1768 return self.sql[start.start : end.end + 1] 1769 1770 def _is_connected(self) -> bool: 1771 return self._prev and self._curr and self._prev.end + 1 == self._curr.start 1772 1773 def _advance(self, times: int = 1) -> None: 1774 self._index += times 1775 self._curr = seq_get(self._tokens, self._index) 1776 self._next = seq_get(self._tokens, self._index + 1) 1777 1778 if self._index > 0: 1779 self._prev = self._tokens[self._index - 1] 1780 self._prev_comments = self._prev.comments 1781 else: 1782 self._prev = None 1783 self._prev_comments = None 1784 1785 def _retreat(self, index: int) -> None: 1786 if index != self._index: 1787 self._advance(index - self._index) 1788 1789 def _warn_unsupported(self) -> None: 1790 if len(self._tokens) <= 1: 1791 return 1792 1793 # We use _find_sql because self.sql may comprise multiple chunks, and we're only 1794 # interested in emitting a warning for the one being currently processed. 1795 sql = self._find_sql(self._tokens[0], self._tokens[-1])[: self.error_message_context] 1796 1797 logger.warning( 1798 f"'{sql}' contains unsupported syntax. Falling back to parsing as a 'Command'." 1799 ) 1800 1801 def _parse_command(self) -> exp.Command: 1802 self._warn_unsupported() 1803 return self.expression( 1804 exp.Command, 1805 comments=self._prev_comments, 1806 this=self._prev.text.upper(), 1807 expression=self._parse_string(), 1808 ) 1809 1810 def _try_parse(self, parse_method: t.Callable[[], T], retreat: bool = False) -> t.Optional[T]: 1811 """ 1812 Attempts to backtrack if a parse function that contains a try/catch internally raises an error. 1813 This behavior can be different depending on the user-set ErrorLevel, so _try_parse aims to 1814 solve this by setting & resetting the parser state accordingly. 1815 """ 1816 index = self._index 1817 error_level = self.error_level 1818 1819 self.error_level = ErrorLevel.IMMEDIATE 1820 try: 1821 this = parse_method() 1822 except ParseError: 1823 this = None 1824 finally: 1825 if not this or retreat: 1826 self._retreat(index) 1827 self.error_level = error_level 1828 1829 return this
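    # _try_parse is the module's backtracking helper: the speculative branch
    # runs under ErrorLevel.IMMEDIATE so that any failure surfaces as a
    # ParseError, which is swallowed while the token cursor is restored. The
    # typical calling pattern looks like this (sketch):
    #
    #     table = self._try_parse(self._parse_table_parts)
    #     if not table:
    #         # the index was retreated, so another production can be tried
    #         ...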
1830 1831 def _parse_comment(self, allow_exists: bool = True) -> exp.Expression: 1832 start = self._prev 1833 exists = self._parse_exists() if allow_exists else None 1834 1835 self._match(TokenType.ON) 1836 1837 materialized = self._match_text_seq("MATERIALIZED") 1838 kind = self._match_set(self.CREATABLES) and self._prev 1839 if not kind: 1840 return self._parse_as_command(start) 1841 1842 if kind.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 1843 this = self._parse_user_defined_function(kind=kind.token_type) 1844 elif kind.token_type == TokenType.TABLE: 1845 this = self._parse_table(alias_tokens=self.COMMENT_TABLE_ALIAS_TOKENS) 1846 elif kind.token_type == TokenType.COLUMN: 1847 this = self._parse_column() 1848 else: 1849 this = self._parse_id_var() 1850 1851 self._match(TokenType.IS) 1852 1853 return self.expression( 1854 exp.Comment, 1855 this=this, 1856 kind=kind.text, 1857 expression=self._parse_string(), 1858 exists=exists, 1859 materialized=materialized, 1860 ) 1861 1862 def _parse_to_table( 1863 self, 1864 ) -> exp.ToTableProperty: 1865 table = self._parse_table_parts(schema=True) 1866 return self.expression(exp.ToTableProperty, this=table) 1867 1868 # https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/mergetree#mergetree-table-ttl 1869 def _parse_ttl(self) -> exp.Expression: 1870 def _parse_ttl_action() -> t.Optional[exp.Expression]: 1871 this = self._parse_bitwise() 1872 1873 if self._match_text_seq("DELETE"): 1874 return self.expression(exp.MergeTreeTTLAction, this=this, delete=True) 1875 if self._match_text_seq("RECOMPRESS"): 1876 return self.expression( 1877 exp.MergeTreeTTLAction, this=this, recompress=self._parse_bitwise() 1878 ) 1879 if self._match_text_seq("TO", "DISK"): 1880 return self.expression( 1881 exp.MergeTreeTTLAction, this=this, to_disk=self._parse_string() 1882 ) 1883 if self._match_text_seq("TO", "VOLUME"): 1884 return self.expression( 1885 exp.MergeTreeTTLAction, this=this, to_volume=self._parse_string() 1886 ) 1887 1888 return this 1889 1890 expressions = self._parse_csv(_parse_ttl_action) 1891 where = self._parse_where() 1892 group = self._parse_group() 1893 1894 aggregates = None 1895 if group and self._match(TokenType.SET): 1896 aggregates = self._parse_csv(self._parse_set_item) 1897 1898 return self.expression( 1899 exp.MergeTreeTTL, 1900 expressions=expressions, 1901 where=where, 1902 group=group, 1903 aggregates=aggregates, 1904 ) 1905 1906 def _parse_statement(self) -> t.Optional[exp.Expression]: 1907 if self._curr is None: 1908 return None 1909 1910 if self._match_set(self.STATEMENT_PARSERS): 1911 comments = self._prev_comments 1912 stmt = self.STATEMENT_PARSERS[self._prev.token_type](self) 1913 stmt.add_comments(comments, prepend=True) 1914 return stmt 1915 1916 if self._match_set(self.dialect.tokenizer_class.COMMANDS): 1917 return self._parse_command() 1918 1919 expression = self._parse_expression() 1920 expression = self._parse_set_operations(expression) if expression else self._parse_select() 1921 return self._parse_query_modifiers(expression)
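    # Statement dispatch in a nutshell: a leading token found in
    # STATEMENT_PARSERS (defined earlier in this class) routes to a dedicated
    # parser, tokens registered as tokenizer COMMANDS fall back to an opaque
    # exp.Command, and anything else is parsed as a bare expression or SELECT.
    # Conceptually, the dispatch table looks like this (abridged, illustrative):
    #
    #     STATEMENT_PARSERS = {
    #         TokenType.CREATE: lambda self: self._parse_create(),
    #         TokenType.DROP: lambda self: self._parse_drop(),
    #         TokenType.INSERT: lambda self: self._parse_insert(),
    #     }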
1922 1923 def _parse_drop(self, exists: bool = False) -> exp.Drop | exp.Command: 1924 start = self._prev 1925 temporary = self._match(TokenType.TEMPORARY) 1926 materialized = self._match_text_seq("MATERIALIZED") 1927 1928 kind = self._match_set(self.CREATABLES) and self._prev.text.upper() 1929 if not kind: 1930 return self._parse_as_command(start) 1931 1932 concurrently = self._match_text_seq("CONCURRENTLY") 1933 if_exists = exists or self._parse_exists() 1934 1935 if kind == "COLUMN": 1936 this = self._parse_column() 1937 else: 1938 this = self._parse_table_parts( 1939 schema=True, is_db_reference=self._prev.token_type == TokenType.SCHEMA 1940 ) 1941 1942 cluster = self._parse_on_property() if self._match(TokenType.ON) else None 1943 1944 if self._match(TokenType.L_PAREN, advance=False): 1945 expressions = self._parse_wrapped_csv(self._parse_types) 1946 else: 1947 expressions = None 1948 1949 return self.expression( 1950 exp.Drop, 1951 exists=if_exists, 1952 this=this, 1953 expressions=expressions, 1954 kind=self.dialect.CREATABLE_KIND_MAPPING.get(kind) or kind, 1955 temporary=temporary, 1956 materialized=materialized, 1957 cascade=self._match_text_seq("CASCADE"), 1958 constraints=self._match_text_seq("CONSTRAINTS"), 1959 purge=self._match_text_seq("PURGE"), 1960 cluster=cluster, 1961 concurrently=concurrently, 1962 ) 1963 1964 def _parse_exists(self, not_: bool = False) -> t.Optional[bool]: 1965 return ( 1966 self._match_text_seq("IF") 1967 and (not not_ or self._match(TokenType.NOT)) 1968 and self._match(TokenType.EXISTS) 1969 ) 1970 1971 def _parse_create(self) -> exp.Create | exp.Command: 1972 # Note: this can't be None because we've matched a statement parser 1973 start = self._prev 1974 1975 replace = ( 1976 start.token_type == TokenType.REPLACE 1977 or self._match_pair(TokenType.OR, TokenType.REPLACE) 1978 or self._match_pair(TokenType.OR, TokenType.ALTER) 1979 ) 1980 refresh = self._match_pair(TokenType.OR, TokenType.REFRESH) 1981 1982 unique = self._match(TokenType.UNIQUE) 1983 1984 if self._match_text_seq("CLUSTERED", "COLUMNSTORE"): 1985 clustered = True 1986 elif self._match_text_seq("NONCLUSTERED", "COLUMNSTORE") or self._match_text_seq( 1987 "COLUMNSTORE" 1988 ): 1989 clustered = False 1990 else: 1991 clustered = None 1992 1993 if self._match_pair(TokenType.TABLE, TokenType.FUNCTION, advance=False): 1994 self._advance() 1995 1996 properties = None 1997 create_token = self._match_set(self.CREATABLES) and self._prev 1998 1999 if not create_token: 2000 # exp.Properties.Location.POST_CREATE 2001 properties = self._parse_properties() 2002 create_token = self._match_set(self.CREATABLES) and self._prev 2003 2004 if not properties or not create_token: 2005 return self._parse_as_command(start) 2006 2007 concurrently = self._match_text_seq("CONCURRENTLY") 2008 exists = self._parse_exists(not_=True) 2009 this = None 2010 expression: t.Optional[exp.Expression] = None 2011 indexes = None 2012 no_schema_binding = None 2013 begin = None 2014 end = None 2015 clone = None 2016 2017 def extend_props(temp_props: t.Optional[exp.Properties]) -> None: 2018 nonlocal properties 2019 if properties and temp_props: 2020 properties.expressions.extend(temp_props.expressions) 2021 elif temp_props: 2022 properties = temp_props 2023 2024 if create_token.token_type in (TokenType.FUNCTION, TokenType.PROCEDURE): 2025 this = self._parse_user_defined_function(kind=create_token.token_type) 2026 2027 # exp.Properties.Location.POST_SCHEMA ("schema" here is the UDF's type signature) 2028 extend_props(self._parse_properties())
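        # At this point a CREATE FUNCTION/PROCEDURE statement has consumed its
        # name, signature and any POST_SCHEMA properties; what follows is the
        # function body. An illustrative statement this branch handles (the
        # exact syntax accepted varies by dialect):
        #
        #     CREATE FUNCTION add_one(x INT) RETURNS INT AS 'SELECT x + 1'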
2029 2030 expression = self._match(TokenType.ALIAS) and self._parse_heredoc() 2031 extend_props(self._parse_properties()) 2032 2033 if not expression: 2034 if self._match(TokenType.COMMAND): 2035 expression = self._parse_as_command(self._prev) 2036 else: 2037 begin = self._match(TokenType.BEGIN) 2038 return_ = self._match_text_seq("RETURN") 2039 2040 if self._match(TokenType.STRING, advance=False): 2041 # Takes care of BigQuery's JavaScript UDF definitions that end in an OPTIONS property 2042 # https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_function_statement 2043 expression = self._parse_string() 2044 extend_props(self._parse_properties()) 2045 else: 2046 expression = self._parse_user_defined_function_expression() 2047 2048 end = self._match_text_seq("END") 2049 2050 if return_: 2051 expression = self.expression(exp.Return, this=expression) 2052 elif create_token.token_type == TokenType.INDEX: 2053 # Postgres allows anonymous indexes, e.g. CREATE INDEX IF NOT EXISTS ON t(c) 2054 if not self._match(TokenType.ON): 2055 index = self._parse_id_var() 2056 anonymous = False 2057 else: 2058 index = None 2059 anonymous = True 2060 2061 this = self._parse_index(index=index, anonymous=anonymous) 2062 elif create_token.token_type in self.DB_CREATABLES: 2063 table_parts = self._parse_table_parts( 2064 schema=True, is_db_reference=create_token.token_type == TokenType.SCHEMA 2065 ) 2066 2067 # exp.Properties.Location.POST_NAME 2068 self._match(TokenType.COMMA) 2069 extend_props(self._parse_properties(before=True)) 2070 2071 this = self._parse_schema(this=table_parts) 2072 2073 # exp.Properties.Location.POST_SCHEMA and POST_WITH 2074 extend_props(self._parse_properties()) 2075 2076 has_alias = self._match(TokenType.ALIAS) 2077 if not self._match_set(self.DDL_SELECT_TOKENS, advance=False): 2078 # exp.Properties.Location.POST_ALIAS 2079 extend_props(self._parse_properties()) 2080 2081 if create_token.token_type == TokenType.SEQUENCE: 2082 expression = self._parse_types() 2083 extend_props(self._parse_properties()) 2084 else: 2085 expression = self._parse_ddl_select() 2086 2087 # Some dialects also support using a table as an alias instead of a SELECT. 2088 # Here we fall back to this as an alternative. 2089 if not expression and has_alias: 2090 expression = self._try_parse(self._parse_table_parts)
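        # The fallback above covers dialects where CREATE ... AS may name a
        # table rather than a query, e.g. (illustrative) CREATE TABLE t2 AS t1;
        # since _try_parse retreats on failure, nothing is consumed when
        # neither form matches.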
2091 2092 if create_token.token_type == TokenType.TABLE: 2093 # exp.Properties.Location.POST_EXPRESSION 2094 extend_props(self._parse_properties()) 2095 2096 indexes = [] 2097 while True: 2098 index = self._parse_index() 2099 2100 # exp.Properties.Location.POST_INDEX 2101 extend_props(self._parse_properties()) 2102 if not index: 2103 break 2104 else: 2105 self._match(TokenType.COMMA) 2106 indexes.append(index) 2107 elif create_token.token_type == TokenType.VIEW: 2108 if self._match_text_seq("WITH", "NO", "SCHEMA", "BINDING"): 2109 no_schema_binding = True 2110 elif create_token.token_type in (TokenType.SINK, TokenType.SOURCE): 2111 extend_props(self._parse_properties()) 2112 2113 shallow = self._match_text_seq("SHALLOW") 2114 2115 if self._match_texts(self.CLONE_KEYWORDS): 2116 copy = self._prev.text.lower() == "copy" 2117 clone = self.expression( 2118 exp.Clone, this=self._parse_table(schema=True), shallow=shallow, copy=copy 2119 ) 2120 2121 if self._curr and not self._match_set((TokenType.R_PAREN, TokenType.COMMA), advance=False): 2122 return self._parse_as_command(start) 2123 2124 create_kind_text = create_token.text.upper() 2125 return self.expression( 2126 exp.Create, 2127 this=this, 2128 kind=self.dialect.CREATABLE_KIND_MAPPING.get(create_kind_text) or create_kind_text, 2129 replace=replace, 2130 refresh=refresh, 2131 unique=unique, 2132 expression=expression, 2133 exists=exists, 2134 properties=properties, 2135 indexes=indexes, 2136 no_schema_binding=no_schema_binding, 2137 begin=begin, 2138 end=end, 2139 clone=clone, 2140 concurrently=concurrently, 2141 clustered=clustered, 2142 ) 2143 2144 def _parse_sequence_properties(self) -> t.Optional[exp.SequenceProperties]: 2145 seq = exp.SequenceProperties() 2146 2147 options = [] 2148 index = self._index 2149 2150 while self._curr: 2151 self._match(TokenType.COMMA) 2152 if self._match_text_seq("INCREMENT"): 2153 self._match_text_seq("BY") 2154 self._match_text_seq("=") 2155 seq.set("increment", self._parse_term()) 2156 elif self._match_text_seq("MINVALUE"): 2157 seq.set("minvalue", self._parse_term()) 2158 elif self._match_text_seq("MAXVALUE"): 2159 seq.set("maxvalue", self._parse_term()) 2160 elif self._match(TokenType.START_WITH) or self._match_text_seq("START"): 2161 self._match_text_seq("=") 2162 seq.set("start", self._parse_term()) 2163 elif self._match_text_seq("CACHE"): 2164 # T-SQL allows empty CACHE which is initialized dynamically 2165 seq.set("cache", self._parse_number() or True) 2166 elif self._match_text_seq("OWNED", "BY"): 2167 # "OWNED BY NONE" is the default 2168 seq.set("owned", None if self._match_text_seq("NONE") else self._parse_column()) 2169 else: 2170 opt = self._parse_var_from_options(self.CREATE_SEQUENCE, raise_unmatched=False) 2171 if opt: 2172 options.append(opt) 2173 else: 2174 break 2175 2176 seq.set("options", options if options else None) 2177 return None if self._index == index else seq 2178 2179 def _parse_property_before(self) -> t.Optional[exp.Expression]: 2180 # Only used for Teradata currently 2181 self._match(TokenType.COMMA) 2182 2183 kwargs = { 2184 "no": self._match_text_seq("NO"), 2185 "dual": self._match_text_seq("DUAL"), 2186 "before": self._match_text_seq("BEFORE"), 2187 "default": self._match_text_seq("DEFAULT"), 2188 "local": (self._match_text_seq("LOCAL") and "LOCAL") 2189 or (self._match_text_seq("NOT", "LOCAL") and "NOT LOCAL"), 2190 "after": self._match_text_seq("AFTER"), 2191 "minimum": 
self._match_texts(("MIN", "MINIMUM")), 2192 "maximum": self._match_texts(("MAX", "MAXIMUM")), 2193 } 2194 2195 if self._match_texts(self.PROPERTY_PARSERS): 2196 parser = self.PROPERTY_PARSERS[self._prev.text.upper()] 2197 try: 2198 return parser(self, **{k: v for k, v in kwargs.items() if v}) 2199 except TypeError: 2200 self.raise_error(f"Cannot parse property '{self._prev.text}'") 2201 2202 return None 2203 2204 def _parse_wrapped_properties(self) -> t.List[exp.Expression]: 2205 return self._parse_wrapped_csv(self._parse_property) 2206 2207 def _parse_property(self) -> t.Optional[exp.Expression]: 2208 if self._match_texts(self.PROPERTY_PARSERS): 2209 return self.PROPERTY_PARSERS[self._prev.text.upper()](self) 2210 2211 if self._match(TokenType.DEFAULT) and self._match_texts(self.PROPERTY_PARSERS): 2212 return self.PROPERTY_PARSERS[self._prev.text.upper()](self, default=True) 2213 2214 if self._match_text_seq("COMPOUND", "SORTKEY"): 2215 return self._parse_sortkey(compound=True) 2216 2217 if self._match_text_seq("SQL", "SECURITY"): 2218 return self.expression(exp.SqlSecurityProperty, definer=self._match_text_seq("DEFINER")) 2219 2220 index = self._index 2221 key = self._parse_column() 2222 2223 if not self._match(TokenType.EQ): 2224 self._retreat(index) 2225 return self._parse_sequence_properties() 2226 2227 # Transform the key to exp.Dot if it's dotted identifiers wrapped in exp.Column or to exp.Var otherwise 2228 if isinstance(key, exp.Column): 2229 key = key.to_dot() if len(key.parts) > 1 else exp.var(key.name) 2230 2231 value = self._parse_bitwise() or self._parse_var(any_token=True) 2232 2233 # Transform the value to exp.Var if it was parsed as exp.Column(exp.Identifier()) 2234 if isinstance(value, exp.Column): 2235 value = exp.var(value.name) 2236 2237 return self.expression(exp.Property, this=key, value=value) 2238 2239 def _parse_stored(self) -> t.Union[exp.FileFormatProperty, exp.StorageHandlerProperty]: 2240 if self._match_text_seq("BY"): 2241 return self.expression(exp.StorageHandlerProperty, this=self._parse_var_or_string()) 2242 2243 self._match(TokenType.ALIAS) 2244 input_format = self._parse_string() if self._match_text_seq("INPUTFORMAT") else None 2245 output_format = self._parse_string() if self._match_text_seq("OUTPUTFORMAT") else None 2246 2247 return self.expression( 2248 exp.FileFormatProperty, 2249 this=( 2250 self.expression( 2251 exp.InputOutputFormat, 2252 input_format=input_format, 2253 output_format=output_format, 2254 ) 2255 if input_format or output_format 2256 else self._parse_var_or_string() or self._parse_number() or self._parse_id_var() 2257 ), 2258 hive_format=True, 2259 ) 2260 2261 def _parse_unquoted_field(self) -> t.Optional[exp.Expression]: 2262 field = self._parse_field() 2263 if isinstance(field, exp.Identifier) and not field.quoted: 2264 field = exp.var(field) 2265 2266 return field 2267 2268 def _parse_property_assignment(self, exp_class: t.Type[E], **kwargs: t.Any) -> E: 2269 self._match(TokenType.EQ) 2270 self._match(TokenType.ALIAS) 2271 2272 return self.expression(exp_class, this=self._parse_unquoted_field(), **kwargs) 2273 2274 def _parse_properties(self, before: t.Optional[bool] = None) -> t.Optional[exp.Properties]: 2275 properties = [] 2276 while True: 2277 if before: 2278 prop = self._parse_property_before() 2279 else: 2280 prop = self._parse_property() 2281 if not prop: 2282 break 2283 for p in ensure_list(prop): 2284 properties.append(p) 2285 2286 if properties: 2287 return self.expression(exp.Properties, expressions=properties) 2288 
2290 2291 def _parse_fallback(self, no: bool = False) -> exp.FallbackProperty: 2292 return self.expression( 2293 exp.FallbackProperty, no=no, protection=self._match_text_seq("PROTECTION") 2294 ) 2295 2296 def _parse_security(self) -> t.Optional[exp.SecurityProperty]: 2297 if self._match_texts(("NONE", "DEFINER", "INVOKER")): 2298 security_specifier = self._prev.text.upper() 2299 return self.expression(exp.SecurityProperty, this=security_specifier) 2300 return None 2301 2302 def _parse_settings_property(self) -> exp.SettingsProperty: 2303 return self.expression( 2304 exp.SettingsProperty, expressions=self._parse_csv(self._parse_assignment) 2305 ) 2306 2307 def _parse_volatile_property(self) -> exp.VolatileProperty | exp.StabilityProperty: 2308 if self._index >= 2: 2309 pre_volatile_token = self._tokens[self._index - 2] 2310 else: 2311 pre_volatile_token = None 2312 2313 if pre_volatile_token and pre_volatile_token.token_type in self.PRE_VOLATILE_TOKENS: 2314 return exp.VolatileProperty() 2315 2316 return self.expression(exp.StabilityProperty, this=exp.Literal.string("VOLATILE")) 2317 2318 def _parse_retention_period(self) -> exp.Var: 2319 # Parse TSQL's HISTORY_RETENTION_PERIOD: {INFINITE | <number> DAY | DAYS | MONTH ...} 2320 number = self._parse_number() 2321 number_str = f"{number} " if number else "" 2322 unit = self._parse_var(any_token=True) 2323 return exp.var(f"{number_str}{unit}") 2324 2325 def _parse_system_versioning_property( 2326 self, with_: bool = False 2327 ) -> exp.WithSystemVersioningProperty: 2328 self._match(TokenType.EQ) 2329 prop = self.expression( 2330 exp.WithSystemVersioningProperty, 2331 **{ # type: ignore 2332 "on": True, 2333 "with": with_, 2334 }, 2335 ) 2336 2337 if self._match_text_seq("OFF"): 2338 prop.set("on", False) 2339 return prop 2340 2341 self._match(TokenType.ON) 2342 if self._match(TokenType.L_PAREN): 2343 while self._curr and not self._match(TokenType.R_PAREN): 2344 if self._match_text_seq("HISTORY_TABLE", "="): 2345 prop.set("this", self._parse_table_parts()) 2346 elif self._match_text_seq("DATA_CONSISTENCY_CHECK", "="): 2347 prop.set("data_consistency", self._advance_any() and self._prev.text.upper()) 2348 elif self._match_text_seq("HISTORY_RETENTION_PERIOD", "="): 2349 prop.set("retention_period", self._parse_retention_period()) 2350 2351 self._match(TokenType.COMMA) 2352 2353 return prop 2354 2355 def _parse_data_deletion_property(self) -> exp.DataDeletionProperty: 2356 self._match(TokenType.EQ) 2357 on = self._match_text_seq("ON") or not self._match_text_seq("OFF") 2358 prop = self.expression(exp.DataDeletionProperty, on=on) 2359 2360 if self._match(TokenType.L_PAREN): 2361 while self._curr and not self._match(TokenType.R_PAREN): 2362 if self._match_text_seq("FILTER_COLUMN", "="): 2363 prop.set("filter_column", self._parse_column()) 2364 elif self._match_text_seq("RETENTION_PERIOD", "="): 2365 prop.set("retention_period", self._parse_retention_period()) 2366 2367 self._match(TokenType.COMMA) 2368 2369 return prop 2370 2371 def _parse_distributed_property(self) -> exp.DistributedByProperty: 2372 kind = "HASH" 2373 expressions: t.Optional[t.List[exp.Expression]] = None 2374 if self._match_text_seq("BY", "HASH"): 2375 expressions = self._parse_wrapped_csv(self._parse_id_var) 2376 elif self._match_text_seq("BY", "RANDOM"): 2377 kind = "RANDOM" 2378 2379 # If the BUCKETS keyword is not present, the number of buckets is AUTO 2380 buckets: t.Optional[exp.Expression] = None 2381 if self._match_text_seq("BUCKETS") and not 
self._match_text_seq("AUTO"): 2382 buckets = self._parse_number() 2383 2384 return self.expression( 2385 exp.DistributedByProperty, 2386 expressions=expressions, 2387 kind=kind, 2388 buckets=buckets, 2389 order=self._parse_order(), 2390 ) 2391 2392 def _parse_composite_key_property(self, expr_type: t.Type[E]) -> E: 2393 self._match_text_seq("KEY") 2394 expressions = self._parse_wrapped_id_vars() 2395 return self.expression(expr_type, expressions=expressions) 2396 2397 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 2398 if self._match_text_seq("(", "SYSTEM_VERSIONING"): 2399 prop = self._parse_system_versioning_property(with_=True) 2400 self._match_r_paren() 2401 return prop 2402 2403 if self._match(TokenType.L_PAREN, advance=False): 2404 return self._parse_wrapped_properties() 2405 2406 if self._match_text_seq("JOURNAL"): 2407 return self._parse_withjournaltable() 2408 2409 if self._match_texts(self.VIEW_ATTRIBUTES): 2410 return self.expression(exp.ViewAttributeProperty, this=self._prev.text.upper()) 2411 2412 if self._match_text_seq("DATA"): 2413 return self._parse_withdata(no=False) 2414 elif self._match_text_seq("NO", "DATA"): 2415 return self._parse_withdata(no=True) 2416 2417 if self._match(TokenType.SERDE_PROPERTIES, advance=False): 2418 return self._parse_serde_properties(with_=True) 2419 2420 if self._match(TokenType.SCHEMA): 2421 return self.expression( 2422 exp.WithSchemaBindingProperty, 2423 this=self._parse_var_from_options(self.SCHEMA_BINDING_OPTIONS), 2424 ) 2425 2426 if self._match_texts(self.PROCEDURE_OPTIONS, advance=False): 2427 return self.expression( 2428 exp.WithProcedureOptions, expressions=self._parse_csv(self._parse_procedure_option) 2429 ) 2430 2431 if not self._next: 2432 return None 2433 2434 return self._parse_withisolatedloading() 2435 2436 def _parse_procedure_option(self) -> exp.Expression | None: 2437 if self._match_text_seq("EXECUTE", "AS"): 2438 return self.expression( 2439 exp.ExecuteAsProperty, 2440 this=self._parse_var_from_options(self.EXECUTE_AS_OPTIONS, raise_unmatched=False) 2441 or self._parse_string(), 2442 ) 2443 2444 return self._parse_var_from_options(self.PROCEDURE_OPTIONS) 2445 2446 # https://dev.mysql.com/doc/refman/8.0/en/create-view.html 2447 def _parse_definer(self) -> t.Optional[exp.DefinerProperty]: 2448 self._match(TokenType.EQ) 2449 2450 user = self._parse_id_var() 2451 self._match(TokenType.PARAMETER) 2452 host = self._parse_id_var() or (self._match(TokenType.MOD) and self._prev.text) 2453 2454 if not user or not host: 2455 return None 2456 2457 return exp.DefinerProperty(this=f"{user}@{host}") 2458 2459 def _parse_withjournaltable(self) -> exp.WithJournalTableProperty: 2460 self._match(TokenType.TABLE) 2461 self._match(TokenType.EQ) 2462 return self.expression(exp.WithJournalTableProperty, this=self._parse_table_parts()) 2463 2464 def _parse_log(self, no: bool = False) -> exp.LogProperty: 2465 return self.expression(exp.LogProperty, no=no) 2466 2467 def _parse_journal(self, **kwargs) -> exp.JournalProperty: 2468 return self.expression(exp.JournalProperty, **kwargs) 2469 2470 def _parse_checksum(self) -> exp.ChecksumProperty: 2471 self._match(TokenType.EQ) 2472 2473 on = None 2474 if self._match(TokenType.ON): 2475 on = True 2476 elif self._match_text_seq("OFF"): 2477 on = False 2478 2479 return self.expression(exp.ChecksumProperty, on=on, default=self._match(TokenType.DEFAULT)) 2480 2481 def _parse_cluster(self, wrapped: bool = False) -> exp.Cluster: 2482 return self.expression( 2483 
exp.Cluster, 2484 expressions=( 2485 self._parse_wrapped_csv(self._parse_ordered) 2486 if wrapped 2487 else self._parse_csv(self._parse_ordered) 2488 ), 2489 ) 2490 2491 def _parse_clustered_by(self) -> exp.ClusteredByProperty: 2492 self._match_text_seq("BY") 2493 2494 self._match_l_paren() 2495 expressions = self._parse_csv(self._parse_column) 2496 self._match_r_paren() 2497 2498 if self._match_text_seq("SORTED", "BY"): 2499 self._match_l_paren() 2500 sorted_by = self._parse_csv(self._parse_ordered) 2501 self._match_r_paren() 2502 else: 2503 sorted_by = None 2504 2505 self._match(TokenType.INTO) 2506 buckets = self._parse_number() 2507 self._match_text_seq("BUCKETS") 2508 2509 return self.expression( 2510 exp.ClusteredByProperty, 2511 expressions=expressions, 2512 sorted_by=sorted_by, 2513 buckets=buckets, 2514 ) 2515 2516 def _parse_copy_property(self) -> t.Optional[exp.CopyGrantsProperty]: 2517 if not self._match_text_seq("GRANTS"): 2518 self._retreat(self._index - 1) 2519 return None 2520 2521 return self.expression(exp.CopyGrantsProperty) 2522 2523 def _parse_freespace(self) -> exp.FreespaceProperty: 2524 self._match(TokenType.EQ) 2525 return self.expression( 2526 exp.FreespaceProperty, this=self._parse_number(), percent=self._match(TokenType.PERCENT) 2527 ) 2528 2529 def _parse_mergeblockratio( 2530 self, no: bool = False, default: bool = False 2531 ) -> exp.MergeBlockRatioProperty: 2532 if self._match(TokenType.EQ): 2533 return self.expression( 2534 exp.MergeBlockRatioProperty, 2535 this=self._parse_number(), 2536 percent=self._match(TokenType.PERCENT), 2537 ) 2538 2539 return self.expression(exp.MergeBlockRatioProperty, no=no, default=default) 2540 2541 def _parse_datablocksize( 2542 self, 2543 default: t.Optional[bool] = None, 2544 minimum: t.Optional[bool] = None, 2545 maximum: t.Optional[bool] = None, 2546 ) -> exp.DataBlocksizeProperty: 2547 self._match(TokenType.EQ) 2548 size = self._parse_number() 2549 2550 units = None 2551 if self._match_texts(("BYTES", "KBYTES", "KILOBYTES")): 2552 units = self._prev.text 2553 2554 return self.expression( 2555 exp.DataBlocksizeProperty, 2556 size=size, 2557 units=units, 2558 default=default, 2559 minimum=minimum, 2560 maximum=maximum, 2561 ) 2562 2563 def _parse_blockcompression(self) -> exp.BlockCompressionProperty: 2564 self._match(TokenType.EQ) 2565 always = self._match_text_seq("ALWAYS") 2566 manual = self._match_text_seq("MANUAL") 2567 never = self._match_text_seq("NEVER") 2568 default = self._match_text_seq("DEFAULT") 2569 2570 autotemp = None 2571 if self._match_text_seq("AUTOTEMP"): 2572 autotemp = self._parse_schema() 2573 2574 return self.expression( 2575 exp.BlockCompressionProperty, 2576 always=always, 2577 manual=manual, 2578 never=never, 2579 default=default, 2580 autotemp=autotemp, 2581 ) 2582 2583 def _parse_withisolatedloading(self) -> t.Optional[exp.IsolatedLoadingProperty]: 2584 index = self._index 2585 no = self._match_text_seq("NO") 2586 concurrent = self._match_text_seq("CONCURRENT") 2587 2588 if not self._match_text_seq("ISOLATED", "LOADING"): 2589 self._retreat(index) 2590 return None 2591 2592 target = self._parse_var_from_options(self.ISOLATED_LOADING_OPTIONS, raise_unmatched=False) 2593 return self.expression( 2594 exp.IsolatedLoadingProperty, no=no, concurrent=concurrent, target=target 2595 ) 2596 2597 def _parse_locking(self) -> exp.LockingProperty: 2598 if self._match(TokenType.TABLE): 2599 kind = "TABLE" 2600 elif self._match(TokenType.VIEW): 2601 kind = "VIEW" 2602 elif self._match(TokenType.ROW): 2603 kind 
= "ROW" 2604 elif self._match_text_seq("DATABASE"): 2605 kind = "DATABASE" 2606 else: 2607 kind = None 2608 2609 if kind in ("DATABASE", "TABLE", "VIEW"): 2610 this = self._parse_table_parts() 2611 else: 2612 this = None 2613 2614 if self._match(TokenType.FOR): 2615 for_or_in = "FOR" 2616 elif self._match(TokenType.IN): 2617 for_or_in = "IN" 2618 else: 2619 for_or_in = None 2620 2621 if self._match_text_seq("ACCESS"): 2622 lock_type = "ACCESS" 2623 elif self._match_texts(("EXCL", "EXCLUSIVE")): 2624 lock_type = "EXCLUSIVE" 2625 elif self._match_text_seq("SHARE"): 2626 lock_type = "SHARE" 2627 elif self._match_text_seq("READ"): 2628 lock_type = "READ" 2629 elif self._match_text_seq("WRITE"): 2630 lock_type = "WRITE" 2631 elif self._match_text_seq("CHECKSUM"): 2632 lock_type = "CHECKSUM" 2633 else: 2634 lock_type = None 2635 2636 override = self._match_text_seq("OVERRIDE") 2637 2638 return self.expression( 2639 exp.LockingProperty, 2640 this=this, 2641 kind=kind, 2642 for_or_in=for_or_in, 2643 lock_type=lock_type, 2644 override=override, 2645 ) 2646 2647 def _parse_partition_by(self) -> t.List[exp.Expression]: 2648 if self._match(TokenType.PARTITION_BY): 2649 return self._parse_csv(self._parse_assignment) 2650 return [] 2651 2652 def _parse_partition_bound_spec(self) -> exp.PartitionBoundSpec: 2653 def _parse_partition_bound_expr() -> t.Optional[exp.Expression]: 2654 if self._match_text_seq("MINVALUE"): 2655 return exp.var("MINVALUE") 2656 if self._match_text_seq("MAXVALUE"): 2657 return exp.var("MAXVALUE") 2658 return self._parse_bitwise() 2659 2660 this: t.Optional[exp.Expression | t.List[exp.Expression]] = None 2661 expression = None 2662 from_expressions = None 2663 to_expressions = None 2664 2665 if self._match(TokenType.IN): 2666 this = self._parse_wrapped_csv(self._parse_bitwise) 2667 elif self._match(TokenType.FROM): 2668 from_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2669 self._match_text_seq("TO") 2670 to_expressions = self._parse_wrapped_csv(_parse_partition_bound_expr) 2671 elif self._match_text_seq("WITH", "(", "MODULUS"): 2672 this = self._parse_number() 2673 self._match_text_seq(",", "REMAINDER") 2674 expression = self._parse_number() 2675 self._match_r_paren() 2676 else: 2677 self.raise_error("Failed to parse partition bound spec.") 2678 2679 return self.expression( 2680 exp.PartitionBoundSpec, 2681 this=this, 2682 expression=expression, 2683 from_expressions=from_expressions, 2684 to_expressions=to_expressions, 2685 ) 2686 2687 # https://www.postgresql.org/docs/current/sql-createtable.html 2688 def _parse_partitioned_of(self) -> t.Optional[exp.PartitionedOfProperty]: 2689 if not self._match_text_seq("OF"): 2690 self._retreat(self._index - 1) 2691 return None 2692 2693 this = self._parse_table(schema=True) 2694 2695 if self._match(TokenType.DEFAULT): 2696 expression: exp.Var | exp.PartitionBoundSpec = exp.var("DEFAULT") 2697 elif self._match_text_seq("FOR", "VALUES"): 2698 expression = self._parse_partition_bound_spec() 2699 else: 2700 self.raise_error("Expecting either DEFAULT or FOR VALUES clause.") 2701 2702 return self.expression(exp.PartitionedOfProperty, this=this, expression=expression) 2703 2704 def _parse_partitioned_by(self) -> exp.PartitionedByProperty: 2705 self._match(TokenType.EQ) 2706 return self.expression( 2707 exp.PartitionedByProperty, 2708 this=self._parse_schema() or self._parse_bracket(self._parse_field()), 2709 ) 2710 2711 def _parse_withdata(self, no: bool = False) -> exp.WithDataProperty: 2712 if self._match_text_seq("AND", 
"STATISTICS"): 2713 statistics = True 2714 elif self._match_text_seq("AND", "NO", "STATISTICS"): 2715 statistics = False 2716 else: 2717 statistics = None 2718 2719 return self.expression(exp.WithDataProperty, no=no, statistics=statistics) 2720 2721 def _parse_contains_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2722 if self._match_text_seq("SQL"): 2723 return self.expression(exp.SqlReadWriteProperty, this="CONTAINS SQL") 2724 return None 2725 2726 def _parse_modifies_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2727 if self._match_text_seq("SQL", "DATA"): 2728 return self.expression(exp.SqlReadWriteProperty, this="MODIFIES SQL DATA") 2729 return None 2730 2731 def _parse_no_property(self) -> t.Optional[exp.Expression]: 2732 if self._match_text_seq("PRIMARY", "INDEX"): 2733 return exp.NoPrimaryIndexProperty() 2734 if self._match_text_seq("SQL"): 2735 return self.expression(exp.SqlReadWriteProperty, this="NO SQL") 2736 return None 2737 2738 def _parse_on_property(self) -> t.Optional[exp.Expression]: 2739 if self._match_text_seq("COMMIT", "PRESERVE", "ROWS"): 2740 return exp.OnCommitProperty() 2741 if self._match_text_seq("COMMIT", "DELETE", "ROWS"): 2742 return exp.OnCommitProperty(delete=True) 2743 return self.expression(exp.OnProperty, this=self._parse_schema(self._parse_id_var())) 2744 2745 def _parse_reads_property(self) -> t.Optional[exp.SqlReadWriteProperty]: 2746 if self._match_text_seq("SQL", "DATA"): 2747 return self.expression(exp.SqlReadWriteProperty, this="READS SQL DATA") 2748 return None 2749 2750 def _parse_distkey(self) -> exp.DistKeyProperty: 2751 return self.expression(exp.DistKeyProperty, this=self._parse_wrapped(self._parse_id_var)) 2752 2753 def _parse_create_like(self) -> t.Optional[exp.LikeProperty]: 2754 table = self._parse_table(schema=True) 2755 2756 options = [] 2757 while self._match_texts(("INCLUDING", "EXCLUDING")): 2758 this = self._prev.text.upper() 2759 2760 id_var = self._parse_id_var() 2761 if not id_var: 2762 return None 2763 2764 options.append( 2765 self.expression(exp.Property, this=this, value=exp.var(id_var.this.upper())) 2766 ) 2767 2768 return self.expression(exp.LikeProperty, this=table, expressions=options) 2769 2770 def _parse_sortkey(self, compound: bool = False) -> exp.SortKeyProperty: 2771 return self.expression( 2772 exp.SortKeyProperty, this=self._parse_wrapped_id_vars(), compound=compound 2773 ) 2774 2775 def _parse_character_set(self, default: bool = False) -> exp.CharacterSetProperty: 2776 self._match(TokenType.EQ) 2777 return self.expression( 2778 exp.CharacterSetProperty, this=self._parse_var_or_string(), default=default 2779 ) 2780 2781 def _parse_remote_with_connection(self) -> exp.RemoteWithConnectionModelProperty: 2782 self._match_text_seq("WITH", "CONNECTION") 2783 return self.expression( 2784 exp.RemoteWithConnectionModelProperty, this=self._parse_table_parts() 2785 ) 2786 2787 def _parse_returns(self) -> exp.ReturnsProperty: 2788 value: t.Optional[exp.Expression] 2789 null = None 2790 is_table = self._match(TokenType.TABLE) 2791 2792 if is_table: 2793 if self._match(TokenType.LT): 2794 value = self.expression( 2795 exp.Schema, 2796 this="TABLE", 2797 expressions=self._parse_csv(self._parse_struct_types), 2798 ) 2799 if not self._match(TokenType.GT): 2800 self.raise_error("Expecting >") 2801 else: 2802 value = self._parse_schema(exp.var("TABLE")) 2803 elif self._match_text_seq("NULL", "ON", "NULL", "INPUT"): 2804 null = True 2805 value = None 2806 else: 2807 value = self._parse_types() 2808 2809 return 
self.expression(exp.ReturnsProperty, this=value, is_table=is_table, null=null) 2810 2811 def _parse_describe(self) -> exp.Describe: 2812 kind = self._match_set(self.CREATABLES) and self._prev.text 2813 style = self._match_texts(self.DESCRIBE_STYLES) and self._prev.text.upper() 2814 if self._match(TokenType.DOT): 2815 style = None 2816 self._retreat(self._index - 2) 2817 2818 format = self._parse_property() if self._match(TokenType.FORMAT, advance=False) else None 2819 2820 if self._match_set(self.STATEMENT_PARSERS, advance=False): 2821 this = self._parse_statement() 2822 else: 2823 this = self._parse_table(schema=True) 2824 2825 properties = self._parse_properties() 2826 expressions = properties.expressions if properties else None 2827 partition = self._parse_partition() 2828 return self.expression( 2829 exp.Describe, 2830 this=this, 2831 style=style, 2832 kind=kind, 2833 expressions=expressions, 2834 partition=partition, 2835 format=format, 2836 ) 2837 2838 def _parse_multitable_inserts(self, comments: t.Optional[t.List[str]]) -> exp.MultitableInserts: 2839 kind = self._prev.text.upper() 2840 expressions = [] 2841 2842 def parse_conditional_insert() -> t.Optional[exp.ConditionalInsert]: 2843 if self._match(TokenType.WHEN): 2844 expression = self._parse_disjunction() 2845 self._match(TokenType.THEN) 2846 else: 2847 expression = None 2848 2849 else_ = self._match(TokenType.ELSE) 2850 2851 if not self._match(TokenType.INTO): 2852 return None 2853 2854 return self.expression( 2855 exp.ConditionalInsert, 2856 this=self.expression( 2857 exp.Insert, 2858 this=self._parse_table(schema=True), 2859 expression=self._parse_derived_table_values(), 2860 ), 2861 expression=expression, 2862 else_=else_, 2863 ) 2864 2865 expression = parse_conditional_insert() 2866 while expression is not None: 2867 expressions.append(expression) 2868 expression = parse_conditional_insert() 2869 2870 return self.expression( 2871 exp.MultitableInserts, 2872 kind=kind, 2873 comments=comments, 2874 expressions=expressions, 2875 source=self._parse_table(), 2876 ) 2877 2878 def _parse_insert(self) -> t.Union[exp.Insert, exp.MultitableInserts]: 2879 comments = [] 2880 hint = self._parse_hint() 2881 overwrite = self._match(TokenType.OVERWRITE) 2882 ignore = self._match(TokenType.IGNORE) 2883 local = self._match_text_seq("LOCAL") 2884 alternative = None 2885 is_function = None 2886 2887 if self._match_text_seq("DIRECTORY"): 2888 this: t.Optional[exp.Expression] = self.expression( 2889 exp.Directory, 2890 this=self._parse_var_or_string(), 2891 local=local, 2892 row_format=self._parse_row_format(match_row=True), 2893 ) 2894 else: 2895 if self._match_set((TokenType.FIRST, TokenType.ALL)): 2896 comments += ensure_list(self._prev_comments) 2897 return self._parse_multitable_inserts(comments) 2898 2899 if self._match(TokenType.OR): 2900 alternative = self._match_texts(self.INSERT_ALTERNATIVES) and self._prev.text 2901 2902 self._match(TokenType.INTO) 2903 comments += ensure_list(self._prev_comments) 2904 self._match(TokenType.TABLE) 2905 is_function = self._match(TokenType.FUNCTION) 2906 2907 this = ( 2908 self._parse_table(schema=True, parse_partition=True) 2909 if not is_function 2910 else self._parse_function() 2911 ) 2912 if isinstance(this, exp.Table) and self._match(TokenType.ALIAS, advance=False): 2913 this.set("alias", self._parse_table_alias()) 2914 2915 returning = self._parse_returning() 2916 2917 return self.expression( 2918 exp.Insert, 2919 comments=comments, 2920 hint=hint, 2921 is_function=is_function, 2922 this=this, 
2923 stored=self._match_text_seq("STORED") and self._parse_stored(), 2924 by_name=self._match_text_seq("BY", "NAME"), 2925 exists=self._parse_exists(), 2926 where=self._match_pair(TokenType.REPLACE, TokenType.WHERE) and self._parse_assignment(), 2927 partition=self._match(TokenType.PARTITION_BY) and self._parse_partitioned_by(), 2928 settings=self._match_text_seq("SETTINGS") and self._parse_settings_property(), 2929 expression=self._parse_derived_table_values() or self._parse_ddl_select(), 2930 conflict=self._parse_on_conflict(), 2931 returning=returning or self._parse_returning(), 2932 overwrite=overwrite, 2933 alternative=alternative, 2934 ignore=ignore, 2935 source=self._match(TokenType.TABLE) and self._parse_table(), 2936 ) 2937 2938 def _parse_kill(self) -> exp.Kill: 2939 kind = exp.var(self._prev.text) if self._match_texts(("CONNECTION", "QUERY")) else None 2940 2941 return self.expression( 2942 exp.Kill, 2943 this=self._parse_primary(), 2944 kind=kind, 2945 ) 2946 2947 def _parse_on_conflict(self) -> t.Optional[exp.OnConflict]: 2948 conflict = self._match_text_seq("ON", "CONFLICT") 2949 duplicate = self._match_text_seq("ON", "DUPLICATE", "KEY") 2950 2951 if not conflict and not duplicate: 2952 return None 2953 2954 conflict_keys = None 2955 constraint = None 2956 2957 if conflict: 2958 if self._match_text_seq("ON", "CONSTRAINT"): 2959 constraint = self._parse_id_var() 2960 elif self._match(TokenType.L_PAREN): 2961 conflict_keys = self._parse_csv(self._parse_id_var) 2962 self._match_r_paren() 2963 2964 action = self._parse_var_from_options(self.CONFLICT_ACTIONS) 2965 if self._prev.token_type == TokenType.UPDATE: 2966 self._match(TokenType.SET) 2967 expressions = self._parse_csv(self._parse_equality) 2968 else: 2969 expressions = None 2970 2971 return self.expression( 2972 exp.OnConflict, 2973 duplicate=duplicate, 2974 expressions=expressions, 2975 action=action, 2976 conflict_keys=conflict_keys, 2977 constraint=constraint, 2978 where=self._parse_where(), 2979 ) 2980 2981 def _parse_returning(self) -> t.Optional[exp.Returning]: 2982 if not self._match(TokenType.RETURNING): 2983 return None 2984 return self.expression( 2985 exp.Returning, 2986 expressions=self._parse_csv(self._parse_expression), 2987 into=self._match(TokenType.INTO) and self._parse_table_part(), 2988 ) 2989 2990 def _parse_row(self) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 2991 if not self._match(TokenType.FORMAT): 2992 return None 2993 return self._parse_row_format() 2994 2995 def _parse_serde_properties(self, with_: bool = False) -> t.Optional[exp.SerdeProperties]: 2996 index = self._index 2997 with_ = with_ or self._match_text_seq("WITH") 2998 2999 if not self._match(TokenType.SERDE_PROPERTIES): 3000 self._retreat(index) 3001 return None 3002 return self.expression( 3003 exp.SerdeProperties, 3004 **{ # type: ignore 3005 "expressions": self._parse_wrapped_properties(), 3006 "with": with_, 3007 }, 3008 ) 3009 3010 def _parse_row_format( 3011 self, match_row: bool = False 3012 ) -> t.Optional[exp.RowFormatSerdeProperty | exp.RowFormatDelimitedProperty]: 3013 if match_row and not self._match_pair(TokenType.ROW, TokenType.FORMAT): 3014 return None 3015 3016 if self._match_text_seq("SERDE"): 3017 this = self._parse_string() 3018 3019 serde_properties = self._parse_serde_properties() 3020 3021 return self.expression( 3022 exp.RowFormatSerdeProperty, this=this, serde_properties=serde_properties 3023 ) 3024 3025 self._match_text_seq("DELIMITED") 3026 3027 kwargs = {} 3028 3029 if 
self._match_text_seq("FIELDS", "TERMINATED", "BY"): 3030 kwargs["fields"] = self._parse_string() 3031 if self._match_text_seq("ESCAPED", "BY"): 3032 kwargs["escaped"] = self._parse_string() 3033 if self._match_text_seq("COLLECTION", "ITEMS", "TERMINATED", "BY"): 3034 kwargs["collection_items"] = self._parse_string() 3035 if self._match_text_seq("MAP", "KEYS", "TERMINATED", "BY"): 3036 kwargs["map_keys"] = self._parse_string() 3037 if self._match_text_seq("LINES", "TERMINATED", "BY"): 3038 kwargs["lines"] = self._parse_string() 3039 if self._match_text_seq("NULL", "DEFINED", "AS"): 3040 kwargs["null"] = self._parse_string() 3041 3042 return self.expression(exp.RowFormatDelimitedProperty, **kwargs) # type: ignore 3043 3044 def _parse_load(self) -> exp.LoadData | exp.Command: 3045 if self._match_text_seq("DATA"): 3046 local = self._match_text_seq("LOCAL") 3047 self._match_text_seq("INPATH") 3048 inpath = self._parse_string() 3049 overwrite = self._match(TokenType.OVERWRITE) 3050 self._match_pair(TokenType.INTO, TokenType.TABLE) 3051 3052 return self.expression( 3053 exp.LoadData, 3054 this=self._parse_table(schema=True), 3055 local=local, 3056 overwrite=overwrite, 3057 inpath=inpath, 3058 partition=self._parse_partition(), 3059 input_format=self._match_text_seq("INPUTFORMAT") and self._parse_string(), 3060 serde=self._match_text_seq("SERDE") and self._parse_string(), 3061 ) 3062 return self._parse_as_command(self._prev) 3063 3064 def _parse_delete(self) -> exp.Delete: 3065 # This handles MySQL's "Multiple-Table Syntax" 3066 # https://dev.mysql.com/doc/refman/8.0/en/delete.html 3067 tables = None 3068 if not self._match(TokenType.FROM, advance=False): 3069 tables = self._parse_csv(self._parse_table) or None 3070 3071 returning = self._parse_returning() 3072 3073 return self.expression( 3074 exp.Delete, 3075 tables=tables, 3076 this=self._match(TokenType.FROM) and self._parse_table(joins=True), 3077 using=self._match(TokenType.USING) and self._parse_table(joins=True), 3078 cluster=self._match(TokenType.ON) and self._parse_on_property(), 3079 where=self._parse_where(), 3080 returning=returning or self._parse_returning(), 3081 limit=self._parse_limit(), 3082 ) 3083 3084 def _parse_update(self) -> exp.Update: 3085 this = self._parse_table(joins=True, alias_tokens=self.UPDATE_ALIAS_TOKENS) 3086 expressions = self._match(TokenType.SET) and self._parse_csv(self._parse_equality) 3087 returning = self._parse_returning() 3088 return self.expression( 3089 exp.Update, 3090 **{ # type: ignore 3091 "this": this, 3092 "expressions": expressions, 3093 "from": self._parse_from(joins=True), 3094 "where": self._parse_where(), 3095 "returning": returning or self._parse_returning(), 3096 "order": self._parse_order(), 3097 "limit": self._parse_limit(), 3098 }, 3099 ) 3100 3101 def _parse_use(self) -> exp.Use: 3102 return self.expression( 3103 exp.Use, 3104 kind=self._parse_var_from_options(self.USABLES, raise_unmatched=False), 3105 this=self._parse_table(schema=False), 3106 ) 3107 3108 def _parse_uncache(self) -> exp.Uncache: 3109 if not self._match(TokenType.TABLE): 3110 self.raise_error("Expecting TABLE after UNCACHE") 3111 3112 return self.expression( 3113 exp.Uncache, exists=self._parse_exists(), this=self._parse_table(schema=True) 3114 ) 3115 3116 def _parse_cache(self) -> exp.Cache: 3117 lazy = self._match_text_seq("LAZY") 3118 self._match(TokenType.TABLE) 3119 table = self._parse_table(schema=True) 3120 3121 options = [] 3122 if self._match_text_seq("OPTIONS"): 3123 self._match_l_paren() 3124 k = 
self._parse_string() 3125 self._match(TokenType.EQ) 3126 v = self._parse_string() 3127 options = [k, v] 3128 self._match_r_paren() 3129 3130 self._match(TokenType.ALIAS) 3131 return self.expression( 3132 exp.Cache, 3133 this=table, 3134 lazy=lazy, 3135 options=options, 3136 expression=self._parse_select(nested=True), 3137 ) 3138 3139 def _parse_partition(self) -> t.Optional[exp.Partition]: 3140 if not self._match_texts(self.PARTITION_KEYWORDS): 3141 return None 3142 3143 return self.expression( 3144 exp.Partition, 3145 subpartition=self._prev.text.upper() == "SUBPARTITION", 3146 expressions=self._parse_wrapped_csv(self._parse_assignment), 3147 ) 3148 3149 def _parse_value(self, values: bool = True) -> t.Optional[exp.Tuple]: 3150 def _parse_value_expression() -> t.Optional[exp.Expression]: 3151 if self.dialect.SUPPORTS_VALUES_DEFAULT and self._match(TokenType.DEFAULT): 3152 return exp.var(self._prev.text.upper()) 3153 return self._parse_expression() 3154 3155 if self._match(TokenType.L_PAREN): 3156 expressions = self._parse_csv(_parse_value_expression) 3157 self._match_r_paren() 3158 return self.expression(exp.Tuple, expressions=expressions) 3159 3160 # In some dialects we can have VALUES 1, 2 which results in 1 column & 2 rows. 3161 expression = self._parse_expression() 3162 if expression: 3163 return self.expression(exp.Tuple, expressions=[expression]) 3164 return None 3165 3166 def _parse_projections(self) -> t.List[exp.Expression]: 3167 return self._parse_expressions() 3168 3169 def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]: 3170 if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)): 3171 this: t.Optional[exp.Expression] = self._parse_simplified_pivot( 3172 is_unpivot=self._prev.token_type == TokenType.UNPIVOT 3173 ) 3174 elif self._match(TokenType.FROM): 3175 from_ = self._parse_from(skip_from_token=True, consume_pipe=True) 3176 # Support parentheses for duckdb FROM-first syntax 3177 select = self._parse_select() 3178 if select: 3179 select.set("from", from_) 3180 this = select 3181 else: 3182 this = exp.select("*").from_(t.cast(exp.From, from_)) 3183 else: 3184 this = ( 3185 self._parse_table(consume_pipe=True) 3186 if table 3187 else self._parse_select(nested=True, parse_set_operation=False) 3188 ) 3189 3190 # Transform exp.Values into an exp.Table to pass through parse_query_modifiers 3191 # in case a modifier (e.g. 

    def _parse_projections(self) -> t.List[exp.Expression]:
        return self._parse_expressions()

    def _parse_wrapped_select(self, table: bool = False) -> t.Optional[exp.Expression]:
        if self._match_set((TokenType.PIVOT, TokenType.UNPIVOT)):
            this: t.Optional[exp.Expression] = self._parse_simplified_pivot(
                is_unpivot=self._prev.token_type == TokenType.UNPIVOT
            )
        elif self._match(TokenType.FROM):
            from_ = self._parse_from(skip_from_token=True, consume_pipe=True)
            # Support parentheses for duckdb FROM-first syntax
            select = self._parse_select()
            if select:
                select.set("from", from_)
                this = select
            else:
                this = exp.select("*").from_(t.cast(exp.From, from_))
        else:
            this = (
                self._parse_table(consume_pipe=True)
                if table
                else self._parse_select(nested=True, parse_set_operation=False)
            )

        # Transform exp.Values into an exp.Table to pass through parse_query_modifiers
        # in case a modifier (e.g. join) is following
        if table and isinstance(this, exp.Values) and this.alias:
            alias = this.args["alias"].pop()
            this = exp.Table(this=this, alias=alias)

        this = self._parse_query_modifiers(self._parse_set_operations(this))

        return this

    def _parse_select(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
        consume_pipe: bool = True,
    ) -> t.Optional[exp.Expression]:
        query = self._parse_select_query(
            nested=nested,
            table=table,
            parse_subquery_alias=parse_subquery_alias,
            parse_set_operation=parse_set_operation,
        )

        if (
            consume_pipe
            and self._match(TokenType.PIPE_GT, advance=False)
            and isinstance(query, exp.Query)
        ):
            query = self._parse_pipe_syntax_query(query)
            query = query.subquery(copy=False) if query and table else query

        return query

    def _parse_select_query(
        self,
        nested: bool = False,
        table: bool = False,
        parse_subquery_alias: bool = True,
        parse_set_operation: bool = True,
    ) -> t.Optional[exp.Expression]:
        cte = self._parse_with()

        if cte:
            this = self._parse_statement()

            if not this:
                self.raise_error("Failed to parse any statement following CTE")
                return cte

            if "with" in this.arg_types:
                this.set("with", cte)
            else:
                self.raise_error(f"{this.key} does not support CTE")
                this = cte

            return this

        # duckdb supports leading with FROM x
        from_ = (
            self._parse_from(consume_pipe=True)
            if self._match(TokenType.FROM, advance=False)
            else None
        )

        if self._match(TokenType.SELECT):
            comments = self._prev_comments

            hint = self._parse_hint()

            if self._next and not self._next.token_type == TokenType.DOT:
                all_ = self._match(TokenType.ALL)
                distinct = self._match_set(self.DISTINCT_TOKENS)
            else:
                all_, distinct = None, None

            kind = (
                self._match(TokenType.ALIAS)
                and self._match_texts(("STRUCT", "VALUE"))
                and self._prev.text.upper()
            )

            if distinct:
                distinct = self.expression(
                    exp.Distinct,
                    on=self._parse_value(values=False) if self._match(TokenType.ON) else None,
                )

            if all_ and distinct:
                self.raise_error("Cannot specify both ALL and DISTINCT after SELECT")

            operation_modifiers = []
            while self._curr and self._match_texts(self.OPERATION_MODIFIERS):
                operation_modifiers.append(exp.var(self._prev.text.upper()))

            limit = self._parse_limit(top=True)
            projections = self._parse_projections()

            this = self.expression(
                exp.Select,
                kind=kind,
                hint=hint,
                distinct=distinct,
                expressions=projections,
                limit=limit,
                operation_modifiers=operation_modifiers or None,
            )
            this.comments = comments

            into = self._parse_into()
            if into:
                this.set("into", into)

            if not from_:
                from_ = self._parse_from()

            if from_:
                this.set("from", from_)

            this = self._parse_query_modifiers(this)
        elif (table or nested) and self._match(TokenType.L_PAREN):
            this = self._parse_wrapped_select(table=table)

            # We return early here so that the UNION isn't attached to the subquery by the
            # following call to _parse_set_operations, but instead becomes the parent node
            self._match_r_paren()
            return self._parse_subquery(this, parse_alias=parse_subquery_alias)
        elif self._match(TokenType.VALUES, advance=False):
            this = self._parse_derived_table_values()
        elif from_:
            this = exp.select("*").from_(from_.this, copy=False)
        elif self._match(TokenType.SUMMARIZE):
            table = self._match(TokenType.TABLE)
            this = self._parse_select() or self._parse_string() or self._parse_table()
            return self.expression(exp.Summarize, this=this, table=table)
        elif self._match(TokenType.DESCRIBE):
            this = self._parse_describe()
        elif self._match_text_seq("STREAM"):
            this = self._parse_function()
            if this:
                this = self.expression(exp.Stream, this=this)
            else:
                self._retreat(self._index - 1)
        else:
            this = None

        return self._parse_set_operations(this) if parse_set_operation else this

    def _parse_recursive_with_search(self) -> t.Optional[exp.RecursiveWithSearch]:
        self._match_text_seq("SEARCH")

        kind = self._match_texts(self.RECURSIVE_CTE_SEARCH_KIND) and self._prev.text.upper()

        if not kind:
            return None

        self._match_text_seq("FIRST", "BY")

        return self.expression(
            exp.RecursiveWithSearch,
            kind=kind,
            this=self._parse_id_var(),
            expression=self._match_text_seq("SET") and self._parse_id_var(),
            using=self._match_text_seq("USING") and self._parse_id_var(),
        )

    def _parse_with(self, skip_with_token: bool = False) -> t.Optional[exp.With]:
        if not skip_with_token and not self._match(TokenType.WITH):
            return None

        comments = self._prev_comments
        recursive = self._match(TokenType.RECURSIVE)

        last_comments = None
        expressions = []
        while True:
            cte = self._parse_cte()
            if isinstance(cte, exp.CTE):
                expressions.append(cte)
                if last_comments:
                    cte.add_comments(last_comments)

            if not self._match(TokenType.COMMA) and not self._match(TokenType.WITH):
                break
            else:
                self._match(TokenType.WITH)

            last_comments = self._prev_comments

        return self.expression(
            exp.With,
            comments=comments,
            expressions=expressions,
            recursive=recursive,
            search=self._parse_recursive_with_search(),
        )

    def _parse_cte(self) -> t.Optional[exp.CTE]:
        index = self._index

        alias = self._parse_table_alias(self.ID_VAR_TOKENS)
        if not alias or not alias.this:
            self.raise_error("Expected CTE to have alias")

        if not self._match(TokenType.ALIAS) and not self.OPTIONAL_ALIAS_TOKEN_CTE:
            self._retreat(index)
            return None

        comments = self._prev_comments

        if self._match_text_seq("NOT", "MATERIALIZED"):
            materialized = False
        elif self._match_text_seq("MATERIALIZED"):
            materialized = True
        else:
            materialized = None

        cte = self.expression(
            exp.CTE,
            this=self._parse_wrapped(self._parse_statement),
            alias=alias,
            materialized=materialized,
            comments=comments,
        )

        values = cte.this
        if isinstance(values, exp.Values):
            if values.alias:
                cte.set("this", exp.select("*").from_(values))
            else:
                cte.set("this", exp.select("*").from_(exp.alias_(values, "_values", table=True)))

        return cte
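
    # Illustrative sketch (not part of the parser): _parse_cte recognizes the
    # Postgres-style MATERIALIZED / NOT MATERIALIZED modifier and records it on
    # the CTE node. A rough round-trip through the public API:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one(
    #         "WITH x AS MATERIALIZED (SELECT 1) SELECT * FROM x", read="postgres"
    #     )
    #     # the first CTE under q.args["with"] carries materialized=True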

    def _parse_table_alias(
        self, alias_tokens: t.Optional[t.Collection[TokenType]] = None
    ) -> t.Optional[exp.TableAlias]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return None

        any_token = self._match(TokenType.ALIAS)
        alias = (
            self._parse_id_var(any_token=any_token, tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
            or self._parse_string_as_identifier()
        )

        index = self._index
        if self._match(TokenType.L_PAREN):
            columns = self._parse_csv(self._parse_function_parameter)
            self._match_r_paren() if columns else self._retreat(index)
        else:
            columns = None

        if not alias and not columns:
            return None

        table_alias = self.expression(exp.TableAlias, this=alias, columns=columns)

        # We bubble up comments from the Identifier to the TableAlias
        if isinstance(alias, exp.Identifier):
            table_alias.add_comments(alias.pop_comments())

        return table_alias

    def _parse_subquery(
        self, this: t.Optional[exp.Expression], parse_alias: bool = True
    ) -> t.Optional[exp.Subquery]:
        if not this:
            return None

        return self.expression(
            exp.Subquery,
            this=this,
            pivots=self._parse_pivots(),
            alias=self._parse_table_alias() if parse_alias else None,
            sample=self._parse_table_sample(),
        )

    def _implicit_unnests_to_explicit(self, this: E) -> E:
        from sqlglot.optimizer.normalize_identifiers import normalize_identifiers as _norm

        refs = {_norm(this.args["from"].this.copy(), dialect=self.dialect).alias_or_name}
        for i, join in enumerate(this.args.get("joins") or []):
            table = join.this
            normalized_table = table.copy()
            normalized_table.meta["maybe_column"] = True
            normalized_table = _norm(normalized_table, dialect=self.dialect)

            if isinstance(table, exp.Table) and not join.args.get("on"):
                if normalized_table.parts[0].name in refs:
                    table_as_column = table.to_column()
                    unnest = exp.Unnest(expressions=[table_as_column])

                    # Table.to_column creates a parent Alias node that we want to convert to
                    # a TableAlias and attach to the Unnest, so it matches the parser's output
                    if isinstance(table.args.get("alias"), exp.TableAlias):
                        table_as_column.replace(table_as_column.this)
                        exp.alias_(unnest, None, table=[table.args["alias"].this], copy=False)

                    table.replace(unnest)

            refs.add(normalized_table.alias_or_name)

        return this

    def _parse_query_modifiers(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if isinstance(this, self.MODIFIABLES):
            for join in self._parse_joins():
                this.append("joins", join)
            for lateral in iter(self._parse_lateral, None):
                this.append("laterals", lateral)

            while True:
                if self._match_set(self.QUERY_MODIFIER_PARSERS, advance=False):
                    parser = self.QUERY_MODIFIER_PARSERS[self._curr.token_type]
                    key, expression = parser(self)

                    if expression:
                        this.set(key, expression)
                        if key == "limit":
                            offset = expression.args.pop("offset", None)

                            if offset:
                                offset = exp.Offset(expression=offset)
                                this.set("offset", offset)

                                limit_by_expressions = expression.expressions
                                expression.set("expressions", None)
                                offset.set("expressions", limit_by_expressions)
                        continue
                break

        if self.SUPPORTS_IMPLICIT_UNNEST and this and this.args.get("from"):
            this = self._implicit_unnests_to_explicit(this)

        return this

    def _parse_hint_fallback_to_string(self) -> t.Optional[exp.Hint]:
        start = self._curr
        while self._curr:
            self._advance()

        end = self._tokens[self._index - 1]
        return exp.Hint(expressions=[self._find_sql(start, end)])

    def _parse_hint_function_call(self) -> t.Optional[exp.Expression]:
        return self._parse_function_call()

    def _parse_hint_body(self) -> t.Optional[exp.Hint]:
        start_index = self._index
        should_fallback_to_string = False

        hints = []
        try:
            for hint in iter(
                lambda: self._parse_csv(
                    lambda: self._parse_hint_function_call() or self._parse_var(upper=True),
                ),
                [],
            ):
                hints.extend(hint)
        except ParseError:
            should_fallback_to_string = True

        if should_fallback_to_string or self._curr:
            self._retreat(start_index)
            return self._parse_hint_fallback_to_string()

        return self.expression(exp.Hint, expressions=hints)

    def _parse_hint(self) -> t.Optional[exp.Hint]:
        if self._match(TokenType.HINT) and self._prev_comments:
            return exp.maybe_parse(self._prev_comments[0], into=exp.Hint, dialect=self.dialect)

        return None
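
    # Illustrative sketch (not part of the parser): optimizer hints arrive as
    # comment tokens, so _parse_hint re-parses the comment text into an exp.Hint.
    # Roughly, via the public API (the dialect choice is an assumption):
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("SELECT /*+ BROADCAST(t) */ * FROM t", read="spark")
    #     # q.args.get("hint") is an exp.Hint when the dialect tokenizes /*+ ... */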

    def _parse_into(self) -> t.Optional[exp.Into]:
        if not self._match(TokenType.INTO):
            return None

        temp = self._match(TokenType.TEMPORARY)
        unlogged = self._match_text_seq("UNLOGGED")
        self._match(TokenType.TABLE)

        return self.expression(
            exp.Into, this=self._parse_table(schema=True), temporary=temp, unlogged=unlogged
        )

    def _parse_from(
        self,
        joins: bool = False,
        skip_from_token: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.From]:
        if not skip_from_token and not self._match(TokenType.FROM):
            return None

        return self.expression(
            exp.From,
            comments=self._prev_comments,
            this=self._parse_table(joins=joins, consume_pipe=consume_pipe),
        )

    def _parse_match_recognize_measure(self) -> exp.MatchRecognizeMeasure:
        return self.expression(
            exp.MatchRecognizeMeasure,
            window_frame=self._match_texts(("FINAL", "RUNNING")) and self._prev.text.upper(),
            this=self._parse_expression(),
        )

    def _parse_match_recognize(self) -> t.Optional[exp.MatchRecognize]:
        if not self._match(TokenType.MATCH_RECOGNIZE):
            return None

        self._match_l_paren()

        partition = self._parse_partition_by()
        order = self._parse_order()

        measures = (
            self._parse_csv(self._parse_match_recognize_measure)
            if self._match_text_seq("MEASURES")
            else None
        )

        if self._match_text_seq("ONE", "ROW", "PER", "MATCH"):
            rows = exp.var("ONE ROW PER MATCH")
        elif self._match_text_seq("ALL", "ROWS", "PER", "MATCH"):
            text = "ALL ROWS PER MATCH"
            if self._match_text_seq("SHOW", "EMPTY", "MATCHES"):
                text += " SHOW EMPTY MATCHES"
            elif self._match_text_seq("OMIT", "EMPTY", "MATCHES"):
                text += " OMIT EMPTY MATCHES"
            elif self._match_text_seq("WITH", "UNMATCHED", "ROWS"):
                text += " WITH UNMATCHED ROWS"
            rows = exp.var(text)
        else:
            rows = None

        if self._match_text_seq("AFTER", "MATCH", "SKIP"):
            text = "AFTER MATCH SKIP"
            if self._match_text_seq("PAST", "LAST", "ROW"):
                text += " PAST LAST ROW"
            elif self._match_text_seq("TO", "NEXT", "ROW"):
                text += " TO NEXT ROW"
            elif self._match_text_seq("TO", "FIRST"):
                text += f" TO FIRST {self._advance_any().text}"  # type: ignore
            elif self._match_text_seq("TO", "LAST"):
                text += f" TO LAST {self._advance_any().text}"  # type: ignore
            after = exp.var(text)
        else:
            after = None

        if self._match_text_seq("PATTERN"):
            self._match_l_paren()

            if not self._curr:
                self.raise_error("Expecting )", self._curr)

            paren = 1
            start = self._curr

            while self._curr and paren > 0:
                if self._curr.token_type == TokenType.L_PAREN:
                    paren += 1
                if self._curr.token_type == TokenType.R_PAREN:
                    paren -= 1

                end = self._prev
                self._advance()

            if paren > 0:
                self.raise_error("Expecting )", self._curr)

            pattern = exp.var(self._find_sql(start, end))
        else:
            pattern = None

        define = (
            self._parse_csv(self._parse_name_as_expression)
            if self._match_text_seq("DEFINE")
            else None
        )

        self._match_r_paren()

        return self.expression(
            exp.MatchRecognize,
            partition_by=partition,
            order=order,
            measures=measures,
            rows=rows,
            after=after,
            pattern=pattern,
            define=define,
            alias=self._parse_table_alias(),
        )

    def _parse_lateral(self) -> t.Optional[exp.Lateral]:
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY)
        if not cross_apply and self._match_pair(TokenType.OUTER, TokenType.APPLY):
            cross_apply = False

        if cross_apply is not None:
            this = self._parse_select(table=True)
            view = None
            outer = None
        elif self._match(TokenType.LATERAL):
            this = self._parse_select(table=True)
            view = self._match(TokenType.VIEW)
            outer = self._match(TokenType.OUTER)
        else:
            return None

        if not this:
            this = (
                self._parse_unnest()
                or self._parse_function()
                or self._parse_id_var(any_token=False)
            )

            while self._match(TokenType.DOT):
                this = exp.Dot(
                    this=this,
                    expression=self._parse_function() or self._parse_id_var(any_token=False),
                )

        ordinality: t.Optional[bool] = None

        if view:
            table = self._parse_id_var(any_token=False)
            columns = self._parse_csv(self._parse_id_var) if self._match(TokenType.ALIAS) else []
            table_alias: t.Optional[exp.TableAlias] = self.expression(
                exp.TableAlias, this=table, columns=columns
            )
        elif isinstance(this, (exp.Subquery, exp.Unnest)) and this.alias:
            # We move the alias from the lateral's child node to the lateral itself
            table_alias = this.args["alias"].pop()
        else:
            ordinality = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)
            table_alias = self._parse_table_alias()

        return self.expression(
            exp.Lateral,
            this=this,
            view=view,
            outer=outer,
            alias=table_alias,
            cross_apply=cross_apply,
            ordinality=ordinality,
        )

    def _parse_join_parts(
        self,
    ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]:
        return (
            self._match_set(self.JOIN_METHODS) and self._prev,
            self._match_set(self.JOIN_SIDES) and self._prev,
            self._match_set(self.JOIN_KINDS) and self._prev,
        )

    def _parse_using_identifiers(self) -> t.List[exp.Expression]:
        def _parse_column_as_identifier() -> t.Optional[exp.Expression]:
            this = self._parse_column()
            if isinstance(this, exp.Column):
                return this.this
            return this

        return self._parse_wrapped_csv(_parse_column_as_identifier, optional=True)

    def _parse_join(
        self, skip_join_token: bool = False, parse_bracket: bool = False
    ) -> t.Optional[exp.Join]:
        if self._match(TokenType.COMMA):
            table = self._try_parse(self._parse_table)
            cross_join = self.expression(exp.Join, this=table) if table else None

            if cross_join and self.JOINS_HAVE_EQUAL_PRECEDENCE:
                cross_join.set("kind", "CROSS")

            return cross_join

        index = self._index
        method, side, kind = self._parse_join_parts()
        hint = self._prev.text if self._match_texts(self.JOIN_HINTS) else None
        join = self._match(TokenType.JOIN) or (kind and kind.token_type == TokenType.STRAIGHT_JOIN)
        join_comments = self._prev_comments

        if not skip_join_token and not join:
            self._retreat(index)
            kind = None
            method = None
            side = None

        outer_apply = self._match_pair(TokenType.OUTER, TokenType.APPLY, False)
        cross_apply = self._match_pair(TokenType.CROSS, TokenType.APPLY, False)

        if not skip_join_token and not join and not outer_apply and not cross_apply:
            return None

        kwargs: t.Dict[str, t.Any] = {"this": self._parse_table(parse_bracket=parse_bracket)}
        if kind and kind.token_type == TokenType.ARRAY and self._match(TokenType.COMMA):
            kwargs["expressions"] = self._parse_csv(
                lambda: self._parse_table(parse_bracket=parse_bracket)
            )

        if method:
            kwargs["method"] = method.text
        if side:
            kwargs["side"] = side.text
        if kind:
            kwargs["kind"] = kind.text
        if hint:
            kwargs["hint"] = hint

        if self._match(TokenType.MATCH_CONDITION):
            kwargs["match_condition"] = self._parse_wrapped(self._parse_comparison)

        if self._match(TokenType.ON):
            kwargs["on"] = self._parse_assignment()
        elif self._match(TokenType.USING):
            kwargs["using"] = self._parse_using_identifiers()
        elif (
            not (outer_apply or cross_apply)
            and not isinstance(kwargs["this"], exp.Unnest)
            and not (kind and kind.token_type in (TokenType.CROSS, TokenType.ARRAY))
        ):
            index = self._index
            joins: t.Optional[list] = list(self._parse_joins())

            if joins and self._match(TokenType.ON):
                kwargs["on"] = self._parse_assignment()
            elif joins and self._match(TokenType.USING):
                kwargs["using"] = self._parse_using_identifiers()
            else:
                joins = None
                self._retreat(index)

            kwargs["this"].set("joins", joins if joins else None)

        kwargs["pivots"] = self._parse_pivots()

        comments = [c for token in (method, side, kind) if token for c in token.comments]
        comments = (join_comments or []) + comments
        return self.expression(exp.Join, comments=comments, **kwargs)
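
    # Illustrative sketch (not part of the parser): a bare comma between tables is
    # parsed by _parse_join as an implicit cross join, tagged kind=CROSS when the
    # dialect sets JOINS_HAVE_EQUAL_PRECEDENCE:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("SELECT * FROM a, b JOIN c USING (id)")
    #     # q.args["joins"] holds one exp.Join per comma/JOIN clause after FROM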

    def _parse_opclass(self) -> t.Optional[exp.Expression]:
        this = self._parse_assignment()

        if self._match_texts(self.OPCLASS_FOLLOW_KEYWORDS, advance=False):
            return this

        if not self._match_set(self.OPTYPE_FOLLOW_TOKENS, advance=False):
            return self.expression(exp.Opclass, this=this, expression=self._parse_table_parts())

        return this

    def _parse_index_params(self) -> exp.IndexParameters:
        using = self._parse_var(any_token=True) if self._match(TokenType.USING) else None

        if self._match(TokenType.L_PAREN, advance=False):
            columns = self._parse_wrapped_csv(self._parse_with_operator)
        else:
            columns = None

        include = self._parse_wrapped_id_vars() if self._match_text_seq("INCLUDE") else None
        partition_by = self._parse_partition_by()
        with_storage = self._match(TokenType.WITH) and self._parse_wrapped_properties()
        tablespace = (
            self._parse_var(any_token=True)
            if self._match_text_seq("USING", "INDEX", "TABLESPACE")
            else None
        )
        where = self._parse_where()

        on = self._parse_field() if self._match(TokenType.ON) else None

        return self.expression(
            exp.IndexParameters,
            using=using,
            columns=columns,
            include=include,
            partition_by=partition_by,
            where=where,
            with_storage=with_storage,
            tablespace=tablespace,
            on=on,
        )

    def _parse_index(
        self, index: t.Optional[exp.Expression] = None, anonymous: bool = False
    ) -> t.Optional[exp.Index]:
        if index or anonymous:
            unique = None
            primary = None
            amp = None

            self._match(TokenType.ON)
            self._match(TokenType.TABLE)  # hive
            table = self._parse_table_parts(schema=True)
        else:
            unique = self._match(TokenType.UNIQUE)
            primary = self._match_text_seq("PRIMARY")
            amp = self._match_text_seq("AMP")

            if not self._match(TokenType.INDEX):
                return None

            index = self._parse_id_var()
            table = None

        params = self._parse_index_params()

        return self.expression(
            exp.Index,
            this=index,
            table=table,
            unique=unique,
            primary=primary,
            amp=amp,
            params=params,
        )

    def _parse_table_hints(self) -> t.Optional[t.List[exp.Expression]]:
        hints: t.List[exp.Expression] = []
        if self._match_pair(TokenType.WITH, TokenType.L_PAREN):
            # https://learn.microsoft.com/en-us/sql/t-sql/queries/hints-transact-sql-table?view=sql-server-ver16
            hints.append(
                self.expression(
                    exp.WithTableHint,
                    expressions=self._parse_csv(
                        lambda: self._parse_function() or self._parse_var(any_token=True)
                    ),
                )
            )
            self._match_r_paren()
        else:
            # https://dev.mysql.com/doc/refman/8.0/en/index-hints.html
            while self._match_set(self.TABLE_INDEX_HINT_TOKENS):
                hint = exp.IndexTableHint(this=self._prev.text.upper())

                self._match_set((TokenType.INDEX, TokenType.KEY))
                if self._match(TokenType.FOR):
                    hint.set("target", self._advance_any() and self._prev.text.upper())

                hint.set("expressions", self._parse_wrapped_id_vars())
                hints.append(hint)

        return hints or None

    def _parse_table_part(self, schema: bool = False) -> t.Optional[exp.Expression]:
        return (
            (not schema and self._parse_function(optional_parens=False))
            or self._parse_id_var(any_token=False)
            or self._parse_string_as_identifier()
            or self._parse_placeholder()
        )

    def _parse_table_parts(
        self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False
    ) -> exp.Table:
        catalog = None
        db = None
        table: t.Optional[exp.Expression | str] = self._parse_table_part(schema=schema)

        while self._match(TokenType.DOT):
            if catalog:
                # This allows nesting the table in arbitrarily many dot expressions if needed
                table = self.expression(
                    exp.Dot, this=table, expression=self._parse_table_part(schema=schema)
                )
            else:
                catalog = db
                db = table
                # "" used for tsql FROM a..b case
                table = self._parse_table_part(schema=schema) or ""

        if (
            wildcard
            and self._is_connected()
            and (isinstance(table, exp.Identifier) or not table)
            and self._match(TokenType.STAR)
        ):
            if isinstance(table, exp.Identifier):
                table.args["this"] += "*"
            else:
                table = exp.Identifier(this="*")

        # We bubble up comments from the Identifier to the Table
        comments = table.pop_comments() if isinstance(table, exp.Expression) else None

        if is_db_reference:
            catalog = db
            db = table
            table = None

        if not table and not is_db_reference:
            self.raise_error(f"Expected table name but got {self._curr}")
        if not db and is_db_reference:
            self.raise_error(f"Expected database name but got {self._curr}")

        table = self.expression(
            exp.Table,
            comments=comments,
            this=table,
            db=db,
            catalog=catalog,
        )

        changes = self._parse_changes()
        if changes:
            table.set("changes", changes)

        at_before = self._parse_historical_data()
        if at_before:
            table.set("when", at_before)

        pivots = self._parse_pivots()
        if pivots:
            table.set("pivots", pivots)

        return table
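
    # Illustrative sketch (not part of the parser): dotted names are folded into
    # catalog/db/this as _parse_table_parts consumes each DOT:
    #
    #     import sqlglot
    #     t = sqlglot.parse_one("SELECT * FROM c.d.t").find(sqlglot.exp.Table)
    #     # t.catalog == "c", t.db == "d", t.name == "t"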

    def _parse_table(
        self,
        schema: bool = False,
        joins: bool = False,
        alias_tokens: t.Optional[t.Collection[TokenType]] = None,
        parse_bracket: bool = False,
        is_db_reference: bool = False,
        parse_partition: bool = False,
        consume_pipe: bool = False,
    ) -> t.Optional[exp.Expression]:
        lateral = self._parse_lateral()
        if lateral:
            return lateral

        unnest = self._parse_unnest()
        if unnest:
            return unnest

        values = self._parse_derived_table_values()
        if values:
            return values

        subquery = self._parse_select(table=True, consume_pipe=consume_pipe)
        if subquery:
            if not subquery.args.get("pivots"):
                subquery.set("pivots", self._parse_pivots())
            return subquery

        bracket = parse_bracket and self._parse_bracket(None)
        bracket = self.expression(exp.Table, this=bracket) if bracket else None

        rows_from = self._match_text_seq("ROWS", "FROM") and self._parse_wrapped_csv(
            self._parse_table
        )
        rows_from = self.expression(exp.Table, rows_from=rows_from) if rows_from else None

        only = self._match(TokenType.ONLY)

        this = t.cast(
            exp.Expression,
            bracket
            or rows_from
            or self._parse_bracket(
                self._parse_table_parts(schema=schema, is_db_reference=is_db_reference)
            ),
        )

        if only:
            this.set("only", only)

        # Postgres supports a wildcard (table) suffix operator, which is a no-op in this context
        self._match_text_seq("*")

        parse_partition = parse_partition or self.SUPPORTS_PARTITION_SELECTION
        if parse_partition and self._match(TokenType.PARTITION, advance=False):
            this.set("partition", self._parse_partition())

        if schema:
            return self._parse_schema(this=this)

        version = self._parse_version()

        if version:
            this.set("version", version)

        if self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        alias = self._parse_table_alias(alias_tokens=alias_tokens or self.TABLE_ALIAS_TOKENS)
        if alias:
            this.set("alias", alias)

        if isinstance(this, exp.Table) and self._match_text_seq("AT"):
            return self.expression(
                exp.AtIndex, this=this.to_column(copy=False), expression=self._parse_id_var()
            )

        this.set("hints", self._parse_table_hints())

        if not this.args.get("pivots"):
            this.set("pivots", self._parse_pivots())

        if not self.dialect.ALIAS_POST_TABLESAMPLE:
            this.set("sample", self._parse_table_sample())

        if joins:
            for join in self._parse_joins():
                this.append("joins", join)

        if self._match_pair(TokenType.WITH, TokenType.ORDINALITY):
            this.set("ordinality", True)
            this.set("alias", self._parse_table_alias())

        return this

    def _parse_version(self) -> t.Optional[exp.Version]:
        if self._match(TokenType.TIMESTAMP_SNAPSHOT):
            this = "TIMESTAMP"
        elif self._match(TokenType.VERSION_SNAPSHOT):
            this = "VERSION"
        else:
            return None

        if self._match_set((TokenType.FROM, TokenType.BETWEEN)):
            kind = self._prev.text.upper()
            start = self._parse_bitwise()
            self._match_texts(("TO", "AND"))
            end = self._parse_bitwise()
            expression: t.Optional[exp.Expression] = self.expression(
                exp.Tuple, expressions=[start, end]
            )
        elif self._match_text_seq("CONTAINED", "IN"):
            kind = "CONTAINED IN"
            expression = self.expression(
                exp.Tuple, expressions=self._parse_wrapped_csv(self._parse_bitwise)
            )
        elif self._match(TokenType.ALL):
            kind = "ALL"
            expression = None
        else:
            self._match_text_seq("AS", "OF")
            kind = "AS OF"
            expression = self._parse_type()

        return self.expression(exp.Version, this=this, expression=expression, kind=kind)

    def _parse_historical_data(self) -> t.Optional[exp.HistoricalData]:
        # https://docs.snowflake.com/en/sql-reference/constructs/at-before
        index = self._index
        historical_data = None
        if self._match_texts(self.HISTORICAL_DATA_PREFIX):
            this = self._prev.text.upper()
            kind = (
                self._match(TokenType.L_PAREN)
                and self._match_texts(self.HISTORICAL_DATA_KIND)
                and self._prev.text.upper()
            )
            expression = self._match(TokenType.FARROW) and self._parse_bitwise()

            if expression:
                self._match_r_paren()
                historical_data = self.expression(
                    exp.HistoricalData, this=this, kind=kind, expression=expression
                )
            else:
                self._retreat(index)

        return historical_data

    def _parse_changes(self) -> t.Optional[exp.Changes]:
        if not self._match_text_seq("CHANGES", "(", "INFORMATION", "=>"):
            return None

        information = self._parse_var(any_token=True)
        self._match_r_paren()

        return self.expression(
            exp.Changes,
            information=information,
            at_before=self._parse_historical_data(),
            end=self._parse_historical_data(),
        )

    def _parse_unnest(self, with_alias: bool = True) -> t.Optional[exp.Unnest]:
        if not self._match(TokenType.UNNEST):
            return None

        expressions = self._parse_wrapped_csv(self._parse_equality)
        offset = self._match_pair(TokenType.WITH, TokenType.ORDINALITY)

        alias = self._parse_table_alias() if with_alias else None

        if alias:
            if self.dialect.UNNEST_COLUMN_ONLY:
                if alias.args.get("columns"):
                    self.raise_error("Unexpected extra column alias in unnest.")

                alias.set("columns", [alias.this])
                alias.set("this", None)

            columns = alias.args.get("columns") or []
            if offset and len(expressions) < len(columns):
                offset = columns.pop()

        if not offset and self._match_pair(TokenType.WITH, TokenType.OFFSET):
            self._match(TokenType.ALIAS)
            offset = self._parse_id_var(
                any_token=False, tokens=self.UNNEST_OFFSET_ALIAS_TOKENS
            ) or exp.to_identifier("offset")

        return self.expression(exp.Unnest, expressions=expressions, alias=alias, offset=offset)
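
    # Illustrative sketch (not part of the parser): BigQuery-style WITH OFFSET is
    # captured on the Unnest node, defaulting the offset alias to "offset":
    #
    #     import sqlglot
    #     q = sqlglot.parse_one(
    #         "SELECT x FROM UNNEST([1, 2]) AS x WITH OFFSET AS pos", read="bigquery"
    #     )
    #     # the exp.Unnest under the FROM clause records offset=pos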

    def _parse_derived_table_values(self) -> t.Optional[exp.Values]:
        is_derived = self._match_pair(TokenType.L_PAREN, TokenType.VALUES)
        if not is_derived and not (
            # ClickHouse's `FORMAT Values` is equivalent to `VALUES`
            self._match_text_seq("VALUES") or self._match_text_seq("FORMAT", "VALUES")
        ):
            return None

        expressions = self._parse_csv(self._parse_value)
        alias = self._parse_table_alias()

        if is_derived:
            self._match_r_paren()

        return self.expression(
            exp.Values, expressions=expressions, alias=alias or self._parse_table_alias()
        )

    def _parse_table_sample(self, as_modifier: bool = False) -> t.Optional[exp.TableSample]:
        if not self._match(TokenType.TABLE_SAMPLE) and not (
            as_modifier and self._match_text_seq("USING", "SAMPLE")
        ):
            return None

        bucket_numerator = None
        bucket_denominator = None
        bucket_field = None
        percent = None
        size = None
        seed = None

        method = self._parse_var(tokens=(TokenType.ROW,), upper=True)
        matched_l_paren = self._match(TokenType.L_PAREN)

        if self.TABLESAMPLE_CSV:
            num = None
            expressions = self._parse_csv(self._parse_primary)
        else:
            expressions = None
            num = (
                self._parse_factor()
                if self._match(TokenType.NUMBER, advance=False)
                else self._parse_primary() or self._parse_placeholder()
            )

        if self._match_text_seq("BUCKET"):
            bucket_numerator = self._parse_number()
            self._match_text_seq("OUT", "OF")
            bucket_denominator = self._parse_number()
            self._match(TokenType.ON)
            bucket_field = self._parse_field()
        elif self._match_set((TokenType.PERCENT, TokenType.MOD)):
            percent = num
        elif self._match(TokenType.ROWS) or not self.dialect.TABLESAMPLE_SIZE_IS_PERCENT:
            size = num
        else:
            percent = num

        if matched_l_paren:
            self._match_r_paren()

        if self._match(TokenType.L_PAREN):
            method = self._parse_var(upper=True)
            seed = self._match(TokenType.COMMA) and self._parse_number()
            self._match_r_paren()
        elif self._match_texts(("SEED", "REPEATABLE")):
            seed = self._parse_wrapped(self._parse_number)

        if not method and self.DEFAULT_SAMPLING_METHOD:
            method = exp.var(self.DEFAULT_SAMPLING_METHOD)

        return self.expression(
            exp.TableSample,
            expressions=expressions,
            method=method,
            bucket_numerator=bucket_numerator,
            bucket_denominator=bucket_denominator,
            bucket_field=bucket_field,
            percent=percent,
            size=size,
            seed=seed,
        )

    def _parse_pivots(self) -> t.Optional[t.List[exp.Pivot]]:
        return list(iter(self._parse_pivot, None)) or None

    def _parse_joins(self) -> t.Iterator[exp.Join]:
        return iter(self._parse_join, None)

    def _parse_unpivot_columns(self) -> t.Optional[exp.UnpivotColumns]:
        if not self._match(TokenType.INTO):
            return None

        return self.expression(
            exp.UnpivotColumns,
            this=self._match_text_seq("NAME") and self._parse_column(),
            expressions=self._match_text_seq("VALUE") and self._parse_csv(self._parse_column),
        )

    # https://duckdb.org/docs/sql/statements/pivot
    def _parse_simplified_pivot(self, is_unpivot: t.Optional[bool] = None) -> exp.Pivot:
        def _parse_on() -> t.Optional[exp.Expression]:
            this = self._parse_bitwise()

            if self._match(TokenType.IN):
                # PIVOT ... ON col IN (row_val1, row_val2)
                return self._parse_in(this)
            if self._match(TokenType.ALIAS, advance=False):
                # UNPIVOT ... ON (col1, col2, col3) AS row_val
                return self._parse_alias(this)

            return this

        this = self._parse_table()
        expressions = self._match(TokenType.ON) and self._parse_csv(_parse_on)
        into = self._parse_unpivot_columns()
        using = self._match(TokenType.USING) and self._parse_csv(
            lambda: self._parse_alias(self._parse_function())
        )
        group = self._parse_group()

        return self.expression(
            exp.Pivot,
            this=this,
            expressions=expressions,
            using=using,
            group=group,
            unpivot=is_unpivot,
            into=into,
        )

    def _parse_pivot_in(self) -> exp.In:
        def _parse_aliased_expression() -> t.Optional[exp.Expression]:
            this = self._parse_select_or_expression()

            self._match(TokenType.ALIAS)
            alias = self._parse_bitwise()
            if alias:
                if isinstance(alias, exp.Column) and not alias.db:
                    alias = alias.this
                return self.expression(exp.PivotAlias, this=this, alias=alias)

            return this

        value = self._parse_column()

        if not self._match_pair(TokenType.IN, TokenType.L_PAREN):
            self.raise_error("Expecting IN (")

        if self._match(TokenType.ANY):
            exprs: t.List[exp.Expression] = ensure_list(exp.PivotAny(this=self._parse_order()))
        else:
            exprs = self._parse_csv(_parse_aliased_expression)

        self._match_r_paren()
        return self.expression(exp.In, this=value, expressions=exprs)

    def _parse_pivot_aggregation(self) -> t.Optional[exp.Expression]:
        func = self._parse_function()
        if not func:
            self.raise_error("Expecting an aggregation function in PIVOT")

        return self._parse_alias(func)

    def _parse_pivot(self) -> t.Optional[exp.Pivot]:
        index = self._index
        include_nulls = None

        if self._match(TokenType.PIVOT):
            unpivot = False
        elif self._match(TokenType.UNPIVOT):
            unpivot = True

            # https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot.html#syntax
            if self._match_text_seq("INCLUDE", "NULLS"):
                include_nulls = True
            elif self._match_text_seq("EXCLUDE", "NULLS"):
                include_nulls = False
        else:
            return None

        expressions = []

        if not self._match(TokenType.L_PAREN):
            self._retreat(index)
            return None

        if unpivot:
            expressions = self._parse_csv(self._parse_column)
        else:
            expressions = self._parse_csv(self._parse_pivot_aggregation)

        if not expressions:
            self.raise_error("Failed to parse PIVOT's aggregation list")

        if not self._match(TokenType.FOR):
            self.raise_error("Expecting FOR")

        fields = []
        while True:
            field = self._try_parse(self._parse_pivot_in)
            if not field:
                break
            fields.append(field)

        default_on_null = self._match_text_seq("DEFAULT", "ON", "NULL") and self._parse_wrapped(
            self._parse_bitwise
        )

        group = self._parse_group()

        self._match_r_paren()

        pivot = self.expression(
            exp.Pivot,
            expressions=expressions,
            fields=fields,
            unpivot=unpivot,
            include_nulls=include_nulls,
            default_on_null=default_on_null,
            group=group,
        )

        if not self._match_set((TokenType.PIVOT, TokenType.UNPIVOT), advance=False):
            pivot.set("alias", self._parse_table_alias())

        if not unpivot:
            names = self._pivot_column_names(t.cast(t.List[exp.Expression], expressions))

            columns: t.List[exp.Expression] = []
            all_fields = []
            for pivot_field in pivot.fields:
                pivot_field_expressions = pivot_field.expressions

                # The `PivotAny` expression corresponds to `ANY ORDER BY <column>`; we can't infer in this case.
                if isinstance(seq_get(pivot_field_expressions, 0), exp.PivotAny):
                    continue

                all_fields.append(
                    [
                        fld.sql() if self.IDENTIFY_PIVOT_STRINGS else fld.alias_or_name
                        for fld in pivot_field_expressions
                    ]
                )

            if all_fields:
                if names:
                    all_fields.append(names)

                # Generate all possible combinations of the pivot columns
                # e.g. PIVOT(sum(...) as total FOR year IN (2000, 2010) FOR country IN ('NL', 'US'))
                # generates the product between [[2000, 2010], ['NL', 'US'], ['total']]
                for fld_parts_tuple in itertools.product(*all_fields):
                    fld_parts = list(fld_parts_tuple)

                    if names and self.PREFIXED_PIVOT_COLUMNS:
                        # Move the "name" to the front of the list
                        fld_parts.insert(0, fld_parts.pop(-1))

                    columns.append(exp.to_identifier("_".join(fld_parts)))

            pivot.set("columns", columns)

        return pivot

    def _pivot_column_names(self, aggregations: t.List[exp.Expression]) -> t.List[str]:
        return [agg.alias for agg in aggregations if agg.alias]
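
    # Illustrative sketch (not part of the parser): the column-name generation
    # above is a plain cartesian product; conceptually:
    #
    #     import itertools
    #     all_fields = [["2000", "2010"], ["NL", "US"], ["total"]]
    #     names = ["_".join(parts) for parts in itertools.product(*all_fields)]
    #     # names == ["2000_NL_total", "2000_US_total", "2010_NL_total", "2010_US_total"]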

    def _parse_prewhere(self, skip_where_token: bool = False) -> t.Optional[exp.PreWhere]:
        if not skip_where_token and not self._match(TokenType.PREWHERE):
            return None

        return self.expression(
            exp.PreWhere, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_where(self, skip_where_token: bool = False) -> t.Optional[exp.Where]:
        if not skip_where_token and not self._match(TokenType.WHERE):
            return None

        return self.expression(
            exp.Where, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_group(self, skip_group_by_token: bool = False) -> t.Optional[exp.Group]:
        if not skip_group_by_token and not self._match(TokenType.GROUP_BY):
            return None
        comments = self._prev_comments

        elements: t.Dict[str, t.Any] = defaultdict(list)

        if self._match(TokenType.ALL):
            elements["all"] = True
        elif self._match(TokenType.DISTINCT):
            elements["all"] = False

        if self._match_set(self.QUERY_MODIFIER_TOKENS, advance=False):
            return self.expression(exp.Group, comments=comments, **elements)  # type: ignore

        while True:
            index = self._index

            elements["expressions"].extend(
                self._parse_csv(
                    lambda: None
                    if self._match_set((TokenType.CUBE, TokenType.ROLLUP), advance=False)
                    else self._parse_assignment()
                )
            )

            before_with_index = self._index
            with_prefix = self._match(TokenType.WITH)

            if self._match(TokenType.ROLLUP):
                elements["rollup"].append(
                    self._parse_cube_or_rollup(exp.Rollup, with_prefix=with_prefix)
                )
            elif self._match(TokenType.CUBE):
                elements["cube"].append(
                    self._parse_cube_or_rollup(exp.Cube, with_prefix=with_prefix)
                )
            elif self._match(TokenType.GROUPING_SETS):
                elements["grouping_sets"].append(
                    self.expression(
                        exp.GroupingSets,
                        expressions=self._parse_wrapped_csv(self._parse_grouping_set),
                    )
                )
            elif self._match_text_seq("TOTALS"):
                elements["totals"] = True  # type: ignore

            if before_with_index <= self._index <= before_with_index + 1:
                self._retreat(before_with_index)
                break

            if index == self._index:
                break

        return self.expression(exp.Group, comments=comments, **elements)  # type: ignore
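
    # Illustrative sketch (not part of the parser): GROUP BY elements are bucketed
    # into plain expressions, rollups, cubes and grouping sets on the Group node:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("SELECT a, SUM(b) FROM t GROUP BY ROLLUP (a)")
    #     # q.args["group"] is an exp.Group whose "rollup" list holds one exp.Rollup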

    def _parse_cube_or_rollup(self, kind: t.Type[E], with_prefix: bool = False) -> E:
        return self.expression(
            kind, expressions=[] if with_prefix else self._parse_wrapped_csv(self._parse_column)
        )

    def _parse_grouping_set(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.L_PAREN):
            grouping_set = self._parse_csv(self._parse_column)
            self._match_r_paren()
            return self.expression(exp.Tuple, expressions=grouping_set)

        return self._parse_column()

    def _parse_having(self, skip_having_token: bool = False) -> t.Optional[exp.Having]:
        if not skip_having_token and not self._match(TokenType.HAVING):
            return None
        return self.expression(
            exp.Having, comments=self._prev_comments, this=self._parse_assignment()
        )

    def _parse_qualify(self) -> t.Optional[exp.Qualify]:
        if not self._match(TokenType.QUALIFY):
            return None
        return self.expression(exp.Qualify, this=self._parse_assignment())

    def _parse_connect_with_prior(self) -> t.Optional[exp.Expression]:
        self.NO_PAREN_FUNCTION_PARSERS["PRIOR"] = lambda self: self.expression(
            exp.Prior, this=self._parse_bitwise()
        )
        connect = self._parse_assignment()
        self.NO_PAREN_FUNCTION_PARSERS.pop("PRIOR")
        return connect

    def _parse_connect(self, skip_start_token: bool = False) -> t.Optional[exp.Connect]:
        if skip_start_token:
            start = None
        elif self._match(TokenType.START_WITH):
            start = self._parse_assignment()
        else:
            return None

        self._match(TokenType.CONNECT_BY)
        nocycle = self._match_text_seq("NOCYCLE")
        connect = self._parse_connect_with_prior()

        if not start and self._match(TokenType.START_WITH):
            start = self._parse_assignment()

        return self.expression(exp.Connect, start=start, connect=connect, nocycle=nocycle)

    def _parse_name_as_expression(self) -> t.Optional[exp.Expression]:
        this = self._parse_id_var(any_token=True)
        if self._match(TokenType.ALIAS):
            this = self.expression(exp.Alias, alias=this, this=self._parse_assignment())
        return this

    def _parse_interpolate(self) -> t.Optional[t.List[exp.Expression]]:
        if self._match_text_seq("INTERPOLATE"):
            return self._parse_wrapped_csv(self._parse_name_as_expression)
        return None

    def _parse_order(
        self, this: t.Optional[exp.Expression] = None, skip_order_token: bool = False
    ) -> t.Optional[exp.Expression]:
        siblings = None
        if not skip_order_token and not self._match(TokenType.ORDER_BY):
            if not self._match(TokenType.ORDER_SIBLINGS_BY):
                return this

            siblings = True

        return self.expression(
            exp.Order,
            comments=self._prev_comments,
            this=this,
            expressions=self._parse_csv(self._parse_ordered),
            siblings=siblings,
        )

    def _parse_sort(self, exp_class: t.Type[E], token: TokenType) -> t.Optional[E]:
        if not self._match(token):
            return None
        return self.expression(exp_class, expressions=self._parse_csv(self._parse_ordered))

    def _parse_ordered(
        self, parse_method: t.Optional[t.Callable] = None
    ) -> t.Optional[exp.Ordered]:
        this = parse_method() if parse_method else self._parse_assignment()
        if not this:
            return None

        if this.name.upper() == "ALL" and self.dialect.SUPPORTS_ORDER_BY_ALL:
            this = exp.var("ALL")

        asc = self._match(TokenType.ASC)
        desc = self._match(TokenType.DESC) or (asc and False)

        is_nulls_first = self._match_text_seq("NULLS", "FIRST")
        is_nulls_last = self._match_text_seq("NULLS", "LAST")

        nulls_first = is_nulls_first or False
        explicitly_null_ordered = is_nulls_first or is_nulls_last

        if (
            not explicitly_null_ordered
            and (
                (not desc and self.dialect.NULL_ORDERING == "nulls_are_small")
                or (desc and self.dialect.NULL_ORDERING != "nulls_are_small")
            )
            and self.dialect.NULL_ORDERING != "nulls_are_last"
        ):
            nulls_first = True

        if self._match_text_seq("WITH", "FILL"):
            with_fill = self.expression(
                exp.WithFill,
                **{  # type: ignore
                    "from": self._match(TokenType.FROM) and self._parse_bitwise(),
                    "to": self._match_text_seq("TO") and self._parse_bitwise(),
                    "step": self._match_text_seq("STEP") and self._parse_bitwise(),
                    "interpolate": self._parse_interpolate(),
                },
            )
        else:
            with_fill = None

        return self.expression(
            exp.Ordered, this=this, desc=desc, nulls_first=nulls_first, with_fill=with_fill
        )

    def _parse_limit_options(self) -> exp.LimitOptions:
        percent = self._match(TokenType.PERCENT)
        rows = self._match_set((TokenType.ROW, TokenType.ROWS))
        self._match_text_seq("ONLY")
        with_ties = self._match_text_seq("WITH", "TIES")
        return self.expression(exp.LimitOptions, percent=percent, rows=rows, with_ties=with_ties)

    def _parse_limit(
        self,
        this: t.Optional[exp.Expression] = None,
        top: bool = False,
        skip_limit_token: bool = False,
    ) -> t.Optional[exp.Expression]:
        if skip_limit_token or self._match(TokenType.TOP if top else TokenType.LIMIT):
            comments = self._prev_comments
            if top:
                limit_paren = self._match(TokenType.L_PAREN)
                expression = self._parse_term() if limit_paren else self._parse_number()

                if limit_paren:
                    self._match_r_paren()

                limit_options = self._parse_limit_options()
            else:
                limit_options = None
                expression = self._parse_term()

            if self._match(TokenType.COMMA):
                offset = expression
                expression = self._parse_term()
            else:
                offset = None

            limit_exp = self.expression(
                exp.Limit,
                this=this,
                expression=expression,
                offset=offset,
                comments=comments,
                limit_options=limit_options,
                expressions=self._parse_limit_by(),
            )

            return limit_exp

        if self._match(TokenType.FETCH):
            direction = self._match_set((TokenType.FIRST, TokenType.NEXT))
            direction = self._prev.text.upper() if direction else "FIRST"

            count = self._parse_field(tokens=self.FETCH_TOKENS)

            return self.expression(
                exp.Fetch,
                direction=direction,
                count=count,
                limit_options=self._parse_limit_options(),
            )

        return this

    def _parse_offset(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.OFFSET):
            return this

        count = self._parse_term()
        self._match_set((TokenType.ROW, TokenType.ROWS))

        return self.expression(
            exp.Offset, this=this, expression=count, expressions=self._parse_limit_by()
        )
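
    # Illustrative sketch (not part of the parser): MySQL's `LIMIT offset, count`
    # shorthand is normalized by _parse_limit into separate offset/expression args,
    # which lets other dialects re-render it explicitly:
    #
    #     import sqlglot
    #     q = sqlglot.parse_one("SELECT * FROM t LIMIT 5, 10", read="mysql")
    #     q.sql(dialect="postgres")  # roughly 'SELECT * FROM t LIMIT 10 OFFSET 5'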

    def _can_parse_limit_or_offset(self) -> bool:
        if not self._match_set(self.AMBIGUOUS_ALIAS_TOKENS, advance=False):
            return False

        index = self._index
        result = bool(
            self._try_parse(self._parse_limit, retreat=True)
            or self._try_parse(self._parse_offset, retreat=True)
        )
        self._retreat(index)
        return result

    def _parse_limit_by(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match_text_seq("BY") and self._parse_csv(self._parse_bitwise)

    def _parse_locks(self) -> t.List[exp.Lock]:
        locks = []
        while True:
            update, key = None, None
            if self._match_text_seq("FOR", "UPDATE"):
                update = True
            elif self._match_text_seq("FOR", "SHARE") or self._match_text_seq(
                "LOCK", "IN", "SHARE", "MODE"
            ):
                update = False
            elif self._match_text_seq("FOR", "KEY", "SHARE"):
                update, key = False, True
            elif self._match_text_seq("FOR", "NO", "KEY", "UPDATE"):
                update, key = True, True
            else:
                break

            expressions = None
            if self._match_text_seq("OF"):
                expressions = self._parse_csv(lambda: self._parse_table(schema=True))

            wait: t.Optional[bool | exp.Expression] = None
            if self._match_text_seq("NOWAIT"):
                wait = True
            elif self._match_text_seq("WAIT"):
                wait = self._parse_primary()
            elif self._match_text_seq("SKIP", "LOCKED"):
                wait = False

            locks.append(
                self.expression(
                    exp.Lock, update=update, expressions=expressions, wait=wait, key=key
                )
            )

        return locks

    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True

            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
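
    # Illustrative sketch (not part of the parser): when neither DISTINCT nor ALL
    # is written, the dialect's SET_OP_DISTINCT_BY_DEFAULT table decides, e.g.:
    #
    #     import sqlglot
    #     u = sqlglot.parse_one("SELECT a FROM x UNION SELECT a FROM y")
    #     # u is an exp.Union whose distinct arg is True under ANSI-style defaults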

    def _parse_set_operations(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        while this:
            setop = self.parse_set_operation(this)
            if not setop:
                break
            this = setop

        if isinstance(this, exp.SetOperation) and self.MODIFIERS_ATTACHED_TO_SET_OP:
            expression = this.expression

            if expression:
                for arg in self.SET_OP_MODIFIERS:
                    expr = expression.args.get(arg)
                    if expr:
                        this.set(arg, expr.pop())

        return this

    def _parse_expression(self) -> t.Optional[exp.Expression]:
        return self._parse_alias(self._parse_assignment())

    def _parse_assignment(self) -> t.Optional[exp.Expression]:
        this = self._parse_disjunction()
        if not this and self._next and self._next.token_type in self.ASSIGNMENT:
            # This allows us to parse <non-identifier token> := <expr>
            this = exp.column(
                t.cast(str, self._advance_any(ignore_reserved=True) and self._prev.text)
            )

        while self._match_set(self.ASSIGNMENT):
            if isinstance(this, exp.Column) and len(this.parts) == 1:
                this = this.this

            this = self.expression(
                self.ASSIGNMENT[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=self._parse_assignment(),
            )

        return this

    def _parse_disjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_conjunction, self.DISJUNCTION)

    def _parse_conjunction(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_equality, self.CONJUNCTION)

    def _parse_equality(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_comparison, self.EQUALITY)

    def _parse_comparison(self) -> t.Optional[exp.Expression]:
        return self._parse_tokens(self._parse_range, self.COMPARISON)

    def _parse_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        this = this or self._parse_bitwise()
        negate = self._match(TokenType.NOT)

        if self._match_set(self.RANGE_PARSERS):
            expression = self.RANGE_PARSERS[self._prev.token_type](self, this)
            if not expression:
                return this

            this = expression
        elif self._match(TokenType.ISNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())

        # Postgres supports ISNULL and NOTNULL for conditions.
        # https://blog.andreiavram.ro/postgresql-null-composite-type/
        if self._match(TokenType.NOTNULL):
            this = self.expression(exp.Is, this=this, expression=exp.Null())
            this = self.expression(exp.Not, this=this)

        if negate:
            this = self._negate_range(this)

        if self._match(TokenType.IS):
            this = self._parse_is(this)

        return this

    def _negate_range(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]:
        if not this:
            return this

        return self.expression(exp.Not, this=this)

    def _parse_is(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        index = self._index - 1
        negate = self._match(TokenType.NOT)

        if self._match_text_seq("DISTINCT", "FROM"):
            klass = exp.NullSafeEQ if negate else exp.NullSafeNEQ
            return self.expression(klass, this=this, expression=self._parse_bitwise())

        if self._match(TokenType.JSON):
            kind = self._match_texts(self.IS_JSON_PREDICATE_KIND) and self._prev.text.upper()

            if self._match_text_seq("WITH"):
                _with = True
            elif self._match_text_seq("WITHOUT"):
                _with = False
            else:
                _with = None

            unique = self._match(TokenType.UNIQUE)
            self._match_text_seq("KEYS")
            expression: t.Optional[exp.Expression] = self.expression(
                exp.JSON, **{"this": kind, "with": _with, "unique": unique}
            )
        else:
            expression = self._parse_primary() or self._parse_null()
            if not expression:
                self._retreat(index)
                return None

        this = self.expression(exp.Is, this=this, expression=expression)
        return self.expression(exp.Not, this=this) if negate else this

    def _parse_in(self, this: t.Optional[exp.Expression], alias: bool = False) -> exp.In:
        unnest = self._parse_unnest(with_alias=False)
        if unnest:
            this = self.expression(exp.In, this=this, unnest=unnest)
        elif self._match_set((TokenType.L_PAREN, TokenType.L_BRACKET)):
            matched_l_paren = self._prev.token_type == TokenType.L_PAREN
            expressions = self._parse_csv(lambda: self._parse_select_or_expression(alias=alias))

            if len(expressions) == 1 and isinstance(expressions[0], exp.Query):
                this = self.expression(exp.In, this=this, query=expressions[0].subquery(copy=False))
            else:
                this = self.expression(exp.In, this=this, expressions=expressions)

            if matched_l_paren:
                self._match_r_paren(this)
            elif not self._match(TokenType.R_BRACKET, expression=this):
                self.raise_error("Expecting ]")
        else:
            this = self.expression(exp.In, this=this, field=self._parse_column())

        return this

    def _parse_between(self, this: t.Optional[exp.Expression]) -> exp.Between:
        symmetric = None
        if self._match_text_seq("SYMMETRIC"):
            symmetric = True
        elif self._match_text_seq("ASYMMETRIC"):
            symmetric = False

        low = self._parse_bitwise()
        self._match(TokenType.AND)
        high = self._parse_bitwise()

        return self.expression(
            exp.Between,
            this=this,
            low=low,
            high=high,
            symmetric=symmetric,
        )

    def _parse_escape(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if not self._match(TokenType.ESCAPE):
            return this
        return self.expression(exp.Escape, this=this, expression=self._parse_string())

    def _parse_interval(self, match_interval: bool = True) -> t.Optional[exp.Add | exp.Interval]:
        index = self._index

        if not self._match(TokenType.INTERVAL) and match_interval:
            return None

        if self._match(TokenType.STRING, advance=False):
            this = self._parse_primary()
        else:
            this = self._parse_term()

        if not this or (
            isinstance(this, exp.Column)
            and not this.table
            and not this.this.quoted
            and this.name.upper() == "IS"
        ):
            self._retreat(index)
            return None

        unit = self._parse_function() or (
            not self._match(TokenType.ALIAS, advance=False)
            and self._parse_var(any_token=True, upper=True)
        )

        # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse
        # each INTERVAL expression into this canonical form so it's easy to transpile
        if this and this.is_number:
            this = exp.Literal.string(this.to_py())
        elif this and this.is_string:
            parts = exp.INTERVAL_STRING_RE.findall(this.name)
            if parts and unit:
                # Unconsume the eagerly-parsed unit, since the real unit was part of the string
                unit = None
                self._retreat(self._index - 1)

            if len(parts) == 1:
                this = exp.Literal.string(parts[0][0])
                unit = self.expression(exp.Var, this=parts[0][1].upper())

        if self.INTERVAL_SPANS and self._match_text_seq("TO"):
            unit = self.expression(
                exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True)
            )

        interval = self.expression(exp.Interval, this=this, unit=unit)

        index = self._index
        self._match(TokenType.PLUS)

        # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals
        if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False):
            return self.expression(
                exp.Add, this=interval, expression=self._parse_interval(match_interval=False)
            )

        self._retreat(index)
        return interval
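
    # Illustrative sketch (not part of the parser): _parse_interval canonicalizes
    # numeric and embedded-unit forms into INTERVAL '<n>' <UNIT>, so these parse
    # to equivalent trees:
    #
    #     import sqlglot
    #     a = sqlglot.parse_one("SELECT INTERVAL 5 DAY")
    #     b = sqlglot.parse_one("SELECT INTERVAL '5 day'")
    #     # both intervals end up with this='5' (a string literal) and unit=DAY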
self._match(TokenType.INTERVAL) and match_interval: 5034 return None 5035 5036 if self._match(TokenType.STRING, advance=False): 5037 this = self._parse_primary() 5038 else: 5039 this = self._parse_term() 5040 5041 if not this or ( 5042 isinstance(this, exp.Column) 5043 and not this.table 5044 and not this.this.quoted 5045 and this.name.upper() == "IS" 5046 ): 5047 self._retreat(index) 5048 return None 5049 5050 unit = self._parse_function() or ( 5051 not self._match(TokenType.ALIAS, advance=False) 5052 and self._parse_var(any_token=True, upper=True) 5053 ) 5054 5055 # Most dialects support, e.g., the form INTERVAL '5' day, thus we try to parse 5056 # each INTERVAL expression into this canonical form so it's easy to transpile 5057 if this and this.is_number: 5058 this = exp.Literal.string(this.to_py()) 5059 elif this and this.is_string: 5060 parts = exp.INTERVAL_STRING_RE.findall(this.name) 5061 if parts and unit: 5062 # Unconsume the eagerly-parsed unit, since the real unit was part of the string 5063 unit = None 5064 self._retreat(self._index - 1) 5065 5066 if len(parts) == 1: 5067 this = exp.Literal.string(parts[0][0]) 5068 unit = self.expression(exp.Var, this=parts[0][1].upper()) 5069 if self.INTERVAL_SPANS and self._match_text_seq("TO"): 5070 unit = self.expression( 5071 exp.IntervalSpan, this=unit, expression=self._parse_var(any_token=True, upper=True) 5072 ) 5073 5074 interval = self.expression(exp.Interval, this=this, unit=unit) 5075 5076 index = self._index 5077 self._match(TokenType.PLUS) 5078 5079 # Convert INTERVAL 'val_1' unit_1 [+] ... [+] 'val_n' unit_n into a sum of intervals 5080 if self._match_set((TokenType.STRING, TokenType.NUMBER), advance=False): 5081 return self.expression( 5082 exp.Add, this=interval, expression=self._parse_interval(match_interval=False) 5083 ) 5084 5085 self._retreat(index) 5086 return interval 5087 5088 def _parse_bitwise(self) -> t.Optional[exp.Expression]: 5089 this = self._parse_term() 5090 5091 while True: 5092 if self._match_set(self.BITWISE): 5093 this = self.expression( 5094 self.BITWISE[self._prev.token_type], 5095 this=this, 5096 expression=self._parse_term(), 5097 ) 5098 elif self.dialect.DPIPE_IS_STRING_CONCAT and self._match(TokenType.DPIPE): 5099 this = self.expression( 5100 exp.DPipe, 5101 this=this, 5102 expression=self._parse_term(), 5103 safe=not self.dialect.STRICT_STRING_CONCAT, 5104 ) 5105 elif self._match(TokenType.DQMARK): 5106 this = self.expression( 5107 exp.Coalesce, this=this, expressions=ensure_list(self._parse_term()) 5108 ) 5109 elif self._match_pair(TokenType.LT, TokenType.LT): 5110 this = self.expression( 5111 exp.BitwiseLeftShift, this=this, expression=self._parse_term() 5112 ) 5113 elif self._match_pair(TokenType.GT, TokenType.GT): 5114 this = self.expression( 5115 exp.BitwiseRightShift, this=this, expression=self._parse_term() 5116 ) 5117 else: 5118 break 5119 5120 return this 5121 5122 def _parse_term(self) -> t.Optional[exp.Expression]: 5123 this = self._parse_factor() 5124 5125 while self._match_set(self.TERM): 5126 klass = self.TERM[self._prev.token_type] 5127 comments = self._prev_comments 5128 expression = self._parse_factor() 5129 5130 this = self.expression(klass, this=this, comments=comments, expression=expression) 5131 5132 if isinstance(this, exp.Collate): 5133 expr = this.expression 5134 5135 # Preserve collations such as pg_catalog."default" (Postgres) as columns, otherwise 5136 # fall back to Identifier / Var 5137 if isinstance(expr, exp.Column) and len(expr.parts) == 1: 5138 ident = expr.this 5139 if
isinstance(ident, exp.Identifier): 5140 this.set("expression", ident if ident.quoted else exp.var(ident.name)) 5141 5142 return this 5143 5144 def _parse_factor(self) -> t.Optional[exp.Expression]: 5145 parse_method = self._parse_exponent if self.EXPONENT else self._parse_unary 5146 this = parse_method() 5147 5148 while self._match_set(self.FACTOR): 5149 klass = self.FACTOR[self._prev.token_type] 5150 comments = self._prev_comments 5151 expression = parse_method() 5152 5153 if not expression and klass is exp.IntDiv and self._prev.text.isalpha(): 5154 self._retreat(self._index - 1) 5155 return this 5156 5157 this = self.expression(klass, this=this, comments=comments, expression=expression) 5158 5159 if isinstance(this, exp.Div): 5160 this.args["typed"] = self.dialect.TYPED_DIVISION 5161 this.args["safe"] = self.dialect.SAFE_DIVISION 5162 5163 return this 5164 5165 def _parse_exponent(self) -> t.Optional[exp.Expression]: 5166 return self._parse_tokens(self._parse_unary, self.EXPONENT) 5167 5168 def _parse_unary(self) -> t.Optional[exp.Expression]: 5169 if self._match_set(self.UNARY_PARSERS): 5170 return self.UNARY_PARSERS[self._prev.token_type](self) 5171 return self._parse_at_time_zone(self._parse_type()) 5172 5173 def _parse_type( 5174 self, parse_interval: bool = True, fallback_to_identifier: bool = False 5175 ) -> t.Optional[exp.Expression]: 5176 interval = parse_interval and self._parse_interval() 5177 if interval: 5178 return interval 5179 5180 index = self._index 5181 data_type = self._parse_types(check_func=True, allow_identifiers=False) 5182 5183 # parse_types() returns a Cast if we parsed BQ's inline constructor <type>(<values>) e.g. 5184 # STRUCT<a INT, b STRING>(1, 'foo'), which is canonicalized to CAST(<values> AS <type>) 5185 if isinstance(data_type, exp.Cast): 5186 # This constructor can contain ops directly after it, for instance struct unnesting: 5187 # STRUCT<a INT, b STRING>(1, 'foo').* --> CAST(STRUCT(1, 'foo') AS STRUCT<a INT, b STRING>).* 5188 return self._parse_column_ops(data_type) 5189 5190 if data_type: 5191 index2 = self._index 5192 this = self._parse_primary() 5193 5194 if isinstance(this, exp.Literal): 5195 literal = this.name 5196 this = self._parse_column_ops(this) 5197 5198 parser = self.TYPE_LITERAL_PARSERS.get(data_type.this) 5199 if parser: 5200 return parser(self, this, data_type) 5201 5202 if ( 5203 self.ZONE_AWARE_TIMESTAMP_CONSTRUCTOR 5204 and data_type.is_type(exp.DataType.Type.TIMESTAMP) 5205 and TIME_ZONE_RE.search(literal) 5206 ): 5207 data_type = exp.DataType.build("TIMESTAMPTZ") 5208 5209 return self.expression(exp.Cast, this=this, to=data_type) 5210 5211 # The expressions arg gets set by the parser when we have something like DECIMAL(38, 0) 5212 # in the input SQL. In that case, we'll produce these tokens: DECIMAL ( 38 , 0 ) 5213 # 5214 # If the index difference here is greater than 1, that means the parser itself must have 5215 # consumed additional tokens such as the DECIMAL scale and precision in the above example. 5216 # 5217 # If it's not greater than 1, then it must be 1, because we've consumed at least the type 5218 # keyword, meaning that the expressions arg of the DataType must have gotten set by a 5219 # callable in the TYPE_CONVERTERS mapping. For example, Snowflake converts DECIMAL to 5220 # DECIMAL(38, 0) in order to facilitate the data type's transpilation. 5221 # 5222 # In these cases, we don't really want to return the converted type, but instead retreat 5223 # and try to parse a Column or Identifier in the section below.
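# A small sketch of the typed-literal branch above (public API only; the assertion reflects expected behavior rather than a test from this module):
#
#   import sqlglot
#   from sqlglot import exp
#   ast = sqlglot.parse_one("DATE '2024-01-01'")
#   assert isinstance(ast, exp.Cast)  # typed literals are canonicalized to casts
#   # ast.sql() should give "CAST('2024-01-01' AS DATE)"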
5224 if data_type.expressions and index2 - index > 1: 5225 self._retreat(index2) 5226 return self._parse_column_ops(data_type) 5227 5228 self._retreat(index) 5229 5230 if fallback_to_identifier: 5231 return self._parse_id_var() 5232 5233 this = self._parse_column() 5234 return this and self._parse_column_ops(this) 5235 5236 def _parse_type_size(self) -> t.Optional[exp.DataTypeParam]: 5237 this = self._parse_type() 5238 if not this: 5239 return None 5240 5241 if isinstance(this, exp.Column) and not this.table: 5242 this = exp.var(this.name.upper()) 5243 5244 return self.expression( 5245 exp.DataTypeParam, this=this, expression=self._parse_var(any_token=True) 5246 ) 5247 5248 def _parse_user_defined_type(self, identifier: exp.Identifier) -> t.Optional[exp.Expression]: 5249 type_name = identifier.name 5250 5251 while self._match(TokenType.DOT): 5252 type_name = f"{type_name}.{self._advance_any() and self._prev.text}" 5253 5254 return exp.DataType.build(type_name, udt=True) 5255 5256 def _parse_types( 5257 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 5258 ) -> t.Optional[exp.Expression]: 5259 index = self._index 5260 5261 this: t.Optional[exp.Expression] = None 5262 prefix = self._match_text_seq("SYSUDTLIB", ".") 5263 5264 if not self._match_set(self.TYPE_TOKENS): 5265 identifier = allow_identifiers and self._parse_id_var( 5266 any_token=False, tokens=(TokenType.VAR,) 5267 ) 5268 if isinstance(identifier, exp.Identifier): 5269 tokens = self.dialect.tokenize(identifier.sql(dialect=self.dialect)) 5270 5271 if len(tokens) != 1: 5272 self.raise_error("Unexpected identifier", self._prev) 5273 5274 if tokens[0].token_type in self.TYPE_TOKENS: 5275 self._prev = tokens[0] 5276 elif self.dialect.SUPPORTS_USER_DEFINED_TYPES: 5277 this = self._parse_user_defined_type(identifier) 5278 else: 5279 self._retreat(self._index - 1) 5280 return None 5281 else: 5282 return None 5283 5284 type_token = self._prev.token_type 5285 5286 if type_token == TokenType.PSEUDO_TYPE: 5287 return self.expression(exp.PseudoType, this=self._prev.text.upper()) 5288 5289 if type_token == TokenType.OBJECT_IDENTIFIER: 5290 return self.expression(exp.ObjectIdentifier, this=self._prev.text.upper()) 5291 5292 # https://materialize.com/docs/sql/types/map/ 5293 if type_token == TokenType.MAP and self._match(TokenType.L_BRACKET): 5294 key_type = self._parse_types( 5295 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5296 ) 5297 if not self._match(TokenType.FARROW): 5298 self._retreat(index) 5299 return None 5300 5301 value_type = self._parse_types( 5302 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5303 ) 5304 if not self._match(TokenType.R_BRACKET): 5305 self._retreat(index) 5306 return None 5307 5308 return exp.DataType( 5309 this=exp.DataType.Type.MAP, 5310 expressions=[key_type, value_type], 5311 nested=True, 5312 prefix=prefix, 5313 ) 5314 5315 nested = type_token in self.NESTED_TYPE_TOKENS 5316 is_struct = type_token in self.STRUCT_TYPE_TOKENS 5317 is_aggregate = type_token in self.AGGREGATE_TYPE_TOKENS 5318 expressions = None 5319 maybe_func = False 5320 5321 if self._match(TokenType.L_PAREN): 5322 if is_struct: 5323 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5324 elif nested: 5325 expressions = self._parse_csv( 5326 lambda: self._parse_types( 5327 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5328 ) 5329 ) 5330 if type_token == TokenType.NULLABLE and len(expressions) == 
1: 5331 this = expressions[0] 5332 this.set("nullable", True) 5333 self._match_r_paren() 5334 return this 5335 elif type_token in self.ENUM_TYPE_TOKENS: 5336 expressions = self._parse_csv(self._parse_equality) 5337 elif is_aggregate: 5338 func_or_ident = self._parse_function(anonymous=True) or self._parse_id_var( 5339 any_token=False, tokens=(TokenType.VAR, TokenType.ANY) 5340 ) 5341 if not func_or_ident: 5342 return None 5343 expressions = [func_or_ident] 5344 if self._match(TokenType.COMMA): 5345 expressions.extend( 5346 self._parse_csv( 5347 lambda: self._parse_types( 5348 check_func=check_func, 5349 schema=schema, 5350 allow_identifiers=allow_identifiers, 5351 ) 5352 ) 5353 ) 5354 else: 5355 expressions = self._parse_csv(self._parse_type_size) 5356 5357 # https://docs.snowflake.com/en/sql-reference/data-types-vector 5358 if type_token == TokenType.VECTOR and len(expressions) == 2: 5359 expressions[0] = exp.DataType.build(expressions[0].name, dialect=self.dialect) 5360 5361 if not expressions or not self._match(TokenType.R_PAREN): 5362 self._retreat(index) 5363 return None 5364 5365 maybe_func = True 5366 5367 values: t.Optional[t.List[exp.Expression]] = None 5368 5369 if nested and self._match(TokenType.LT): 5370 if is_struct: 5371 expressions = self._parse_csv(lambda: self._parse_struct_types(type_required=True)) 5372 else: 5373 expressions = self._parse_csv( 5374 lambda: self._parse_types( 5375 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 5376 ) 5377 ) 5378 5379 if not self._match(TokenType.GT): 5380 self.raise_error("Expecting >") 5381 5382 if self._match_set((TokenType.L_BRACKET, TokenType.L_PAREN)): 5383 values = self._parse_csv(self._parse_assignment) 5384 if not values and is_struct: 5385 values = None 5386 self._retreat(self._index - 1) 5387 else: 5388 self._match_set((TokenType.R_BRACKET, TokenType.R_PAREN)) 5389 5390 if type_token in self.TIMESTAMPS: 5391 if self._match_text_seq("WITH", "TIME", "ZONE"): 5392 maybe_func = False 5393 tz_type = ( 5394 exp.DataType.Type.TIMETZ 5395 if type_token in self.TIMES 5396 else exp.DataType.Type.TIMESTAMPTZ 5397 ) 5398 this = exp.DataType(this=tz_type, expressions=expressions) 5399 elif self._match_text_seq("WITH", "LOCAL", "TIME", "ZONE"): 5400 maybe_func = False 5401 this = exp.DataType(this=exp.DataType.Type.TIMESTAMPLTZ, expressions=expressions) 5402 elif self._match_text_seq("WITHOUT", "TIME", "ZONE"): 5403 maybe_func = False 5404 elif type_token == TokenType.INTERVAL: 5405 unit = self._parse_var(upper=True) 5406 if unit: 5407 if self._match_text_seq("TO"): 5408 unit = exp.IntervalSpan(this=unit, expression=self._parse_var(upper=True)) 5409 5410 this = self.expression(exp.DataType, this=self.expression(exp.Interval, unit=unit)) 5411 else: 5412 this = self.expression(exp.DataType, this=exp.DataType.Type.INTERVAL) 5413 elif type_token == TokenType.VOID: 5414 this = exp.DataType(this=exp.DataType.Type.NULL) 5415 5416 if maybe_func and check_func: 5417 index2 = self._index 5418 peek = self._parse_string() 5419 5420 if not peek: 5421 self._retreat(index) 5422 return None 5423 5424 self._retreat(index2) 5425 5426 if not this: 5427 if self._match_text_seq("UNSIGNED"): 5428 unsigned_type_token = self.SIGNED_TO_UNSIGNED_TYPE_TOKEN.get(type_token) 5429 if not unsigned_type_token: 5430 self.raise_error(f"Cannot convert {type_token.value} to unsigned.") 5431 5432 type_token = unsigned_type_token or type_token 5433 5434 this = exp.DataType( 5435 this=exp.DataType.Type[type_token.value], 5436 expressions=expressions, 
5437 nested=nested, 5438 prefix=prefix, 5439 ) 5440 5441 # Empty arrays/structs are allowed 5442 if values is not None: 5443 cls = exp.Struct if is_struct else exp.Array 5444 this = exp.cast(cls(expressions=values), this, copy=False) 5445 5446 elif expressions: 5447 this.set("expressions", expressions) 5448 5449 # https://materialize.com/docs/sql/types/list/#type-name 5450 while self._match(TokenType.LIST): 5451 this = exp.DataType(this=exp.DataType.Type.LIST, expressions=[this], nested=True) 5452 5453 index = self._index 5454 5455 # Postgres supports the INT ARRAY[3] syntax as a synonym for INT[3] 5456 matched_array = self._match(TokenType.ARRAY) 5457 5458 while self._curr: 5459 datatype_token = self._prev.token_type 5460 matched_l_bracket = self._match(TokenType.L_BRACKET) 5461 5462 if (not matched_l_bracket and not matched_array) or ( 5463 datatype_token == TokenType.ARRAY and self._match(TokenType.R_BRACKET) 5464 ): 5465 # Postgres allows casting empty arrays such as ARRAY[]::INT[], 5466 # not to be confused with the fixed size array parsing 5467 break 5468 5469 matched_array = False 5470 values = self._parse_csv(self._parse_assignment) or None 5471 if ( 5472 values 5473 and not schema 5474 and ( 5475 not self.dialect.SUPPORTS_FIXED_SIZE_ARRAYS or datatype_token == TokenType.ARRAY 5476 ) 5477 ): 5478 # Retreating here means that we should not parse the following values as part of the data type, e.g. in DuckDB 5479 # ARRAY[1] should retreat and instead be parsed into exp.Array in contrast to INT[x][y] which denotes a fixed-size array data type 5480 self._retreat(index) 5481 break 5482 5483 this = exp.DataType( 5484 this=exp.DataType.Type.ARRAY, expressions=[this], values=values, nested=True 5485 ) 5486 self._match(TokenType.R_BRACKET) 5487 5488 if self.TYPE_CONVERTERS and isinstance(this.this, exp.DataType.Type): 5489 converter = self.TYPE_CONVERTERS.get(this.this) 5490 if converter: 5491 this = converter(t.cast(exp.DataType, this)) 5492 5493 return this 5494 5495 def _parse_struct_types(self, type_required: bool = False) -> t.Optional[exp.Expression]: 5496 index = self._index 5497 5498 if ( 5499 self._curr 5500 and self._next 5501 and self._curr.token_type in self.TYPE_TOKENS 5502 and self._next.token_type in self.TYPE_TOKENS 5503 ): 5504 # Takes care of special cases like `STRUCT<list ARRAY<...>>` where the identifier is also a 5505 # type token. 
Without this, the list will be parsed as a type and we'll eventually crash 5506 this = self._parse_id_var() 5507 else: 5508 this = ( 5509 self._parse_type(parse_interval=False, fallback_to_identifier=True) 5510 or self._parse_id_var() 5511 ) 5512 5513 self._match(TokenType.COLON) 5514 5515 if ( 5516 type_required 5517 and not isinstance(this, exp.DataType) 5518 and not self._match_set(self.TYPE_TOKENS, advance=False) 5519 ): 5520 self._retreat(index) 5521 return self._parse_types() 5522 5523 return self._parse_column_def(this) 5524 5525 def _parse_at_time_zone(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5526 if not self._match_text_seq("AT", "TIME", "ZONE"): 5527 return this 5528 return self.expression(exp.AtTimeZone, this=this, zone=self._parse_unary()) 5529 5530 def _parse_column(self) -> t.Optional[exp.Expression]: 5531 this = self._parse_column_reference() 5532 column = self._parse_column_ops(this) if this else self._parse_bracket(this) 5533 5534 if self.dialect.SUPPORTS_COLUMN_JOIN_MARKS and column: 5535 column.set("join_mark", self._match(TokenType.JOIN_MARKER)) 5536 5537 return column 5538 5539 def _parse_column_reference(self) -> t.Optional[exp.Expression]: 5540 this = self._parse_field() 5541 if ( 5542 not this 5543 and self._match(TokenType.VALUES, advance=False) 5544 and self.VALUES_FOLLOWED_BY_PAREN 5545 and (not self._next or self._next.token_type != TokenType.L_PAREN) 5546 ): 5547 this = self._parse_id_var() 5548 5549 if isinstance(this, exp.Identifier): 5550 # We bubble up comments from the Identifier to the Column 5551 this = self.expression(exp.Column, comments=this.pop_comments(), this=this) 5552 5553 return this 5554 5555 def _parse_colon_as_variant_extract( 5556 self, this: t.Optional[exp.Expression] 5557 ) -> t.Optional[exp.Expression]: 5558 casts = [] 5559 json_path = [] 5560 escape = None 5561 5562 while self._match(TokenType.COLON): 5563 start_index = self._index 5564 5565 # Snowflake allows reserved keywords as json keys but advance_any() excludes TokenType.SELECT from any_tokens=True 5566 path = self._parse_column_ops( 5567 self._parse_field(any_token=True, tokens=(TokenType.SELECT,)) 5568 ) 5569 5570 # The cast :: operator has a lower precedence than the extraction operator :, so 5571 # we rearrange the AST appropriately to avoid casting the JSON path 5572 while isinstance(path, exp.Cast): 5573 casts.append(path.to) 5574 path = path.this 5575 5576 if casts: 5577 dcolon_offset = next( 5578 i 5579 for i, t in enumerate(self._tokens[start_index:]) 5580 if t.token_type == TokenType.DCOLON 5581 ) 5582 end_token = self._tokens[start_index + dcolon_offset - 1] 5583 else: 5584 end_token = self._prev 5585 5586 if path: 5587 # Escape single quotes from Snowflake's colon extraction (e.g. 
col:"a'b") as 5588 # it'll roundtrip to a string literal in GET_PATH 5589 if isinstance(path, exp.Identifier) and path.quoted: 5590 escape = True 5591 5592 json_path.append(self._find_sql(self._tokens[start_index], end_token)) 5593 5594 # The VARIANT extract in Snowflake/Databricks is parsed as a JSONExtract; Snowflake uses the json_path in GET_PATH() while 5595 # Databricks transforms it back to the colon/dot notation 5596 if json_path: 5597 json_path_expr = self.dialect.to_json_path(exp.Literal.string(".".join(json_path))) 5598 5599 if json_path_expr: 5600 json_path_expr.set("escape", escape) 5601 5602 this = self.expression( 5603 exp.JSONExtract, 5604 this=this, 5605 expression=json_path_expr, 5606 variant_extract=True, 5607 requires_json=self.JSON_EXTRACT_REQUIRES_JSON_EXPRESSION, 5608 ) 5609 5610 while casts: 5611 this = self.expression(exp.Cast, this=this, to=casts.pop()) 5612 5613 return this 5614 5615 def _parse_dcolon(self) -> t.Optional[exp.Expression]: 5616 return self._parse_types() 5617 5618 def _parse_column_ops(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 5619 this = self._parse_bracket(this) 5620 5621 while self._match_set(self.COLUMN_OPERATORS): 5622 op_token = self._prev.token_type 5623 op = self.COLUMN_OPERATORS.get(op_token) 5624 5625 if op_token in (TokenType.DCOLON, TokenType.DOTCOLON): 5626 field = self._parse_dcolon() 5627 if not field: 5628 self.raise_error("Expected type") 5629 elif op and self._curr: 5630 field = self._parse_column_reference() or self._parse_bracket() 5631 if isinstance(field, exp.Column) and self._match(TokenType.DOT, advance=False): 5632 field = self._parse_column_ops(field) 5633 else: 5634 field = self._parse_field(any_token=True, anonymous_func=True) 5635 5636 # Function calls can be qualified, e.g., x.y.FOO() 5637 # This converts the final AST to a series of Dots leading to the function call 5638 # https://cloud.google.com/bigquery/docs/reference/standard-sql/functions-reference#function_call_rules 5639 if isinstance(field, (exp.Func, exp.Window)) and this: 5640 this = this.transform( 5641 lambda n: n.to_dot(include_dots=False) if isinstance(n, exp.Column) else n 5642 ) 5643 5644 if op: 5645 this = op(self, this, field) 5646 elif isinstance(this, exp.Column) and not this.args.get("catalog"): 5647 this = self.expression( 5648 exp.Column, 5649 comments=this.comments, 5650 this=field, 5651 table=this.this, 5652 db=this.args.get("table"), 5653 catalog=this.args.get("db"), 5654 ) 5655 elif isinstance(field, exp.Window): 5656 # Move the exp.Dot's to the window's function 5657 window_func = self.expression(exp.Dot, this=this, expression=field.this) 5658 field.set("this", window_func) 5659 this = field 5660 else: 5661 this = self.expression(exp.Dot, this=this, expression=field) 5662 5663 if field and field.comments: 5664 t.cast(exp.Expression, this).add_comments(field.pop_comments()) 5665 5666 this = self._parse_bracket(this) 5667 5668 return self._parse_colon_as_variant_extract(this) if self.COLON_IS_VARIANT_EXTRACT else this 5669 5670 def _parse_paren(self) -> t.Optional[exp.Expression]: 5671 if not self._match(TokenType.L_PAREN): 5672 return None 5673 5674 comments = self._prev_comments 5675 query = self._parse_select() 5676 5677 if query: 5678 expressions = [query] 5679 else: 5680 expressions = self._parse_expressions() 5681 5682 this = self._parse_query_modifiers(seq_get(expressions, 0)) 5683 5684 if not this and self._match(TokenType.R_PAREN, advance=False): 5685 this = self.expression(exp.Tuple) 5686 elif 
isinstance(this, exp.UNWRAPPED_QUERIES): 5687 this = self._parse_subquery(this=this, parse_alias=False) 5688 elif isinstance(this, exp.Subquery): 5689 this = self._parse_subquery(this=self._parse_set_operations(this), parse_alias=False) 5690 elif len(expressions) > 1 or self._prev.token_type == TokenType.COMMA: 5691 this = self.expression(exp.Tuple, expressions=expressions) 5692 else: 5693 this = self.expression(exp.Paren, this=this) 5694 5695 if this: 5696 this.add_comments(comments) 5697 5698 self._match_r_paren(expression=this) 5699 return this 5700 5701 def _parse_primary(self) -> t.Optional[exp.Expression]: 5702 if self._match_set(self.PRIMARY_PARSERS): 5703 token_type = self._prev.token_type 5704 primary = self.PRIMARY_PARSERS[token_type](self, self._prev) 5705 5706 if token_type == TokenType.STRING: 5707 expressions = [primary] 5708 while self._match(TokenType.STRING): 5709 expressions.append(exp.Literal.string(self._prev.text)) 5710 5711 if len(expressions) > 1: 5712 return self.expression(exp.Concat, expressions=expressions) 5713 5714 return primary 5715 5716 if self._match_pair(TokenType.DOT, TokenType.NUMBER): 5717 return exp.Literal.number(f"0.{self._prev.text}") 5718 5719 return self._parse_paren() 5720 5721 def _parse_field( 5722 self, 5723 any_token: bool = False, 5724 tokens: t.Optional[t.Collection[TokenType]] = None, 5725 anonymous_func: bool = False, 5726 ) -> t.Optional[exp.Expression]: 5727 if anonymous_func: 5728 field = ( 5729 self._parse_function(anonymous=anonymous_func, any_token=any_token) 5730 or self._parse_primary() 5731 ) 5732 else: 5733 field = self._parse_primary() or self._parse_function( 5734 anonymous=anonymous_func, any_token=any_token 5735 ) 5736 return field or self._parse_id_var(any_token=any_token, tokens=tokens) 5737 5738 def _parse_function( 5739 self, 5740 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5741 anonymous: bool = False, 5742 optional_parens: bool = True, 5743 any_token: bool = False, 5744 ) -> t.Optional[exp.Expression]: 5745 # This allows us to also parse {fn <function>} syntax (Snowflake, MySQL support this) 5746 # See: https://community.snowflake.com/s/article/SQL-Escape-Sequences 5747 fn_syntax = False 5748 if ( 5749 self._match(TokenType.L_BRACE, advance=False) 5750 and self._next 5751 and self._next.text.upper() == "FN" 5752 ): 5753 self._advance(2) 5754 fn_syntax = True 5755 5756 func = self._parse_function_call( 5757 functions=functions, 5758 anonymous=anonymous, 5759 optional_parens=optional_parens, 5760 any_token=any_token, 5761 ) 5762 5763 if fn_syntax: 5764 self._match(TokenType.R_BRACE) 5765 5766 return func 5767 5768 def _parse_function_call( 5769 self, 5770 functions: t.Optional[t.Dict[str, t.Callable]] = None, 5771 anonymous: bool = False, 5772 optional_parens: bool = True, 5773 any_token: bool = False, 5774 ) -> t.Optional[exp.Expression]: 5775 if not self._curr: 5776 return None 5777 5778 comments = self._curr.comments 5779 prev = self._prev 5780 token = self._curr 5781 token_type = self._curr.token_type 5782 this = self._curr.text 5783 upper = this.upper() 5784 5785 parser = self.NO_PAREN_FUNCTION_PARSERS.get(upper) 5786 if optional_parens and parser and token_type not in self.INVALID_FUNC_NAME_TOKENS: 5787 self._advance() 5788 return self._parse_window(parser(self)) 5789 5790 if not self._next or self._next.token_type != TokenType.L_PAREN: 5791 if optional_parens and token_type in self.NO_PAREN_FUNCTIONS: 5792 self._advance() 5793 return self.expression(self.NO_PAREN_FUNCTIONS[token_type]) 5794 5795 return 
None 5796 5797 if any_token: 5798 if token_type in self.RESERVED_TOKENS: 5799 return None 5800 elif token_type not in self.FUNC_TOKENS: 5801 return None 5802 5803 self._advance(2) 5804 5805 parser = self.FUNCTION_PARSERS.get(upper) 5806 if parser and not anonymous: 5807 this = parser(self) 5808 else: 5809 subquery_predicate = self.SUBQUERY_PREDICATES.get(token_type) 5810 5811 if subquery_predicate: 5812 expr = None 5813 if self._curr.token_type in (TokenType.SELECT, TokenType.WITH): 5814 expr = self._parse_select() 5815 self._match_r_paren() 5816 elif prev and prev.token_type in (TokenType.LIKE, TokenType.ILIKE): 5817 # Backtrack one token since we've consumed the L_PAREN here. Instead, we'd like 5818 # to parse "LIKE [ANY | ALL] (...)" as a whole into an exp.Tuple or exp.Paren 5819 self._advance(-1) 5820 expr = self._parse_bitwise() 5821 5822 if expr: 5823 return self.expression(subquery_predicate, comments=comments, this=expr) 5824 5825 if functions is None: 5826 functions = self.FUNCTIONS 5827 5828 function = functions.get(upper) 5829 known_function = function and not anonymous 5830 5831 alias = not known_function or upper in self.FUNCTIONS_WITH_ALIASED_ARGS 5832 args = self._parse_csv(lambda: self._parse_lambda(alias=alias)) 5833 5834 post_func_comments = self._curr and self._curr.comments 5835 if known_function and post_func_comments: 5836 # If the user-inputted comment "/* sqlglot.anonymous */" is following the function 5837 # call we'll construct it as exp.Anonymous, even if it's "known" 5838 if any( 5839 comment.lstrip().startswith(exp.SQLGLOT_ANONYMOUS) 5840 for comment in post_func_comments 5841 ): 5842 known_function = False 5843 5844 if alias and known_function: 5845 args = self._kv_to_prop_eq(args) 5846 5847 if known_function: 5848 func_builder = t.cast(t.Callable, function) 5849 5850 if "dialect" in func_builder.__code__.co_varnames: 5851 func = func_builder(args, dialect=self.dialect) 5852 else: 5853 func = func_builder(args) 5854 5855 func = self.validate_expression(func, args) 5856 if self.dialect.PRESERVE_ORIGINAL_NAMES: 5857 func.meta["name"] = this 5858 5859 this = func 5860 else: 5861 if token_type == TokenType.IDENTIFIER: 5862 this = exp.Identifier(this=this, quoted=True).update_positions(token) 5863 5864 this = self.expression(exp.Anonymous, this=this, expressions=args) 5865 this = this.update_positions(token) 5866 5867 if isinstance(this, exp.Expression): 5868 this.add_comments(comments) 5869 5870 self._match_r_paren(this) 5871 return self._parse_window(this) 5872 5873 def _to_prop_eq(self, expression: exp.Expression, index: int) -> exp.Expression: 5874 return expression 5875 5876 def _kv_to_prop_eq( 5877 self, expressions: t.List[exp.Expression], parse_map: bool = False 5878 ) -> t.List[exp.Expression]: 5879 transformed = [] 5880 5881 for index, e in enumerate(expressions): 5882 if isinstance(e, self.KEY_VALUE_DEFINITIONS): 5883 if isinstance(e, exp.Alias): 5884 e = self.expression(exp.PropertyEQ, this=e.args.get("alias"), expression=e.this) 5885 5886 if not isinstance(e, exp.PropertyEQ): 5887 e = self.expression( 5888 exp.PropertyEQ, 5889 this=e.this if parse_map else exp.to_identifier(e.this.name), 5890 expression=e.expression, 5891 ) 5892 5893 if isinstance(e.this, exp.Column): 5894 e.this.replace(e.this.this) 5895 else: 5896 e = self._to_prop_eq(e, index) 5897 5898 transformed.append(e) 5899 5900 return transformed 5901 5902 def _parse_user_defined_function_expression(self) -> t.Optional[exp.Expression]: 5903 return self._parse_statement() 5904 5905 def 
_parse_function_parameter(self) -> t.Optional[exp.Expression]: 5906 return self._parse_column_def(this=self._parse_id_var(), computed_column=False) 5907 5908 def _parse_user_defined_function( 5909 self, kind: t.Optional[TokenType] = None 5910 ) -> t.Optional[exp.Expression]: 5911 this = self._parse_table_parts(schema=True) 5912 5913 if not self._match(TokenType.L_PAREN): 5914 return this 5915 5916 expressions = self._parse_csv(self._parse_function_parameter) 5917 self._match_r_paren() 5918 return self.expression( 5919 exp.UserDefinedFunction, this=this, expressions=expressions, wrapped=True 5920 ) 5921 5922 def _parse_introducer(self, token: Token) -> exp.Introducer | exp.Identifier: 5923 literal = self._parse_primary() 5924 if literal: 5925 return self.expression(exp.Introducer, this=token.text, expression=literal) 5926 5927 return self._identifier_expression(token) 5928 5929 def _parse_session_parameter(self) -> exp.SessionParameter: 5930 kind = None 5931 this = self._parse_id_var() or self._parse_primary() 5932 5933 if this and self._match(TokenType.DOT): 5934 kind = this.name 5935 this = self._parse_var() or self._parse_primary() 5936 5937 return self.expression(exp.SessionParameter, this=this, kind=kind) 5938 5939 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 5940 return self._parse_id_var() 5941 5942 def _parse_lambda(self, alias: bool = False) -> t.Optional[exp.Expression]: 5943 index = self._index 5944 5945 if self._match(TokenType.L_PAREN): 5946 expressions = t.cast( 5947 t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_lambda_arg) 5948 ) 5949 5950 if not self._match(TokenType.R_PAREN): 5951 self._retreat(index) 5952 else: 5953 expressions = [self._parse_lambda_arg()] 5954 5955 if self._match_set(self.LAMBDAS): 5956 return self.LAMBDAS[self._prev.token_type](self, expressions) 5957 5958 self._retreat(index) 5959 5960 this: t.Optional[exp.Expression] 5961 5962 if self._match(TokenType.DISTINCT): 5963 this = self.expression( 5964 exp.Distinct, expressions=self._parse_csv(self._parse_assignment) 5965 ) 5966 else: 5967 this = self._parse_select_or_expression(alias=alias) 5968 5969 return self._parse_limit( 5970 self._parse_order(self._parse_having_max(self._parse_respect_or_ignore_nulls(this))) 5971 ) 5972 5973 def _parse_schema(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 5974 index = self._index 5975 if not self._match(TokenType.L_PAREN): 5976 return this 5977 5978 # Disambiguate between schema and subquery/CTE, e.g. 
in INSERT INTO table (<expr>), 5979 # expr can be of both types 5980 if self._match_set(self.SELECT_START_TOKENS): 5981 self._retreat(index) 5982 return this 5983 args = self._parse_csv(lambda: self._parse_constraint() or self._parse_field_def()) 5984 self._match_r_paren() 5985 return self.expression(exp.Schema, this=this, expressions=args) 5986 5987 def _parse_field_def(self) -> t.Optional[exp.Expression]: 5988 return self._parse_column_def(self._parse_field(any_token=True)) 5989 5990 def _parse_column_def( 5991 self, this: t.Optional[exp.Expression], computed_column: bool = True 5992 ) -> t.Optional[exp.Expression]: 5993 # column defs are not really columns, they're identifiers 5994 if isinstance(this, exp.Column): 5995 this = this.this 5996 5997 if not computed_column: 5998 self._match(TokenType.ALIAS) 5999 6000 kind = self._parse_types(schema=True) 6001 6002 if self._match_text_seq("FOR", "ORDINALITY"): 6003 return self.expression(exp.ColumnDef, this=this, ordinality=True) 6004 6005 constraints: t.List[exp.Expression] = [] 6006 6007 if (not kind and self._match(TokenType.ALIAS)) or self._match_texts( 6008 ("ALIAS", "MATERIALIZED") 6009 ): 6010 persisted = self._prev.text.upper() == "MATERIALIZED" 6011 constraint_kind = exp.ComputedColumnConstraint( 6012 this=self._parse_assignment(), 6013 persisted=persisted or self._match_text_seq("PERSISTED"), 6014 not_null=self._match_pair(TokenType.NOT, TokenType.NULL), 6015 ) 6016 constraints.append(self.expression(exp.ColumnConstraint, kind=constraint_kind)) 6017 elif ( 6018 kind 6019 and self._match(TokenType.ALIAS, advance=False) 6020 and ( 6021 not self.WRAPPED_TRANSFORM_COLUMN_CONSTRAINT 6022 or (self._next and self._next.token_type == TokenType.L_PAREN) 6023 ) 6024 ): 6025 self._advance() 6026 constraints.append( 6027 self.expression( 6028 exp.ColumnConstraint, 6029 kind=exp.ComputedColumnConstraint( 6030 this=self._parse_disjunction(), 6031 persisted=self._match_texts(("STORED", "VIRTUAL")) 6032 and self._prev.text.upper() == "STORED", 6033 ), 6034 ) 6035 ) 6036 6037 while True: 6038 constraint = self._parse_column_constraint() 6039 if not constraint: 6040 break 6041 constraints.append(constraint) 6042 6043 if not kind and not constraints: 6044 return this 6045 6046 return self.expression(exp.ColumnDef, this=this, kind=kind, constraints=constraints) 6047 6048 def _parse_auto_increment( 6049 self, 6050 ) -> exp.GeneratedAsIdentityColumnConstraint | exp.AutoIncrementColumnConstraint: 6051 start = None 6052 increment = None 6053 order = None 6054 6055 if self._match(TokenType.L_PAREN, advance=False): 6056 args = self._parse_wrapped_csv(self._parse_bitwise) 6057 start = seq_get(args, 0) 6058 increment = seq_get(args, 1) 6059 elif self._match_text_seq("START"): 6060 start = self._parse_bitwise() 6061 self._match_text_seq("INCREMENT") 6062 increment = self._parse_bitwise() 6063 if self._match_text_seq("ORDER"): 6064 order = True 6065 elif self._match_text_seq("NOORDER"): 6066 order = False 6067 6068 if start and increment: 6069 return exp.GeneratedAsIdentityColumnConstraint( 6070 start=start, increment=increment, this=False, order=order 6071 ) 6072 6073 return exp.AutoIncrementColumnConstraint() 6074 6075 def _parse_auto_property(self) -> t.Optional[exp.AutoRefreshProperty]: 6076 if not self._match_text_seq("REFRESH"): 6077 self._retreat(self._index - 1) 6078 return None 6079 return self.expression(exp.AutoRefreshProperty, this=self._parse_var(upper=True)) 6080 6081 def _parse_compress(self) -> exp.CompressColumnConstraint: 6082 if 
self._match(TokenType.L_PAREN, advance=False): 6083 return self.expression( 6084 exp.CompressColumnConstraint, this=self._parse_wrapped_csv(self._parse_bitwise) 6085 ) 6086 6087 return self.expression(exp.CompressColumnConstraint, this=self._parse_bitwise()) 6088 6089 def _parse_generated_as_identity( 6090 self, 6091 ) -> ( 6092 exp.GeneratedAsIdentityColumnConstraint 6093 | exp.ComputedColumnConstraint 6094 | exp.GeneratedAsRowColumnConstraint 6095 ): 6096 if self._match_text_seq("BY", "DEFAULT"): 6097 on_null = self._match_pair(TokenType.ON, TokenType.NULL) 6098 this = self.expression( 6099 exp.GeneratedAsIdentityColumnConstraint, this=False, on_null=on_null 6100 ) 6101 else: 6102 self._match_text_seq("ALWAYS") 6103 this = self.expression(exp.GeneratedAsIdentityColumnConstraint, this=True) 6104 6105 self._match(TokenType.ALIAS) 6106 6107 if self._match_text_seq("ROW"): 6108 start = self._match_text_seq("START") 6109 if not start: 6110 self._match(TokenType.END) 6111 hidden = self._match_text_seq("HIDDEN") 6112 return self.expression(exp.GeneratedAsRowColumnConstraint, start=start, hidden=hidden) 6113 6114 identity = self._match_text_seq("IDENTITY") 6115 6116 if self._match(TokenType.L_PAREN): 6117 if self._match(TokenType.START_WITH): 6118 this.set("start", self._parse_bitwise()) 6119 if self._match_text_seq("INCREMENT", "BY"): 6120 this.set("increment", self._parse_bitwise()) 6121 if self._match_text_seq("MINVALUE"): 6122 this.set("minvalue", self._parse_bitwise()) 6123 if self._match_text_seq("MAXVALUE"): 6124 this.set("maxvalue", self._parse_bitwise()) 6125 6126 if self._match_text_seq("CYCLE"): 6127 this.set("cycle", True) 6128 elif self._match_text_seq("NO", "CYCLE"): 6129 this.set("cycle", False) 6130 6131 if not identity: 6132 this.set("expression", self._parse_range()) 6133 elif not this.args.get("start") and self._match(TokenType.NUMBER, advance=False): 6134 args = self._parse_csv(self._parse_bitwise) 6135 this.set("start", seq_get(args, 0)) 6136 this.set("increment", seq_get(args, 1)) 6137 6138 self._match_r_paren() 6139 6140 return this 6141 6142 def _parse_inline(self) -> exp.InlineLengthColumnConstraint: 6143 self._match_text_seq("LENGTH") 6144 return self.expression(exp.InlineLengthColumnConstraint, this=self._parse_bitwise()) 6145 6146 def _parse_not_constraint(self) -> t.Optional[exp.Expression]: 6147 if self._match_text_seq("NULL"): 6148 return self.expression(exp.NotNullColumnConstraint) 6149 if self._match_text_seq("CASESPECIFIC"): 6150 return self.expression(exp.CaseSpecificColumnConstraint, not_=True) 6151 if self._match_text_seq("FOR", "REPLICATION"): 6152 return self.expression(exp.NotForReplicationColumnConstraint) 6153 6154 # Unconsume the `NOT` token 6155 self._retreat(self._index - 1) 6156 return None 6157 6158 def _parse_column_constraint(self) -> t.Optional[exp.Expression]: 6159 this = self._match(TokenType.CONSTRAINT) and self._parse_id_var() 6160 6161 procedure_option_follows = ( 6162 self._match(TokenType.WITH, advance=False) 6163 and self._next 6164 and self._next.text.upper() in self.PROCEDURE_OPTIONS 6165 ) 6166 6167 if not procedure_option_follows and self._match_texts(self.CONSTRAINT_PARSERS): 6168 return self.expression( 6169 exp.ColumnConstraint, 6170 this=this, 6171 kind=self.CONSTRAINT_PARSERS[self._prev.text.upper()](self), 6172 ) 6173 6174 return this 6175 6176 def _parse_constraint(self) -> t.Optional[exp.Expression]: 6177 if not self._match(TokenType.CONSTRAINT): 6178 return 
self._parse_unnamed_constraint(constraints=self.SCHEMA_UNNAMED_CONSTRAINTS) 6179 6180 return self.expression( 6181 exp.Constraint, 6182 this=self._parse_id_var(), 6183 expressions=self._parse_unnamed_constraints(), 6184 ) 6185 6186 def _parse_unnamed_constraints(self) -> t.List[exp.Expression]: 6187 constraints = [] 6188 while True: 6189 constraint = self._parse_unnamed_constraint() or self._parse_function() 6190 if not constraint: 6191 break 6192 constraints.append(constraint) 6193 6194 return constraints 6195 6196 def _parse_unnamed_constraint( 6197 self, constraints: t.Optional[t.Collection[str]] = None 6198 ) -> t.Optional[exp.Expression]: 6199 if self._match(TokenType.IDENTIFIER, advance=False) or not self._match_texts( 6200 constraints or self.CONSTRAINT_PARSERS 6201 ): 6202 return None 6203 6204 constraint = self._prev.text.upper() 6205 if constraint not in self.CONSTRAINT_PARSERS: 6206 self.raise_error(f"No parser found for schema constraint {constraint}.") 6207 6208 return self.CONSTRAINT_PARSERS[constraint](self) 6209 6210 def _parse_unique_key(self) -> t.Optional[exp.Expression]: 6211 return self._parse_id_var(any_token=False) 6212 6213 def _parse_unique(self) -> exp.UniqueColumnConstraint: 6214 self._match_texts(("KEY", "INDEX")) 6215 return self.expression( 6216 exp.UniqueColumnConstraint, 6217 nulls=self._match_text_seq("NULLS", "NOT", "DISTINCT"), 6218 this=self._parse_schema(self._parse_unique_key()), 6219 index_type=self._match(TokenType.USING) and self._advance_any() and self._prev.text, 6220 on_conflict=self._parse_on_conflict(), 6221 options=self._parse_key_constraint_options(), 6222 ) 6223 6224 def _parse_key_constraint_options(self) -> t.List[str]: 6225 options = [] 6226 while True: 6227 if not self._curr: 6228 break 6229 6230 if self._match(TokenType.ON): 6231 action = None 6232 on = self._advance_any() and self._prev.text 6233 6234 if self._match_text_seq("NO", "ACTION"): 6235 action = "NO ACTION" 6236 elif self._match_text_seq("CASCADE"): 6237 action = "CASCADE" 6238 elif self._match_text_seq("RESTRICT"): 6239 action = "RESTRICT" 6240 elif self._match_pair(TokenType.SET, TokenType.NULL): 6241 action = "SET NULL" 6242 elif self._match_pair(TokenType.SET, TokenType.DEFAULT): 6243 action = "SET DEFAULT" 6244 else: 6245 self.raise_error("Invalid key constraint") 6246 6247 options.append(f"ON {on} {action}") 6248 else: 6249 var = self._parse_var_from_options( 6250 self.KEY_CONSTRAINT_OPTIONS, raise_unmatched=False 6251 ) 6252 if not var: 6253 break 6254 options.append(var.name) 6255 6256 return options 6257 6258 def _parse_references(self, match: bool = True) -> t.Optional[exp.Reference]: 6259 if match and not self._match(TokenType.REFERENCES): 6260 return None 6261 6262 expressions = None 6263 this = self._parse_table(schema=True) 6264 options = self._parse_key_constraint_options() 6265 return self.expression(exp.Reference, this=this, expressions=expressions, options=options) 6266 6267 def _parse_foreign_key(self) -> exp.ForeignKey: 6268 expressions = ( 6269 self._parse_wrapped_id_vars() 6270 if not self._match(TokenType.REFERENCES, advance=False) 6271 else None 6272 ) 6273 reference = self._parse_references() 6274 on_options = {} 6275 6276 while self._match(TokenType.ON): 6277 if not self._match_set((TokenType.DELETE, TokenType.UPDATE)): 6278 self.raise_error("Expected DELETE or UPDATE") 6279 6280 kind = self._prev.text.lower() 6281 6282 if self._match_text_seq("NO", "ACTION"): 6283 action = "NO ACTION" 6284 elif self._match(TokenType.SET): 6285 
self._match_set((TokenType.NULL, TokenType.DEFAULT)) 6286 action = "SET " + self._prev.text.upper() 6287 else: 6288 self._advance() 6289 action = self._prev.text.upper() 6290 6291 on_options[kind] = action 6292 6293 return self.expression( 6294 exp.ForeignKey, 6295 expressions=expressions, 6296 reference=reference, 6297 options=self._parse_key_constraint_options(), 6298 **on_options, # type: ignore 6299 ) 6300 6301 def _parse_primary_key_part(self) -> t.Optional[exp.Expression]: 6302 return self._parse_ordered() or self._parse_field() 6303 6304 def _parse_period_for_system_time(self) -> t.Optional[exp.PeriodForSystemTimeConstraint]: 6305 if not self._match(TokenType.TIMESTAMP_SNAPSHOT): 6306 self._retreat(self._index - 1) 6307 return None 6308 6309 id_vars = self._parse_wrapped_id_vars() 6310 return self.expression( 6311 exp.PeriodForSystemTimeConstraint, 6312 this=seq_get(id_vars, 0), 6313 expression=seq_get(id_vars, 1), 6314 ) 6315 6316 def _parse_primary_key( 6317 self, wrapped_optional: bool = False, in_props: bool = False 6318 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 6319 desc = ( 6320 self._match_set((TokenType.ASC, TokenType.DESC)) 6321 and self._prev.token_type == TokenType.DESC 6322 ) 6323 6324 if not in_props and not self._match(TokenType.L_PAREN, advance=False): 6325 return self.expression( 6326 exp.PrimaryKeyColumnConstraint, 6327 desc=desc, 6328 options=self._parse_key_constraint_options(), 6329 ) 6330 6331 expressions = self._parse_wrapped_csv( 6332 self._parse_primary_key_part, optional=wrapped_optional 6333 ) 6334 6335 return self.expression( 6336 exp.PrimaryKey, 6337 expressions=expressions, 6338 include=self._parse_index_params(), 6339 options=self._parse_key_constraint_options(), 6340 ) 6341 6342 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 6343 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 6344 6345 def _parse_odbc_datetime_literal(self) -> exp.Expression: 6346 """ 6347 Parses a datetime column in ODBC format. We parse the column into the corresponding 6348 types, for example `{d'yyyy-mm-dd'}` will be parsed as a `Date` column, exactly the 6349 same as we did for `DATE('yyyy-mm-dd')`. 
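Example (illustrative):
    `{ts '2024-01-01 12:34:56'}` should be parsed the same way as
    `TIMESTAMP('2024-01-01 12:34:56')`, with the introducer (`d`, `t` or `ts`)
    looked up in `ODBC_DATETIME_LITERALS`.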
6350 6351 Reference: 6352 https://learn.microsoft.com/en-us/sql/odbc/reference/develop-app/date-time-and-timestamp-literals 6353 """ 6354 self._match(TokenType.VAR) 6355 exp_class = self.ODBC_DATETIME_LITERALS[self._prev.text.lower()] 6356 expression = self.expression(exp_class=exp_class, this=self._parse_string()) 6357 if not self._match(TokenType.R_BRACE): 6358 self.raise_error("Expected }") 6359 return expression 6360 6361 def _parse_bracket(self, this: t.Optional[exp.Expression] = None) -> t.Optional[exp.Expression]: 6362 if not self._match_set((TokenType.L_BRACKET, TokenType.L_BRACE)): 6363 return this 6364 6365 if self.MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS: 6366 map_token = seq_get(self._tokens, self._index - 2) 6367 parse_map = map_token is not None and map_token.text.upper() == "MAP" 6368 else: 6369 parse_map = False 6370 6371 bracket_kind = self._prev.token_type 6372 if ( 6373 bracket_kind == TokenType.L_BRACE 6374 and self._curr 6375 and self._curr.token_type == TokenType.VAR 6376 and self._curr.text.lower() in self.ODBC_DATETIME_LITERALS 6377 ): 6378 return self._parse_odbc_datetime_literal() 6379 6380 expressions = self._parse_csv( 6381 lambda: self._parse_bracket_key_value(is_map=bracket_kind == TokenType.L_BRACE) 6382 ) 6383 6384 if bracket_kind == TokenType.L_BRACKET and not self._match(TokenType.R_BRACKET): 6385 self.raise_error("Expected ]") 6386 elif bracket_kind == TokenType.L_BRACE and not self._match(TokenType.R_BRACE): 6387 self.raise_error("Expected }") 6388 6389 # https://duckdb.org/docs/sql/data_types/struct.html#creating-structs 6390 if bracket_kind == TokenType.L_BRACE: 6391 this = self.expression( 6392 exp.Struct, 6393 expressions=self._kv_to_prop_eq(expressions=expressions, parse_map=parse_map), 6394 ) 6395 elif not this: 6396 this = build_array_constructor( 6397 exp.Array, args=expressions, bracket_kind=bracket_kind, dialect=self.dialect 6398 ) 6399 else: 6400 constructor_type = self.ARRAY_CONSTRUCTORS.get(this.name.upper()) 6401 if constructor_type: 6402 return build_array_constructor( 6403 constructor_type, 6404 args=expressions, 6405 bracket_kind=bracket_kind, 6406 dialect=self.dialect, 6407 ) 6408 6409 expressions = apply_index_offset( 6410 this, expressions, -self.dialect.INDEX_OFFSET, dialect=self.dialect 6411 ) 6412 this = self.expression( 6413 exp.Bracket, 6414 this=this, 6415 expressions=expressions, 6416 comments=this.pop_comments(), 6417 ) 6418 6419 self._add_comments(this) 6420 return self._parse_bracket(this) 6421 6422 def _parse_slice(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6423 if self._match(TokenType.COLON): 6424 return self.expression(exp.Slice, this=this, expression=self._parse_assignment()) 6425 return this 6426 6427 def _parse_case(self) -> t.Optional[exp.Expression]: 6428 ifs = [] 6429 default = None 6430 6431 comments = self._prev_comments 6432 expression = self._parse_assignment() 6433 6434 while self._match(TokenType.WHEN): 6435 this = self._parse_assignment() 6436 self._match(TokenType.THEN) 6437 then = self._parse_assignment() 6438 ifs.append(self.expression(exp.If, this=this, true=then)) 6439 6440 if self._match(TokenType.ELSE): 6441 default = self._parse_assignment() 6442 6443 if not self._match(TokenType.END): 6444 if isinstance(default, exp.Interval) and default.this.sql().upper() == "END": 6445 default = exp.column("interval") 6446 else: 6447 self.raise_error("Expected END after CASE", self._prev) 6448 6449 return self.expression( 6450 exp.Case, comments=comments, this=expression, ifs=ifs, 
default=default 6451 ) 6452 6453 def _parse_if(self) -> t.Optional[exp.Expression]: 6454 if self._match(TokenType.L_PAREN): 6455 args = self._parse_csv( 6456 lambda: self._parse_alias(self._parse_assignment(), explicit=True) 6457 ) 6458 this = self.validate_expression(exp.If.from_arg_list(args), args) 6459 self._match_r_paren() 6460 else: 6461 index = self._index - 1 6462 6463 if self.NO_PAREN_IF_COMMANDS and index == 0: 6464 return self._parse_as_command(self._prev) 6465 6466 condition = self._parse_assignment() 6467 6468 if not condition: 6469 self._retreat(index) 6470 return None 6471 6472 self._match(TokenType.THEN) 6473 true = self._parse_assignment() 6474 false = self._parse_assignment() if self._match(TokenType.ELSE) else None 6475 self._match(TokenType.END) 6476 this = self.expression(exp.If, this=condition, true=true, false=false) 6477 6478 return this 6479 6480 def _parse_next_value_for(self) -> t.Optional[exp.Expression]: 6481 if not self._match_text_seq("VALUE", "FOR"): 6482 self._retreat(self._index - 1) 6483 return None 6484 6485 return self.expression( 6486 exp.NextValueFor, 6487 this=self._parse_column(), 6488 order=self._match(TokenType.OVER) and self._parse_wrapped(self._parse_order), 6489 ) 6490 6491 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 6492 this = self._parse_function() or self._parse_var_or_string(upper=True) 6493 6494 if self._match(TokenType.FROM): 6495 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6496 6497 if not self._match(TokenType.COMMA): 6498 self.raise_error("Expected FROM or comma after EXTRACT", self._prev) 6499 6500 return self.expression(exp.Extract, this=this, expression=self._parse_bitwise()) 6501 6502 def _parse_gap_fill(self) -> exp.GapFill: 6503 self._match(TokenType.TABLE) 6504 this = self._parse_table() 6505 6506 self._match(TokenType.COMMA) 6507 args = [this, *self._parse_csv(self._parse_lambda)] 6508 6509 gap_fill = exp.GapFill.from_arg_list(args) 6510 return self.validate_expression(gap_fill, args) 6511 6512 def _parse_cast(self, strict: bool, safe: t.Optional[bool] = None) -> exp.Expression: 6513 this = self._parse_assignment() 6514 6515 if not self._match(TokenType.ALIAS): 6516 if self._match(TokenType.COMMA): 6517 return self.expression(exp.CastToStrType, this=this, to=self._parse_string()) 6518 6519 self.raise_error("Expected AS after CAST") 6520 6521 fmt = None 6522 to = self._parse_types() 6523 6524 default = self._match(TokenType.DEFAULT) 6525 if default: 6526 default = self._parse_bitwise() 6527 self._match_text_seq("ON", "CONVERSION", "ERROR") 6528 6529 if self._match_set((TokenType.FORMAT, TokenType.COMMA)): 6530 fmt_string = self._parse_string() 6531 fmt = self._parse_at_time_zone(fmt_string) 6532 6533 if not to: 6534 to = exp.DataType.build(exp.DataType.Type.UNKNOWN) 6535 if to.this in exp.DataType.TEMPORAL_TYPES: 6536 this = self.expression( 6537 exp.StrToDate if to.this == exp.DataType.Type.DATE else exp.StrToTime, 6538 this=this, 6539 format=exp.Literal.string( 6540 format_time( 6541 fmt_string.this if fmt_string else "", 6542 self.dialect.FORMAT_MAPPING or self.dialect.TIME_MAPPING, 6543 self.dialect.FORMAT_TRIE or self.dialect.TIME_TRIE, 6544 ) 6545 ), 6546 safe=safe, 6547 ) 6548 6549 if isinstance(fmt, exp.AtTimeZone) and isinstance(this, exp.StrToTime): 6550 this.set("zone", fmt.args["zone"]) 6551 return this 6552 elif not to: 6553 self.raise_error("Expected TYPE after CAST") 6554 elif isinstance(to, exp.Identifier): 6555 to = exp.DataType.build(to.name, udt=True) 6556 
elif to.this == exp.DataType.Type.CHAR: 6557 if self._match(TokenType.CHARACTER_SET): 6558 to = self.expression(exp.CharacterSet, this=self._parse_var_or_string()) 6559 6560 return self.build_cast( 6561 strict=strict, 6562 this=this, 6563 to=to, 6564 format=fmt, 6565 safe=safe, 6566 action=self._parse_var_from_options(self.CAST_ACTIONS, raise_unmatched=False), 6567 default=default, 6568 ) 6569 6570 def _parse_string_agg(self) -> exp.GroupConcat: 6571 if self._match(TokenType.DISTINCT): 6572 args: t.List[t.Optional[exp.Expression]] = [ 6573 self.expression(exp.Distinct, expressions=[self._parse_assignment()]) 6574 ] 6575 if self._match(TokenType.COMMA): 6576 args.extend(self._parse_csv(self._parse_assignment)) 6577 else: 6578 args = self._parse_csv(self._parse_assignment) # type: ignore 6579 6580 if self._match_text_seq("ON", "OVERFLOW"): 6581 # trino: LISTAGG(expression [, separator] [ON OVERFLOW overflow_behavior]) 6582 if self._match_text_seq("ERROR"): 6583 on_overflow: t.Optional[exp.Expression] = exp.var("ERROR") 6584 else: 6585 self._match_text_seq("TRUNCATE") 6586 on_overflow = self.expression( 6587 exp.OverflowTruncateBehavior, 6588 this=self._parse_string(), 6589 with_count=( 6590 self._match_text_seq("WITH", "COUNT") 6591 or not self._match_text_seq("WITHOUT", "COUNT") 6592 ), 6593 ) 6594 else: 6595 on_overflow = None 6596 6597 index = self._index 6598 if not self._match(TokenType.R_PAREN) and args: 6599 # postgres: STRING_AGG([DISTINCT] expression, separator [ORDER BY expression1 {ASC | DESC} [, ...]]) 6600 # bigquery: STRING_AGG([DISTINCT] expression [, separator] [ORDER BY key [{ASC | DESC}] [, ... ]] [LIMIT n]) 6601 # The order is parsed through `this` as a canonicalization for WITHIN GROUPs 6602 args[0] = self._parse_limit(this=self._parse_order(this=args[0])) 6603 return self.expression(exp.GroupConcat, this=args[0], separator=seq_get(args, 1)) 6604 6605 # Checks if we can parse an order clause: WITHIN GROUP (ORDER BY <order_by_expression_list> [ASC | DESC]). 6606 # This is done "manually", instead of letting _parse_window parse it into an exp.WithinGroup node, so that 6607 # the STRING_AGG call is parsed like in MySQL / SQLite and can thus be transpiled more easily to them. 
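# Rough sketch of the resulting transpilation (public API; output shown approximately):
#
#   import sqlglot
#   sqlglot.transpile(
#       "SELECT LISTAGG(x, ',') WITHIN GROUP (ORDER BY y) FROM t",
#       read="oracle",
#       write="mysql",
#   )
#   # expected roughly: ["SELECT GROUP_CONCAT(x ORDER BY y SEPARATOR ',') FROM t"]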
6608 if not self._match_text_seq("WITHIN", "GROUP"): 6609 self._retreat(index) 6610 return self.validate_expression(exp.GroupConcat.from_arg_list(args), args) 6611 6612 # The corresponding match_r_paren will be called in parse_function (caller) 6613 self._match_l_paren() 6614 6615 return self.expression( 6616 exp.GroupConcat, 6617 this=self._parse_order(this=seq_get(args, 0)), 6618 separator=seq_get(args, 1), 6619 on_overflow=on_overflow, 6620 ) 6621 6622 def _parse_convert( 6623 self, strict: bool, safe: t.Optional[bool] = None 6624 ) -> t.Optional[exp.Expression]: 6625 this = self._parse_bitwise() 6626 6627 if self._match(TokenType.USING): 6628 to: t.Optional[exp.Expression] = self.expression( 6629 exp.CharacterSet, this=self._parse_var() 6630 ) 6631 elif self._match(TokenType.COMMA): 6632 to = self._parse_types() 6633 else: 6634 to = None 6635 6636 return self.build_cast(strict=strict, this=this, to=to, safe=safe) 6637 6638 def _parse_xml_table(self) -> exp.XMLTable: 6639 namespaces = None 6640 passing = None 6641 columns = None 6642 6643 if self._match_text_seq("XMLNAMESPACES", "("): 6644 namespaces = self._parse_xml_namespace() 6645 self._match_text_seq(")", ",") 6646 6647 this = self._parse_string() 6648 6649 if self._match_text_seq("PASSING"): 6650 # The BY VALUE keywords are optional and are provided for semantic clarity 6651 self._match_text_seq("BY", "VALUE") 6652 passing = self._parse_csv(self._parse_column) 6653 6654 by_ref = self._match_text_seq("RETURNING", "SEQUENCE", "BY", "REF") 6655 6656 if self._match_text_seq("COLUMNS"): 6657 columns = self._parse_csv(self._parse_field_def) 6658 6659 return self.expression( 6660 exp.XMLTable, 6661 this=this, 6662 namespaces=namespaces, 6663 passing=passing, 6664 columns=columns, 6665 by_ref=by_ref, 6666 ) 6667 6668 def _parse_xml_namespace(self) -> t.List[exp.XMLNamespace]: 6669 namespaces = [] 6670 6671 while True: 6672 if self._match(TokenType.DEFAULT): 6673 uri = self._parse_string() 6674 else: 6675 uri = self._parse_alias(self._parse_string()) 6676 namespaces.append(self.expression(exp.XMLNamespace, this=uri)) 6677 if not self._match(TokenType.COMMA): 6678 break 6679 6680 return namespaces 6681 6682 def _parse_decode(self) -> t.Optional[exp.Decode | exp.DecodeCase]: 6683 args = self._parse_csv(self._parse_assignment) 6684 6685 if len(args) < 3: 6686 return self.expression(exp.Decode, this=seq_get(args, 0), charset=seq_get(args, 1)) 6687 6688 return self.expression(exp.DecodeCase, expressions=args) 6689 6690 def _parse_json_key_value(self) -> t.Optional[exp.JSONKeyValue]: 6691 self._match_text_seq("KEY") 6692 key = self._parse_column() 6693 self._match_set(self.JSON_KEY_VALUE_SEPARATOR_TOKENS) 6694 self._match_text_seq("VALUE") 6695 value = self._parse_bitwise() 6696 6697 if not key and not value: 6698 return None 6699 return self.expression(exp.JSONKeyValue, this=key, expression=value) 6700 6701 def _parse_format_json(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]: 6702 if not this or not self._match_text_seq("FORMAT", "JSON"): 6703 return this 6704 6705 return self.expression(exp.FormatJson, this=this) 6706 6707 def _parse_on_condition(self) -> t.Optional[exp.OnCondition]: 6708 # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. 
    def _parse_on_condition(self) -> t.Optional[exp.OnCondition]:
        # MySQL uses "X ON EMPTY Y ON ERROR" (e.g. JSON_VALUE) while Oracle uses the opposite (e.g. JSON_EXISTS)
        if self.dialect.ON_CONDITION_EMPTY_BEFORE_ERROR:
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
        else:
            error = self._parse_on_handling("ERROR", *self.ON_CONDITION_TOKENS)
            empty = self._parse_on_handling("EMPTY", *self.ON_CONDITION_TOKENS)

        null = self._parse_on_handling("NULL", *self.ON_CONDITION_TOKENS)

        if not empty and not error and not null:
            return None

        return self.expression(
            exp.OnCondition,
            empty=empty,
            error=error,
            null=null,
        )

    def _parse_on_handling(
        self, on: str, *values: str
    ) -> t.Optional[str] | t.Optional[exp.Expression]:
        # Parses the "X ON Y" or "DEFAULT <expr> ON Y" syntax, e.g. NULL ON NULL (Oracle, T-SQL, MySQL)
        for value in values:
            if self._match_text_seq(value, "ON", on):
                return f"{value} ON {on}"

        index = self._index
        if self._match(TokenType.DEFAULT):
            default_value = self._parse_bitwise()
            if self._match_text_seq("ON", on):
                return default_value

            self._retreat(index)

        return None

    @t.overload
    def _parse_json_object(self, agg: Lit[False]) -> exp.JSONObject: ...

    @t.overload
    def _parse_json_object(self, agg: Lit[True]) -> exp.JSONObjectAgg: ...

    def _parse_json_object(self, agg=False):
        star = self._parse_star()
        expressions = (
            [star]
            if star
            else self._parse_csv(lambda: self._parse_format_json(self._parse_json_key_value()))
        )
        null_handling = self._parse_on_handling("NULL", "NULL", "ABSENT")

        unique_keys = None
        if self._match_text_seq("WITH", "UNIQUE"):
            unique_keys = True
        elif self._match_text_seq("WITHOUT", "UNIQUE"):
            unique_keys = False

        self._match_text_seq("KEYS")

        return_type = self._match_text_seq("RETURNING") and self._parse_format_json(
            self._parse_type()
        )
        encoding = self._match_text_seq("ENCODING") and self._parse_var()

        return self.expression(
            exp.JSONObjectAgg if agg else exp.JSONObject,
            expressions=expressions,
            null_handling=null_handling,
            unique_keys=unique_keys,
            return_type=return_type,
            encoding=encoding,
        )

    # Note: this is currently incomplete; it only implements the "JSON_value_column" part
    def _parse_json_column_def(self) -> exp.JSONColumnDef:
        if not self._match_text_seq("NESTED"):
            this = self._parse_id_var()
            kind = self._parse_types(allow_identifiers=False)
            nested = None
        else:
            this = None
            kind = None
            nested = True

        path = self._match_text_seq("PATH") and self._parse_string()
        nested_schema = nested and self._parse_json_schema()

        return self.expression(
            exp.JSONColumnDef,
            this=this,
            kind=kind,
            path=path,
            nested_schema=nested_schema,
        )

    def _parse_json_schema(self) -> exp.JSONSchema:
        self._match_text_seq("COLUMNS")
        return self.expression(
            exp.JSONSchema,
            expressions=self._parse_wrapped_csv(self._parse_json_column_def, optional=True),
        )
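
    # Illustrative example, not part of the original source: a typical call shape that
    # _parse_json_table below handles, with each COLUMNS entry going through
    # _parse_json_column_def above:
    #
    #   JSON_TABLE(payload, '$.items[*]' COLUMNS (id NUMBER PATH '$.id'))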
    def _parse_json_table(self) -> exp.JSONTable:
        this = self._parse_format_json(self._parse_bitwise())
        path = self._match(TokenType.COMMA) and self._parse_string()
        error_handling = self._parse_on_handling("ERROR", "ERROR", "NULL")
        empty_handling = self._parse_on_handling("EMPTY", "ERROR", "NULL")
        schema = self._parse_json_schema()

        return exp.JSONTable(
            this=this,
            schema=schema,
            path=path,
            error_handling=error_handling,
            empty_handling=empty_handling,
        )

    def _parse_match_against(self) -> exp.MatchAgainst:
        expressions = self._parse_csv(self._parse_column)

        self._match_text_seq(")", "AGAINST", "(")

        this = self._parse_string()

        if self._match_text_seq("IN", "NATURAL", "LANGUAGE", "MODE"):
            modifier = "IN NATURAL LANGUAGE MODE"
            if self._match_text_seq("WITH", "QUERY", "EXPANSION"):
                modifier = f"{modifier} WITH QUERY EXPANSION"
        elif self._match_text_seq("IN", "BOOLEAN", "MODE"):
            modifier = "IN BOOLEAN MODE"
        elif self._match_text_seq("WITH", "QUERY", "EXPANSION"):
            modifier = "WITH QUERY EXPANSION"
        else:
            modifier = None

        return self.expression(
            exp.MatchAgainst, this=this, expressions=expressions, modifier=modifier
        )

    # https://learn.microsoft.com/en-us/sql/t-sql/functions/openjson-transact-sql?view=sql-server-ver16
    def _parse_open_json(self) -> exp.OpenJSON:
        this = self._parse_bitwise()
        path = self._match(TokenType.COMMA) and self._parse_string()

        def _parse_open_json_column_def() -> exp.OpenJSONColumnDef:
            this = self._parse_field(any_token=True)
            kind = self._parse_types()
            path = self._parse_string()
            as_json = self._match_pair(TokenType.ALIAS, TokenType.JSON)

            return self.expression(
                exp.OpenJSONColumnDef, this=this, kind=kind, path=path, as_json=as_json
            )

        expressions = None
        if self._match_pair(TokenType.R_PAREN, TokenType.WITH):
            self._match_l_paren()
            expressions = self._parse_csv(_parse_open_json_column_def)

        return self.expression(exp.OpenJSON, this=this, path=path, expressions=expressions)

    def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition:
        args = self._parse_csv(self._parse_bitwise)

        if self._match(TokenType.IN):
            return self.expression(
                exp.StrPosition, this=self._parse_bitwise(), substr=seq_get(args, 0)
            )

        if haystack_first:
            haystack = seq_get(args, 0)
            needle = seq_get(args, 1)
        else:
            haystack = seq_get(args, 1)
            needle = seq_get(args, 0)

        return self.expression(
            exp.StrPosition, this=haystack, substr=needle, position=seq_get(args, 2)
        )

    def _parse_predict(self) -> exp.Predict:
        self._match_text_seq("MODEL")
        this = self._parse_table()

        self._match(TokenType.COMMA)
        self._match_text_seq("TABLE")

        return self.expression(
            exp.Predict,
            this=this,
            expression=self._parse_table(),
            params_struct=self._match(TokenType.COMMA) and self._parse_bitwise(),
        )

    def _parse_join_hint(self, func_name: str) -> exp.JoinHint:
        args = self._parse_csv(self._parse_table)
        return exp.JoinHint(this=func_name.upper(), expressions=args)
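
    # Illustrative example, not part of the original source: _parse_substring below
    # normalizes the SQL standard keyword form into the same argument list as the comma
    # form, so both of these yield equivalent exp.Substring nodes:
    #
    #   SUBSTRING('abcdef' FROM 2 FOR 3)
    #   SUBSTRING('abcdef', 2, 3)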
    def _parse_substring(self) -> exp.Substring:
        # Postgres supports the form: substring(string [from int] [for int])
        # https://www.postgresql.org/docs/9.1/functions-string.html @ Table 9-6

        args = t.cast(t.List[t.Optional[exp.Expression]], self._parse_csv(self._parse_bitwise))

        if self._match(TokenType.FROM):
            args.append(self._parse_bitwise())
        if self._match(TokenType.FOR):
            if len(args) == 1:
                args.append(exp.Literal.number(1))
            args.append(self._parse_bitwise())

        return self.validate_expression(exp.Substring.from_arg_list(args), args)

    def _parse_trim(self) -> exp.Trim:
        # https://www.w3resource.com/sql/character-functions/trim.php
        # https://docs.oracle.com/javadb/10.8.3.0/ref/rreftrimfunc.html

        position = None
        collation = None
        expression = None

        if self._match_texts(self.TRIM_TYPES):
            position = self._prev.text.upper()

        this = self._parse_bitwise()
        if self._match_set((TokenType.FROM, TokenType.COMMA)):
            invert_order = self._prev.token_type == TokenType.FROM or self.TRIM_PATTERN_FIRST
            expression = self._parse_bitwise()

            if invert_order:
                this, expression = expression, this

        if self._match(TokenType.COLLATE):
            collation = self._parse_bitwise()

        return self.expression(
            exp.Trim, this=this, position=position, expression=expression, collation=collation
        )

    def _parse_window_clause(self) -> t.Optional[t.List[exp.Expression]]:
        return self._match(TokenType.WINDOW) and self._parse_csv(self._parse_named_window)

    def _parse_named_window(self) -> t.Optional[exp.Expression]:
        return self._parse_window(self._parse_id_var(), alias=True)

    def _parse_respect_or_ignore_nulls(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if self._match_text_seq("IGNORE", "NULLS"):
            return self.expression(exp.IgnoreNulls, this=this)
        if self._match_text_seq("RESPECT", "NULLS"):
            return self.expression(exp.RespectNulls, this=this)
        return this

    def _parse_having_max(self, this: t.Optional[exp.Expression]) -> t.Optional[exp.Expression]:
        if self._match(TokenType.HAVING):
            self._match_texts(("MAX", "MIN"))
            max = self._prev.text.upper() != "MIN"
            return self.expression(
                exp.HavingMax, this=this, expression=self._parse_column(), max=max
            )

        return this
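
    # Illustrative examples, not part of the original source, of the call shapes that
    # _parse_window below recognizes around an already-parsed function `this`:
    #
    #   PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY x) OVER (PARTITION BY y)  -- T-SQL
    #   COUNT(*) FILTER (WHERE x > 0) OVER w
    #   FIRST_VALUE(x) IGNORE NULLS OVER (ORDER BY y)                         -- Oracle/Snowflake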
    def _parse_window(
        self, this: t.Optional[exp.Expression], alias: bool = False
    ) -> t.Optional[exp.Expression]:
        func = this
        comments = func.comments if isinstance(func, exp.Expression) else None

        # T-SQL allows the OVER (...) syntax after WITHIN GROUP.
        # https://learn.microsoft.com/en-us/sql/t-sql/functions/percentile-disc-transact-sql?view=sql-server-ver16
        if self._match_text_seq("WITHIN", "GROUP"):
            order = self._parse_wrapped(self._parse_order)
            this = self.expression(exp.WithinGroup, this=this, expression=order)

        if self._match_pair(TokenType.FILTER, TokenType.L_PAREN):
            self._match(TokenType.WHERE)
            this = self.expression(
                exp.Filter, this=this, expression=self._parse_where(skip_where_token=True)
            )
            self._match_r_paren()

        # SQL spec defines an optional [ { IGNORE | RESPECT } NULLS ] OVER
        # Some dialects choose to implement and some do not.
        # https://dev.mysql.com/doc/refman/8.0/en/window-function-descriptions.html

        # There is some code above in _parse_lambda that handles
        # SELECT FIRST_VALUE(TABLE.COLUMN IGNORE|RESPECT NULLS) OVER ...

        # The below changes handle
        # SELECT FIRST_VALUE(TABLE.COLUMN) IGNORE|RESPECT NULLS OVER ...

        # Oracle allows both formats
        # (https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/img_text/first_value.html)
        # and Snowflake chose to do the same for familiarity
        # https://docs.snowflake.com/en/sql-reference/functions/first_value.html#usage-notes
        if isinstance(this, exp.AggFunc):
            ignore_respect = this.find(exp.IgnoreNulls, exp.RespectNulls)

            if ignore_respect and ignore_respect is not this:
                ignore_respect.replace(ignore_respect.this)
                this = self.expression(ignore_respect.__class__, this=this)

        this = self._parse_respect_or_ignore_nulls(this)

        # bigquery select from window x AS (partition by ...)
        if alias:
            over = None
            self._match(TokenType.ALIAS)
        elif not self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS):
            return this
        else:
            over = self._prev.text.upper()

        if comments and isinstance(func, exp.Expression):
            func.pop_comments()

        if not self._match(TokenType.L_PAREN):
            return self.expression(
                exp.Window,
                comments=comments,
                this=this,
                alias=self._parse_id_var(False),
                over=over,
            )

        window_alias = self._parse_id_var(any_token=False, tokens=self.WINDOW_ALIAS_TOKENS)

        first = self._match(TokenType.FIRST)
        if self._match_text_seq("LAST"):
            first = False

        partition, order = self._parse_partition_and_order()
        kind = self._match_set((TokenType.ROWS, TokenType.RANGE)) and self._prev.text

        if kind:
            self._match(TokenType.BETWEEN)
            start = self._parse_window_spec()
            self._match(TokenType.AND)
            end = self._parse_window_spec()
            exclude = (
                self._parse_var_from_options(self.WINDOW_EXCLUDE_OPTIONS)
                if self._match_text_seq("EXCLUDE")
                else None
            )

            spec = self.expression(
                exp.WindowSpec,
                kind=kind,
                start=start["value"],
                start_side=start["side"],
                end=end["value"],
                end_side=end["side"],
                exclude=exclude,
            )
        else:
            spec = None

        self._match_r_paren()

        window = self.expression(
            exp.Window,
            comments=comments,
            this=this,
            partition_by=partition,
            order=order,
            spec=spec,
            alias=window_alias,
            over=over,
            first=first,
        )

        # This covers Oracle's FIRST/LAST syntax: aggregate KEEP (...) OVER (...)
        if self._match_set(self.WINDOW_BEFORE_PAREN_TOKENS, advance=False):
            return self._parse_window(window, alias=alias)

        return window
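
    # Illustrative example, not part of the original source: the two helpers below feed
    # _parse_window. For a frame clause such as
    #
    #   ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    #
    # _parse_window_spec is called once per bound and returns dicts along the lines of
    # {"value": "UNBOUNDED", "side": "PRECEDING"} and {"value": "CURRENT ROW", "side": None},
    # which _parse_window assembles into an exp.WindowSpec.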
    def _parse_partition_and_order(
        self,
    ) -> t.Tuple[t.List[exp.Expression], t.Optional[exp.Expression]]:
        return self._parse_partition_by(), self._parse_order()

    def _parse_window_spec(self) -> t.Dict[str, t.Optional[str | exp.Expression]]:
        self._match(TokenType.BETWEEN)

        return {
            "value": (
                (self._match_text_seq("UNBOUNDED") and "UNBOUNDED")
                or (self._match_text_seq("CURRENT", "ROW") and "CURRENT ROW")
                or self._parse_bitwise()
            ),
            "side": self._match_texts(self.WINDOW_SIDES) and self._prev.text,
        }

    def _parse_alias(
        self, this: t.Optional[exp.Expression], explicit: bool = False
    ) -> t.Optional[exp.Expression]:
        # In some dialects, LIMIT and OFFSET can act as both identifiers and keywords (clauses)
        # so this section tries to parse the clause version and if it fails, it treats the token
        # as an identifier (alias)
        if self._can_parse_limit_or_offset():
            return this

        any_token = self._match(TokenType.ALIAS)
        comments = self._prev_comments or []

        if explicit and not any_token:
            return this

        if self._match(TokenType.L_PAREN):
            aliases = self.expression(
                exp.Aliases,
                comments=comments,
                this=this,
                expressions=self._parse_csv(lambda: self._parse_id_var(any_token)),
            )
            self._match_r_paren(aliases)
            return aliases

        alias = self._parse_id_var(any_token, tokens=self.ALIAS_TOKENS) or (
            self.STRING_ALIASES and self._parse_string_as_identifier()
        )

        if alias:
            comments.extend(alias.pop_comments())
            this = self.expression(exp.Alias, comments=comments, this=this, alias=alias)
            column = this.this

            # Moves the comment next to the alias in `expr /* comment */ AS alias`
            if not this.comments and column and column.comments:
                this.comments = column.pop_comments()

        return this

    def _parse_id_var(
        self,
        any_token: bool = True,
        tokens: t.Optional[t.Collection[TokenType]] = None,
    ) -> t.Optional[exp.Expression]:
        expression = self._parse_identifier()
        if not expression and (
            (any_token and self._advance_any()) or self._match_set(tokens or self.ID_VAR_TOKENS)
        ):
            quoted = self._prev.token_type == TokenType.STRING
            expression = self._identifier_expression(quoted=quoted)

        return expression

    def _parse_string(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.STRING_PARSERS):
            return self.STRING_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_string_as_identifier(self) -> t.Optional[exp.Identifier]:
        output = exp.to_identifier(self._match(TokenType.STRING) and self._prev.text, quoted=True)
        if output:
            output.update_positions(self._prev)
        return output

    def _parse_number(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NUMERIC_PARSERS):
            return self.NUMERIC_PARSERS[self._prev.token_type](self, self._prev)
        return self._parse_placeholder()

    def _parse_identifier(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.IDENTIFIER):
            return self._identifier_expression(quoted=True)
        return self._parse_placeholder()

    def _parse_var(
        self,
        any_token: bool = False,
        tokens: t.Optional[t.Collection[TokenType]] = None,
        upper: bool = False,
    ) -> t.Optional[exp.Expression]:
        if (
            (any_token and self._advance_any())
            or self._match(TokenType.VAR)
            or (self._match_set(tokens) if tokens else False)
        ):
            return self.expression(
                exp.Var, this=self._prev.text.upper() if upper else self._prev.text
            )
        return self._parse_placeholder()

    def _advance_any(self, ignore_reserved: bool = False) -> t.Optional[Token]:
        if self._curr and (ignore_reserved or self._curr.token_type not in self.RESERVED_TOKENS):
            self._advance()
            return self._prev
        return None

    def _parse_var_or_string(self, upper: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_string() or self._parse_var(any_token=True, upper=upper)

    def _parse_primary_or_var(self) -> t.Optional[exp.Expression]:
        return self._parse_primary() or self._parse_var(any_token=True)

    def _parse_null(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.NULL_TOKENS):
            return self.PRIMARY_PARSERS[TokenType.NULL](self, self._prev)
        return self._parse_placeholder()

    def _parse_boolean(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.TRUE):
            return self.PRIMARY_PARSERS[TokenType.TRUE](self, self._prev)
        if self._match(TokenType.FALSE):
            return self.PRIMARY_PARSERS[TokenType.FALSE](self, self._prev)
        return self._parse_placeholder()

    def _parse_star(self) -> t.Optional[exp.Expression]:
        if self._match(TokenType.STAR):
            return self.PRIMARY_PARSERS[TokenType.STAR](self, self._prev)
        return self._parse_placeholder()

    def _parse_parameter(self) -> exp.Parameter:
        this = self._parse_identifier() or self._parse_primary_or_var()
        return self.expression(exp.Parameter, this=this)

    def _parse_placeholder(self) -> t.Optional[exp.Expression]:
        if self._match_set(self.PLACEHOLDER_PARSERS):
            placeholder = self.PLACEHOLDER_PARSERS[self._prev.token_type](self)
            if placeholder:
                return placeholder
            self._advance(-1)
        return None

    def _parse_star_op(self, *keywords: str) -> t.Optional[t.List[exp.Expression]]:
        if not self._match_texts(keywords):
            return None
        if self._match(TokenType.L_PAREN, advance=False):
            return self._parse_wrapped_csv(self._parse_expression)

        expression = self._parse_expression()
        return [expression] if expression else None

    def _parse_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA
    ) -> t.List[exp.Expression]:
        parse_result = parse_method()
        items = [parse_result] if parse_result is not None else []

        while self._match(sep):
            self._add_comments(parse_result)
            parse_result = parse_method()
            if parse_result is not None:
                items.append(parse_result)

        return items

    def _parse_tokens(
        self, parse_method: t.Callable, expressions: t.Dict
    ) -> t.Optional[exp.Expression]:
        this = parse_method()

        while self._match_set(expressions):
            this = self.expression(
                expressions[self._prev.token_type],
                this=this,
                comments=self._prev_comments,
                expression=parse_method(),
            )

        return this

    def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]:
        return self._parse_wrapped_csv(self._parse_id_var, optional=optional)
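
    # Illustrative note, not part of the original source: most list-shaped grammar rules
    # are built from these small combinators. For instance, a parenthesized identifier
    # list such as "(a, b, c)" can be consumed with
    #
    #   self._parse_wrapped_csv(self._parse_id_var)
    #
    # which composes _parse_csv (comma separation) with _parse_wrapped (paren matching).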
    def _parse_wrapped_csv(
        self, parse_method: t.Callable, sep: TokenType = TokenType.COMMA, optional: bool = False
    ) -> t.List[exp.Expression]:
        return self._parse_wrapped(
            lambda: self._parse_csv(parse_method, sep=sep), optional=optional
        )

    def _parse_wrapped(self, parse_method: t.Callable, optional: bool = False) -> t.Any:
        wrapped = self._match(TokenType.L_PAREN)
        if not wrapped and not optional:
            self.raise_error("Expecting (")
        parse_result = parse_method()
        if wrapped:
            self._match_r_paren()
        return parse_result

    def _parse_expressions(self) -> t.List[exp.Expression]:
        return self._parse_csv(self._parse_expression)

    def _parse_select_or_expression(self, alias: bool = False) -> t.Optional[exp.Expression]:
        return self._parse_select() or self._parse_set_operations(
            self._parse_alias(self._parse_assignment(), explicit=True)
            if alias
            else self._parse_assignment()
        )

    def _parse_ddl_select(self) -> t.Optional[exp.Expression]:
        return self._parse_query_modifiers(
            self._parse_set_operations(self._parse_select(nested=True, parse_subquery_alias=False))
        )

    def _parse_transaction(self) -> exp.Transaction | exp.Command:
        this = None
        if self._match_texts(self.TRANSACTION_KIND):
            this = self._prev.text

        self._match_texts(("TRANSACTION", "WORK"))

        modes = []
        while True:
            mode = []
            while self._match(TokenType.VAR):
                mode.append(self._prev.text)

            if mode:
                modes.append(" ".join(mode))
            if not self._match(TokenType.COMMA):
                break

        return self.expression(exp.Transaction, this=this, modes=modes)

    def _parse_commit_or_rollback(self) -> exp.Commit | exp.Rollback:
        chain = None
        savepoint = None
        is_rollback = self._prev.token_type == TokenType.ROLLBACK

        self._match_texts(("TRANSACTION", "WORK"))

        if self._match_text_seq("TO"):
            self._match_text_seq("SAVEPOINT")
            savepoint = self._parse_id_var()

        if self._match(TokenType.AND):
            chain = not self._match_text_seq("NO")
            self._match_text_seq("CHAIN")

        if is_rollback:
            return self.expression(exp.Rollback, savepoint=savepoint)

        return self.expression(exp.Commit, chain=chain)

    def _parse_refresh(self) -> exp.Refresh:
        self._match(TokenType.TABLE)
        return self.expression(exp.Refresh, this=self._parse_string() or self._parse_table())

    def _parse_column_def_with_exists(self):
        start = self._index
        self._match(TokenType.COLUMN)

        exists_column = self._parse_exists(not_=True)
        expression = self._parse_field_def()

        if not isinstance(expression, exp.ColumnDef):
            self._retreat(start)
            return None

        expression.set("exists", exists_column)

        return expression

    def _parse_add_column(self) -> t.Optional[exp.ColumnDef]:
        if not self._prev.text.upper() == "ADD":
            return None

        expression = self._parse_column_def_with_exists()
        if not expression:
            return None

        # https://docs.databricks.com/delta/update-schema.html#explicitly-update-schema-to-add-columns
        if self._match_texts(("FIRST", "AFTER")):
            position = self._prev.text
            column_position = self.expression(
                exp.ColumnPosition, this=self._parse_column(), position=position
            )
            expression.set("position", column_position)

        return expression
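
    # Illustrative example, not part of the original source: _parse_add_column above
    # captures the optional MySQL/Databricks column placement, e.g.
    #
    #   ALTER TABLE t ADD COLUMN new_col INT AFTER existing_col
    #
    # by attaching an exp.ColumnPosition to the parsed exp.ColumnDef.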

    def _parse_drop_column(self) -> t.Optional[exp.Drop | exp.Command]:
        drop = self._match(TokenType.DROP) and self._parse_drop()
        if drop and not isinstance(drop, exp.Command):
            drop.set("kind", drop.args.get("kind", "COLUMN"))
        return drop

    # https://docs.aws.amazon.com/athena/latest/ug/alter-table-drop-partition.html
    def _parse_drop_partition(self, exists: t.Optional[bool] = None) -> exp.DropPartition:
        return self.expression(
            exp.DropPartition, expressions=self._parse_csv(self._parse_partition), exists=exists
        )

    def _parse_alter_table_add(self) -> t.List[exp.Expression]:
        def _parse_add_alteration() -> t.Optional[exp.Expression]:
            self._match_text_seq("ADD")
            if self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False):
                return self.expression(
                    exp.AddConstraint, expressions=self._parse_csv(self._parse_constraint)
                )

            column_def = self._parse_add_column()
            if isinstance(column_def, exp.ColumnDef):
                return column_def

            exists = self._parse_exists(not_=True)
            if self._match_pair(TokenType.PARTITION, TokenType.L_PAREN, advance=False):
                return self.expression(
                    exp.AddPartition,
                    exists=exists,
                    this=self._parse_field(any_token=True),
                    location=self._match_text_seq("LOCATION", advance=False)
                    and self._parse_property(),
                )

            return None

        if not self._match_set(self.ADD_CONSTRAINT_TOKENS, advance=False) and (
            not self.dialect.ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
            or self._match_text_seq("COLUMNS")
        ):
            schema = self._parse_schema()

            return (
                ensure_list(schema)
                if schema
                else self._parse_csv(self._parse_column_def_with_exists)
            )

        return self._parse_csv(_parse_add_alteration)

    def _parse_alter_table_alter(self) -> t.Optional[exp.Expression]:
        if self._match_texts(self.ALTER_ALTER_PARSERS):
            return self.ALTER_ALTER_PARSERS[self._prev.text.upper()](self)

        # Many dialects support the ALTER [COLUMN] syntax, so if there is no
        # keyword after ALTER we default to parsing this statement
        self._match(TokenType.COLUMN)
        column = self._parse_field(any_token=True)

        if self._match_pair(TokenType.DROP, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, drop=True)
        if self._match_pair(TokenType.SET, TokenType.DEFAULT):
            return self.expression(exp.AlterColumn, this=column, default=self._parse_assignment())
        if self._match(TokenType.COMMENT):
            return self.expression(exp.AlterColumn, this=column, comment=self._parse_string())
        if self._match_text_seq("DROP", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                drop=True,
                allow_null=True,
            )
        if self._match_text_seq("SET", "NOT", "NULL"):
            return self.expression(
                exp.AlterColumn,
                this=column,
                allow_null=False,
            )

        if self._match_text_seq("SET", "VISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="VISIBLE")
        if self._match_text_seq("SET", "INVISIBLE"):
            return self.expression(exp.AlterColumn, this=column, visible="INVISIBLE")

        self._match_text_seq("SET", "DATA")
        self._match_text_seq("TYPE")
        return self.expression(
            exp.AlterColumn,
            this=column,
            dtype=self._parse_types(),
            collate=self._match(TokenType.COLLATE) and self._parse_term(),
            using=self._match(TokenType.USING) and self._parse_assignment(),
        )
self._match_texts(("ALL", "EVEN", "AUTO")): 7483 return self.expression(exp.AlterDistStyle, this=exp.var(self._prev.text.upper())) 7484 7485 self._match_text_seq("KEY", "DISTKEY") 7486 return self.expression(exp.AlterDistStyle, this=self._parse_column()) 7487 7488 def _parse_alter_sortkey(self, compound: t.Optional[bool] = None) -> exp.AlterSortKey: 7489 if compound: 7490 self._match_text_seq("SORTKEY") 7491 7492 if self._match(TokenType.L_PAREN, advance=False): 7493 return self.expression( 7494 exp.AlterSortKey, expressions=self._parse_wrapped_id_vars(), compound=compound 7495 ) 7496 7497 self._match_texts(("AUTO", "NONE")) 7498 return self.expression( 7499 exp.AlterSortKey, this=exp.var(self._prev.text.upper()), compound=compound 7500 ) 7501 7502 def _parse_alter_table_drop(self) -> t.List[exp.Expression]: 7503 index = self._index - 1 7504 7505 partition_exists = self._parse_exists() 7506 if self._match(TokenType.PARTITION, advance=False): 7507 return self._parse_csv(lambda: self._parse_drop_partition(exists=partition_exists)) 7508 7509 self._retreat(index) 7510 return self._parse_csv(self._parse_drop_column) 7511 7512 def _parse_alter_table_rename(self) -> t.Optional[exp.AlterRename | exp.RenameColumn]: 7513 if self._match(TokenType.COLUMN) or not self.ALTER_RENAME_REQUIRES_COLUMN: 7514 exists = self._parse_exists() 7515 old_column = self._parse_column() 7516 to = self._match_text_seq("TO") 7517 new_column = self._parse_column() 7518 7519 if old_column is None or to is None or new_column is None: 7520 return None 7521 7522 return self.expression(exp.RenameColumn, this=old_column, to=new_column, exists=exists) 7523 7524 self._match_text_seq("TO") 7525 return self.expression(exp.AlterRename, this=self._parse_table(schema=True)) 7526 7527 def _parse_alter_table_set(self) -> exp.AlterSet: 7528 alter_set = self.expression(exp.AlterSet) 7529 7530 if self._match(TokenType.L_PAREN, advance=False) or self._match_text_seq( 7531 "TABLE", "PROPERTIES" 7532 ): 7533 alter_set.set("expressions", self._parse_wrapped_csv(self._parse_assignment)) 7534 elif self._match_text_seq("FILESTREAM_ON", advance=False): 7535 alter_set.set("expressions", [self._parse_assignment()]) 7536 elif self._match_texts(("LOGGED", "UNLOGGED")): 7537 alter_set.set("option", exp.var(self._prev.text.upper())) 7538 elif self._match_text_seq("WITHOUT") and self._match_texts(("CLUSTER", "OIDS")): 7539 alter_set.set("option", exp.var(f"WITHOUT {self._prev.text.upper()}")) 7540 elif self._match_text_seq("LOCATION"): 7541 alter_set.set("location", self._parse_field()) 7542 elif self._match_text_seq("ACCESS", "METHOD"): 7543 alter_set.set("access_method", self._parse_field()) 7544 elif self._match_text_seq("TABLESPACE"): 7545 alter_set.set("tablespace", self._parse_field()) 7546 elif self._match_text_seq("FILE", "FORMAT") or self._match_text_seq("FILEFORMAT"): 7547 alter_set.set("file_format", [self._parse_field()]) 7548 elif self._match_text_seq("STAGE_FILE_FORMAT"): 7549 alter_set.set("file_format", self._parse_wrapped_options()) 7550 elif self._match_text_seq("STAGE_COPY_OPTIONS"): 7551 alter_set.set("copy_options", self._parse_wrapped_options()) 7552 elif self._match_text_seq("TAG") or self._match_text_seq("TAGS"): 7553 alter_set.set("tag", self._parse_csv(self._parse_assignment)) 7554 else: 7555 if self._match_text_seq("SERDE"): 7556 alter_set.set("serde", self._parse_field()) 7557 7558 properties = self._parse_wrapped(self._parse_properties, optional=True) 7559 alter_set.set("expressions", [properties]) 7560 7561 return alter_set 
    def _parse_alter(self) -> exp.Alter | exp.Command:
        start = self._prev

        alter_token = self._match_set(self.ALTERABLES) and self._prev
        if not alter_token:
            return self._parse_as_command(start)

        exists = self._parse_exists()
        only = self._match_text_seq("ONLY")
        this = self._parse_table(schema=True)
        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._next:
            self._advance()

        parser = self.ALTER_PARSERS.get(self._prev.text.upper()) if self._prev else None
        if parser:
            actions = ensure_list(parser(self))
            not_valid = self._match_text_seq("NOT", "VALID")
            options = self._parse_csv(self._parse_property)

            if not self._curr and actions:
                return self.expression(
                    exp.Alter,
                    this=this,
                    kind=alter_token.text.upper(),
                    exists=exists,
                    actions=actions,
                    only=only,
                    options=options,
                    cluster=cluster,
                    not_valid=not_valid,
                )

        return self._parse_as_command(start)

    def _parse_analyze(self) -> exp.Analyze | exp.Command:
        start = self._prev
        # https://duckdb.org/docs/sql/statements/analyze
        if not self._curr:
            return self.expression(exp.Analyze)

        options = []
        while self._match_texts(self.ANALYZE_STYLES):
            if self._prev.text.upper() == "BUFFER_USAGE_LIMIT":
                options.append(f"BUFFER_USAGE_LIMIT {self._parse_number()}")
            else:
                options.append(self._prev.text.upper())

        this: t.Optional[exp.Expression] = None
        inner_expression: t.Optional[exp.Expression] = None

        kind = self._curr and self._curr.text.upper()

        if self._match(TokenType.TABLE) or self._match(TokenType.INDEX):
            this = self._parse_table_parts()
        elif self._match_text_seq("TABLES"):
            if self._match_set((TokenType.FROM, TokenType.IN)):
                kind = f"{kind} {self._prev.text.upper()}"
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("DATABASE"):
            this = self._parse_table(schema=True, is_db_reference=True)
        elif self._match_text_seq("CLUSTER"):
            this = self._parse_table()
        # Try matching inner expression keywords before falling back to parsing a table.
        elif self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            kind = None
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)
        else:
            # Empty kind https://prestodb.io/docs/current/sql/analyze.html
            kind = None
            this = self._parse_table_parts()

        partition = self._try_parse(self._parse_partition)
        if not partition and self._match_texts(self.PARTITION_KEYWORDS):
            return self._parse_as_command(start)

        # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
        if self._match_text_seq("WITH", "SYNC", "MODE") or self._match_text_seq(
            "WITH", "ASYNC", "MODE"
        ):
            mode = f"WITH {self._tokens[self._index - 2].text.upper()} MODE"
        else:
            mode = None

        if self._match_texts(self.ANALYZE_EXPRESSION_PARSERS):
            inner_expression = self.ANALYZE_EXPRESSION_PARSERS[self._prev.text.upper()](self)

        properties = self._parse_properties()
        return self.expression(
            exp.Analyze,
            kind=kind,
            this=this,
            mode=mode,
            partition=partition,
            properties=properties,
            expression=inner_expression,
            options=options,
        )

    # https://spark.apache.org/docs/3.5.1/sql-ref-syntax-aux-analyze-table.html
    def _parse_analyze_statistics(self) -> exp.AnalyzeStatistics:
        this = None
        kind = self._prev.text.upper()
        option = self._prev.text.upper() if self._match_text_seq("DELTA") else None
        expressions = []

        if not self._match_text_seq("STATISTICS"):
            self.raise_error("Expecting token STATISTICS")

        if self._match_text_seq("NOSCAN"):
            this = "NOSCAN"
        elif self._match(TokenType.FOR):
            if self._match_text_seq("ALL", "COLUMNS"):
                this = "FOR ALL COLUMNS"
            if self._match_texts("COLUMNS"):
                this = "FOR COLUMNS"
                expressions = self._parse_csv(self._parse_column_reference)
        elif self._match_text_seq("SAMPLE"):
            sample = self._parse_number()
            expressions = [
                self.expression(
                    exp.AnalyzeSample,
                    sample=sample,
                    kind=self._prev.text.upper() if self._match(TokenType.PERCENT) else None,
                )
            ]

        return self.expression(
            exp.AnalyzeStatistics, kind=kind, option=option, this=this, expressions=expressions
        )

    # https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/ANALYZE.html
    def _parse_analyze_validate(self) -> exp.AnalyzeValidate:
        kind = None
        this = None
        expression: t.Optional[exp.Expression] = None
        if self._match_text_seq("REF", "UPDATE"):
            kind = "REF"
            this = "UPDATE"
            if self._match_text_seq("SET", "DANGLING", "TO", "NULL"):
                this = "UPDATE SET DANGLING TO NULL"
        elif self._match_text_seq("STRUCTURE"):
            kind = "STRUCTURE"
            if self._match_text_seq("CASCADE", "FAST"):
                this = "CASCADE FAST"
            elif self._match_text_seq("CASCADE", "COMPLETE") and self._match_texts(
                ("ONLINE", "OFFLINE")
            ):
                this = f"CASCADE COMPLETE {self._prev.text.upper()}"
                expression = self._parse_into()

        return self.expression(exp.AnalyzeValidate, kind=kind, this=this, expression=expression)

    def _parse_analyze_columns(self) -> t.Optional[exp.AnalyzeColumns]:
        this = self._prev.text.upper()
        if self._match_text_seq("COLUMNS"):
            return self.expression(exp.AnalyzeColumns, this=f"{this} {self._prev.text.upper()}")
        return None

    def _parse_analyze_delete(self) -> t.Optional[exp.AnalyzeDelete]:
        kind = self._prev.text.upper() if self._match_text_seq("SYSTEM") else None
        if self._match_text_seq("STATISTICS"):
            return self.expression(exp.AnalyzeDelete, kind=kind)
        return None

    def _parse_analyze_list(self) -> t.Optional[exp.AnalyzeListChainedRows]:
        if self._match_text_seq("CHAINED", "ROWS"):
            return self.expression(exp.AnalyzeListChainedRows, expression=self._parse_into())
        return None

    # https://dev.mysql.com/doc/refman/8.4/en/analyze-table.html
    def _parse_analyze_histogram(self) -> exp.AnalyzeHistogram:
        this = self._prev.text.upper()
        expression: t.Optional[exp.Expression] = None
        expressions = []
        update_options = None

        if self._match_text_seq("HISTOGRAM", "ON"):
            expressions = self._parse_csv(self._parse_column_reference)
            with_expressions = []
            while self._match(TokenType.WITH):
                # https://docs.starrocks.io/docs/sql-reference/sql-statements/cbo_stats/ANALYZE_TABLE/
                if self._match_texts(("SYNC", "ASYNC")):
                    if self._match_text_seq("MODE", advance=False):
                        with_expressions.append(f"{self._prev.text.upper()} MODE")
                        self._advance()
                else:
                    buckets = self._parse_number()
                    if self._match_text_seq("BUCKETS"):
                        with_expressions.append(f"{buckets} BUCKETS")
            if with_expressions:
                expression = self.expression(exp.AnalyzeWith, expressions=with_expressions)

            if self._match_texts(("MANUAL", "AUTO")) and self._match(
                TokenType.UPDATE, advance=False
            ):
                update_options = self._prev.text.upper()
                self._advance()
            elif self._match_text_seq("USING", "DATA"):
                expression = self.expression(exp.UsingData, this=self._parse_string())

        return self.expression(
            exp.AnalyzeHistogram,
            this=this,
            expressions=expressions,
            expression=expression,
            update_options=update_options,
        )

    def _parse_merge(self) -> exp.Merge:
        self._match(TokenType.INTO)
        target = self._parse_table()

        if target and self._match(TokenType.ALIAS, advance=False):
            target.set("alias", self._parse_table_alias())

        self._match(TokenType.USING)
        using = self._parse_table()

        self._match(TokenType.ON)
        on = self._parse_assignment()

        return self.expression(
            exp.Merge,
            this=target,
            using=using,
            on=on,
            whens=self._parse_when_matched(),
            returning=self._parse_returning(),
        )
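
    # Illustrative example, not part of the original source: the WHEN clauses that
    # _parse_when_matched below folds into a single exp.Whens container:
    #
    #   MERGE INTO t USING s ON t.id = s.id
    #   WHEN MATCHED AND s.op = 'D' THEN DELETE
    #   WHEN MATCHED THEN UPDATE SET t.v = s.v
    #   WHEN NOT MATCHED THEN INSERT (id, v) VALUES (s.id, s.v)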
    def _parse_when_matched(self) -> exp.Whens:
        whens = []

        while self._match(TokenType.WHEN):
            matched = not self._match(TokenType.NOT)
            self._match_text_seq("MATCHED")
            source = (
                False
                if self._match_text_seq("BY", "TARGET")
                else self._match_text_seq("BY", "SOURCE")
            )
            condition = self._parse_assignment() if self._match(TokenType.AND) else None

            self._match(TokenType.THEN)

            if self._match(TokenType.INSERT):
                this = self._parse_star()
                if this:
                    then: t.Optional[exp.Expression] = self.expression(exp.Insert, this=this)
                else:
                    then = self.expression(
                        exp.Insert,
                        this=exp.var("ROW")
                        if self._match_text_seq("ROW")
                        else self._parse_value(values=False),
                        expression=self._match_text_seq("VALUES") and self._parse_value(),
                    )
            elif self._match(TokenType.UPDATE):
                expressions = self._parse_star()
                if expressions:
                    then = self.expression(exp.Update, expressions=expressions)
                else:
                    then = self.expression(
                        exp.Update,
                        expressions=self._match(TokenType.SET)
                        and self._parse_csv(self._parse_equality),
                    )
            elif self._match(TokenType.DELETE):
                then = self.expression(exp.Var, this=self._prev.text)
            else:
                then = self._parse_var_from_options(self.CONFLICT_ACTIONS)

            whens.append(
                self.expression(
                    exp.When,
                    matched=matched,
                    source=source,
                    condition=condition,
                    then=then,
                )
            )
        return self.expression(exp.Whens, expressions=whens)

    def _parse_show(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SHOW_PARSERS, self.SHOW_TRIE)
        if parser:
            return parser(self)
        return self._parse_as_command(self._prev)

    def _parse_set_item_assignment(
        self, kind: t.Optional[str] = None
    ) -> t.Optional[exp.Expression]:
        index = self._index

        if kind in ("GLOBAL", "SESSION") and self._match_text_seq("TRANSACTION"):
            return self._parse_set_transaction(global_=kind == "GLOBAL")

        left = self._parse_primary() or self._parse_column()
        assignment_delimiter = self._match_texts(("=", "TO"))

        if not left or (self.SET_REQUIRES_ASSIGNMENT_DELIMITER and not assignment_delimiter):
            self._retreat(index)
            return None

        right = self._parse_statement() or self._parse_id_var()
        if isinstance(right, (exp.Column, exp.Identifier)):
            right = exp.var(right.name)

        this = self.expression(exp.EQ, this=left, expression=right)
        return self.expression(exp.SetItem, this=this, kind=kind)

    def _parse_set_transaction(self, global_: bool = False) -> exp.Expression:
        self._match_text_seq("TRANSACTION")
        characteristics = self._parse_csv(
            lambda: self._parse_var_from_options(self.TRANSACTION_CHARACTERISTICS)
        )
        return self.expression(
            exp.SetItem,
            expressions=characteristics,
            kind="TRANSACTION",
            **{"global": global_},  # type: ignore
        )

    def _parse_set_item(self) -> t.Optional[exp.Expression]:
        parser = self._find_parser(self.SET_PARSERS, self.SET_TRIE)
        return parser(self) if parser else self._parse_set_item_assignment(kind=None)

    def _parse_set(self, unset: bool = False, tag: bool = False) -> exp.Set | exp.Command:
        index = self._index
        set_ = self.expression(
            exp.Set, expressions=self._parse_csv(self._parse_set_item), unset=unset, tag=tag
        )

        if self._curr:
            self._retreat(index)
            return self._parse_as_command(self._prev)

        return set_

    def _parse_var_from_options(
        self, options: OPTIONS_TYPE, raise_unmatched: bool = True
    ) -> t.Optional[exp.Var]:
        start = self._curr
        if not start:
            return None

        option = start.text.upper()
        continuations = options.get(option)

        index = self._index
        self._advance()
        for keywords in continuations or []:
            if isinstance(keywords, str):
                keywords = (keywords,)

            if self._match_text_seq(*keywords):
                option = f"{option} {' '.join(keywords)}"
                break
        else:
            if continuations or continuations is None:
                if raise_unmatched:
                    self.raise_error(f"Unknown option {option}")

                self._retreat(index)
                return None

        return exp.var(option)

    def _parse_as_command(self, start: Token) -> exp.Command:
        while self._curr:
            self._advance()
        text = self._find_sql(start, self._prev)
        size = len(start.text)
        self._warn_unsupported()
        return exp.Command(this=text[:size], expression=text[size:])

    def _parse_dict_property(self, this: str) -> exp.DictProperty:
        settings = []

        self._match_l_paren()
        kind = self._parse_id_var()

        if self._match(TokenType.L_PAREN):
            while True:
                key = self._parse_id_var()
                value = self._parse_primary()
                if not key and value is None:
                    break
                settings.append(self.expression(exp.DictSubProperty, this=key, value=value))
            self._match(TokenType.R_PAREN)

        self._match_r_paren()

        return self.expression(
            exp.DictProperty,
            this=this,
            kind=kind.this if kind else None,
            settings=settings,
        )

    def _parse_dict_range(self, this: str) -> exp.DictRange:
        self._match_l_paren()
        has_min = self._match_text_seq("MIN")
        if has_min:
            min = self._parse_var() or self._parse_primary()
            self._match_text_seq("MAX")
            max = self._parse_var() or self._parse_primary()
        else:
            max = self._parse_var() or self._parse_primary()
            min = exp.Literal.number(0)
        self._match_r_paren()
        return self.expression(exp.DictRange, this=this, min=min, max=max)

    def _parse_comprehension(
        self, this: t.Optional[exp.Expression]
    ) -> t.Optional[exp.Comprehension]:
        index = self._index
        expression = self._parse_column()
        if not self._match(TokenType.IN):
            self._retreat(index - 1)
            return None
        iterator = self._parse_column()
        condition = self._parse_assignment() if self._match_text_seq("IF") else None
        return self.expression(
            exp.Comprehension,
            this=this,
            expression=expression,
            iterator=iterator,
            condition=condition,
        )

    def _parse_heredoc(self) -> t.Optional[exp.Heredoc]:
        if self._match(TokenType.HEREDOC_STRING):
            return self.expression(exp.Heredoc, this=self._prev.text)

        if not self._match_text_seq("$"):
            return None

        tags = ["$"]
        tag_text = None

        if self._is_connected():
            self._advance()
            tags.append(self._prev.text.upper())
        else:
            self.raise_error("No closing $ found")

        if tags[-1] != "$":
            if self._is_connected() and self._match_text_seq("$"):
                tag_text = tags[-1]
                tags.append("$")
            else:
                self.raise_error("No closing $ found")

        heredoc_start = self._curr

        while self._curr:
            if self._match_text_seq(*tags, advance=False):
                this = self._find_sql(heredoc_start, self._prev)
                self._advance(len(tags))
                return self.expression(exp.Heredoc, this=this, tag=tag_text)

            self._advance()

        self.raise_error(f"No closing {''.join(tags)} found")
        return None

    def _find_parser(
        self, parsers: t.Dict[str, t.Callable], trie: t.Dict
    ) -> t.Optional[t.Callable]:
        if not self._curr:
            return None

        index = self._index
        this = []
        while True:
            # The current token might be multiple words
            curr = self._curr.text.upper()
            key = curr.split(" ")
            this.append(curr)

            self._advance()
            result, trie = in_trie(trie, key)
            if result == TrieResult.FAILED:
                break

            if result == TrieResult.EXISTS:
                subparser = parsers[" ".join(this)]
                return subparser

        self._retreat(index)
        return None
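
    # Illustrative note, not part of the original source: the _match* primitives below
    # are the backbone of this recursive-descent parser. On success they advance and
    # return a truthy value; on failure they return None without consuming anything
    # (retreating if they had advanced), which keeps speculative parsing with
    # self._index / self._retreat(index) cheap throughout the module.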
    def _match(self, token_type, advance=True, expression=None):
        if not self._curr:
            return None

        if self._curr.token_type == token_type:
            if advance:
                self._advance()
            self._add_comments(expression)
            return True

        return None

    def _match_set(self, types, advance=True):
        if not self._curr:
            return None

        if self._curr.token_type in types:
            if advance:
                self._advance()
            return True

        return None

    def _match_pair(self, token_type_a, token_type_b, advance=True):
        if not self._curr or not self._next:
            return None

        if self._curr.token_type == token_type_a and self._next.token_type == token_type_b:
            if advance:
                self._advance(2)
            return True

        return None

    def _match_l_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.L_PAREN, expression=expression):
            self.raise_error("Expecting (")

    def _match_r_paren(self, expression: t.Optional[exp.Expression] = None) -> None:
        if not self._match(TokenType.R_PAREN, expression=expression):
            self.raise_error("Expecting )")

    def _match_texts(self, texts, advance=True):
        if (
            self._curr
            and self._curr.token_type != TokenType.STRING
            and self._curr.text.upper() in texts
        ):
            if advance:
                self._advance()
            return True
        return None

    def _match_text_seq(self, *texts, advance=True):
        index = self._index
        for text in texts:
            if (
                self._curr
                and self._curr.token_type != TokenType.STRING
                and self._curr.text.upper() == text
            ):
                self._advance()
            else:
                self._retreat(index)
                return None

        if not advance:
            self._retreat(index)

        return True

    def _replace_lambda(
        self, node: t.Optional[exp.Expression], expressions: t.List[exp.Expression]
    ) -> t.Optional[exp.Expression]:
        if not node:
            return node

        lambda_types = {e.name: e.args.get("to") or False for e in expressions}

        for column in node.find_all(exp.Column):
            typ = lambda_types.get(column.parts[0].name)
            if typ is not None:
                dot_or_id = column.to_dot() if column.table else column.this

                if typ:
                    dot_or_id = self.expression(
                        exp.Cast,
                        this=dot_or_id,
                        to=typ,
                    )

                parent = column.parent

                while isinstance(parent, exp.Dot):
                    if not isinstance(parent.parent, exp.Dot):
                        parent.replace(dot_or_id)
                        break
                    parent = parent.parent
                else:
                    if column is node:
                        node = dot_or_id
                    else:
                        column.replace(dot_or_id)
        return node
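
    # Illustrative example, not part of the original source: for a parsed lambda such as
    # `x -> x.total > 0`, _replace_lambda above rewrites the column reference `x.total`
    # so that the lambda parameter `x` is no longer mistaken for a table qualifier, and
    # wraps the result in an exp.Cast when the parameter was declared with a type.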

    def _parse_truncate_table(self) -> t.Optional[exp.TruncateTable] | exp.Expression:
        start = self._prev

        # Not to be confused with TRUNCATE(number, decimals) function call
        if self._match(TokenType.L_PAREN):
            self._retreat(self._index - 2)
            return self._parse_function()

        # Clickhouse supports TRUNCATE DATABASE as well
        is_database = self._match(TokenType.DATABASE)

        self._match(TokenType.TABLE)

        exists = self._parse_exists(not_=False)

        expressions = self._parse_csv(
            lambda: self._parse_table(schema=True, is_db_reference=is_database)
        )

        cluster = self._parse_on_property() if self._match(TokenType.ON) else None

        if self._match_text_seq("RESTART", "IDENTITY"):
            identity = "RESTART"
        elif self._match_text_seq("CONTINUE", "IDENTITY"):
            identity = "CONTINUE"
        else:
            identity = None

        if self._match_text_seq("CASCADE") or self._match_text_seq("RESTRICT"):
            option = self._prev.text
        else:
            option = None

        partition = self._parse_partition()

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.TruncateTable,
            expressions=expressions,
            is_database=is_database,
            exists=exists,
            cluster=cluster,
            identity=identity,
            option=option,
            partition=partition,
        )

    def _parse_with_operator(self) -> t.Optional[exp.Expression]:
        this = self._parse_ordered(self._parse_opclass)

        if not self._match(TokenType.WITH):
            return this

        op = self._parse_var(any_token=True)

        return self.expression(exp.WithOperator, this=this, op=op)

    def _parse_wrapped_options(self) -> t.List[t.Optional[exp.Expression]]:
        self._match(TokenType.EQ)
        self._match(TokenType.L_PAREN)

        opts: t.List[t.Optional[exp.Expression]] = []
        option: exp.Expression | None
        while self._curr and not self._match(TokenType.R_PAREN):
            if self._match_text_seq("FORMAT_NAME", "="):
                # The FORMAT_NAME can be set to an identifier for Snowflake and T-SQL
                option = self._parse_format_name()
            else:
                option = self._parse_property()

            if option is None:
                self.raise_error("Unable to parse option")
                break

            opts.append(option)

        return opts

    def _parse_copy_parameters(self) -> t.List[exp.CopyParameter]:
        sep = TokenType.COMMA if self.dialect.COPY_PARAMS_ARE_CSV else None

        options = []
        while self._curr and not self._match(TokenType.R_PAREN, advance=False):
            option = self._parse_var(any_token=True)
            prev = self._prev.text.upper()

            # Different dialects might separate options and values by white space, "=" and "AS"
            self._match(TokenType.EQ)
            self._match(TokenType.ALIAS)

            param = self.expression(exp.CopyParameter, this=option)

            if prev in self.COPY_INTO_VARLEN_OPTIONS and self._match(
                TokenType.L_PAREN, advance=False
            ):
                # Snowflake FILE_FORMAT case, Databricks COPY & FORMAT options
                param.set("expressions", self._parse_wrapped_options())
            elif prev == "FILE_FORMAT":
                # T-SQL's external file format case
                param.set("expression", self._parse_field())
            else:
                param.set("expression", self._parse_unquoted_field())

            options.append(param)
            self._match(sep)

        return options

    def _parse_credentials(self) -> t.Optional[exp.Credentials]:
        expr = self.expression(exp.Credentials)

        if self._match_text_seq("STORAGE_INTEGRATION", "="):
            expr.set("storage", self._parse_field())
        if self._match_text_seq("CREDENTIALS"):
            # Snowflake case: CREDENTIALS = (...), Redshift case: CREDENTIALS <string>
            creds = (
                self._parse_wrapped_options() if self._match(TokenType.EQ) else self._parse_field()
            )
            expr.set("credentials", creds)
        if self._match_text_seq("ENCRYPTION"):
            expr.set("encryption", self._parse_wrapped_options())
        if self._match_text_seq("IAM_ROLE"):
            expr.set("iam_role", self._parse_field())
        if self._match_text_seq("REGION"):
            expr.set("region", self._parse_field())

        return expr

    def _parse_file_location(self) -> t.Optional[exp.Expression]:
        return self._parse_field()
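
    # Illustrative example, not part of the original source: a Snowflake-style statement
    # handled by _parse_copy below, with stage credentials consumed by _parse_credentials
    # above; kind=True marks COPY ... FROM and kind=False marks COPY ... TO:
    #
    #   COPY INTO mytable FROM @mystage FILE_FORMAT = (TYPE = CSV)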
    def _parse_copy(self) -> exp.Copy | exp.Command:
        start = self._prev

        self._match(TokenType.INTO)

        this = (
            self._parse_select(nested=True, parse_subquery_alias=False)
            if self._match(TokenType.L_PAREN, advance=False)
            else self._parse_table(schema=True)
        )

        kind = self._match(TokenType.FROM) or not self._match_text_seq("TO")

        files = self._parse_csv(self._parse_file_location)
        credentials = self._parse_credentials()

        self._match_text_seq("WITH")

        params = self._parse_wrapped(self._parse_copy_parameters, optional=True)

        # Fallback case
        if self._curr:
            return self._parse_as_command(start)

        return self.expression(
            exp.Copy,
            this=this,
            kind=kind,
            credentials=credentials,
            files=files,
            params=params,
        )

    def _parse_normalize(self) -> exp.Normalize:
        return self.expression(
            exp.Normalize,
            this=self._parse_bitwise(),
            form=self._match(TokenType.COMMA) and self._parse_var(),
        )

    def _parse_ceil_floor(self, expr_type: t.Type[TCeilFloor]) -> TCeilFloor:
        args = self._parse_csv(lambda: self._parse_lambda())

        this = seq_get(args, 0)
        decimals = seq_get(args, 1)

        return expr_type(
            this=this, decimals=decimals, to=self._match_text_seq("TO") and self._parse_var()
        )

    def _parse_star_ops(self) -> t.Optional[exp.Expression]:
        star_token = self._prev

        if self._match_text_seq("COLUMNS", "(", advance=False):
            this = self._parse_function()
            if isinstance(this, exp.Columns):
                this.set("unpack", True)
            return this

        return self.expression(
            exp.Star,
            **{  # type: ignore
                "except": self._parse_star_op("EXCEPT", "EXCLUDE"),
                "replace": self._parse_star_op("REPLACE"),
                "rename": self._parse_star_op("RENAME"),
            },
        ).update_positions(star_token)

    def _parse_grant_privilege(self) -> t.Optional[exp.GrantPrivilege]:
        privilege_parts = []

        # Keep consuming consecutive keywords until a comma (end of this privilege), ON
        # (end of the privilege list) or L_PAREN (start of a column list) is met
        while self._curr and not self._match_set(self.PRIVILEGE_FOLLOW_TOKENS, advance=False):
            privilege_parts.append(self._curr.text.upper())
            self._advance()

        this = exp.var(" ".join(privilege_parts))
        expressions = (
            self._parse_wrapped_csv(self._parse_column)
            if self._match(TokenType.L_PAREN, advance=False)
            else None
        )

        return self.expression(exp.GrantPrivilege, this=this, expressions=expressions)

    def _parse_grant_principal(self) -> t.Optional[exp.GrantPrincipal]:
        kind = self._match_texts(("ROLE", "GROUP")) and self._prev.text.upper()
        principal = self._parse_id_var()

        if not principal:
            return None

        return self.expression(exp.GrantPrincipal, this=principal, kind=kind)
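
    # Illustrative example, not part of the original source: a statement _parse_grant
    # below can fully structure, with the multi-word privilege collected token by token
    # by _parse_grant_privilege above:
    #
    #   GRANT SELECT, ALTER ROUTINE ON db.tbl TO ROLE analyst WITH GRANT OPTION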
MySQL allows names 8401 # such as "foo.*", "*.*" which are not easily parseable yet 8402 securable = self._try_parse(self._parse_table_parts) 8403 8404 if not securable or not self._match_text_seq("TO"): 8405 return self._parse_as_command(start) 8406 8407 principals = self._parse_csv(self._parse_grant_principal) 8408 8409 grant_option = self._match_text_seq("WITH", "GRANT", "OPTION") 8410 8411 if self._curr: 8412 return self._parse_as_command(start) 8413 8414 return self.expression( 8415 exp.Grant, 8416 privileges=privileges, 8417 kind=kind, 8418 securable=securable, 8419 principals=principals, 8420 grant_option=grant_option, 8421 ) 8422 8423 def _parse_overlay(self) -> exp.Overlay: 8424 return self.expression( 8425 exp.Overlay, 8426 **{ # type: ignore 8427 "this": self._parse_bitwise(), 8428 "expression": self._match_text_seq("PLACING") and self._parse_bitwise(), 8429 "from": self._match_text_seq("FROM") and self._parse_bitwise(), 8430 "for": self._match_text_seq("FOR") and self._parse_bitwise(), 8431 }, 8432 ) 8433 8434 def _parse_format_name(self) -> exp.Property: 8435 # Note: Although not specified in the docs, Snowflake does accept a string/identifier 8436 # for FILE_FORMAT = <format_name> 8437 return self.expression( 8438 exp.Property, 8439 this=exp.var("FORMAT_NAME"), 8440 value=self._parse_string() or self._parse_table_parts(), 8441 ) 8442 8443 def _parse_max_min_by(self, expr_type: t.Type[exp.AggFunc]) -> exp.AggFunc: 8444 args: t.List[exp.Expression] = [] 8445 8446 if self._match(TokenType.DISTINCT): 8447 args.append(self.expression(exp.Distinct, expressions=[self._parse_assignment()])) 8448 self._match(TokenType.COMMA) 8449 8450 args.extend(self._parse_csv(self._parse_assignment)) 8451 8452 return self.expression( 8453 expr_type, this=seq_get(args, 0), expression=seq_get(args, 1), count=seq_get(args, 2) 8454 ) 8455 8456 def _identifier_expression( 8457 self, token: t.Optional[Token] = None, **kwargs: t.Any 8458 ) -> exp.Identifier: 8459 token = token or self._prev 8460 expression = self.expression(exp.Identifier, this=token.text, **kwargs) 8461 expression.update_positions(token) 8462 return expression 8463 8464 def _build_pipe_cte( 8465 self, 8466 query: exp.Query, 8467 expressions: t.List[exp.Expression], 8468 alias_cte: t.Optional[exp.TableAlias] = None, 8469 ) -> exp.Select: 8470 new_cte: t.Optional[t.Union[str, exp.TableAlias]] 8471 if alias_cte: 8472 new_cte = alias_cte 8473 else: 8474 self._pipe_cte_counter += 1 8475 new_cte = f"__tmp{self._pipe_cte_counter}" 8476 8477 with_ = query.args.get("with") 8478 ctes = with_.pop() if with_ else None 8479 8480 new_select = exp.select(*expressions, copy=False).from_(new_cte, copy=False) 8481 if ctes: 8482 new_select.set("with", ctes) 8483 8484 return new_select.with_(new_cte, as_=query, copy=False) 8485 8486 def _parse_pipe_syntax_select(self, query: exp.Select) -> exp.Select: 8487 select = self._parse_select(consume_pipe=False) 8488 if not select: 8489 return query 8490 8491 return self._build_pipe_cte( 8492 query=query.select(*select.expressions, append=False), expressions=[exp.Star()] 8493 ) 8494 8495 def _parse_pipe_syntax_limit(self, query: exp.Select) -> exp.Select: 8496 limit = self._parse_limit() 8497 offset = self._parse_offset() 8498 if limit: 8499 curr_limit = query.args.get("limit", limit) 8500 if curr_limit.expression.to_py() >= limit.expression.to_py(): 8501 query.limit(limit, copy=False) 8502 if offset: 8503 curr_offset = query.args.get("offset") 8504 curr_offset = curr_offset.expression.to_py() if curr_offset else 0 
8505 query.offset(exp.Literal.number(curr_offset + offset.expression.to_py()), copy=False) 8506 8507 return query 8508 8509 def _parse_pipe_syntax_aggregate_fields(self) -> t.Optional[exp.Expression]: 8510 this = self._parse_assignment() 8511 if self._match_text_seq("GROUP", "AND", advance=False): 8512 return this 8513 8514 this = self._parse_alias(this) 8515 8516 if self._match_set((TokenType.ASC, TokenType.DESC), advance=False): 8517 return self._parse_ordered(lambda: this) 8518 8519 return this 8520 8521 def _parse_pipe_syntax_aggregate_group_order_by( 8522 self, query: exp.Select, group_by_exists: bool = True 8523 ) -> exp.Select: 8524 expr = self._parse_csv(self._parse_pipe_syntax_aggregate_fields) 8525 aggregates_or_groups, orders = [], [] 8526 for element in expr: 8527 if isinstance(element, exp.Ordered): 8528 this = element.this 8529 if isinstance(this, exp.Alias): 8530 element.set("this", this.args["alias"]) 8531 orders.append(element) 8532 else: 8533 this = element 8534 aggregates_or_groups.append(this) 8535 8536 if group_by_exists: 8537 query.select(*aggregates_or_groups, copy=False).group_by( 8538 *[projection.args.get("alias", projection) for projection in aggregates_or_groups], 8539 copy=False, 8540 ) 8541 else: 8542 query.select(*aggregates_or_groups, append=False, copy=False) 8543 8544 if orders: 8545 return query.order_by(*orders, append=False, copy=False) 8546 8547 return query 8548 8549 def _parse_pipe_syntax_aggregate(self, query: exp.Select) -> exp.Select: 8550 self._match_text_seq("AGGREGATE") 8551 query = self._parse_pipe_syntax_aggregate_group_order_by(query, group_by_exists=False) 8552 8553 if self._match(TokenType.GROUP_BY) or ( 8554 self._match_text_seq("GROUP", "AND") and self._match(TokenType.ORDER_BY) 8555 ): 8556 query = self._parse_pipe_syntax_aggregate_group_order_by(query) 8557 8558 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8559 8560 def _parse_pipe_syntax_set_operator(self, query: exp.Query) -> t.Optional[exp.Query]: 8561 first_setop = self.parse_set_operation(this=query) 8562 if not first_setop: 8563 return None 8564 8565 def _parse_and_unwrap_query() -> t.Optional[exp.Select]: 8566 expr = self._parse_paren() 8567 return expr.assert_is(exp.Subquery).unnest() if expr else None 8568 8569 first_setop.this.pop() 8570 8571 setops = [ 8572 first_setop.expression.pop().assert_is(exp.Subquery).unnest(), 8573 *self._parse_csv(_parse_and_unwrap_query), 8574 ] 8575 8576 query = self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8577 with_ = query.args.get("with") 8578 ctes = with_.pop() if with_ else None 8579 8580 if isinstance(first_setop, exp.Union): 8581 query = query.union(*setops, copy=False, **first_setop.args) 8582 elif isinstance(first_setop, exp.Except): 8583 query = query.except_(*setops, copy=False, **first_setop.args) 8584 else: 8585 query = query.intersect(*setops, copy=False, **first_setop.args) 8586 8587 query.set("with", ctes) 8588 8589 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8590 8591 def _parse_pipe_syntax_join(self, query: exp.Query) -> t.Optional[exp.Query]: 8592 join = self._parse_join() 8593 if not join: 8594 return None 8595 8596 if isinstance(query, exp.Select): 8597 return query.join(join, copy=False) 8598 8599 return query 8600 8601 def _parse_pipe_syntax_pivot(self, query: exp.Select) -> exp.Select: 8602 pivots = self._parse_pivots() 8603 if not pivots: 8604 return query 8605 8606 from_ = query.args.get("from") 8607 if from_: 8608 from_.this.set("pivots", pivots) 8609 8610 
return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8611 8612 def _parse_pipe_syntax_extend(self, query: exp.Select) -> exp.Select: 8613 self._match_text_seq("EXTEND") 8614 query.select(*[exp.Star(), *self._parse_expressions()], append=False, copy=False) 8615 return self._build_pipe_cte(query=query, expressions=[exp.Star()]) 8616 8617 def _parse_pipe_syntax_tablesample(self, query: exp.Select) -> exp.Select: 8618 sample = self._parse_table_sample() 8619 8620 with_ = query.args.get("with") 8621 if with_: 8622 with_.expressions[-1].this.set("sample", sample) 8623 else: 8624 query.set("sample", sample) 8625 8626 return query 8627 8628 def _parse_pipe_syntax_query(self, query: exp.Query) -> t.Optional[exp.Query]: 8629 if isinstance(query, exp.Subquery): 8630 query = exp.select("*").from_(query, copy=False) 8631 8632 if not query.args.get("from"): 8633 query = exp.select("*").from_(query.subquery(copy=False), copy=False) 8634 8635 while self._match(TokenType.PIPE_GT): 8636 start = self._curr 8637 parser = self.PIPE_SYNTAX_TRANSFORM_PARSERS.get(self._curr.text.upper()) 8638 if not parser: 8639 # The set operators (UNION, etc) and the JOIN operator have a few common starting 8640 # keywords, making it tricky to disambiguate them without lookahead. The approach 8641 # here is to try and parse a set operation and if that fails, then try to parse a 8642 # join operator. If that fails as well, then the operator is not supported. 8643 parsed_query = self._parse_pipe_syntax_set_operator(query) 8644 parsed_query = parsed_query or self._parse_pipe_syntax_join(query) 8645 if not parsed_query: 8646 self._retreat(start) 8647 self.raise_error(f"Unsupported pipe syntax operator: '{start.text.upper()}'.") 8648 break 8649 query = parsed_query 8650 else: 8651 query = parser(self, query) 8652 8653 return query 8654 8655 def _parse_declareitem(self) -> t.Optional[exp.DeclareItem]: 8656 vars = self._parse_csv(self._parse_id_var) 8657 if not vars: 8658 return None 8659 8660 return self.expression( 8661 exp.DeclareItem, 8662 this=vars, 8663 kind=self._parse_types(), 8664 default=self._match(TokenType.DEFAULT) and self._parse_bitwise(), 8665 ) 8666 8667 def _parse_declare(self) -> exp.Declare | exp.Command: 8668 start = self._prev 8669 expressions = self._try_parse(lambda: self._parse_csv(self._parse_declareitem)) 8670 8671 if not expressions or self._curr: 8672 return self._parse_as_command(start) 8673 8674 return self.expression(exp.Declare, expressions=expressions) 8675 8676 def build_cast(self, strict: bool, **kwargs) -> exp.Cast: 8677 exp_class = exp.Cast if strict else exp.TryCast 8678 8679 if exp_class == exp.TryCast: 8680 kwargs["requires_string"] = self.dialect.TRY_CAST_REQUIRES_STRING 8681 8682 return self.expression(exp_class, **kwargs)
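Note the recurring fallback pattern above: whenever tokens remain unconsumed (`if self._curr`), the parser degrades to `_parse_as_command`, preserving the statement verbatim as an `exp.Command`. The pipe-syntax helpers, by contrast, fold each `|>` step into a fresh `__tmp<N>` CTE via `_build_pipe_cte`. A minimal sketch of the observable behavior, assuming a sqlglot version with BigQuery pipe-syntax support:

    import sqlglot

    # Each |> operator is rewritten into a CTE chain by _parse_pipe_syntax_query
    query = sqlglot.parse_one("FROM t |> WHERE x > 1 |> SELECT x", read="bigquery")
    print(query.sql("bigquery"))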
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
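For illustration, the low-level flow that the high-level `sqlglot.parse` wraps: tokenize first, then hand the token list to a `Parser` instance:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer
    from sqlglot.errors import ErrorLevel

    tokens = Tokenizer().tokenize("SELECT a FROM b")
    expressions = Parser(error_level=ErrorLevel.RAISE, max_errors=5).parse(tokens)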
    def __init__(
        self,
        error_level: t.Optional[ErrorLevel] = None,
        error_message_context: int = 100,
        max_errors: int = 3,
        dialect: DialectType = None,
    ):
        from sqlglot.dialects import Dialect

        self.error_level = error_level or ErrorLevel.IMMEDIATE
        self.error_message_context = error_message_context
        self.max_errors = max_errors
        self.dialect = Dialect.get_or_raise(dialect)
        self.reset()
    def parse(
        self, raw_tokens: t.List[Token], sql: t.Optional[str] = None
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens and returns a list of syntax trees, one tree
        per parsed SQL statement.

        Args:
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The list of the produced syntax trees.
        """
        return self._parse(
            parse_method=self.__class__._parse_statement, raw_tokens=raw_tokens, sql=sql
        )
Parses a list of tokens and returns a list of syntax trees, one tree per parsed SQL statement.
Arguments:
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The list of the produced syntax trees.
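Since parsing splits on statement boundaries, a multi-statement string yields one tree per statement, and passing `sql` lets error messages show the offending snippet. A small sketch:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "SELECT 1; SELECT 2"
    trees = Parser().parse(Tokenizer().tokenize(sql), sql=sql)
    assert len(trees) == 2  # one syntax tree per statement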
    def parse_into(
        self,
        expression_types: exp.IntoType,
        raw_tokens: t.List[Token],
        sql: t.Optional[str] = None,
    ) -> t.List[t.Optional[exp.Expression]]:
        """
        Parses a list of tokens into a given Expression type. If a collection of Expression
        types is given instead, this method will try to parse the token list into each one
        of them, stopping at the first for which the parsing succeeds.

        Args:
            expression_types: The expression type(s) to try and parse the token list into.
            raw_tokens: The list of tokens.
            sql: The original SQL string, used to produce helpful debug messages.

        Returns:
            The target Expression.
        """
        errors = []
        for expression_type in ensure_list(expression_types):
            parser = self.EXPRESSION_PARSERS.get(expression_type)
            if not parser:
                raise TypeError(f"No parser registered for {expression_type}")

            try:
                return self._parse(parser, raw_tokens, sql)
            except ParseError as e:
                e.errors[0]["into_expression"] = expression_type
                errors.append(e)

        raise ParseError(
            f"Failed to parse '{sql or raw_tokens}' into {expression_types}",
            errors=merge_errors(errors),
        ) from errors[-1]
Parses a list of tokens into a given Expression type. If a collection of Expression types is given instead, this method will try to parse the token list into each one of them, stopping at the first for which the parsing succeeds.
Arguments:
- expression_types: The expression type(s) to try and parse the token list into.
- raw_tokens: The list of tokens.
- sql: The original SQL string, used to produce helpful debug messages.
Returns:
The target Expression.
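For example, parsing a standalone fragment rather than a full statement. This sketch assumes `exp.DataType` is registered in `EXPRESSION_PARSERS`, as in current sqlglot; a type with no registered parser raises `TypeError` instead:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer

    sql = "DECIMAL(10, 2)"
    dtype = Parser().parse_into(exp.DataType, Tokenizer().tokenize(sql), sql=sql)[0]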
    def check_errors(self) -> None:
        """Logs or raises any found errors, depending on the chosen error level setting."""
        if self.error_level == ErrorLevel.WARN:
            for error in self.errors:
                logger.error(str(error))
        elif self.error_level == ErrorLevel.RAISE and self.errors:
            raise ParseError(
                concat_messages(self.errors, self.max_errors),
                errors=merge_errors(self.errors),
            )
Logs or raises any found errors, depending on the chosen error level setting.
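A sketch of the difference the error level makes: with `ErrorLevel.WARN`, errors are logged via the module logger and collected on `parser.errors` instead of raising:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer
    from sqlglot.errors import ErrorLevel

    parser = Parser(error_level=ErrorLevel.WARN)
    parser.parse(Tokenizer().tokenize("SELECT * FROM"), sql="SELECT * FROM")
    print(parser.errors)  # collected ParseError instances, also logged above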
    def raise_error(self, message: str, token: t.Optional[Token] = None) -> None:
        """
        Appends an error to the list of recorded errors or raises it, depending on the chosen
        error level setting.
        """
        token = token or self._curr or self._prev or Token.string("")
        start = token.start
        end = token.end + 1
        start_context = self.sql[max(start - self.error_message_context, 0) : start]
        highlight = self.sql[start:end]
        end_context = self.sql[end : end + self.error_message_context]

        error = ParseError.new(
            f"{message}. Line {token.line}, Col: {token.col}.\n"
            f"  {start_context}\033[4m{highlight}\033[0m{end_context}",
            description=message,
            line=token.line,
            col=token.col,
            start_context=start_context,
            highlight=highlight,
            end_context=end_context,
        )

        if self.error_level == ErrorLevel.IMMEDIATE:
            raise error

        self.errors.append(error)
Appends an error to the list of recorded errors or raises it, depending on the chosen error level setting.
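Each recorded error carries structured context, so callers can recover the location and the highlighted snippet from a raised `ParseError`. A sketch, using the default `ErrorLevel.IMMEDIATE`:

    from sqlglot.parser import Parser
    from sqlglot.tokens import Tokenizer
    from sqlglot.errors import ParseError

    try:
        Parser().parse(Tokenizer().tokenize("SELECT * FROM"), sql="SELECT * FROM")
    except ParseError as e:
        info = e.errors[0]  # structured context recorded by raise_error
        print(info["line"], info["col"], info["highlight"])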
    def expression(
        self, exp_class: t.Type[E], comments: t.Optional[t.List[str]] = None, **kwargs
    ) -> E:
        """
        Creates a new, validated Expression.

        Args:
            exp_class: The expression class to instantiate.
            comments: An optional list of comments to attach to the expression.
            kwargs: The arguments to set for the expression along with their respective values.

        Returns:
            The target expression.
        """
        instance = exp_class(**kwargs)
        instance.add_comments(comments) if comments else self._add_comments(instance)
        return self.validate_expression(instance)
Creates a new, validated Expression.
Arguments:
- exp_class: The expression class to instantiate.
- comments: An optional list of comments to attach to the expression.
- kwargs: The arguments to set for the expression along with their respective values.
Returns:
The target expression.
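A sketch of direct construction through this helper. Equivalent trees are usually built via `sqlglot.parse_one`, but this is how the parser's internals do it:

    from sqlglot import exp
    from sqlglot.parser import Parser

    parser = Parser()
    ident = parser.expression(exp.Identifier, this="foo", quoted=True)
    column = parser.expression(exp.Column, this=ident)
    print(column.sql())  # "foo"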
    def validate_expression(self, expression: E, args: t.Optional[t.List] = None) -> E:
        """
        Validates an Expression, making sure that all its mandatory arguments are set.

        Args:
            expression: The expression to validate.
            args: An optional list of items that was used to instantiate the expression, if it's a Func.

        Returns:
            The validated expression.
        """
        if self.error_level != ErrorLevel.IGNORE:
            for error_message in expression.error_messages(args):
                self.raise_error(error_message)

        return expression
Validates an Expression, making sure that all its mandatory arguments are set.
Arguments:
- expression: The expression to validate.
- args: An optional list of items that was used to instantiate the expression, if it's a Func.
Returns:
The validated expression.
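For instance, validating an expression that is missing a mandatory argument (here an `exp.Cast` with its `to` type unset) surfaces the problem according to the error level; with the default `ErrorLevel.IMMEDIATE` it raises. A sketch:

    from sqlglot import exp
    from sqlglot.parser import Parser
    from sqlglot.errors import ParseError

    try:
        Parser().validate_expression(exp.Cast(this=exp.column("x")))  # "to" is unset
    except ParseError as e:
        print(e)  # e.g. Required keyword: 'to' missing for <class '...Cast'>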
    def parse_set_operation(
        self, this: t.Optional[exp.Expression], consume_pipe: bool = False
    ) -> t.Optional[exp.Expression]:
        start = self._index
        _, side_token, kind_token = self._parse_join_parts()

        side = side_token.text if side_token else None
        kind = kind_token.text if kind_token else None

        if not self._match_set(self.SET_OPERATIONS):
            self._retreat(start)
            return None

        token_type = self._prev.token_type

        if token_type == TokenType.UNION:
            operation: t.Type[exp.SetOperation] = exp.Union
        elif token_type == TokenType.EXCEPT:
            operation = exp.Except
        else:
            operation = exp.Intersect

        comments = self._prev.comments

        if self._match(TokenType.DISTINCT):
            distinct: t.Optional[bool] = True
        elif self._match(TokenType.ALL):
            distinct = False
        else:
            distinct = self.dialect.SET_OP_DISTINCT_BY_DEFAULT[operation]
            if distinct is None:
                self.raise_error(f"Expected DISTINCT or ALL for {operation.__name__}")

        by_name = self._match_text_seq("BY", "NAME") or self._match_text_seq(
            "STRICT", "CORRESPONDING"
        )
        if self._match_text_seq("CORRESPONDING"):
            by_name = True
            if not side and not kind:
                kind = "INNER"

        on_column_list = None
        if by_name and self._match_texts(("ON", "BY")):
            on_column_list = self._parse_wrapped_csv(self._parse_column)

        expression = self._parse_select(
            nested=True, parse_set_operation=False, consume_pipe=consume_pipe
        )

        return self.expression(
            operation,
            comments=comments,
            this=this,
            distinct=distinct,
            by_name=by_name,
            expression=expression,
            side=side,
            kind=kind,
            on=on_column_list,
        )
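The `by_name`, `side` and `kind` arguments correspond to dialect extensions such as DuckDB's BY NAME set operations. A usage sketch, assuming a sqlglot version that supports them:

    import sqlglot

    # BY NAME matches columns by name rather than by position (DuckDB)
    union = sqlglot.parse_one(
        "SELECT a, b FROM t UNION ALL BY NAME SELECT b, a FROM s", read="duckdb"
    )
    print(union.args.get("by_name"))  # True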