From 34d288156300261fe699a3e8bc292a6c3ab3f100 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 00:35:25 +0100 Subject: [PATCH 01/81] bigquery: support CREATE TABLE FUNCTION with RETURNS TABLE<...> AS query Adds parsing for CREATE [OR REPLACE] TABLE FUNCTION with RETURNS TABLE AS (query). Introduces DataType::Table, as_query field on CreateFunctionBody, and table_function flag on CreateFunction to distinguish table-valued functions from scalar functions. --- src/ast/data_type.rs | 11 +++++++++ src/ast/mod.rs | 11 ++++++++- src/parser/mod.rs | 33 +++++++++++++++++++++---- tests/sqlparser_bigquery.rs | 48 +++++++++++++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 4 ++++ 5 files changed, 102 insertions(+), 5 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 7149494c94..357359b6b7 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -307,6 +307,10 @@ pub enum DataType { /// [hive]: https://docs.cloudera.com/cdw-runtime/cloud/impala-sql-reference/topics/impala-struct.html /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type Struct(Vec), + /// TABLE type used in BigQuery for table-valued function return types. + /// + /// [bigquery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#create_table_function + Table(Vec), /// MAP<> DatabricksMap(Vec), /// Nullable - special marker NULL represents in ClickHouse as a data type. @@ -536,6 +540,13 @@ impl fmt::Display for DataType { write!(f, "STRUCT") } } + DataType::Table(fields) => { + if !fields.is_empty() { + write!(f, "TABLE<{}>", display_comma_separated(fields)) + } else { + write!(f, "TABLE") + } + } DataType::DatabricksMap(fields) => { if !fields.is_empty() { write!(f, "MAP<{}>", display_comma_separated(fields)) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b237a77c1b..843da60a59 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2111,6 +2111,8 @@ pub enum Statement { CreateFunction { or_replace: bool, temporary: bool, + /// BigQuery supports `CREATE TABLE FUNCTION` for table-valued functions + table_function: bool, name: ObjectName, args: Option>, return_type: Option, @@ -2736,6 +2738,7 @@ impl fmt::Display for Statement { Statement::CreateFunction { or_replace, temporary, + table_function, name, args, return_type, @@ -2744,9 +2747,10 @@ impl fmt::Display for Statement { } => { write!( f, - "CREATE {or_replace}{temp}FUNCTION {name}", + "CREATE {or_replace}{temp}{table}FUNCTION {name}", temp = if *temporary { "TEMPORARY " } else { "" }, or_replace = if *or_replace { "OR REPLACE " } else { "" }, + table = if *table_function { "TABLE " } else { "" }, )?; if let Some(args) = args { write!(f, "({})", display_comma_separated(args))?; @@ -5328,6 +5332,8 @@ pub struct CreateFunctionBody { /// /// Note that Hive's `AS class_name` is also parsed here. 
pub as_: Option, + /// AS query (BigQuery TABLE FUNCTION body) + pub as_query: Option, /// RETURN expression pub return_: Option, /// RETURN SELECT @@ -5347,6 +5353,9 @@ impl fmt::Display for CreateFunctionBody { if let Some(definition) = &self.as_ { write!(f, " AS {definition}")?; } + if let Some(query) = &self.as_query { + write!(f, " AS {query}")?; + } if let Some(expr) = &self.return_ { write!(f, " RETURN {expr}")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ff0b8fe921..2d3be12bec 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3406,14 +3406,19 @@ impl<'a> Parser<'a> { .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) .is_some(); if self.parse_keyword(Keyword::TABLE) { - self.parse_create_table(or_replace, temporary, global, transient) + if self.parse_keyword(Keyword::FUNCTION) { + // CREATE TABLE FUNCTION (BigQuery table-valued function) + self.parse_create_function(or_replace, temporary, true) + } else { + self.parse_create_table(or_replace, temporary, global, transient) + } } else if self.parse_keyword(Keyword::MATERIALIZED) || self.parse_keyword(Keyword::VIEW) { self.prev_token(); self.parse_create_view(or_replace) } else if self.parse_keyword(Keyword::EXTERNAL) { self.parse_create_external_table(or_replace) } else if self.parse_keyword(Keyword::FUNCTION) { - self.parse_create_function(or_replace, temporary) + self.parse_create_function(or_replace, temporary, false) } else if self.parse_keyword(Keyword::MACRO) { self.parse_create_macro(or_replace, temporary) } else if self.parse_keyword(Keyword::INDEX) { @@ -3768,6 +3773,7 @@ impl<'a> Parser<'a> { &mut self, or_replace: bool, temporary: bool, + table_function: bool, ) -> Result { if dialect_of!(self is HiveDialect) { let name = self.parse_object_name(false)?; @@ -3782,6 +3788,7 @@ impl<'a> Parser<'a> { Ok(Statement::CreateFunction { or_replace, temporary, + table_function, name, args: None, return_type: None, @@ -3801,6 +3808,7 @@ impl<'a> Parser<'a> { Ok(Statement::CreateFunction { or_replace, temporary, + table_function, name, args: None, return_type: None, @@ -3846,6 +3854,7 @@ impl<'a> Parser<'a> { Ok(Statement::CreateFunction { or_replace, temporary, + table_function, name, args, return_type, @@ -3906,8 +3915,17 @@ impl<'a> Parser<'a> { Ok(()) } if self.parse_keyword(Keyword::AS) { - ensure_not_set(&body.as_, "AS")?; - body.as_ = Some(self.parse_function_definition()?); + if matches!( + self.peek_token().token, + Token::Word(w) if matches!(w.keyword, Keyword::SELECT | Keyword::WITH) + ) { + // BigQuery TABLE FUNCTION: AS SELECT ... or AS WITH ... + ensure_not_set(&body.as_query, "AS query")?; + body.as_query = Some(self.parse_query()?); + } else { + ensure_not_set(&body.as_, "AS")?; + body.as_ = Some(self.parse_function_definition()?); + } } else if self.parse_keyword(Keyword::LANGUAGE) { ensure_not_set(&body.language, "LANGUAGE")?; body.language = Some(self.parse_identifier(false)?.unwrap()); @@ -7300,6 +7318,13 @@ impl<'a> Parser<'a> { trailing_bracket = _trailing_bracket; Ok(DataType::Struct(field_defs)) } + Keyword::TABLE if dialect_of!(self is BigQueryDialect | GenericDialect) => { + self.prev_token(); + let (field_defs, _trailing_bracket) = + self.parse_struct_type_def(Self::parse_struct_field_def, Keyword::TABLE)?; + trailing_bracket = _trailing_bracket; + Ok(DataType::Table(field_defs)) + } Keyword::NULLABLE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { Ok(self.parse_sub_type(DataType::Nullable)?) 
} diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index ce1bca1c73..21f1a46400 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1547,6 +1547,54 @@ fn test_json_number_start() { bigquery().verified_only_select("SELECT field.5k_clients_target AS clients_5k_target FROM tbl"); } +#[test] +fn parse_bigquery_create_table_function() { + // CREATE TABLE FUNCTION with RETURNS TABLE<...> and AS query (non-roundtrip due to backtick parsing) + bigquery().one_statement_parses_to( + "CREATE OR REPLACE TABLE FUNCTION mydataset.names_by_year(y INT64) RETURNS TABLE AS SELECT year, name, SUM(number) AS total FROM `bigquery-public-data.usa_names.usa_1910_current` WHERE year = y GROUP BY year, name", + "", + ); + + // CREATE TABLE FUNCTION without RETURNS (inferred return type) + bigquery().one_statement_parses_to( + "CREATE OR REPLACE TABLE FUNCTION mydataset.names_by_year(y INT64) AS SELECT year, name, SUM(number) AS total FROM `bigquery-public-data.usa_names.usa_1910_current` WHERE year = y GROUP BY year, name", + "", + ); + + // Simple CREATE TABLE FUNCTION - roundtrip + bigquery().verified_stmt( + "CREATE TABLE FUNCTION a(x INT64) RETURNS TABLE AS SELECT s, t", + ); + + // Verify the AST structure + let sql = "CREATE TABLE FUNCTION a(x INT64) RETURNS TABLE AS SELECT s, t"; + match bigquery().verified_stmt(sql) { + Statement::CreateFunction { + or_replace, + temporary, + table_function, + name, + args, + return_type, + .. + } => { + assert!(!or_replace); + assert!(!temporary); + assert!(table_function); + assert_eq!(name.to_string(), "a"); + assert!(args.is_some()); + assert!(return_type.is_some()); + match return_type.unwrap() { + DataType::Table(fields) => { + assert_eq!(fields.len(), 2); + } + other => panic!("Expected DataType::Table, got {:?}", other), + } + } + other => panic!("Expected CreateFunction, got {:?}", other), + } +} + #[test] fn parse_bigquery_format_function() { // format() as first select item diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 271e3b368e..e99fce85d1 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3319,6 +3319,7 @@ fn parse_create_function() { Statement::CreateFunction { or_replace: false, temporary: false, + table_function: false, name: ObjectName(vec![Ident::new("add")]), args: Some(vec![ OperateFunctionArg::unnamed(DataType::Integer(None)), @@ -3343,6 +3344,7 @@ fn parse_create_function() { Statement::CreateFunction { or_replace: true, temporary: false, + table_function: false, name: ObjectName(vec![Ident::new("add")]), args: Some(vec![ OperateFunctionArg::with_name("a", DataType::Integer(None)), @@ -3374,6 +3376,7 @@ fn parse_create_function() { Statement::CreateFunction { or_replace: true, temporary: false, + table_function: false, name: ObjectName(vec![Ident::new("increment")]), args: Some(vec![OperateFunctionArg::with_name( "i", @@ -3389,6 +3392,7 @@ fn parse_create_function() { as_: Some(FunctionDefinition::DoubleDollarDef( " BEGIN RETURN i + 1; END; ".into() )), + as_query: None, using: None }, } From 1545cf714dc5fb6f3394fe0d5abe5e6b297d9b75 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 00:40:19 +0100 Subject: [PATCH 02/81] bigquery: support multiple comma-separated tables in UPDATE FROM Changes Update.from from Option to Option> to support BigQuery syntax UPDATE t SET col = val FROM table1, table2 WHERE ... 
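For example, the statement exercised by the new round-trip test:

    UPDATE dataset.DetailedInventory
    SET supply_constrained = true
    FROM dataset.NewArrivals, dataset.Warehouse
    WHERE DetailedInventory.product = NewArrivals.product
      AND NewArrivals.warehouse = Warehouse.warehouse
      AND Warehouse.state = 'WA'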
--- src/ast/mod.rs | 4 ++-- src/parser/mod.rs | 2 +- tests/sqlparser_bigquery.rs | 6 ++++++ tests/sqlparser_common.rs | 4 ++-- 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 843da60a59..6bcae87595 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1633,7 +1633,7 @@ pub enum Statement { /// Column assignments assignments: Vec, /// Table which provide value to be set - from: Option, + from: Option>, /// WHERE selection: Option, /// RETURNING @@ -2670,7 +2670,7 @@ impl fmt::Display for Statement { write!(f, " SET {}", display_comma_separated(assignments))?; } if let Some(from) = from { - write!(f, " FROM {from}")?; + write!(f, " FROM {}", display_comma_separated(from))?; } if let Some(selection) = selection { write!(f, " WHERE {selection}")?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2d3be12bec..0004205e97 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10287,7 +10287,7 @@ impl<'a> Parser<'a> { let from = if self.parse_keyword(Keyword::FROM) && dialect_of!(self is GenericDialect | PostgreSqlDialect | DuckDbDialect | BigQueryDialect | SnowflakeDialect | RedshiftSqlDialect | MsSqlDialect) { - Some(self.parse_table_and_joins()?) + Some(self.parse_comma_separated(Parser::parse_table_and_joins)?) } else { None }; diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 21f1a46400..92ab587d41 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1610,3 +1610,9 @@ fn parse_bigquery_format_function() { "", ); } + +#[test] +fn parse_update_from_multiple_tables() { + let sql = "UPDATE dataset.DetailedInventory SET supply_constrained = true FROM dataset.NewArrivals, dataset.Warehouse WHERE DetailedInventory.product = NewArrivals.product AND NewArrivals.warehouse = Warehouse.warehouse AND Warehouse.state = 'WA'"; + bigquery().verified_stmt(sql); +} diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3ff2614f82..7ab9a77791 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -237,7 +237,7 @@ fn parse_update_set_from() { vec![Ident::new("t2"), Ident::new("name")].empty_span() ), }], - from: Some(TableWithJoins { + from: Some(vec![TableWithJoins { relation: TableFactor::Derived { lateral: false, subquery: Box::new(Query { @@ -296,7 +296,7 @@ fn parse_update_set_from() { }), }, joins: vec![], - }), + }]), selection: Some(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier( vec![Ident::new("t1"), Ident::new("id")].empty_span() From f94ec2d3036dbd37082b7d57999304ec53c474dd Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 00:51:02 +0100 Subject: [PATCH 03/81] bigquery: fix SELECT DISTINCT AS STRUCT/VALUE parsing order BigQuery grammar requires SELECT [DISTINCT] [AS STRUCT|VALUE] with DISTINCT before AS STRUCT/VALUE. The parser had the order reversed, causing failures on valid SQL like SELECT DISTINCT AS STRUCT 1 AS a. 
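For example, the grammar-ordered form covered by the added test:

    SELECT DISTINCT AS STRUCT 1 AS a, 2 AS b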
--- src/ast/query.rs | 8 ++++---- src/parser/mod.rs | 4 ++-- tests/sqlparser_bigquery.rs | 6 ++++++ 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 0f43c41332..7d210eeed5 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -276,13 +276,13 @@ impl fmt::Display for Select { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "SELECT")?; - if let Some(value_table_mode) = self.value_table_mode { - write!(f, " {value_table_mode}")?; - } - if let Some(ref distinct) = self.distinct { write!(f, " {distinct}")?; } + + if let Some(value_table_mode) = self.value_table_mode { + write!(f, " {value_table_mode}")?; + } if let Some(ref top) = self.top { write!(f, " {top}")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0004205e97..c9f985b787 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8539,6 +8539,8 @@ impl<'a> Parser<'a> { /// Parse a restricted `SELECT` statement (no CTEs / `UNION` / `ORDER BY`), /// assuming the initial `SELECT` was already consumed pub fn parse_select(&mut self) -> Result { + let distinct = self.parse_all_or_distinct()?; + let value_table_mode = if dialect_of!(self is BigQueryDialect) && self.parse_keyword(Keyword::AS) { if self.parse_keyword(Keyword::VALUE) { @@ -8552,8 +8554,6 @@ impl<'a> Parser<'a> { None }; - let distinct = self.parse_all_or_distinct()?; - let top = if self.parse_keyword(Keyword::TOP) { Some(self.parse_top()?) } else { diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 92ab587d41..d63f638bdf 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1292,6 +1292,12 @@ fn test_select_as_struct() { bigquery().verified_only_select("SELECT * FROM (SELECT AS VALUE STRUCT(123 AS a, false AS b))"); let select = bigquery().verified_only_select("SELECT AS STRUCT 1 AS a, 2 AS b"); assert_eq!(Some(ValueTableMode::AsStruct), select.value_table_mode); + assert_eq!(None, select.distinct); + + // BigQuery supports SELECT DISTINCT AS STRUCT + let select = bigquery().verified_only_select("SELECT DISTINCT AS STRUCT 1 AS a, 2 AS b"); + assert_eq!(Some(ValueTableMode::AsStruct), select.value_table_mode); + assert_eq!(Some(Distinct::Distinct), select.distinct); } #[test] From f35c853ebfcda5804fa0a5f80e1abeda4f0ed8ed Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 00:55:16 +0100 Subject: [PATCH 04/81] bigquery: support wildcard table references (FROM dataset.table_prefix*) Parses BigQuery wildcard table syntax where a trailing * on an object name in FROM matches multiple tables with the given prefix. 
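Examples from the added tests:

    SELECT * FROM x.y*
    SELECT * FROM x.y* WHERE _TABLE_SUFFIX BETWEEN '20230101' AND '20231231'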
--- src/parser/mod.rs | 11 +++++++++++ tests/sqlparser_bigquery.rs | 15 +++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c9f985b787..979dace128 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7549,6 +7549,17 @@ impl<'a> Parser<'a> { .collect() } + // BigQuery supports wildcard tables: `FROM dataset.table_prefix*` + // https://cloud.google.com/bigquery/docs/querying-wildcard-tables + if in_table_clause + && dialect_of!(self is BigQueryDialect | GenericDialect) + && self.consume_token(&Token::Mul) + { + if let Some(last) = idents.last_mut() { + last.value.push('*'); + } + } + Ok(ObjectName(idents)) } diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index d63f638bdf..886c04ba62 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1622,3 +1622,18 @@ fn parse_update_from_multiple_tables() { let sql = "UPDATE dataset.DetailedInventory SET supply_constrained = true FROM dataset.NewArrivals, dataset.Warehouse WHERE DetailedInventory.product = NewArrivals.product AND NewArrivals.warehouse = Warehouse.warehouse AND Warehouse.state = 'WA'"; bigquery().verified_stmt(sql); } + +#[test] +fn parse_wildcard_table() { + // BigQuery wildcard table syntax + // https://cloud.google.com/bigquery/docs/querying-wildcard-tables + bigquery().verified_stmt("SELECT * FROM x.y*"); + // Backtick-quoted identifiers with dots get split into parts + bigquery().one_statement_parses_to( + "SELECT * FROM `project.dataset.table_prefix*`", + "SELECT * FROM `project`.`dataset`.`table_prefix*`", + ); + bigquery().verified_stmt( + "SELECT * FROM x.y* WHERE _TABLE_SUFFIX BETWEEN '20230101' AND '20231231'", + ); +} From d378b120e517bfd0f03339ea79c4d7d59e82a300 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 01:01:18 +0100 Subject: [PATCH 05/81] bigquery: support dot field access after CAST expressions After parsing CAST, consume trailing .field accesses and wrap them in Expr::CompositeAccess, enabling CAST(col AS STRUCT).f1. --- src/parser/mod.rs | 24 ++++++++++++++++++++++-- tests/sqlparser_bigquery.rs | 11 +++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 979dace128..daad6e4d73 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1323,11 +1323,31 @@ impl<'a> Parser<'a> { }; self.expect_token(&Token::RParen)?; - Ok(Expr::Cast { + let mut result = Expr::Cast { expr: Box::new(expr), data_type, format, - }) + }; + + // Handle field access after CAST, e.g. CAST(col AS STRUCT).f1 + while self.consume_token(&Token::Period) { + let tok = self.next_token(); + let key = match tok.token { + Token::Word(word) => word.to_ident(), + _ => { + return parser_err!( + format!("Expected identifier, found: {tok}"), + tok.span.start + ); + } + }; + result = Expr::CompositeAccess { + expr: Box::new(result), + key, + }; + } + + Ok(result) } /// Parse a SQL TRY_CAST function e.g. 
`TRY_CAST(expr AS FLOAT)` diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 886c04ba62..a51a910845 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1623,6 +1623,17 @@ fn parse_update_from_multiple_tables() { bigquery().verified_stmt(sql); } +#[test] +fn parse_cast_field_access() { + // Single-level field access on CAST + bigquery().verified_stmt("SELECT CAST(col AS STRUCT).fld1"); + // Multi-level field access on CAST + bigquery().one_statement_parses_to( + "SELECT CAST(col AS STRUCT>).fld1.fld2", + "SELECT CAST(col AS STRUCT>).fld1.fld2", + ); +} + #[test] fn parse_wildcard_table() { // BigQuery wildcard table syntax From c76a76c5c9c5ef34935a1fb1304a9d92730b744a Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 01:10:02 +0100 Subject: [PATCH 06/81] clickhouse: support EXPLAIN type keywords and key=value options Adds explain_type (SYNTAX, AST, PLAN, PIPELINE) and options (key=value pairs) to Explain statement, enabling EXPLAIN PIPELINE header=1, graph=1 SELECT ... --- src/ast/mod.rs | 14 ++++++++ src/parser/mod.rs | 60 +++++++++++++++++++++++++++++++++++ tests/sqlparser_clickhouse.rs | 19 +++++++++++ tests/sqlparser_common.rs | 2 ++ 4 files changed, 95 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6bcae87595..4d8f500d2b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2250,6 +2250,10 @@ pub enum Statement { statement: Box, /// Optional output format of explain format: Option, + /// ClickHouse: EXPLAIN options (e.g., `distributed=1`) + options: Vec, + /// ClickHouse: EXPLAIN type (e.g., SYNTAX, AST, PLAN, PIPELINE) + explain_type: Option, }, /// SAVEPOINT -- define a new savepoint within the current transaction Savepoint { name: Ident }, @@ -2391,6 +2395,8 @@ impl fmt::Display for Statement { analyze, statement, format, + options, + explain_type, } => { if *describe_alias { write!(f, "DESCRIBE ")?; @@ -2410,6 +2416,14 @@ impl fmt::Display for Statement { write!(f, "FORMAT {format} ")?; } + if let Some(explain_type) = explain_type { + write!(f, "{explain_type} ")?; + } + + if !options.is_empty() { + write!(f, "{} ", display_comma_separated(options))?; + } + write!(f, "{statement}") } Statement::Query(s) => write!(f, "{s}"), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index daad6e4d73..4a582badd7 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8161,6 +8161,13 @@ impl<'a> Parser<'a> { format = Some(self.parse_analyze_format()?); } + // ClickHouse: EXPLAIN [type] [setting = value, ...] statement + let (explain_type, options) = if dialect_of!(self is ClickHouseDialect) { + self.parse_explain_options()? + } else { + (None, vec![]) + }; + match self.maybe_parse(|parser| parser.parse_statement()) { Some(Statement::Explain { .. }) | Some(Statement::ExplainTable { .. }) => Err( ParserError::ParserError("Explain must be root of the plan".to_string()), @@ -8171,6 +8178,8 @@ impl<'a> Parser<'a> { verbose, statement: Box::new(statement), format, + options, + explain_type, }), _ => { let has_table_word = self.parse_keyword(Keyword::TABLE); @@ -8193,6 +8202,57 @@ impl<'a> Parser<'a> { } } + /// Parse ClickHouse EXPLAIN options: `[type] [setting = value, ...]` + /// + /// ClickHouse EXPLAIN supports an optional type keyword (SYNTAX, AST, PLAN, PIPELINE, etc.) + /// followed by optional key=value settings before the explained statement. 
+ fn parse_explain_options( + &mut self, + ) -> Result<(Option, Vec), ParserError> { + // First, check for an optional EXPLAIN type identifier (e.g., SYNTAX, AST, PLAN). + // These are non-keyword identifiers NOT followed by `=`. + let explain_type = if let Token::Word(w) = &self.peek_token_ref().token { + if w.keyword == Keyword::NoKeyword && self.peek_nth_token_ref(1).token != Token::Eq { + Some(self.parse_identifier(false)?.unwrap()) + } else { + None + } + } else { + None + }; + + let options = self.parse_explain_key_value_options()?; + Ok((explain_type, options)) + } + + /// Parse key=value option pairs for EXPLAIN + fn parse_explain_key_value_options(&mut self) -> Result, ParserError> { + let mut options = vec![]; + loop { + // Check if next tokens look like `ident = value` (not a statement keyword) + if let Token::Word(w) = &self.peek_token_ref().token { + // If the word is a known statement keyword, stop parsing options + if w.keyword != Keyword::NoKeyword { + break; + } + if self.peek_nth_token_ref(1).token == Token::Eq { + let name = self.parse_object_name(false)?; + self.expect_token(&Token::Eq)?; + let value = self.parse_expr()?; + options.push(SqlOption { name, value }); + if !self.consume_token(&Token::Comma) { + break; + } + } else { + break; + } + } else { + break; + } + } + Ok(options) + } + /// Call's [`Self::parse_query`] returning a `Box`'ed result. /// /// This function can be used to reduce the stack size required in debug diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 774c4847bc..86a1921d60 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1499,6 +1499,25 @@ fn parse_select_from_table_final() { clickhouse().one_statement_parses_to("SELECT * FROM t AS t1 FINAL", "SELECT * FROM t AS t1"); } +#[test] +fn parse_explain_with_options() { + // ClickHouse supports EXPLAIN with key=value options before the statement + clickhouse().verified_stmt( + "EXPLAIN distributed = 1 SELECT sum(number) FROM test_table GROUP BY number % 4", + ); + + clickhouse().one_statement_parses_to( + "EXPLAIN distributed=1 SELECT * FROM remote('127.0.0.{1,2}', numbers(2)) WHERE number = 1", + "EXPLAIN distributed = 1 SELECT * FROM remote('127.0.0.{1,2}', numbers(2)) WHERE number = 1", + ); + + // EXPLAIN SYNTAX with options + clickhouse().one_statement_parses_to( + "EXPLAIN SYNTAX run_query_tree_passes = 1 SELECT * FROM system.numbers AS a, system.numbers AS b, system.numbers AS c WHERE a.number = b.number AND b.number = c.number", + "EXPLAIN SYNTAX run_query_tree_passes = 1 SELECT * FROM system.numbers AS a, system.numbers AS b, system.numbers AS c WHERE a.number = b.number AND b.number = c.number", + ); +} + fn clickhouse_and_generic() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {}), Box::new(GenericDialect {})], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 7ab9a77791..b3f49c7e5a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3819,6 +3819,8 @@ fn run_explain_analyze( verbose, statement, format, + options: _, + explain_type: _, } => { assert_eq!(verbose, expected_verbose); assert_eq!(analyze, expected_analyze); From 2ede8c8088fdaf7219dd5ae02b4a9112ca9ebc1a Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 01:13:22 +0100 Subject: [PATCH 07/81] clickhouse: support ALTER TABLE DROP PART 'part_name' syntax Adds Partition::Part(expr) variant to handle ClickHouse's DROP PART 'name' syntax, distinct from the existing DROP 
PARTITION (expr) form. --- src/ast/ddl.rs | 9 +++++++++ src/parser/mod.rs | 4 ++++ tests/sqlparser_clickhouse.rs | 23 +++++++++++++++++++++++ 3 files changed, 36 insertions(+) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index c661f547b8..6dcdad0c15 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -74,6 +74,12 @@ pub enum AlterTableOperation { // See `AttachPartition` for more details partition: Partition, }, + /// `DROP PART|PARTITION ` + /// Note: this is a ClickHouse-specific operation, please refer to + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#drop-partitionpart) + DropPartition { + partition: Partition, + }, /// `DROP PRIMARY KEY` /// /// Note: this is a MySQL-specific operation. @@ -187,6 +193,9 @@ impl fmt::Display for AlterTableOperation { if *cascade { " CASCADE" } else { "" }, ) } + AlterTableOperation::DropPartition { partition } => { + write!(f, "DROP {partition}") + } AlterTableOperation::DropPrimaryKey => write!(f, "DROP PRIMARY KEY"), AlterTableOperation::DropColumn { column_name, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4a582badd7..c7f8047e8f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6277,6 +6277,10 @@ impl<'a> Parser<'a> { partitions, if_exists: true, } + } else if self.parse_keyword(Keyword::PART) { + AlterTableOperation::DropPartition { + partition: Partition::Part(self.parse_expr()?), + } } else if self.parse_keyword(Keyword::PARTITION) { self.expect_token(&Token::LParen)?; let partitions = self.parse_comma_separated(Parser::parse_expr)?; diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 86a1921d60..a7fb4e9620 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -587,6 +587,29 @@ fn parse_alter_table_attach_and_detach_partition() { } } +#[test] +fn parse_alter_table_drop_partition_and_part() { + // DROP PART 'part_name' + match clickhouse_and_generic() + .verified_stmt("ALTER TABLE mt DROP PART 'all_4_4_0'") + { + Statement::AlterTable { + name, operations, .. + } => { + assert_eq!("mt", name.to_string()); + assert_eq!( + operations[0], + AlterTableOperation::DropPartition { + partition: Partition::Part(Expr::Value(Value::SingleQuotedString( + "all_4_4_0".to_string() + ))), + } + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_materialized_view() { clickhouse().verified_stmt( From 6f3f3179384cc21cb8e789a54b66cb46cb1d4c6f Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 01:16:41 +0100 Subject: [PATCH 08/81] clickhouse: enable trailing commas in SELECT projection list Enables trailing_commas option for ClickHouseDialect in parse_projection, allowing SELECT 1, 2, FROM t to parse successfully. --- src/parser/mod.rs | 2 +- tests/sqlparser_clickhouse.rs | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c7f8047e8f..b635e987bc 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3250,7 +3250,7 @@ impl<'a> Parser<'a> { // This pattern could be captured better with RAII type semantics, but it's quite a bit of // code to add for just one case, so we'll just do it manually here. 
let old_value = self.options.trailing_commas; - self.options.trailing_commas |= dialect_of!(self is BigQueryDialect | SnowflakeDialect); + self.options.trailing_commas |= dialect_of!(self is BigQueryDialect | SnowflakeDialect | ClickHouseDialect); let ret = self.parse_comma_separated(|p| p.parse_select_item()); self.options.trailing_commas = old_value; diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index a7fb4e9620..3330af8cbf 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1541,6 +1541,20 @@ fn parse_explain_with_options() { ); } +#[test] +fn test_clickhouse_trailing_commas() { + // ClickHouse supports trailing commas in SELECT + clickhouse().one_statement_parses_to( + "SELECT 1, 2, FROM t", + "SELECT 1, 2 FROM t", + ); + // Trailing comma with FORMAT clause + clickhouse().one_statement_parses_to( + "SELECT (number, toDate('2019-05-20')), dictGetOrNull('range_key_dictionary', 'value', number, toDate('2019-05-20')), FROM system.numbers LIMIT 5 FORMAT TabSeparated", + "SELECT (number, toDate('2019-05-20')), dictGetOrNull('range_key_dictionary', 'value', number, toDate('2019-05-20')) FROM system.numbers LIMIT 5 FORMAT TabSeparated", + ); +} + fn clickhouse_and_generic() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {}), Box::new(GenericDialect {})], From 549d1ffb8ab2d55cc245074e8f1462b6dfdd427c Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 01:25:46 +0100 Subject: [PATCH 09/81] clickhouse: support EXECUTE AS impersonation statement Adds Statement::ExecuteAs { user } and routes EXECUTE AS to it. This is ClickHouse's session-level user impersonation, distinct from PostgreSQL's EXECUTE. --- src/ast/mod.rs | 12 ++++++++++++ src/parser/mod.rs | 6 ++++++ tests/sqlparser_clickhouse.rs | 6 ++++++ 3 files changed, 24 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 4d8f500d2b..05aae0c006 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2203,6 +2203,15 @@ pub enum Statement { parameters: Vec, }, /// ```sql + /// EXECUTE AS + /// ``` + /// + /// ClickHouse-specific statement for user impersonation. + /// See: + ExecuteAs { + user: WithSpan, + }, + /// ```sql /// PREPARE name [ ( data_type [, ...] 
) ] AS statement /// ``` /// @@ -3696,6 +3705,9 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::ExecuteAs { user } => { + write!(f, "EXECUTE AS {user}") + } Statement::Prepare { name, data_types, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b635e987bc..4937cf7b55 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11085,6 +11085,12 @@ impl<'a> Parser<'a> { } pub fn parse_execute(&mut self) -> Result { + // ClickHouse: EXECUTE AS (user impersonation) + if self.parse_keyword(Keyword::AS) { + let user = self.parse_identifier(false)?; + return Ok(Statement::ExecuteAs { user }); + } + let name = self.parse_identifier(false)?; let mut parameters = vec![]; diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 3330af8cbf..0fd2b9298d 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1555,6 +1555,12 @@ fn test_clickhouse_trailing_commas() { ); } +#[test] +fn parse_execute_as() { + // ClickHouse EXECUTE AS (user impersonation) + clickhouse().verified_stmt("EXECUTE AS u1"); +} + fn clickhouse_and_generic() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {}), Box::new(GenericDialect {})], From f50a5c9c93836ff7050fbd9b23beb4bce1e53374 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 01:35:32 +0100 Subject: [PATCH 10/81] clickhouse: support COLUMNS('pattern') with APPLY/EXCEPT/REPLACE transformers Adds SelectItem::ColumnsWithTransformers and ColumnTransformer enum (Apply, Except, Replace) for ClickHouse's column transformer syntax like SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) FROM t. --- src/ast/mod.rs | 8 +++--- src/ast/query.rs | 46 +++++++++++++++++++++++++++++ src/parser/mod.rs | 54 +++++++++++++++++++++++++++++++++++ tests/sqlparser_clickhouse.rs | 32 +++++++++++++++++++++ 4 files changed, 136 insertions(+), 4 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 05aae0c006..d9d5b0a0b5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -42,10 +42,10 @@ pub use self::ddl::{ }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ - AggregateItem, Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, FormatClause, - GroupByExpr, IdentWithAlias, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, - LateralView, LockClause, LockType, NamedWindowDefinition, NonBlock, Offset, OffsetRows, - OrderBy, OrderByExpr, PivotValue, PivotValueSource, Query, RenameSelectItem, + AggregateItem, ColumnTransformer, Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, + FormatClause, GroupByExpr, IdentWithAlias, Interpolate, InterpolateExpr, Join, JoinConstraint, + JoinOperator, LateralView, LockClause, LockType, NamedWindowDefinition, NonBlock, Offset, + OffsetRows, OrderBy, OrderByExpr, PivotValue, PivotValueSource, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, SamplingMethod, Select, SelectInto, SelectItem, SelectionCount, SetExpr, SetOperator, SetQuantifier, Setting, Table, TableAlias, TableFactor, TableSampleSeed, TableVersion, TableWithJoins, Top, UnpivotNullHandling, ValueTableMode, diff --git a/src/ast/query.rs b/src/ast/query.rs index 7d210eeed5..c2d08f86d0 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -448,6 +448,42 @@ pub enum SelectItem { QualifiedWildcard(ObjectName, WildcardAdditionalOptions), /// An unqualified `*` Wildcard(WildcardAdditionalOptions), + /// ClickHouse `COLUMNS('pattern')` with optional `APPLY(func)` chains + /// + ColumnsWithTransformers { + /// 
The COLUMNS expression (e.g., `COLUMNS('pattern')` or `COLUMNS(col1, col2)`) + columns: Expr, + /// Optional chain of `APPLY(func)` transformers + transformers: Vec, + }, +} + +/// ClickHouse column transformer applied to COLUMNS expression +/// +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ColumnTransformer { + /// `APPLY(func)` - applies function to each matched column + Apply(Ident), + /// `EXCEPT(col1, col2, ...)` - excludes columns + Except(Vec), + /// `REPLACE(expr AS col, ...)` - replaces column expressions + Replace(Vec), +} + +impl fmt::Display for ColumnTransformer { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ColumnTransformer::Apply(func) => write!(f, "APPLY({func})"), + ColumnTransformer::Except(cols) => { + write!(f, "EXCEPT({})", display_comma_separated(cols)) + } + ColumnTransformer::Replace(items) => { + write!(f, "REPLACE({})", display_comma_separated(items)) + } + } + } } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -691,6 +727,16 @@ impl fmt::Display for SelectItem { write!(f, "{additional_options}")?; Ok(()) } + SelectItem::ColumnsWithTransformers { + columns, + transformers, + } => { + write!(f, "{columns}")?; + for transformer in transformers { + write!(f, " {transformer}")?; + } + Ok(()) + } } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4937cf7b55..663ac6e01b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10581,6 +10581,19 @@ impl<'a> Parser<'a> { } else { expr }; + + // ClickHouse COLUMNS('pattern') APPLY(func) ... syntax + if dialect_of!(self is ClickHouseDialect | GenericDialect) && self.is_columns_function_call(&expr) { + let transformers = self.parse_column_transformers()?; + if !transformers.is_empty() { + return Ok(SelectItem::ColumnsWithTransformers { + columns: expr, + transformers, + } + .spanning(self.span_from_index(start_span))); + } + } + let expr_with_location = expr.spanning(self.span_from_index(start_span)); self.parse_optional_alias(keywords::RESERVED_FOR_COLUMN_ALIAS) .map(|alias| match alias { @@ -10679,6 +10692,47 @@ impl<'a> Parser<'a> { }) } + /// Check if an expression is a COLUMNS function call (for ClickHouse column transformers) + fn is_columns_function_call(&self, expr: &Expr) -> bool { + matches!(expr, Expr::Function(func) if func.name.to_string().eq_ignore_ascii_case("columns")) + } + + /// Parse ClickHouse column transformers: `APPLY(func)`, `EXCEPT(col, ...)`, `REPLACE(expr AS col, ...)` + fn parse_column_transformers(&mut self) -> Result, ParserError> { + let mut transformers = Vec::new(); + loop { + if self.parse_keyword(Keyword::APPLY) { + self.expect_token(&Token::LParen)?; + let func_name = self.parse_identifier(false)?.unwrap(); + self.expect_token(&Token::RParen)?; + transformers.push(ColumnTransformer::Apply(func_name)); + } else if self.parse_keyword(Keyword::EXCEPT) { + self.expect_token(&Token::LParen)?; + let cols = + self.parse_comma_separated(|p| Ok(p.parse_identifier(false)?.unwrap()))?; + self.expect_token(&Token::RParen)?; + transformers.push(ColumnTransformer::Except(cols)); + } else if self.parse_keyword(Keyword::REPLACE) { + self.expect_token(&Token::LParen)?; + let items = self.parse_comma_separated(|p| { + let expr = p.parse_expr()?; + p.expect_keyword(Keyword::AS)?; + let alias = p.parse_identifier(false)?; + let start_span = p.index; + Ok(SelectItem::ExprWithAlias { + expr: 
expr.spanning(p.span_from_index(start_span)), + alias, + }) + })?; + self.expect_token(&Token::RParen)?; + transformers.push(ColumnTransformer::Replace(items)); + } else { + break; + } + } + Ok(transformers) + } + /// Parse an [`Exclude`](ExcludeSelectItem) information for wildcard select items. /// /// If it is not possible to parse it, will return an option. diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 0fd2b9298d..745eae781a 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1561,6 +1561,38 @@ fn parse_execute_as() { clickhouse().verified_stmt("EXECUTE AS u1"); } +#[test] +fn parse_columns_with_apply_transformers() { + // ClickHouse COLUMNS('pattern') APPLY(func) syntax + // https://clickhouse.com/docs/en/sql-reference/statements/select#columns-expression + clickhouse().verified_stmt( + "SELECT COLUMNS('[jk]') APPLY(toString) APPLY(length) APPLY(max) FROM columns_transformers", + ); + + // Single APPLY + clickhouse().verified_stmt( + "SELECT COLUMNS('[jk]') APPLY(toString) FROM columns_transformers", + ); + + // Verify AST structure + let sql = "SELECT COLUMNS('[jk]') APPLY(toString) FROM columns_transformers"; + let select = clickhouse().verified_only_select(sql); + match select.projection[0].clone().unwrap() { + SelectItem::ColumnsWithTransformers { + ref columns, + ref transformers, + } => { + assert!(matches!(columns, Expr::Function(_))); + assert_eq!(transformers.len(), 1); + assert_eq!( + transformers[0], + ColumnTransformer::Apply(Ident::new("toString")) + ); + } + _ => panic!("Expected ColumnsWithTransformers"), + } +} + fn clickhouse_and_generic() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {}), Box::new(GenericDialect {})], From c37629b91a92ba816c036f9d7a01f3fd5f47e087 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 01:40:44 +0100 Subject: [PATCH 11/81] databricks: support DESCRIBE HISTORY statement Adds Statement::DescribeHistory and parses DESCRIBE HISTORY (and DESC HISTORY alias) for Databricks Delta table history queries. --- src/ast/mod.rs | 13 +++++++++++++ src/parser/mod.rs | 6 ++++++ tests/sqlparser_databricks.rs | 22 ++++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d9d5b0a0b5..ad38a8e70a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2264,6 +2264,16 @@ pub enum Statement { /// ClickHouse: EXPLAIN type (e.g., SYNTAX, AST, PLAN, PIPELINE) explain_type: Option, }, + /// ```sql + /// DESCRIBE HISTORY table_name + /// ``` + /// Databricks Delta Lake: show table history/changelog + /// See + DescribeHistory { + /// Table name + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + table_name: ObjectName, + }, /// SAVEPOINT -- define a new savepoint within the current transaction Savepoint { name: Ident }, // MERGE INTO statement, based on Snowflake. 
See @@ -2435,6 +2445,9 @@ impl fmt::Display for Statement { write!(f, "{statement}") } + Statement::DescribeHistory { table_name } => { + write!(f, "DESCRIBE HISTORY {table_name}") + } Statement::Query(s) => write!(f, "{s}"), Statement::Declare { name, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 663ac6e01b..8b22fc6c2f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8186,6 +8186,12 @@ impl<'a> Parser<'a> { explain_type, }), _ => { + // Databricks: DESCRIBE HISTORY table_name + if describe_alias && self.parse_keyword(Keyword::HISTORY) { + let table_name = self.parse_object_name(false)?; + return Ok(Statement::DescribeHistory { table_name }); + } + let has_table_word = self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name(false)?; diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index b4744c4d4c..c24165d190 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -243,3 +243,25 @@ fn test_create_table_with_location() { fn test_cross_join() { databricks_and_generic().verified_stmt("SELECT * FROM tbl CROSS JOIN tbl2 ON tbl.id = tbl2.id"); } + +#[test] +fn test_describe_history() { + // Simple table name + match databricks().verified_stmt("DESCRIBE HISTORY table_name") { + Statement::DescribeHistory { table_name } => { + assert_eq!(table_name.to_string(), "table_name"); + } + _ => unreachable!(), + } + + // Qualified table name + match databricks().verified_stmt("DESCRIBE HISTORY a.b") { + Statement::DescribeHistory { table_name } => { + assert_eq!(table_name.to_string(), "a.b"); + } + _ => unreachable!(), + } + + // DESC alias + databricks().one_statement_parses_to("DESC HISTORY my_table", "DESCRIBE HISTORY my_table"); +} From 05c1f3a7041ebc685f19cc6982237762e9fc742f Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 01:45:12 +0100 Subject: [PATCH 12/81] databricks: support ${variable} widget placeholder syntax Adds tokenizer recognition of ${identifier} as a Placeholder token and allows it as an identifier in table positions, enabling SELECT * FROM ${source_table}. 
--- src/parser/mod.rs | 4 ++++ src/tokenizer.rs | 13 +++++++++++++ tests/sqlparser_databricks.rs | 6 ++++++ 3 files changed, 23 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8b22fc6c2f..9e259f6981 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7758,6 +7758,10 @@ impl<'a> Parser<'a> { Token::DoubleQuotedString(s) => { Ok(Ident::with_quote('\"', s).spanning(next_token.span)) } + // Handle ${variable} placeholders as identifiers (e.g., Databricks widget syntax) + Token::Placeholder(s) if s.starts_with("${") && s.ends_with('}') => { + Ok(Ident::new(s).spanning(next_token.span)) + } _ => self.expected("identifier", next_token), } } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index afc677300b..ab7008344c 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1191,6 +1191,19 @@ impl<'a> Tokenizer<'a> { tag: None, })) }; + } else if let Some('{') = chars.peek() { + // Handle ${identifier} variable substitution (e.g., Databricks widgets) + chars.next(); // consume '{' + let ident = peeking_take_while(chars, |ch| ch != '}'); + if let Some('}') = chars.peek() { + chars.next(); // consume '}' + return Ok(Token::Placeholder(format!("${{{ident}}}"))); + } else { + return self.tokenizer_error( + chars.location(), + "Unterminated variable substitution, expected }", + ); + } } else { value.push_str(&peeking_take_while(chars, |ch| { ch.is_alphanumeric() || ch == '_' diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index c24165d190..75c0f083a5 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -114,6 +114,12 @@ fn test_select_placeholder() { databricks().verified_stmt(sql); } +#[test] +fn test_select_dollar_brace_placeholder() { + let sql = "SELECT ${x} FROM ${y} WHERE ${z} > 1"; + databricks().verified_stmt(sql); +} + #[test] fn test_underscore_column_name() { databricks().verified_stmt("SELECT _column FROM `myproject`.`mydataset`.`mytable`"); From 127db5390e579d4d7f18d5e36dcbadfd14642d78 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 01:51:48 +0100 Subject: [PATCH 13/81] databricks: support backtick-quoted keys and single-quoted strings in colon JSON path Extends colon JSON path parser to recognize backtick-quoted word tokens and single-quoted string literals as valid path components. 
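Examples from the added tests:

    SELECT raw:`zip code`, raw:`fb:testid` FROM t
    SELECT raw:store['bicycle'], raw:store["zip code"] FROM t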
--- src/parser/mod.rs | 4 ++++ tests/sqlparser_databricks.rs | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9e259f6981..34593bd7ee 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6922,6 +6922,9 @@ impl<'a> Parser<'a> { let mut next_next_token = Some(self.next_token()); while let Some(next_token) = next_next_token { match next_token.token { + Token::Word(ref w) if w.quote_style == Some('`') => { + write!(buf, "`{}`", w.value).unwrap() + } Token::Word(w) => buf.push_str(&w.value), Token::Number(ref n, _) => { if buf.is_empty() && n.starts_with(".") { @@ -6935,6 +6938,7 @@ impl<'a> Parser<'a> { Token::RBracket => buf.push(']'), Token::Colon => buf.push(':'), Token::DoubleQuotedString(ref s) => write!(buf, "\"{}\"", s).unwrap(), + Token::SingleQuotedString(ref s) => write!(buf, "'{}'", s).unwrap(), Token::Whitespace(_) => { break; } diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 75c0f083a5..dbcaf564ff 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -271,3 +271,30 @@ fn test_describe_history() { // DESC alias databricks().one_statement_parses_to("DESC HISTORY my_table", "DESCRIBE HISTORY my_table"); } + +#[test] +fn test_json_path_with_colon() { + // Backtick-quoted field names with spaces + databricks().one_statement_parses_to( + "SELECT raw:`zip code`, raw:`fb:testid` FROM t", + "SELECT raw:`zip code`, raw:`fb:testid` FROM t", + ); + + // Bracket notation with single-quoted strings + databricks().one_statement_parses_to( + "SELECT raw:store['bicycle'], raw:store[\"zip code\"] FROM t", + "SELECT raw:store['bicycle'], raw:store[\"zip code\"] FROM t", + ); + + // Combined: all syntax variants from the corpus test + databricks().one_statement_parses_to( + "SELECT raw:`zip code`, raw:`fb:testid`, raw:store['bicycle'], raw:store[\"zip code\"] FROM t", + "SELECT raw:`zip code`, raw:`fb:testid`, raw:store['bicycle'], raw:store[\"zip code\"] FROM t", + ); + + // Bracket notation directly after colon + databricks().one_statement_parses_to( + "SELECT c1:['price'] FROM VALUES ('{ \"price\": 5 }') AS T(c1)", + "SELECT c1:['price'] FROM (VALUES ('{ \"price\": 5 }')) AS T (c1)", + ); +} From 3aa6a9cd3d7cddcf25bacb1ff6be096d147e86b9 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 01:56:39 +0100 Subject: [PATCH 14/81] databricks: support ?:: try cast operator Parses expr?::TYPE as TryCast by treating ? followed by :: as a postfix operator at the same precedence as ::. Normalizes to TRY_CAST in display. 
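Example from the added test (the operator is displayed as TRY_CAST):

    SELECT '20'?::INTEGER
    -- parses and displays as: SELECT TRY_CAST('20' AS INTEGER)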
--- src/parser/mod.rs | 15 +++++++++++++++ tests/sqlparser_databricks.rs | 15 +++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 34593bd7ee..ff33250687 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2506,6 +2506,10 @@ impl<'a> Parser<'a> { } } else if Token::DoubleColon == tok { self.parse_pg_cast(expr) + } else if matches!(&tok.token, Token::Placeholder(s) if s == "?") { + // ?:: is a try cast operator (e.g., Databricks) + self.expect_token(&Token::DoubleColon)?; + self.parse_pg_try_cast(expr) } else if Token::ExclamationMark == tok { // PostgreSQL factorial operation Ok(Expr::UnaryOp { @@ -2740,6 +2744,15 @@ impl<'a> Parser<'a> { }) } + /// Parse a try cast operator in the form of `expr?::datatype` (e.g., Databricks) + pub fn parse_pg_try_cast(&mut self, expr: Expr) -> Result { + Ok(Expr::TryCast { + expr: Box::new(expr), + data_type: self.parse_data_type()?, + format: None, + }) + } + // use https://www.postgresql.org/docs/7.0/operators.htm#AEN2026 as a reference const MUL_DIV_MOD_OP_PREC: u8 = 40; const PLUS_MINUS_PREC: u8 = 30; @@ -2835,6 +2848,8 @@ impl<'a> Parser<'a> { Ok(Self::MUL_DIV_MOD_OP_PREC) } Token::DoubleColon => Ok(50), + // ?:: is a try cast operator (e.g., Databricks) + Token::Placeholder(ref s) if s == "?" && self.peek_nth_token(1).token == Token::DoubleColon => Ok(50), Token::Colon => Ok(50), Token::ExclamationMark => Ok(50), Token::Number(s, _) if s.starts_with(".") => Ok(50), diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index dbcaf564ff..656a1fe7b3 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -298,3 +298,18 @@ fn test_json_path_with_colon() { "SELECT c1:['price'] FROM (VALUES ('{ \"price\": 5 }')) AS T (c1)", ); } + +#[test] +fn test_try_cast_operator() { + // ?:: is a try cast operator in Databricks, equivalent to TRY_CAST + databricks().one_statement_parses_to( + "SELECT '20'?::INTEGER", + "SELECT TRY_CAST('20' AS INTEGER)", + ); + + // Chaining with regular cast + databricks().one_statement_parses_to( + "SELECT col?::VARCHAR", + "SELECT TRY_CAST(col AS VARCHAR)", + ); +} From 9a94e7d30a570a81a720a29f2443787637436bcb Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 02:01:50 +0100 Subject: [PATCH 15/81] duckdb: support underscore digit separators in numeric literals Adds supports_numeric_literal_underscores() dialect method for DuckDB and Generic. Tokenizer strips underscores during lexing so 1_000_000 roundtrips as 1000000. 
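Examples from the added tests (underscores are stripped while tokenizing):

    SELECT 1_000_000        -- displays as: SELECT 1000000
    SELECT 1_2E+1_0::FLOAT  -- displays as: SELECT CAST(12E+10 AS FLOAT)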
--- src/dialect/duckdb.rs | 4 ++++ src/dialect/generic.rs | 4 ++++ src/dialect/mod.rs | 9 +++++++++ src/tokenizer.rs | 17 ++++++++++++++--- tests/sqlparser_duckdb.rs | 17 +++++++++++++++++ 5 files changed, 48 insertions(+), 3 deletions(-) diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index 4e6e9d9a41..7a09c8509c 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -32,4 +32,8 @@ impl Dialect for DuckDbDialect { fn supports_group_by_expr(&self) -> bool { true } + + fn supports_numeric_literal_underscores(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 8310954cd8..c8e122cd97 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -32,4 +32,8 @@ impl Dialect for GenericDialect { fn supports_group_by_expr(&self) -> bool { true } + + fn supports_numeric_literal_underscores(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 35dafcd343..6e245075df 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -127,6 +127,11 @@ pub trait Dialect: Debug + Any { fn supports_substring_from_for_expr(&self) -> bool { true } + /// Returns true if the dialect supports underscores as numeric literal separators. + /// e.g. `1_000_000`, `1_2E+1_0` + fn supports_numeric_literal_underscores(&self) -> bool { + false + } /// Dialect-specific prefix parser override fn parse_prefix(&self, _parser: &mut Parser) -> Option> { // return None to fall back to the default behavior @@ -306,6 +311,10 @@ mod tests { self.0.supports_substring_from_for_expr() } + fn supports_numeric_literal_underscores(&self) -> bool { + self.0.supports_numeric_literal_underscores() + } + fn parse_prefix( &self, parser: &mut sqlparser::parser::Parser, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index ab7008344c..53615c3481 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -837,7 +837,13 @@ impl<'a> Tokenizer<'a> { } // numbers and period '0'..='9' | '.' => { - let mut s = peeking_take_while(chars, |ch| ch.is_ascii_digit()); + let numeric_underscore = + self.dialect.supports_numeric_literal_underscores(); + let digit_pred = |ch: char| { + ch.is_ascii_digit() || (numeric_underscore && ch == '_') + }; + + let mut s = peeking_take_while(chars, digit_pred); // match binary literal that starts with 0x if s == "0" && chars.peek() == Some(&'x') { @@ -854,7 +860,7 @@ impl<'a> Tokenizer<'a> { s.push('.'); chars.next(); } - s += &peeking_take_while(chars, |ch| ch.is_ascii_digit()); + s += &peeking_take_while(chars, digit_pred); // No number -> Token::Period if s == "." { @@ -883,7 +889,7 @@ impl<'a> Tokenizer<'a> { chars.next(); } exponent_part += - &peeking_take_while(chars, |ch| ch.is_ascii_digit()); + &peeking_take_while(chars, digit_pred); s += exponent_part.as_str(); } // Not an exponent, discard the work done @@ -891,6 +897,11 @@ impl<'a> Tokenizer<'a> { } } + // Strip underscores from numeric literal + if numeric_underscore && s.contains('_') { + s = s.replace('_', ""); + } + // mysql dialect supports identifiers that start with a numeric prefix, // as long as they aren't an exponent number. 
if dialect_of!(self is MySqlDialect | HiveDialect) && exponent_part.is_empty() { diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 65ddba5165..2970563865 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -299,3 +299,20 @@ fn test_select_union_by_name() { }); assert_eq!(ast.body, expected); } + +#[test] +fn test_numeric_literal_underscores() { + // Underscores in numeric literals are stripped during tokenization + duckdb_and_generic().one_statement_parses_to( + "SELECT 1_000_000", + "SELECT 1000000", + ); + duckdb_and_generic().one_statement_parses_to( + "SELECT 1_2E+1_0::FLOAT", + "SELECT CAST(12E+10 AS FLOAT)", + ); + duckdb_and_generic().one_statement_parses_to( + "SELECT 1_000.50_0", + "SELECT 1000.500", + ); +} From 4b8cca89f49a1af12cb864876e715b9b6c5c23b8 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 02:15:44 +0100 Subject: [PATCH 16/81] duckdb: support prefix alias colon syntax in SELECT and FROM Implements DuckDB's alias: expr shorthand (equivalent to expr AS alias) for SELECT items and FROM table references. Uses maybe_parse in SELECT to avoid conflicts with array-slice colon syntax. --- src/dialect/duckdb.rs | 4 +++ src/dialect/mod.rs | 10 ++++++++ src/parser/mod.rs | 53 +++++++++++++++++++++++++++++++++++++++ tests/sqlparser_duckdb.rs | 51 +++++++++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+) diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index 7a09c8509c..d1dbb36ff0 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -36,4 +36,8 @@ impl Dialect for DuckDbDialect { fn supports_numeric_literal_underscores(&self) -> bool { true } + + fn supports_prefix_alias_colon(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 6e245075df..4f50fb6e1f 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -132,6 +132,12 @@ pub trait Dialect: Debug + Any { fn supports_numeric_literal_underscores(&self) -> bool { false } + /// Returns true if the dialect supports prefix alias syntax with colon. + /// e.g. DuckDB: `SELECT x: 42` instead of `SELECT 42 AS x` + /// and `FROM alias: table_name` instead of `FROM table_name AS alias` + fn supports_prefix_alias_colon(&self) -> bool { + false + } /// Dialect-specific prefix parser override fn parse_prefix(&self, _parser: &mut Parser) -> Option> { // return None to fall back to the default behavior @@ -315,6 +321,10 @@ mod tests { self.0.supports_numeric_literal_underscores() } + fn supports_prefix_alias_colon(&self) -> bool { + self.0.supports_prefix_alias_colon() + } + fn parse_prefix( &self, parser: &mut sqlparser::parser::Parser, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ff33250687..19100980b8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9614,6 +9614,38 @@ impl<'a> Parser<'a> { } else { let name = self.parse_object_name(true)?; + // DuckDB prefix alias syntax: `alias: table_name` + // e.g. `FROM foo: c.db.tbl` means table `c.db.tbl` with alias `foo` + if self.dialect.supports_prefix_alias_colon() + && name.0.len() == 1 + && self.consume_token(&Token::Colon) + { + let prefix_alias = TableAlias { + name: name.0.into_iter().next().unwrap().empty_span(), + columns: vec![], + }; + let mut table = self.parse_table_factor()?; + // Set the alias on the parsed table factor + match &mut table { + TableFactor::Table { alias, .. } + | TableFactor::Derived { alias, .. } + | TableFactor::Function { alias, .. } + | TableFactor::UNNEST { alias, .. 
} + | TableFactor::TableFunction { alias, .. } + | TableFactor::FieldAccessor { alias, .. } + | TableFactor::NestedJoin { alias, .. } => { + *alias = Some(prefix_alias); + } + TableFactor::Pivot { alias, .. } + | TableFactor::Unpivot { alias, .. } => { + *alias = Some(prefix_alias); + } + TableFactor::TableSample { .. } + | TableFactor::ExternalQuery { .. } => {} + } + return Ok(table); + } + let partitions: Vec = if dialect_of!(self is MySqlDialect | GenericDialect) && self.parse_keyword(Keyword::PARTITION) { @@ -10590,6 +10622,27 @@ impl<'a> Parser<'a> { /// Parse a comma-delimited list of projections after SELECT pub fn parse_select_item(&mut self) -> Result, ParserError> { let start_span = self.index; + + // DuckDB prefix alias syntax: `alias: expr` + // e.g. `SELECT sum_qty: sum(l_quantity)` means `sum(l_quantity) AS sum_qty` + // Must be checked before parse_wildcard_expr because the colon would otherwise + // be consumed as a JSON access operator. + if self.dialect.supports_prefix_alias_colon() { + if let Some(item) = self.maybe_parse(|parser| { + let alias = parser.parse_identifier(false)?; + parser.expect_token(&Token::Colon)?; + let expr = parser.parse_expr()?; + let expr_with_location = expr.spanning(parser.span_from_index(start_span)); + Ok(SelectItem::ExprWithAlias { + expr: expr_with_location, + alias, + } + .spanning(parser.span_from_index(start_span))) + }) { + return Ok(item); + } + } + match self.parse_wildcard_expr()? { WildcardExpr::Expr(expr) => { let expr: Expr = if self.dialect.supports_filter_during_aggregation() diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 2970563865..a40167b15d 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -316,3 +316,54 @@ fn test_numeric_literal_underscores() { "SELECT 1000.500", ); } + +#[test] +fn test_prefix_alias_colon_select() { + // DuckDB prefix alias: `alias: expr` is equivalent to `expr AS alias` + duckdb().one_statement_parses_to( + "SELECT e: 1 + 2, f: len('asdf'), s: (SELECT 42)", + "SELECT 1 + 2 AS e, len('asdf') AS f, (SELECT 42) AS s", + ); +} + +#[test] +fn test_prefix_alias_colon_select_aggregation() { + // DuckDB prefix alias with aggregation functions + duckdb().one_statement_parses_to( + "SELECT sum_qty: sum(l_quantity), avg_price: avg(l_extendedprice), count_order: count(*)", + "SELECT sum(l_quantity) AS sum_qty, avg(l_extendedprice) AS avg_price, count(*) AS count_order", + ); +} + +#[test] +fn test_prefix_alias_colon_from() { + // DuckDB prefix alias in FROM clause: `alias: table` is equivalent to `table AS alias` + duckdb().one_statement_parses_to( + "SELECT * FROM foo: c.db.tbl", + "SELECT * FROM c.db.tbl AS foo", + ); +} + +#[test] +fn test_prefix_alias_colon_from_simple() { + duckdb().one_statement_parses_to( + "SELECT * FROM foo: bar", + "SELECT * FROM bar AS foo", + ); +} + +#[test] +fn test_prefix_alias_colon_from_function() { + duckdb().one_statement_parses_to( + "SELECT * FROM r: range(10)", + "SELECT * FROM range(10) AS r", + ); +} + +#[test] +fn test_prefix_alias_colon_from_multiple() { + duckdb().one_statement_parses_to( + "SELECT * FROM r: range(10), v: (VALUES (42))", + "SELECT * FROM range(10) AS r, (VALUES (42)) AS v", + ); +} From e6f088a71affa0c08e3d3a8fe518172b63b52663 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 02:24:37 +0100 Subject: [PATCH 17/81] duckdb: support FROM-first query syntax as shorthand for SELECT * Adds supports_from_first_select() dialect method allowing FROM tbl as shorthand for SELECT * FROM tbl, 
including subquery form (FROM range(10)). --- src/dialect/duckdb.rs | 4 +++ src/dialect/mod.rs | 10 ++++++ src/parser/mod.rs | 72 +++++++++++++++++++++++++++++++++++++++ tests/sqlparser_duckdb.rs | 28 +++++++++++++++ 4 files changed, 114 insertions(+) diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index d1dbb36ff0..c477b3e21f 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -40,4 +40,8 @@ impl Dialect for DuckDbDialect { fn supports_prefix_alias_colon(&self) -> bool { true } + + fn supports_from_first_select(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 4f50fb6e1f..d3bfbe3287 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -138,6 +138,12 @@ pub trait Dialect: Debug + Any { fn supports_prefix_alias_colon(&self) -> bool { false } + /// Returns true if the dialect supports FROM-first queries without SELECT. + /// e.g. DuckDB: `FROM tbl` is equivalent to `SELECT * FROM tbl` + /// and `(FROM range(10))` as a subquery. + fn supports_from_first_select(&self) -> bool { + false + } /// Dialect-specific prefix parser override fn parse_prefix(&self, _parser: &mut Parser) -> Option> { // return None to fall back to the default behavior @@ -325,6 +331,10 @@ mod tests { self.0.supports_prefix_alias_colon() } + fn supports_from_first_select(&self) -> bool { + self.0.supports_from_first_select() + } + fn parse_prefix( &self, parser: &mut sqlparser::parser::Parser, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 19100980b8..0d5d98c91a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -477,6 +477,11 @@ impl<'a> Parser<'a> { self.prev_token(); Ok(Statement::Query(self.parse_boxed_query()?)) } + // DuckDB FROM-first syntax: `FROM tbl` is equivalent to `SELECT * FROM tbl` + Keyword::FROM if self.dialect.supports_from_first_select() => { + self.prev_token(); + Ok(Statement::Query(self.parse_boxed_query()?)) + } Keyword::TRUNCATE => Ok(self.parse_truncate()?), Keyword::MSCK => Ok(self.parse_msck()?), Keyword::CREATE => Ok(self.parse_create()?), @@ -8587,6 +8592,11 @@ impl<'a> Parser<'a> { SetExpr::Values(self.parse_values(is_mysql)?) } else if self.parse_keyword(Keyword::TABLE) { SetExpr::Table(Box::new(self.parse_as_table()?)) + } else if self.dialect.supports_from_first_select() + && self.parse_keyword(Keyword::FROM) + { + // DuckDB FROM-first syntax: `FROM tbl` is equivalent to `SELECT * FROM tbl` + SetExpr::Select(Box::new(self.parse_select_from_first()?)) } else { return self.expected( "SELECT, VALUES, or a subquery in the query body", @@ -8833,6 +8843,68 @@ impl<'a> Parser<'a> { }) } + /// Parse DuckDB FROM-first query: `FROM tbl` is equivalent to `SELECT * FROM tbl` + /// Called after the FROM keyword has already been consumed. + pub fn parse_select_from_first(&mut self) -> Result { + let from = self.parse_from_clause_body()?; + + let selection = if self.parse_keyword(Keyword::WHERE) { + let start_idx = self.index; + let expr = self.parse_expr()?; + Some(expr.spanning(self.span_from_index(start_idx))) + } else { + None + }; + + let group_by = if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { + if self.parse_keyword(Keyword::ALL) { + GroupByExpr::All + } else { + GroupByExpr::Expressions(self.parse_comma_separated(Parser::parse_group_by_expr)?) + } + } else { + GroupByExpr::Expressions(vec![]) + }; + + let having = if self.parse_keyword(Keyword::HAVING) { + Some(self.parse_expr()?) 
+ } else { + None + }; + + let named_windows = if self.parse_keyword(Keyword::WINDOW) { + self.parse_comma_separated(Parser::parse_named_window)? + } else { + vec![] + }; + + let qualify = if self.parse_keyword(Keyword::QUALIFY) { + Some(self.parse_expr()?) + } else { + None + }; + + Ok(Select { + distinct: None, + top: None, + projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default()) + .empty_span()], + into: None, + from, + lateral_views: vec![], + sample: None, + selection, + group_by, + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having, + named_window: named_windows, + qualify, + value_table_mode: None, + }) + } + /// Parse `CREATE TABLE x AS TABLE y` pub fn parse_as_table(&mut self) -> Result { let token1 = self.next_token(); diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index a40167b15d..a0617e407a 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -367,3 +367,31 @@ fn test_prefix_alias_colon_from_multiple() { "SELECT * FROM range(10) AS r, (VALUES (42)) AS v", ); } + +#[test] +fn test_from_first_select() { + // DuckDB FROM-first syntax: `FROM tbl` is equivalent to `SELECT * FROM tbl` + duckdb().one_statement_parses_to("FROM tbl", "SELECT * FROM tbl"); +} + +#[test] +fn test_from_first_select_function() { + duckdb().one_statement_parses_to("FROM range(10)", "SELECT * FROM range(10)"); +} + +#[test] +fn test_from_first_subquery() { + // FROM-first query used as a subquery + duckdb().one_statement_parses_to( + "SELECT * FROM r: range(10), v: (VALUES (42)), s: (FROM range(10))", + "SELECT * FROM range(10) AS r, (VALUES (42)) AS v, (SELECT * FROM range(10)) AS s", + ); +} + +#[test] +fn test_from_first_with_where() { + duckdb().one_statement_parses_to( + "FROM tbl WHERE x > 1", + "SELECT * FROM tbl WHERE x > 1", + ); +} From 5842072b2dbc9a5c55583b802422dc20b865dae8 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 02:31:42 +0100 Subject: [PATCH 18/81] duckdb: support MAP {'key': value} literal constructor syntax Adds Value::MapLiteral and parses DuckDB MAP {k: v, ...} constructor expression. 
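For reference, a minimal round-trip sketch of the MAP constructor described above, assuming the crate's usual Parser::parse_sql entry point and the DuckDbDialect type; the expected text mirrors the test added in this patch:

    use sqlparser::dialect::DuckDbDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // DuckDB MAP literal round-trips through parse + Display.
        let sql = "SELECT MAP { 'x': 1, 'y': 2 }";
        let ast = Parser::parse_sql(&DuckDbDialect {}, sql).expect("parse failed");
        assert_eq!(ast[0].to_string(), sql);
    }
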
--- src/ast/value.rs | 20 ++++++++++++++++++++ src/parser/mod.rs | 25 +++++++++++++++++++++++++ tests/sqlparser_duckdb.rs | 7 +++++++ 3 files changed, 52 insertions(+) diff --git a/src/ast/value.rs b/src/ast/value.rs index 7ac2ca8aa6..1dc751cd8e 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -86,6 +86,9 @@ pub enum Value { /// TUPLE as used by BigQuery /// ("org_unit", "development") Tuple(Vec), + /// MAP literal as used by DuckDB + /// MAP {'key': value, ...} + MapLiteral(Vec), } impl fmt::Display for Value { @@ -135,6 +138,23 @@ impl fmt::Display for Value { Value::Tuple(values) => { write!(f, "({})", display_comma_separated(values)) } + Value::MapLiteral(fields) => { + if fields.is_empty() { + write!(f, "MAP {}", "{}") + } else { + let mut first = true; + write!(f, "MAP {}", "{ ")?; + for ObjectConstantKeyValue { key, value } in fields { + if first { + first = false; + } else { + write!(f, ", ")?; + } + write!(f, "'{}': {}", key, value)?; + } + write!(f, "{}", " }") + } + } } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0d5d98c91a..6e95b8c116 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -885,6 +885,10 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; self.parse_array_subquery() } + // DuckDB MAP literal: MAP {'key': value, ...} + Keyword::MAP if self.peek_token_is(&Token::LBrace) => { + self.parse_map_literal() + } Keyword::NOT => self.parse_not(), Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { self.parse_match_against() @@ -1691,6 +1695,27 @@ impl<'a> Parser<'a> { Ok(Expr::ArraySubquery(query)) } + /// Parses a DuckDB MAP literal: `MAP {'key': value, ...}` + /// Assumes the MAP keyword has already been consumed. + pub fn parse_map_literal(&mut self) -> Result { + self.expect_token(&Token::LBrace)?; + if self.consume_token(&Token::RBrace) { + return Ok(Expr::Value(Value::MapLiteral(vec![]))); + } + let mut fields = vec![]; + loop { + let key = self.parse_literal_string()?; + self.expect_token(&Token::Colon)?; + let value = Box::new(self.parse_expr()?); + fields.push(ObjectConstantKeyValue { key, value }); + if !self.consume_token(&Token::Comma) { + break; + } + } + self.expect_token(&Token::RBrace)?; + Ok(Expr::Value(Value::MapLiteral(fields))) + } + // This function parses date/time fields for the EXTRACT function-like // operator, interval qualifiers, and the ceil/floor operations. // EXTRACT supports a wider set of date/time fields than interval qualifiers, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index a0617e407a..108bcc485c 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -395,3 +395,10 @@ fn test_from_first_with_where() { "SELECT * FROM tbl WHERE x > 1", ); } + +#[test] +fn test_map_literal() { + duckdb().verified_stmt("SELECT MAP { 'x': 1 }"); + duckdb().verified_stmt("SELECT MAP { 'x': 1, 'y': 2 }"); + duckdb().verified_stmt("SELECT MAP {}"); +} From 756f3131baa9d4931a48eec211b3a6ba7c3e720d Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 02:34:46 +0100 Subject: [PATCH 19/81] duckdb: support FILTER (expr) without WHERE keyword in aggregates DuckDB allows omitting WHERE inside FILTER clauses, e.g. SUM(x) FILTER (x = 1). Made WHERE optional when dialect is DuckDB; standard FILTER (WHERE expr) still works. 
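A small sketch of the normalization this patch performs, under the same assumptions (public Parser::parse_sql, DuckDbDialect); the WHERE-less form is expected to print back with WHERE inserted:

    use sqlparser::dialect::DuckDbDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // DuckDB allows omitting WHERE inside FILTER; output is normalized.
        let ast = Parser::parse_sql(&DuckDbDialect {}, "SELECT SUM(x) FILTER (x = 1)")
            .expect("parse failed");
        assert_eq!(ast[0].to_string(), "SELECT SUM(x) FILTER (WHERE x = 1)");
    }
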
--- src/parser/mod.rs | 10 ++++++++-- tests/sqlparser_duckdb.rs | 11 +++++++++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6e95b8c116..49f1ed8b21 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10746,7 +10746,10 @@ impl<'a> Parser<'a> { && self.parse_keyword(Keyword::FILTER) { let i = self.index - 1; - if self.consume_token(&Token::LParen) && self.parse_keyword(Keyword::WHERE) { + if self.consume_token(&Token::LParen) { + // Standard SQL: FILTER (WHERE expr) + // DuckDB also supports: FILTER (expr) without WHERE + self.parse_keyword(Keyword::WHERE); let filter = self.parse_expr()?; self.expect_token(&Token::RParen)?; Expr::AggregateExpressionWithFilter { @@ -10807,7 +10810,10 @@ impl<'a> Parser<'a> { && self.parse_keyword(Keyword::FILTER) { let i = self.index - 1; - if self.consume_token(&Token::LParen) && self.parse_keyword(Keyword::WHERE) { + if self.consume_token(&Token::LParen) { + // Standard SQL: FILTER (WHERE expr) + // DuckDB also supports: FILTER (expr) without WHERE + self.parse_keyword(Keyword::WHERE); let filter = self.parse_expr()?; self.expect_token(&Token::RParen)?; Expr::AggregateExpressionWithFilter { diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 108bcc485c..71566d0232 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -402,3 +402,14 @@ fn test_map_literal() { duckdb().verified_stmt("SELECT MAP { 'x': 1, 'y': 2 }"); duckdb().verified_stmt("SELECT MAP {}"); } + +#[test] +fn test_filter_without_where() { + // DuckDB supports FILTER (expr) without WHERE keyword + duckdb().one_statement_parses_to( + "SELECT SUM(x) FILTER (x = 1)", + "SELECT SUM(x) FILTER (WHERE x = 1)", + ); + // Standard FILTER (WHERE expr) syntax should still work + duckdb().verified_stmt("SELECT SUM(x) FILTER (WHERE x = 1)"); +} From ef5dc5aa9fb62ac35cfa35264c8cb44b8348d619 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 02:38:15 +0100 Subject: [PATCH 20/81] mysql: support := assignment operator in SELECT expressions Added BinaryOperator::Assignment for MySQL's := user-variable assignment syntax like SELECT @var := expr. Uses the existing DuckAssignment token. --- src/ast/operator.rs | 3 +++ src/parser/mod.rs | 5 +++++ tests/sqlparser_mysql.rs | 8 ++++++++ 3 files changed, 16 insertions(+) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index 9fb1bf0223..f678ac2adb 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -111,6 +111,8 @@ pub enum BinaryOperator { DuckIntegerDivide, /// MySQL [`DIV`](https://dev.mysql.com/doc/refman/8.0/en/arithmetic-functions.html) integer division MyIntegerDivide, + /// Assignment operator `:=`, e.g. `@var := expr` (MySQL-specific) + Assignment, /// Support for custom operators (built by parsers outside this crate) Custom(String), /// Bitwise XOR, e.g. 
`a # b` (PostgreSQL-specific) @@ -162,6 +164,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::BitwiseXor => f.write_str("^"), BinaryOperator::DuckIntegerDivide => f.write_str("//"), BinaryOperator::MyIntegerDivide => f.write_str("DIV"), + BinaryOperator::Assignment => f.write_str(":="), BinaryOperator::Custom(s) => f.write_str(s), BinaryOperator::PGBitwiseXor => f.write_str("#"), BinaryOperator::PGBitwiseShiftLeft => f.write_str("<<"), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 49f1ed8b21..48c10b2c49 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2329,6 +2329,9 @@ impl<'a> Parser<'a> { } Token::Ampersand => Some(BinaryOperator::BitwiseAnd), Token::Div => Some(BinaryOperator::Divide), + Token::DuckAssignment if dialect_of!(self is MySqlDialect | GenericDialect) => { + Some(BinaryOperator::Assignment) + } Token::DuckIntDiv if dialect_of!(self is DuckDbDialect | GenericDialect) => { Some(BinaryOperator::DuckIntegerDivide) } @@ -2894,6 +2897,8 @@ impl<'a> Parser<'a> { | Token::HashMinus | Token::AtQuestion | Token::AtAt => Ok(50), + // MySQL assignment operator := has the lowest precedence + Token::DuckAssignment if dialect_of!(self is MySqlDialect | GenericDialect) => Ok(5), _ => Ok(0), } } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index d7cfdd8cb8..91423979b4 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1962,3 +1962,11 @@ fn parse_regexp() { mysql_and_generic().verified_stmt(r#"SELECT v FROM strings WHERE v REGEXP 'San* [fF].*'"#); mysql_and_generic().verified_stmt(r#"SELECT 'Michael!' REGEXP '.*'"#); } + +#[test] +fn parse_variable_assignment_operator() { + // MySQL user-defined variable assignment with := + mysql_and_generic().verified_stmt("SELECT @var1 := COUNT(*) FROM t1"); + mysql_and_generic().verified_stmt("SELECT @var1, @var2 := @var1"); + mysql_and_generic().verified_stmt("SELECT @var1 := 1, @var2"); +} From 4b0f5d72ee29393d7c44e1cc7b5c69ffdd256f8e Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 02:42:21 +0100 Subject: [PATCH 21/81] mysql: support STRAIGHT_JOIN join type MySQL's STRAIGHT_JOIN forces join order to match FROM clause order. Added JoinOperator::StraightJoin variant with keyword, parser, and display support. 
--- src/ast/query.rs | 8 ++++++++ src/keywords.rs | 3 +++ src/parser/mod.rs | 7 +++++++ tests/sqlparser_mysql.rs | 6 ++++++ 4 files changed, 24 insertions(+) diff --git a/src/ast/query.rs b/src/ast/query.rs index c2d08f86d0..11e8dc2014 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1370,6 +1370,12 @@ impl fmt::Display for Join { match_condition, suffix(constraint) ), + JoinOperator::StraightJoin(constraint) => write!( + f, + " STRAIGHT_JOIN {}{}", + self.relation, + suffix(constraint) + ), } } } @@ -1402,6 +1408,8 @@ pub enum JoinOperator { match_condition: Expr, constraint: JoinConstraint, }, + /// STRAIGHT_JOIN (MySQL) + StraightJoin(JoinConstraint), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/keywords.rs b/src/keywords.rs index eacf846802..7d352a0444 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -666,6 +666,7 @@ define_keywords!( STEP, STORAGE_INTEGRATION, STORED, + STRAIGHT_JOIN, STRICT, STRING, STRUCT, @@ -851,6 +852,8 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ // Reserved for snowflake ASOF JOIN Keyword::ASOF, Keyword::MATCH_CONDITION, + // for MySQL STRAIGHT_JOIN + Keyword::STRAIGHT_JOIN, ]; /// Can't be used as a column alias, so that `SELECT alias` diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 48c10b2c49..5c29f361e6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9383,6 +9383,13 @@ impl<'a> Parser<'a> { constraint, }, } + } else if self.parse_keyword(Keyword::STRAIGHT_JOIN) { + let relation = self.parse_table_factor()?; + let join_constraint = self.parse_join_constraint(false)?; + Join { + relation, + join_operator: JoinOperator::StraightJoin(join_constraint), + } } else { let natural = self.parse_keyword(Keyword::NATURAL); let peek_keyword = if let Token::Word(w) = self.peek_token_kind().clone() { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 91423979b4..f53df73508 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1970,3 +1970,9 @@ fn parse_variable_assignment_operator() { mysql_and_generic().verified_stmt("SELECT @var1, @var2 := @var1"); mysql_and_generic().verified_stmt("SELECT @var1 := 1, @var2"); } + +#[test] +fn parse_straight_join() { + mysql_and_generic().verified_stmt("SELECT e.* FROM e STRAIGHT_JOIN p ON e.x = p.y"); + mysql_and_generic().verified_stmt("SELECT * FROM t1 STRAIGHT_JOIN t2 ON t1.id = t2.id"); +} From 36059c734f562f04768d78613b8032b0dbca5ab2 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 02:44:47 +0100 Subject: [PATCH 22/81] mysql: support DISTINCTROW as synonym for DISTINCT in SELECT MySQL accepts DISTINCTROW as an alias for DISTINCT. Parser normalizes it to DISTINCT in the AST output. 
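A minimal sketch of the DISTINCTROW normalization, again assuming the public Parser::parse_sql entry point and MySqlDialect:

    use sqlparser::dialect::MySqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // DISTINCTROW is accepted on input and rendered as DISTINCT on output.
        let ast = Parser::parse_sql(&MySqlDialect {}, "SELECT DISTINCTROW tbl.col FROM tbl")
            .expect("parse failed");
        assert_eq!(ast[0].to_string(), "SELECT DISTINCT tbl.col FROM tbl");
    }
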
--- src/keywords.rs | 1 + src/parser/mod.rs | 3 ++- tests/sqlparser_mysql.rs | 9 +++++++++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/keywords.rs b/src/keywords.rs index 7d352a0444..20bcab1d51 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -241,6 +241,7 @@ define_keywords!( DISCARD, DISCONNECT, DISTINCT, + DISTINCTROW, DISTKEY, DISTRIBUTE, DISTSTYLE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5c29f361e6..2b29dde3c8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3435,7 +3435,8 @@ impl<'a> Parser<'a> { pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { let loc = self.peek_token().span.start; let all = self.parse_keyword(Keyword::ALL); - let distinct = self.parse_keyword(Keyword::DISTINCT); + let distinct = + self.parse_keyword(Keyword::DISTINCT) || self.parse_keyword(Keyword::DISTINCTROW); if !distinct { return Ok(None); } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index f53df73508..c155271e03 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1623,6 +1623,15 @@ fn parse_substring_in_select() { } } +#[test] +fn parse_distinctrow() { + // DISTINCTROW is a MySQL synonym for DISTINCT + mysql_and_generic().one_statement_parses_to( + "SELECT DISTINCTROW tbl.col FROM tbl", + "SELECT DISTINCT tbl.col FROM tbl", + ); +} + #[test] fn parse_show_variables() { mysql_and_generic().verified_stmt("SHOW VARIABLES"); From 3bc7627edd038c1c11b9d6858c77d132ae1f0b59 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 02:56:32 +0100 Subject: [PATCH 23/81] mysql: support comma-separated assignments in SET statement MySQL allows multiple variable assignments in one SET, e.g. SET @x = 1, SESSION sql_mode = ''. Added SetVariableAssignment struct and additional_assignments field on SetVariable. --- src/ast/mod.rs | 56 ++++++++++++++++++++++++++++++- src/parser/mod.rs | 66 ++++++++++++++++++++++++++++++------- tests/sqlparser_common.rs | 2 ++ tests/sqlparser_hive.rs | 1 + tests/sqlparser_mysql.rs | 19 +++++++++++ tests/sqlparser_postgres.rs | 7 ++++ 6 files changed, 138 insertions(+), 13 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ad38a8e70a..57d2125453 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1487,6 +1487,52 @@ impl fmt::Display for UnloadSource { } /// A top-level statement (SELECT, INSERT, CREATE, etc.) +/// A single variable assignment within a SET statement. 
+/// Used for MySQL's comma-separated SET syntax: +/// `SET @x = 1, SESSION sql_mode = ''` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct SetVariableAssignment { + pub scope: Option, + pub variable: ObjectName, + pub value: Vec, +} + +impl fmt::Display for SetVariableAssignment { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(scope) = &self.scope { + write!(f, "{scope} ")?; + } + write!( + f, + "{name} = {value}", + name = self.variable, + value = display_comma_separated(&self.value) + ) + } +} + +/// Scope modifier for SET variable assignments +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum SetVariableScope { + Session, + Local, + Global, +} + +impl fmt::Display for SetVariableScope { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SetVariableScope::Session => write!(f, "SESSION"), + SetVariableScope::Local => write!(f, "LOCAL"), + SetVariableScope::Global => write!(f, "GLOBAL"), + } + } +} + #[allow(clippy::large_enum_variant)] #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1957,6 +2003,9 @@ pub enum Statement { hivevar: bool, variable: ObjectName, value: Vec, + /// Additional comma-separated assignments in the same SET statement (MySQL). + /// e.g. `SET @x = 1, SESSION sql_mode = ''` + additional_assignments: Vec, }, /// ```sql /// SET TIME ZONE @@ -3490,6 +3539,7 @@ impl fmt::Display for Statement { variable, hivevar, value, + additional_assignments, } => { f.write_str("SET ")?; if *local { @@ -3501,7 +3551,11 @@ impl fmt::Display for Statement { hivevar = if *hivevar { "HIVEVAR:" } else { "" }, name = variable, value = display_comma_separated(value) - ) + )?; + for assignment in additional_assignments { + write!(f, ", {assignment}")?; + } + Ok(()) } Statement::SetTimeZone { local, value } => { f.write_str("SET ")?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2b29dde3c8..9284c8cb94 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -545,6 +545,7 @@ impl<'a> Parser<'a> { hivevar: false, variable: ObjectName(vec!["SYSTEM".into()]), value: vec![], + additional_assignments: vec![], }) } // CASE as a standalone expression statement (e.g. Snowflake masking policy bodies) @@ -556,6 +557,7 @@ impl<'a> Parser<'a> { hivevar: false, variable: ObjectName(vec![]), value: vec![expr], + additional_assignments: vec![], }) } _ => self.expected("an SQL statement", next_token), @@ -4813,6 +4815,7 @@ impl<'a> Parser<'a> { hivevar: false, variable: ObjectName(vec![name]), value: vec![], + additional_assignments: vec![], }); } self.index = idx; @@ -9021,6 +9024,7 @@ impl<'a> Parser<'a> { hivevar: Some(Keyword::HIVEVAR) == modifier, variable, value: vec![value], + additional_assignments: vec![], }); } @@ -9049,25 +9053,63 @@ impl<'a> Parser<'a> { collation_name, }) } else if self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO) { - let mut values = vec![]; - loop { - let value = if let Ok(expr) = self.parse_expr() { - expr - } else { - self.expected("variable value", self.peek_token())? - }; + let value = if let Ok(expr) = self.parse_expr() { + expr + } else { + self.expected("variable value", self.peek_token())? 
+ }; - values.push(value); - if self.consume_token(&Token::Comma) { - continue; + // MySQL supports comma-separated assignments: + // SET @x = 1, SESSION sql_mode = '', var = val + if dialect_of!(self is MySqlDialect) { + let mut additional_assignments = vec![]; + while self.consume_token(&Token::Comma) { + let scope = match self.parse_one_of_keywords(&[ + Keyword::SESSION, + Keyword::LOCAL, + Keyword::GLOBAL, + ]) { + Some(Keyword::SESSION) => Some(SetVariableScope::Session), + Some(Keyword::LOCAL) => Some(SetVariableScope::Local), + Some(Keyword::GLOBAL) => Some(SetVariableScope::Global), + _ => None, + }; + let var_name = self.parse_object_name(false)?; + self.expect_token(&Token::Eq)?; + let val = self.parse_expr()?; + additional_assignments.push(SetVariableAssignment { + scope, + variable: var_name, + value: vec![val], + }); } return Ok(Statement::SetVariable { local: modifier == Some(Keyword::LOCAL), - hivevar: Some(Keyword::HIVEVAR) == modifier, + hivevar: false, variable, - value: values, + value: vec![value], + additional_assignments, }); } + + // For other dialects, comma separates multiple values for the same variable + // e.g. PostgreSQL: SET search_path = schema1, schema2 + let mut values = vec![value]; + while self.consume_token(&Token::Comma) { + let v = if let Ok(expr) = self.parse_expr() { + expr + } else { + self.expected("variable value", self.peek_token())? + }; + values.push(v); + } + return Ok(Statement::SetVariable { + local: modifier == Some(Keyword::LOCAL), + hivevar: Some(Keyword::HIVEVAR) == modifier, + variable, + value: values, + additional_assignments: vec![], + }); } else if variable.to_string().eq_ignore_ascii_case("TIMEZONE") { // for some db (e.g. postgresql), SET TIME ZONE is an alias for SET TIMEZONE [TO|=] match self.parse_expr() { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index b3f49c7e5a..e2115efc70 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -6763,6 +6763,7 @@ fn parse_set_variable() { hivevar, variable, value, + additional_assignments: _, } => { assert!(!local); assert!(!hivevar); @@ -6786,6 +6787,7 @@ fn parse_set_time_zone() { hivevar, variable, value, + additional_assignments: _, } => { assert!(!local); assert!(!hivevar); diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index f9eb95058e..b01496f3d3 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -226,6 +226,7 @@ fn set_statement_with_minus() { op: UnaryOperator::Minus, expr: Box::new(Expr::Identifier(Ident::new("Xmx4g").empty_span())) }], + additional_assignments: vec![], } ); diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index c155271e03..deaeeaf7fa 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -263,6 +263,25 @@ fn parse_set_variables() { hivevar: false, variable: ObjectName(vec!["autocommit".into()]), value: vec![Expr::Value(number("1"))], + additional_assignments: vec![], + } + ); + + // MySQL supports comma-separated assignments with optional scope modifiers + mysql().verified_stmt("SET @x = 1, SESSION sql_mode = ''"); + mysql().verified_stmt("SET @x = 1, @y = 2"); + assert_eq!( + mysql().verified_stmt("SET @x = 1, SESSION sql_mode = ''"), + Statement::SetVariable { + local: false, + hivevar: false, + variable: ObjectName(vec!["@x".into()]), + value: vec![Expr::Value(number("1"))], + additional_assignments: vec![SetVariableAssignment { + scope: Some(SetVariableScope::Session), + variable: ObjectName(vec!["sql_mode".into()]), + value: 
vec![Expr::Value(Value::SingleQuotedString("".into()))], + }], } ); } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index e99fce85d1..7e5d13b83c 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1207,6 +1207,7 @@ fn parse_set() { } .empty_span() )], + additional_assignments: vec![], } ); @@ -1218,6 +1219,7 @@ fn parse_set() { hivevar: false, variable: ObjectName(vec![Ident::new("a")]), value: vec![Expr::Value(Value::SingleQuotedString("b".into()))], + additional_assignments: vec![], } ); @@ -1229,6 +1231,7 @@ fn parse_set() { hivevar: false, variable: ObjectName(vec![Ident::new("a")]), value: vec![Expr::Value(number("0"))], + additional_assignments: vec![], } ); @@ -1246,6 +1249,7 @@ fn parse_set() { } .empty_span() )], + additional_assignments: vec![], } ); @@ -1257,6 +1261,7 @@ fn parse_set() { hivevar: false, variable: ObjectName(vec![Ident::new("a")]), value: vec![Expr::Identifier(Ident::new("b").empty_span())], + additional_assignments: vec![], } ); @@ -1274,6 +1279,7 @@ fn parse_set() { } .empty_span() )], + additional_assignments: vec![], } ); @@ -1294,6 +1300,7 @@ fn parse_set() { Ident::new("parallelism") ]), value: vec![Expr::Value(Value::Boolean(false))], + additional_assignments: vec![], } ); From 466c5d13eb373aa395d4c2346d9d92e9aa4c079b Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 03:02:22 +0100 Subject: [PATCH 24/81] postgres: support DROP INDEX CONCURRENTLY syntax Added concurrently: bool field to Statement::Drop and parser support for DROP INDEX CONCURRENTLY [IF EXISTS] [CASCADE|RESTRICT]. --- src/ast/mod.rs | 6 +++++- src/parser/mod.rs | 5 +++++ tests/sqlparser_postgres.rs | 26 ++++++++++++++++++++++++++ 3 files changed, 36 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 57d2125453..71cc2ddd94 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1920,6 +1920,8 @@ pub enum Statement { purge: bool, /// MySQL-specific "TEMPORARY" keyword temporary: bool, + /// PostgreSQL-specific "CONCURRENTLY" keyword for DROP INDEX + concurrently: bool, }, /// ```sql /// DROP FUNCTION @@ -3496,11 +3498,13 @@ impl fmt::Display for Statement { restrict, purge, temporary, + concurrently, } => write!( f, - "DROP {}{}{} {}{}{}{}", + "DROP {}{}{}{} {}{}{}{}", if *temporary { "TEMPORARY " } else { "" }, object_type, + if *concurrently { " CONCURRENTLY" } else { "" }, if *if_exists { " IF EXISTS" } else { "" }, display_comma_separated(names), if *cascade { " CASCADE" } else { "" }, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9284c8cb94..631d927f54 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4718,6 +4718,10 @@ impl<'a> Parser<'a> { self.peek_token(), ); }; + // PostgreSQL supports CONCURRENTLY for DROP INDEX + let concurrently = object_type == ObjectType::Index + && self.parse_keyword(Keyword::CONCURRENTLY); + // Many dialects support the non standard `IF EXISTS` clause and allow // specifying multiple objects to delete in a single statement let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); @@ -4744,6 +4748,7 @@ impl<'a> Parser<'a> { restrict, purge, temporary, + concurrently, }) } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 7e5d13b83c..4f7a662248 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2034,6 +2034,32 @@ fn parse_create_index_concurrently() { } } +#[test] +fn parse_drop_index_concurrently() { + pg().verified_stmt("DROP INDEX CONCURRENTLY ix_table_id"); + 
pg().verified_stmt("DROP INDEX CONCURRENTLY IF EXISTS ix_table_id"); + pg().verified_stmt("DROP INDEX CONCURRENTLY IF EXISTS ix_table_id CASCADE"); + + match pg().verified_stmt("DROP INDEX CONCURRENTLY ix_table_id") { + Statement::Drop { + object_type, + if_exists, + names, + concurrently, + .. + } => { + assert_eq!(ObjectType::Index, object_type); + assert!(!if_exists); + assert!(concurrently); + assert_eq!( + vec!["ix_table_id"], + names.iter().map(ToString::to_string).collect::>() + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_index_with_predicate() { let sql = "CREATE INDEX IF NOT EXISTS my_index ON my_table(col1,col2) WHERE col3 IS NULL"; From 13c82658b33d63dc129cd049c83617d1db00440b Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 03:07:47 +0100 Subject: [PATCH 25/81] postgres: support ?| and ?& JSONB array-key existence operators Added QuestionPipe and QuestionAnd tokens and JsonOperator variants for PostgreSQL's ?| (any key exists) and ?& (all keys exist) JSONB operators. --- src/ast/mod.rs | 8 ++++++++ src/parser/mod.rs | 8 +++++++- src/tokenizer.rs | 18 ++++++++++++++++-- tests/sqlparser_postgres.rs | 12 ++++++++++++ 4 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 71cc2ddd94..fe0799c0b5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -331,6 +331,12 @@ pub enum JsonOperator { /// for the specified JSON value. Only the first item of the result is taken into /// account. If the result is not Boolean, then NULL is returned. AtAt, + /// jsonb ?| text[] -> boolean: Do any of the strings in the text array exist as + /// top-level keys or array elements? + QuestionPipe, + /// jsonb ?& text[] -> boolean: Do all of the strings in the text array exist as + /// top-level keys or array elements? + QuestionAnd, } impl fmt::Display for JsonOperator { @@ -361,6 +367,8 @@ impl fmt::Display for JsonOperator { JsonOperator::HashMinus => write!(f, "#-"), JsonOperator::AtQuestion => write!(f, "@?"), JsonOperator::AtAt => write!(f, "@@"), + JsonOperator::QuestionPipe => write!(f, "?|"), + JsonOperator::QuestionAnd => write!(f, "?&"), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 631d927f54..ad5faffdbe 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2584,6 +2584,8 @@ impl<'a> Parser<'a> { || Token::HashMinus == tok || Token::AtQuestion == tok || Token::AtAt == tok + || Token::QuestionPipe == tok + || Token::QuestionAnd == tok { let operator = match tok.token { Token::Arrow => JsonOperator::Arrow, @@ -2595,6 +2597,8 @@ impl<'a> Parser<'a> { Token::HashMinus => JsonOperator::HashMinus, Token::AtQuestion => JsonOperator::AtQuestion, Token::AtAt => JsonOperator::AtAt, + Token::QuestionPipe => JsonOperator::QuestionPipe, + Token::QuestionAnd => JsonOperator::QuestionAnd, _ => unreachable!(), }; Ok(Expr::JsonAccess { @@ -2898,7 +2902,9 @@ impl<'a> Parser<'a> { | Token::ArrowAt | Token::HashMinus | Token::AtQuestion - | Token::AtAt => Ok(50), + | Token::AtAt + | Token::QuestionPipe + | Token::QuestionAnd => Ok(50), // MySQL assignment operator := has the lowest precedence Token::DuckAssignment if dialect_of!(self is MySqlDialect | GenericDialect) => Ok(5), _ => Ok(0), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 53615c3481..767a6ca96f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -191,6 +191,12 @@ pub enum Token { /// for the specified JSON value. Only the first item of the result is taken into /// account. If the result is not Boolean, then NULL is returned. 
AtAt, + /// jsonb ?| text[] -> boolean: Do any of the strings in the text array exist as + /// top-level keys or array elements? + QuestionPipe, + /// jsonb ?& text[] -> boolean: Do all of the strings in the text array exist as + /// top-level keys or array elements? + QuestionAnd, } impl fmt::Display for Token { @@ -265,6 +271,8 @@ impl fmt::Display for Token { Token::HashMinus => write!(f, "#-"), Token::AtQuestion => write!(f, "@?"), Token::AtAt => write!(f, "@@"), + Token::QuestionPipe => write!(f, "?|"), + Token::QuestionAnd => write!(f, "?&"), } } } @@ -1143,8 +1151,14 @@ impl<'a> Tokenizer<'a> { } '?' => { chars.next(); - let s = peeking_take_while(chars, |ch| ch.is_numeric()); - Ok(Some(Token::Placeholder(String::from("?") + &s))) + match chars.peek() { + Some('|') => self.consume_and_return(chars, Token::QuestionPipe), + Some('&') => self.consume_and_return(chars, Token::QuestionAnd), + _ => { + let s = peeking_take_while(chars, |ch| ch.is_numeric()); + Ok(Some(Token::Placeholder(String::from("?") + &s))) + } + } } // identifier or keyword diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 4f7a662248..18081ab81d 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2430,6 +2430,18 @@ fn test_json() { .empty_span(), select.selection.unwrap(), ); + + // ?| operator: do any of the strings exist as top-level keys? + pg().one_statement_parses_to( + "SELECT '{\"a\":1, \"b\":2, \"c\":3}'::jsonb ?| array['b', 'c']", + "SELECT CAST('{\"a\":1, \"b\":2, \"c\":3}' AS jsonb) ?| ARRAY['b', 'c']", + ); + + // ?& operator: do all of the strings exist as top-level keys? + pg().one_statement_parses_to( + "SELECT '[\"a\", \"b\"]'::jsonb ?& array['a', 'b']", + "SELECT CAST('[\"a\", \"b\"]' AS jsonb) ?& ARRAY['a', 'b']", + ); } #[test] From a4a726e36d82398d2bb16d5f058f666338d1be6e Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 03:13:35 +0100 Subject: [PATCH 26/81] postgres: support OVERLAPS operator for datetime range comparison Added BinaryOperator::Overlaps for PostgreSQL's (start1, end1) OVERLAPS (start2, end2) datetime range comparison. --- src/ast/operator.rs | 4 ++++ src/parser/mod.rs | 8 ++++++++ tests/sqlparser_postgres.rs | 6 ++++++ 3 files changed, 18 insertions(+) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index f678ac2adb..dff2416db5 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -138,6 +138,9 @@ pub enum BinaryOperator { /// See [CREATE OPERATOR](https://www.postgresql.org/docs/current/sql-createoperator.html) /// for more information. PGCustomBinaryOperator(Vec), + /// SQL standard OVERLAPS operator for datetime range comparison, e.g. 
+ /// `(start1, end1) OVERLAPS (start2, end2)` + Overlaps, } impl fmt::Display for BinaryOperator { @@ -178,6 +181,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::PGCustomBinaryOperator(idents) => { write!(f, "OPERATOR({})", display_separated(idents, ".")) } + BinaryOperator::Overlaps => f.write_str("OVERLAPS"), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ad5faffdbe..5c02a85864 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2481,6 +2481,13 @@ impl<'a> Parser<'a> { self.expected("Expected Token::Word after AT", tok) } } + Keyword::OVERLAPS => { + Ok(Expr::BinaryOp { + left: Box::new(expr), + op: BinaryOperator::Overlaps, + right: Box::new(self.parse_subexpr(Self::BETWEEN_PREC)?), + }) + } Keyword::NOT | Keyword::IN | Keyword::BETWEEN @@ -2856,6 +2863,7 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::OVERLAPS => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC), Token::Eq diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 18081ab81d..2d23b043cb 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3831,3 +3831,9 @@ fn parse_create_table_with_options() { _ => unreachable!(), } } + +#[test] +fn parse_overlaps() { + let sql = "SELECT (CAST('2016-01-10' AS DATE), CAST('2016-02-01' AS DATE)) OVERLAPS (CAST('2016-01-20' AS DATE), CAST('2016-02-10' AS DATE))"; + pg_and_generic().verified_only_select(sql); +} From 003e181b66b609662eb615ac2bd802617cee8e3e Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 03:17:11 +0100 Subject: [PATCH 27/81] postgres: support table inheritance wildcard syntax (FROM t1*) PostgreSQL allows trailing * on table names in FROM/JOIN to include all descendant tables in an inheritance hierarchy. 
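Round-trip sketch for the inheritance wildcard, assuming PostgreSqlDialect and the usual Parser::parse_sql entry point; the trailing * is kept as part of the relation name:

    use sqlparser::dialect::PostgreSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // t1* includes all descendant tables; the wildcard survives Display.
        let sql = "SELECT * FROM schema1.t1*";
        let ast = Parser::parse_sql(&PostgreSqlDialect {}, sql).expect("parse failed");
        assert_eq!(ast[0].to_string(), sql);
    }
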
--- src/parser/mod.rs | 4 +++- tests/sqlparser_postgres.rs | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5c02a85864..7237177126 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7652,8 +7652,10 @@ impl<'a> Parser<'a> { // BigQuery supports wildcard tables: `FROM dataset.table_prefix*` // https://cloud.google.com/bigquery/docs/querying-wildcard-tables + // PostgreSQL supports table inheritance: `FROM parent_table*` + // https://www.postgresql.org/docs/current/ddl-inherit.html if in_table_clause - && dialect_of!(self is BigQueryDialect | GenericDialect) + && dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) && self.consume_token(&Token::Mul) { if let Some(last) = idents.last_mut() { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 2d23b043cb..4a37ee4fbe 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3837,3 +3837,11 @@ fn parse_overlaps() { let sql = "SELECT (CAST('2016-01-10' AS DATE), CAST('2016-02-01' AS DATE)) OVERLAPS (CAST('2016-01-20' AS DATE), CAST('2016-02-10' AS DATE))"; pg_and_generic().verified_only_select(sql); } + +#[test] +fn parse_table_inheritance_wildcard() { + // PostgreSQL table inheritance: `FROM t1*` includes all child tables + // https://www.postgresql.org/docs/current/ddl-inherit.html + pg_and_generic().verified_stmt("SELECT * FROM t1*"); + pg_and_generic().verified_stmt("SELECT * FROM schema1.t1*"); +} From 21e0f6981c53b7e2ab718caf6a323febc76b18a5 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 03:23:17 +0100 Subject: [PATCH 28/81] postgres: support OVER window clause after FILTER in aggregates Extended AggregateExpressionWithFilter with optional over: Option so AGG(x) FILTER (WHERE cond) OVER (PARTITION BY y) parses correctly. --- src/ast/mod.rs | 15 ++++++++++++--- src/parser/mod.rs | 5 +++++ tests/sqlparser_postgres.rs | 10 ++++++++++ 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index fe0799c0b5..46a3acc898 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -721,7 +721,12 @@ pub enum Expr { /// Scalar function call e.g. `LEFT(foo, 5)` Function(Function), /// Aggregate function with filter - AggregateExpressionWithFilter { expr: Box, filter: Box }, + AggregateExpressionWithFilter { + expr: Box, + filter: Box, + /// Optional OVER clause (window function) that follows the FILTER clause + over: Option, + }, /// `CASE [] WHEN THEN ... [ELSE ] END` /// /// Note we only recognize a complete single expression as ``, @@ -1055,8 +1060,12 @@ impl fmt::Display for Expr { write!(f, " '{}'", &value::escape_single_quote_string(value)) } Expr::Function(fun) => write!(f, "{fun}"), - Expr::AggregateExpressionWithFilter { expr, filter } => { - write!(f, "{expr} FILTER (WHERE {filter})") + Expr::AggregateExpressionWithFilter { expr, filter, over } => { + write!(f, "{expr} FILTER (WHERE {filter})")?; + if let Some(o) = over { + write!(f, " OVER {o}")?; + } + Ok(()) } Expr::Case { operand, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7237177126..21f5a9e97d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10828,9 +10828,13 @@ impl<'a> Parser<'a> { self.parse_keyword(Keyword::WHERE); let filter = self.parse_expr()?; self.expect_token(&Token::RParen)?; + // SQL standard: FILTER comes before OVER, e.g. 
+ // CORR(a, b) FILTER (WHERE c > 0) OVER (PARTITION BY d) + let over = self.parse_over()?; Expr::AggregateExpressionWithFilter { expr: Box::new(expr), filter: Box::new(filter), + over, } } else { self.index = i; @@ -10895,6 +10899,7 @@ impl<'a> Parser<'a> { Expr::AggregateExpressionWithFilter { expr: Box::new(expr), filter: Box::new(filter), + over: None, } } else { self.index = i; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 4a37ee4fbe..663481cbbc 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3845,3 +3845,13 @@ fn parse_table_inheritance_wildcard() { pg_and_generic().verified_stmt("SELECT * FROM t1*"); pg_and_generic().verified_stmt("SELECT * FROM schema1.t1*"); } + +#[test] +fn parse_filter_with_over() { + // SQL standard: aggregate FILTER clause followed by OVER (window specification) + pg().verified_stmt("SELECT CORR(a, b) FILTER (WHERE c > 0) OVER (PARTITION BY d)"); + // FILTER + named window + pg().verified_stmt("SELECT SUM(x) FILTER (WHERE x > 0) OVER w"); + // FILTER without OVER should still work + pg().verified_stmt("SELECT COUNT(*) FILTER (WHERE x > 0)"); +} From 1da4548c8c8a1a297b26ff778cfc85107adecc53 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 03:33:04 +0100 Subject: [PATCH 29/81] redshift: support UNPIVOT for SUPER type iteration as table factor Added TableFactor::RedshiftUnpivot for Redshift's non-standard UNPIVOT syntax: UNPIVOT expr [AS value_alias [AT attr_alias]], distinct from standard SQL UNPIVOT. --- src/ast/query.rs | 27 +++++++++++++++++++++++++++ src/parser/mod.rs | 26 +++++++++++++++++++++++++- tests/sqlparser_redshift.rs | 24 ++++++++++++++++++++++++ 3 files changed, 76 insertions(+), 1 deletion(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 11e8dc2014..8251bc9a3e 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1036,6 +1036,19 @@ pub enum TableFactor { to_return: SelectionCount, seed: Option, }, + /// Redshift's UNPIVOT syntax for iterating over SUPER type objects. + /// + /// Syntax: + /// ```sql + /// UNPIVOT expression AS value_alias [ AT attribute_alias ] + /// ``` + /// + /// See . + RedshiftUnpivot { + expr: Expr, + value_alias: Option>, + attribute_alias: Option>, + }, } impl fmt::Display for TableFactor { @@ -1228,6 +1241,20 @@ impl fmt::Display for TableFactor { } Ok(()) } + TableFactor::RedshiftUnpivot { + expr, + value_alias, + attribute_alias, + } => { + write!(f, "UNPIVOT {expr}")?; + if let Some(val) = value_alias { + write!(f, " AS {val}")?; + } + if let Some(attr) = attribute_alias { + write!(f, " AT {attr}")?; + } + Ok(()) + } } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 21f5a9e97d..46f2a31d2a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9585,6 +9585,28 @@ impl<'a> Parser<'a> { over, }) } + } else if dialect_of!(self is RedshiftSqlDialect | GenericDialect) + && self.parse_keyword(Keyword::UNPIVOT) + { + // Redshift UNPIVOT for SUPER data types: + // UNPIVOT expression [AS value_alias [AT attribute_alias]] + let expr = self.parse_expr()?; + let (value_alias, attribute_alias) = if self.parse_keyword(Keyword::AS) { + let value_alias = self.parse_identifier(false)?; + let attribute_alias = if self.parse_keyword(Keyword::AT) { + Some(self.parse_identifier(false)?) 
+ } else { + None + }; + (Some(value_alias), attribute_alias) + } else { + (None, None) + }; + Ok(TableFactor::RedshiftUnpivot { + expr, + value_alias, + attribute_alias, + }) } else if self.parse_keyword_with_tokens(Keyword::TABLE, &[Token::LParen]) { // parse table function (SELECT * FROM TABLE () [ AS ]) let expr = self.parse_expr()?; @@ -9706,6 +9728,7 @@ impl<'a> Parser<'a> { } TableFactor::TableSample { .. } => {} TableFactor::ExternalQuery { .. } => {} + TableFactor::RedshiftUnpivot { .. } => {} }; } // Do not store the extra set of parens in the AST @@ -9814,7 +9837,8 @@ impl<'a> Parser<'a> { *alias = Some(prefix_alias); } TableFactor::TableSample { .. } - | TableFactor::ExternalQuery { .. } => {} + | TableFactor::ExternalQuery { .. } + | TableFactor::RedshiftUnpivot { .. } => {} } return Ok(table); } diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index dddab22893..01b16efaa4 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -432,3 +432,27 @@ fn test_extract_with_string_date_part() { fn test_utf8_column_names() { redshift().verified_stmt("SELECT financing_cost_€k FROM tbl"); } + +#[test] +fn test_redshift_unpivot_super() { + // Basic UNPIVOT with AS and AT + redshift().verified_stmt( + "SELECT attr, val FROM customer_orders_lineitem AS c, UNPIVOT c.c_orders AS val AT attr WHERE c_custkey = 9451", + ); + + // UNPIVOT with array subscript + redshift().verified_stmt( + "SELECT attr, val FROM customer_orders_lineitem AS c, UNPIVOT c.c_orders[0] WHERE c_custkey = 9451", + ); + + // UNPIVOT without AT clause + redshift().one_statement_parses_to( + "SELECT * FROM tbl AS t, UNPIVOT t.col AS val", + "SELECT * FROM tbl AS t, UNPIVOT t.col AS val", + ); + + // UNPIVOT with AS and AT in complex query + redshift().verified_stmt( + "SELECT col_1, col_2, SPLIT_PART(col_12, '_', 2) AS currency_code FROM tbl_1 AS tbl_1, UNPIVOT tbl_2.col_14 AS val AT attr", + ); +} From 8e76efea87a06d5b310c627047b814ab575d473a Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 03:37:01 +0100 Subject: [PATCH 30/81] redshift: add regression test for CTE combined with UNPIVOT Adds verified test for corpus case cf4c7fc6e579: WITH clause combined with UNPIVOT in FROM list to confirm the two features compose correctly. --- tests/sqlparser_redshift.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 01b16efaa4..d582be3256 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -455,4 +455,9 @@ fn test_redshift_unpivot_super() { redshift().verified_stmt( "SELECT col_1, col_2, SPLIT_PART(col_12, '_', 2) AS currency_code FROM tbl_1 AS tbl_1, UNPIVOT tbl_2.col_14 AS val AT attr", ); + + // UNPIVOT with CTE + redshift().verified_stmt( + "WITH cte AS (SELECT col_1, col_2 FROM tbl_5) SELECT col_1, col_2 FROM tbl_2 AS tbl_2, UNPIVOT tbl_1.col_6 AS carrier_logic AT carrier_id", + ); } From 3cbd7e328def25cbc5b1265b12610e006ef83bbf Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 03:40:02 +0100 Subject: [PATCH 31/81] redshift: support CREATE DATABASE COLLATE syntax Added optional collation: Option field to Statement::CreateDatabase for CREATE DATABASE name COLLATE collation. 
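Round-trip sketch for the COLLATE clause, assuming RedshiftSqlDialect and the usual Parser::parse_sql entry point:

    use sqlparser::dialect::RedshiftSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // The COLLATE clause is captured on Statement::CreateDatabase and printed back.
        let sql = "CREATE DATABASE sampledb COLLATE case_insensitive";
        let ast = Parser::parse_sql(&RedshiftSqlDialect {}, sql).expect("parse failed");
        assert_eq!(ast[0].to_string(), sql);
    }
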
--- src/ast/mod.rs | 6 ++++++ src/parser/mod.rs | 8 ++++++++ tests/sqlparser_common.rs | 2 ++ tests/sqlparser_redshift.rs | 21 +++++++++++++++++++++ 4 files changed, 37 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 46a3acc898..dafe245c6a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2168,6 +2168,8 @@ pub enum Statement { comment: Option, /// Redshift: FROM INTEGRATION 'integration_id' from_integration: Option, + /// Redshift: COLLATE case_insensitive | case_sensitive + collation: Option, }, /// ```sql /// CREATE FUNCTION @@ -2819,6 +2821,7 @@ impl fmt::Display for Statement { managed_location, comment, from_integration, + collation, } => { write!(f, "CREATE DATABASE")?; if *if_not_exists { @@ -2837,6 +2840,9 @@ impl fmt::Display for Statement { if let Some(c) = comment { write!(f, " COMMENT '{c}'")?; } + if let Some(col) = collation { + write!(f, " COLLATE {col}")?; + } Ok(()) } Statement::CreateFunction { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 46f2a31d2a..dfac77719b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3804,6 +3804,13 @@ impl<'a> Parser<'a> { None }; + // Redshift: COLLATE case_insensitive | case_sensitive + let collation = if self.parse_keyword(Keyword::COLLATE) { + Some(self.parse_identifier(false)?.unwrap().value) + } else { + None + }; + // Skip additional dialect-specific clauses (DEFAULT_DDL_COLLATION, etc.) while matches!(self.peek_token_kind().clone(), Token::Word(_)) && self.peek_nth_token(1).token == Token::Eq @@ -3831,6 +3838,7 @@ impl<'a> Parser<'a> { managed_location, comment, from_integration, + collation, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e2115efc70..d8be943a13 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5905,6 +5905,7 @@ fn parse_create_database() { managed_location, comment: _, from_integration: _, + collation: _, } => { assert_eq!("mydb", db_name.to_string()); assert!(!if_not_exists); @@ -5926,6 +5927,7 @@ fn parse_create_database_ine() { managed_location, comment: _, from_integration: _, + collation: _, } => { assert_eq!("mydb", db_name.to_string()); assert!(if_not_exists); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index d582be3256..3324bb2f1f 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -461,3 +461,24 @@ fn test_redshift_unpivot_super() { "WITH cte AS (SELECT col_1, col_2 FROM tbl_5) SELECT col_1, col_2 FROM tbl_2 AS tbl_2, UNPIVOT tbl_1.col_6 AS carrier_logic AT carrier_id", ); } + +#[test] +fn test_create_database_collate() { + let sql = "CREATE DATABASE sampledb COLLATE case_insensitive"; + match redshift().verified_stmt(sql) { + Statement::CreateDatabase { + db_name, + if_not_exists, + collation, + .. + } => { + assert_eq!("sampledb", db_name.to_string()); + assert!(!if_not_exists); + assert_eq!(Some("case_insensitive".to_string()), collation); + } + _ => unreachable!(), + } + + // Also works with case_sensitive + redshift().verified_stmt("CREATE DATABASE mydb COLLATE case_sensitive"); +} From 80592705da7c546ca0aa610c7c734dff558ff59b Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 03:48:06 +0100 Subject: [PATCH 32/81] redshift: support GROUP/APPLICATION grantee type prefixes in GRANT/REVOKE Replaced Vec grantees with Vec carrying optional GranteesType (Role, User, Share, Group, Application). Added APPLICATION keyword. 
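A short sketch of consuming the new grantee structure, assuming RedshiftSqlDialect, the usual Parser::parse_sql entry point, and the Statement::Grant / Grantee types introduced in this patch:

    use sqlparser::ast::Statement;
    use sqlparser::dialect::RedshiftSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "GRANT ALL ON SCHEMA qa_tickit TO GROUP qa_users";
        let ast = Parser::parse_sql(&RedshiftSqlDialect {}, sql).expect("parse failed");
        // Grantees now carry an optional type prefix alongside the identifier.
        if let Statement::Grant { grantees, .. } = &ast[0] {
            assert_eq!(grantees[0].name.value, "qa_users");
        }
        assert_eq!(ast[0].to_string(), sql);
    }
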
--- src/ast/dcl.rs | 52 +++++++++++++++++++++++++++++++++++++ src/ast/mod.rs | 9 ++++--- src/keywords.rs | 1 + src/parser/mod.rs | 37 ++++++++++++++++++-------- tests/sqlparser_redshift.rs | 43 ++++++++++++++++++++++++++++++ 5 files changed, 128 insertions(+), 14 deletions(-) diff --git a/src/ast/dcl.rs b/src/ast/dcl.rs index f90de34d4a..c0e012d65b 100644 --- a/src/ast/dcl.rs +++ b/src/ast/dcl.rs @@ -26,6 +26,58 @@ use sqlparser_derive::{Visit, VisitMut}; use super::{Expr, Ident, Password}; use crate::ast::{display_separated, ObjectName}; +/// The type prefix for a grantee in GRANT/REVOKE statements. +/// +/// Examples: +/// - `GRANT ... TO GROUP qa_users` +/// - `GRANT ... TO ROLE admin` +/// - `REVOKE ... FROM APPLICATION app` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum GranteesType { + Role, + User, + Share, + Group, + Application, +} + +impl fmt::Display for GranteesType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + GranteesType::Role => write!(f, "ROLE"), + GranteesType::User => write!(f, "USER"), + GranteesType::Share => write!(f, "SHARE"), + GranteesType::Group => write!(f, "GROUP"), + GranteesType::Application => write!(f, "APPLICATION"), + } + } +} + +/// A grantee in a GRANT/REVOKE statement, optionally prefixed with a type keyword. +/// +/// Examples: +/// - `qa_users` (no type) +/// - `GROUP qa_users` +/// - `ROLE admin` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Grantee { + pub grantee_type: Option, + pub name: Ident, +} + +impl fmt::Display for Grantee { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(ref grantee_type) = self.grantee_type { + write!(f, "{} ", grantee_type)?; + } + write!(f, "{}", self.name) + } +} + /// An option in `ROLE` statement. 
/// /// diff --git a/src/ast/mod.rs b/src/ast/mod.rs index dafe245c6a..9d4894a979 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -32,7 +32,9 @@ pub use self::data_type::{ ArrayElemTypeDef, CharLengthUnits, CharacterLength, DataType, EnumTypeValue, ExactNumberInfo, TimezoneInfo, }; -pub use self::dcl::{AlterRoleOperation, ResetConfig, RoleOption, SetConfigValue}; +pub use self::dcl::{ + AlterRoleOperation, Grantee, GranteesType, ResetConfig, RoleOption, SetConfigValue, +}; pub use self::ddl::{ AlterColumnOperation, AlterIndexOperation, AlterTableOperation, ColumnDef, ColumnLocation, ColumnOption, ColumnOptionDef, ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, @@ -2240,7 +2242,7 @@ pub enum Statement { Grant { privileges: Privileges, objects: GrantObjects, - grantees: Vec, + grantees: Vec, with_grant_option: bool, granted_by: Option, }, @@ -2250,7 +2252,7 @@ pub enum Statement { Revoke { privileges: Privileges, objects: GrantObjects, - grantees: Vec, + grantees: Vec, granted_by: Option, cascade: bool, }, @@ -3770,6 +3772,7 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::Revoke { privileges, objects, diff --git a/src/keywords.rs b/src/keywords.rs index 20bcab1d51..4d3505722e 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -86,6 +86,7 @@ define_keywords!( AND, ANTI, ANY, + APPLICATION, APPLY, ARCHIVE, ARE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index dfac77719b..788dbb729c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10190,9 +10190,7 @@ impl<'a> Parser<'a> { if self.parse_keyword(Keyword::ROLE) { let role_name = self.parse_identifier(false)?.unwrap(); self.expect_keyword(Keyword::TO)?; - let _ = self.parse_one_of_keywords(&[Keyword::ROLE, Keyword::USER]); - let grantees = - self.parse_comma_separated(|p| p.parse_identifier(false).map(WithSpan::unwrap))?; + let grantees = self.parse_comma_separated(|p| p.parse_grantee())?; return Ok(Statement::Grant { privileges: Privileges::Actions(vec![Action::Usage]), objects: GrantObjects::Schemas(vec![ObjectName(vec![role_name])]), @@ -10205,10 +10203,7 @@ impl<'a> Parser<'a> { let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; self.expect_keyword(Keyword::TO)?; - // Snowflake: TO ROLE name / TO USER name - let _ = self.parse_one_of_keywords(&[Keyword::ROLE, Keyword::USER, Keyword::SHARE]); - let grantees = - self.parse_comma_separated(|p| p.parse_identifier(false).map(WithSpan::unwrap))?; + let grantees = self.parse_comma_separated(|p| p.parse_grantee())?; let with_grant_option = self.parse_keywords(&[Keyword::WITH, Keyword::GRANT, Keyword::OPTION]); @@ -10228,6 +10223,29 @@ impl<'a> Parser<'a> { }) } + /// Parse a single grantee, optionally prefixed with ROLE/USER/GROUP/SHARE/APPLICATION. 
+ pub fn parse_grantee(&mut self) -> Result { + let grantee_type = match self.parse_one_of_keywords(&[ + Keyword::ROLE, + Keyword::USER, + Keyword::SHARE, + Keyword::GROUP, + Keyword::APPLICATION, + ]) { + Some(Keyword::ROLE) => Some(GranteesType::Role), + Some(Keyword::USER) => Some(GranteesType::User), + Some(Keyword::SHARE) => Some(GranteesType::Share), + Some(Keyword::GROUP) => Some(GranteesType::Group), + Some(Keyword::APPLICATION) => Some(GranteesType::Application), + _ => None, + }; + let name = self.parse_identifier(false).map(WithSpan::unwrap)?; + Ok(Grantee { + grantee_type, + name, + }) + } + pub fn parse_grant_revoke_privileges_objects( &mut self, ) -> Result<(Privileges, GrantObjects), ParserError> { @@ -10403,10 +10421,7 @@ impl<'a> Parser<'a> { let (privileges, objects) = self.parse_grant_revoke_privileges_objects()?; self.expect_keyword(Keyword::FROM)?; - // Optional ROLE/SHARE keyword before grantees (Snowflake syntax) - let _ = self.parse_one_of_keywords(&[Keyword::ROLE, Keyword::SHARE]); - let grantees = - self.parse_comma_separated(|p| p.parse_identifier(false).map(WithSpan::unwrap))?; + let grantees = self.parse_comma_separated(|p| p.parse_grantee())?; let granted_by = self .parse_keywords(&[Keyword::GRANTED, Keyword::BY]) diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 3324bb2f1f..cbbfb61ef9 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -482,3 +482,46 @@ fn test_create_database_collate() { // Also works with case_sensitive redshift().verified_stmt("CREATE DATABASE mydb COLLATE case_sensitive"); } + +#[test] +fn test_grant_with_group_grantee() { + // GRANT ... TO GROUP name + redshift().verified_stmt("GRANT ALL ON SCHEMA qa_tickit TO GROUP qa_users"); + + // GRANT ... TO multiple GROUP grantees (ON TABLE serializes as ON) + redshift().one_statement_parses_to( + "GRANT ALL ON TABLE qa_tickit.sales TO GROUP qa_users, GROUP ro_users", + "GRANT ALL ON qa_tickit.sales TO GROUP qa_users, GROUP ro_users", + ); + + // GRANT with column-level privileges + redshift().verified_stmt( + "GRANT SELECT (cust_name, cust_phone), UPDATE (cust_contact_preference) ON cust_profile TO GROUP sales_group", + ); + + // Verify AST structure + match redshift().verified_stmt("GRANT ALL ON SCHEMA qa_tickit TO GROUP qa_users") { + Statement::Grant { + grantees, + .. + } => { + assert_eq!(1, grantees.len()); + assert_eq!(Some(GranteesType::Group), grantees[0].grantee_type); + assert_eq!("qa_users", grantees[0].name.value); + } + _ => unreachable!(), + } +} + +#[test] +fn test_revoke_with_group_grantee() { + // With explicit RESTRICT + redshift() + .verified_stmt("REVOKE ALL ON SCHEMA qa_tickit FROM GROUP qa_users RESTRICT"); + + // Without CASCADE/RESTRICT (defaults to RESTRICT in output) + redshift().one_statement_parses_to( + "REVOKE ALL ON SCHEMA qa_tickit FROM GROUP qa_users", + "REVOKE ALL ON SCHEMA qa_tickit FROM GROUP qa_users RESTRICT", + ); +} From e01aef70a92645616b9d616478cc268317cced6a Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 03:55:04 +0100 Subject: [PATCH 33/81] redshift: add test for column-level GRANT without space before parens Verifies GRANT SELECT(col) (no space) parses identically to GRANT SELECT (col) and normalizes to the spaced form. 
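For illustration, the normalization the new test checks (a hedged sketch; `Parser::parse_sql` and the dialect path are assumed, not shown in this diff):

    use sqlparser::dialect::RedshiftSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // No space between the privilege and its column list on input...
        let input = "GRANT SELECT(cust_name, cust_phone) ON cust_profile TO GROUP sales_group";
        let stmts = Parser::parse_sql(&RedshiftSqlDialect {}, input).unwrap();
        // ...but Display re-inserts the space when serializing the AST back to SQL.
        assert_eq!(
            stmts[0].to_string(),
            "GRANT SELECT (cust_name, cust_phone) ON cust_profile TO GROUP sales_group"
        );
    }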
--- tests/sqlparser_redshift.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index cbbfb61ef9..8e332fd3e8 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -499,6 +499,12 @@ fn test_grant_with_group_grantee() { "GRANT SELECT (cust_name, cust_phone), UPDATE (cust_contact_preference) ON cust_profile TO GROUP sales_group", ); + // GRANT with column-level privileges (no space before parentheses) + redshift().one_statement_parses_to( + "GRANT SELECT(cust_name, cust_phone), UPDATE(cust_contact_preference) ON cust_profile TO GROUP sales_group", + "GRANT SELECT (cust_name, cust_phone), UPDATE (cust_contact_preference) ON cust_profile TO GROUP sales_group", + ); + // Verify AST structure match redshift().verified_stmt("GRANT ALL ON SCHEMA qa_tickit TO GROUP qa_users") { Statement::Grant { From 027eb6d22f176a937f65fd24549ea7f7140a50e1 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 04:05:04 +0100 Subject: [PATCH 34/81] redshift: support MINUS as synonym for EXCEPT set operator Added MINUS keyword and parser recognition as an alias for EXCEPT in set operations. --- src/keywords.rs | 3 +++ src/parser/mod.rs | 1 + tests/sqlparser_redshift.rs | 9 +++++++++ 3 files changed, 13 insertions(+) diff --git a/src/keywords.rs b/src/keywords.rs index 4d3505722e..e365b12ea4 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -449,6 +449,7 @@ define_keywords!( MILLISECOND, MILLISECONDS, MIN, + MINUS, MINUTE, MINUTES, MINVALUE, @@ -813,6 +814,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::FETCH, Keyword::UNION, Keyword::EXCEPT, + Keyword::MINUS, Keyword::INTERSECT, // Reserved only as a table alias in the `FROM`/`JOIN` clauses: Keyword::ON, @@ -879,6 +881,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::FETCH, Keyword::UNION, Keyword::EXCEPT, + Keyword::MINUS, Keyword::INTERSECT, Keyword::CLUSTER, Keyword::DISTRIBUTE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 788dbb729c..2995172931 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8698,6 +8698,7 @@ impl<'a> Parser<'a> { match token { Token::Word(w) if w.keyword == Keyword::UNION => Some(SetOperator::Union), Token::Word(w) if w.keyword == Keyword::EXCEPT => Some(SetOperator::Except), + Token::Word(w) if w.keyword == Keyword::MINUS => Some(SetOperator::Except), Token::Word(w) if w.keyword == Keyword::INTERSECT => Some(SetOperator::Intersect), _ => None, } diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 8e332fd3e8..482d529073 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -531,3 +531,12 @@ fn test_revoke_with_group_grantee() { "REVOKE ALL ON SCHEMA qa_tickit FROM GROUP qa_users RESTRICT", ); } + +#[test] +fn test_minus_as_except() { + // MINUS is a Redshift/Oracle synonym for EXCEPT + redshift().one_statement_parses_to( + "SELECT foo, bar FROM table_1 MINUS SELECT foo, bar FROM table_2", + "SELECT foo, bar FROM table_1 EXCEPT SELECT foo, bar FROM table_2", + ); +} From 17d94f76aa4d90b006225fb03e0ebac36470bfed Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 04:15:17 +0100 Subject: [PATCH 35/81] redshift: support APPROXIMATE COUNT(DISTINCT x) function prefix Added approximate: bool field to Function AST node and parser support for Redshift's APPROXIMATE COUNT(DISTINCT x) HyperLogLog estimation syntax. 
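Usage sketch (illustrative only; assumes the standard `Parser::parse_sql` API rather than the test helpers used in the diff below):

    use sqlparser::dialect::RedshiftSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // APPROXIMATE is recorded on Function::approximate and printed back verbatim;
        // DISTINCT continues to be tracked separately via Function::distinct.
        let sql = "SELECT APPROXIMATE COUNT(DISTINCT y)";
        let stmts = Parser::parse_sql(&RedshiftSqlDialect {}, sql).unwrap();
        assert_eq!(stmts[0].to_string(), sql);
    }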
--- src/ast/ddl.rs | 4 +- src/ast/mod.rs | 12 +++--- src/ast/query.rs | 9 ++--- src/bin/corpus_runner.rs | 5 ++- src/keywords.rs | 1 + src/parser/mod.rs | 75 +++++++++++++++++++++-------------- src/tokenizer.rs | 11 ++--- tests/sqlparser_bigquery.rs | 8 ++-- tests/sqlparser_clickhouse.rs | 20 +++++----- tests/sqlparser_common.rs | 23 +++++++++++ tests/sqlparser_databricks.rs | 11 ++--- tests/sqlparser_duckdb.rs | 31 ++++----------- tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 1 + tests/sqlparser_mysql.rs | 6 +++ tests/sqlparser_postgres.rs | 6 +++ tests/sqlparser_redshift.rs | 24 ++++++++--- tests/sqlparser_snowflake.rs | 1 + 18 files changed, 146 insertions(+), 103 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 6dcdad0c15..74c022c731 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -77,9 +77,7 @@ pub enum AlterTableOperation { /// `DROP PART|PARTITION ` /// Note: this is a ClickHouse-specific operation, please refer to /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#drop-partitionpart) - DropPartition { - partition: Partition, - }, + DropPartition { partition: Partition }, /// `DROP PRIMARY KEY` /// /// Note: this is a MySQL-specific operation. diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9d4894a979..73b84ad301 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2280,9 +2280,7 @@ pub enum Statement { /// /// ClickHouse-specific statement for user impersonation. /// See: - ExecuteAs { - user: WithSpan, - }, + ExecuteAs { user: WithSpan }, /// ```sql /// PREPARE name [ ( data_type [, ...] ) ] AS statement /// ``` @@ -4579,6 +4577,8 @@ pub struct Function { pub over: Option, // aggregate functions may specify eg `COUNT(DISTINCT x)` pub distinct: bool, + // Redshift's APPROXIMATE prefix, e.g. `APPROXIMATE COUNT(DISTINCT x)` + pub approximate: bool, // Some functions must be called without trailing parentheses, for example Postgres // do it for current_catalog, current_schema, etc. This flags is used for formatting. 
pub special: bool, @@ -4622,6 +4622,8 @@ impl fmt::Display for Function { if self.special { write!(f, "{}", self.name)?; } else { + let approximate = if self.approximate { "APPROXIMATE " } else { "" }; + let order_by: String = if !self.order_by.is_empty() { format!(" ORDER BY {}", display_comma_separated(&self.order_by)) } else { @@ -4652,7 +4654,7 @@ impl fmt::Display for Function { if let Some(ref parameters) = self.parameters { write!( f, - "{}({})({}{}{}{}{})", + "{approximate}{}({})({}{}{}{}{})", self.name, display_comma_separated(parameters), if self.distinct { "DISTINCT " } else { "" }, @@ -4665,7 +4667,7 @@ impl fmt::Display for Function { } else { write!( f, - "{}({}{}{order_by}{limit}{on_overflow}){null_treatment}", + "{approximate}{}({}{}{order_by}{limit}{on_overflow}){null_treatment}", self.name, if self.distinct { "DISTINCT " } else { "" }, display_comma_separated(&self.args), diff --git a/src/ast/query.rs b/src/ast/query.rs index 8251bc9a3e..74e635b669 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1397,12 +1397,9 @@ impl fmt::Display for Join { match_condition, suffix(constraint) ), - JoinOperator::StraightJoin(constraint) => write!( - f, - " STRAIGHT_JOIN {}{}", - self.relation, - suffix(constraint) - ), + JoinOperator::StraightJoin(constraint) => { + write!(f, " STRAIGHT_JOIN {}{}", self.relation, suffix(constraint)) + } } } } diff --git a/src/bin/corpus_runner.rs b/src/bin/corpus_runner.rs index 1457605ba7..f2df82fae0 100644 --- a/src/bin/corpus_runner.rs +++ b/src/bin/corpus_runner.rs @@ -16,7 +16,9 @@ const REPORT_PATH: &str = "target/corpus-report.json"; /// Uses the part after the last `_` as the dialect (e.g., `sqlglot_bigquery` -> `bigquery`, /// `customer_bigquery` -> `bigquery`). If no `_` exists, uses the whole name. 
fn normalize_dialect_name(name: &str) -> &str { - name.rsplit_once('_').map(|(_, suffix)| suffix).unwrap_or(name) + name.rsplit_once('_') + .map(|(_, suffix)| suffix) + .unwrap_or(name) } fn dialect_for_name(name: &str) -> Option> { @@ -360,5 +362,4 @@ fn main() { (*passed as f64 / total_d as f64) * 100.0 ); } - } diff --git a/src/keywords.rs b/src/keywords.rs index e365b12ea4..a7c132f940 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -88,6 +88,7 @@ define_keywords!( ANY, APPLICATION, APPLY, + APPROXIMATE, ARCHIVE, ARE, ARRAY, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2995172931..0983eb1d87 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -839,6 +839,7 @@ impl<'a> Parser<'a> { parameters: None, over: None, distinct: false, + approximate: false, special: true, order_by: vec![], limit: None, @@ -888,9 +889,7 @@ impl<'a> Parser<'a> { self.parse_array_subquery() } // DuckDB MAP literal: MAP {'key': value, ...} - Keyword::MAP if self.peek_token_is(&Token::LBrace) => { - self.parse_map_literal() - } + Keyword::MAP if self.peek_token_is(&Token::LBrace) => self.parse_map_literal(), Keyword::NOT => self.parse_not(), Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => { self.parse_match_against() @@ -900,6 +899,21 @@ impl<'a> Parser<'a> { self.prev_token(); self.parse_bigquery_struct_literal() } + // Redshift: APPROXIMATE COUNT(DISTINCT x) + Keyword::APPROXIMATE + if dialect_of!(self is RedshiftSqlDialect | GenericDialect) + && matches!(self.peek_token_kind(), Token::Word(_)) => + { + // Parse the following function call and set approximate = true + let expr = self.parse_prefix()?; + match expr { + Expr::Function(mut func) => { + func.approximate = true; + Ok(Expr::Function(func)) + } + _ => self.expected("a function call after APPROXIMATE", self.peek_token()), + } + } // Here `w` is a word, check if it's a part of a multi-part // identifier, a function call, or a simple identifier: _ => match self.peek_token_kind().clone() { @@ -1116,6 +1130,7 @@ impl<'a> Parser<'a> { parameters, over, distinct, + approximate: false, special: false, order_by, limit, @@ -1141,6 +1156,7 @@ impl<'a> Parser<'a> { parameters: None, over: None, distinct: false, + approximate: false, special, order_by, limit, @@ -1463,6 +1479,7 @@ impl<'a> Parser<'a> { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -1589,6 +1606,7 @@ impl<'a> Parser<'a> { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -2481,13 +2499,11 @@ impl<'a> Parser<'a> { self.expected("Expected Token::Word after AT", tok) } } - Keyword::OVERLAPS => { - Ok(Expr::BinaryOp { - left: Box::new(expr), - op: BinaryOperator::Overlaps, - right: Box::new(self.parse_subexpr(Self::BETWEEN_PREC)?), - }) - } + Keyword::OVERLAPS => Ok(Expr::BinaryOp { + left: Box::new(expr), + op: BinaryOperator::Overlaps, + right: Box::new(self.parse_subexpr(Self::BETWEEN_PREC)?), + }), Keyword::NOT | Keyword::IN | Keyword::BETWEEN @@ -2896,7 +2912,11 @@ impl<'a> Parser<'a> { } Token::DoubleColon => Ok(50), // ?:: is a try cast operator (e.g., Databricks) - Token::Placeholder(ref s) if s == "?" && self.peek_nth_token(1).token == Token::DoubleColon => Ok(50), + Token::Placeholder(ref s) + if s == "?" 
&& self.peek_nth_token(1).token == Token::DoubleColon => + { + Ok(50) + } Token::Colon => Ok(50), Token::ExclamationMark => Ok(50), Token::Number(s, _) if s.starts_with(".") => Ok(50), @@ -3316,7 +3336,8 @@ impl<'a> Parser<'a> { // This pattern could be captured better with RAII type semantics, but it's quite a bit of // code to add for just one case, so we'll just do it manually here. let old_value = self.options.trailing_commas; - self.options.trailing_commas |= dialect_of!(self is BigQueryDialect | SnowflakeDialect | ClickHouseDialect); + self.options.trailing_commas |= + dialect_of!(self is BigQueryDialect | SnowflakeDialect | ClickHouseDialect); let ret = self.parse_comma_separated(|p| p.parse_select_item()); self.options.trailing_commas = old_value; @@ -4741,8 +4762,8 @@ impl<'a> Parser<'a> { ); }; // PostgreSQL supports CONCURRENTLY for DROP INDEX - let concurrently = object_type == ObjectType::Index - && self.parse_keyword(Keyword::CONCURRENTLY); + let concurrently = + object_type == ObjectType::Index && self.parse_keyword(Keyword::CONCURRENTLY); // Many dialects support the non standard `IF EXISTS` clause and allow // specifying multiple objects to delete in a single statement @@ -6602,6 +6623,7 @@ impl<'a> Parser<'a> { within_group: None, over: None, distinct: false, + approximate: false, null_treatment: None, special: true, order_by: vec![], @@ -8307,9 +8329,7 @@ impl<'a> Parser<'a> { /// /// ClickHouse EXPLAIN supports an optional type keyword (SYNTAX, AST, PLAN, PIPELINE, etc.) /// followed by optional key=value settings before the explained statement. - fn parse_explain_options( - &mut self, - ) -> Result<(Option, Vec), ParserError> { + fn parse_explain_options(&mut self) -> Result<(Option, Vec), ParserError> { // First, check for an optional EXPLAIN type identifier (e.g., SYNTAX, AST, PLAN). // These are non-keyword identifiers NOT followed by `=`. let explain_type = if let Token::Word(w) = &self.peek_token_ref().token { @@ -8655,9 +8675,7 @@ impl<'a> Parser<'a> { SetExpr::Values(self.parse_values(is_mysql)?) } else if self.parse_keyword(Keyword::TABLE) { SetExpr::Table(Box::new(self.parse_as_table()?)) - } else if self.dialect.supports_from_first_select() - && self.parse_keyword(Keyword::FROM) - { + } else if self.dialect.supports_from_first_select() && self.parse_keyword(Keyword::FROM) { // DuckDB FROM-first syntax: `FROM tbl` is equivalent to `SELECT * FROM tbl` SetExpr::Select(Box::new(self.parse_select_from_first()?)) } else { @@ -8951,8 +8969,9 @@ impl<'a> Parser<'a> { Ok(Select { distinct: None, top: None, - projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default()) - .empty_span()], + projection: vec![ + SelectItem::Wildcard(WildcardAdditionalOptions::default()).empty_span() + ], into: None, from, lateral_views: vec![], @@ -9841,8 +9860,7 @@ impl<'a> Parser<'a> { | TableFactor::NestedJoin { alias, .. } => { *alias = Some(prefix_alias); } - TableFactor::Pivot { alias, .. } - | TableFactor::Unpivot { alias, .. } => { + TableFactor::Pivot { alias, .. } | TableFactor::Unpivot { alias, .. } => { *alias = Some(prefix_alias); } TableFactor::TableSample { .. } @@ -10241,10 +10259,7 @@ impl<'a> Parser<'a> { _ => None, }; let name = self.parse_identifier(false).map(WithSpan::unwrap)?; - Ok(Grantee { - grantee_type, - name, - }) + Ok(Grantee { grantee_type, name }) } pub fn parse_grant_revoke_privileges_objects( @@ -10893,7 +10908,9 @@ impl<'a> Parser<'a> { }; // ClickHouse COLUMNS('pattern') APPLY(func) ... 
syntax - if dialect_of!(self is ClickHouseDialect | GenericDialect) && self.is_columns_function_call(&expr) { + if dialect_of!(self is ClickHouseDialect | GenericDialect) + && self.is_columns_function_call(&expr) + { let transformers = self.parse_column_transformers()?; if !transformers.is_empty() { return Ok(SelectItem::ColumnsWithTransformers { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 767a6ca96f..352ba05309 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -845,11 +845,9 @@ impl<'a> Tokenizer<'a> { } // numbers and period '0'..='9' | '.' => { - let numeric_underscore = - self.dialect.supports_numeric_literal_underscores(); - let digit_pred = |ch: char| { - ch.is_ascii_digit() || (numeric_underscore && ch == '_') - }; + let numeric_underscore = self.dialect.supports_numeric_literal_underscores(); + let digit_pred = + |ch: char| ch.is_ascii_digit() || (numeric_underscore && ch == '_'); let mut s = peeking_take_while(chars, digit_pred); @@ -896,8 +894,7 @@ impl<'a> Tokenizer<'a> { for _ in 0..exponent_part.len() { chars.next(); } - exponent_part += - &peeking_take_while(chars, digit_pred); + exponent_part += &peeking_take_while(chars, digit_pred); s += exponent_part.as_str(); } // Not an exponent, discard the work done diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index a51a910845..07c0881d05 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1231,6 +1231,7 @@ fn parse_map_access_offset() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -1345,6 +1346,7 @@ fn test_select_json_field() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -1361,6 +1363,7 @@ fn test_select_json_field() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -1644,7 +1647,6 @@ fn parse_wildcard_table() { "SELECT * FROM `project.dataset.table_prefix*`", "SELECT * FROM `project`.`dataset`.`table_prefix*`", ); - bigquery().verified_stmt( - "SELECT * FROM x.y* WHERE _TABLE_SUFFIX BETWEEN '20230101' AND '20231231'", - ); + bigquery() + .verified_stmt("SELECT * FROM x.y* WHERE _TABLE_SUFFIX BETWEEN '20230101' AND '20231231'"); } diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 745eae781a..2c7c3b7e0b 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -57,6 +57,7 @@ fn parse_array_access_expr() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -106,6 +107,7 @@ fn parse_array_access_expr() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -167,6 +169,7 @@ fn parse_array_fn() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -268,6 +271,7 @@ fn parse_delimited_identifiers() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -590,9 +594,7 @@ fn parse_alter_table_attach_and_detach_partition() { #[test] fn parse_alter_table_drop_partition_and_part() { // DROP PART 'part_name' - match clickhouse_and_generic() - .verified_stmt("ALTER TABLE mt DROP PART 'all_4_4_0'") - { + match clickhouse_and_generic().verified_stmt("ALTER TABLE mt DROP PART 'all_4_4_0'") { Statement::AlterTable { name, operations, .. 
} => { @@ -654,6 +656,7 @@ fn parse_create_table_with_variant_default_expressions() { null_treatment: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -680,6 +683,7 @@ fn parse_create_table_with_variant_default_expressions() { null_treatment: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -722,6 +726,7 @@ fn parse_create_table_with_variant_default_expressions() { null_treatment: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -1544,10 +1549,7 @@ fn parse_explain_with_options() { #[test] fn test_clickhouse_trailing_commas() { // ClickHouse supports trailing commas in SELECT - clickhouse().one_statement_parses_to( - "SELECT 1, 2, FROM t", - "SELECT 1, 2 FROM t", - ); + clickhouse().one_statement_parses_to("SELECT 1, 2, FROM t", "SELECT 1, 2 FROM t"); // Trailing comma with FORMAT clause clickhouse().one_statement_parses_to( "SELECT (number, toDate('2019-05-20')), dictGetOrNull('range_key_dictionary', 'value', number, toDate('2019-05-20')), FROM system.numbers LIMIT 5 FORMAT TabSeparated", @@ -1570,9 +1572,7 @@ fn parse_columns_with_apply_transformers() { ); // Single APPLY - clickhouse().verified_stmt( - "SELECT COLUMNS('[jk]') APPLY(toString) FROM columns_transformers", - ); + clickhouse().verified_stmt("SELECT COLUMNS('[jk]') APPLY(toString) FROM columns_transformers"); // Verify AST structure let sql = "SELECT COLUMNS('[jk]') APPLY(toString) FROM columns_transformers"; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index d8be943a13..985dd07826 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -924,6 +924,7 @@ fn parse_select_count_wildcard() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -949,6 +950,7 @@ fn parse_select_count_distinct() { parameters: None, over: None, distinct: true, + approximate: false, special: false, order_by: vec![], limit: None, @@ -2021,6 +2023,7 @@ fn parse_select_having() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -2061,6 +2064,7 @@ fn parse_select_qualify() { window_frame: None, })), distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -2417,6 +2421,7 @@ fn parse_listagg() { within_group: Some(within_group), over: None, distinct: true, + approximate: false, special: false, order_by: vec![], limit: None, @@ -3794,6 +3799,7 @@ fn parse_scalar_function_in_projection() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -3922,6 +3928,7 @@ fn parse_named_argument_function() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -3968,6 +3975,7 @@ fn parse_window_functions() { window_frame: None, })), distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -4017,6 +4025,7 @@ fn test_parse_named_window() { .empty_span(), )), distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -4056,6 +4065,7 @@ fn test_parse_named_window() { .empty_span(), )), distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -4575,6 +4585,7 @@ fn parse_at_timezone() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: 
vec![], limit: None, @@ -4607,6 +4618,7 @@ fn parse_at_timezone() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -4623,6 +4635,7 @@ fn parse_at_timezone() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -4789,6 +4802,7 @@ fn parse_table_function() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -4939,6 +4953,7 @@ fn parse_unnest_in_from_clause() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -4973,6 +4988,7 @@ fn parse_unnest_in_from_clause() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -4989,6 +5005,7 @@ fn parse_unnest_in_from_clause() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -7697,6 +7714,7 @@ fn parse_time_functions() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -8195,6 +8213,7 @@ fn parse_pivot_table() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -8283,6 +8302,7 @@ fn parse_pivot_table_aliases() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -8303,6 +8323,7 @@ fn parse_pivot_table_aliases() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -8377,6 +8398,7 @@ fn parse_within_group() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -8517,6 +8539,7 @@ fn parse_pivot_unpivot_table() { within_group: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 656a1fe7b3..bea03015b6 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -302,14 +302,9 @@ fn test_json_path_with_colon() { #[test] fn test_try_cast_operator() { // ?:: is a try cast operator in Databricks, equivalent to TRY_CAST - databricks().one_statement_parses_to( - "SELECT '20'?::INTEGER", - "SELECT TRY_CAST('20' AS INTEGER)", - ); + databricks() + .one_statement_parses_to("SELECT '20'?::INTEGER", "SELECT TRY_CAST('20' AS INTEGER)"); // Chaining with regular cast - databricks().one_statement_parses_to( - "SELECT col?::VARCHAR", - "SELECT TRY_CAST(col AS VARCHAR)", - ); + databricks().one_statement_parses_to("SELECT col?::VARCHAR", "SELECT TRY_CAST(col AS VARCHAR)"); } diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 71566d0232..647e33ca19 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -303,18 +303,10 @@ fn test_select_union_by_name() { #[test] fn test_numeric_literal_underscores() { // Underscores in numeric literals are stripped during tokenization - duckdb_and_generic().one_statement_parses_to( - "SELECT 1_000_000", - "SELECT 1000000", - ); - duckdb_and_generic().one_statement_parses_to( - "SELECT 1_2E+1_0::FLOAT", - "SELECT CAST(12E+10 AS FLOAT)", - ); - duckdb_and_generic().one_statement_parses_to( - "SELECT 1_000.50_0", - "SELECT 1000.500", - ); + duckdb_and_generic().one_statement_parses_to("SELECT 1_000_000", 
"SELECT 1000000"); + duckdb_and_generic() + .one_statement_parses_to("SELECT 1_2E+1_0::FLOAT", "SELECT CAST(12E+10 AS FLOAT)"); + duckdb_and_generic().one_statement_parses_to("SELECT 1_000.50_0", "SELECT 1000.500"); } #[test] @@ -346,18 +338,12 @@ fn test_prefix_alias_colon_from() { #[test] fn test_prefix_alias_colon_from_simple() { - duckdb().one_statement_parses_to( - "SELECT * FROM foo: bar", - "SELECT * FROM bar AS foo", - ); + duckdb().one_statement_parses_to("SELECT * FROM foo: bar", "SELECT * FROM bar AS foo"); } #[test] fn test_prefix_alias_colon_from_function() { - duckdb().one_statement_parses_to( - "SELECT * FROM r: range(10)", - "SELECT * FROM range(10) AS r", - ); + duckdb().one_statement_parses_to("SELECT * FROM r: range(10)", "SELECT * FROM range(10) AS r"); } #[test] @@ -390,10 +376,7 @@ fn test_from_first_subquery() { #[test] fn test_from_first_with_where() { - duckdb().one_statement_parses_to( - "FROM tbl WHERE x > 1", - "SELECT * FROM tbl WHERE x > 1", - ); + duckdb().one_statement_parses_to("FROM tbl WHERE x > 1", "SELECT * FROM tbl WHERE x > 1"); } #[test] diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index b01496f3d3..e610ee25c2 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -352,6 +352,7 @@ fn parse_delimited_identifiers() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index b3df181a69..859f8501f6 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -350,6 +350,7 @@ fn parse_delimited_identifiers() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index deaeeaf7fa..a85cd96aa7 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1258,6 +1258,7 @@ fn parse_insert_with_on_duplicate_update() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -1276,6 +1277,7 @@ fn parse_insert_with_on_duplicate_update() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -1294,6 +1296,7 @@ fn parse_insert_with_on_duplicate_update() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -1312,6 +1315,7 @@ fn parse_insert_with_on_duplicate_update() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -1330,6 +1334,7 @@ fn parse_insert_with_on_duplicate_update() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -1708,6 +1713,7 @@ fn parse_table_colum_option_on_update() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 663481cbbc..627a1276de 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2501,6 +2501,7 @@ fn test_composite_value() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -2669,6 +2670,7 @@ fn parse_current_functions() { parameters: None, over: None, distinct: false, + approximate: false, special: true, order_by: vec![], limit: None, @@ -2685,6 +2687,7 @@ fn 
parse_current_functions() { parameters: None, over: None, distinct: false, + approximate: false, special: true, order_by: vec![], limit: None, @@ -2701,6 +2704,7 @@ fn parse_current_functions() { parameters: None, over: None, distinct: false, + approximate: false, special: true, order_by: vec![], limit: None, @@ -2717,6 +2721,7 @@ fn parse_current_functions() { parameters: None, over: None, distinct: false, + approximate: false, special: true, order_by: vec![], limit: None, @@ -3196,6 +3201,7 @@ fn parse_delimited_identifiers() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 482d529073..20b2d60514 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -158,6 +158,7 @@ fn parse_delimited_identifiers() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, @@ -507,10 +508,7 @@ fn test_grant_with_group_grantee() { // Verify AST structure match redshift().verified_stmt("GRANT ALL ON SCHEMA qa_tickit TO GROUP qa_users") { - Statement::Grant { - grantees, - .. - } => { + Statement::Grant { grantees, .. } => { assert_eq!(1, grantees.len()); assert_eq!(Some(GranteesType::Group), grantees[0].grantee_type); assert_eq!("qa_users", grantees[0].name.value); @@ -522,8 +520,7 @@ fn test_grant_with_group_grantee() { #[test] fn test_revoke_with_group_grantee() { // With explicit RESTRICT - redshift() - .verified_stmt("REVOKE ALL ON SCHEMA qa_tickit FROM GROUP qa_users RESTRICT"); + redshift().verified_stmt("REVOKE ALL ON SCHEMA qa_tickit FROM GROUP qa_users RESTRICT"); // Without CASCADE/RESTRICT (defaults to RESTRICT in output) redshift().one_statement_parses_to( @@ -540,3 +537,18 @@ fn test_minus_as_except() { "SELECT foo, bar FROM table_1 EXCEPT SELECT foo, bar FROM table_2", ); } + +#[test] +fn test_approximate_count() { + // Redshift supports APPROXIMATE COUNT(DISTINCT x) for approximate count distinct + let sql = "SELECT APPROXIMATE COUNT(DISTINCT y)"; + let select = redshift().verified_only_select(sql); + match expr_from_projection(&select.projection[0]) { + Expr::Function(func) => { + assert!(func.approximate); + assert!(func.distinct); + assert_eq!(func.name.to_string(), "COUNT"); + } + _ => panic!("Expected Function"), + } +} diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 2c53920c96..f4c33b95fb 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -291,6 +291,7 @@ fn parse_delimited_identifiers() { parameters: None, over: None, distinct: false, + approximate: false, special: false, order_by: vec![], limit: None, From cc73920187a58da817eebd42a8b87417965a179b Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 04:26:19 +0100 Subject: [PATCH 36/81] snowflake: unwrap parenthesized function before attaching OVER clause When a function call is wrapped in parentheses and followed by OVER, e.g. (BOOLOR_AGG(col)) OVER (...), strip the nesting and attach OVER to the inner function. 
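Illustration of the rewrite (a sketch under the assumption that `Parser::parse_sql` and `SnowflakeDialect` are exported as in upstream sqlparser):

    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "SELECT (BOOLOR_AGG(col_22)) OVER (PARTITION BY col_1) FROM t";
        let stmts = Parser::parse_sql(&SnowflakeDialect {}, sql).unwrap();
        // The redundant parentheses are dropped and OVER attaches to the inner aggregate.
        assert_eq!(
            stmts[0].to_string(),
            "SELECT BOOLOR_AGG(col_22) OVER (PARTITION BY col_1) FROM t"
        );
    }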
--- src/parser/mod.rs | 20 ++++++++++++++++++++ tests/sqlparser_snowflake.rs | 14 ++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0983eb1d87..f9ad675acb 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1029,6 +1029,26 @@ impl<'a> Parser<'a> { } }; self.expect_token(&Token::RParen)?; + // If the nested expression is a function without an OVER clause, + // and the next token is OVER, unwrap the nesting and attach + // the OVER clause to the function. This handles patterns like: + // (BOOLOR_AGG(col)) OVER (PARTITION BY x) + let expr = match expr { + Expr::Nested(inner) if matches!(inner.as_ref(), Expr::Function(f) if f.over.is_none()) => { + if let Expr::Function(mut func) = *inner { + let over = self.parse_over()?; + if over.is_some() { + func.over = over; + Expr::Function(func) + } else { + Expr::Nested(Box::new(Expr::Function(func))) + } + } else { + unreachable!() + } + } + _ => expr, + }; if !self.consume_token(&Token::Period) { Ok(expr) } else { diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index f4c33b95fb..e07e207d8c 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1740,3 +1740,17 @@ fn parse_column_comment_after_masking_policy() { "CREATE TABLE t1 (col1 VARCHAR COMMENT 'description')", ); } + +#[test] +fn test_not_aggregate_over_window() { + // Parenthesized function with OVER + snowflake().one_statement_parses_to( + "SELECT (BOOLOR_AGG(col_22)) OVER (PARTITION BY col_1) FROM t", + "SELECT BOOLOR_AGG(col_22) OVER (PARTITION BY col_1) FROM t", + ); + // NOT applied to aggregate with OVER window clause + snowflake().one_statement_parses_to( + "SELECT NOT (BOOLOR_AGG(col_22)) OVER (PARTITION BY col_1) AS IS_MAGIC_RIDE FROM t", + "SELECT NOT BOOLOR_AGG(col_22) OVER (PARTITION BY col_1) AS IS_MAGIC_RIDE FROM t", + ); +} From 77dfd3d5495de02de4319ef6e94868c77247b221 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 04:31:34 +0100 Subject: [PATCH 37/81] snowflake: support SHOW COLUMNS IN [TABLE|VIEW] syntax Extended ShowColumns with show_in and show_object_kind fields to roundtrip Snowflake's SHOW COLUMNS IN TABLE t form alongside MySQL's SHOW COLUMNS FROM t. 
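Round-trip sketch (not part of the diff; crate paths assumed to match upstream sqlparser):

    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // IN (vs. FROM) and the TABLE/VIEW keyword are preserved when re-serialized.
        for sql in [
            "SHOW COLUMNS IN TABLE test_show_columns",
            "SHOW COLUMNS IN VIEW my_view",
        ] {
            let stmts = Parser::parse_sql(&SnowflakeDialect {}, sql).unwrap();
            assert_eq!(stmts[0].to_string(), sql);
        }
    }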
--- src/ast/mod.rs | 13 ++++++++++++- src/parser/mod.rs | 9 ++++++++- tests/sqlparser_mysql.rs | 16 ++++++++++++++-- tests/sqlparser_snowflake.rs | 12 ++++++++++++ 4 files changed, 46 insertions(+), 4 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 73b84ad301..ed5df607b0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2081,6 +2081,10 @@ pub enum Statement { ShowColumns { extended: bool, full: bool, + /// Whether `IN` was used instead of `FROM` (Snowflake uses `IN`) + show_in: bool, + /// Optional object kind keyword after IN/FROM (e.g., TABLE, VIEW) + show_object_kind: Option, #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] table_name: ObjectName, filter: Option, @@ -3631,14 +3635,21 @@ impl fmt::Display for Statement { Statement::ShowColumns { extended, full, + show_in, + show_object_kind, table_name, filter, } => { write!( f, - "SHOW {extended}{full}COLUMNS FROM {table_name}", + "SHOW {extended}{full}COLUMNS {in_or_from}{object_kind}{table_name}", extended = if *extended { "EXTENDED " } else { "" }, full = if *full { "FULL " } else { "" }, + in_or_from = if *show_in { "IN " } else { "FROM " }, + object_kind = match show_object_kind { + Some(kw) => format!("{kw} "), + None => String::new(), + }, table_name = table_name, )?; if let Some(filter) = filter { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f9ad675acb..3337a8b75d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9278,7 +9278,12 @@ impl<'a> Parser<'a> { extended: bool, full: bool, ) -> Result { - self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])?; + let show_in = + self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])? == Keyword::IN; + // Optionally consume TABLE or VIEW keyword (Snowflake: SHOW COLUMNS IN TABLE ) + let show_object_kind = + self.parse_one_of_keywords(&[Keyword::TABLE, Keyword::VIEW]) + .map(|kw| Ident::new(format!("{kw:?}"))); let object_name = self.parse_object_name(false)?; let table_name = match self.parse_one_of_keywords(&[Keyword::FROM, Keyword::IN]) { Some(_) => { @@ -9293,6 +9298,8 @@ impl<'a> Parser<'a> { Ok(Statement::ShowColumns { extended, full, + show_in, + show_object_kind, table_name, filter, }) diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index a85cd96aa7..3fd0dc7aa4 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -54,6 +54,8 @@ fn parse_show_columns() { Statement::ShowColumns { extended: false, full: false, + show_in: false, + show_object_kind: None, table_name: table_name.clone(), filter: None, } @@ -63,6 +65,8 @@ fn parse_show_columns() { Statement::ShowColumns { extended: false, full: false, + show_in: false, + show_object_kind: None, table_name: ObjectName(vec![Ident::new("mydb"), Ident::new("mytable")]), filter: None, } @@ -72,6 +76,8 @@ fn parse_show_columns() { Statement::ShowColumns { extended: true, full: false, + show_in: false, + show_object_kind: None, table_name: table_name.clone(), filter: None, } @@ -81,6 +87,8 @@ fn parse_show_columns() { Statement::ShowColumns { extended: false, full: true, + show_in: false, + show_object_kind: None, table_name: table_name.clone(), filter: None, } @@ -90,6 +98,8 @@ fn parse_show_columns() { Statement::ShowColumns { extended: false, full: false, + show_in: false, + show_object_kind: None, table_name: table_name.clone(), filter: Some(ShowStatementFilter::Like("pattern".into())), } @@ -99,6 +109,8 @@ fn parse_show_columns() { Statement::ShowColumns { extended: false, full: false, + show_in: false, + show_object_kind: None, table_name, 
filter: Some(ShowStatementFilter::Where( mysql_and_generic().verified_expr("1 = 2") @@ -108,9 +120,9 @@ fn parse_show_columns() { mysql_and_generic() .one_statement_parses_to("SHOW FIELDS FROM mytable", "SHOW COLUMNS FROM mytable"); mysql_and_generic() - .one_statement_parses_to("SHOW COLUMNS IN mytable", "SHOW COLUMNS FROM mytable"); + .one_statement_parses_to("SHOW COLUMNS IN mytable", "SHOW COLUMNS IN mytable"); mysql_and_generic() - .one_statement_parses_to("SHOW FIELDS IN mytable", "SHOW COLUMNS FROM mytable"); + .one_statement_parses_to("SHOW FIELDS IN mytable", "SHOW COLUMNS IN mytable"); mysql_and_generic().one_statement_parses_to( "SHOW COLUMNS FROM mytable FROM mydb", "SHOW COLUMNS FROM mydb.mytable", diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e07e207d8c..e9dbe6b92f 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1754,3 +1754,15 @@ fn test_not_aggregate_over_window() { "SELECT NOT BOOLOR_AGG(col_22) OVER (PARTITION BY col_1) AS IS_MAGIC_RIDE FROM t", ); } + +#[test] +fn test_show_columns_in_table() { + // Basic SHOW COLUMNS IN TABLE + snowflake().verified_stmt("SHOW COLUMNS IN TABLE test_show_columns"); + // Quoted schema with unquoted table + snowflake().verified_stmt(r#"SHOW COLUMNS IN TABLE "sch_1".tbl_1"#); + // Fully qualified name + snowflake().verified_stmt(r#"SHOW COLUMNS IN TABLE "db"."schema".my_table"#); + // SHOW COLUMNS IN VIEW + snowflake().verified_stmt("SHOW COLUMNS IN VIEW my_view"); +} From 8e38a31fe6a9719aaef986859d807a15f82d0639 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 04:37:26 +0100 Subject: [PATCH 38/81] snowflake: generalize DESCRIBE to support multiple object types Replaced ExplainTable's has_table_word: bool with object_type: Option, allowing DESC/DESCRIBE with DATABASE, WAREHOUSE, SEQUENCE, STREAM, VIEW, SCHEMA. --- src/ast/mod.rs | 10 +++++----- src/keywords.rs | 2 ++ src/parser/mod.rs | 16 ++++++++++++++-- tests/sqlparser_common.rs | 2 +- tests/sqlparser_snowflake.rs | 19 +++++++++++++++++++ 5 files changed, 41 insertions(+), 8 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ed5df607b0..e3f9d68639 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2316,8 +2316,8 @@ pub enum Statement { /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] table_name: ObjectName, - /// EXPLAIN TABLE - has_table_word: bool, + /// Optional object type keyword (TABLE, DATABASE, WAREHOUSE, SEQUENCE, STREAM, FUNCTION, VIEW, etc.) 
+ object_type: Option, /// Optional output format (ClickHouse) format: Option, }, @@ -2462,7 +2462,7 @@ impl fmt::Display for Statement { Statement::ExplainTable { describe_alias, table_name, - has_table_word, + object_type, format, } => { if *describe_alias { @@ -2470,8 +2470,8 @@ impl fmt::Display for Statement { } else { write!(f, "EXPLAIN ")?; } - if *has_table_word { - write!(f, "TABLE ")?; + if let Some(obj_type) = object_type { + write!(f, "{obj_type} ")?; } write!(f, "{table_name}")?; diff --git a/src/keywords.rs b/src/keywords.rs index a7c132f940..eb73c84a6c 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -671,6 +671,7 @@ define_keywords!( STORAGE_INTEGRATION, STORED, STRAIGHT_JOIN, + STREAM, STRICT, STRING, STRUCT, @@ -772,6 +773,7 @@ define_keywords!( VIEW, VIRTUAL, VOLATILE, + WAREHOUSE, WEEK, WEEKS, WHEN, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3337a8b75d..47776171c1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8325,7 +8325,19 @@ impl<'a> Parser<'a> { return Ok(Statement::DescribeHistory { table_name }); } - let has_table_word = self.parse_keyword(Keyword::TABLE); + // Parse optional object type: TABLE, DATABASE, WAREHOUSE, SEQUENCE, STREAM, FUNCTION, VIEW, SCHEMA + let object_type = self + .parse_one_of_keywords(&[ + Keyword::TABLE, + Keyword::DATABASE, + Keyword::WAREHOUSE, + Keyword::SEQUENCE, + Keyword::STREAM, + Keyword::FUNCTION, + Keyword::VIEW, + Keyword::SCHEMA, + ]) + .map(|kw| Ident::new(format!("{kw:?}"))); let table_name = self.parse_object_name(false)?; // ClickHouse: DESCRIBE TABLE tab FORMAT Vertical @@ -8337,7 +8349,7 @@ impl<'a> Parser<'a> { Ok(Statement::ExplainTable { describe_alias, - has_table_word, + object_type, table_name, format, }) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 985dd07826..94356fe5ce 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3843,7 +3843,7 @@ fn parse_explain_table() { Statement::ExplainTable { describe_alias, table_name, - has_table_word: _, + object_type: _, format: _, } => { assert_eq!(describe_alias, expected_describe_alias); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index e9dbe6b92f..674c2a1dc2 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1601,6 +1601,25 @@ fn test_describe_table() { snowflake().verified_stmt(r#"DESCRIBE TABLE "DW_PROD"."SCH"."TBL""#); } +#[test] +fn test_describe_object_types() { + // DESCRIBE DATABASE + snowflake().verified_stmt("DESCRIBE DATABASE desc_demo"); + // DESCRIBE WAREHOUSE + snowflake().verified_stmt("DESCRIBE WAREHOUSE temporary_warehouse"); + // DESC SEQUENCE (DESC is alias for DESCRIBE) + snowflake().one_statement_parses_to( + "DESC SEQUENCE my_sequence", + "DESCRIBE SEQUENCE my_sequence", + ); + // DESC STREAM (DESC is alias for DESCRIBE) + snowflake().one_statement_parses_to("DESC STREAM mystream", "DESCRIBE STREAM mystream"); + // DESCRIBE VIEW + snowflake().verified_stmt("DESCRIBE VIEW db.table"); + // DESCRIBE SCHEMA + snowflake().verified_stmt("DESCRIBE SCHEMA my_schema"); +} + #[test] fn test_asof_join() { snowflake().verified_stmt("SELECT * FROM table1 ASOF JOIN table2 MATCH_CONDITION (table1.timestamp <= table2.timestamp) ON table1.id = table2.id"); From bbb0f8293f707f06355448ecbf0847e1a2f24eac Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 04:44:10 +0100 Subject: [PATCH 39/81] snowflake: support ALTER TABLE ADD/DROP ROW ACCESS POLICY Add AddRowAccessPolicy and DropRowAccessPolicy AlterTableOperation 
variants. Supports ADD ROW ACCESS POLICY ON (col, ...) and DROP ROW ACCESS POLICY. --- src/ast/ddl.rs | 25 ++++++++++++++++++++++ src/keywords.rs | 1 + src/parser/mod.rs | 18 ++++++++++++++-- tests/sqlparser_snowflake.rs | 40 ++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 2 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 74c022c731..58c9f7e509 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -128,6 +128,21 @@ pub enum AlterTableOperation { /// /// Note: this is BigQuery specific SetOptions { options: Vec }, + + /// `ADD ROW ACCESS POLICY ON (, ...)` + /// + /// Note: this is Snowflake specific + AddRowAccessPolicy { + policy: ObjectName, + on: Vec, + }, + + /// `DROP ROW ACCESS POLICY ` + /// + /// Note: this is Snowflake specific + DropRowAccessPolicy { + policy: ObjectName, + }, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -250,6 +265,16 @@ impl fmt::Display for AlterTableOperation { AlterTableOperation::SetOptions { options } => { write!(f, "SET OPTIONS({})", display_comma_separated(options)) } + AlterTableOperation::AddRowAccessPolicy { policy, on } => { + write!( + f, + "ADD ROW ACCESS POLICY {policy} ON ({})", + display_comma_separated(on) + ) + } + AlterTableOperation::DropRowAccessPolicy { policy } => { + write!(f, "DROP ROW ACCESS POLICY {policy}") + } } } } diff --git a/src/keywords.rs b/src/keywords.rs index eb73c84a6c..bc8c8c7fec 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -70,6 +70,7 @@ define_keywords!( ABORT, ABS, ABSOLUTE, + ACCESS, ACTION, ADD, ADMIN, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 47776171c1..e17f18e5ca 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6339,7 +6339,18 @@ impl<'a> Parser<'a> { pub fn parse_alter_table_operation(&mut self) -> Result { let operation = if self.parse_keyword(Keyword::ADD) { - if let Some(constraint) = self.parse_optional_table_constraint()? { + if self.parse_keywords(&[Keyword::ROW, Keyword::ACCESS, Keyword::POLICY]) { + let policy = self.parse_object_name(false)?; + self.expect_keyword(Keyword::ON)?; + self.expect_token(&Token::LParen)?; + let columns = + self.parse_comma_separated(|p| p.parse_identifier(false).map(|id| id.unwrap()))?; + self.expect_token(&Token::RParen)?; + AlterTableOperation::AddRowAccessPolicy { + policy, + on: columns, + } + } else if let Some(constraint) = self.parse_optional_table_constraint()? 
{ AlterTableOperation::AddConstraint(constraint) } else { let if_not_exists = @@ -6391,7 +6402,10 @@ impl<'a> Parser<'a> { } } } else if self.parse_keyword(Keyword::DROP) { - if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { + if self.parse_keywords(&[Keyword::ROW, Keyword::ACCESS, Keyword::POLICY]) { + let policy = self.parse_object_name(false)?; + AlterTableOperation::DropRowAccessPolicy { policy } + } else if self.parse_keywords(&[Keyword::IF, Keyword::EXISTS, Keyword::PARTITION]) { self.expect_token(&Token::LParen)?; let partitions = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 674c2a1dc2..2dc0fefff3 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -588,6 +588,46 @@ fn test_alter_table_swap_with() { }; } +#[test] +fn test_alter_table_row_access_policy() { + // Single DROP ROW ACCESS POLICY + let sql = "ALTER TABLE t1 DROP ROW ACCESS POLICY rap_t1"; + snowflake_and_generic().verified_stmt(sql); + + // Single ADD ROW ACCESS POLICY + let sql = "ALTER TABLE t1 ADD ROW ACCESS POLICY rap_t1 ON (empl_id)"; + snowflake_and_generic().verified_stmt(sql); + + // Combined DROP + ADD (comma-separated operations) + let sql = "ALTER TABLE t1 DROP ROW ACCESS POLICY rap_t1_version_1, ADD ROW ACCESS POLICY rap_t1_version_2 ON (empl_id)"; + match snowflake_and_generic().verified_stmt(sql) { + Statement::AlterTable { + name, operations, .. + } => { + assert_eq!("t1", name.to_string()); + assert_eq!(2, operations.len()); + match &operations[0] { + AlterTableOperation::DropRowAccessPolicy { policy } => { + assert_eq!("rap_t1_version_1", policy.to_string()); + } + _ => unreachable!(), + } + match &operations[1] { + AlterTableOperation::AddRowAccessPolicy { policy, on } => { + assert_eq!("rap_t1_version_2", policy.to_string()); + assert_eq!(vec!["empl_id"], on.iter().map(|i| i.to_string()).collect::>()); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } + + // Multiple columns in ON clause + let sql = "ALTER TABLE t1 ADD ROW ACCESS POLICY rap ON (col1, col2, col3)"; + snowflake_and_generic().verified_stmt(sql); +} + #[test] fn test_drop_stage() { match snowflake_and_generic().verified_stmt("DROP STAGE s1") { From 963624cd1d9fce82fb918bee6281a78b3d62033d Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 04:53:32 +0100 Subject: [PATCH 40/81] snowflake: support CREATE TABLE USING TEMPLATE (query) Add using_template field to CreateTableBuilder for Snowflake's USING TEMPLATE (INFER_SCHEMA(...)) schema inference clause. 
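A hedged usage sketch (assumes the usual `sqlparser` re-exports; the other `Statement::CreateTable` fields are elided with `..`):

    use sqlparser::ast::Statement;
    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "CREATE TABLE mytable USING TEMPLATE (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*)) FROM TABLE(INFER_SCHEMA(LOCATION => '@mystage', FILE_FORMAT => 'my_parquet_format')))";
        let stmts = Parser::parse_sql(&SnowflakeDialect {}, sql).unwrap();
        match &stmts[0] {
            // The inferred-schema query lands in the new using_template field.
            Statement::CreateTable { using_template, .. } => assert!(using_template.is_some()),
            _ => unreachable!("expected CREATE TABLE"),
        }
        assert_eq!(stmts[0].to_string(), sql);
    }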
--- src/ast/helpers/stmt_create_table.rs | 10 ++++++++++ src/ast/mod.rs | 9 ++++++++- src/keywords.rs | 1 + src/parser/mod.rs | 14 +++++++++++--- tests/sqlparser_snowflake.rs | 29 ++++++++++++++++++++++++++++ 5 files changed, 59 insertions(+), 4 deletions(-) diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 7ac75c3e72..ea82f64410 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -86,6 +86,7 @@ pub struct CreateTableBuilder { pub table_ttl: Option, pub clickhouse_settings: Option>, pub using: Option, + pub using_template: Option>, pub copy_grants: bool, } @@ -131,6 +132,7 @@ impl CreateTableBuilder { table_ttl: None, clickhouse_settings: None, using: None, + using_template: None, copy_grants: false, } } @@ -322,6 +324,11 @@ impl CreateTableBuilder { self } + pub fn using_template(mut self, using_template: Option>) -> Self { + self.using_template = using_template; + self + } + pub fn copy_grants(mut self, copy_grants: bool) -> Self { self.copy_grants = copy_grants; self @@ -368,6 +375,7 @@ impl CreateTableBuilder { table_ttl: self.table_ttl, clickhouse_settings: self.clickhouse_settings, using: self.using, + using_template: self.using_template, copy_grants: self.copy_grants, } } @@ -420,6 +428,7 @@ impl TryFrom for CreateTableBuilder { table_ttl, clickhouse_settings, using, + using_template, copy_grants, } => Ok(Self { or_replace, @@ -461,6 +470,7 @@ impl TryFrom for CreateTableBuilder { table_ttl, clickhouse_settings, using, + using_template, copy_grants, }), _ => Err(ParserError::ParserError(format!( diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e3f9d68639..6bcc4f2fdf 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1819,6 +1819,8 @@ pub enum Statement { clickhouse_settings: Option>, /// Databricks USING DELTA using: Option, + /// Snowflake USING TEMPLATE (query) + using_template: Option>, // Snowflake COPY GRANTS copy_grants: bool, }, @@ -3066,6 +3068,7 @@ impl fmt::Display for Statement { table_ttl, clickhouse_settings, using, + using_template, copy_grants, } => { // We want to allow the following options @@ -3112,7 +3115,7 @@ impl fmt::Display for Statement { } write!(f, "{}", display_comma_separated(projections))?; write!(f, ")")?; - } else if query.is_none() && like.is_none() && clone.is_none() { + } else if query.is_none() && like.is_none() && clone.is_none() && using_template.is_none() { // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens write!(f, " ()")?; } @@ -3137,6 +3140,10 @@ impl fmt::Display for Statement { write!(f, " USING {using}")?; } + if let Some(using_template) = using_template { + write!(f, " USING TEMPLATE {using_template}")?; + } + // Only for SQLite if *without_rowid { write!(f, " WITHOUT ROWID")?; diff --git a/src/keywords.rs b/src/keywords.rs index bc8c8c7fec..95e39dd009 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -696,6 +696,7 @@ define_keywords!( TAG, TBLPROPERTIES, TEMP, + TEMPLATE, TEMPORARY, TEXT, TEXTFILE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e17f18e5ca..864fd9dca8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5196,10 +5196,17 @@ impl<'a> Parser<'a> { // parse optional column list (schema) let (columns, constraints, projections) = self.parse_columns()?; - let using = if self.parse_keyword(Keyword::USING) { - Some(self.parse_object_name(false)?) 
+ let (using, using_template) = if self.parse_keyword(Keyword::USING) { + if self.parse_keyword(Keyword::TEMPLATE) { + // Snowflake: USING TEMPLATE (query_expr) + let expr = self.parse_expr()?; + (None, Some(Box::new(expr))) + } else { + // Databricks: USING DELTA + (Some(self.parse_object_name(false)?), None) + } } else { - None + (None, None) }; // Redshift allows specifying DISTSTYLE after column definitions @@ -5634,6 +5641,7 @@ impl<'a> Parser<'a> { .table_ttl(table_ttl) .clickhouse_settings(clickhouse_settings) .using(using) + .using_template(using_template) .table_options(table_options) .projections(projections) .copy_grants(copy_grants) diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 2dc0fefff3..4fb6dc3223 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1825,3 +1825,32 @@ fn test_show_columns_in_table() { // SHOW COLUMNS IN VIEW snowflake().verified_stmt("SHOW COLUMNS IN VIEW my_view"); } + +#[test] +fn test_snowflake_create_table_using_template() { + // Simple USING TEMPLATE with subquery + snowflake().verified_stmt( + "CREATE TABLE mytable USING TEMPLATE (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*)) FROM TABLE(INFER_SCHEMA(LOCATION => '@mystage', FILE_FORMAT => 'my_parquet_format')))", + ); + + // Corpus example with WITHIN GROUP and OR REPLACE + snowflake().one_statement_parses_to( + "CREATE TABLE mytable USING TEMPLATE (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*)) WITHIN GROUP (ORDER BY order_id) FROM TABLE(INFER_SCHEMA(LOCATION=>'@mystage', FILE_FORMAT=>'my_parquet_format')))", + "CREATE TABLE mytable USING TEMPLATE (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*)) WITHIN GROUP (ORDER BY order_id) FROM TABLE(INFER_SCHEMA(LOCATION => '@mystage', FILE_FORMAT => 'my_parquet_format')))", + ); + + // Verify the AST fields + match snowflake().verified_stmt( + "CREATE TABLE mytable USING TEMPLATE (SELECT ARRAY_AGG(OBJECT_CONSTRUCT(*)) FROM TABLE(INFER_SCHEMA(LOCATION => '@mystage', FILE_FORMAT => 'my_parquet_format')))", + ) { + Statement::CreateTable { + name, + using_template, + .. + } => { + assert_eq!("mytable", name.to_string()); + assert!(using_template.is_some()); + } + _ => unreachable!(), + } +} From 5e4ba12eaa20bac1e33369f34cc01bd1e31e0470 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 05:03:52 +0100 Subject: [PATCH 41/81] snowflake: support DESCRIBE FUNCTION with parameter type signatures Add DescribeFunctionParam enum (DataType | Table) and function_params field to ExplainTable for DESCRIBE FUNCTION name(type1, TABLE(type)) syntax. --- src/ast/mod.rs | 30 +++++++++++++++++++++++ src/parser/mod.rs | 46 ++++++++++++++++++++++++++---------- tests/sqlparser_common.rs | 1 + tests/sqlparser_snowflake.rs | 7 ++++++ 4 files changed, 72 insertions(+), 12 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6bcc4f2fdf..4823584ae6 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2320,6 +2320,8 @@ pub enum Statement { table_name: ObjectName, /// Optional object type keyword (TABLE, DATABASE, WAREHOUSE, SEQUENCE, STREAM, FUNCTION, VIEW, etc.) object_type: Option, + /// Optional function parameter types for DESCRIBE FUNCTION name(type1, type2, ...) 
+ function_params: Option>, /// Optional output format (ClickHouse) format: Option, }, @@ -2465,6 +2467,7 @@ impl fmt::Display for Statement { describe_alias, table_name, object_type, + function_params, format, } => { if *describe_alias { @@ -2478,6 +2481,10 @@ impl fmt::Display for Statement { write!(f, "{table_name}")?; + if let Some(params) = function_params { + write!(f, "({})", display_comma_separated(params))?; + } + if let Some(format) = format { write!(f, " FORMAT {format}")?; } @@ -4827,6 +4834,29 @@ impl fmt::Display for KillType { } } +/// A parameter type in DESCRIBE FUNCTION name(param1, param2, ...) +/// Supports both simple data types and TABLE(type, type, ...) notation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum DescribeFunctionParam { + /// A simple data type (e.g., VARCHAR, NUMBER) + DataType(DataType), + /// TABLE(type, type, ...) notation for table-valued parameters + Table(Vec), +} + +impl fmt::Display for DescribeFunctionParam { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + DescribeFunctionParam::DataType(dt) => write!(f, "{dt}"), + DescribeFunctionParam::Table(types) => { + write!(f, "TABLE({})", display_comma_separated(types)) + } + } + } +} + #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 864fd9dca8..5abb862f88 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8348,20 +8348,41 @@ impl<'a> Parser<'a> { } // Parse optional object type: TABLE, DATABASE, WAREHOUSE, SEQUENCE, STREAM, FUNCTION, VIEW, SCHEMA - let object_type = self - .parse_one_of_keywords(&[ - Keyword::TABLE, - Keyword::DATABASE, - Keyword::WAREHOUSE, - Keyword::SEQUENCE, - Keyword::STREAM, - Keyword::FUNCTION, - Keyword::VIEW, - Keyword::SCHEMA, - ]) - .map(|kw| Ident::new(format!("{kw:?}"))); + let object_type_kw = self.parse_one_of_keywords(&[ + Keyword::TABLE, + Keyword::DATABASE, + Keyword::WAREHOUSE, + Keyword::SEQUENCE, + Keyword::STREAM, + Keyword::FUNCTION, + Keyword::VIEW, + Keyword::SCHEMA, + ]); + let object_type = object_type_kw.map(|kw| Ident::new(format!("{kw:?}"))); let table_name = self.parse_object_name(false)?; + // DESCRIBE FUNCTION name(type1, type2, ...) - parse function signature + // Supports TABLE(type, ...) parameter types for DMFs + let function_params = + if object_type_kw == Some(Keyword::FUNCTION) && self.peek_token().token == Token::LParen { + self.expect_token(&Token::LParen)?; + let params = self.parse_comma_separated(|p| { + // Handle TABLE(type, type, ...) 
parameter type notation + if p.parse_keyword(Keyword::TABLE) { + p.expect_token(&Token::LParen)?; + let inner_types = p.parse_comma_separated(|p2| p2.parse_data_type())?; + p.expect_token(&Token::RParen)?; + Ok(DescribeFunctionParam::Table(inner_types)) + } else { + Ok(DescribeFunctionParam::DataType(p.parse_data_type()?)) + } + })?; + self.expect_token(&Token::RParen)?; + Some(params) + } else { + None + }; + // ClickHouse: DESCRIBE TABLE tab FORMAT Vertical let format = if self.parse_keyword(Keyword::FORMAT) { Some(self.parse_identifier(false)?.unwrap()) @@ -8372,6 +8393,7 @@ impl<'a> Parser<'a> { Ok(Statement::ExplainTable { describe_alias, object_type, + function_params, table_name, format, }) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 94356fe5ce..3002486894 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3844,6 +3844,7 @@ fn parse_explain_table() { describe_alias, table_name, object_type: _, + function_params: _, format: _, } => { assert_eq!(describe_alias, expected_describe_alias); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 4fb6dc3223..9781158c3e 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1658,6 +1658,13 @@ fn test_describe_object_types() { snowflake().verified_stmt("DESCRIBE VIEW db.table"); // DESCRIBE SCHEMA snowflake().verified_stmt("DESCRIBE SCHEMA my_schema"); + // DESCRIBE FUNCTION with parameter types + snowflake().verified_stmt("DESCRIBE FUNCTION my_echo_udf(VARCHAR)"); + // DESC FUNCTION with TABLE parameter type + snowflake().one_statement_parses_to( + "DESC FUNCTION governance.dmfs.count_positive_numbers(TABLE(NUMBER, NUMBER, NUMBER))", + "DESCRIBE FUNCTION governance.dmfs.count_positive_numbers(TABLE(NUMBER, NUMBER, NUMBER))", + ); } #[test] From 2821597fb38f352a5fff1d92aa48f5ca3a06b643 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 05:12:07 +0100 Subject: [PATCH 42/81] snowflake: preserve object type keyword in GRANT/REVOKE roundtrip Change GrantObjects::Tables to carry optional object_type so VIEW, DATABASE, FUNCTION, ROLE keywords are preserved in display output. --- src/ast/mod.rs | 19 +++++++++++++++---- src/parser/mod.rs | 12 ++++++++---- tests/sqlparser_common.rs | 4 ++-- tests/sqlparser_redshift.rs | 5 ++--- tests/sqlparser_snowflake.rs | 13 +++++++++++++ 5 files changed, 40 insertions(+), 13 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 4823584ae6..2468d90297 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4473,8 +4473,12 @@ pub enum GrantObjects { Schemas(Vec), /// Grant privileges on specific sequences Sequences(Vec), - /// Grant privileges on specific tables - Tables(Vec), + /// Grant privileges on specific tables (or other object types like VIEW, DATABASE, FUNCTION, ROLE) + Tables { + tables: Vec, + /// Optional object type keyword (e.g. 
"TABLE", "VIEW", "DATABASE", "FUNCTION", "ROLE") + object_type: Option, + }, } impl fmt::Display for GrantObjects { @@ -4486,8 +4490,15 @@ impl fmt::Display for GrantObjects { GrantObjects::Schemas(schemas) => { write!(f, "SCHEMA {}", display_comma_separated(schemas)) } - GrantObjects::Tables(tables) => { - write!(f, "{}", display_comma_separated(tables)) + GrantObjects::Tables { + tables, + object_type, + } => { + if let Some(kw) = object_type { + write!(f, "{kw} {}", display_comma_separated(tables)) + } else { + write!(f, "{}", display_comma_separated(tables)) + } } GrantObjects::AllSequencesInSchema { schemas } => { write!( diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5abb862f88..719ead7e24 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10441,10 +10441,14 @@ impl<'a> Parser<'a> { match object_type { Some(Keyword::SCHEMA) => GrantObjects::Schemas(objects?), Some(Keyword::SEQUENCE) => GrantObjects::Sequences(objects?), - Some(Keyword::TABLE) | None => GrantObjects::Tables(objects?), - // Treat other object types (DATABASE, FUNCTION, VIEW, ROLE) as Tables - // since we don't have specific GrantObjects variants for them - Some(_) => GrantObjects::Tables(objects?), + None => GrantObjects::Tables { + tables: objects?, + object_type: None, + }, + Some(kw) => GrantObjects::Tables { + tables: objects?, + object_type: Some(format!("{kw:?}")), + }, } }; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3002486894..298f5fd73f 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7050,7 +7050,7 @@ fn parse_grant() { granted_by, .. } => match (privileges, objects) { - (Privileges::Actions(actions), GrantObjects::Tables(objects)) => { + (Privileges::Actions(actions), GrantObjects::Tables { tables: objects, .. }) => { assert_eq!( vec![ Action::Select { columns: None }, @@ -7182,7 +7182,7 @@ fn test_revoke() { match verified_stmt(sql) { Statement::Revoke { privileges, - objects: GrantObjects::Tables(tables), + objects: GrantObjects::Tables { tables, .. }, grantees, cascade, granted_by, diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 20b2d60514..82010a0609 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -489,10 +489,9 @@ fn test_grant_with_group_grantee() { // GRANT ... TO GROUP name redshift().verified_stmt("GRANT ALL ON SCHEMA qa_tickit TO GROUP qa_users"); - // GRANT ... TO multiple GROUP grantees (ON TABLE serializes as ON) - redshift().one_statement_parses_to( + // GRANT ... 
TO multiple GROUP grantees with TABLE keyword + redshift().verified_stmt( "GRANT ALL ON TABLE qa_tickit.sales TO GROUP qa_users, GROUP ro_users", - "GRANT ALL ON qa_tickit.sales TO GROUP qa_users, GROUP ro_users", ); // GRANT with column-level privileges diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 9781158c3e..603c46625c 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1861,3 +1861,16 @@ fn test_snowflake_create_table_using_template() { _ => unreachable!(), } } + +#[test] +fn test_revoke_from_application() { + // REVOKE with APPLICATION grantee type and VIEW object type + snowflake().verified_stmt( + "REVOKE SELECT ON VIEW data.views.credit_usage FROM APPLICATION app_snowflake_credits RESTRICT", + ); + // Without RESTRICT (defaults to RESTRICT in output) + snowflake().one_statement_parses_to( + "REVOKE SELECT ON VIEW data.views.credit_usage FROM APPLICATION app_snowflake_credits", + "REVOKE SELECT ON VIEW data.views.credit_usage FROM APPLICATION app_snowflake_credits RESTRICT", + ); +} From 9d0881aa7eed7a63397f4c99b0e6ab4fd9bbc56a Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 05:16:25 +0100 Subject: [PATCH 43/81] snowflake: support comma-separated role list in USE SECONDARY ROLES Add additional_names field to Statement::Use for USE SECONDARY ROLES role1, role2, role3 syntax. --- src/ast/mod.rs | 8 +++++++- src/parser/mod.rs | 8 ++++++++ tests/sqlparser_mysql.rs | 1 + tests/sqlparser_snowflake.rs | 15 +++++++++++++++ 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 2468d90297..aa2238663c 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2108,11 +2108,13 @@ pub enum Statement { /// Note: this is a MySQL-specific statement. ShowCollation { filter: Option }, /// ```sql - /// USE [DATABASE | SCHEMA | ROLE | WAREHOUSE | SECONDARY ROLES] name + /// USE [DATABASE | SCHEMA | ROLE | WAREHOUSE | SECONDARY ROLES] name [, name ...] /// ``` Use { db_name: ObjectName, object_type: Option, + /// Additional names for USE SECONDARY ROLES (comma-separated role list) + additional_names: Vec, }, /// ```sql /// START [ TRANSACTION | WORK ] | START TRANSACTION } ... @@ -3701,12 +3703,16 @@ impl fmt::Display for Statement { Statement::Use { db_name, object_type, + additional_names, } => { write!(f, "USE")?; if let Some(obj_type) = object_type { write!(f, " {obj_type}")?; } write!(f, " {db_name}")?; + for name in additional_names { + write!(f, ", {name}")?; + } Ok(()) } Statement::ShowCollation { filter } => { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 719ead7e24..e6aab6f11a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9459,9 +9459,17 @@ impl<'a> Parser<'a> { } else { self.parse_object_name(false)? 
}; + // For USE SECONDARY ROLES, parse comma-separated list of additional role names + let mut additional_names = vec![]; + if object_type.as_deref() == Some("SECONDARY ROLES") { + while self.consume_token(&Token::Comma) { + additional_names.push(self.parse_object_name(false)?); + } + } Ok(Statement::Use { db_name, object_type, + additional_names, }) } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 3fd0dc7aa4..53207b763c 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -261,6 +261,7 @@ fn parse_use() { Statement::Use { db_name: ObjectName(vec![Ident::new("mydb")]), object_type: None, + additional_names: vec![], } ); } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 603c46625c..9a4ea20e73 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1833,6 +1833,21 @@ fn test_show_columns_in_table() { snowflake().verified_stmt("SHOW COLUMNS IN VIEW my_view"); } +#[test] +fn test_use_secondary_roles() { + // USE SECONDARY ROLES with a single role + snowflake().verified_stmt("USE SECONDARY ROLES ALL"); + snowflake().verified_stmt("USE SECONDARY ROLES NONE"); + // USE SECONDARY ROLES with comma-separated roles + snowflake().verified_stmt("USE SECONDARY ROLES test_role_1, test_role_2"); + snowflake().verified_stmt("USE SECONDARY ROLES a, b, c"); + // Basic USE statements + snowflake().verified_stmt("USE ROLE my_role"); + snowflake().verified_stmt("USE DATABASE my_db"); + snowflake().verified_stmt("USE SCHEMA my_schema"); + snowflake().verified_stmt("USE WAREHOUSE my_wh"); +} + #[test] fn test_snowflake_create_table_using_template() { // Simple USING TEMPLATE with subquery From ac6480d47854e966233f34a714e6d2ec1405b4de Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 05:20:39 +0100 Subject: [PATCH 44/81] snowflake: support IDENTITY column option as AUTOINCREMENT synonym Extend IDENTITY parsing to SnowflakeDialect. Handles IDENTITY, IDENTITY(seed, inc), and IDENTITY START n INCREMENT n [ORDER|NOORDER] variants. 
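A quick sanity-check sketch (not part of the diff; table and column names are
arbitrary): all three accepted spellings parse under the Snowflake dialect.

    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::parser::Parser;

    fn main() {
        for sql in [
            "CREATE TABLE t (id INT IDENTITY)",
            "CREATE TABLE t (id INT IDENTITY(1, 1))",
            "CREATE TABLE t (id INT IDENTITY START 1 INCREMENT 1 ORDER)",
        ] {
            // Each variant ends up as ColumnOption::Identity { seed, increment }.
            Parser::parse_sql(&SnowflakeDialect {}, sql).expect("should parse");
        }
    }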
--- src/parser/mod.rs | 33 ++++++++++++++++++++++++--------- tests/sqlparser_snowflake.rs | 23 +++++++++++++++++++++++ 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e6aab6f11a..7bb9485985 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5947,19 +5947,34 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; Ok(Some(ColumnOption::OnUpdate(expr))) } else if self.parse_keyword(Keyword::IDENTITY) - && dialect_of!(self is RedshiftSqlDialect | MsSqlDialect | GenericDialect) + && dialect_of!(self is RedshiftSqlDialect | MsSqlDialect | SnowflakeDialect | GenericDialect) { - // IDENTITY [ ( seed, increment ) ] - let (seed, increment) = if self.consume_token(&Token::LParen) { + // IDENTITY [ ( seed, increment ) | START n INCREMENT n ] [ORDER|NOORDER] + if dialect_of!(self is SnowflakeDialect | GenericDialect) + && self.parse_keyword(Keyword::START) + { let seed = Some(self.parse_expr()?); - self.expect_token(&Token::Comma)?; + self.expect_keyword(Keyword::INCREMENT)?; let increment = Some(self.parse_expr()?); - self.expect_token(&Token::RParen)?; - (seed, increment) + let _ = + self.parse_keyword(Keyword::ORDER) || self.parse_keyword(Keyword::NOORDER); + Ok(Some(ColumnOption::Identity { seed, increment })) } else { - (None, None) - }; - Ok(Some(ColumnOption::Identity { seed, increment })) + let (seed, increment) = if self.consume_token(&Token::LParen) { + let seed = Some(self.parse_expr()?); + self.expect_token(&Token::Comma)?; + let increment = Some(self.parse_expr()?); + self.expect_token(&Token::RParen)?; + (seed, increment) + } else { + (None, None) + }; + if dialect_of!(self is SnowflakeDialect | GenericDialect) { + let _ = self.parse_keyword(Keyword::ORDER) + || self.parse_keyword(Keyword::NOORDER); + } + Ok(Some(ColumnOption::Identity { seed, increment })) + } } else if self.parse_keyword(Keyword::GENERATED) { self.parse_optional_column_option_generated() } else { diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 9a4ea20e73..194715bf34 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1715,6 +1715,29 @@ fn test_snowflake_autoincrement_start_increment() { assert_eq!(stmts.len(), 1); } +#[test] +fn test_snowflake_identity() { + // IDENTITY with parenthesized seed/increment (Snowflake synonym for AUTOINCREMENT) + let stmts = snowflake() + .parse_sql_statements("CREATE TABLE t (id INT IDENTITY(1, 1))") + .unwrap(); + assert_eq!(stmts.len(), 1); + // IDENTITY without parameters + let stmts = snowflake() + .parse_sql_statements("CREATE TABLE t (id INT IDENTITY)") + .unwrap(); + assert_eq!(stmts.len(), 1); + // IDENTITY with START/INCREMENT + let stmts = snowflake() + .parse_sql_statements("CREATE TABLE t (id INT IDENTITY START 1 INCREMENT 1 ORDER)") + .unwrap(); + assert_eq!(stmts.len(), 1); + // ALTER TABLE ADD COLUMN with IDENTITY + snowflake() + .parse_sql_statements("ALTER TABLE foo ADD COLUMN id INT IDENTITY(1, 1)") + .unwrap(); +} + #[test] fn test_snowflake_tag_clause() { // Table-level TAG (skipped in AST, not round-tripped) From 1075d3ce8aef55aaa931e3bb9a21eee4e68368c8 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 05:23:37 +0100 Subject: [PATCH 45/81] snowflake: support multiple key=value pairs in ALTER SESSION SET Change SessionOperation::Set to hold Vec instead of single SqlOption, enabling ALTER SESSION SET autocommit = FALSE, QUERY_TAG = 'qtag'. 
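For illustration (outside the patch itself), the statement from the new test now
parses into a single SessionOperation::Set carrying both pairs in order.

    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "ALTER SESSION SET autocommit = FALSE, QUERY_TAG = 'qtag'";
        let ast = Parser::parse_sql(&SnowflakeDialect {}, sql).expect("parse failed");
        // Note: boolean literals normalize on output (FALSE prints as false).
        println!("{ast:?}");
    }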
--- src/ast/mod.rs | 4 ++-- src/parser/mod.rs | 4 ++-- tests/sqlparser_snowflake.rs | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index aa2238663c..fdfbacc65e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -5703,7 +5703,7 @@ impl fmt::Display for SearchModifier { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SessionOperation { Unset(Vec>), - Set(SqlOption), + Set(Vec), } impl fmt::Display for SessionOperation { @@ -5713,7 +5713,7 @@ impl fmt::Display for SessionOperation { write!(f, "UNSET {}", display_comma_separated(vars))?; } SessionOperation::Set(options) => { - write!(f, "SET {options}")?; + write!(f, "SET {}", display_comma_separated(options))?; } } Ok(()) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7bb9485985..eac777e3f0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6703,8 +6703,8 @@ impl<'a> Parser<'a> { pub fn parse_alter_session(&mut self) -> Result { let session_operation = if self.parse_keyword(Keyword::SET) { - let option = self.parse_sql_option()?; - SessionOperation::Set(option) + let options = self.parse_comma_separated(|p| p.parse_sql_option())?; + SessionOperation::Set(options) } else if self.parse_keyword(Keyword::UNSET) { let vars = self.parse_comma_separated(|p| p.parse_identifier(false))?; SessionOperation::Unset(vars) diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 194715bf34..b841bb0c5a 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1587,6 +1587,10 @@ fn test_sf_trailing_commas_in_from_clause() { #[test] fn test_alter_session() { snowflake().verified_stmt("ALTER SESSION SET LOCK_TIMEOUT = 3600"); + snowflake().one_statement_parses_to( + "ALTER SESSION SET autocommit = FALSE, QUERY_TAG = 'qtag', JSON_INDENT = 1", + "ALTER SESSION SET autocommit = false, QUERY_TAG = 'qtag', JSON_INDENT = 1", + ); snowflake().verified_stmt("ALTER SESSION UNSET LOCK_TIMEOUT"); } From 664a1261f5ecf517ecc6e1f36803372c8627107a Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 05:30:25 +0100 Subject: [PATCH 46/81] snowflake: support key=value options after DESCRIBE TABLE Add options field to ExplainTable AST and parse identifier=value pairs, enabling DESCRIBE TABLE t TYPE = STAGE. 
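Rough usage sketch (not part of the diff; the table name is a placeholder): the
trailing key=value pair is collected into the new options field instead of being
rejected.

    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "DESCRIBE TABLE my_db.my_schema.my_table type = stage";
        let ast = Parser::parse_sql(&SnowflakeDialect {}, sql).expect("parse failed");
        // Statement::ExplainTable { options, .. } now holds the `type = stage` pair.
        println!("{ast:?}");
    }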
--- src/ast/mod.rs | 7 +++++++ src/parser/mod.rs | 21 +++++++++++++++++++++ tests/sqlparser_common.rs | 1 + tests/sqlparser_snowflake.rs | 5 +++++ 4 files changed, 34 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index fdfbacc65e..980a6b0b82 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2326,6 +2326,8 @@ pub enum Statement { function_params: Option>, /// Optional output format (ClickHouse) format: Option, + /// Optional key=value options (e.g., Snowflake `type=stage`) + options: Vec, }, /// EXPLAIN / DESCRIBE for select_statement Explain { @@ -2471,6 +2473,7 @@ impl fmt::Display for Statement { object_type, function_params, format, + options, } => { if *describe_alias { write!(f, "DESCRIBE ")?; @@ -2487,6 +2490,10 @@ impl fmt::Display for Statement { write!(f, "({})", display_comma_separated(params))?; } + if !options.is_empty() { + write!(f, " {}", display_comma_separated(options))?; + } + if let Some(format) = format { write!(f, " FORMAT {format}")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index eac777e3f0..c99733281f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8398,6 +8398,26 @@ impl<'a> Parser<'a> { None }; + // Snowflake: DESCRIBE TABLE tab type=stage + // Parse key=value options (identifier followed by = and value) + let mut options = vec![]; + while let Token::Word(w) = &self.peek_token_ref().token { + if w.keyword == Keyword::FORMAT { + break; // FORMAT is handled separately below + } + if self.peek_nth_token_ref(1).token == Token::Eq { + let name = self.parse_object_name(false)?; + self.expect_token(&Token::Eq)?; + let value = self.parse_expr()?; + options.push(SqlOption { name, value }); + if !self.consume_token(&Token::Comma) { + break; + } + } else { + break; + } + } + // ClickHouse: DESCRIBE TABLE tab FORMAT Vertical let format = if self.parse_keyword(Keyword::FORMAT) { Some(self.parse_identifier(false)?.unwrap()) @@ -8411,6 +8431,7 @@ impl<'a> Parser<'a> { function_params, table_name, format, + options, }) } } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 298f5fd73f..46a4e3d432 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3846,6 +3846,7 @@ fn parse_explain_table() { object_type: _, function_params: _, format: _, + options: _, } => { assert_eq!(describe_alias, expected_describe_alias); assert_eq!("test_identifier", table_name.to_string()); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index b841bb0c5a..4684b18840 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1669,6 +1669,11 @@ fn test_describe_object_types() { "DESC FUNCTION governance.dmfs.count_positive_numbers(TABLE(NUMBER, NUMBER, NUMBER))", "DESCRIBE FUNCTION governance.dmfs.count_positive_numbers(TABLE(NUMBER, NUMBER, NUMBER))", ); + // DESCRIBE TABLE with type=stage option + snowflake().one_statement_parses_to( + r#"DESCRIBE TABLE "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF100TCL"."WEB_SITE" type=stage"#, + r#"DESCRIBE TABLE "SNOWFLAKE_SAMPLE_DATA"."TPCDS_SF100TCL"."WEB_SITE" type = stage"#, + ); } #[test] From 75c1648f25c69f4d275ba5e41be6f4c0d80c4e60 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 05:39:31 +0100 Subject: [PATCH 47/81] snowflake: support model!METHOD syntax and {*} object wildcards Parse model!METHOD(args) by treating ! as infix operator building compound function name. Add Value::ObjectWildcard for {*} and {tbl.*} ML input literals. 
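As a sketch (not part of the diff), mirroring one of the new tests: the method call
keeps `!` inside the function name, `{tbl.*}` becomes Value::ObjectWildcard, and the
statement round-trips unchanged.

    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "SELECT m!PREDICT(INPUT_DATA => {tbl.*}) AS p FROM tbl";
        let ast = Parser::parse_sql(&SnowflakeDialect {}, sql).expect("parse failed");
        assert_eq!(ast[0].to_string(), sql);
    }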
--- src/ast/value.rs | 7 ++++ src/parser/mod.rs | 73 ++++++++++++++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 12 ++++++ 3 files changed, 92 insertions(+) diff --git a/src/ast/value.rs b/src/ast/value.rs index 1dc751cd8e..d15b560550 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -89,6 +89,9 @@ pub enum Value { /// MAP literal as used by DuckDB /// MAP {'key': value, ...} MapLiteral(Vec), + /// Snowflake object wildcard: `{*}` or `{tbl.*}` + /// Used in ML model function calls, e.g. `m!PREDICT(INPUT_DATA => {*})` + ObjectWildcard(Option), } impl fmt::Display for Value { @@ -155,6 +158,10 @@ impl fmt::Display for Value { write!(f, "{}", " }") } } + Value::ObjectWildcard(qualifier) => match qualifier { + Some(name) => write!(f, "{{{name}.*}}"), + None => write!(f, "{{*}}"), + }, } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c99733281f..098e8cc30e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2589,6 +2589,12 @@ impl<'a> Parser<'a> { self.expect_token(&Token::DoubleColon)?; self.parse_pg_try_cast(expr) } else if Token::ExclamationMark == tok { + // Snowflake model method syntax: model!PREDICT(args) + if dialect_of!(self is SnowflakeDialect | GenericDialect) { + if let Some(method_name) = self.parse_model_method_name(&expr) { + return self.parse_function(method_name); + } + } // PostgreSQL factorial operation Ok(Expr::UnaryOp { op: UnaryOperator::PGPostfixFactorial, @@ -2818,6 +2824,39 @@ impl<'a> Parser<'a> { } /// Parse a postgresql casting style which is in the form of `expr::datatype` + /// Try to extract a model method name from `expr!METHOD` syntax (Snowflake). + /// Returns `Some(ObjectName)` if the next token is a word followed by `(`, + /// combining the expr's identifier parts with `!METHOD` as the function name. 
+ fn parse_model_method_name(&mut self, expr: &Expr) -> Option { + // Next token must be a word (the method name) followed by ( + let next = self.peek_token(); + if !matches!(next.token, Token::Word(_)) { + return None; + } + if self.peek_nth_token_ref(1).token != Token::LParen { + return None; + } + // Extract identifier parts from the expression + let mut idents: Vec = match expr { + Expr::Identifier(ident) => vec![(**ident).clone()], + Expr::CompoundIdentifier(parts) => (**parts).clone(), + _ => return None, + }; + + // Consume the method name token + let method_token = self.next_token(); + let method_name = match method_token.token { + Token::Word(w) => w.value, + _ => return None, + }; + + // Build the function name: last ident gets !METHOD appended + if let Some(last) = idents.last_mut() { + last.value = format!("{}!{}", last.value, method_name); + } + Some(ObjectName(idents)) + } + pub fn parse_pg_cast(&mut self, expr: Expr) -> Result { Ok(Expr::Cast { expr: Box::new(expr), @@ -7026,6 +7065,40 @@ impl<'a> Parser<'a> { return Ok(Value::Parameter(placeholder)); } + // Snowflake object wildcard: {*} or {tbl.*} + if self.consume_token(&Token::Mul) { + self.expect_token(&Token::RBrace)?; + return Ok(Value::ObjectWildcard(None)); + } + // Check for {name.*} or {schema.name.*} pattern + if matches!(self.peek_token().token, Token::Word(_)) { + let idx = self.index; + let mut idents = vec![]; + let mut found_wildcard = false; + loop { + if let Ok(ident) = self.parse_identifier(false) { + idents.push(ident.unwrap()); + if self.consume_token(&Token::Period) { + if self.consume_token(&Token::Mul) { + found_wildcard = true; + break; + } + // Continue to parse next identifier part + } else { + break; + } + } else { + break; + } + } + if found_wildcard && !idents.is_empty() { + self.expect_token(&Token::RBrace)?; + return Ok(Value::ObjectWildcard(Some(ObjectName(idents)))); + } + // Backtrack if not a wildcard pattern + self.index = idx; + } + if self.consume_token(&Token::RBrace) { return Ok(Value::ObjectConstant(vec![])); } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 4684b18840..a12293f58f 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1921,3 +1921,15 @@ fn test_revoke_from_application() { "REVOKE SELECT ON VIEW data.views.credit_usage FROM APPLICATION app_snowflake_credits RESTRICT", ); } + +#[test] +fn test_snowflake_model_method_syntax() { + // Simple model method call + snowflake().verified_stmt("SELECT model!PREDICT(1)"); + + // Model method with named argument and object wildcard + snowflake().verified_stmt("SELECT m!PREDICT(INPUT_DATA => {*}) AS p FROM tbl"); + + // Model method with qualified object wildcard + snowflake().verified_stmt("SELECT m!PREDICT(INPUT_DATA => {tbl.*}) AS p FROM tbl"); +} From 43a2a378d56c854971c83402df9bada56ef78649 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 05:43:57 +0100 Subject: [PATCH 48/81] snowflake: support field access on positional placeholders ($1.field) After parsing a placeholder, greedily consume dot-separated identifiers and wrap as Expr::CompositeAccess for $1.elem expressions. 
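Illustrative sketch (not part of the diff), taken from the new test: each trailing
`.ident` wraps the placeholder in another Expr::CompositeAccess level.

    use sqlparser::dialect::SnowflakeDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "SELECT $1.elem.sub";
        let ast = Parser::parse_sql(&SnowflakeDialect {}, sql).expect("parse failed");
        assert_eq!(ast[0].to_string(), sql);
    }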
--- src/parser/mod.rs | 20 +++++++++++++++++++- tests/sqlparser_snowflake.rs | 9 +++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 098e8cc30e..0a5d651f38 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1070,7 +1070,25 @@ impl<'a> Parser<'a> { } Token::Placeholder(_) | Token::Colon | Token::AtSign => { self.prev_token(); - Ok(Expr::Value(self.parse_value()?)) + let mut expr = Expr::Value(self.parse_value()?); + // Handle field access after placeholder, e.g. $1.elem + while self.consume_token(&Token::Period) { + let tok = self.next_token(); + let key = match tok.token { + Token::Word(word) => word.to_ident(), + _ => { + return parser_err!( + format!("Expected identifier, found: {tok}"), + tok.span.start + ); + } + }; + expr = Expr::CompositeAccess { + expr: Box::new(expr), + key, + }; + } + Ok(expr) } _ => self.expected("an expression:", next_token), }?; diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index a12293f58f..be4d13bd27 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1933,3 +1933,12 @@ fn test_snowflake_model_method_syntax() { // Model method with qualified object wildcard snowflake().verified_stmt("SELECT m!PREDICT(INPUT_DATA => {tbl.*}) AS p FROM tbl"); } + +#[test] +fn test_placeholder_field_access() { + // Snowflake positional column reference with field access + snowflake().verified_stmt("SELECT $1.elem"); + + // Multi-level field access + snowflake().verified_stmt("SELECT $1.elem.sub"); +} From 6a69912a18a85920867ce810be1b5051be5224c4 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 05:54:52 +0100 Subject: [PATCH 49/81] snowflake: support stage paths with quoted identifiers (@"schema"."stage"/path) Detect @ followed by quoted identifiers and re-assemble the stage path including dot separators and /path suffixes into a single Ident. --- src/parser/mod.rs | 61 ++++++++++++++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 29 +++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0a5d651f38..f44d999dde 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7841,6 +7841,67 @@ impl<'a> Parser<'a> { } } + // Snowflake stage references with quoted identifiers: @"schema"."stage"/path.gz + // When the first ident is just "@" (from tokenizer splitting on quoted idents), + // the tokenizer couldn't consume the quoted parts. We need to greedily consume + // subsequent identifiers (with or without leading dots) and collapse them into a + // single Ident to match the non-quoted behavior (e.g., @namespace.stage_name/path + // is already a single token from the tokenizer). 
+ if in_table_clause + && dialect_of!(self is SnowflakeDialect | GenericDialect) + && idents.len() == 1 + && idents[0].value == "@" + { + // Build stage path from the @ prefix and subsequent identifiers + let mut stage_path = String::from("@"); + // Consume the first identifier after @ (no dot required between @ and first part) + if let Ok(ident) = self.parse_identifier(false) { + let ident = ident.unwrap(); + if let Some(q) = ident.quote_style { + stage_path.push(q); + stage_path.push_str(&ident.value); + stage_path.push(q); + } else { + stage_path.push_str(&ident.value); + } + // Consume additional dot-separated identifiers + while self.consume_token(&Token::Period) { + stage_path.push('.'); + let ident = self.parse_identifier(false)?.unwrap(); + if let Some(q) = ident.quote_style { + stage_path.push(q); + stage_path.push_str(&ident.value); + stage_path.push(q); + } else { + stage_path.push_str(&ident.value); + } + } + } + // Consume /path suffix (e.g., /file.gz) + if self.consume_token(&Token::Div) { + stage_path.push('/'); + // Consume path components: identifiers, numbers, and dots + loop { + match self.peek_token_kind() { + Token::Word(_) | Token::Number(_, _) => { + let tok = self.next_token(); + match tok.token { + Token::Word(w) => stage_path.push_str(&w.value), + Token::Number(n, _) => stage_path.push_str(&n), + _ => unreachable!(), + } + } + Token::Period => { + self.next_token(); + stage_path.push('.'); + } + _ => break, + } + } + } + idents = vec![Ident::new(stage_path)]; + } + Ok(ObjectName(idents)) } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index be4d13bd27..a317dfda8e 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1250,6 +1250,35 @@ fn test_snowflake_stage_object_names() { } } +#[test] +fn test_snowflake_stage_with_quoted_identifiers() { + // Stage paths with quoted identifiers should be collapsed into a single Ident + // just like unquoted stage paths (e.g., @namespace.stage/path). + let sql = r#"SELECT * FROM @"myschema"."mystage"/file.gz"#; + let select = snowflake().verified_only_select(sql); + match &select.from[0].relation { + TableFactor::Table { name, .. } => { + assert_eq!( + name.0, + vec![Ident::new(r#"@"myschema"."mystage"/file.gz"#)] + ); + } + _ => unreachable!(), + } + + let sql2 = r#"SELECT * FROM @"my_DB"."schEMA1".mystage/file.gz"#; + let select2 = snowflake().verified_only_select(sql2); + match &select2.from[0].relation { + TableFactor::Table { name, .. } => { + assert_eq!( + name.0, + vec![Ident::new(r#"@"my_DB"."schEMA1".mystage/file.gz"#)] + ); + } + _ => unreachable!(), + } +} + #[test] fn test_snowflake_trim() { let real_sql = r#"SELECT customer_id, TRIM(sub_items.value:item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; From 592c444ed0bb36f8eff53ad9091074f4c3b63c05 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 05:59:12 +0100 Subject: [PATCH 50/81] sqlite: support GLOB pattern matching operator Add Expr::Glob AST node and parse GLOB / NOT GLOB as infix operator at LIKE precedence for SQLite's Unix-style wildcard pattern matching. 
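A minimal sketch (not part of the diff; the table and column names are invented):
because GLOB sits at LIKE precedence, it composes with AND/OR as expected.

    use sqlparser::dialect::SQLiteDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "SELECT name FROM files WHERE name GLOB '*.rs' AND size > 0";
        Parser::parse_sql(&SQLiteDialect {}, sql).expect("should parse");
    }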
--- src/ast/mod.rs | 18 ++++++++++++++++++ src/keywords.rs | 1 + src/parser/mod.rs | 11 ++++++++++- tests/sqlparser_sqlite.rs | 11 +++++++++++ 4 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 980a6b0b82..b94c56a7b8 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -588,6 +588,13 @@ pub enum Expr { expr: Box, pattern: Box, }, + /// GLOB (SQLite pattern matching with Unix-style wildcards) + /// + Glob { + negated: bool, + expr: Box, + pattern: Box, + }, /// ILIKE (case-insensitive LIKE) ILike { negated: bool, @@ -985,6 +992,17 @@ impl fmt::Display for Expr { if *negated { "NOT " } else { "" }, pattern ), + Expr::Glob { + negated, + expr, + pattern, + } => write!( + f, + "{} {}GLOB {}", + expr, + if *negated { "NOT " } else { "" }, + pattern + ), Expr::AnyOp { left, compare_op, diff --git a/src/keywords.rs b/src/keywords.rs index 95e39dd009..585b8deede 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -334,6 +334,7 @@ define_keywords!( GENERATED, GEOGRAPHY, GET, + GLOB, GLOBAL, GRANT, GRANTED, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f44d999dde..4945ea3ea1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2549,13 +2549,20 @@ impl<'a> Parser<'a> { | Keyword::ILIKE | Keyword::RLIKE | Keyword::SIMILAR - | Keyword::REGEXP => { + | Keyword::REGEXP + | Keyword::GLOB => { self.prev_token(); let negated = self.parse_keyword(Keyword::NOT); if self.parse_keyword(Keyword::IN) { self.parse_in(expr, negated) } else if self.parse_keyword(Keyword::BETWEEN) { self.parse_between(expr, negated) + } else if self.parse_keyword(Keyword::GLOB) { + Ok(Expr::Glob { + negated, + expr: Box::new(expr), + pattern: Box::new(self.parse_subexpr(Self::LIKE_PREC)?), + }) } else if self.parse_keyword(Keyword::REGEXP) { Ok(Expr::Regexp { negated, @@ -2946,6 +2953,7 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::GLOB => Ok(Self::LIKE_PREC), _ => Ok(0), }, Token::Word(w) if w.keyword == Keyword::IS => Ok(Self::IS_PREC), @@ -2956,6 +2964,7 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), + Token::Word(w) if w.keyword == Keyword::GLOB => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::OVERLAPS => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC), diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 3e5e8430f0..68d62ec528 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -293,6 +293,17 @@ fn sqlite() -> TestedDialects { } } +#[test] +fn parse_glob() { + // Basic GLOB + let sql = "SELECT 'xyz' GLOB '*y*'"; + sqlite_and_generic().verified_only_select(sql); + + // NOT GLOB + let sql = "SELECT 'xyz' NOT GLOB '*y*'"; + sqlite_and_generic().verified_only_select(sql); +} + fn sqlite_and_generic() -> TestedDialects { TestedDialects { // we don't have a separate SQLite dialect, so test only the generic dialect for now From 652b9752d230f45ee92fe68978ac5bf4744ec980 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 06:10:40 +0100 Subject: [PATCH 
51/81] bigquery: support typed array constructors ARRAY[...] Add Expr::TypedArray and parse ARRAY[expr, ...] syntax for BigQuery's explicitly typed array literals, including empty arrays. --- src/ast/mod.rs | 14 ++++++++++++++ src/parser/mod.rs | 27 +++++++++++++++++++++++++++ tests/sqlparser_bigquery.rs | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b94c56a7b8..645e43f48a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -790,6 +790,13 @@ pub enum Expr { ArrayIndex { obj: Box, indexes: Vec }, /// An array expression e.g. `ARRAY[1, 2]` Array(Array), + /// A typed array expression e.g. `ARRAY[1, 2]` (BigQuery) + TypedArray { + /// The element type, e.g. `INT64` or `STRUCT` + data_type: DataType, + /// The list of expressions between brackets + values: Vec, + }, /// An interval expression e.g. `INTERVAL '1' YEAR` Interval(Interval), /// `MySQL` specific text search function [(1)]. @@ -1243,6 +1250,13 @@ impl fmt::Display for Expr { Expr::Array(set) => { write!(f, "{set}") } + Expr::TypedArray { data_type, values } => { + write!( + f, + "ARRAY<{data_type}>[{}]", + display_comma_separated(values) + ) + } Expr::JsonAccess { left, operator, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4945ea3ea1..5be906ac11 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -888,6 +888,13 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; self.parse_array_subquery() } + // BigQuery typed array constructor: ARRAY[...] + Keyword::ARRAY + if self.peek_token_is(&Token::Lt) + && dialect_of!(self is BigQueryDialect | GenericDialect) => + { + self.parse_typed_array_expr() + } // DuckDB MAP literal: MAP {'key': value, ...} Keyword::MAP if self.peek_token_is(&Token::LBrace) => self.parse_map_literal(), Keyword::NOT => self.parse_not(), @@ -1753,6 +1760,26 @@ impl<'a> Parser<'a> { Ok(Expr::ArraySubquery(query)) } + /// Parses a BigQuery typed array constructor: `ARRAY[expr, ...]` + /// Assumes the ARRAY keyword has already been consumed and `<` is the next token. + pub fn parse_typed_array_expr(&mut self) -> Result { + self.expect_token(&Token::Lt)?; + let (data_type, trailing_bracket) = self.parse_data_type_helper()?; + let _closing = self.expect_closing_angle_bracket(trailing_bracket)?; + self.expect_token(&Token::LBracket)?; + if self.peek_token_is(&Token::RBracket) { + self.next_token(); // consume ] + Ok(Expr::TypedArray { + data_type, + values: vec![], + }) + } else { + let values = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RBracket)?; + Ok(Expr::TypedArray { data_type, values }) + } + } + /// Parses a DuckDB MAP literal: `MAP {'key': value, ...}` /// Assumes the MAP keyword has already been consumed. 
pub fn parse_map_literal(&mut self) -> Result { diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 07c0881d05..cba7251782 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1324,6 +1324,38 @@ fn test_select_array_item_field_in_function() { ); } +#[test] +fn test_typed_array_constructor() { + // Simple typed array + bigquery().verified_expr("ARRAY[1, 2, 3]"); + + // Empty typed array + bigquery().verified_expr("ARRAY[]"); + + // Typed array with STRUCT type + bigquery().verified_expr( + "ARRAY>[STRUCT(1), STRUCT(2)]", + ); + + // Typed array with STRUCT containing multiple fields + bigquery().verified_expr( + "ARRAY>[('warehouse #1', 'WA'), ('warehouse #2', 'CA')]", + ); + + // Empty typed array with complex type + bigquery().verified_expr("ARRAY>[]"); + + // Typed array in SELECT context + bigquery().verified_only_select( + "SELECT ARRAY[1, 2, 3] AS arr", + ); + + // Typed array in UNNEST + bigquery().verified_only_select( + "SELECT * FROM UNNEST(ARRAY>[])", + ); +} + #[test] fn test_select_json_field() { let _select = bigquery().verified_only_select( From 0c99f1c238aea1a0f3397874bef84765d17964cc Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 06:20:19 +0100 Subject: [PATCH 52/81] bigquery: support MODEL/TABLE keyword prefixes in ML function arguments Add FunctionArgKeyword enum and FunctionArgExpr::TableRef for ML.PREDICT(MODEL t, TABLE t) syntax used in BigQuery ML functions. --- src/ast/mod.rs | 32 ++++++++++++++++++++++ src/keywords.rs | 1 + src/parser/mod.rs | 19 +++++++++++++ tests/sqlparser_bigquery.rs | 53 +++++++++++++++++++++++++++++++++++++ 4 files changed, 105 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 645e43f48a..34556adf9f 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4578,6 +4578,25 @@ impl fmt::Display for Assignment { } } +/// Keyword prefix for table references in function arguments +/// e.g. `MODEL` or `TABLE` in BigQuery ML functions +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FunctionArgKeyword { + Model, + Table, +} + +impl fmt::Display for FunctionArgKeyword { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FunctionArgKeyword::Model => f.write_str("MODEL"), + FunctionArgKeyword::Table => f.write_str("TABLE"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -4591,6 +4610,13 @@ pub enum FunctionArgExpr { WildcardWithOptions(WildcardAdditionalOptions), /// Qualified wildcard with options, e.g. `alias.* EXCLUDE (col1)` QualifiedWildcardWithOptions(ObjectName, WildcardAdditionalOptions), + /// A keyword-prefixed table reference in function arguments, + /// e.g. 
`MODEL mydataset.mymodel` or `TABLE mydataset.mytable` + /// Used by BigQuery ML functions like ML.PREDICT, VECTOR_SEARCH, GAP_FILL + TableRef { + keyword: FunctionArgKeyword, + table_name: ObjectName, + }, } impl fmt::Display for FunctionArgExpr { @@ -4605,6 +4631,12 @@ impl fmt::Display for FunctionArgExpr { FunctionArgExpr::QualifiedWildcardWithOptions(prefix, options) => { write!(f, "{prefix}.*{options}") } + FunctionArgExpr::TableRef { + keyword, + table_name, + } => { + write!(f, "{keyword} {table_name}") + } } } } diff --git a/src/keywords.rs b/src/keywords.rs index 585b8deede..01c416f909 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -458,6 +458,7 @@ define_keywords!( MINVALUE, MOD, MODE, + MODEL, MODIFIES, MODULE, MONTH, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5be906ac11..def7cfbb0b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11030,6 +11030,25 @@ impl<'a> Parser<'a> { } pub fn parse_function_args(&mut self) -> Result { + // BigQuery ML functions use MODEL/TABLE keyword-prefixed table references + // e.g. ML.PREDICT(MODEL `mydataset.mymodel`, TABLE `mydataset.mytable`) + if let Some(kw) = self.parse_one_of_keywords(&[Keyword::MODEL, Keyword::TABLE]) { + // TABLE followed by `(` is the TABLE(subquery) syntax, not a table ref + if kw == Keyword::TABLE && self.peek_token_is(&Token::LParen) { + self.prev_token(); + } else { + let keyword = match kw { + Keyword::MODEL => FunctionArgKeyword::Model, + Keyword::TABLE => FunctionArgKeyword::Table, + _ => unreachable!(), + }; + let table_name = self.parse_object_name(false)?; + return Ok(FunctionArg::Unnamed(FunctionArgExpr::TableRef { + keyword, + table_name, + })); + } + } if self.peek_nth_token(1) == Token::RArrow { let name = self.parse_identifier(false)?.unwrap(); diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index cba7251782..9656ba213a 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1682,3 +1682,56 @@ fn parse_wildcard_table() { bigquery() .verified_stmt("SELECT * FROM x.y* WHERE _TABLE_SUFFIX BETWEEN '20230101' AND '20231231'"); } + +#[test] +fn parse_bigquery_ml_predict() { + // MODEL keyword-prefixed table reference (backtick dotted names split into parts) + bigquery().one_statement_parses_to( + "SELECT * FROM ML.PREDICT(MODEL `my_project.my_dataset.my_model`, (SELECT * FROM input_data))", + "SELECT * FROM ML.PREDICT(MODEL `my_project`.`my_dataset`.`my_model`, (SELECT * FROM input_data))", + ); + // MODEL with TABLE keyword + bigquery().one_statement_parses_to( + "SELECT * FROM ML.PREDICT(MODEL `mydataset.mymodel`, TABLE `mydataset.mytable`)", + "SELECT * FROM ML.PREDICT(MODEL `mydataset`.`mymodel`, TABLE `mydataset`.`mytable`)", + ); + // MODEL with subquery and STRUCT + bigquery().one_statement_parses_to( + "SELECT * FROM ML.PREDICT(MODEL `mydataset.mymodel`, (SELECT custom_label, column1, column2 FROM `mydataset.mytable`), STRUCT(0.55 AS threshold))", + "SELECT * FROM ML.PREDICT(MODEL `mydataset`.`mymodel`, (SELECT custom_label, column1, column2 FROM `mydataset`.`mytable`), STRUCT(0.55 AS threshold))", + ); + // ML.FORECAST with STRUCT + bigquery().one_statement_parses_to( + "SELECT * FROM ML.FORECAST(MODEL `mydataset.mymodel`, STRUCT(2 AS horizon))", + "SELECT * FROM ML.FORECAST(MODEL `mydataset`.`mymodel`, STRUCT(2 AS horizon))", + ); + // MODEL with unquoted names + bigquery().verified_stmt( + "SELECT * FROM ML.PREDICT(MODEL mydataset.mymodel, (SELECT * FROM input_data))", + ); + // ML.TRANSLATE with TABLE and STRUCT + 
bigquery().one_statement_parses_to( + "SELECT * FROM ML.TRANSLATE(MODEL `mydataset.mytranslatemodel`, TABLE `mydataset.mybqtable`, STRUCT('translate_text' AS translate_mode, 'zh-CN' AS target_language_code))", + "SELECT * FROM ML.TRANSLATE(MODEL `mydataset`.`mytranslatemodel`, TABLE `mydataset`.`mybqtable`, STRUCT('translate_text' AS translate_mode, 'zh-CN' AS target_language_code))", + ); +} + +#[test] +fn parse_bigquery_vector_search() { + // VECTOR_SEARCH with TABLE keyword + bigquery().verified_stmt( + "SELECT * FROM VECTOR_SEARCH(TABLE mydataset.base_table, 'column_to_search', TABLE mydataset.query_table)", + ); + // VECTOR_SEARCH with named arguments + bigquery().verified_stmt( + "SELECT * FROM VECTOR_SEARCH(TABLE mydataset.base_table, 'column_to_search', TABLE mydataset.query_table, 'query_column_to_search', top_k => 2, distance_type => 'cosine')", + ); +} + +#[test] +fn parse_bigquery_gap_fill() { + // GAP_FILL with TABLE and named arguments + bigquery().verified_stmt( + "SELECT * FROM GAP_FILL(TABLE device_data, ts_column => 'time', bucket_width => INTERVAL '1' MINUTE)", + ); +} From f7bfe8cd6ec2b1033ffe2d3b5e14a232b0afd587 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 06:44:43 +0100 Subject: [PATCH 53/81] bigquery: support chained subscript access col[OFFSET(0)].field[OFFSET(0)] Allow MapAccess on any expression, not just identifiers. Stop JSON path parsing at [ for BigQuery so expression loop handles OFFSET/ORDINAL subscripts. --- src/parser/mod.rs | 21 +++++++++++++-------- tests/sqlparser_bigquery.rs | 24 ++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 8 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index def7cfbb0b..68bf38190d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2778,13 +2778,10 @@ impl<'a> Parser<'a> { debug!("Tok: {}", tok); key_parts.push(key); } - match expr { - e @ Expr::Identifier(_) | e @ Expr::CompoundIdentifier(_) => Ok(Expr::MapAccess { - column: Box::new(e), - keys: key_parts, - }), - _ => Ok(expr), - } + Ok(Expr::MapAccess { + column: Box::new(expr), + keys: key_parts, + }) } /// Parses the parens following the `[ NOT ] IN` operator @@ -7221,7 +7218,15 @@ impl<'a> Parser<'a> { } } Token::Period => buf.push('.'), - Token::LBracket => buf.push('['), + Token::LBracket => { + if dialect_of!(self is BigQueryDialect) { + // For BigQuery, stop before '[' so the expression loop can handle + // function-style subscripts like [OFFSET(0)], [SAFE_OFFSET(0)] + self.prev_token(); + break; + } + buf.push('[') + } Token::RBracket => buf.push(']'), Token::Colon => buf.push(':'), Token::DoubleQuotedString(ref s) => write!(buf, "\"{}\"", s).unwrap(), diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 9656ba213a..95c13d970c 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1735,3 +1735,27 @@ fn parse_bigquery_gap_fill() { "SELECT * FROM GAP_FILL(TABLE device_data, ts_column => 'time', bucket_width => INTERVAL '1' MINUTE)", ); } + +#[test] +fn parse_bigquery_chained_subscript_access() { + // col[OFFSET(0)].field + bigquery().verified_stmt( + "SELECT col[OFFSET(0)].source_type AS x FROM t1", + ); + // col[SAFE_OFFSET(0)].field[SAFE_OFFSET(0)] + bigquery().verified_stmt( + "SELECT col[SAFE_OFFSET(0)].source_ids[SAFE_OFFSET(0)] AS x FROM t1", + ); + // col[OFFSET(0)].field[OFFSET(0)] chained + bigquery().verified_stmt( + "SELECT col[OFFSET(0)].ids[OFFSET(0)] AS x FROM t1", + ); + // Simple subscript still works + bigquery().verified_stmt( + "SELECT 
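Sketch for reference (not part of the diff), using one of the new test statements:
each [OFFSET(n)] subscript and .field step now chains off the previous access
expression instead of requiring a bare identifier.

    use sqlparser::dialect::BigQueryDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "SELECT col[OFFSET(0)].ids[OFFSET(0)] AS x FROM t1";
        let ast = Parser::parse_sql(&BigQueryDialect {}, sql).expect("parse failed");
        assert_eq!(ast[0].to_string(), sql);
    }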
col[OFFSET(0)] AS x FROM t1", + ); + // Subscript with field access + bigquery().verified_stmt( + "SELECT col[0].field AS x FROM t1", + ); +} From 31f6eaa03403e4d6862d86222d8fffed13581d20 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 06:51:37 +0100 Subject: [PATCH 54/81] bigquery: support empty STRUCT() constructor with no arguments Allow STRUCT() with zero field expressions, which BigQuery permits in ML function calls. Previously required at least one argument. --- src/parser/mod.rs | 9 +++++++-- tests/sqlparser_bigquery.rs | 11 +++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 68bf38190d..2be9966450 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2155,8 +2155,13 @@ impl<'a> Parser<'a> { } self.expect_token(&Token::LParen)?; - let values = self - .parse_comma_separated(|parser| parser.parse_struct_field_expr(!fields.is_empty()))?; + let values = if self.peek_token_kind() == &Token::RParen { + vec![] + } else { + self.parse_comma_separated(|parser| { + parser.parse_struct_field_expr(!fields.is_empty()) + })? + }; self.expect_token(&Token::RParen)?; Ok(Expr::Struct { values, fields }) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 95c13d970c..4305eebca7 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1759,3 +1759,14 @@ fn parse_bigquery_chained_subscript_access() { "SELECT col[0].field AS x FROM t1", ); } + +#[test] +fn parse_bigquery_empty_struct() { + // STRUCT() - empty struct literal + bigquery().verified_stmt("SELECT STRUCT()"); + // Empty struct as function argument + bigquery().one_statement_parses_to( + "SELECT * FROM ML.FORECAST(MODEL `mydataset.mymodel`, (SELECT * FROM t1), STRUCT())", + "SELECT * FROM ML.FORECAST(MODEL `mydataset`.`mymodel`, (SELECT * FROM t1), STRUCT())", + ); +} From 4b029cce3157874968f446438401b64b1a1fe7f4 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 07:01:40 +0100 Subject: [PATCH 55/81] duckdb: support TRIM(expr, characters) comma syntax Add DuckDbDialect to the set of dialects parsing TRIM(expr, chars) with comma-separated characters argument. 
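For reference (outside the patch itself), matching the new test: the second argument
is the set of characters to strip.

    use sqlparser::dialect::DuckDbDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "SELECT TRIM('***apple***', '*') AS result";
        let ast = Parser::parse_sql(&DuckDbDialect {}, sql).expect("parse failed");
        assert_eq!(ast[0].to_string(), sql);
    }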
--- src/parser/mod.rs | 2 +- tests/sqlparser_common.rs | 2 +- tests/sqlparser_duckdb.rs | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2be9966450..6ea17246ab 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1702,7 +1702,7 @@ impl<'a> Parser<'a> { trim_characters: None, }) } else if self.consume_token(&Token::Comma) - && dialect_of!(self is SnowflakeDialect | BigQueryDialect | RedshiftSqlDialect) + && dialect_of!(self is SnowflakeDialect | BigQueryDialect | RedshiftSqlDialect | DuckDbDialect) { let characters = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 46a4e3d432..b13bd4a2d8 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5856,7 +5856,7 @@ fn parse_trim() { Box::new(MySqlDialect {}), //Box::new(BigQueryDialect {}), Box::new(SQLiteDialect {}), - Box::new(DuckDbDialect {}), + //Box::new(DuckDbDialect {}), ], options: None, }; diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 647e33ca19..1b8f06f8fd 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -396,3 +396,9 @@ fn test_filter_without_where() { // Standard FILTER (WHERE expr) syntax should still work duckdb().verified_stmt("SELECT SUM(x) FILTER (WHERE x = 1)"); } + +#[test] +fn test_trim_with_characters() { + // DuckDB supports TRIM(expr, characters) syntax + duckdb().verified_stmt("SELECT TRIM('***apple***', '*') AS result"); +} From 19c2b4940cf1eb1f3e436c8c293ca57f76cb6442 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 07:09:32 +0100 Subject: [PATCH 56/81] all dialects: support bitwise NOT (~) as unary prefix operator Move ~ out of PostgreSQL-only prefix operator block so it parses as UnaryOperator::PGBitwiseNot in all dialects including BigQuery, ClickHouse, DuckDB. 
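A small sketch (not part of the diff; the column name is a placeholder): `~` now
parses as a prefix operator outside PostgreSQL as well, for example under the
BigQuery dialect.

    use sqlparser::dialect::BigQueryDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "SELECT ~flags FROM t";
        Parser::parse_sql(&BigQueryDialect {}, sql).expect("should parse");
    }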
--- src/parser/mod.rs | 8 ++++++-- tests/sqlparser_common.rs | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6ea17246ab..b025f0f311 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -984,11 +984,16 @@ impl<'a> Parser<'a> { expr: Box::new(self.parse_subexpr(Self::MUL_DIV_MOD_OP_PREC)?), }) } + Token::Tilde => { + Ok(Expr::UnaryOp { + op: UnaryOperator::PGBitwiseNot, + expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), + }) + } tok @ Token::DoubleExclamationMark | tok @ Token::PGSquareRoot | tok @ Token::PGCubeRoot | tok @ Token::AtSign - | tok @ Token::Tilde if dialect_of!(self is PostgreSqlDialect) => { let op = match tok { @@ -996,7 +1001,6 @@ impl<'a> Parser<'a> { Token::PGSquareRoot => UnaryOperator::PGSquareRoot, Token::PGCubeRoot => UnaryOperator::PGCubeRoot, Token::AtSign => UnaryOperator::PGAbs, - Token::Tilde => UnaryOperator::PGBitwiseNot, _ => unreachable!(), }; Ok(Expr::UnaryOp { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index b13bd4a2d8..ec49831715 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1676,6 +1676,13 @@ fn parse_bitwise_ops() { } } +#[test] +fn parse_unary_bitwise_not() { + // Bitwise NOT (~) should work across all dialects + verified_stmt("SELECT ~1"); + verified_stmt("SELECT ~a FROM t"); +} + #[test] fn parse_binary_any() { let select = verified_only_select("SELECT a = ANY(b)"); From 5df11b8c08a4542143c8c044ed452d2a9e87a619 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 07:17:19 +0100 Subject: [PATCH 57/81] databricks: support [*] wildcard subscript in colon JSON paths Add Token::Mul to allowed tokens in parse_snowflake_json_path so c1:item[*].price wildcard array traversal expressions parse correctly. 
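Usage sketch (not part of the diff; the FROM clause is simplified to a plain table,
and the dialect struct is assumed to be DatabricksDialect as used elsewhere in the
test suite): the `[*]` wildcard stays embedded in the colon JSON path string.

    use sqlparser::dialect::DatabricksDialect;
    use sqlparser::parser::Parser;

    fn main() {
        let sql = "SELECT c1:item[*].price FROM t";
        Parser::parse_sql(&DatabricksDialect {}, sql).expect("should parse");
    }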
--- src/parser/mod.rs | 1 + tests/sqlparser_databricks.rs | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b025f0f311..e415a107aa 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7237,6 +7237,7 @@ impl<'a> Parser<'a> { buf.push('[') } Token::RBracket => buf.push(']'), + Token::Mul => buf.push('*'), Token::Colon => buf.push(':'), Token::DoubleQuotedString(ref s) => write!(buf, "\"{}\"", s).unwrap(), Token::SingleQuotedString(ref s) => write!(buf, "'{}'", s).unwrap(), diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index bea03015b6..c4b961f999 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -297,6 +297,24 @@ fn test_json_path_with_colon() { "SELECT c1:['price'] FROM VALUES ('{ \"price\": 5 }') AS T(c1)", "SELECT c1:['price'] FROM (VALUES ('{ \"price\": 5 }')) AS T (c1)", ); + + // Wildcard array subscript [*] in JSON path + databricks().one_statement_parses_to( + "SELECT c1:item[*].price FROM VALUES ('{ \"item\": [ { \"model\" : \"basic\", \"price\" : 6.12 } ] }') AS T(c1)", + "SELECT c1:item[*].price FROM (VALUES ('{ \"item\": [ { \"model\" : \"basic\", \"price\" : 6.12 } ] }')) AS T (c1)", + ); + + // Wildcard array subscript used inside function arguments + databricks().one_statement_parses_to( + "SELECT INLINE(FROM_JSON(c1:item[*], 'ARRAY>')) FROM VALUES ('{}') AS T(c1)", + "SELECT INLINE(FROM_JSON(c1:item[*], 'ARRAY>')) FROM (VALUES ('{}')) AS T (c1)", + ); + + // Wildcard array subscript followed by further subscript access + databricks().one_statement_parses_to( + "SELECT FROM_JSON(c1:item[*].price, 'ARRAY')[0] FROM VALUES ('{}') AS T(c1)", + "SELECT FROM_JSON(c1:item[*].price, 'ARRAY')[0] FROM (VALUES ('{}')) AS T (c1)", + ); } #[test] From 64f3d2e67c28b634e53d3c6e2fe9f2548be7a134 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 07:24:03 +0100 Subject: [PATCH 58/81] duckdb: support #N positional column references Parse DuckDB's #1, #2 positional column references as Value::Placeholder. DuckDB tokenizes # as Token::Sharp followed by a number. --- src/parser/mod.rs | 13 +++++++++++++ tests/sqlparser_duckdb.rs | 9 +++++++++ 2 files changed, 22 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e415a107aa..93e6854765 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -984,6 +984,19 @@ impl<'a> Parser<'a> { expr: Box::new(self.parse_subexpr(Self::MUL_DIV_MOD_OP_PREC)?), }) } + Token::Sharp if dialect_of!(self is DuckDbDialect) => { + // DuckDB positional reference: #1, #2, etc. + match self.peek_token().token { + Token::Number(ref s, _) => { + let s = format!("#{}", s); + self.next_token(); + Ok(Expr::Value(Value::Placeholder(s))) + } + _ => { + self.expected("a number after #", self.peek_token())? + } + } + } Token::Tilde => { Ok(Expr::UnaryOp { op: UnaryOperator::PGBitwiseNot, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 1b8f06f8fd..eb7dfd9af8 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -402,3 +402,12 @@ fn test_trim_with_characters() { // DuckDB supports TRIM(expr, characters) syntax duckdb().verified_stmt("SELECT TRIM('***apple***', '*') AS result"); } + +#[test] +fn test_positional_reference() { + // DuckDB supports #N positional references + // Note: GenericDialect tokenizes #2 as identifier (since # is identifier_start), + // DuckDB tokenizes as Sharp + Number, so we only test DuckDB dialect. 
+ duckdb().verified_stmt("SELECT #2 AS a, #1 AS b FROM (VALUES (1, 'foo'))"); + duckdb().verified_stmt("SELECT #2, #1 FROM (VALUES (1, 'foo'))"); +} From b77991abc190245cc64a2dfac7a3b619222e0320 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 07:27:21 +0100 Subject: [PATCH 59/81] duckdb, redshift: accept E'...' escape string literals Extend EscapedStringLiteral handling (previously PostgreSQL/Generic only) to DuckDB and Redshift dialects, in both expression parsing and parse_literal_string. --- src/parser/mod.rs | 4 ++-- tests/sqlparser_duckdb.rs | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 93e6854765..098290ccbd 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1021,7 +1021,7 @@ impl<'a> Parser<'a> { expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), }) } - Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => + Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect | DuckDbDialect | RedshiftSqlDialect) => { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) @@ -7348,7 +7348,7 @@ impl<'a> Parser<'a> { Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value), Token::SingleQuotedString(s) => Ok(s), Token::DoubleQuotedString(s) => Ok(s), - Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { + Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect | DuckDbDialect | RedshiftSqlDialect) => { Ok(s) } _ => self.expected("literal string", next_token), diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index eb7dfd9af8..43ca91a1b3 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -411,3 +411,21 @@ fn test_positional_reference() { duckdb().verified_stmt("SELECT #2 AS a, #1 AS b FROM (VALUES (1, 'foo'))"); duckdb().verified_stmt("SELECT #2, #1 FROM (VALUES (1, 'foo'))"); } + +#[test] +fn test_escaped_string_literal() { + // DuckDB supports e'...' escape string syntax + let sql = r"SELECT E'Hello\nworld'"; + let select = duckdb().verified_only_select(sql); + assert_eq!( + &Expr::Value(Value::EscapedStringLiteral("Hello\nworld".to_string())), + expr_from_projection(&select.projection[0]) + ); + + duckdb().verified_only_select(r"SELECT E'\t'"); + duckdb().verified_only_select(r"SELECT E'\n'"); + duckdb().one_statement_parses_to( + r"SELECT e'\n'", + r"SELECT E'\n'", + ); +} From ae8a4476cf645d7b8f823c09150a34c684df53b9 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 07:32:34 +0100 Subject: [PATCH 60/81] postgres: support -|- range adjacency operator Add BinaryOperator::PGAdjacentTo for PostgreSQL's -|- range adjacency operator. Implemented via dialect-level get_next_precedence and parse_infix overrides. --- src/ast/operator.rs | 3 +++ src/dialect/postgresql.rs | 44 ++++++++++++++++++++++++++++++++++++- tests/sqlparser_postgres.rs | 6 +++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index dff2416db5..f2db390df4 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -138,6 +138,8 @@ pub enum BinaryOperator { /// See [CREATE OPERATOR](https://www.postgresql.org/docs/current/sql-createoperator.html) /// for more information. PGCustomBinaryOperator(Vec), + /// Range adjacency operator, e.g. 
`a -|- b` (PostgreSQL-specific) + PGAdjacentTo, /// SQL standard OVERLAPS operator for datetime range comparison, e.g. /// `(start1, end1) OVERLAPS (start2, end2)` Overlaps, @@ -181,6 +183,7 @@ impl fmt::Display for BinaryOperator { BinaryOperator::PGCustomBinaryOperator(idents) => { write!(f, "OPERATOR({})", display_separated(idents, ".")) } + BinaryOperator::PGAdjacentTo => f.write_str("-|-"), BinaryOperator::Overlaps => f.write_str("OVERLAPS"), } } diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 4bc1f6ea5f..1209a4e8b4 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -10,7 +10,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use crate::ast::{CommentObject, Statement}; +use crate::ast::{BinaryOperator, CommentObject, Expr, Statement}; use crate::dialect::Dialect; use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; @@ -50,6 +50,48 @@ impl Dialect for PostgreSqlDialect { fn supports_group_by_expr(&self) -> bool { true } + + fn get_next_precedence(&self, parser: &Parser) -> Option> { + // Detect `-|-` (range adjacency operator) as three tokens: Minus, Pipe, Minus + let tok0 = parser.peek_nth_token(0); + if tok0.token == Token::Minus { + let tok1 = parser.peek_nth_token(1); + let tok2 = parser.peek_nth_token(2); + if tok1.token == Token::Pipe && tok2.token == Token::Minus { + return Some(Ok(50)); // Same precedence as other range operators (&&, @>, <@) + } + } + None + } + + fn parse_infix( + &self, + parser: &mut Parser, + expr: &Expr, + _precedence: u8, + ) -> Option> { + // Parse `-|-` (range adjacency operator) as three tokens: Minus, Pipe, Minus + let tok0 = parser.peek_nth_token(0); + if tok0.token == Token::Minus { + let tok1 = parser.peek_nth_token(1); + let tok2 = parser.peek_nth_token(2); + if tok1.token == Token::Pipe && tok2.token == Token::Minus { + parser.next_token(); // consume `-` + parser.next_token(); // consume `|` + parser.next_token(); // consume `-` + return Some( + parser + .parse_subexpr(50) + .map(|right| Expr::BinaryOp { + left: Box::new(expr.clone()), + op: BinaryOperator::PGAdjacentTo, + right: Box::new(right), + }), + ); + } + } + None + } } pub fn parse_comment(parser: &mut Parser) -> Result { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 627a1276de..e5e78655ab 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1772,6 +1772,7 @@ fn parse_pg_binary_ops() { (">>", BinaryOperator::PGBitwiseShiftRight, pg_and_generic()), ("<<", BinaryOperator::PGBitwiseShiftLeft, pg_and_generic()), ("&&", BinaryOperator::PGOverlap, pg()), + ("-|-", BinaryOperator::PGAdjacentTo, pg()), ]; for (str_op, op, dialects) in binary_ops { @@ -1791,6 +1792,11 @@ fn parse_pg_binary_ops() { } } +#[test] +fn parse_pg_range_adjacent_operator() { + pg().verified_stmt("SELECT NUMRANGE(1.1, 2.2) -|- NUMRANGE(2.2, 3.3)"); +} + #[test] fn parse_pg_unary_ops() { let pg_unary_ops = &[ From 56de0a024e9d54f89e8299b1203478fb073a04df Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 07:36:57 +0100 Subject: [PATCH 61/81] postgres: support <-> distance operator Add BinaryOperator::PGDistance for PostgreSQL's <-> nearest-neighbor distance operator via dialect-level precedence and infix overrides. 
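A hedged example of the intended behavior (assuming the public `Parser::parse_sql` helper; the exact AST shape depends on this fork's span handling):

    use sqlparser::dialect::PostgreSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // Expected to yield a BinaryOp whose op is BinaryOperator::PGAdjacentTo.
        let sql = "SELECT NUMRANGE(1.1, 2.2) -|- NUMRANGE(2.2, 3.3)";
        let stmts = Parser::parse_sql(&PostgreSqlDialect {}, sql).unwrap();
        println!("{stmts:?}");
    }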
--- src/ast/operator.rs | 3 +++ src/dialect/postgresql.rs | 28 ++++++++++++++++++++++++++-- tests/sqlparser_postgres.rs | 10 ++++++++++ 3 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/ast/operator.rs b/src/ast/operator.rs index f2db390df4..50eb81ec2f 100644 --- a/src/ast/operator.rs +++ b/src/ast/operator.rs @@ -140,6 +140,8 @@ pub enum BinaryOperator { PGCustomBinaryOperator(Vec), /// Range adjacency operator, e.g. `a -|- b` (PostgreSQL-specific) PGAdjacentTo, + /// Distance operator, e.g. `a <-> b` (PostgreSQL-specific) + PGDistance, /// SQL standard OVERLAPS operator for datetime range comparison, e.g. /// `(start1, end1) OVERLAPS (start2, end2)` Overlaps, @@ -184,6 +186,7 @@ impl fmt::Display for BinaryOperator { write!(f, "OPERATOR({})", display_separated(idents, ".")) } BinaryOperator::PGAdjacentTo => f.write_str("-|-"), + BinaryOperator::PGDistance => f.write_str("<->"), BinaryOperator::Overlaps => f.write_str("OVERLAPS"), } } diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 1209a4e8b4..4c5a101f7d 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -52,8 +52,8 @@ impl Dialect for PostgreSqlDialect { } fn get_next_precedence(&self, parser: &Parser) -> Option> { - // Detect `-|-` (range adjacency operator) as three tokens: Minus, Pipe, Minus let tok0 = parser.peek_nth_token(0); + // Detect `-|-` (range adjacency operator) as three tokens: Minus, Pipe, Minus if tok0.token == Token::Minus { let tok1 = parser.peek_nth_token(1); let tok2 = parser.peek_nth_token(2); @@ -61,6 +61,13 @@ impl Dialect for PostgreSqlDialect { return Some(Ok(50)); // Same precedence as other range operators (&&, @>, <@) } } + // Detect `<->` (distance operator) as two tokens: Lt, Arrow + if tok0.token == Token::Lt { + let tok1 = parser.peek_nth_token(1); + if tok1.token == Token::Arrow { + return Some(Ok(50)); + } + } None } @@ -70,8 +77,8 @@ impl Dialect for PostgreSqlDialect { expr: &Expr, _precedence: u8, ) -> Option> { - // Parse `-|-` (range adjacency operator) as three tokens: Minus, Pipe, Minus let tok0 = parser.peek_nth_token(0); + // Parse `-|-` (range adjacency operator) as three tokens: Minus, Pipe, Minus if tok0.token == Token::Minus { let tok1 = parser.peek_nth_token(1); let tok2 = parser.peek_nth_token(2); @@ -90,6 +97,23 @@ impl Dialect for PostgreSqlDialect { ); } } + // Parse `<->` (distance operator) as two tokens: Lt, Arrow + if tok0.token == Token::Lt { + let tok1 = parser.peek_nth_token(1); + if tok1.token == Token::Arrow { + parser.next_token(); // consume `<` + parser.next_token(); // consume `->` + return Some( + parser + .parse_subexpr(50) + .map(|right| Expr::BinaryOp { + left: Box::new(expr.clone()), + op: BinaryOperator::PGDistance, + right: Box::new(right), + }), + ); + } + } None } } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index e5e78655ab..19467ff346 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1773,6 +1773,7 @@ fn parse_pg_binary_ops() { ("<<", BinaryOperator::PGBitwiseShiftLeft, pg_and_generic()), ("&&", BinaryOperator::PGOverlap, pg()), ("-|-", BinaryOperator::PGAdjacentTo, pg()), + ("<->", BinaryOperator::PGDistance, pg()), ]; for (str_op, op, dialects) in binary_ops { @@ -1797,6 +1798,15 @@ fn parse_pg_range_adjacent_operator() { pg().verified_stmt("SELECT NUMRANGE(1.1, 2.2) -|- NUMRANGE(2.2, 3.3)"); } +#[test] +fn parse_pg_distance_operator() { + pg().verified_stmt("SELECT a <-> b FROM t"); + pg().one_statement_parses_to( + "SELECT p1.id, p2.id, 
v1, v2 FROM polygons AS p1, polygons AS p2, LATERAL VERTICES(p1.poly) v1, LATERAL VERTICES(p2.poly) v2 WHERE (v1 <-> v2) < 10 AND p1.id <> p2.id", + "SELECT p1.id, p2.id, v1, v2 FROM polygons AS p1, polygons AS p2, LATERAL VERTICES(p1.poly) AS v1, LATERAL VERTICES(p2.poly) AS v2 WHERE (v1 <-> v2) < 10 AND p1.id <> p2.id", + ); +} + #[test] fn parse_pg_unary_ops() { let pg_unary_ops = &[ From f1f40002945a148e643c9bfb60d352ff05174650 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 07:41:00 +0100 Subject: [PATCH 62/81] postgres, redshift: support %s and %(name)s DB-API placeholders Tokenize %s as unnamed and %(name)s as named placeholders in PostgreSQL and Redshift, matching Python DB-API 2.0 parameter style. --- src/tokenizer.rs | 31 +++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 352ba05309..a2620f8e79 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -985,6 +985,37 @@ impl<'a> Tokenizer<'a> { chars.next(); // advance past '%' match chars.peek() { Some(' ') => Ok(Some(Token::Mod)), + // Python DB-API named placeholder: %(name)s + Some('(') => { + chars.next(); // consume '(' + let name = + peeking_take_while(chars, |ch| ch != ')'); + if chars.peek() == Some(&')') { + chars.next(); // consume ')' + // consume the trailing format char (e.g. 's' in %(name)s) + let fmt_char = if chars + .peek() + .map_or(false, |c| c.is_ascii_alphabetic()) + { + let c = *chars.peek().unwrap(); + chars.next(); + String::from(c) + } else { + String::new() + }; + Ok(Some(Token::Placeholder(format!( + "%({name}){fmt_char}" + )))) + } else { + // Malformed, just return Mod and let parser deal with it + Ok(Some(Token::Mod)) + } + } + // Python DB-API positional placeholder: %s, %d, etc. + Some(&c) if c == 's' || c == 'd' || c == 'f' => { + chars.next(); + Ok(Some(Token::Placeholder(format!("%{c}")))) + } Some(sch) if self.dialect.is_identifier_start('%') => { self.tokenize_identifier_or_keyword([ch, *sch], chars) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ec49831715..566086a3be 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -7596,6 +7596,39 @@ fn test_placeholder() { ); } +#[test] +fn test_python_dbapi_placeholders() { + // %s positional placeholder + let sql = "SELECT * FROM foo WHERE id = %s"; + let ast = verified_only_select(sql); + assert_eq!( + ast.selection, + Some( + Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("id").empty_span())), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Placeholder("%s".into()))), + } + .empty_span() + ) + ); + + // %(name)s named placeholder + let sql = "SELECT * FROM foo WHERE id = %(id_param)s"; + let ast = verified_only_select(sql); + assert_eq!( + ast.selection, + Some( + Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("id").empty_span())), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value(Value::Placeholder("%(id_param)s".into()))), + } + .empty_span() + ) + ); +} + #[test] fn all_keywords_sorted() { // assert!(ALL_KEYWORDS.is_sorted()) From b2511c7330d6a7edaef26ce120ef547cf78f7cfe Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 07:46:17 +0100 Subject: [PATCH 63/81] redshift: support >> and << bitwise shift operators Add RedshiftSqlDialect to dialect gates for >> and << operators, previously only enabled for PostgreSQL, DuckDB, and GenericDialect. 
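A small sketch of how this is meant to surface to callers (assumptions: the crate's public `Parser::parse_sql` helper, and that the placeholders survive as `Value::Placeholder` leaves in the AST):

    use sqlparser::dialect::PostgreSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // Both DB-API parameter styles should tokenize as placeholders, not as `%` operators.
        for sql in [
            "SELECT * FROM foo WHERE id = %s",
            "SELECT * FROM foo WHERE id = %(id_param)s",
        ] {
            let stmts = Parser::parse_sql(&PostgreSqlDialect {}, sql).unwrap();
            assert_eq!(stmts.len(), 1);
        }
    }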
--- src/parser/mod.rs | 6 +++--- tests/sqlparser_redshift.rs | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 098290ccbd..04933c709f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2407,7 +2407,7 @@ impl<'a> Parser<'a> { Token::Neq => Some(BinaryOperator::NotEq), Token::Gt => { // Two consecutive > tokens form >> (bitwise shift right) for PostgreSQL/DuckDB - if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) + if dialect_of!(self is PostgreSqlDialect | RedshiftSqlDialect | DuckDbDialect | GenericDialect) && matches!(self.peek_token().token, Token::Gt) { self.next_token(); // consume the second > @@ -2442,7 +2442,7 @@ impl<'a> Parser<'a> { Token::DuckIntDiv if dialect_of!(self is DuckDbDialect | GenericDialect) => { Some(BinaryOperator::DuckIntegerDivide) } - Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => { + Token::ShiftLeft if dialect_of!(self is PostgreSqlDialect | RedshiftSqlDialect | DuckDbDialect | GenericDialect) => { Some(BinaryOperator::PGBitwiseShiftLeft) } Token::Sharp if dialect_of!(self is PostgreSqlDialect) => { @@ -3027,7 +3027,7 @@ impl<'a> Parser<'a> { | Token::Spaceship => Ok(20), Token::Gt => { // Two consecutive > tokens form >> (shift right) at higher precedence - if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) + if dialect_of!(self is PostgreSqlDialect | RedshiftSqlDialect | DuckDbDialect | GenericDialect) && matches!(self.peek_nth_token(1).token, Token::Gt) { Ok(22) diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 82010a0609..a07c59d143 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -551,3 +551,12 @@ fn test_approximate_count() { _ => panic!("Expected Function"), } } + +#[test] +fn test_redshift_bitwise_shift_operators() { + // Redshift supports >> (right shift) and << (left shift) like PostgreSQL + redshift().verified_stmt("SELECT a >> 16"); + redshift().verified_stmt("SELECT a << 4"); + redshift().verified_stmt("SELECT (col - 4) >> 16"); + redshift().verified_stmt("SELECT (col - 4) & 65535"); +} From d513e73808f2d64c654770360465f006ff994c61 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 08:06:18 +0100 Subject: [PATCH 64/81] multi-dialect: fix "Expected literal string" parse errors Fix 16 corpus failures: BigQuery triple-quoted raw strings, ClickHouse {name:Type} query parameters, Databricks {:name} named parameters, dollar-quoted strings in function definitions, and RawStringLiteral in parse_literal_string. 
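Sketch of the new acceptance (hypothetical usage, assuming `RedshiftSqlDialect` and `Parser::parse_sql` from the public API):

    use sqlparser::dialect::RedshiftSqlDialect;
    use sqlparser::parser::Parser;

    fn main() {
        // `>>` and `<<` now parse under Redshift with the same precedence they have in PostgreSQL.
        let sql = "SELECT (col - 4) >> 16, col << 2 FROM t";
        let stmts = Parser::parse_sql(&RedshiftSqlDialect {}, sql).unwrap();
        assert_eq!(stmts.len(), 1);
    }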
---
 src/parser/mod.rs             | 40 ++++++++++++++-
 src/tokenizer.rs              | 95 ++++++++++++++++++++++++++++++++---
 tests/sqlparser_bigquery.rs   | 53 +++++++++++++++++++
 tests/sqlparser_clickhouse.rs |  7 +++
 tests/sqlparser_databricks.rs | 27 ++++++++++
 tests/sqlparser_postgres.rs   |  6 +++
 tests/sqlparser_snowflake.rs  | 15 ++++++
 7 files changed, 236 insertions(+), 7 deletions(-)

diff --git a/src/parser/mod.rs b/src/parser/mod.rs
index 04933c709f..9f9b7256cf 100644
--- a/src/parser/mod.rs
+++ b/src/parser/mod.rs
@@ -7175,6 +7175,42 @@ impl<'a> Parser<'a> {
         if self.consume_token(&Token::RBrace) {
             return Ok(Value::ObjectConstant(vec![]));
         }
+
+        // ClickHouse query parameters: {name: Type}
+        if dialect_of!(self is ClickHouseDialect | GenericDialect) {
+            let idx = self.index;
+            if let Ok(name) = self.parse_identifier(false) {
+                if self.consume_token(&Token::Colon) {
+                    if let Ok(type_name) = self.parse_identifier(false) {
+                        if self.consume_token(&Token::RBrace) {
+                            return Ok(Value::Placeholder(format!(
+                                "{{{}: {}}}",
+                                name.unwrap().value,
+                                type_name.unwrap().value
+                            )));
+                        }
+                    }
+                }
+            }
+            self.index = idx;
+        }
+
+        // Databricks named parameters: {:name}
+        if dialect_of!(self is DatabricksDialect | GenericDialect) {
+            let idx = self.index;
+            if self.consume_token(&Token::Colon) {
+                if let Ok(name) = self.parse_identifier(false) {
+                    if self.consume_token(&Token::RBrace) {
+                        return Ok(Value::Placeholder(format!(
+                            "{{:{}}}",
+                            name.unwrap().value
+                        )));
+                    }
+                }
+            }
+            self.index = idx;
+        }
+
         let mut fields = vec![];
         loop {
             let key = self.parse_literal_string()?;
@@ -7325,7 +7361,7 @@ impl<'a> Parser<'a> {
     pub fn parse_function_definition(&mut self) -> Result {
         let peek_token = self.peek_token();
         match peek_token.token {
-            Token::DollarQuotedString(value) if dialect_of!(self is PostgreSqlDialect) => {
+            Token::DollarQuotedString(value) => {
                 self.next_token();
                 Ok(FunctionDefinition::DoubleDollarDef(value.value))
             }
@@ -7351,6 +7387,8 @@ impl<'a> Parser<'a> {
             Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect | DuckDbDialect | RedshiftSqlDialect) => {
                 Ok(s)
             }
+            Token::RawStringLiteral(s) => Ok(s),
+            Token::DollarQuotedString(s) => Ok(s.value),
             _ => self.expected("literal string", next_token),
         }
     }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index a2620f8e79..9316eef470 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -743,12 +743,8 @@ impl<'a> Tokenizer<'a> {
             {
                 chars.next(); // consume
                 match chars.peek() {
-                    Some('\'') => {
-                        let s = self.tokenize_quoted_string(chars, '\'')?;
-                        Ok(Some(Token::RawStringLiteral(s)))
-                    }
-                    Some('\"') => {
-                        let s = self.tokenize_quoted_string(chars, '\"')?;
+                    Some(&quote @ '\'') | Some(&quote @ '\"') => {
+                        let s = self.tokenize_possibly_triple_quoted_string(chars, quote)?;
                         Ok(Some(Token::RawStringLiteral(s)))
                     }
                     _ => {
@@ -1469,6 +1465,93 @@ impl<'a> Tokenizer<'a> {
         self.tokenizer_error(error_loc, "Unterminated string literal")
     }
+    /// Try to tokenize a triple-quoted string ("""...""" or '''...'''), falling
+    /// back to a regular quoted string if not triple-quoted.
+    fn tokenize_possibly_triple_quoted_string(
+        &self,
+        chars: &mut State,
+        quote_style: char,
+    ) -> Result {
+        let error_loc = chars.location();
+
+        // Consume the first quote
+        chars.next();
+
+        // Check for triple-quote: we need two more quotes
+        if chars.peek() == Some(&quote_style) {
+            // Consume second quote
+            chars.next();
+            if chars.peek() == Some(&quote_style) {
+                // Consume third quote - this is a triple-quoted string
+                chars.next();
+                return self.tokenize_triple_quoted_string(chars, quote_style, error_loc);
+            } else {
+                // Just two quotes: "" or '' - this is an empty string
+                return Ok(String::new());
+            }
+        }
+
+        // Not triple-quoted, tokenize as a regular quoted string
+        // (opening quote already consumed, so we manually do the loop)
+        let mut s = String::new();
+        while let Some(&ch) = chars.peek() {
+            match ch {
+                c if c == quote_style => {
+                    chars.next(); // consume
+                    if chars.peek().map(|c| *c == quote_style).unwrap_or(false) {
+                        s.push(ch);
+                        if !self.unescape {
+                            s.push(ch);
+                        }
+                        chars.next();
+                    } else {
+                        return Ok(s);
+                    }
+                }
+                _ => {
+                    chars.next(); // consume
+                    s.push(ch);
+                }
+            }
+        }
+        self.tokenizer_error(error_loc, "Unterminated string literal")
+    }
+
+    /// Read a triple-quoted string (the opening """ or ''' has already been consumed).
+    /// The string ends when we encounter the matching closing triple-quote.
+    fn tokenize_triple_quoted_string(
+        &self,
+        chars: &mut State,
+        quote_style: char,
+        error_loc: Location,
+    ) -> Result {
+        let mut s = String::new();
+
+        while let Some(&ch) = chars.peek() {
+            if ch == quote_style {
+                chars.next(); // consume first quote
+                if chars.peek() == Some(&quote_style) {
+                    chars.next(); // consume second quote
+                    if chars.peek() == Some(&quote_style) {
+                        chars.next(); // consume third quote - end of string
+                        return Ok(s);
+                    } else {
+                        // Only two quotes in a row - they are part of the content
+                        s.push(quote_style);
+                        s.push(quote_style);
+                    }
+                } else {
+                    // Only one quote - it's part of the content
+                    s.push(quote_style);
+                }
+            } else {
+                chars.next();
+                s.push(ch);
+            }
+        }
+        self.tokenizer_error(error_loc, "Unterminated triple-quoted string literal")
+    }
+
     fn tokenize_multiline_comment(
         &self,
         chars: &mut State,
diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs
index 4305eebca7..3a1c663bf2 100644
--- a/tests/sqlparser_bigquery.rs
+++ b/tests/sqlparser_bigquery.rs
@@ -87,6 +87,59 @@ fn parse_raw_literal() {
     panic!("invalid query")
 }
 
+#[test]
+fn parse_raw_literal_triple_quoted() {
+    // Simple triple-quoted raw string
+    let sql = "SELECT R\"\"\"hello world\"\"\"";
+    let stmt = bigquery_unescaped().one_statement_parses_to(
+        sql,
+        "SELECT R'hello world'",
+    );
+    if let Statement::Query(query) = stmt {
+        if let SetExpr::Select(select) = *query.body {
+            assert_eq!(1, select.projection.len());
+            assert_eq!(
+                &Expr::Value(Value::RawStringLiteral("hello world".to_string())),
+                expr_from_projection(&select.projection[0])
+            );
+        } else {
+            panic!("invalid query body")
+        }
+    } else {
+        panic!("invalid statement")
+    }
+
+    // Triple-quoted raw string with embedded quotes and newlines
+    let sql = "SELECT R\"\"\"\n return x*y;\n\"\"\"";
+    let stmt = bigquery_unescaped().one_statement_parses_to(
+        sql,
+        "SELECT R'\n return x*y;\n'",
+    );
+    if let Statement::Query(query) = stmt {
+        if let SetExpr::Select(select) = *query.body {
+            assert_eq!(1, select.projection.len());
+            assert_eq!(
+                &Expr::Value(Value::RawStringLiteral("\n return x*y;\n".to_string())),
+                expr_from_projection(&select.projection[0])
+            );
+        } else {
+            panic!("invalid query
body") + } + } else { + panic!("invalid statement") + } +} + +#[test] +fn parse_create_function_with_triple_quoted_raw_string() { + // BigQuery CREATE TEMP FUNCTION with r"""...""" body (JS UDF) + let sql = "CREATE TEMP FUNCTION multiplyInputs(x FLOAT64, y FLOAT64) RETURNS FLOAT64 LANGUAGE js AS r\"\"\"\n return x*y;\n\"\"\""; + bigquery_unescaped().one_statement_parses_to( + sql, + "CREATE TEMPORARY FUNCTION multiplyInputs(x FLOAT64, y FLOAT64) RETURNS FLOAT64 LANGUAGE js AS '\n return x*y;\n'", + ); +} + #[test] fn parse_nested_data_types() { let sql = "CREATE TABLE table (x STRUCT, b BYTES(42)>, y ARRAY>)"; diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 2c7c3b7e0b..344ef7c7c0 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -1593,6 +1593,13 @@ fn parse_columns_with_apply_transformers() { } } +#[test] +fn test_query_parameters() { + // ClickHouse query parameters: {name: Type} + let sql = "SELECT * FROM users WHERE id = {id: UInt32} AND name = {name: String}"; + clickhouse_and_generic().one_statement_parses_to(sql, sql); +} + fn clickhouse_and_generic() -> TestedDialects { TestedDialects { dialects: vec![Box::new(ClickHouseDialect {}), Box::new(GenericDialect {})], diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index c4b961f999..d502b37ede 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -326,3 +326,30 @@ fn test_try_cast_operator() { // Chaining with regular cast databricks().one_statement_parses_to("SELECT col?::VARCHAR", "SELECT TRY_CAST(col AS VARCHAR)"); } + +#[test] +fn test_named_parameter_placeholder() { + // Databricks {:name} parameter syntax + let sql = "SELECT * FROM tbl WHERE col = {:store} LIMIT 10"; + databricks_and_generic().one_statement_parses_to(sql, sql); +} + +#[test] +fn test_create_function_dollar_quoted() { + // Databricks CREATE FUNCTION with $$...$$ body + let sql = "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $$def add_one(x):\n return x+1$$"; + databricks().one_statement_parses_to( + sql, + "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $$def add_one(x):\n return x+1$$", + ); +} + +#[test] +fn test_create_function_tagged_dollar_quoted() { + // Databricks CREATE FUNCTION with $TAG$...$TAG$ body + let sql = "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $FOO$def add_one(x):\n return x+1$FOO$"; + databricks().one_statement_parses_to( + sql, + "CREATE FUNCTION add_one(x INT) RETURNS INT LANGUAGE PYTHON AS $$def add_one(x):\n return x+1$$", + ); +} diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 19467ff346..36bf2233be 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2564,6 +2564,12 @@ fn parse_comments() { _ => unreachable!(), } + // Dollar-quoted comment + pg().one_statement_parses_to( + "COMMENT ON TABLE mytable IS $$doc this$$", + "COMMENT ON TABLE mytable IS 'doc this'", + ); + match pg().verified_stmt("COMMENT IF EXISTS ON TABLE public.tab IS NULL") { Statement::Comment { object_type, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index a317dfda8e..9fb3bd3fd9 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1971,3 +1971,18 @@ fn test_placeholder_field_access() { // Multi-level field access snowflake().verified_stmt("SELECT $1.elem.sub"); } + +#[test] +fn test_create_function_dollar_quoted() { + // Snowflake CREATE FUNCTION with $$...$$ body (extra clauses like 
RUNTIME_VERSION are consumed but not serialized) + snowflake().one_statement_parses_to( + "CREATE OR REPLACE FUNCTION py_udf() RETURNS VARIANT LANGUAGE PYTHON RUNTIME_VERSION = '3.10' HANDLER = 'udf' AS $$\nimport numpy as np\ndef udf():\n return [np.__version__]\n$$", + "CREATE OR REPLACE FUNCTION py_udf RETURNS VARIANT LANGUAGE PYTHON AS $$\nimport numpy as np\ndef udf():\n return [np.__version__]\n$$", + ); + + // Simple function with $$...$$ body (no extra clauses) + snowflake().one_statement_parses_to( + "CREATE FUNCTION echo_varchar(x VARCHAR) RETURNS VARCHAR LANGUAGE SCALA AS $$\n class Echo {\n def echoVarchar(x : String): String = {\n return x\n }\n }\n $$", + "CREATE FUNCTION echo_varchar(x VARCHAR) RETURNS VARCHAR LANGUAGE SCALA AS $$\n class Echo {\n def echoVarchar(x : String): String = {\n return x\n }\n }\n $$", + ); +} From 76bf5d45b1e08a2075a3b1c00cda2cff59c1e8b7 Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 08:33:52 +0100 Subject: [PATCH 65/81] multi-dialect: fix "Expected end of statement, found: WITH" errors Add GROUP BY WITH CUBE/ROLLUP/TOTALS, WITH ORDINALITY on table functions, ALTER TABLE ADD/DROP PROJECTION (ClickHouse), CREATE SEQUENCE WITH (Snowflake), and CREATE SCHEMA WITH properties (Trino). --- src/ast/ddl.rs | 44 +++++++ src/ast/mod.rs | 17 ++- src/ast/query.rs | 77 ++++++++++-- src/keywords.rs | 2 + src/parser/mod.rs | 229 ++++++++++++++++++++++++---------- src/test_utils.rs | 1 + tests/sqlparser_bigquery.rs | 3 + tests/sqlparser_clickhouse.rs | 44 ++++++- tests/sqlparser_common.rs | 61 +++++++-- tests/sqlparser_duckdb.rs | 12 +- tests/sqlparser_hive.rs | 1 + tests/sqlparser_mssql.rs | 7 +- tests/sqlparser_mysql.rs | 21 ++-- tests/sqlparser_postgres.rs | 46 +++++-- tests/sqlparser_redshift.rs | 3 + tests/sqlparser_snowflake.rs | 13 +- 16 files changed, 475 insertions(+), 106 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 58c9f7e509..2ef4e84a72 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -143,6 +143,24 @@ pub enum AlterTableOperation { DropRowAccessPolicy { policy: ObjectName, }, + + /// `ADD PROJECTION [IF NOT EXISTS] ( [ORDER BY ...]) [WITH SETTINGS (...)]` /// @@ -162,10 +157,7 @@ pub enum AlterTableOperation { /// /// Note: this is a ClickHouse-specific operation /// - DropProjection { - if_exists: bool, - name: Ident, - }, + DropProjection { if_exists: bool, name: Ident }, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 26bdf0c0b1..2818a2bca6 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -45,14 +45,14 @@ pub use self::ddl::{ pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AggregateItem, ColumnTransformer, Cte, Distinct, ExceptSelectItem, ExcludeSelectItem, Fetch, - FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, Interpolate, InterpolateExpr, Join, JoinConstraint, - JoinOperator, LateralView, LockClause, LockType, NamedWindowDefinition, NamedWindowExpr, NonBlock, Offset, - OffsetRows, OrderBy, OrderByExpr, PivotValue, PivotValueSource, Query, RenameSelectItem, - ReplaceSelectElement, ReplaceSelectItem, SamplingMethod, Select, SelectInto, SelectItem, - SelectionCount, SetExpr, SetOperator, SetQuantifier, Setting, Table, TableAlias, TableFactor, - TableSampleSeed, TableVersion, TableWithJoins, Top, UnpivotInValue, UnpivotNullHandling, - ValueTableMode, - Values, WildcardAdditionalOptions, With, WithFill, + FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, Interpolate, 
InterpolateExpr, + Join, JoinConstraint, JoinOperator, LateralView, LockClause, LockType, NamedWindowDefinition, + NamedWindowExpr, NonBlock, Offset, OffsetRows, OrderBy, OrderByExpr, PivotValue, + PivotValueSource, Query, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, + SamplingMethod, Select, SelectInto, SelectItem, SelectionCount, SetExpr, SetOperator, + SetQuantifier, Setting, Table, TableAlias, TableFactor, TableSampleSeed, TableVersion, + TableWithJoins, Top, UnpivotInValue, UnpivotNullHandling, ValueTableMode, Values, + WildcardAdditionalOptions, With, WithFill, }; pub use self::value::{ escape_quoted_string, DateTimeField, DollarQuotedString, ObjectConstantKeyValue, @@ -1252,11 +1252,7 @@ impl fmt::Display for Expr { write!(f, "{set}") } Expr::TypedArray { data_type, values } => { - write!( - f, - "ARRAY<{data_type}>[{}]", - display_comma_separated(values) - ) + write!(f, "ARRAY<{data_type}>[{}]", display_comma_separated(values)) } Expr::JsonAccess { left, @@ -3168,7 +3164,11 @@ impl fmt::Display for Statement { } write!(f, "{}", display_comma_separated(projections))?; write!(f, ")")?; - } else if query.is_none() && like.is_none() && clone.is_none() && using_template.is_none() { + } else if query.is_none() + && like.is_none() + && clone.is_none() + && using_template.is_none() + { // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens write!(f, " ()")?; } @@ -3830,11 +3830,7 @@ impl fmt::Display for Statement { write!(f, " COMMENT='{c}'")?; } if !with_properties.is_empty() { - write!( - f, - " WITH ({})", - display_comma_separated(with_properties) - )?; + write!(f, " WITH ({})", display_comma_separated(with_properties))?; } Ok(()) } diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 3cad47e92e..c002854659 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -684,7 +684,7 @@ mod tests { .unwrap(); let mut visitor = TestVisitor::default(); - s.visit(&mut visitor); + let _ = s.visit(&mut visitor); visitor.visited } diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 4c5a101f7d..4d785e7f52 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -86,15 +86,11 @@ impl Dialect for PostgreSqlDialect { parser.next_token(); // consume `-` parser.next_token(); // consume `|` parser.next_token(); // consume `-` - return Some( - parser - .parse_subexpr(50) - .map(|right| Expr::BinaryOp { - left: Box::new(expr.clone()), - op: BinaryOperator::PGAdjacentTo, - right: Box::new(right), - }), - ); + return Some(parser.parse_subexpr(50).map(|right| Expr::BinaryOp { + left: Box::new(expr.clone()), + op: BinaryOperator::PGAdjacentTo, + right: Box::new(right), + })); } } // Parse `<->` (distance operator) as two tokens: Lt, Arrow @@ -103,15 +99,11 @@ impl Dialect for PostgreSqlDialect { if tok1.token == Token::Arrow { parser.next_token(); // consume `<` parser.next_token(); // consume `->` - return Some( - parser - .parse_subexpr(50) - .map(|right| Expr::BinaryOp { - left: Box::new(expr.clone()), - op: BinaryOperator::PGDistance, - right: Box::new(right), - }), - ); + return Some(parser.parse_subexpr(50).map(|right| Expr::BinaryOp { + left: Box::new(expr.clone()), + op: BinaryOperator::PGDistance, + right: Box::new(right), + })); } } None diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e95b4484a5..52506196db 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -995,17 +995,13 @@ impl<'a> Parser<'a> { self.next_token(); Ok(Expr::Value(Value::Placeholder(s))) } - _ => { - self.expected("a number after #", 
self.peek_token())? - } + _ => self.expected("a number after #", self.peek_token())?, } } - Token::Tilde => { - Ok(Expr::UnaryOp { - op: UnaryOperator::PGBitwiseNot, - expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), - }) - } + Token::Tilde => Ok(Expr::UnaryOp { + op: UnaryOperator::PGBitwiseNot, + expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?), + }), tok @ Token::DoubleExclamationMark | tok @ Token::PGSquareRoot | tok @ Token::PGCubeRoot @@ -1314,9 +1310,7 @@ impl<'a> Parser<'a> { /// Parse optional WITH modifiers after GROUP BY expressions: /// WITH CUBE, WITH ROLLUP, WITH TOTALS - fn parse_group_by_with_modifiers( - &mut self, - ) -> Result, ParserError> { + fn parse_group_by_with_modifiers(&mut self) -> Result, ParserError> { let mut modifiers = vec![]; loop { if self.parse_keywords(&[Keyword::WITH, Keyword::CUBE]) { @@ -2198,9 +2192,7 @@ impl<'a> Parser<'a> { let values = if self.peek_token_kind() == &Token::RParen { vec![] } else { - self.parse_comma_separated(|parser| { - parser.parse_struct_field_expr(!fields.is_empty()) - })? + self.parse_comma_separated(|parser| parser.parse_struct_field_expr(!fields.is_empty()))? }; self.expect_token(&Token::RParen)?; @@ -3034,8 +3026,12 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC), Token::Word(w) if w.keyword == Keyword::GLOB => Ok(Self::LIKE_PREC), - Token::Word(w) if w.keyword == Keyword::OVERLAPS - && dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => Ok(Self::BETWEEN_PREC), + Token::Word(w) + if w.keyword == Keyword::OVERLAPS + && dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => + { + Ok(Self::BETWEEN_PREC) + } Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC), Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC), Token::Eq @@ -6100,8 +6096,7 @@ impl<'a> Parser<'a> { let seed = Some(self.parse_expr()?); self.expect_keyword(Keyword::INCREMENT)?; let increment = Some(self.parse_expr()?); - let _ = - self.parse_keyword(Keyword::ORDER) || self.parse_keyword(Keyword::NOORDER); + let _ = self.parse_keyword(Keyword::ORDER) || self.parse_keyword(Keyword::NOORDER); Ok(Some(ColumnOption::Identity { seed, increment })) } else { let (seed, increment) = if self.consume_token(&Token::LParen) { @@ -6114,8 +6109,8 @@ impl<'a> Parser<'a> { (None, None) }; if dialect_of!(self is SnowflakeDialect | GenericDialect) { - let _ = self.parse_keyword(Keyword::ORDER) - || self.parse_keyword(Keyword::NOORDER); + let _ = + self.parse_keyword(Keyword::ORDER) || self.parse_keyword(Keyword::NOORDER); } Ok(Some(ColumnOption::Identity { seed, increment })) } @@ -6522,8 +6517,8 @@ impl<'a> Parser<'a> { let policy = self.parse_object_name(false)?; self.expect_keyword(Keyword::ON)?; self.expect_token(&Token::LParen)?; - let columns = - self.parse_comma_separated(|p| p.parse_identifier(false).map(|id| id.unwrap()))?; + let columns = self + .parse_comma_separated(|p| p.parse_identifier(false).map(|id| id.unwrap()))?; self.expect_token(&Token::RParen)?; AlterTableOperation::AddRowAccessPolicy { policy, @@ -8302,9 +8297,7 @@ impl<'a> Parser<'a> { pub fn parse_identifier_or_number(&mut self) -> Result, ParserError> { let next_token = self.next_token(); match next_token.token { - Token::Number(ref n, _) => { - Ok(Ident::new(n.clone()).spanning(next_token.span)) - } + Token::Number(ref n, _) => 
Ok(Ident::new(n.clone()).spanning(next_token.span)), _ => { self.prev_token(); self.parse_identifier(false) @@ -8718,12 +8711,11 @@ impl<'a> Parser<'a> { // ClickHouse: EXPLAIN [type] [setting = value, ...] statement // Only for EXPLAIN, not DESCRIBE/DESC (describe_alias=true means DESC/DESCRIBE) - let (explain_type, options) = - if !describe_alias && dialect_of!(self is ClickHouseDialect) { - self.parse_explain_options()? - } else { - (None, vec![]) - }; + let (explain_type, options) = if !describe_alias && dialect_of!(self is ClickHouseDialect) { + self.parse_explain_options()? + } else { + (None, vec![]) + }; match self.maybe_parse(|parser| parser.parse_statement()) { Some(Statement::Explain { .. }) | Some(Statement::ExplainTable { .. }) => Err( @@ -8771,25 +8763,26 @@ impl<'a> Parser<'a> { // DESCRIBE FUNCTION name(type1, type2, ...) - parse function signature // Supports TABLE(type, ...) parameter types for DMFs - let function_params = - if object_type_kw == Some(Keyword::FUNCTION) && self.peek_token().token == Token::LParen { - self.expect_token(&Token::LParen)?; - let params = self.parse_comma_separated(|p| { - // Handle TABLE(type, type, ...) parameter type notation - if p.parse_keyword(Keyword::TABLE) { - p.expect_token(&Token::LParen)?; - let inner_types = p.parse_comma_separated(|p2| p2.parse_data_type())?; - p.expect_token(&Token::RParen)?; - Ok(DescribeFunctionParam::Table(inner_types)) - } else { - Ok(DescribeFunctionParam::DataType(p.parse_data_type()?)) - } - })?; - self.expect_token(&Token::RParen)?; - Some(params) - } else { - None - }; + let function_params = if object_type_kw == Some(Keyword::FUNCTION) + && self.peek_token().token == Token::LParen + { + self.expect_token(&Token::LParen)?; + let params = self.parse_comma_separated(|p| { + // Handle TABLE(type, type, ...) parameter type notation + if p.parse_keyword(Keyword::TABLE) { + p.expect_token(&Token::LParen)?; + let inner_types = p.parse_comma_separated(|p2| p2.parse_data_type())?; + p.expect_token(&Token::RParen)?; + Ok(DescribeFunctionParam::Table(inner_types)) + } else { + Ok(DescribeFunctionParam::DataType(p.parse_data_type()?)) + } + })?; + self.expect_token(&Token::RParen)?; + Some(params) + } else { + None + }; // Snowflake: DESCRIBE TABLE tab type=stage // Parse key=value options (identifier followed by = and value) @@ -9772,16 +9765,15 @@ impl<'a> Parser<'a> { extended: bool, full: bool, ) -> Result { - let show_in = - self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])? == Keyword::IN; + let show_in = self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])? 
== Keyword::IN; // Optionally consume TABLE or VIEW keyword (Snowflake: SHOW COLUMNS IN TABLE ) // Only if followed by an identifier (the table name), otherwise it IS the table name let show_object_kind = if let Some(kw) = self.parse_one_of_keywords(&[Keyword::TABLE, Keyword::VIEW]) { match self.peek_token_ref().token { - Token::Word(_) | Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) => { - Some(Ident::new(format!("{kw:?}"))) - } + Token::Word(_) + | Token::SingleQuotedString(_) + | Token::DoubleQuotedString(_) => Some(Ident::new(format!("{kw:?}"))), _ => { self.prev_token(); None @@ -10154,8 +10146,7 @@ impl<'a> Parser<'a> { } else { None }; - let with_ordinality = - self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; Ok(TableFactor::Function { lateral: true, @@ -10361,8 +10352,7 @@ impl<'a> Parser<'a> { let array_exprs = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; - let with_ordinality = - self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let with_ordinality = self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); let alias = match self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS) { Ok(Some(alias)) => Some(alias), @@ -10453,8 +10443,8 @@ impl<'a> Parser<'a> { }; // PostgreSQL: WITH ORDINALITY for table-valued functions - let with_ordinality = args.is_some() - && self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); + let with_ordinality = + args.is_some() && self.parse_keywords(&[Keyword::WITH, Keyword::ORDINALITY]); let alias = self.parse_optional_table_alias(keywords::RESERVED_FOR_TABLE_ALIAS)?; @@ -10712,10 +10702,16 @@ impl<'a> Parser<'a> { } else { None }; - Ok(UnpivotInValue { columns: cols, alias }) + Ok(UnpivotInValue { + columns: cols, + alias, + }) } else { let col = p.parse_identifier(false)?; - Ok(UnpivotInValue { columns: vec![col], alias: None }) + Ok(UnpivotInValue { + columns: vec![col], + alias: None, + }) } })?; self.expect_token(&Token::RParen)?; @@ -10855,16 +10851,16 @@ impl<'a> Parser<'a> { // Check if the next token is an identifier. If not (e.g. EOF, comma), // then the keyword itself is the grantee name, not a type prefix. 
match self.peek_token_ref().token { - Token::Word(_) | Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) => { - match kw { - Keyword::ROLE => Some(GranteesType::Role), - Keyword::USER => Some(GranteesType::User), - Keyword::SHARE => Some(GranteesType::Share), - Keyword::GROUP => Some(GranteesType::Group), - Keyword::APPLICATION => Some(GranteesType::Application), - _ => None, - } - } + Token::Word(_) + | Token::SingleQuotedString(_) + | Token::DoubleQuotedString(_) => match kw { + Keyword::ROLE => Some(GranteesType::Role), + Keyword::USER => Some(GranteesType::User), + Keyword::SHARE => Some(GranteesType::Share), + Keyword::GROUP => Some(GranteesType::Group), + Keyword::APPLICATION => Some(GranteesType::Application), + _ => None, + }, _ => { // The keyword IS the grantee name; backtrack self.prev_token(); @@ -11545,7 +11541,7 @@ impl<'a> Parser<'a> { if self.consume_token(&Token::LParen) { // Standard SQL: FILTER (WHERE expr) // DuckDB also supports: FILTER (expr) without WHERE - self.parse_keyword(Keyword::WHERE); + let _ = self.parse_keyword(Keyword::WHERE); let filter = self.parse_expr()?; self.expect_token(&Token::RParen)?; // SQL standard: FILTER comes before OVER, e.g. @@ -11615,7 +11611,7 @@ impl<'a> Parser<'a> { if self.consume_token(&Token::LParen) { // Standard SQL: FILTER (WHERE expr) // DuckDB also supports: FILTER (expr) without WHERE - self.parse_keyword(Keyword::WHERE); + let _ = self.parse_keyword(Keyword::WHERE); let filter = self.parse_expr()?; self.expect_token(&Token::RParen)?; Expr::AggregateExpressionWithFilter { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 9316eef470..e86ac9442e 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -984,24 +984,19 @@ impl<'a> Tokenizer<'a> { // Python DB-API named placeholder: %(name)s Some('(') => { chars.next(); // consume '(' - let name = - peeking_take_while(chars, |ch| ch != ')'); + let name = peeking_take_while(chars, |ch| ch != ')'); if chars.peek() == Some(&')') { chars.next(); // consume ')' - // consume the trailing format char (e.g. 's' in %(name)s) - let fmt_char = if chars - .peek() - .map_or(false, |c| c.is_ascii_alphabetic()) - { - let c = *chars.peek().unwrap(); - chars.next(); - String::from(c) - } else { - String::new() - }; - Ok(Some(Token::Placeholder(format!( - "%({name}){fmt_char}" - )))) + // consume the trailing format char (e.g. 
's' in %(name)s) + let fmt_char = + if chars.peek().map_or(false, |c| c.is_ascii_alphabetic()) { + let c = *chars.peek().unwrap(); + chars.next(); + String::from(c) + } else { + String::new() + }; + Ok(Some(Token::Placeholder(format!("%({name}){fmt_char}")))) } else { // Malformed, just return Mod and let parser deal with it Ok(Some(Token::Mod)) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 83a712285d..f0e9e75d3a 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -91,10 +91,7 @@ fn parse_raw_literal() { fn parse_raw_literal_triple_quoted() { // Simple triple-quoted raw string let sql = "SELECT R\"\"\"hello world\"\"\""; - let stmt = bigquery_unescaped().one_statement_parses_to( - sql, - "SELECT R'hello world'", - ); + let stmt = bigquery_unescaped().one_statement_parses_to(sql, "SELECT R'hello world'"); if let Statement::Query(query) = stmt { if let SetExpr::Select(select) = *query.body { assert_eq!(1, select.projection.len()); @@ -111,10 +108,7 @@ fn parse_raw_literal_triple_quoted() { // Triple-quoted raw string with embedded quotes and newlines let sql = "SELECT R\"\"\"\n return x*y;\n\"\"\""; - let stmt = bigquery_unescaped().one_statement_parses_to( - sql, - "SELECT R'\n return x*y;\n'", - ); + let stmt = bigquery_unescaped().one_statement_parses_to(sql, "SELECT R'\n return x*y;\n'"); if let Statement::Query(query) = stmt { if let SetExpr::Select(select) = *query.body { assert_eq!(1, select.projection.len()); @@ -921,7 +915,7 @@ fn parse_table_time_travel() { Value::SingleQuotedString(version) ))), partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![], },] @@ -1389,9 +1383,7 @@ fn test_typed_array_constructor() { bigquery().verified_expr("ARRAY[]"); // Typed array with STRUCT type - bigquery().verified_expr( - "ARRAY>[STRUCT(1), STRUCT(2)]", - ); + bigquery().verified_expr("ARRAY>[STRUCT(1), STRUCT(2)]"); // Typed array with STRUCT containing multiple fields bigquery().verified_expr( @@ -1402,14 +1394,10 @@ fn test_typed_array_constructor() { bigquery().verified_expr("ARRAY>[]"); // Typed array in SELECT context - bigquery().verified_only_select( - "SELECT ARRAY[1, 2, 3] AS arr", - ); + bigquery().verified_only_select("SELECT ARRAY[1, 2, 3] AS arr"); // Typed array in UNNEST - bigquery().verified_only_select( - "SELECT * FROM UNNEST(ARRAY>[])", - ); + bigquery().verified_only_select("SELECT * FROM UNNEST(ARRAY>[])"); } #[test] @@ -1795,25 +1783,15 @@ fn parse_bigquery_gap_fill() { #[test] fn parse_bigquery_chained_subscript_access() { // col[OFFSET(0)].field - bigquery().verified_stmt( - "SELECT col[OFFSET(0)].source_type AS x FROM t1", - ); + bigquery().verified_stmt("SELECT col[OFFSET(0)].source_type AS x FROM t1"); // col[SAFE_OFFSET(0)].field[SAFE_OFFSET(0)] - bigquery().verified_stmt( - "SELECT col[SAFE_OFFSET(0)].source_ids[SAFE_OFFSET(0)] AS x FROM t1", - ); + bigquery().verified_stmt("SELECT col[SAFE_OFFSET(0)].source_ids[SAFE_OFFSET(0)] AS x FROM t1"); // col[OFFSET(0)].field[OFFSET(0)] chained - bigquery().verified_stmt( - "SELECT col[OFFSET(0)].ids[OFFSET(0)] AS x FROM t1", - ); + bigquery().verified_stmt("SELECT col[OFFSET(0)].ids[OFFSET(0)] AS x FROM t1"); // Simple subscript still works - bigquery().verified_stmt( - "SELECT col[OFFSET(0)] AS x FROM t1", - ); + bigquery().verified_stmt("SELECT col[OFFSET(0)] AS x FROM t1"); // Subscript with field access - bigquery().verified_stmt( - "SELECT col[0].field AS x FROM t1", - ); + bigquery().verified_stmt("SELECT col[0].field AS x FROM 
t1"); } #[test] @@ -1868,7 +1846,5 @@ fn test_bigquery_window_named_ref() { ); // Simple case: one window referencing another - bigquery().verified_stmt( - "SELECT SUM(x) OVER (b) FROM t WINDOW a AS (ORDER BY x), b AS a", - ); + bigquery().verified_stmt("SELECT SUM(x) OVER (b) FROM t WINDOW a AS (ORDER BY x), b AS a"); } diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 3d4a0c616b..e68d8e8ad8 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -614,9 +614,7 @@ fn parse_alter_table_drop_partition_and_part() { } // DROP PARTITION with string literal (no parens) - match clickhouse_and_generic() - .verified_stmt("ALTER TABLE mt DROP PARTITION '2020-11-21'") - { + match clickhouse_and_generic().verified_stmt("ALTER TABLE mt DROP PARTITION '2020-11-21'") { Statement::AlterTable { name, operations, .. } => { @@ -642,10 +640,7 @@ fn parse_alter_table_drop_partition_and_part() { assert_eq!( operations[0], AlterTableOperation::DropPartition { - partition: Partition::Expr(Expr::Value(Value::Number( - "201901".to_string(), - false - ))), + partition: Partition::Expr(Expr::Value(number("201901"))), } ); } @@ -653,14 +648,15 @@ fn parse_alter_table_drop_partition_and_part() { } // DROP PARTITION with tuple() function call - clickhouse_and_generic().verified_stmt( - "ALTER TABLE visits DROP PARTITION tuple(toYYYYMM(toDate('2019-01-25')))", - ); + clickhouse_and_generic() + .verified_stmt("ALTER TABLE visits DROP PARTITION tuple(toYYYYMM(toDate('2019-01-25')))"); // Multiple statements: DROP PARTITION + DROP PART - clickhouse_and_generic().parse_sql_statements( - "ALTER TABLE mt DROP PARTITION '2020-11-21'; ALTER TABLE mt DROP PART 'all_4_4_0'", - ).unwrap(); + clickhouse_and_generic() + .parse_sql_statements( + "ALTER TABLE mt DROP PARTITION '2020-11-21'; ALTER TABLE mt DROP PART 'all_4_4_0'", + ) + .unwrap(); } #[test] @@ -1680,9 +1676,7 @@ fn test_query_parameters() { #[test] fn parse_alter_table_add_projection() { - clickhouse_and_generic().verified_stmt( - "ALTER TABLE t ADD PROJECTION p (SELECT x ORDER BY x)", - ); + clickhouse_and_generic().verified_stmt("ALTER TABLE t ADD PROJECTION p (SELECT x ORDER BY x)"); } #[test] @@ -1695,9 +1689,7 @@ fn parse_alter_table_add_projection_with_settings() { #[test] fn parse_alter_table_drop_projection() { - clickhouse_and_generic().verified_stmt( - "ALTER TABLE t DROP PROJECTION p", - ); + clickhouse_and_generic().verified_stmt("ALTER TABLE t DROP PROJECTION p"); } #[test] diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 1f7909c70b..d17d322c87 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -265,16 +265,17 @@ fn parse_update_set_from() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![], }], lateral_views: vec![], sample: None, selection: None, - group_by: GroupByExpr::Expressions(vec![Expr::Identifier( - Ident::new("id").empty_span() - )], vec![]), + group_by: GroupByExpr::Expressions( + vec![Expr::Identifier(Ident::new("id").empty_span())], + vec![] + ), cluster_by: vec![], distribute_by: vec![], sort_by: vec![], @@ -336,7 +337,7 @@ fn parse_update_with_table_alias() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![], }, @@ -407,7 +408,7 @@ fn parse_select_with_table_alias() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![], }] 
@@ -635,7 +636,7 @@ fn parse_where_delete_with_alias_statement() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![], }]), @@ -2006,10 +2007,13 @@ fn parse_select_group_by() { let sql = "SELECT id, fname, lname FROM customer GROUP BY lname, fname"; let select = verified_only_select(sql); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("lname").empty_span()), - Expr::Identifier(Ident::new("fname").empty_span()), - ], vec![]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("lname").empty_span()), + Expr::Identifier(Ident::new("fname").empty_span()), + ], + vec![] + ), select.group_by ); @@ -4124,7 +4128,7 @@ fn test_parse_named_window() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![], }], @@ -5205,7 +5209,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -5228,7 +5232,7 @@ fn parse_implicit_join() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -5251,7 +5255,7 @@ fn parse_cross_join() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, join_operator: JoinOperator::CrossJoin(JoinConstraint::None), }, @@ -5274,7 +5278,7 @@ fn parse_joins_on() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, join_operator: f(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::Identifier(Ident::new("c1").empty_span())), @@ -5346,7 +5350,7 @@ fn parse_joins_using() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, join_operator: f(JoinConstraint::Using(vec![Ident::new("c1").empty_span()])), } @@ -5410,7 +5414,7 @@ fn parse_natural_join() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, join_operator: f(JoinConstraint::Natural), } @@ -5687,7 +5691,7 @@ fn parse_derived_tables() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::Natural), }], @@ -7100,7 +7104,12 @@ fn parse_grant() { granted_by, .. } => match (privileges, objects) { - (Privileges::Actions(actions), GrantObjects::Tables { tables: objects, .. }) => { + ( + Privileges::Actions(actions), + GrantObjects::Tables { + tables: objects, .. 
+ }, + ) => { assert_eq!( vec![ Action::Select { columns: None }, @@ -7315,7 +7324,7 @@ fn parse_merge() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![], }], @@ -8288,7 +8297,7 @@ fn parse_pivot_table() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }), aggregates: vec![AggregateItem::UnnamedExpr(Expr::Function(Function { name: ObjectName(vec![Ident::new("SUM")]), @@ -8376,7 +8385,7 @@ fn parse_pivot_table_aliases() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }), aggregates: vec![ AggregateItem::ExprWithAlias { @@ -8522,14 +8531,17 @@ fn parse_unpivot_table() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }), value: vec![Ident::new("quantity").empty_span()], name: Ident::new("quarter").empty_span(), columns: ["Q1", "Q2", "Q3", "Q4"] .into_iter() - .map(|f| UnpivotInValue { columns: vec![Ident::new(f).empty_span()], alias: None }) + .map(|f| UnpivotInValue { + columns: vec![Ident::new(f).empty_span()], + alias: None + }) .collect(), null_handling: None, alias: Some(TableAlias { @@ -8612,7 +8624,10 @@ fn parse_pivot_unpivot_table() { name: Ident::new("year").empty_span(), columns: ["population_2000", "population_2010"] .into_iter() - .map(|f| UnpivotInValue { columns: vec![Ident::new(f).empty_span()], alias: None }) + .map(|f| UnpivotInValue { + columns: vec![Ident::new(f).empty_span()], + alias: None + }) .collect(), null_handling: None, alias: Some(TableAlias { @@ -8844,7 +8859,7 @@ fn parse_unload() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![], }], diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index a6fcc2920f..dd2ebbf695 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -308,8 +308,14 @@ fn test_select_union_by_name() { fn test_numeric_literal_underscores() { // Underscores in numeric literals are stripped during tokenization duckdb_and_generic().one_statement_parses_to("SELECT 1_000_000", "SELECT 1000000"); + #[cfg(not(feature = "bigdecimal"))] duckdb_and_generic() .one_statement_parses_to("SELECT 1_2E+1_0::FLOAT", "SELECT CAST(12E+10 AS FLOAT)"); + #[cfg(feature = "bigdecimal")] + duckdb_and_generic().one_statement_parses_to( + "SELECT 1_2E+1_0::FLOAT", + "SELECT CAST(120000000000 AS FLOAT)", + ); duckdb_and_generic().one_statement_parses_to("SELECT 1_000.50_0", "SELECT 1000.500"); } @@ -437,8 +443,5 @@ fn test_escaped_string_literal() { duckdb().verified_only_select(r"SELECT E'\t'"); duckdb().verified_only_select(r"SELECT E'\n'"); - duckdb().one_statement_parses_to( - r"SELECT e'\n'", - r"SELECT E'\n'", - ); + duckdb().one_statement_parses_to(r"SELECT e'\n'", r"SELECT E'\n'"); } diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 616fb00345..b206595a1d 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -59,7 +59,7 @@ fn parse_table_time_travel() { Value::SingleQuotedString(version) ))), partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![] },] @@ -560,7 +560,7 @@ fn parse_substring_in_select() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![] }], diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 93fbc25404..c9708d7943 100644 --- 
a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1389,7 +1389,7 @@ fn parse_select_with_numeric_prefix_column_name() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![] }], @@ -1447,7 +1447,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![] }], @@ -1511,7 +1511,7 @@ fn parse_update_with_joins() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![Join { relation: TableFactor::Table { @@ -1524,7 +1524,7 @@ fn parse_update_with_joins() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, join_operator: JoinOperator::Inner(JoinConstraint::On(Expr::BinaryOp { left: Box::new(Expr::CompoundIdentifier( @@ -1635,7 +1635,7 @@ fn parse_substring_in_select() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![] }], diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 9119426741..1e5c8c7b01 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -619,9 +619,7 @@ fn parse_alter_table_set_storage_parameters() { "ALTER TABLE t1 SET (fillfactor = 5, autovacuum_enabled = TRUE)", "ALTER TABLE t1 SET (fillfactor = 5, autovacuum_enabled = true)", ); - pg().verified_stmt( - "ALTER TABLE t1 SET (autovacuum_vacuum_scale_factor = 0.01)", - ); + pg().verified_stmt("ALTER TABLE t1 SET (autovacuum_vacuum_scale_factor = 0.01)"); } #[test] @@ -3671,14 +3669,17 @@ fn parse_select_group_by_grouping_sets() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, GROUPING SETS ((brand), (size), ())" ); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("size").empty_span()), - Expr::GroupingSets(vec![ - vec![Expr::Identifier(Ident::new("brand").empty_span())], - vec![Expr::Identifier(Ident::new("size").empty_span())], - vec![], - ]), - ], vec![]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("size").empty_span()), + Expr::GroupingSets(vec![ + vec![Expr::Identifier(Ident::new("brand").empty_span())], + vec![Expr::Identifier(Ident::new("size").empty_span())], + vec![], + ]), + ], + vec![] + ), select.group_by ); } @@ -3689,13 +3690,16 @@ fn parse_select_group_by_rollup() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, ROLLUP (brand, size)", ); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("size").empty_span()), - Expr::Rollup(vec![ - vec![Expr::Identifier(Ident::new("brand").empty_span())], - vec![Expr::Identifier(Ident::new("size").empty_span())], - ]), - ], vec![]), + GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("size").empty_span()), + Expr::Rollup(vec![ + vec![Expr::Identifier(Ident::new("brand").empty_span())], + vec![Expr::Identifier(Ident::new("size").empty_span())], + ]), + ], + vec![] + ), select.group_by ); } @@ -3706,22 +3710,24 @@ fn parse_select_group_by_cube() { "SELECT brand, size, sum(sales) FROM items_sold GROUP BY size, CUBE (brand, size)", ); assert_eq!( - GroupByExpr::Expressions(vec![ - Expr::Identifier(Ident::new("size").empty_span()), - Expr::Cube(vec![ - vec![Expr::Identifier(Ident::new("brand").empty_span())], - vec![Expr::Identifier(Ident::new("size").empty_span())], - ]), - ], vec![]), + 
GroupByExpr::Expressions( + vec![ + Expr::Identifier(Ident::new("size").empty_span()), + Expr::Cube(vec![ + vec![Expr::Identifier(Ident::new("brand").empty_span())], + vec![Expr::Identifier(Ident::new("size").empty_span())], + ]), + ], + vec![] + ), select.group_by ); } #[test] fn parse_with_ordinality() { - pg_and_generic().verified_stmt( - "SELECT * FROM JSON_ARRAY_ELEMENTS('[1,true, [2,false]]') WITH ORDINALITY", - ); + pg_and_generic() + .verified_stmt("SELECT * FROM JSON_ARRAY_ELEMENTS('[1,true, [2,false]]') WITH ORDINALITY"); pg_and_generic().verified_stmt( "SELECT * FROM JSON_ARRAY_ELEMENTS('[1,true, [2,false]]') WITH ORDINALITY AS kv_json", ); @@ -3741,9 +3747,7 @@ fn parse_lateral_with_ordinality() { #[test] fn parse_unnest_with_ordinality() { - pg_and_generic().verified_stmt( - "SELECT * FROM UNNEST(x) WITH ORDINALITY", - ); + pg_and_generic().verified_stmt("SELECT * FROM UNNEST(x) WITH ORDINALITY"); } #[test] diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 9bec232ef1..63513f03f4 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -54,7 +54,7 @@ fn test_square_brackets_over_db_schema_table_name() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![], } @@ -107,7 +107,7 @@ fn test_double_quotes_over_db_schema_table_name() { with_hints: vec![], version: None, partitions: vec![], - with_ordinality: false, + with_ordinality: false, }, joins: vec![], } @@ -497,9 +497,8 @@ fn test_grant_with_group_grantee() { redshift().verified_stmt("GRANT ALL ON SCHEMA qa_tickit TO GROUP qa_users"); // GRANT ... TO multiple GROUP grantees with TABLE keyword - redshift().verified_stmt( - "GRANT ALL ON TABLE qa_tickit.sales TO GROUP qa_users, GROUP ro_users", - ); + redshift() + .verified_stmt("GRANT ALL ON TABLE qa_tickit.sales TO GROUP qa_users, GROUP ro_users"); // GRANT with column-level privileges redshift().verified_stmt( diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 55c91764b1..43bfc326dc 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -616,7 +616,10 @@ fn test_alter_table_row_access_policy() { match &operations[1] { AlterTableOperation::AddRowAccessPolicy { policy, on } => { assert_eq!("rap_t1_version_2", policy.to_string()); - assert_eq!(vec!["empl_id"], on.iter().map(|i| i.to_string()).collect::<Vec<String>>()); + assert_eq!( + vec!["empl_id"], + on.iter().map(|i| i.to_string()).collect::<Vec<String>>() + ); } _ => unreachable!(), } @@ -1259,10 +1262,7 @@ fn test_snowflake_stage_with_quoted_identifiers() { let select = snowflake().verified_only_select(sql); match &select.from[0].relation { TableFactor::Table { name, ..
} => { - assert_eq!( - name.0, - vec![Ident::new(r#"@"myschema"."mystage"/file.gz"#)] - ); + assert_eq!(name.0, vec![Ident::new(r#"@"myschema"."mystage"/file.gz"#)]); } _ => unreachable!(), } @@ -1683,10 +1683,8 @@ fn test_describe_object_types() { // DESCRIBE WAREHOUSE snowflake().verified_stmt("DESCRIBE WAREHOUSE temporary_warehouse"); // DESC SEQUENCE (DESC is alias for DESCRIBE) - snowflake().one_statement_parses_to( - "DESC SEQUENCE my_sequence", - "DESCRIBE SEQUENCE my_sequence", - ); + snowflake() + .one_statement_parses_to("DESC SEQUENCE my_sequence", "DESCRIBE SEQUENCE my_sequence"); // DESC STREAM (DESC is alias for DESCRIBE) snowflake().one_statement_parses_to("DESC STREAM mystream", "DESCRIBE STREAM mystream"); // DESCRIBE VIEW @@ -2021,10 +2019,7 @@ fn test_set_tuple_assignment() { "SET (V1, V2) = (10, 'example')", "SET (V1, V2) = (10, 'example')", ); - snowflake().one_statement_parses_to( - "SET (min, max) = (40, 70)", - "SET (min, max) = (40, 70)", - ); + snowflake().one_statement_parses_to("SET (min, max) = (40, 70)", "SET (min, max) = (40, 70)"); // With session variable references snowflake().one_statement_parses_to( "SET (min, max) = (50, 2 * $min)", From 6232d3137398dc928103f979ae084a3cd9bbdeca Mon Sep 17 00:00:00 2001 From: Lukasz Stefaniak Date: Tue, 17 Feb 2026 12:01:26 +0100 Subject: [PATCH 81/81] ci: set RUST_MIN_STACK for recursion limit tests The deeply nested parsing tests need 8MB stack to avoid SIGABRT. --- .github/workflows/test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 672bb22c1f..96b9d2e410 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,3 +25,5 @@ jobs: - uses: taiki-e/install-action@nextest - name: Run tests run: cargo nextest run --all-features + env: + RUST_MIN_STACK: 8388608
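
Note (reviewer addition, not part of the patch): RUST_MIN_STACK raises the default stack size for threads spawned by the Rust standard library, so the test threads running the deeply nested parsing cases get the 8 MB the commit message asks for. Below is a minimal local sketch of the same idea; the 500-level nesting depth, the use of GenericDialect, and sizing a single thread by hand instead of exporting the variable are illustrative assumptions rather than what the workflow or the test suite actually does.

    // Sketch: parse a deeply nested expression on a thread with an 8 MiB stack,
    // the same size as RUST_MIN_STACK=8388608 in the workflow above.
    use sqlparser::dialect::GenericDialect;
    use sqlparser::parser::Parser;
    use std::thread;

    fn main() {
        // Builds e.g. "SELECT ((((...1...))))" with 500 levels of nesting.
        let sql = format!("SELECT {}1{}", "(".repeat(500), ")".repeat(500));
        let handle = thread::Builder::new()
            .stack_size(8 * 1024 * 1024) // 8 MiB, matching the CI setting
            .spawn(move || Parser::parse_sql(&GenericDialect {}, &sql))
            .expect("failed to spawn parser thread");
        let result = handle.join().expect("parser thread panicked");
        // Depending on the parser's recursion limit this may be Ok or a
        // RecursionLimitExceeded error; either way it returns instead of
        // overflowing the stack.
        println!("parse finished, ok = {}", result.is_ok());
    }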