Skip to content

Commit 27b0b08

Browse files
committed
fix: resolve all corpus test regressions (14 fixes)
Fix 14 corpus regressions introduced by the parser-fixes branch: 1. ClickHouse DESCRIBE (3 files): parse_explain_options was consuming the table name as explain_type for DESC/DESCRIBE. Now only runs for EXPLAIN. 2. ClickHouse END keyword (1 file): trailing-comma detection treated END as a reserved keyword terminator inside function args like runningConcurrency(start, end). Disable trailing_commas inside function argument parsing. 3. ANSI REVOKE FROM user (2 files): parse_grantee consumed USER as a grantee type prefix, leaving no identifier. Now checks if the next token is an identifier before treating the keyword as a type prefix. 4. BigQuery/Snowflake MODEL keyword (2 files): MODEL was consumed as a BigQuery ML function arg prefix in all dialects. Now gated to BigQuery/Generic only and checks for comma/rparen after keyword. 5. BigQuery .* EXCEPT (2 files): Token::Mul in parse_snowflake_json_path consumed * from .* wildcard syntax. Now only consumes * inside brackets [*] for array wildcard traversal. 6. BigQuery OVERLAPS (1 file): OVERLAPS as infix operator consumed the identifier in non-Postgres dialects. Now gated to PostgreSQL/DuckDB/Generic. 7. Databricks DESCRIBE HISTORY (1 file): HISTORY keyword consumed even when followed by period (history.tbl). Now checks for period and backtracks, also gated to Databricks/Generic. 8. Redshift APPROXIMATE (1 file): APPROXIMATE consumed even when not followed by a function call. Now requires next token to be word+lparen. 9. Snowflake SHOW COLUMNS IN VIEW (1 file): VIEW consumed as object kind even at EOF. Now backtracks if not followed by an identifier.
1 parent 6833cef commit 27b0b08

File tree

1 file changed

+95
-33
lines changed

1 file changed

+95
-33
lines changed

src/parser/mod.rs

Lines changed: 95 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -911,7 +911,8 @@ impl<'a> Parser<'a> {
911911
// Redshift: APPROXIMATE COUNT(DISTINCT x)
912912
Keyword::APPROXIMATE
913913
if dialect_of!(self is RedshiftSqlDialect | GenericDialect)
914-
&& matches!(self.peek_token_kind(), Token::Word(_)) =>
914+
&& matches!(self.peek_token_kind(), Token::Word(_))
915+
&& self.peek_nth_token_ref(1).token == Token::LParen =>
915916
{
916917
// Parse the following function call and set approximate = true
917918
let expr = self.parse_prefix()?;
@@ -3033,7 +3034,8 @@ impl<'a> Parser<'a> {
30333034
Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(Self::LIKE_PREC),
30343035
Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(Self::LIKE_PREC),
30353036
Token::Word(w) if w.keyword == Keyword::GLOB => Ok(Self::LIKE_PREC),
3036-
Token::Word(w) if w.keyword == Keyword::OVERLAPS => Ok(Self::BETWEEN_PREC),
3037+
Token::Word(w) if w.keyword == Keyword::OVERLAPS
3038+
&& dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect) => Ok(Self::BETWEEN_PREC),
30373039
Token::Word(w) if w.keyword == Keyword::OPERATOR => Ok(Self::BETWEEN_PREC),
30383040
Token::Word(w) if w.keyword == Keyword::DIV => Ok(Self::MUL_DIV_MOD_OP_PREC),
30393041
Token::Eq
@@ -7393,7 +7395,9 @@ impl<'a> Parser<'a> {
73937395
buf.push('[')
73947396
}
73957397
Token::RBracket => buf.push(']'),
7396-
Token::Mul => buf.push('*'),
7398+
// Only consume * inside brackets [*] for array wildcard traversal.
7399+
// Bare * after . is a qualified wildcard (.* EXCEPT) not a JSON path.
7400+
Token::Mul if buf.ends_with('[') => buf.push('*'),
73977401
Token::Colon => buf.push(':'),
73987402
Token::DoubleQuotedString(ref s) => write!(buf, "\"{}\"", s).unwrap(),
73997403
Token::SingleQuotedString(ref s) => write!(buf, "'{}'", s).unwrap(),
@@ -8713,11 +8717,13 @@ impl<'a> Parser<'a> {
87138717
}
87148718

87158719
// ClickHouse: EXPLAIN [type] [setting = value, ...] statement
8716-
let (explain_type, options) = if dialect_of!(self is ClickHouseDialect) {
8717-
self.parse_explain_options()?
8718-
} else {
8719-
(None, vec![])
8720-
};
8720+
// Only for EXPLAIN, not DESCRIBE/DESC (describe_alias=true means DESC/DESCRIBE)
8721+
let (explain_type, options) =
8722+
if !describe_alias && dialect_of!(self is ClickHouseDialect) {
8723+
self.parse_explain_options()?
8724+
} else {
8725+
(None, vec![])
8726+
};
87218727

87228728
match self.maybe_parse(|parser| parser.parse_statement()) {
87238729
Some(Statement::Explain { .. }) | Some(Statement::ExplainTable { .. }) => Err(
@@ -8734,9 +8740,19 @@ impl<'a> Parser<'a> {
87348740
}),
87358741
_ => {
87368742
// Databricks: DESCRIBE HISTORY table_name
8737-
if describe_alias && self.parse_keyword(Keyword::HISTORY) {
8738-
let table_name = self.parse_object_name(false)?;
8739-
return Ok(Statement::DescribeHistory { table_name });
8743+
// Only match if HISTORY is followed by an identifier (not `.`)
8744+
// to avoid consuming `history.tbl` as DESCRIBE HISTORY
8745+
if describe_alias
8746+
&& dialect_of!(self is DatabricksDialect | GenericDialect)
8747+
&& self.parse_keyword(Keyword::HISTORY)
8748+
{
8749+
if matches!(self.peek_token_ref().token, Token::Period) {
8750+
// `DESCRIBE history.tbl` - HISTORY is the schema name
8751+
self.prev_token();
8752+
} else {
8753+
let table_name = self.parse_object_name(false)?;
8754+
return Ok(Statement::DescribeHistory { table_name });
8755+
}
87408756
}
87418757

87428758
// Parse optional object type: TABLE, DATABASE, WAREHOUSE, SEQUENCE, STREAM, FUNCTION, VIEW, SCHEMA
@@ -9759,9 +9775,21 @@ impl<'a> Parser<'a> {
97599775
let show_in =
97609776
self.expect_one_of_keywords(&[Keyword::FROM, Keyword::IN])? == Keyword::IN;
97619777
// Optionally consume TABLE or VIEW keyword (Snowflake: SHOW COLUMNS IN TABLE <name>)
9778+
// Only if followed by an identifier (the table name), otherwise it IS the table name
97629779
let show_object_kind =
9763-
self.parse_one_of_keywords(&[Keyword::TABLE, Keyword::VIEW])
9764-
.map(|kw| Ident::new(format!("{kw:?}")));
9780+
if let Some(kw) = self.parse_one_of_keywords(&[Keyword::TABLE, Keyword::VIEW]) {
9781+
match self.peek_token_ref().token {
9782+
Token::Word(_) | Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) => {
9783+
Some(Ident::new(format!("{kw:?}")))
9784+
}
9785+
_ => {
9786+
self.prev_token();
9787+
None
9788+
}
9789+
}
9790+
} else {
9791+
None
9792+
};
97659793
let object_name = self.parse_object_name(false)?;
97669794
let table_name = match self.parse_one_of_keywords(&[Keyword::FROM, Keyword::IN]) {
97679795
Some(_) => {
@@ -10823,11 +10851,27 @@ impl<'a> Parser<'a> {
1082310851
Keyword::GROUP,
1082410852
Keyword::APPLICATION,
1082510853
]) {
10826-
Some(Keyword::ROLE) => Some(GranteesType::Role),
10827-
Some(Keyword::USER) => Some(GranteesType::User),
10828-
Some(Keyword::SHARE) => Some(GranteesType::Share),
10829-
Some(Keyword::GROUP) => Some(GranteesType::Group),
10830-
Some(Keyword::APPLICATION) => Some(GranteesType::Application),
10854+
Some(kw) => {
10855+
// Check if the next token is an identifier. If not (e.g. EOF, comma),
10856+
// then the keyword itself is the grantee name, not a type prefix.
10857+
match self.peek_token_ref().token {
10858+
Token::Word(_) | Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) => {
10859+
match kw {
10860+
Keyword::ROLE => Some(GranteesType::Role),
10861+
Keyword::USER => Some(GranteesType::User),
10862+
Keyword::SHARE => Some(GranteesType::Share),
10863+
Keyword::GROUP => Some(GranteesType::Group),
10864+
Keyword::APPLICATION => Some(GranteesType::Application),
10865+
_ => None,
10866+
}
10867+
}
10868+
_ => {
10869+
// The keyword IS the grantee name; backtrack
10870+
self.prev_token();
10871+
None
10872+
}
10873+
}
10874+
}
1083110875
_ => None,
1083210876
};
1083310877
let name = self.parse_identifier(false).map(WithSpan::unwrap)?;
@@ -11289,21 +11333,29 @@ impl<'a> Parser<'a> {
1128911333
pub fn parse_function_args(&mut self) -> Result<FunctionArg, ParserError> {
1129011334
// BigQuery ML functions use MODEL/TABLE keyword-prefixed table references
1129111335
// e.g. ML.PREDICT(MODEL `mydataset.mymodel`, TABLE `mydataset.mytable`)
11292-
if let Some(kw) = self.parse_one_of_keywords(&[Keyword::MODEL, Keyword::TABLE]) {
11293-
// TABLE followed by `(` is the TABLE(subquery) syntax, not a table ref
11294-
if kw == Keyword::TABLE && self.peek_token_is(&Token::LParen) {
11295-
self.prev_token();
11296-
} else {
11297-
let keyword = match kw {
11298-
Keyword::MODEL => FunctionArgKeyword::Model,
11299-
Keyword::TABLE => FunctionArgKeyword::Table,
11300-
_ => unreachable!(),
11301-
};
11302-
let table_name = self.parse_object_name(false)?;
11303-
return Ok(FunctionArg::Unnamed(FunctionArgExpr::TableRef {
11304-
keyword,
11305-
table_name,
11306-
}));
11336+
if dialect_of!(self is BigQueryDialect | GenericDialect) {
11337+
if let Some(kw) = self.parse_one_of_keywords(&[Keyword::MODEL, Keyword::TABLE]) {
11338+
// TABLE followed by `(` is the TABLE(subquery) syntax, not a table ref
11339+
// MODEL/TABLE followed by `,` or `)` means the keyword is used as an identifier
11340+
if (kw == Keyword::TABLE && self.peek_token_is(&Token::LParen))
11341+
|| matches!(
11342+
self.peek_token_ref().token,
11343+
Token::Comma | Token::RParen | Token::EOF
11344+
)
11345+
{
11346+
self.prev_token();
11347+
} else {
11348+
let keyword = match kw {
11349+
Keyword::MODEL => FunctionArgKeyword::Model,
11350+
Keyword::TABLE => FunctionArgKeyword::Table,
11351+
_ => unreachable!(),
11352+
};
11353+
let table_name = self.parse_object_name(false)?;
11354+
return Ok(FunctionArg::Unnamed(FunctionArgExpr::TableRef {
11355+
keyword,
11356+
table_name,
11357+
}));
11358+
}
1130711359
}
1130811360
}
1130911361
if self.peek_nth_token(1) == Token::RArrow {
@@ -11352,7 +11404,12 @@ impl<'a> Parser<'a> {
1135211404
if self.consume_token(&Token::RParen) {
1135311405
Ok(vec![])
1135411406
} else {
11407+
// Disable trailing commas inside function args - keywords like END
11408+
// should be parsed as identifiers, not as trailing comma terminators
11409+
let old_value = self.options.trailing_commas;
11410+
self.options.trailing_commas = false;
1135511411
let args = self.parse_comma_separated(Parser::parse_function_args)?;
11412+
self.options.trailing_commas = old_value;
1135611413
self.expect_token(&Token::RParen)?;
1135711414
Ok(args)
1135811415
}
@@ -11394,7 +11451,12 @@ impl<'a> Parser<'a> {
1139411451
));
1139511452
}
1139611453

11454+
// Disable trailing commas inside function args - keywords like END
11455+
// should be parsed as identifiers, not as trailing comma terminators
11456+
let old_trailing = self.options.trailing_commas;
11457+
self.options.trailing_commas = false;
1139711458
let args = self.parse_comma_separated(Parser::parse_function_args)?;
11459+
self.options.trailing_commas = old_trailing;
1139811460
let on_overflow = if self.parse_keywords(&[Keyword::ON, Keyword::OVERFLOW]) {
1139911461
if self.parse_keyword(Keyword::ERROR) {
1140011462
Some(OnOverflow::Error)

0 commit comments

Comments
 (0)