From 8ab27146f1bae0d9ee737fd1835dc9b6047021aa Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Tue, 16 Aug 2022 13:48:55 -0700 Subject: [PATCH 1/4] Update sqlparser version to use main from git --- datafusion/common/Cargo.toml | 2 +- datafusion/core/Cargo.toml | 2 +- datafusion/expr/Cargo.toml | 2 +- datafusion/sql/Cargo.toml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index 20cb0967022f8..d4f5caa3308a2 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -47,4 +47,4 @@ ordered-float = "3.0" parquet = { version = "20.0.0", features = ["arrow"], optional = true } pyo3 = { version = "0.16", optional = true } serde_json = "1.0" -sqlparser = "0.20" +sqlparser = { git = "https://github.com/sqlparser-rs/sqlparser-rs" } diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index 189e53d637df4..92a6095bb2b11 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -85,7 +85,7 @@ pyo3 = { version = "0.16", optional = true } rand = "0.8" rayon = { version = "1.5", optional = true } smallvec = { version = "1.6", features = ["union"] } -sqlparser = "0.20" +sqlparser = { git = "https://github.com/sqlparser-rs/sqlparser-rs" } tempfile = "3" tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } tokio-stream = "0.1" diff --git a/datafusion/expr/Cargo.toml b/datafusion/expr/Cargo.toml index 6d368c63cb3ad..b30a573866d23 100644 --- a/datafusion/expr/Cargo.toml +++ b/datafusion/expr/Cargo.toml @@ -38,4 +38,4 @@ path = "src/lib.rs" ahash = { version = "0.8", default-features = false, features = ["runtime-rng"] } arrow = { version = "20.0.0", features = ["prettyprint"] } datafusion-common = { path = "../common", version = "11.0.0" } -sqlparser = "0.20" +sqlparser = { git = "https://github.com/sqlparser-rs/sqlparser-rs" } diff --git a/datafusion/sql/Cargo.toml b/datafusion/sql/Cargo.toml index 6ad1da9e7fef1..916d345d9a92f 100644 --- a/datafusion/sql/Cargo.toml +++ b/datafusion/sql/Cargo.toml @@ -42,5 +42,5 @@ arrow = { version = "20.0.0", features = ["prettyprint"] } datafusion-common = { path = "../common", version = "11.0.0" } datafusion-expr = { path = "../expr", version = "11.0.0" } hashbrown = "0.12" -sqlparser = "0.20" +sqlparser = { git = "https://github.com/sqlparser-rs/sqlparser-rs" } tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync", "fs", "parking_lot"] } From bd827f6c5fefb880cbafa2d38158bbcbe7d8085d Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Tue, 16 Aug 2022 13:49:47 -0700 Subject: [PATCH 2/4] Update SqlExpr::Trim struct to match latest sqlparser changes --- datafusion/sql/src/planner.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/datafusion/sql/src/planner.rs b/datafusion/sql/src/planner.rs index 28c82f80246f9..5834f505d38c8 100644 --- a/datafusion/sql/src/planner.rs +++ b/datafusion/sql/src/planner.rs @@ -1960,21 +1960,21 @@ impl<'a, S: ContextProvider> SqlToRel<'a, S> { )) } - SQLExpr::Trim { expr, trim_where } => { - let (fun, where_expr) = match trim_where { - Some((TrimWhereField::Leading, expr)) => { - (BuiltinScalarFunction::Ltrim, Some(expr)) + SQLExpr::Trim { expr, trim_where, trim_what } => { + let fun = match trim_where { + Some(TrimWhereField::Leading) => { + BuiltinScalarFunction::Ltrim } - Some((TrimWhereField::Trailing, expr)) => { - (BuiltinScalarFunction::Rtrim, Some(expr)) + Some(TrimWhereField::Trailing) => { + BuiltinScalarFunction::Rtrim } - Some((TrimWhereField::Both, expr)) => { - (BuiltinScalarFunction::Btrim, Some(expr)) + Some(TrimWhereField::Both) => { + BuiltinScalarFunction::Btrim } - None => (BuiltinScalarFunction::Trim, None), + None => BuiltinScalarFunction::Trim }; let arg = self.sql_expr_to_logical_expr(*expr, schema, ctes)?; - let args = match where_expr { + let args = match trim_what { Some(to_trim) => { let to_trim = self.sql_expr_to_logical_expr(*to_trim, schema, ctes)?; vec![arg, to_trim] From 214cea45eb98ed8b47cca7a75eb66faf8df98692 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Mon, 22 Aug 2022 11:22:29 -0700 Subject: [PATCH 3/4] Update trim function signature to accept two inputs --- datafusion/expr/src/function.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/function.rs b/datafusion/expr/src/function.rs index 263c4540f37ca..7851edb137122 100644 --- a/datafusion/expr/src/function.rs +++ b/datafusion/expr/src/function.rs @@ -296,7 +296,6 @@ pub fn signature(fun: &BuiltinScalarFunction) -> Signature { | BuiltinScalarFunction::SHA256 | BuiltinScalarFunction::SHA384 | BuiltinScalarFunction::SHA512 - | BuiltinScalarFunction::Trim | BuiltinScalarFunction::Upper => Signature::uniform( 1, vec![DataType::Utf8, DataType::LargeUtf8], @@ -304,7 +303,8 @@ pub fn signature(fun: &BuiltinScalarFunction) -> Signature { ), BuiltinScalarFunction::Btrim | BuiltinScalarFunction::Ltrim - | BuiltinScalarFunction::Rtrim => Signature::one_of( + | BuiltinScalarFunction::Rtrim + | BuiltinScalarFunction::Trim => Signature::one_of( vec![ TypeSignature::Exact(vec![DataType::Utf8]), TypeSignature::Exact(vec![DataType::Utf8, DataType::Utf8]), From bc7caf5f0a274266c16aed9571d4d4e420b5e554 Mon Sep 17 00:00:00 2001 From: Ayush Dattagupta Date: Mon, 22 Aug 2022 11:23:14 -0700 Subject: [PATCH 4/4] Add more trim testcases --- datafusion/core/tests/sql/expr.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/datafusion/core/tests/sql/expr.rs b/datafusion/core/tests/sql/expr.rs index 4fa1f54d22bfc..48a95884cad10 100644 --- a/datafusion/core/tests/sql/expr.rs +++ b/datafusion/core/tests/sql/expr.rs @@ -907,15 +907,21 @@ async fn test_string_expressions() -> Result<()> { test_expression!("to_hex(9223372036854775807)", "7fffffffffffffff"); test_expression!("to_hex(CAST(NULL AS int))", "NULL"); test_expression!("trim(' tom ')", "tom"); + test_expression!("trim(LEADING ' tom ')", "tom "); + test_expression!("trim(TRAILING ' tom ')", " tom"); + test_expression!("trim(BOTH ' tom ')", "tom"); test_expression!("trim(LEADING ' ' FROM ' tom ')", "tom "); test_expression!("trim(TRAILING ' ' FROM ' tom ')", " tom"); test_expression!("trim(BOTH ' ' FROM ' tom ')", "tom"); + test_expression!("trim(' ' FROM ' tom ')", "tom"); test_expression!("trim(LEADING 'x' FROM 'xxxtomxxx')", "tomxxx"); test_expression!("trim(TRAILING 'x' FROM 'xxxtomxxx')", "xxxtom"); test_expression!("trim(BOTH 'x' FROM 'xxxtomxx')", "tom"); + test_expression!("trim('x' FROM 'xxxtomxx')", "tom"); test_expression!("trim(LEADING 'xy' FROM 'xyxabcxyzdefxyx')", "abcxyzdefxyx"); test_expression!("trim(TRAILING 'xy' FROM 'xyxabcxyzdefxyx')", "xyxabcxyzdef"); test_expression!("trim(BOTH 'xy' FROM 'xyxabcxyzdefxyx')", "abcxyzdef"); + test_expression!("trim('xy' FROM 'xyxabcxyzdefxyx')", "abcxyzdef"); test_expression!("trim(' tom')", "tom"); test_expression!("trim('')", ""); test_expression!("trim('tom ')", "tom");