diff --git a/datafusion/core/tests/sql/explain_analyze.rs b/datafusion/core/tests/sql/explain_analyze.rs index a1d9a02cf6b14..6283d77e16eb8 100644 --- a/datafusion/core/tests/sql/explain_analyze.rs +++ b/datafusion/core/tests/sql/explain_analyze.rs @@ -18,6 +18,7 @@ use super::*; use datafusion::config::ConfigOptions; +use datafusion::physical_plan::collect; use datafusion::physical_plan::display::DisplayableExecutionPlan; use datafusion::physical_plan::metrics::Timestamp; diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 40ae75cd7f802..3a72e0d561c56 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -22,7 +22,6 @@ use arrow::{ util::display::array_value_to_string, }; -use datafusion::datasource::TableProvider; use datafusion::error::Result; use datafusion::logical_expr::{Aggregate, LogicalPlan, TableScan}; use datafusion::physical_plan::metrics::MetricValue; @@ -31,7 +30,6 @@ use datafusion::physical_plan::ExecutionPlanVisitor; use datafusion::prelude::*; use datafusion::test_util; use datafusion::{assert_batches_eq, assert_batches_sorted_eq}; -use datafusion::{datasource::MemTable, physical_plan::collect}; use datafusion::{execution::context::SessionContext, physical_plan::displayable}; use datafusion_common::{assert_contains, assert_not_contains}; use object_store::path::Path; @@ -322,21 +320,6 @@ async fn register_alltypes_parquet(ctx: &SessionContext) { .unwrap(); } -/// Return a new table provider that has a single Int32 column with -/// values between `seq_start` and `seq_end` -pub fn table_with_sequence( - seq_start: i32, - seq_end: i32, -) -> Result> { - let schema = Arc::new(Schema::new(vec![Field::new("i", DataType::Int32, true)])); - let arr = Arc::new(Int32Array::from((seq_start..=seq_end).collect::>())); - let partitions = vec![vec![RecordBatch::try_new( - schema.clone(), - vec![arr as ArrayRef], - )?]]; - Ok(Arc::new(MemTable::try_new(schema, partitions)?)) -} - pub struct ExplainNormalizer { replacements: Vec<(String, String)>, } diff --git a/datafusion/core/tests/sql/select.rs b/datafusion/core/tests/sql/select.rs index 4a782e54b070c..d796a20b03b5c 100644 --- a/datafusion/core/tests/sql/select.rs +++ b/datafusion/core/tests/sql/select.rs @@ -571,93 +571,3 @@ async fn test_named_query_parameters() -> Result<()> { assert_batches_sorted_eq!(expected, &results); Ok(()) } - -#[tokio::test] -async fn parallel_query_with_filter() -> Result<()> { - let tmp_dir = TempDir::new()?; - let partition_count = 4; - let ctx = create_ctx_with_partition(&tmp_dir, partition_count).await?; - - let dataframe = ctx - .sql("SELECT c1, c2 FROM test WHERE c1 > 0 AND c1 < 3") - .await?; - let results = dataframe.collect().await.unwrap(); - let expected = vec![ - "+----+----+", - "| c1 | c2 |", - "+----+----+", - "| 1 | 1 |", - "| 1 | 10 |", - "| 1 | 2 |", - "| 1 | 3 |", - "| 1 | 4 |", - "| 1 | 5 |", - "| 1 | 6 |", - "| 1 | 7 |", - "| 1 | 8 |", - "| 1 | 9 |", - "| 2 | 1 |", - "| 2 | 10 |", - "| 2 | 2 |", - "| 2 | 3 |", - "| 2 | 4 |", - "| 2 | 5 |", - "| 2 | 6 |", - "| 2 | 7 |", - "| 2 | 8 |", - "| 2 | 9 |", - "+----+----+", - ]; - assert_batches_sorted_eq!(expected, &results); - - Ok(()) -} - -#[tokio::test] -async fn boolean_literal() -> Result<()> { - let results = - execute_with_partition("SELECT c1, c3 FROM test WHERE c1 > 2 AND c3 = true", 4) - .await?; - - let expected = [ - "+----+------+", - "| c1 | c3 |", - "+----+------+", - "| 3 | true |", - "| 3 | true |", - "| 3 | true |", - "| 3 | true |", - "| 3 | true |", - "+----+------+", - ]; - assert_batches_sorted_eq!(expected, &results); - - Ok(()) -} - -#[tokio::test] -async fn unprojected_filter() { - let config = SessionConfig::new(); - let ctx = SessionContext::new_with_config(config); - let df = ctx.read_table(table_with_sequence(1, 3).unwrap()).unwrap(); - - let df = df - .filter(col("i").gt(lit(2))) - .unwrap() - .select(vec![col("i") + col("i")]) - .unwrap(); - - let plan = df.clone().into_optimized_plan().unwrap(); - println!("{}", plan.display_indent()); - - let results = df.collect().await.unwrap(); - - let expected = [ - "+-----------------------+", - "| ?table?.i + ?table?.i |", - "+-----------------------+", - "| 6 |", - "+-----------------------+", - ]; - assert_batches_sorted_eq!(expected, &results); -} diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 9ffddc6e2d465..1b698e1e30d34 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -116,7 +116,7 @@ VALUES (1,2,3,4,5,6,7,8,9,10,11,12,13,NULL,'F',3.5) # Test non-literal expressions in VALUES query II -VALUES (1, CASE WHEN RANDOM() > 0.5 THEN 1 ELSE 1 END), +VALUES (1, CASE WHEN RANDOM() > 0.5 THEN 1 ELSE 1 END), (2, CASE WHEN RANDOM() > 0.5 THEN 2 ELSE 2 END); ---- 1 1 @@ -669,9 +669,104 @@ c 3 1 b 0 29 c 3 1 a 0 -85 c 3 1 b 4 -82 -# TODO: test_prepare_statement +# TODO: Test prepare statement +# Dependency on https://github.com/apache/arrow-datafusion/issues/4539#issuecomment-1755430857 -# TODO: parallel_query_with_filter +# TODO: Test named query parameters +# Dependency on https://github.com/apache/arrow-datafusion/issues/4539#issuecomment-1755430857 + + +###### +# Parallel query with filter +###### + +# Set up csv files +statement ok +CREATE TABLE src_table_base ( + c2 INT, + c3 BOOLEAN +) AS VALUES +(1, FALSE), +(2, TRUE), +(3, FALSE), +(4, TRUE), +(5, FALSE), +(6, TRUE), +(7, FALSE), +(8, TRUE), +(9, FALSE), +(10, TRUE); + +query IIB +COPY ( + SELECT 0 as c1, c2, c3 FROM src_table_base +) TO 'test_files/scratch/select/csv_partitions/partition-1.csv' +(FORMAT CSV, SINGLE_FILE_OUTPUT true); +---- +10 + +query IIB +COPY ( + SELECT 1 as c1, c2, c3 FROM src_table_base +) TO 'test_files/scratch/select/csv_partitions/partition-2.csv' +(FORMAT CSV, SINGLE_FILE_OUTPUT true); +---- +10 + +query IIB +COPY ( + SELECT 2 as c1, c2, c3 FROM src_table_base +) TO 'test_files/scratch/select/csv_partitions/partition-3.csv' +(FORMAT CSV, SINGLE_FILE_OUTPUT true); +---- +10 + +query IIB +COPY ( + SELECT 3 as c1, c2, c3 FROM src_table_base +) TO 'test_files/scratch/select/csv_partitions/partition-4.csv' +(FORMAT CSV, SINGLE_FILE_OUTPUT true); +---- +10 + +# Set up table +statement ok +CREATE EXTERNAL TABLE test ( + c1 INT, + c2 INT, + c3 BOOLEAN, +) +STORED AS CSV +WITH HEADER ROW +LOCATION 'test_files/scratch/select/csv_partitions' + +query II +SELECT c1, c2 FROM test WHERE c1 > 0 AND c1 < 3 ORDER BY c1, c2; +---- +1 1 +1 2 +1 3 +1 4 +1 5 +1 6 +1 7 +1 8 +1 9 +1 10 +2 1 +2 2 +2 3 +2 4 +2 5 +2 6 +2 7 +2 8 +2 9 +2 10 + +# Drop table +statement ok +DROP TABLE test; # query with filter string type coercion @@ -705,10 +800,54 @@ CREATE TABLE empty_table; statement ok SELECT * FROM empty_table +###### +# Boolean literal +###### -# TODO: boolean_literal +statement ok +CREATE EXTERNAL TABLE test_base ( + c1 INT, + c2 INT, + c3 BOOLEAN, +) +STORED AS CSV +WITH HEADER ROW +LOCATION 'test_files/scratch/select/csv_partitions' -# TODO: unprojected_filter +statement ok +CREATE TABLE test AS +SELECT + arrow_cast(c1, 'UInt32') as c1, + arrow_cast(c2, 'UInt64') as c2, + arrow_cast(c3, 'Boolean') as c3 +FROM test_base; + +query IB +SELECT c1, c3 FROM test WHERE c1 > 2 AND c3 = true +---- +3 true +3 true +3 true +3 true +3 true + +# Drop table +statement ok +DROP TABLE test; + + +###### +# Unprojected filter +###### + +statement ok +CREATE TABLE test(i INT) AS +VALUES (1), (2), (3); + +query I +SELECT i + i FROM test WHERE i > 2; +---- +6 # case sensitive in default dialect @@ -1121,7 +1260,7 @@ query II SELECT CASE WHEN B.x > 0 THEN A.x / B.x ELSE 0 END AS value1, CASE WHEN B.x > 0 AND B.y > 0 THEN A.x / B.x ELSE 0 END AS value3 -FROM t AS A, (SELECT * FROM t WHERE x = 0) AS B; +FROM t AS A, (SELECT * FROM t WHERE x = 0) AS B; ---- 0 0 0 0