From 6d9166c272d59a0a2ffd80f13bd33d74da0d324f Mon Sep 17 00:00:00 2001 From: tanruixiang Date: Thu, 18 Jan 2024 16:31:55 +0800 Subject: [PATCH 01/25] deps: bump datafusion --- Cargo.lock | 555 ++++++++++-------- Cargo.toml | 22 +- .../src/instance/reorder_memtable.rs | 7 +- .../src/row_iter/record_batch_stream.rs | 1 + src/common_types/src/datum.rs | 14 +- src/components/parquet_ext/src/meta_data.rs | 3 +- .../parquet_ext/src/prune/min_max.rs | 11 +- .../src/dist_sql_query/physical_plan.rs | 18 +- src/df_operator/src/scalar.rs | 2 +- src/df_operator/src/udaf.rs | 2 +- src/interpreters/src/insert.rs | 4 +- .../physical_optimizer/repartition.rs | 8 +- .../physical_plan_extension/prom_align.rs | 8 +- .../src/datafusion_impl/task_context.rs | 2 +- .../src/logical_optimizer/type_conversion.rs | 6 +- src/query_frontend/src/parser.rs | 24 +- src/query_frontend/src/planner.rs | 2 +- src/query_frontend/src/promql/convert.rs | 10 +- src/query_frontend/src/provider.rs | 7 + src/table_engine/src/predicate.rs | 5 +- src/table_engine/src/provider.rs | 7 +- 21 files changed, 421 insertions(+), 297 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5f23033061..7f9c89a359 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -90,7 +90,7 @@ version = "1.2.6-alpha" dependencies = [ "arc-swap 1.6.0", "arena", - "arrow 43.0.0", + "arrow 49.0.0", "async-stream", "async-trait", "atomic_enum", @@ -120,7 +120,7 @@ dependencies = [ "parquet_ext", "pin-project-lite", "prometheus 0.12.0", - "prost", + "prost 0.11.8", "rand 0.7.3", "remote_engine_client", "router", @@ -245,24 +245,24 @@ dependencies = [ [[package]] name = "arrow" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2feeebd77b34b0bc88f224e06d01c27da4733997cc4789a4e056196656cdc59a" +checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614" dependencies = [ "ahash 0.8.3", - "arrow-arith 43.0.0", - "arrow-array 43.0.0", - "arrow-buffer 43.0.0", - "arrow-cast 43.0.0", - "arrow-csv 43.0.0", - "arrow-data 43.0.0", - "arrow-ipc 43.0.0", - "arrow-json 43.0.0", - "arrow-ord 43.0.0", - "arrow-row 43.0.0", - "arrow-schema 43.0.0", - "arrow-select 43.0.0", - "arrow-string 43.0.0", + "arrow-arith 49.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-cast 49.0.0", + "arrow-csv 49.0.0", + "arrow-data 49.0.0", + "arrow-ipc 49.0.0", + "arrow-json 49.0.0", + "arrow-ord 49.0.0", + "arrow-row 49.0.0", + "arrow-schema 49.0.0", + "arrow-select 49.0.0", + "arrow-string 49.0.0", ] [[package]] @@ -282,14 +282,14 @@ dependencies = [ [[package]] name = "arrow-arith" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7173f5dc49c0ecb5135f52565af33afd3fdc9a12d13bd6f9973e8b96305e4b2e" +checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7" dependencies = [ - "arrow-array 43.0.0", - "arrow-buffer 43.0.0", - "arrow-data 43.0.0", - "arrow-schema 43.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", "chrono", "half 2.2.1", "num", @@ -313,14 +313,14 @@ dependencies = [ [[package]] name = "arrow-array" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "63d7ea725f7d1f8bb2cffc53ef538557e95fc802e217d5be25122d402e22f3d0" +checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d" dependencies = [ "ahash 0.8.3", - "arrow-buffer 43.0.0", - "arrow-data 43.0.0", - "arrow-schema 43.0.0", + "arrow-buffer 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", "chrono", "chrono-tz", "half 2.2.1", @@ -340,10 +340,11 @@ dependencies = [ [[package]] name = "arrow-buffer" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdbe439e077f484e5000b9e1d47b5e4c0d15f2b311a8f5bcc682553d5d67a722" +checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c" dependencies = [ + "bytes", "half 2.2.1", "num", ] @@ -366,15 +367,16 @@ dependencies = [ [[package]] name = "arrow-cast" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93913cc14875770aa1eef5e310765e855effa352c094cb1c7c00607d0f37b4e1" +checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a" dependencies = [ - "arrow-array 43.0.0", - "arrow-buffer 43.0.0", - "arrow-data 43.0.0", - "arrow-schema 43.0.0", - "arrow-select 43.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", + "arrow-select 49.0.0", + "base64 0.21.0", "chrono", "comfy-table 7.0.1", "half 2.2.1", @@ -403,15 +405,15 @@ dependencies = [ [[package]] name = "arrow-csv" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef55b67c55ed877e6fe7b923121c19dae5e31ca70249ea2779a17b58fb0fbd9a" +checksum = "2e09aa6246a1d6459b3f14baeaa49606cfdbca34435c46320e14054d244987ca" dependencies = [ - "arrow-array 43.0.0", - "arrow-buffer 43.0.0", - "arrow-cast 43.0.0", - "arrow-data 43.0.0", - "arrow-schema 43.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-cast 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", "chrono", "csv", "csv-core", @@ -434,12 +436,12 @@ dependencies = [ [[package]] name = "arrow-data" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4f4f4a3c54614126a71ab91f6631c9743eb4643d6e9318b74191da9dc6e028b" +checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634" dependencies = [ - "arrow-buffer 43.0.0", - "arrow-schema 43.0.0", + "arrow-buffer 49.0.0", + "arrow-schema 49.0.0", "half 2.2.1", "num", ] @@ -460,16 +462,17 @@ dependencies = [ [[package]] name = "arrow-ipc" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d41a3659f984a524ef1c2981d43747b24d8eec78e2425267fcd0ef34ce71cd18" +checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd" dependencies = [ - "arrow-array 43.0.0", - "arrow-buffer 43.0.0", - "arrow-cast 43.0.0", - "arrow-data 43.0.0", - "arrow-schema 43.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-cast 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", "flatbuffers", + "lz4_flex", ] [[package]] @@ -494,15 +497,15 @@ dependencies = [ [[package]] name = "arrow-json" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10b95faa95a378f56ef32d84cc0104ea998c39ef7cd1faaa6b4cebf8ea92846d" +checksum = "d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee" dependencies = [ - "arrow-array 43.0.0", - "arrow-buffer 43.0.0", - "arrow-cast 43.0.0", - "arrow-data 43.0.0", - "arrow-schema 43.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-cast 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", "chrono", "half 2.2.1", "indexmap 2.0.0", @@ -529,15 +532,15 @@ dependencies = [ [[package]] name = "arrow-ord" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c68549a4284d9f8b39586afb8d5ff8158b8f0286353a4844deb1d11cf1ba1f26" +checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4" dependencies = [ - "arrow-array 43.0.0", - "arrow-buffer 43.0.0", - "arrow-data 43.0.0", - "arrow-schema 43.0.0", - "arrow-select 43.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", + "arrow-select 49.0.0", "half 2.2.1", "num", ] @@ -559,15 +562,15 @@ dependencies = [ [[package]] name = "arrow-row" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a75a4a757afc301ce010adadff54d79d66140c4282ed3de565f6ccb716a5cf3" +checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a" dependencies = [ "ahash 0.8.3", - "arrow-array 43.0.0", - "arrow-buffer 43.0.0", - "arrow-data 43.0.0", - "arrow-schema 43.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", "half 2.2.1", "hashbrown 0.14.0", ] @@ -580,9 +583,9 @@ checksum = "bc85923d8d6662cc66ac6602c7d1876872e671002d60993dfdf492a6badeae92" [[package]] name = "arrow-schema" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bebcb57eef570b15afbcf2d07d813eb476fde9f6dd69c81004d6476c197e87e" +checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167" [[package]] name = "arrow-select" @@ -599,14 +602,15 @@ dependencies = [ [[package]] name = "arrow-select" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6e2943fa433a48921e914417173816af64eef61c0a3d448280e6c40a62df221" +checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036" dependencies = [ - "arrow-array 43.0.0", - "arrow-buffer 43.0.0", - "arrow-data 43.0.0", - "arrow-schema 43.0.0", + "ahash 0.8.3", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", "num", ] @@ -627,37 +631,37 @@ dependencies = [ [[package]] name = "arrow-string" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbc92ed638851774f6d7af1ad900b92bc1486746497511868b4298fcbcfa35af" +checksum = "a4a48149c63c11c9ff571e50ab8f017d2a7cb71037a882b42f6354ed2da9acc7" dependencies = [ - "arrow-array 43.0.0", - "arrow-buffer 43.0.0", - "arrow-data 43.0.0", - "arrow-schema 43.0.0", - "arrow-select 43.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-data 49.0.0", + "arrow-schema 49.0.0", + "arrow-select 49.0.0", "num", "regex", - "regex-syntax 0.7.1", + "regex-syntax 0.8.2", ] [[package]] name = "arrow_ext" version = "1.2.6-alpha" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "serde", "snafu 0.6.10", - "zstd", + "zstd 0.12.3+zstd.1.5.2", ] [[package]] name = "arrow_util" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c" +source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" dependencies = [ "ahash 0.8.3", - "arrow 43.0.0", + "arrow 49.0.0", "chrono", "comfy-table 6.1.4", "hashbrown 0.13.2", @@ -682,8 +686,8 @@ dependencies = [ "pin-project-lite", "tokio", "xz2", - "zstd", - "zstd-safe", + "zstd 0.12.3+zstd.1.5.2", + "zstd-safe 6.0.4+zstd.1.5.4", ] [[package]] @@ -750,9 +754,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.72" +version = "0.1.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc6dde6e4ed435a4c1ee4e73592f5ba9da2151af10076cc04858746af9352d09" +checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", @@ -881,7 +885,7 @@ version = "1.2.6-alpha" dependencies = [ "analytic_engine", "arena", - "arrow 43.0.0", + "arrow 49.0.0", "base64 0.13.1", "bytes_ext", "clap 3.2.23", @@ -908,7 +912,7 @@ dependencies = [ "toml_ext", "trace_metric", "wal", - "zstd", + "zstd 0.12.3+zstd.1.5.2", ] [[package]] @@ -1452,7 +1456,7 @@ dependencies = [ "logger", "macros", "meta_client", - "prost", + "prost 0.11.8", "runtime", "serde", "serde_json", @@ -1519,7 +1523,7 @@ dependencies = [ name = "common_types" version = "1.2.6-alpha" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "arrow_ext", "bytes_ext", "chrono", @@ -1528,7 +1532,7 @@ dependencies = [ "horaedbproto 2.0.0", "macros", "paste 1.0.12", - "prost", + "prost 0.11.8", "rand 0.7.3", "seahash", "serde", @@ -1565,7 +1569,7 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2895653b4d9f1538a83970077cb01dfc77a4810524e51a110944688e916b18e" dependencies = [ - "prost", + "prost 0.11.8", "prost-types", "tonic 0.9.2", "tracing-core", @@ -2003,13 +2007,14 @@ dependencies = [ [[package]] name = "datafusion" -version = "27.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d" +version = "34.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" dependencies = [ "ahash 0.8.3", - "arrow 43.0.0", - "arrow-array 43.0.0", - "arrow-schema 43.0.0", + "arrow 49.0.0", + "arrow-array 49.0.0", + "arrow-ipc 49.0.0", + "arrow-schema 49.0.0", "async-compression", "async-trait", "bytes", @@ -2021,24 +2026,22 @@ dependencies = [ "datafusion-expr", "datafusion-optimizer", "datafusion-physical-expr", - "datafusion-row", + "datafusion-physical-plan", "datafusion-sql", "flate2", "futures 0.3.28", "glob", + "half 2.2.1", "hashbrown 0.14.0", "indexmap 2.0.0", - "itertools 0.11.0", - "lazy_static", + "itertools 0.12.0", "log", "num_cpus", - "object_store 0.6.1", + "object_store 0.8.0", "parking_lot 0.12.1", "parquet", - "percent-encoding", "pin-project-lite", "rand 0.8.5", - "smallvec", "sqlparser", "tempfile", "tokio", @@ -2046,34 +2049,42 @@ dependencies = [ "url", "uuid", "xz2", - "zstd", + "zstd 0.13.0", ] [[package]] name = "datafusion-common" -version = "27.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d" +version = "34.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" dependencies = [ - "arrow 43.0.0", - "arrow-array 43.0.0", + "ahash 0.8.3", + "arrow 49.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-schema 49.0.0", "chrono", + "half 2.2.1", + "libc", "num_cpus", - "object_store 0.6.1", + "object_store 0.8.0", "parquet", "sqlparser", ] [[package]] name = "datafusion-execution" -version = "27.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d" +version = "34.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" dependencies = [ + "arrow 49.0.0", + "chrono", "dashmap 5.4.0", "datafusion-common", "datafusion-expr", + "futures 0.3.28", "hashbrown 0.14.0", "log", - "object_store 0.6.1", + "object_store 0.8.0", "parking_lot 0.12.1", "rand 0.8.5", "tempfile", @@ -2082,13 +2093,14 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "27.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d" +version = "34.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" dependencies = [ "ahash 0.8.3", - "arrow 43.0.0", + "arrow 49.0.0", + "arrow-array 49.0.0", "datafusion-common", - "lazy_static", + "paste 1.0.12", "sqlparser", "strum 0.25.0", "strum_macros 0.25.1", @@ -2096,45 +2108,43 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "27.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d" +version = "34.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "async-trait", "chrono", "datafusion-common", "datafusion-expr", "datafusion-physical-expr", "hashbrown 0.14.0", - "itertools 0.11.0", + "itertools 0.12.0", "log", - "regex-syntax 0.7.1", + "regex-syntax 0.8.2", ] [[package]] name = "datafusion-physical-expr" -version = "27.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d" +version = "34.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" dependencies = [ "ahash 0.8.3", - "arrow 43.0.0", - "arrow-array 43.0.0", - "arrow-buffer 43.0.0", - "arrow-schema 43.0.0", + "arrow 49.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-ord 49.0.0", + "arrow-schema 49.0.0", "base64 0.21.0", "blake2", "blake3", "chrono", "datafusion-common", "datafusion-expr", - "datafusion-row", "half 2.2.1", "hashbrown 0.14.0", "hex", "indexmap 2.0.0", - "itertools 0.11.0", - "lazy_static", - "libc", + "itertools 0.12.0", "log", "md-5", "paste 1.0.12", @@ -2147,37 +2157,56 @@ dependencies = [ ] [[package]] -name = "datafusion-proto" -version = "27.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d" +name = "datafusion-physical-plan" +version = "34.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" dependencies = [ - "arrow 43.0.0", + "ahash 0.8.3", + "arrow 49.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-schema 49.0.0", + "async-trait", "chrono", - "datafusion", "datafusion-common", + "datafusion-execution", "datafusion-expr", - "object_store 0.6.1", - "prost", + "datafusion-physical-expr", + "futures 0.3.28", + "half 2.2.1", + "hashbrown 0.14.0", + "indexmap 2.0.0", + "itertools 0.12.0", + "log", + "once_cell", + "parking_lot 0.12.1", + "pin-project-lite", + "rand 0.8.5", + "tokio", + "uuid", ] [[package]] -name = "datafusion-row" -version = "27.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d" +name = "datafusion-proto" +version = "34.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", + "chrono", + "datafusion", "datafusion-common", - "paste 1.0.12", - "rand 0.8.5", + "datafusion-expr", + "object_store 0.8.0", + "prost 0.12.3", ] [[package]] name = "datafusion-sql" -version = "27.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d" +version = "34.0.0" +source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" dependencies = [ - "arrow 43.0.0", - "arrow-schema 43.0.0", + "arrow 49.0.0", + "arrow-schema 49.0.0", "datafusion-common", "datafusion-expr", "log", @@ -2187,7 +2216,7 @@ dependencies = [ [[package]] name = "datafusion_util" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c" +source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" dependencies = [ "async-trait", "datafusion", @@ -2305,7 +2334,7 @@ dependencies = [ name = "df_engine_extensions" version = "1.2.6-alpha" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "async-recursion", "async-trait", "catalog", @@ -2318,7 +2347,7 @@ dependencies = [ "insta", "lazy_static", "prometheus 0.12.0", - "prost", + "prost 0.11.8", "runtime", "snafu 0.6.10", "table_engine", @@ -2330,7 +2359,7 @@ dependencies = [ name = "df_operator" version = "1.2.6-alpha" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "base64 0.13.1", "bincode", "chrono", @@ -2470,7 +2499,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4319dc0fb739a6e84cb8678b8cf50c9bcfa4712ae826b33ecf00cc0850550a58" dependencies = [ "http", - "prost", + "prost 0.11.8", "tokio", "tokio-stream", "tonic 0.8.3", @@ -2808,12 +2837,12 @@ checksum = "8f5f3913fa0bfe7ee1fd8248b6b9f42a5af4b9d65ec2dd2c3c26132b950ecfc2" [[package]] name = "generated_types" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c" +source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" dependencies = [ "pbjson", "pbjson-build", "pbjson-types", - "prost", + "prost 0.11.8", "prost-build", "serde", "tonic-build", @@ -3071,7 +3100,7 @@ dependencies = [ "thiserror", "tokio", "tonic 0.8.3", - "zstd", + "zstd 0.12.3+zstd.1.5.2", ] [[package]] @@ -3095,7 +3124,7 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5907c770ee20818978cf2050341ca2c4c7fb7888423ccb090cbb2fda250dfad7" dependencies = [ - "prost", + "prost 0.11.8", "protoc-bin-vendored", "tonic 0.8.3", "tonic-build", @@ -3107,7 +3136,7 @@ name = "horaedbproto" version = "2.0.0" source = "git+https://github.com/apache/incubator-horaedb-proto.git?rev=19ece8f771fc0b3e8e734072cc3d8040de6c74cb#19ece8f771fc0b3e8e734072cc3d8040de6c74cb" dependencies = [ - "prost", + "prost 0.11.8", "protoc-bin-vendored", "tonic 0.8.3", "tonic-build", @@ -3325,7 +3354,7 @@ dependencies = [ [[package]] name = "influxdb_influxql_parser" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c" +source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" dependencies = [ "chrono", "chrono-tz", @@ -3367,7 +3396,7 @@ name = "interpreters" version = "1.2.6-alpha" dependencies = [ "analytic_engine", - "arrow 43.0.0", + "arrow 49.0.0", "async-trait", "catalog", "catalog_impls", @@ -3418,9 +3447,9 @@ dependencies = [ [[package]] name = "iox_query" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c" +source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "arrow_util", "async-trait", "chrono", @@ -3442,9 +3471,9 @@ dependencies = [ [[package]] name = "iox_query_influxql" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c" +source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "chrono", "chrono-tz", "datafusion", @@ -3497,6 +3526,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.6" @@ -3953,7 +3991,7 @@ dependencies = [ "horaedbproto 2.0.0", "logger", "macros", - "prost", + "prost 0.11.8", "reqwest", "serde", "serde_json", @@ -4314,9 +4352,9 @@ dependencies = [ [[package]] name = "num" -version = "0.4.0" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" dependencies = [ "num-bigint", "num-complex", @@ -4456,16 +4494,16 @@ dependencies = [ [[package]] name = "object_store" -version = "0.6.1" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27c776db4f332b571958444982ff641d2531417a326ca368995073b639205d58" +checksum = "2524735495ea1268be33d200e1ee97455096a0846295a21548cd2f3541de7050" dependencies = [ "async-trait", "bytes", "chrono", "futures 0.3.28", "humantime 2.1.0", - "itertools 0.10.5", + "itertools 0.11.0", "parking_lot 0.12.1", "percent-encoding", "snafu 0.7.4", @@ -4497,7 +4535,7 @@ dependencies = [ "partitioned_lock", "prometheus 0.12.0", "prometheus-static-metric", - "prost", + "prost 0.11.8", "rand 0.7.3", "runtime", "serde", @@ -4545,13 +4583,13 @@ dependencies = [ "tokio", "tokio-util", "uuid", - "zstd", + "zstd 0.12.3+zstd.1.5.2", ] [[package]] name = "observability_deps" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c" +source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" dependencies = [ "tracing", ] @@ -4675,18 +4713,18 @@ dependencies = [ [[package]] name = "parquet" -version = "43.0.0" +version = "49.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec7267a9607c3f955d4d0ac41b88a67cecc0d8d009173ad3da390699a6cb3750" +checksum = "af88740a842787da39b3d69ce5fbf6fce97d20211d3b299fee0a0da6430c74d4" dependencies = [ "ahash 0.8.3", - "arrow-array 43.0.0", - "arrow-buffer 43.0.0", - "arrow-cast 43.0.0", - "arrow-data 43.0.0", - "arrow-ipc 43.0.0", - "arrow-schema 43.0.0", - "arrow-select 43.0.0", + "arrow-array 49.0.0", + "arrow-buffer 49.0.0", + "arrow-cast 49.0.0", + "arrow-data 49.0.0", + "arrow-ipc 49.0.0", + "arrow-schema 49.0.0", + "arrow-select 49.0.0", "base64 0.21.0", "brotli", "bytes", @@ -4694,24 +4732,24 @@ dependencies = [ "flate2", "futures 0.3.28", "hashbrown 0.14.0", - "lz4", + "lz4_flex", "num", "num-bigint", - "object_store 0.6.1", + "object_store 0.8.0", "paste 1.0.12", "seq-macro", "snap", "thrift", "tokio", "twox-hash", - "zstd", + "zstd 0.13.0", ] [[package]] name = "parquet_ext" version = "1.2.6-alpha" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "arrow_ext", "async-trait", "bytes", @@ -4738,7 +4776,7 @@ name = "partition_table_engine" version = "1.2.6-alpha" dependencies = [ "analytic_engine", - "arrow 43.0.0", + "arrow 49.0.0", "async-trait", "common_types", "datafusion", @@ -4805,7 +4843,7 @@ checksum = "bdbb7b706f2afc610f3853550cdbbf6372fd324824a087806bd4480ea4996e24" dependencies = [ "heck", "itertools 0.10.5", - "prost", + "prost 0.11.8", "prost-types", ] @@ -4819,7 +4857,7 @@ dependencies = [ "chrono", "pbjson", "pbjson-build", - "prost", + "prost 0.11.8", "prost-build", "serde", ] @@ -5179,7 +5217,7 @@ dependencies = [ "async-trait", "bytes", "futures 0.3.28", - "prost", + "prost 0.11.8", "prost-build", "snap", "warp", @@ -5256,7 +5294,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48e50df39172a3e7eb17e14642445da64996989bc212b583015435d39a58537" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.11.8", +] + +[[package]] +name = "prost" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a" +dependencies = [ + "bytes", + "prost-derive 0.12.3", ] [[package]] @@ -5273,7 +5321,7 @@ dependencies = [ "multimap", "petgraph", "prettyplease 0.1.25", - "prost", + "prost 0.11.8", "prost-types", "regex", "syn 1.0.109", @@ -5294,13 +5342,26 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "prost-derive" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" +dependencies = [ + "anyhow", + "itertools 0.11.0", + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "prost-types" version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "379119666929a1afd7a043aa6cf96fa67a6dce9af60c88095a4686dbce4c9c88" dependencies = [ - "prost", + "prost 0.11.8", ] [[package]] @@ -5363,7 +5424,7 @@ checksum = "9653c3ed92974e34c5a6e0a510864dab979760481714c172e0a34e437cb98804" name = "proxy" version = "1.2.6-alpha" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "arrow_ext", "async-trait", "bytes", @@ -5391,7 +5452,7 @@ dependencies = [ "prom-remote-api", "prometheus 0.12.0", "prometheus-static-metric", - "prost", + "prost 0.11.8", "query_engine", "query_frontend", "router", @@ -5409,7 +5470,7 @@ dependencies = [ "tokio-stream", "tonic 0.8.3", "warp", - "zstd", + "zstd 0.12.3+zstd.1.5.2", ] [[package]] @@ -5463,7 +5524,7 @@ dependencies = [ name = "query_engine" version = "1.2.6-alpha" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "async-trait", "bytes_ext", "catalog", @@ -5478,7 +5539,7 @@ dependencies = [ "iox_query", "logger", "macros", - "prost", + "prost 0.11.8", "query_frontend", "runtime", "serde", @@ -5493,7 +5554,7 @@ dependencies = [ name = "query_frontend" version = "1.2.6-alpha" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "async-trait", "catalog", "chrono", @@ -5529,9 +5590,9 @@ dependencies = [ [[package]] name = "query_functions" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c" +source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "chrono", "datafusion", "itertools 0.10.5", @@ -5802,6 +5863,12 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c" +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + [[package]] name = "remote_engine_client" version = "1.2.6-alpha" @@ -6227,9 +6294,9 @@ dependencies = [ [[package]] name = "schema" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c" +source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "hashbrown 0.13.2", "indexmap 1.9.3", "itertools 0.10.5", @@ -6353,7 +6420,7 @@ version = "1.2.6-alpha" dependencies = [ "analytic_engine", "arc-swap 1.6.0", - "arrow 43.0.0", + "arrow 49.0.0", "arrow_ext", "async-trait", "bytes_ext", @@ -6386,7 +6453,7 @@ dependencies = [ "prom-remote-api", "prometheus 0.12.0", "prometheus-static-metric", - "prost", + "prost 0.11.8", "proxy", "query_engine", "query_frontend", @@ -6407,7 +6474,7 @@ dependencies = [ "tonic 0.8.3", "wal", "warp", - "zstd", + "zstd 0.12.3+zstd.1.5.2", ] [[package]] @@ -6717,9 +6784,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.35.0" +version = "0.41.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca597d77c98894be1f965f2e4e2d2a61575d4998088e655476c73715c54b2b43" +checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" dependencies = [ "log", "serde", @@ -6728,13 +6795,13 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.1.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.48", ] [[package]] @@ -6897,7 +6964,7 @@ dependencies = [ name = "system_catalog" version = "1.2.6-alpha" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "async-trait", "bytes_ext", "catalog", @@ -6908,7 +6975,7 @@ dependencies = [ "horaedbproto 2.0.0", "logger", "macros", - "prost", + "prost 0.11.8", "snafu 0.6.10", "table_engine", "tokio", @@ -6927,7 +6994,7 @@ dependencies = [ name = "table_engine" version = "1.2.6-alpha" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "arrow_ext", "async-trait", "bytes_ext", @@ -6943,7 +7010,7 @@ dependencies = [ "lazy_static", "logger", "macros", - "prost", + "prost 0.11.8", "rand 0.7.3", "regex", "runtime", @@ -7024,7 +7091,7 @@ dependencies = [ [[package]] name = "test_helpers" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c" +source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" dependencies = [ "dotenvy", "observability_deps", @@ -7038,7 +7105,7 @@ dependencies = [ name = "test_util" version = "1.2.6-alpha" dependencies = [ - "arrow 43.0.0", + "arrow 49.0.0", "chrono", "common_types", "env_logger", @@ -7375,8 +7442,8 @@ dependencies = [ "hyper-timeout", "percent-encoding", "pin-project", - "prost", - "prost-derive", + "prost 0.11.8", + "prost-derive 0.11.8", "rustls-pemfile 1.0.2", "tokio", "tokio-rustls 0.23.4", @@ -7408,7 +7475,7 @@ dependencies = [ "hyper-timeout", "percent-encoding", "pin-project", - "prost", + "prost 0.11.8", "tokio", "tokio-stream", "tower", @@ -7804,7 +7871,7 @@ dependencies = [ "macros", "message_queue", "prometheus 0.12.0", - "prost", + "prost 0.11.8", "rand 0.8.5", "rocksdb", "runtime", @@ -8433,7 +8500,16 @@ version = "0.12.3+zstd.1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806" dependencies = [ - "zstd-safe", + "zstd-safe 6.0.4+zstd.1.5.4", +] + +[[package]] +name = "zstd" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" +dependencies = [ + "zstd-safe 7.0.0", ] [[package]] @@ -8446,6 +8522,15 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" +dependencies = [ + "zstd-sys", +] + [[package]] name = "zstd-sys" version = "2.0.7+zstd.1.5.4" diff --git a/Cargo.toml b/Cargo.toml index d195a121fd..aef95309bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,8 +85,8 @@ members = [ [workspace.dependencies] alloc_tracker = { path = "src/components/alloc_tracker" } -arrow = { version = "43.0.0", features = ["prettyprint"] } -arrow_ipc = { version = "43.0.0" } +arrow = { version = "49.0.0", features = ["prettyprint"] } +arrow_ipc = { version = "49.0.0" } arrow_ext = { path = "src/components/arrow_ext" } analytic_engine = { path = "src/analytic_engine" } arena = { path = "src/components/arena" } @@ -107,8 +107,8 @@ cluster = { path = "src/cluster" } criterion = "0.5" horaedb-client = "1.0.2" common_types = { path = "src/common_types" } -datafusion = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "9c3a537e25e5ab3299922864034f67fb2f79805d" } -datafusion-proto = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "9c3a537e25e5ab3299922864034f67fb2f79805d" } +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119"} +datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119" } derive_builder = "0.12" df_operator = { path = "src/df_operator" } df_engine_extensions = { path = "src/df_engine_extensions" } @@ -121,10 +121,10 @@ hash_ext = { path = "src/components/hash_ext" } hex = "0.4.3" hyperloglog = { git = "https://github.com/jedisct1/rust-hyperloglog.git", rev = "425487ce910f26636fbde8c4d640b538431aad50" } id_allocator = { path = "src/components/id_allocator" } -influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "a905863", package = "iox_query_influxql" } -influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "a905863", package = "influxdb_influxql_parser" } -influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "a905863", package = "iox_query" } -influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "a905863", package = "schema" } +influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "iox_query_influxql" } +influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "influxdb_influxql_parser" } +influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "iox_query" } +influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "schema" } interpreters = { path = "src/interpreters" } itertools = "0.10.5" lz4_flex = { version = "0.11", default-features = false, features = ["frame"] } @@ -142,7 +142,7 @@ panic_ext = { path = "src/components/panic_ext" } partitioned_lock = { path = "src/components/partitioned_lock" } partition_table_engine = { path = "src/partition_table_engine" } parquet_ext = { path = "src/components/parquet_ext" } -parquet = { version = "43.0.0" } +parquet = { version = "49.0.0" } paste = "1.0" pin-project-lite = "0.2.8" pprof = "0.12.1" @@ -172,9 +172,9 @@ size_ext = { path = "src/components/size_ext" } smallvec = "1.6" slog = "2.7" spin = "0.9.6" -sqlparser = { version = "0.35", features = ["serde"] } -system_catalog = { path = "src/system_catalog" } system_statis = { path = "src/components/system_stats" } +sqlparser = { version = "0.41", features = ["serde"] } +system_catalog = { path = "src/system_catalog" } table_engine = { path = "src/table_engine" } table_kv = { path = "src/components/table_kv" } tempfile = "3.1.0" diff --git a/src/analytic_engine/src/instance/reorder_memtable.rs b/src/analytic_engine/src/instance/reorder_memtable.rs index e6eab4d135..0c7900e52f 100644 --- a/src/analytic_engine/src/instance/reorder_memtable.rs +++ b/src/analytic_engine/src/instance/reorder_memtable.rs @@ -147,8 +147,11 @@ impl ExecutionPlan for ScanMemIter { })) } - fn statistics(&self) -> Statistics { - Statistics::default() + fn statistics( + &self, + ) -> std::result::Result + { + Ok(Statistics::new_unknown(&self.schema())) } } diff --git a/src/analytic_engine/src/row_iter/record_batch_stream.rs b/src/analytic_engine/src/row_iter/record_batch_stream.rs index 2a39c648c0..49c41f2432 100644 --- a/src/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/src/analytic_engine/src/row_iter/record_batch_stream.rs @@ -161,6 +161,7 @@ fn filter_record_batch( let filter_array = predicate .evaluate(record_batch) .map(|v| v.into_array(record_batch.num_rows())) + .context(FilterExec)? .context(FilterExec)?; let selected_rows = filter_array .as_any() diff --git a/src/common_types/src/datum.rs b/src/common_types/src/datum.rs index d152e9600a..4b8b373763 100644 --- a/src/common_types/src/datum.rs +++ b/src/common_types/src/datum.rs @@ -294,7 +294,7 @@ impl TryFrom<&SqlDataType> for DatumKind { SqlDataType::BigInt(_) => Ok(Self::Int64), SqlDataType::Int(_) => Ok(Self::Int32), SqlDataType::SmallInt(_) => Ok(Self::Int16), - SqlDataType::String => Ok(Self::String), + SqlDataType::String(_) => Ok(Self::String), SqlDataType::Varbinary(_) => Ok(Self::Varbinary), SqlDataType::Date => Ok(Self::Date), SqlDataType::Time(_, _) => Ok(Self::Time), @@ -1453,7 +1453,7 @@ impl Datum { ScalarValue::Date32(v) => v.map(Datum::Date), ScalarValue::Time64Nanosecond(v) => v.map(Datum::Time), ScalarValue::Dictionary(_, literal) => Datum::from_scalar_value(literal), - ScalarValue::List(_, _) + ScalarValue::List(_) | ScalarValue::Date64(_) | ScalarValue::Time32Second(_) | ScalarValue::Time32Millisecond(_) @@ -1467,10 +1467,12 @@ impl Datum { | ScalarValue::Decimal128(_, _, _) | ScalarValue::Null | ScalarValue::IntervalMonthDayNano(_) - | ScalarValue::Fixedsizelist(_, _, _) + | ScalarValue::FixedSizeList(_) | ScalarValue::DurationSecond(_) | ScalarValue::DurationMillisecond(_) | ScalarValue::DurationMicrosecond(_) + | ScalarValue::Decimal256(_, _, _) + | ScalarValue::LargeList(_) | ScalarValue::DurationNanosecond(_) => None, } } @@ -1502,7 +1504,7 @@ impl<'a> DatumView<'a> { v.map(|v| DatumView::Timestamp(Timestamp::new(v))) } ScalarValue::Dictionary(_, literal) => DatumView::from_scalar_value(literal), - ScalarValue::List(_, _) + ScalarValue::List(_) | ScalarValue::Date64(_) | ScalarValue::Time32Second(_) | ScalarValue::Time32Millisecond(_) @@ -1516,10 +1518,12 @@ impl<'a> DatumView<'a> { | ScalarValue::Decimal128(_, _, _) | ScalarValue::Null | ScalarValue::IntervalMonthDayNano(_) - | ScalarValue::Fixedsizelist(_, _, _) + | ScalarValue::FixedSizeList(_) | ScalarValue::DurationSecond(_) | ScalarValue::DurationMillisecond(_) | ScalarValue::DurationMicrosecond(_) + | ScalarValue::Decimal256(_, _, _) + | ScalarValue::LargeList(_) | ScalarValue::DurationNanosecond(_) => None, } } diff --git a/src/components/parquet_ext/src/meta_data.rs b/src/components/parquet_ext/src/meta_data.rs index 00a0bb3a17..ad18a36cb7 100644 --- a/src/components/parquet_ext/src/meta_data.rs +++ b/src/components/parquet_ext/src/meta_data.rs @@ -19,9 +19,10 @@ use std::{ops::Range, sync::Arc}; use async_trait::async_trait; use bytes::Bytes; +use datafusion::parquet::arrow::ParquetRecordBatchStreamBuilder; use generic_error::GenericResult; use parquet::{ - arrow::{arrow_reader::ArrowReaderOptions, ParquetRecordBatchStreamBuilder}, + arrow::arrow_reader::ArrowReaderOptions, errors::{ParquetError, Result}, file::{footer, metadata::ParquetMetaData}, }; diff --git a/src/components/parquet_ext/src/prune/min_max.rs b/src/components/parquet_ext/src/prune/min_max.rs index 8ea39299ef..5f478936d5 100644 --- a/src/components/parquet_ext/src/prune/min_max.rs +++ b/src/components/parquet_ext/src/prune/min_max.rs @@ -196,6 +196,15 @@ impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> { fn null_counts(&self, _column: &Column) -> Option { None } + + // TODO: support this. + fn contained( + &self, + column: &Column, + values: &std::collections::HashSet, + ) -> Option { + None + } } #[cfg(test)] @@ -245,7 +254,7 @@ mod test { }) .collect(); let schema = SchemaType::group_type_builder("schema") - .with_fields(&mut fields) + .with_fields(fields) .build() .unwrap(); diff --git a/src/df_engine_extensions/src/dist_sql_query/physical_plan.rs b/src/df_engine_extensions/src/dist_sql_query/physical_plan.rs index feba491f50..dd430f520d 100644 --- a/src/df_engine_extensions/src/dist_sql_query/physical_plan.rs +++ b/src/df_engine_extensions/src/dist_sql_query/physical_plan.rs @@ -129,8 +129,10 @@ impl ExecutionPlan for UnresolvedPartitionedScan { )) } - fn statistics(&self) -> Statistics { - Statistics::default() + fn statistics( + &self, + ) -> Result { + Ok(Statistics::new_unknown(&self.schema())) } } @@ -367,8 +369,10 @@ impl ExecutionPlan for ResolvedPartitionedScan { Ok(Box::pin(record_stream)) } - fn statistics(&self) -> Statistics { - Statistics::default() + fn statistics( + &self, + ) -> Result { + Ok(Statistics::new_unknown(&self.schema())) } fn metrics(&self) -> Option { @@ -578,8 +582,10 @@ impl ExecutionPlan for UnresolvedSubTableScan { )) } - fn statistics(&self) -> Statistics { - Statistics::default() + fn statistics( + &self, + ) -> Result { + Ok(Statistics::new_unknown(&self.schema())) } } diff --git a/src/df_operator/src/scalar.rs b/src/df_operator/src/scalar.rs index 1535ebdbd4..e71f29148e 100644 --- a/src/df_operator/src/scalar.rs +++ b/src/df_operator/src/scalar.rs @@ -43,7 +43,7 @@ impl ScalarUdf { #[inline] pub fn name(&self) -> &str { - &self.df_udf.name + &self.df_udf.name() } /// Convert into datafusion's udf diff --git a/src/df_operator/src/udaf.rs b/src/df_operator/src/udaf.rs index 448a26c626..b2bb5838cd 100644 --- a/src/df_operator/src/udaf.rs +++ b/src/df_operator/src/udaf.rs @@ -50,7 +50,7 @@ impl AggregateUdf { #[inline] pub fn name(&self) -> &str { - &self.df_udaf.name + &self.df_udaf.name() } #[inline] diff --git a/src/interpreters/src/insert.rs b/src/interpreters/src/insert.rs index cac5af0cec..c67ff1dfc1 100644 --- a/src/interpreters/src/insert.rs +++ b/src/interpreters/src/insert.rs @@ -373,6 +373,6 @@ fn get_or_extract_column_from_row_groups( cached_column_values.insert(column_idx, columnar_value.clone()); Ok(columnar_value) })?; - - Ok(column.into_array(num_rows)) + // TODO: solve unwarp + Ok(column.into_array(num_rows).unwrap()) } diff --git a/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs b/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs index c963c75fad..24f261cd6d 100644 --- a/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs +++ b/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs @@ -21,7 +21,9 @@ use std::sync::Arc; use datafusion::{ config::ConfigOptions, - physical_optimizer::{optimizer::PhysicalOptimizerRule, repartition::Repartition}, + physical_optimizer::{ + enforce_distribution::EnforceDistribution, optimizer::PhysicalOptimizerRule, + }, physical_plan::ExecutionPlan, }; use logger::debug; @@ -34,7 +36,7 @@ pub struct RepartitionAdapter { impl Adapter for RepartitionAdapter { fn may_adapt(original_rule: OptimizeRuleRef) -> OptimizeRuleRef { - if original_rule.name() == Repartition::new().name() { + if original_rule.name() == EnforceDistribution::new().name() { Arc::new(Self { original_rule }) } else { original_rule @@ -67,4 +69,4 @@ impl PhysicalOptimizerRule for RepartitionAdapter { fn schema_check(&self) -> bool { true } -} +} \ No newline at end of file diff --git a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs index a5a6161c9b..12c94076c9 100644 --- a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs +++ b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs @@ -236,9 +236,9 @@ impl ExecutionPlan for PromAlignExec { })) } - fn statistics(&self) -> Statistics { + fn statistics(&self) -> std::result::Result { // TODO(chenxiang) - Statistics::default() + Ok(Statistics::new_unknown(&self.schema())) } } @@ -514,7 +514,7 @@ impl Stream for PromAlignReader { if !tsid_samples.is_empty() { Poll::Ready(Some( self.samples_to_record_batch(schema, tsid_samples) - .map_err(DataFusionError::ArrowError), + .map_err(|err| DataFusionError::ArrowError(err, None)), )) } else { Poll::Ready(Some(Ok(RecordBatch::new_empty(schema)))) @@ -529,7 +529,7 @@ impl Stream for PromAlignReader { if !tsid_samples.is_empty() { return Poll::Ready(Some( self.samples_to_record_batch(schema, tsid_samples) - .map_err(DataFusionError::ArrowError), + .map_err(|err| DataFusionError::ArrowError(err, None)), )); } } diff --git a/src/query_engine/src/datafusion_impl/task_context.rs b/src/query_engine/src/datafusion_impl/task_context.rs index aee9812871..e0cc01ed50 100644 --- a/src/query_engine/src/datafusion_impl/task_context.rs +++ b/src/query_engine/src/datafusion_impl/task_context.rs @@ -116,7 +116,7 @@ impl Preprocessor { ctx: &Context, ) -> Result> { // Decode to datafusion physical plan. - let protobuf = protobuf::PhysicalPlanNode::decode(encoded_plan) + let protobuf = protobuf::PhysicalPlanNode::try_decode(encoded_plan) .box_err() .with_context(|| ExecutorWithCause { msg: Some("failed to decode plan".to_string()), diff --git a/src/query_frontend/src/logical_optimizer/type_conversion.rs b/src/query_frontend/src/logical_optimizer/type_conversion.rs index 89f0a14ec0..e8ccd42fc5 100644 --- a/src/query_frontend/src/logical_optimizer/type_conversion.rs +++ b/src/query_frontend/src/logical_optimizer/type_conversion.rs @@ -124,6 +124,7 @@ impl AnalyzerRule for TypeConversion { LogicalPlan::Subquery(_) | LogicalPlan::Statement { .. } | LogicalPlan::SubqueryAlias(_) + | LogicalPlan::Copy(_) | LogicalPlan::Unnest(_) | LogicalPlan::EmptyRelation { .. } => Ok(plan.clone()), } @@ -209,9 +210,10 @@ impl<'a> TypeRewriter<'a> { } } - let array = value.to_array(); + let array = value.to_array()?; ScalarValue::try_from_array( - &compute::cast(&array, data_type).map_err(DataFusionError::ArrowError)?, + &compute::cast(&array, data_type) + .map_err(|err| DataFusionError::ArrowError(err, None))?, // index: Converts a value in `array` at `index` into a ScalarValue 0, ) diff --git a/src/query_frontend/src/parser.rs b/src/query_frontend/src/parser.rs index e01c4d03bc..cae7256a01 100644 --- a/src/query_frontend/src/parser.rs +++ b/src/query_frontend/src/parser.rs @@ -352,11 +352,13 @@ impl<'a> Parser<'a> { is_dictionary = true; } } - if c.data_type != DataType::String && is_dictionary { - return parser_err!(format!( - "Only string column can be dictionary encoded: {:?}", - c.to_string() - )); + if let DataType::String(_) = c.data_type { + if is_dictionary { + return parser_err!(format!( + "Only string column can be dictionary encoded: {:?}", + c.to_string() + )); + } } } @@ -1001,7 +1003,7 @@ mod tests { let columns = vec![ make_column_def("c1", DataType::Timestamp(None, TimezoneInfo::None)), make_column_def("c2", DataType::Double), - make_column_def("c3", DataType::String), + make_column_def("c3", DataType::String(None)), ]; let sql = "CREATE TABLE mytbl(c1 timestamp, c2 double, c3 string,) ENGINE = XX"; @@ -1027,7 +1029,7 @@ mod tests { let columns = vec![ make_column_def("c1", DataType::Timestamp(None, TimezoneInfo::None)), make_comment_column_def("c2", DataType::Double, "id".to_string()), - make_comment_column_def("c3", DataType::String, "name".to_string()), + make_comment_column_def("c3", DataType::String(None), "name".to_string()), ]; let sql = "CREATE TABLE mytbl(c1 timestamp, c2 double comment 'id', c3 string comment 'name',) ENGINE = XX"; @@ -1053,7 +1055,7 @@ mod tests { let columns = vec![ make_column_def("c1", DataType::Timestamp(None, TimezoneInfo::None)), make_column_def("c2", DataType::Timestamp(None, TimezoneInfo::None)), - make_column_def("c3", DataType::String), + make_column_def("c3", DataType::String(None)), make_column_def("c4", DataType::Double), ]; @@ -1253,7 +1255,7 @@ mod tests { table_name: make_table_name("t"), columns: vec![ make_column_def("c1", DataType::Double), - make_column_def("c2", DataType::String), + make_column_def("c2", DataType::String(None)), ], }); expect_parse_ok(sql, expected).unwrap(); @@ -1277,7 +1279,7 @@ mod tests { table_name: make_table_name("t"), columns: vec![ make_column_def("c1", DataType::Double), - make_tag_column_def("c2", DataType::String), + make_tag_column_def("c2", DataType::String(None)), ], }); expect_parse_ok(sql, expected).unwrap(); @@ -1287,7 +1289,7 @@ mod tests { let sql = "ALTER TABLE t ADD COLUMN c1 string tag"; let expected = Statement::AlterAddColumn(AlterAddColumn { table_name: make_table_name("t"), - columns: vec![make_tag_column_def("c1", DataType::String)], + columns: vec![make_tag_column_def("c1", DataType::String(None))], }); expect_parse_ok(sql, expected).unwrap(); } diff --git a/src/query_frontend/src/planner.rs b/src/query_frontend/src/planner.rs index 8e02f5ee9e..e5c8a583ab 100644 --- a/src/query_frontend/src/planner.rs +++ b/src/query_frontend/src/planner.rs @@ -984,7 +984,7 @@ impl<'a, P: MetaProvider> PlannerDelegate<'a, P> { } } - let rows = build_row_group(schema, source, column_index_in_insert)?; + let rows = build_row_group(schema, source.unwrap(), column_index_in_insert)?; Ok(Plan::Insert(InsertPlan { table, diff --git a/src/query_frontend/src/promql/convert.rs b/src/query_frontend/src/promql/convert.rs index 297e71612c..6ff90d5bba 100644 --- a/src/query_frontend/src/promql/convert.rs +++ b/src/query_frontend/src/promql/convert.rs @@ -24,7 +24,7 @@ use common_types::{ use datafusion::{ logical_expr::{ avg, count, - expr::{Alias, ScalarUDF}, + expr::{Alias, ScalarFunction}, lit, logical_plan::{Extension, LogicalPlan, LogicalPlanBuilder}, max, min, sum, Expr as DataFusionExpr, @@ -316,11 +316,10 @@ impl Expr { // TSID is lost after aggregate, but PromAlignNode need a unique id, so // mock UUID as tsid based on groupby keys DataFusionExpr::Alias(Alias { - expr: Box::new(DataFusionExpr::ScalarUDF(ScalarUDF { - fun: Arc::new(create_unique_id(tag_exprs.len())), - args: tag_exprs.clone(), - })), + expr: Box::new(DataFusionExpr::ScalarFunction( + ScalarFunction::new_udf(Arc::new(create_unique_id(tag_exprs.len())), tag_exprs.clone()))), name: TSID_COLUMN.to_string(), + relation: None, }); let mut projection = tag_exprs.clone(); projection.extend(vec![ @@ -371,6 +370,7 @@ impl Expr { Ok(DataFusionExpr::Alias(Alias { expr: Box::new(expr), name: alias, + relation: None, })) } } diff --git a/src/query_frontend/src/provider.rs b/src/query_frontend/src/provider.rs index 4380829fef..67750fcb0e 100644 --- a/src/query_frontend/src/provider.rs +++ b/src/query_frontend/src/provider.rs @@ -413,6 +413,13 @@ impl<'a, P: MetaProvider> ContextProvider for ContextProviderAdapter<'a, P> { fn get_window_meta(&self, _name: &str) -> Option> { None } + + fn get_table_source( + &self, + name: TableReference, + ) -> datafusion::error::Result> { + self.get_table_provider(name) + } } struct SchemaProviderAdapter { diff --git a/src/table_engine/src/predicate.rs b/src/table_engine/src/predicate.rs index 723724f35e..e71180a0e0 100644 --- a/src/table_engine/src/predicate.rs +++ b/src/table_engine/src/predicate.rs @@ -329,6 +329,8 @@ impl<'a> TimeRangeExtractor<'a> { | Operator::BitwiseAnd | Operator::BitwiseOr | Operator::BitwiseXor + | Operator::AtArrow + | Operator::ArrowAt | Operator::BitwiseShiftRight | Operator::BitwiseShiftLeft | Operator::StringConcat => TimeRange::min_to_max(), @@ -432,15 +434,12 @@ impl<'a> TimeRangeExtractor<'a> { | Expr::TryCast { .. } | Expr::Sort { .. } | Expr::ScalarFunction { .. } - | Expr::ScalarUDF { .. } | Expr::AggregateFunction { .. } | Expr::WindowFunction { .. } - | Expr::AggregateUDF { .. } | Expr::Wildcard { .. } | Expr::Exists { .. } | Expr::InSubquery { .. } | Expr::ScalarSubquery(_) - | Expr::QualifiedWildcard { .. } | Expr::GroupingSet(_) | Expr::GetIndexedField { .. } | Expr::OuterReferenceColumn { .. } diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs index d5e4c69f18..49f76460e2 100644 --- a/src/table_engine/src/provider.rs +++ b/src/table_engine/src/provider.rs @@ -467,9 +467,12 @@ impl ExecutionPlan for ScanTable { Some(metric_set) } - fn statistics(&self) -> Statistics { + fn statistics( + &self, + ) -> std::result::Result + { // TODO(yingwen): Implement this - Statistics::default() + Ok(Statistics::new_unknown(&self.schema())) } } From 3a447ac357a45c8046f769da749dcc13f8ec537c Mon Sep 17 00:00:00 2001 From: tanruixiang Date: Thu, 18 Jan 2024 16:48:24 +0800 Subject: [PATCH 02/25] fix --- Cargo.toml | 2 +- src/components/parquet_ext/src/prune/min_max.rs | 6 +++--- src/df_operator/src/scalar.rs | 2 +- src/df_operator/src/udaf.rs | 2 +- .../src/datafusion_impl/physical_optimizer/repartition.rs | 2 +- .../datafusion_impl/physical_plan_extension/prom_align.rs | 7 +++++-- 6 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index aef95309bd..16fcb93faf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -107,7 +107,7 @@ cluster = { path = "src/cluster" } criterion = "0.5" horaedb-client = "1.0.2" common_types = { path = "src/common_types" } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119"} +datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119" } datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119" } derive_builder = "0.12" df_operator = { path = "src/df_operator" } diff --git a/src/components/parquet_ext/src/prune/min_max.rs b/src/components/parquet_ext/src/prune/min_max.rs index 5f478936d5..6bd3ad7496 100644 --- a/src/components/parquet_ext/src/prune/min_max.rs +++ b/src/components/parquet_ext/src/prune/min_max.rs @@ -200,8 +200,8 @@ impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> { // TODO: support this. fn contained( &self, - column: &Column, - values: &std::collections::HashSet, + _column: &Column, + _values: &std::collections::HashSet, ) -> Option { None } @@ -239,7 +239,7 @@ mod test { } fn prepare_parquet_schema_descr(schema: &ArrowSchema) -> SchemaDescPtr { - let mut fields = schema + let fields = schema .fields() .iter() .map(|field| { diff --git a/src/df_operator/src/scalar.rs b/src/df_operator/src/scalar.rs index e71f29148e..4ae3372cfd 100644 --- a/src/df_operator/src/scalar.rs +++ b/src/df_operator/src/scalar.rs @@ -43,7 +43,7 @@ impl ScalarUdf { #[inline] pub fn name(&self) -> &str { - &self.df_udf.name() + self.df_udf.name() } /// Convert into datafusion's udf diff --git a/src/df_operator/src/udaf.rs b/src/df_operator/src/udaf.rs index b2bb5838cd..312990b252 100644 --- a/src/df_operator/src/udaf.rs +++ b/src/df_operator/src/udaf.rs @@ -50,7 +50,7 @@ impl AggregateUdf { #[inline] pub fn name(&self) -> &str { - &self.df_udaf.name() + self.df_udaf.name() } #[inline] diff --git a/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs b/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs index 24f261cd6d..d1406a75b9 100644 --- a/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs +++ b/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs @@ -69,4 +69,4 @@ impl PhysicalOptimizerRule for RepartitionAdapter { fn schema_check(&self) -> bool { true } -} \ No newline at end of file +} diff --git a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs index 12c94076c9..c1dcb27bf2 100644 --- a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs +++ b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs @@ -236,7 +236,10 @@ impl ExecutionPlan for PromAlignExec { })) } - fn statistics(&self) -> std::result::Result { + fn statistics( + &self, + ) -> std::result::Result + { // TODO(chenxiang) Ok(Statistics::new_unknown(&self.schema())) } @@ -529,7 +532,7 @@ impl Stream for PromAlignReader { if !tsid_samples.is_empty() { return Poll::Ready(Some( self.samples_to_record_batch(schema, tsid_samples) - .map_err(|err| DataFusionError::ArrowError(err, None)), + .map_err(|err| DataFusionError::ArrowError(err, None)), )); } } From 24f4d5b68503761967493b8e7bf29c54209a9930 Mon Sep 17 00:00:00 2001 From: tanruixiang Date: Thu, 18 Jan 2024 17:07:09 +0800 Subject: [PATCH 03/25] fix --- src/df_engine_extensions/src/dist_sql_query/test_util.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/df_engine_extensions/src/dist_sql_query/test_util.rs b/src/df_engine_extensions/src/dist_sql_query/test_util.rs index 1f4e788fef..ffa988812c 100644 --- a/src/df_engine_extensions/src/dist_sql_query/test_util.rs +++ b/src/df_engine_extensions/src/dist_sql_query/test_util.rs @@ -263,7 +263,6 @@ impl TestContext { self.group_by.clone(), self.aggr_exprs.clone(), vec![None], - vec![None], input, input_schema.clone(), ) @@ -289,7 +288,6 @@ impl TestContext { final_group_by, self.aggr_exprs.clone(), vec![None], - vec![None], merge, input_schema, ) @@ -490,8 +488,8 @@ impl ExecutionPlan for MockScan { unimplemented!() } - fn statistics(&self) -> datafusion::physical_plan::Statistics { - unimplemented!() + fn statistics(&self) -> DfResult { + Ok(datafusion::physical_plan::Statistics::new_unknown(&self.schema())) } } From 88dc1f2b1be4a58c6701f96acb7c3daad1b168ff Mon Sep 17 00:00:00 2001 From: tanruixiang Date: Thu, 18 Jan 2024 17:09:25 +0800 Subject: [PATCH 04/25] fix --- src/df_engine_extensions/src/dist_sql_query/test_util.rs | 4 +++- src/query_engine/src/datafusion_impl/task_context.rs | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/df_engine_extensions/src/dist_sql_query/test_util.rs b/src/df_engine_extensions/src/dist_sql_query/test_util.rs index ffa988812c..813f142b96 100644 --- a/src/df_engine_extensions/src/dist_sql_query/test_util.rs +++ b/src/df_engine_extensions/src/dist_sql_query/test_util.rs @@ -489,7 +489,9 @@ impl ExecutionPlan for MockScan { } fn statistics(&self) -> DfResult { - Ok(datafusion::physical_plan::Statistics::new_unknown(&self.schema())) + Ok(datafusion::physical_plan::Statistics::new_unknown( + &self.schema(), + )) } } diff --git a/src/query_engine/src/datafusion_impl/task_context.rs b/src/query_engine/src/datafusion_impl/task_context.rs index e0cc01ed50..f5875a1331 100644 --- a/src/query_engine/src/datafusion_impl/task_context.rs +++ b/src/query_engine/src/datafusion_impl/task_context.rs @@ -40,7 +40,7 @@ use df_engine_extensions::dist_sql_query::{ }; use futures::future::BoxFuture; use generic_error::BoxError; -use prost::Message; + use runtime::Priority; use snafu::ResultExt; use table_engine::{ From 81adc4a43145f1d7dbcf8e5f2e235eb90ae5c900 Mon Sep 17 00:00:00 2001 From: tanruixiang Date: Fri, 26 Jan 2024 16:45:04 +0800 Subject: [PATCH 05/25] fix warning --- src/analytic_engine/src/instance/reorder_memtable.rs | 4 ++-- src/analytic_engine/src/row_iter/record_batch_stream.rs | 2 +- src/analytic_engine/src/sst/parquet/async_reader.rs | 2 +- src/query_engine/src/datafusion_impl/mod.rs | 4 ++-- src/query_engine/src/datafusion_impl/task_context.rs | 1 - src/query_frontend/src/logical_optimizer/mod.rs | 3 ++- src/query_frontend/src/promql/convert.rs | 2 +- src/query_frontend/src/promql/remote.rs | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/analytic_engine/src/instance/reorder_memtable.rs b/src/analytic_engine/src/instance/reorder_memtable.rs index 0c7900e52f..c37417bf64 100644 --- a/src/analytic_engine/src/instance/reorder_memtable.rs +++ b/src/analytic_engine/src/instance/reorder_memtable.rs @@ -262,8 +262,8 @@ impl Reorder { pub async fn into_stream(self) -> Result { // 1. Init datafusion context let runtime = Arc::new(RuntimeEnv::default()); - let state = SessionState::with_config_rt(SessionConfig::new(), runtime); - let ctx = SessionContext::with_state(state); + let state = SessionState::new_with_config_rt(SessionConfig::new(), runtime); + let ctx = SessionContext::new_with_state(state); let table_provider = Arc::new(MemIterProvider { arrow_schema: self.schema.to_arrow_schema_ref(), iter: Mutex::new(Some(self.iter)), diff --git a/src/analytic_engine/src/row_iter/record_batch_stream.rs b/src/analytic_engine/src/row_iter/record_batch_stream.rs index 49c41f2432..5740d73715 100644 --- a/src/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/src/analytic_engine/src/row_iter/record_batch_stream.rs @@ -32,7 +32,7 @@ use common_types::{ use datafusion::{ common::ToDFSchema, error::DataFusionError, - optimizer::utils::conjunction, + logical_expr::utils::conjunction, physical_expr::{self, execution_props::ExecutionProps}, physical_plan::PhysicalExpr, }; diff --git a/src/analytic_engine/src/sst/parquet/async_reader.rs b/src/analytic_engine/src/sst/parquet/async_reader.rs index 94feeab2c5..49747b5376 100644 --- a/src/analytic_engine/src/sst/parquet/async_reader.rs +++ b/src/analytic_engine/src/sst/parquet/async_reader.rs @@ -219,7 +219,7 @@ impl<'a> Reader<'a> { ) -> Result> { // TODO: remove fixed partition let partition = 0; - let exprs = datafusion::optimizer::utils::conjunction(self.predicate.exprs().to_vec()); + let exprs = datafusion::logical_expr::utils::conjunction(self.predicate.exprs().to_vec()); let exprs = match exprs { Some(exprs) => exprs, None => return Ok(None), diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs index 48e42c211b..482628f836 100644 --- a/src/query_engine/src/datafusion_impl/mod.rs +++ b/src/query_engine/src/datafusion_impl/mod.rs @@ -137,7 +137,7 @@ impl DfContextBuilder { // Using default logcial optimizer, if want to add more custom rule, using // `add_optimizer_rule` to add. - let state = SessionState::with_config_rt(df_session_config, self.runtime_env.clone()); - SessionContext::with_state(state) + let state = SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone()); + SessionContext::new_with_state(state) } } diff --git a/src/query_engine/src/datafusion_impl/task_context.rs b/src/query_engine/src/datafusion_impl/task_context.rs index f5875a1331..d1ea667de9 100644 --- a/src/query_engine/src/datafusion_impl/task_context.rs +++ b/src/query_engine/src/datafusion_impl/task_context.rs @@ -40,7 +40,6 @@ use df_engine_extensions::dist_sql_query::{ }; use futures::future::BoxFuture; use generic_error::BoxError; - use runtime::Priority; use snafu::ResultExt; use table_engine::{ diff --git a/src/query_frontend/src/logical_optimizer/mod.rs b/src/query_frontend/src/logical_optimizer/mod.rs index 4d62e87750..8f2bf42a2c 100644 --- a/src/query_frontend/src/logical_optimizer/mod.rs +++ b/src/query_frontend/src/logical_optimizer/mod.rs @@ -30,7 +30,8 @@ use datafusion::{ use type_conversion::TypeConversion; pub fn optimize_plan(plan: &LogicalPlan) -> Result { - let state = SessionState::with_config_rt(SessionConfig::new(), Arc::new(RuntimeEnv::default())); + let state = + SessionState::new_with_config_rt(SessionConfig::new(), Arc::new(RuntimeEnv::default())); let state = register_analyzer_rules(state); // Register iox optimizers, used by influxql. let state = influxql_query::logical_optimizer::register_iox_logical_optimizers(state); diff --git a/src/query_frontend/src/promql/convert.rs b/src/query_frontend/src/promql/convert.rs index 6ff90d5bba..f364a0b101 100644 --- a/src/query_frontend/src/promql/convert.rs +++ b/src/query_frontend/src/promql/convert.rs @@ -578,7 +578,7 @@ impl Selector { .context(TableNotFound { name: &table })?; let table_provider = meta_provider - .get_table_provider(table_ref.table.name().into()) + .get_table_source(table_ref.table.name().into()) .context(TableProviderNotFound { name: &table })?; let schema = Schema::try_from(table_provider.schema()).context(BuildTableSchema)?; let timestamp_column_name = schema.timestamp_name().to_string(); diff --git a/src/query_frontend/src/promql/remote.rs b/src/query_frontend/src/promql/remote.rs index c687b51d0f..c3c1439ec7 100644 --- a/src/query_frontend/src/promql/remote.rs +++ b/src/query_frontend/src/promql/remote.rs @@ -64,7 +64,7 @@ pub fn remote_query_to_plan( let (metric, field, mut filters) = normalize_matchers(query.matchers)?; let table_provider = meta_provider - .get_table_provider(TableReference::bare(&metric)) + .get_table_source(TableReference::bare(&metric)) .context(TableProviderNotFound { name: &metric })?; let schema = Schema::try_from(table_provider.schema()).context(BuildTableSchema)?; let timestamp_col_name = schema.timestamp_name(); From b34a9b6fecefe3396f351855c19ec58b48f2e948 Mon Sep 17 00:00:00 2001 From: tanruixiang Date: Fri, 26 Jan 2024 16:49:36 +0800 Subject: [PATCH 06/25] fix --- src/query_frontend/src/promql/convert.rs | 5 +++-- src/query_frontend/src/promql/remote.rs | 3 +-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/query_frontend/src/promql/convert.rs b/src/query_frontend/src/promql/convert.rs index f364a0b101..e92e0b9922 100644 --- a/src/query_frontend/src/promql/convert.rs +++ b/src/query_frontend/src/promql/convert.rs @@ -27,9 +27,10 @@ use datafusion::{ expr::{Alias, ScalarFunction}, lit, logical_plan::{Extension, LogicalPlan, LogicalPlanBuilder}, - max, min, sum, Expr as DataFusionExpr, + max, min, sum, + utils::conjunction, + Expr as DataFusionExpr, }, - optimizer::utils::conjunction, prelude::ident, sql::planner::ContextProvider, }; diff --git a/src/query_frontend/src/promql/remote.rs b/src/query_frontend/src/promql/remote.rs index c3c1439ec7..e8fc99e8be 100644 --- a/src/query_frontend/src/promql/remote.rs +++ b/src/query_frontend/src/promql/remote.rs @@ -21,8 +21,7 @@ use std::sync::Arc; use common_types::{schema::Schema, time::TimeRange}; use datafusion::{ - logical_expr::{LogicalPlanBuilder, Operator}, - optimizer::utils::conjunction, + logical_expr::{utils::conjunction, LogicalPlanBuilder, Operator}, prelude::{ident, lit, Expr}, sql::{planner::ContextProvider, TableReference}, }; From 5f048e50d86d6c341b75cc36e9e56bc7af765a4c Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Fri, 26 Jan 2024 16:55:12 +0800 Subject: [PATCH 07/25] fix: remove bad optimize rule --- src/query_engine/src/datafusion_impl/mod.rs | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs index 482628f836..09e00ac0a5 100644 --- a/src/query_engine/src/datafusion_impl/mod.rs +++ b/src/query_engine/src/datafusion_impl/mod.rs @@ -24,7 +24,7 @@ use datafusion::{ runtime_env::{RuntimeConfig, RuntimeEnv}, FunctionRegistry, }, - prelude::{SessionConfig, SessionContext}, + prelude::{SessionConfig, SessionContext}, physical_optimizer::{output_requirements::OutputRequirements, aggregate_statistics::AggregateStatistics, join_selection::JoinSelection, limited_distinct_aggregation::LimitedDistinctAggregation, combine_partial_final_agg::CombinePartialFinalAggregate, enforce_sorting::EnforceSorting, coalesce_batches::CoalesceBatches, pipeline_checker::PipelineChecker, topk_aggregation::TopKAggregation}, }; use df_engine_extensions::codec::PhysicalExtensionCodecImpl; use table_engine::{provider::HoraeDBOptions, remote::RemoteEngineRef}; @@ -137,7 +137,23 @@ impl DfContextBuilder { // Using default logcial optimizer, if want to add more custom rule, using // `add_optimizer_rule` to add. - let state = SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone()); + let mut state = SessionState::with_config_rt(df_session_config, self.runtime_env.clone()); + state = state.with_physical_optimizer_rules(vec![ + Arc::new(OutputRequirements::new_add_mode()), + Arc::new(AggregateStatistics::new()), + Arc::new(JoinSelection::new()), + Arc::new(LimitedDistinctAggregation::new()), + // TODO: this rule will throw this error + // Internal error: Children cannot be replaced in ScanTable + // Arc::new(EnforceDistribution::new()), + Arc::new(CombinePartialFinalAggregate::new()), + Arc::new(EnforceSorting::new()), + Arc::new(CoalesceBatches::new()), + Arc::new(OutputRequirements::new_remove_mode()), + Arc::new(PipelineChecker::new()), + Arc::new(TopKAggregation::new()), + // Arc::new(ProjectionPushdown::new()), + ]); SessionContext::new_with_state(state) } } From 5f6a6d47cd4d510e3b557cf9765e322b192a78f8 Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Fri, 26 Jan 2024 17:14:24 +0800 Subject: [PATCH 08/25] fix ut --- src/analytic_engine/src/instance/reorder_memtable.rs | 5 ++++- src/query_engine/src/datafusion_impl/mod.rs | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/analytic_engine/src/instance/reorder_memtable.rs b/src/analytic_engine/src/instance/reorder_memtable.rs index c37417bf64..fb28c4a13f 100644 --- a/src/analytic_engine/src/instance/reorder_memtable.rs +++ b/src/analytic_engine/src/instance/reorder_memtable.rs @@ -262,7 +262,10 @@ impl Reorder { pub async fn into_stream(self) -> Result { // 1. Init datafusion context let runtime = Arc::new(RuntimeEnv::default()); - let state = SessionState::new_with_config_rt(SessionConfig::new(), runtime); + let mut state = SessionState::new_with_config_rt(SessionConfig::new(), runtime); + // The physical optimizer rules have bug, and the plan here is simple, optimize is not required, + // so we disable it here. + state = state.with_physical_optimizer_rules(vec![]); let ctx = SessionContext::new_with_state(state); let table_provider = Arc::new(MemIterProvider { arrow_schema: self.schema.to_arrow_schema_ref(), diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs index 09e00ac0a5..0d0d576349 100644 --- a/src/query_engine/src/datafusion_impl/mod.rs +++ b/src/query_engine/src/datafusion_impl/mod.rs @@ -152,6 +152,7 @@ impl DfContextBuilder { Arc::new(OutputRequirements::new_remove_mode()), Arc::new(PipelineChecker::new()), Arc::new(TopKAggregation::new()), + // TODO: This rule is not public, so we can't use it // Arc::new(ProjectionPushdown::new()), ]); SessionContext::new_with_state(state) From 7493d7e1ccfc1c81548ae988ea6dfadbb05bf130 Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Mon, 29 Jan 2024 10:40:19 +0800 Subject: [PATCH 09/25] ensure aggr expr same size with filter exprs --- src/df_engine_extensions/src/dist_sql_query/test_util.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/df_engine_extensions/src/dist_sql_query/test_util.rs b/src/df_engine_extensions/src/dist_sql_query/test_util.rs index 813f142b96..c1d00fb227 100644 --- a/src/df_engine_extensions/src/dist_sql_query/test_util.rs +++ b/src/df_engine_extensions/src/dist_sql_query/test_util.rs @@ -262,7 +262,7 @@ impl TestContext { AggregateMode::Partial, self.group_by.clone(), self.aggr_exprs.clone(), - vec![None], + vec![None; self.aggr_exprs.len()], input, input_schema.clone(), ) @@ -287,7 +287,7 @@ impl TestContext { AggregateMode::Final, final_group_by, self.aggr_exprs.clone(), - vec![None], + vec![None; self.aggr_exprs.len()], merge, input_schema, ) From 2fb92c44554c889aba5a3aa073c8e72ffba03900 Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Mon, 29 Jan 2024 11:01:45 +0800 Subject: [PATCH 10/25] fix clippy --- src/df_operator/src/scalar.rs | 1 + src/df_operator/src/udaf.rs | 1 + src/query_engine/src/datafusion_impl/mod.rs | 2 +- src/query_frontend/src/influxql/planner.rs | 2 +- .../src/logical_optimizer/type_conversion.rs | 6 +++--- src/query_frontend/src/provider.rs | 8 +------- 6 files changed, 8 insertions(+), 12 deletions(-) diff --git a/src/df_operator/src/scalar.rs b/src/df_operator/src/scalar.rs index 4ae3372cfd..58e8214c1a 100644 --- a/src/df_operator/src/scalar.rs +++ b/src/df_operator/src/scalar.rs @@ -31,6 +31,7 @@ pub struct ScalarUdf { } impl ScalarUdf { + #[allow(deprecated)] pub fn create(name: &str, func: ScalarFunction) -> Self { let signature = func.signature().to_datafusion_signature(); let return_type = func.return_type().to_datafusion_return_type(); diff --git a/src/df_operator/src/udaf.rs b/src/df_operator/src/udaf.rs index 312990b252..44f3913673 100644 --- a/src/df_operator/src/udaf.rs +++ b/src/df_operator/src/udaf.rs @@ -31,6 +31,7 @@ pub struct AggregateUdf { } impl AggregateUdf { + #[allow(deprecated)] pub fn create(name: &str, func: AggregateFunction) -> Self { let signature = func.signature().to_datafusion_signature(); let return_type = func.return_type().to_datafusion_return_type(); diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs index 0d0d576349..46b96f01b0 100644 --- a/src/query_engine/src/datafusion_impl/mod.rs +++ b/src/query_engine/src/datafusion_impl/mod.rs @@ -137,7 +137,7 @@ impl DfContextBuilder { // Using default logcial optimizer, if want to add more custom rule, using // `add_optimizer_rule` to add. - let mut state = SessionState::with_config_rt(df_session_config, self.runtime_env.clone()); + let mut state = SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone()); state = state.with_physical_optimizer_rules(vec![ Arc::new(OutputRequirements::new_add_mode()), Arc::new(AggregateStatistics::new()), diff --git a/src/query_frontend/src/influxql/planner.rs b/src/query_frontend/src/influxql/planner.rs index 3b21228ad3..ed8d9c1460 100644 --- a/src/query_frontend/src/influxql/planner.rs +++ b/src/query_frontend/src/influxql/planner.rs @@ -57,7 +57,7 @@ struct InfluxQLSchemaProvider<'a, P: MetaProvider> { impl<'a, P: MetaProvider> SchemaProvider for InfluxQLSchemaProvider<'a, P> { fn get_table_provider(&self, name: &str) -> datafusion::error::Result> { self.context_provider - .get_table_provider(name.into()) + .get_table_source(name.into()) .map_err(|e| { DataFusionError::Plan(format!( "measurement does not exist, measurement:{name}, source:{e}" diff --git a/src/query_frontend/src/logical_optimizer/type_conversion.rs b/src/query_frontend/src/logical_optimizer/type_conversion.rs index e8ccd42fc5..3d67f458e6 100644 --- a/src/query_frontend/src/logical_optimizer/type_conversion.rs +++ b/src/query_frontend/src/logical_optimizer/type_conversion.rs @@ -30,7 +30,7 @@ use datafusion::{ logical_expr::{ expr::{Expr, InList}, logical_plan::{Filter, LogicalPlan, TableScan}, - utils, Between, BinaryExpr, ExprSchemable, Operator, + Between, BinaryExpr, ExprSchemable, Operator, }, optimizer::analyzer::AnalyzerRule, scalar::ScalarValue, @@ -113,13 +113,13 @@ impl AnalyzerRule for TypeConversion { .map(|plan| self.analyze(plan.clone(), config)) .collect::>>()?; - let expr = plan + let exprs = plan .expressions() .into_iter() .map(|e| e.rewrite(&mut rewriter)) .collect::>>()?; - Ok(utils::from_plan(&plan, &expr, &new_inputs)?) + Ok(LogicalPlan::with_new_exprs(&plan, exprs, &new_inputs)?) } LogicalPlan::Subquery(_) | LogicalPlan::Statement { .. } diff --git a/src/query_frontend/src/provider.rs b/src/query_frontend/src/provider.rs index 67750fcb0e..5a9cdf8514 100644 --- a/src/query_frontend/src/provider.rs +++ b/src/query_frontend/src/provider.rs @@ -320,7 +320,7 @@ impl<'a, P: MetaProvider> MetaProvider for ContextProviderAdapter<'a, P> { } impl<'a, P: MetaProvider> ContextProvider for ContextProviderAdapter<'a, P> { - fn get_table_provider( + fn get_table_source( &self, name: TableReference, ) -> std::result::Result, DataFusionError> { @@ -414,12 +414,6 @@ impl<'a, P: MetaProvider> ContextProvider for ContextProviderAdapter<'a, P> { None } - fn get_table_source( - &self, - name: TableReference, - ) -> datafusion::error::Result> { - self.get_table_provider(name) - } } struct SchemaProviderAdapter { From cd38d616655f24c5fe46e83837e79754403ef26e Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Mon, 29 Jan 2024 18:47:32 +0800 Subject: [PATCH 11/25] bump df --- Cargo.lock | 38 +++++++++---------- Cargo.toml | 12 +++--- .../cases/common/dml/issue-302.result | 2 +- .../cases/common/dml/issue-341.result | 12 +++--- .../cases/common/dml/issue-59.result | 4 +- .../cases/common/explain/explain.result | 2 +- .../cases/common/optimizer/optimizer.result | 2 +- .../cases/env/local/ddl/query-plan.result | 22 +++++------ .../src/instance/reorder_memtable.rs | 4 +- src/common_types/src/datum.rs | 2 + src/query_engine/src/datafusion_impl/mod.rs | 16 ++++++-- .../src/logical_optimizer/type_conversion.rs | 2 +- src/query_frontend/src/parser.rs | 12 +++--- src/table_engine/src/provider.rs | 7 ++-- src/table_engine/src/table.rs | 1 + 15 files changed, 72 insertions(+), 66 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7f9c89a359..a1bb14c9ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -658,7 +658,7 @@ dependencies = [ [[package]] name = "arrow_util" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" +source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" dependencies = [ "ahash 0.8.3", "arrow 49.0.0", @@ -2008,7 +2008,7 @@ dependencies = [ [[package]] name = "datafusion" version = "34.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" dependencies = [ "ahash 0.8.3", "arrow 49.0.0", @@ -2055,7 +2055,7 @@ dependencies = [ [[package]] name = "datafusion-common" version = "34.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" dependencies = [ "ahash 0.8.3", "arrow 49.0.0", @@ -2074,7 +2074,7 @@ dependencies = [ [[package]] name = "datafusion-execution" version = "34.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" dependencies = [ "arrow 49.0.0", "chrono", @@ -2094,7 +2094,7 @@ dependencies = [ [[package]] name = "datafusion-expr" version = "34.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" dependencies = [ "ahash 0.8.3", "arrow 49.0.0", @@ -2109,7 +2109,7 @@ dependencies = [ [[package]] name = "datafusion-optimizer" version = "34.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" dependencies = [ "arrow 49.0.0", "async-trait", @@ -2126,7 +2126,7 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" version = "34.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" dependencies = [ "ahash 0.8.3", "arrow 49.0.0", @@ -2159,7 +2159,7 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" version = "34.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" dependencies = [ "ahash 0.8.3", "arrow 49.0.0", @@ -2189,7 +2189,7 @@ dependencies = [ [[package]] name = "datafusion-proto" version = "34.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" dependencies = [ "arrow 49.0.0", "chrono", @@ -2203,7 +2203,7 @@ dependencies = [ [[package]] name = "datafusion-sql" version = "34.0.0" -source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" dependencies = [ "arrow 49.0.0", "arrow-schema 49.0.0", @@ -2216,7 +2216,7 @@ dependencies = [ [[package]] name = "datafusion_util" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" +source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" dependencies = [ "async-trait", "datafusion", @@ -2837,7 +2837,7 @@ checksum = "8f5f3913fa0bfe7ee1fd8248b6b9f42a5af4b9d65ec2dd2c3c26132b950ecfc2" [[package]] name = "generated_types" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" +source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" dependencies = [ "pbjson", "pbjson-build", @@ -3354,7 +3354,7 @@ dependencies = [ [[package]] name = "influxdb_influxql_parser" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" +source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" dependencies = [ "chrono", "chrono-tz", @@ -3447,7 +3447,7 @@ dependencies = [ [[package]] name = "iox_query" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" +source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" dependencies = [ "arrow 49.0.0", "arrow_util", @@ -3471,7 +3471,7 @@ dependencies = [ [[package]] name = "iox_query_influxql" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" +source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" dependencies = [ "arrow 49.0.0", "chrono", @@ -4589,7 +4589,7 @@ dependencies = [ [[package]] name = "observability_deps" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" +source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" dependencies = [ "tracing", ] @@ -5590,7 +5590,7 @@ dependencies = [ [[package]] name = "query_functions" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" +source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" dependencies = [ "arrow 49.0.0", "chrono", @@ -6294,7 +6294,7 @@ dependencies = [ [[package]] name = "schema" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" +source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" dependencies = [ "arrow 49.0.0", "hashbrown 0.13.2", @@ -7091,7 +7091,7 @@ dependencies = [ [[package]] name = "test_helpers" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997" +source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" dependencies = [ "dotenvy", "observability_deps", diff --git a/Cargo.toml b/Cargo.toml index 16fcb93faf..a21209bde5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -107,8 +107,8 @@ cluster = { path = "src/cluster" } criterion = "0.5" horaedb-client = "1.0.2" common_types = { path = "src/common_types" } -datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119" } -datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119" } +datafusion = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "2891cba41" } +datafusion-proto = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "2891cba41" } derive_builder = "0.12" df_operator = { path = "src/df_operator" } df_engine_extensions = { path = "src/df_engine_extensions" } @@ -121,10 +121,10 @@ hash_ext = { path = "src/components/hash_ext" } hex = "0.4.3" hyperloglog = { git = "https://github.com/jedisct1/rust-hyperloglog.git", rev = "425487ce910f26636fbde8c4d640b538431aad50" } id_allocator = { path = "src/components/id_allocator" } -influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "iox_query_influxql" } -influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "influxdb_influxql_parser" } -influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "iox_query" } -influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "schema" } +influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "iox_query_influxql" } +influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "influxdb_influxql_parser" } +influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "iox_query" } +influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "schema" } interpreters = { path = "src/interpreters" } itertools = "0.10.5" lz4_flex = { version = "0.11", default-features = false, features = ["frame"] } diff --git a/integration_tests/cases/common/dml/issue-302.result b/integration_tests/cases/common/dml/issue-302.result index b57d881fd2..cd7afc3a36 100644 --- a/integration_tests/cases/common/dml/issue-302.result +++ b/integration_tests/cases/common/dml/issue-302.result @@ -12,7 +12,7 @@ affected_rows: 1 select `t`, count(distinct name) from issue302 group by `t`; -issue302.t,COUNT(DISTINCT issue302.name), +t,COUNT(DISTINCT issue302.name), Timestamp(1651737067000),Int64(0), diff --git a/integration_tests/cases/common/dml/issue-341.result b/integration_tests/cases/common/dml/issue-341.result index 902222590b..4e42d84c80 100644 --- a/integration_tests/cases/common/dml/issue-341.result +++ b/integration_tests/cases/common/dml/issue-341.result @@ -58,7 +58,7 @@ WHERE plan_type,plan, String("logical_plan"),String("TableScan: issue341_t1 projection=[timestamp, value], full_filters=[issue341_t1.value = Int32(3)]"), -String("physical_plan"),String("ScanTable: table=issue341_t1, parallelism=8, priority=Low\n"), +String("physical_plan"),String("ScanTable: table=issue341_t1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), -- FilterExec node should not be in plan. @@ -71,8 +71,8 @@ WHERE tag1 = "t3"; plan_type,plan, -String("logical_plan"),String("Projection: issue341_t1.timestamp, issue341_t1.value\n TableScan: issue341_t1 projection=[timestamp, value, tag1], full_filters=[issue341_t1.tag1 = Utf8(\"t3\")]"), -String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n ScanTable: table=issue341_t1, parallelism=8, priority=Low\n"), +String("logical_plan"),String("TableScan: issue341_t1 projection=[timestamp, value], full_filters=[issue341_t1.tag1 = Utf8(\"t3\")]"), +String("physical_plan"),String("ScanTable: table=issue341_t1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(1)\n"), -- Repeat operations above, but with overwrite table @@ -116,7 +116,7 @@ WHERE plan_type,plan, String("logical_plan"),String("Filter: issue341_t2.value = Float64(3)\n TableScan: issue341_t2 projection=[timestamp, value], partial_filters=[issue341_t2.value = Float64(3)]"), -String("physical_plan"),String("CoalesceBatchesExec: target_batch_size=8192\n FilterExec: value@1 = 3\n ScanTable: table=issue341_t2, parallelism=8, priority=Low\n"), +String("physical_plan"),String("CoalesceBatchesExec: target_batch_size=8192\n FilterExec: value@1 = 3\n ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), -- When using tag as filter, FilterExec node should not be in plan. @@ -129,8 +129,8 @@ WHERE tag1 = "t3"; plan_type,plan, -String("logical_plan"),String("Projection: issue341_t2.timestamp, issue341_t2.value\n TableScan: issue341_t2 projection=[timestamp, value, tag1], full_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"), -String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n ScanTable: table=issue341_t2, parallelism=8, priority=Low\n"), +String("logical_plan"),String("TableScan: issue341_t2 projection=[timestamp, value], full_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"), +String("physical_plan"),String("ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(1)\n"), DROP TABLE IF EXISTS `issue341_t1`; diff --git a/integration_tests/cases/common/dml/issue-59.result b/integration_tests/cases/common/dml/issue-59.result index 549c7019cd..36d818696a 100644 --- a/integration_tests/cases/common/dml/issue-59.result +++ b/integration_tests/cases/common/dml/issue-59.result @@ -24,8 +24,8 @@ FROM issue59 GROUP BY id+1; plan_type,plan, -String("logical_plan"),String("Projection: group_alias_0 AS issue59.id + Int64(1), COUNT(alias1) AS COUNT(DISTINCT issue59.account)\n Aggregate: groupBy=[[group_alias_0]], aggr=[[COUNT(alias1)]]\n Projection: group_alias_0, alias1\n Aggregate: groupBy=[[CAST(issue59.id AS Int64) + Int64(1) AS group_alias_0, issue59.account AS alias1]], aggr=[[]]\n TableScan: issue59 projection=[id, account]"), -String("physical_plan"),String("ProjectionExec: expr=[group_alias_0@0 as issue59.id + Int64(1), COUNT(alias1)@1 as COUNT(DISTINCT issue59.account)]\n AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n CoalesceBatchesExec: target_batch_size=8192\n RepartitionExec: partitioning=Hash([group_alias_0@0], 8), input_partitions=8\n AggregateExec: mode=Partial, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n ProjectionExec: expr=[group_alias_0@0 as group_alias_0, alias1@1 as alias1]\n AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, alias1@1 as alias1], aggr=[]\n CoalesceBatchesExec: target_batch_size=8192\n RepartitionExec: partitioning=Hash([group_alias_0@0, alias1@1], 8), input_partitions=8\n AggregateExec: mode=Partial, gby=[CAST(id@0 AS Int64) + 1 as group_alias_0, account@1 as alias1], aggr=[]\n ScanTable: table=issue59, parallelism=8, priority=Low\n"), +String("logical_plan"),String("Projection: group_alias_0 AS issue59.id + Int64(1), COUNT(alias1) AS COUNT(DISTINCT issue59.account)\n Aggregate: groupBy=[[group_alias_0]], aggr=[[COUNT(alias1)]]\n Aggregate: groupBy=[[CAST(issue59.id AS Int64) + Int64(1) AS group_alias_0, issue59.account AS alias1]], aggr=[[]]\n TableScan: issue59 projection=[id, account]"), +String("physical_plan"),String("ProjectionExec: expr=[group_alias_0@0 as issue59.id + Int64(1), COUNT(alias1)@1 as COUNT(DISTINCT issue59.account)]\n AggregateExec: mode=SinglePartitioned, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, alias1@1 as alias1], aggr=[]\n AggregateExec: mode=Partial, gby=[CAST(id@0 AS Int64) + 1 as group_alias_0, account@1 as alias1], aggr=[]\n ScanTable: table=issue59, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), DROP TABLE IF EXISTS issue59; diff --git a/integration_tests/cases/common/explain/explain.result b/integration_tests/cases/common/explain/explain.result index 0cd06380d5..6cf09c078e 100644 --- a/integration_tests/cases/common/explain/explain.result +++ b/integration_tests/cases/common/explain/explain.result @@ -10,7 +10,7 @@ EXPLAIN SELECT t FROM `04_explain_t`; plan_type,plan, String("logical_plan"),String("TableScan: 04_explain_t projection=[t]"), -String("physical_plan"),String("ScanTable: table=04_explain_t, parallelism=8, priority=Low\n"), +String("physical_plan"),String("ScanTable: table=04_explain_t, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), DROP TABLE `04_explain_t`; diff --git a/integration_tests/cases/common/optimizer/optimizer.result b/integration_tests/cases/common/optimizer/optimizer.result index f9cfac2de9..e13dd456ce 100644 --- a/integration_tests/cases/common/optimizer/optimizer.result +++ b/integration_tests/cases/common/optimizer/optimizer.result @@ -10,7 +10,7 @@ EXPLAIN SELECT max(value) AS c1, avg(value) AS c2 FROM `07_optimizer_t` GROUP BY plan_type,plan, String("logical_plan"),String("Projection: MAX(07_optimizer_t.value) AS c1, AVG(07_optimizer_t.value) AS c2\n Aggregate: groupBy=[[07_optimizer_t.name]], aggr=[[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]]\n TableScan: 07_optimizer_t projection=[name, value]"), -String("physical_plan"),String("ProjectionExec: expr=[MAX(07_optimizer_t.value)@1 as c1, AVG(07_optimizer_t.value)@2 as c2]\n AggregateExec: mode=FinalPartitioned, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n CoalesceBatchesExec: target_batch_size=8192\n RepartitionExec: partitioning=Hash([name@0], 8), input_partitions=8\n AggregateExec: mode=Partial, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n ScanTable: table=07_optimizer_t, parallelism=8, priority=Low\n"), +String("physical_plan"),String("ProjectionExec: expr=[MAX(07_optimizer_t.value)@1 as c1, AVG(07_optimizer_t.value)@2 as c2]\n AggregateExec: mode=SinglePartitioned, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n ScanTable: table=07_optimizer_t, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), DROP TABLE `07_optimizer_t`; diff --git a/integration_tests/cases/env/local/ddl/query-plan.result b/integration_tests/cases/env/local/ddl/query-plan.result index a421856b4c..917767bf02 100644 --- a/integration_tests/cases/env/local/ddl/query-plan.result +++ b/integration_tests/cases/env/local/ddl/query-plan.result @@ -31,7 +31,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), -- This query should have higher priority @@ -40,7 +40,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t >= 1695348001000 and t < 1695348002000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), -- This query should not include memtable @@ -49,7 +49,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348002000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), -- SQLNESS ARG pre_cmd=flush @@ -60,7 +60,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_sst_1, fetched_columns:[tsid,t]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_sst_1, fetched_columns:[tsid,t]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n=0]\n"), -- This query should not include SST @@ -68,7 +68,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348002000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), -- Table with an 'append' update mode @@ -100,9 +100,7 @@ affected_rows: 3 explain analyze select t from `03_append_mode_table` where t >= 1695348001000 and name = 'ceresdb'; -plan_type,plan, -String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=1\n num_ssts=0\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_memtable_1, fetched_columns:[t,name]:\n=0]\n"), - +Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute select, err:Failed to execute physical plan, msg:failed to collect execution results, err:Stream error, msg:convert from arrow record batch, err:Execution error: Failed to read table, partition:0, err:Failed to scan table, table:03_append_mode_table, err:Failed to build chain iterator, table:03_append_mode_table, err:Fail to build stream from the memtable, err:Failed to generate datafusion physical expr, err:Schema error: No field named name. Valid fields are t.. sql:explain analyze select t from `03_append_mode_table`\nwhere t >= 1695348001000 and name = 'ceresdb';" }) -- Should just fetch projected columns from SST -- SQLNESS ARG pre_cmd=flush @@ -114,9 +112,7 @@ String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[ou explain analyze select t from `03_append_mode_table` where t >= 1695348001000 and name = 'ceresdb'; -plan_type,plan, -String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=0\n num_ssts=1\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_sst_1, fetched_columns:[t,name]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=408\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n=0]\n"), - +Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute select, err:Failed to execute physical plan, msg:failed to collect execution results, err:Stream error, msg:convert from arrow record batch, err:Execution error: Failed to read table, partition:0, err:Failed to scan table, table:03_append_mode_table, err:Failed to build chain iterator, table:03_append_mode_table, err:Fail to build stream from the sst file, err:Failed to generate datafusion physical expr, err:Schema error: No field named name. Valid fields are t.. sql:explain analyze select t from `03_append_mode_table`\nwhere t >= 1695348001000 and name = 'ceresdb';" }) CREATE TABLE `TEST_QUERY_PRIORITY` ( NAME string TAG, @@ -136,7 +132,7 @@ explain analyze select TS from `TEST_QUERY_PRIORITY` where TS >= 1695348001000 and TS < 1695348002000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=TEST_QUERY_PRIORITY, parallelism=8, priority=High, metrics=[\nPredicate { exprs:[TS >= TimestampMillisecond(1695348001000, None), TS < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=false\n=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=TEST_QUERY_PRIORITY, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[TS >= TimestampMillisecond(1695348001000, None), TS < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=false\n=0]\n"), -- This query should have higher priority @@ -145,7 +141,7 @@ explain analyze select TS from `TEST_QUERY_PRIORITY` where TS >= 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=TEST_QUERY_PRIORITY, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[TS >= TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=TEST_QUERY_PRIORITY, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[TS >= TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n=0]\n"), DROP TABLE `03_dml_select_real_time_range`; diff --git a/src/analytic_engine/src/instance/reorder_memtable.rs b/src/analytic_engine/src/instance/reorder_memtable.rs index fb28c4a13f..2f9ac87b8e 100644 --- a/src/analytic_engine/src/instance/reorder_memtable.rs +++ b/src/analytic_engine/src/instance/reorder_memtable.rs @@ -263,8 +263,8 @@ impl Reorder { // 1. Init datafusion context let runtime = Arc::new(RuntimeEnv::default()); let mut state = SessionState::new_with_config_rt(SessionConfig::new(), runtime); - // The physical optimizer rules have bug, and the plan here is simple, optimize is not required, - // so we disable it here. + // The physical optimizer rules have bug, and the plan here is simple, optimize + // is not required, so we disable it here. state = state.with_physical_optimizer_rules(vec![]); let ctx = SessionContext::new_with_state(state); let table_provider = Arc::new(MemIterProvider { diff --git a/src/common_types/src/datum.rs b/src/common_types/src/datum.rs index 4b8b373763..9b22439a22 100644 --- a/src/common_types/src/datum.rs +++ b/src/common_types/src/datum.rs @@ -292,7 +292,9 @@ impl TryFrom<&SqlDataType> for DatumKind { SqlDataType::Double => Ok(Self::Double), SqlDataType::Boolean => Ok(Self::Boolean), SqlDataType::BigInt(_) => Ok(Self::Int64), + SqlDataType::Int64 => Ok(Self::Int64), SqlDataType::Int(_) => Ok(Self::Int32), + SqlDataType::Int8(_) => Ok(Self::Int8), SqlDataType::SmallInt(_) => Ok(Self::Int16), SqlDataType::String(_) => Ok(Self::String), SqlDataType::Varbinary(_) => Ok(Self::Varbinary), diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs index 46b96f01b0..3c4f18f0c5 100644 --- a/src/query_engine/src/datafusion_impl/mod.rs +++ b/src/query_engine/src/datafusion_impl/mod.rs @@ -24,7 +24,14 @@ use datafusion::{ runtime_env::{RuntimeConfig, RuntimeEnv}, FunctionRegistry, }, - prelude::{SessionConfig, SessionContext}, physical_optimizer::{output_requirements::OutputRequirements, aggregate_statistics::AggregateStatistics, join_selection::JoinSelection, limited_distinct_aggregation::LimitedDistinctAggregation, combine_partial_final_agg::CombinePartialFinalAggregate, enforce_sorting::EnforceSorting, coalesce_batches::CoalesceBatches, pipeline_checker::PipelineChecker, topk_aggregation::TopKAggregation}, + physical_optimizer::{ + aggregate_statistics::AggregateStatistics, coalesce_batches::CoalesceBatches, + combine_partial_final_agg::CombinePartialFinalAggregate, enforce_sorting::EnforceSorting, + join_selection::JoinSelection, limited_distinct_aggregation::LimitedDistinctAggregation, + output_requirements::OutputRequirements, pipeline_checker::PipelineChecker, + projection_pushdown::ProjectionPushdown, topk_aggregation::TopKAggregation, + }, + prelude::{SessionConfig, SessionContext}, }; use df_engine_extensions::codec::PhysicalExtensionCodecImpl; use table_engine::{provider::HoraeDBOptions, remote::RemoteEngineRef}; @@ -137,7 +144,8 @@ impl DfContextBuilder { // Using default logcial optimizer, if want to add more custom rule, using // `add_optimizer_rule` to add. - let mut state = SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone()); + let mut state = + SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone()); state = state.with_physical_optimizer_rules(vec![ Arc::new(OutputRequirements::new_add_mode()), Arc::new(AggregateStatistics::new()), @@ -146,14 +154,14 @@ impl DfContextBuilder { // TODO: this rule will throw this error // Internal error: Children cannot be replaced in ScanTable // Arc::new(EnforceDistribution::new()), + // Arc::new(EnforceSorting::new()), Arc::new(CombinePartialFinalAggregate::new()), - Arc::new(EnforceSorting::new()), Arc::new(CoalesceBatches::new()), Arc::new(OutputRequirements::new_remove_mode()), Arc::new(PipelineChecker::new()), Arc::new(TopKAggregation::new()), // TODO: This rule is not public, so we can't use it - // Arc::new(ProjectionPushdown::new()), + Arc::new(ProjectionPushdown::new()), ]); SessionContext::new_with_state(state) } diff --git a/src/query_frontend/src/logical_optimizer/type_conversion.rs b/src/query_frontend/src/logical_optimizer/type_conversion.rs index 3d67f458e6..95076f33c2 100644 --- a/src/query_frontend/src/logical_optimizer/type_conversion.rs +++ b/src/query_frontend/src/logical_optimizer/type_conversion.rs @@ -30,7 +30,7 @@ use datafusion::{ logical_expr::{ expr::{Expr, InList}, logical_plan::{Filter, LogicalPlan, TableScan}, - Between, BinaryExpr, ExprSchemable, Operator, + Between, BinaryExpr, ExprSchemable, Operator, }, optimizer::analyzer::AnalyzerRule, scalar::ScalarValue, diff --git a/src/query_frontend/src/parser.rs b/src/query_frontend/src/parser.rs index cae7256a01..23efa0ade0 100644 --- a/src/query_frontend/src/parser.rs +++ b/src/query_frontend/src/parser.rs @@ -352,13 +352,11 @@ impl<'a> Parser<'a> { is_dictionary = true; } } - if let DataType::String(_) = c.data_type { - if is_dictionary { - return parser_err!(format!( - "Only string column can be dictionary encoded: {:?}", - c.to_string() - )); - } + if !matches!(c.data_type, DataType::String(_)) && is_dictionary { + return parser_err!(format!( + "Only string column can be dictionary encoded: {:?}", + c.to_string() + )); } } diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs index 49f76460e2..63e5cc7d22 100644 --- a/src/table_engine/src/provider.rs +++ b/src/table_engine/src/provider.rs @@ -410,7 +410,7 @@ impl ExecutionPlan for ScanTable { // However, we have no inputs here, so `UnknownPartitioning` is suitable. // In datafusion, always set it to `UnknownPartitioning` in the scan plan, for // example: https://github.com/apache/arrow-datafusion/blob/cf152af6515f0808d840e1fe9c63b02802595826/datafusion/core/src/datasource/physical_plan/csv.rs#L175 - Partitioning::UnknownPartitioning(self.parallelism) + Partitioning::UnknownPartitioning(self.parallelism.max(1)) } fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> { @@ -480,10 +480,11 @@ impl DisplayAs for ScanTable { fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result { write!( f, - "ScanTable: table={}, parallelism={}, priority={:?}", + "ScanTable: table={}, parallelism={}, priority={:?}, partition_count={:?}", self.table.name(), self.request.opts.read_parallelism, - self.request.priority + self.request.priority, + self.output_partitioning() ) } } diff --git a/src/table_engine/src/table.rs b/src/table_engine/src/table.rs index 7365ca66a4..3c611b4395 100644 --- a/src/table_engine/src/table.rs +++ b/src/table_engine/src/table.rs @@ -421,6 +421,7 @@ impl fmt::Debug for ReadRequest { .field("projected", &projected) .field("predicate", &predicate) .field("priority", &self.priority) + .field("projected_schema", &self.projected_schema) .finish() } } From ee982f2b310e19de32df16c4d910ef24342f9370 Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Sun, 4 Feb 2024 17:43:52 +0800 Subject: [PATCH 12/25] fix tests --- Cargo.lock | 70 +++++++++---------- Cargo.toml | 14 ++-- .../cases/common/dml/issue-1087.result | 17 +++-- .../cases/common/dml/issue-59.result | 2 +- .../cases/common/optimizer/optimizer.result | 2 +- .../src/row_iter/record_batch_stream.rs | 2 +- .../src/sst/parquet/async_reader.rs | 2 +- .../parquet_ext/src/prune/min_max.rs | 8 --- src/query_engine/src/datafusion_impl/mod.rs | 26 +------ .../physical_plan_extension/prom_align.rs | 4 +- .../src/logical_optimizer/type_conversion.rs | 2 +- src/query_frontend/src/plan.rs | 1 + src/query_frontend/src/planner.rs | 2 +- src/query_frontend/src/promql/convert.rs | 2 +- src/query_frontend/src/promql/remote.rs | 3 +- src/table_engine/src/predicate.rs | 1 + 16 files changed, 68 insertions(+), 90 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a1bb14c9ee..f43a9036a5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -472,7 +472,6 @@ dependencies = [ "arrow-data 49.0.0", "arrow-schema 49.0.0", "flatbuffers", - "lz4_flex", ] [[package]] @@ -658,7 +657,7 @@ dependencies = [ [[package]] name = "arrow_util" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" +source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08" dependencies = [ "ahash 0.8.3", "arrow 49.0.0", @@ -2007,13 +2006,12 @@ dependencies = [ [[package]] name = "datafusion" -version = "34.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" +version = "33.0.0" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" dependencies = [ "ahash 0.8.3", "arrow 49.0.0", "arrow-array 49.0.0", - "arrow-ipc 49.0.0", "arrow-schema 49.0.0", "async-compression", "async-trait", @@ -2054,8 +2052,8 @@ dependencies = [ [[package]] name = "datafusion-common" -version = "34.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" +version = "33.0.0" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" dependencies = [ "ahash 0.8.3", "arrow 49.0.0", @@ -2073,8 +2071,8 @@ dependencies = [ [[package]] name = "datafusion-execution" -version = "34.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" +version = "33.0.0" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" dependencies = [ "arrow 49.0.0", "chrono", @@ -2093,8 +2091,8 @@ dependencies = [ [[package]] name = "datafusion-expr" -version = "34.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" +version = "33.0.0" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" dependencies = [ "ahash 0.8.3", "arrow 49.0.0", @@ -2108,8 +2106,8 @@ dependencies = [ [[package]] name = "datafusion-optimizer" -version = "34.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" +version = "33.0.0" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" dependencies = [ "arrow 49.0.0", "async-trait", @@ -2125,8 +2123,8 @@ dependencies = [ [[package]] name = "datafusion-physical-expr" -version = "34.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" +version = "33.0.0" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" dependencies = [ "ahash 0.8.3", "arrow 49.0.0", @@ -2158,8 +2156,8 @@ dependencies = [ [[package]] name = "datafusion-physical-plan" -version = "34.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" +version = "33.0.0" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" dependencies = [ "ahash 0.8.3", "arrow 49.0.0", @@ -2188,8 +2186,8 @@ dependencies = [ [[package]] name = "datafusion-proto" -version = "34.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" +version = "33.0.0" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" dependencies = [ "arrow 49.0.0", "chrono", @@ -2202,8 +2200,8 @@ dependencies = [ [[package]] name = "datafusion-sql" -version = "34.0.0" -source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da" +version = "33.0.0" +source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1" dependencies = [ "arrow 49.0.0", "arrow-schema 49.0.0", @@ -2216,7 +2214,7 @@ dependencies = [ [[package]] name = "datafusion_util" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" +source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08" dependencies = [ "async-trait", "datafusion", @@ -2837,7 +2835,7 @@ checksum = "8f5f3913fa0bfe7ee1fd8248b6b9f42a5af4b9d65ec2dd2c3c26132b950ecfc2" [[package]] name = "generated_types" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" +source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08" dependencies = [ "pbjson", "pbjson-build", @@ -3354,7 +3352,7 @@ dependencies = [ [[package]] name = "influxdb_influxql_parser" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" +source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08" dependencies = [ "chrono", "chrono-tz", @@ -3447,7 +3445,7 @@ dependencies = [ [[package]] name = "iox_query" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" +source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08" dependencies = [ "arrow 49.0.0", "arrow_util", @@ -3471,7 +3469,7 @@ dependencies = [ [[package]] name = "iox_query_influxql" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" +source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08" dependencies = [ "arrow 49.0.0", "chrono", @@ -4589,7 +4587,7 @@ dependencies = [ [[package]] name = "observability_deps" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" +source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08" dependencies = [ "tracing", ] @@ -5590,7 +5588,7 @@ dependencies = [ [[package]] name = "query_functions" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" +source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08" dependencies = [ "arrow 49.0.0", "chrono", @@ -6294,7 +6292,7 @@ dependencies = [ [[package]] name = "schema" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" +source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08" dependencies = [ "arrow 49.0.0", "hashbrown 0.13.2", @@ -6784,9 +6782,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.41.0" +version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" +checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7" dependencies = [ "log", "serde", @@ -6795,13 +6793,13 @@ dependencies = [ [[package]] name = "sqlparser_derive" -version = "0.2.2" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 1.0.109", ] [[package]] @@ -7091,7 +7089,7 @@ dependencies = [ [[package]] name = "test_helpers" version = "0.1.0" -source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b" +source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08" dependencies = [ "dotenvy", "observability_deps", @@ -7714,7 +7712,7 @@ version = "1.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" dependencies = [ - "cfg-if 0.1.10", + "cfg-if 1.0.0", "rand 0.8.5", "static_assertions", ] diff --git a/Cargo.toml b/Cargo.toml index a21209bde5..b41694b31a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -107,8 +107,8 @@ cluster = { path = "src/cluster" } criterion = "0.5" horaedb-client = "1.0.2" common_types = { path = "src/common_types" } -datafusion = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "2891cba41" } -datafusion-proto = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "2891cba41" } +datafusion = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "e21b03154" } +datafusion-proto = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "e21b03154" } derive_builder = "0.12" df_operator = { path = "src/df_operator" } df_engine_extensions = { path = "src/df_engine_extensions" } @@ -121,10 +121,10 @@ hash_ext = { path = "src/components/hash_ext" } hex = "0.4.3" hyperloglog = { git = "https://github.com/jedisct1/rust-hyperloglog.git", rev = "425487ce910f26636fbde8c4d640b538431aad50" } id_allocator = { path = "src/components/id_allocator" } -influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "iox_query_influxql" } -influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "influxdb_influxql_parser" } -influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "iox_query" } -influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "schema" } +influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "b9fb3ca", package = "iox_query_influxql" } +influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "b9fb3ca", package = "influxdb_influxql_parser" } +influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "b9fb3ca", package = "iox_query" } +influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "b9fb3ca", package = "schema" } interpreters = { path = "src/interpreters" } itertools = "0.10.5" lz4_flex = { version = "0.11", default-features = false, features = ["frame"] } @@ -173,7 +173,7 @@ smallvec = "1.6" slog = "2.7" spin = "0.9.6" system_statis = { path = "src/components/system_stats" } -sqlparser = { version = "0.41", features = ["serde"] } +sqlparser = { version = "0.39.0", features = ["serde"] } system_catalog = { path = "src/system_catalog" } table_engine = { path = "src/table_engine" } table_kv = { path = "src/components/table_kv" } diff --git a/integration_tests/cases/common/dml/issue-1087.result b/integration_tests/cases/common/dml/issue-1087.result index d264f4d212..fc1e0d8d5e 100644 --- a/integration_tests/cases/common/dml/issue-1087.result +++ b/integration_tests/cases/common/dml/issue-1087.result @@ -17,6 +17,7 @@ String("logical_plan after inline_table_scan"),String("SAME TEXT AS ABOVE"), String("logical_plan after type_coercion"),String("SAME TEXT AS ABOVE"), String("logical_plan after count_wildcard_rule"),String("SAME TEXT AS ABOVE"), String("analyzed_logical_plan"),String("SAME TEXT AS ABOVE"), +String("logical_plan after eliminate_nested_union"),String("SAME TEXT AS ABOVE"), String("logical_plan after simplify_expressions"),String("SAME TEXT AS ABOVE"), String("logical_plan after unwrap_cast_in_comparison"),String("SAME TEXT AS ABOVE"), String("logical_plan after replace_distinct_aggregate"),String("SAME TEXT AS ABOVE"), @@ -33,6 +34,7 @@ String("logical_plan after eliminate_cross_join"),String("SAME TEXT AS ABOVE"), String("logical_plan after common_sub_expression_eliminate"),String("SAME TEXT AS ABOVE"), String("logical_plan after eliminate_limit"),String("SAME TEXT AS ABOVE"), String("logical_plan after propagate_empty_relation"),String("SAME TEXT AS ABOVE"), +String("logical_plan after eliminate_one_union"),String("SAME TEXT AS ABOVE"), String("logical_plan after filter_null_join_keys"),String("SAME TEXT AS ABOVE"), String("logical_plan after eliminate_outer_join"),String("SAME TEXT AS ABOVE"), String("logical_plan after push_down_limit"),String("SAME TEXT AS ABOVE"), @@ -46,6 +48,7 @@ String("logical_plan after eliminate_projection"),String("TableScan: issue_1087 String("logical_plan after push_down_limit"),String("SAME TEXT AS ABOVE"), String("logical_plan after influx_regex_to_datafusion_regex"),String("SAME TEXT AS ABOVE"), String("logical_plan after handle_gap_fill"),String("SAME TEXT AS ABOVE"), +String("logical_plan after eliminate_nested_union"),String("SAME TEXT AS ABOVE"), String("logical_plan after simplify_expressions"),String("SAME TEXT AS ABOVE"), String("logical_plan after unwrap_cast_in_comparison"),String("SAME TEXT AS ABOVE"), String("logical_plan after replace_distinct_aggregate"),String("SAME TEXT AS ABOVE"), @@ -62,6 +65,7 @@ String("logical_plan after eliminate_cross_join"),String("SAME TEXT AS ABOVE"), String("logical_plan after common_sub_expression_eliminate"),String("SAME TEXT AS ABOVE"), String("logical_plan after eliminate_limit"),String("SAME TEXT AS ABOVE"), String("logical_plan after propagate_empty_relation"),String("SAME TEXT AS ABOVE"), +String("logical_plan after eliminate_one_union"),String("SAME TEXT AS ABOVE"), String("logical_plan after filter_null_join_keys"),String("SAME TEXT AS ABOVE"), String("logical_plan after eliminate_outer_join"),String("SAME TEXT AS ABOVE"), String("logical_plan after push_down_limit"),String("SAME TEXT AS ABOVE"), @@ -76,17 +80,22 @@ String("logical_plan after push_down_limit"),String("SAME TEXT AS ABOVE"), String("logical_plan after influx_regex_to_datafusion_regex"),String("SAME TEXT AS ABOVE"), String("logical_plan after handle_gap_fill"),String("SAME TEXT AS ABOVE"), String("logical_plan"),String("TableScan: issue_1087 projection=[tsid, t, name, value]"), -String("initial_physical_plan"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low\n"), +String("initial_physical_plan"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), +String("initial_physical_plan_with_stats"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:)]]\n"), +String("physical_plan after OutputRequirements"),String("OutputRequirementExec\n ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), String("physical_plan after aggregate_statistics"),String("SAME TEXT AS ABOVE"), String("physical_plan after join_selection"),String("SAME TEXT AS ABOVE"), -String("physical_plan after PipelineFixer"),String("SAME TEXT AS ABOVE"), -String("physical_plan after repartition"),String("SAME TEXT AS ABOVE"), +String("physical_plan after LimitedDistinctAggregation"),String("SAME TEXT AS ABOVE"), String("physical_plan after EnforceDistribution"),String("SAME TEXT AS ABOVE"), String("physical_plan after CombinePartialFinalAggregate"),String("SAME TEXT AS ABOVE"), String("physical_plan after EnforceSorting"),String("SAME TEXT AS ABOVE"), String("physical_plan after coalesce_batches"),String("SAME TEXT AS ABOVE"), +String("physical_plan after OutputRequirements"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), String("physical_plan after PipelineChecker"),String("SAME TEXT AS ABOVE"), -String("physical_plan"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low\n"), +String("physical_plan after LimitAggregation"),String("SAME TEXT AS ABOVE"), +String("physical_plan after ProjectionPushdown"),String("SAME TEXT AS ABOVE"), +String("physical_plan"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), +String("physical_plan_with_stats"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:)]]\n"), DROP TABLE `issue_1087`; diff --git a/integration_tests/cases/common/dml/issue-59.result b/integration_tests/cases/common/dml/issue-59.result index 36d818696a..4f7544c87f 100644 --- a/integration_tests/cases/common/dml/issue-59.result +++ b/integration_tests/cases/common/dml/issue-59.result @@ -25,7 +25,7 @@ GROUP BY id+1; plan_type,plan, String("logical_plan"),String("Projection: group_alias_0 AS issue59.id + Int64(1), COUNT(alias1) AS COUNT(DISTINCT issue59.account)\n Aggregate: groupBy=[[group_alias_0]], aggr=[[COUNT(alias1)]]\n Aggregate: groupBy=[[CAST(issue59.id AS Int64) + Int64(1) AS group_alias_0, issue59.account AS alias1]], aggr=[[]]\n TableScan: issue59 projection=[id, account]"), -String("physical_plan"),String("ProjectionExec: expr=[group_alias_0@0 as issue59.id + Int64(1), COUNT(alias1)@1 as COUNT(DISTINCT issue59.account)]\n AggregateExec: mode=SinglePartitioned, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, alias1@1 as alias1], aggr=[]\n AggregateExec: mode=Partial, gby=[CAST(id@0 AS Int64) + 1 as group_alias_0, account@1 as alias1], aggr=[]\n ScanTable: table=issue59, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), +String("physical_plan"),String("ProjectionExec: expr=[group_alias_0@0 as issue59.id + Int64(1), COUNT(alias1)@1 as COUNT(DISTINCT issue59.account)]\n AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n CoalesceBatchesExec: target_batch_size=8192\n RepartitionExec: partitioning=Hash([group_alias_0@0], 8), input_partitions=8\n AggregateExec: mode=Partial, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, alias1@1 as alias1], aggr=[]\n CoalesceBatchesExec: target_batch_size=8192\n RepartitionExec: partitioning=Hash([group_alias_0@0, alias1@1], 8), input_partitions=8\n AggregateExec: mode=Partial, gby=[CAST(id@0 AS Int64) + 1 as group_alias_0, account@1 as alias1], aggr=[]\n ScanTable: table=issue59, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), DROP TABLE IF EXISTS issue59; diff --git a/integration_tests/cases/common/optimizer/optimizer.result b/integration_tests/cases/common/optimizer/optimizer.result index e13dd456ce..5df9f47e68 100644 --- a/integration_tests/cases/common/optimizer/optimizer.result +++ b/integration_tests/cases/common/optimizer/optimizer.result @@ -10,7 +10,7 @@ EXPLAIN SELECT max(value) AS c1, avg(value) AS c2 FROM `07_optimizer_t` GROUP BY plan_type,plan, String("logical_plan"),String("Projection: MAX(07_optimizer_t.value) AS c1, AVG(07_optimizer_t.value) AS c2\n Aggregate: groupBy=[[07_optimizer_t.name]], aggr=[[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]]\n TableScan: 07_optimizer_t projection=[name, value]"), -String("physical_plan"),String("ProjectionExec: expr=[MAX(07_optimizer_t.value)@1 as c1, AVG(07_optimizer_t.value)@2 as c2]\n AggregateExec: mode=SinglePartitioned, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n ScanTable: table=07_optimizer_t, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), +String("physical_plan"),String("ProjectionExec: expr=[MAX(07_optimizer_t.value)@1 as c1, AVG(07_optimizer_t.value)@2 as c2]\n AggregateExec: mode=FinalPartitioned, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n CoalesceBatchesExec: target_batch_size=8192\n RepartitionExec: partitioning=Hash([name@0], 8), input_partitions=8\n AggregateExec: mode=Partial, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n ScanTable: table=07_optimizer_t, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), DROP TABLE `07_optimizer_t`; diff --git a/src/analytic_engine/src/row_iter/record_batch_stream.rs b/src/analytic_engine/src/row_iter/record_batch_stream.rs index 5740d73715..49c41f2432 100644 --- a/src/analytic_engine/src/row_iter/record_batch_stream.rs +++ b/src/analytic_engine/src/row_iter/record_batch_stream.rs @@ -32,7 +32,7 @@ use common_types::{ use datafusion::{ common::ToDFSchema, error::DataFusionError, - logical_expr::utils::conjunction, + optimizer::utils::conjunction, physical_expr::{self, execution_props::ExecutionProps}, physical_plan::PhysicalExpr, }; diff --git a/src/analytic_engine/src/sst/parquet/async_reader.rs b/src/analytic_engine/src/sst/parquet/async_reader.rs index 49747b5376..94feeab2c5 100644 --- a/src/analytic_engine/src/sst/parquet/async_reader.rs +++ b/src/analytic_engine/src/sst/parquet/async_reader.rs @@ -219,7 +219,7 @@ impl<'a> Reader<'a> { ) -> Result> { // TODO: remove fixed partition let partition = 0; - let exprs = datafusion::logical_expr::utils::conjunction(self.predicate.exprs().to_vec()); + let exprs = datafusion::optimizer::utils::conjunction(self.predicate.exprs().to_vec()); let exprs = match exprs { Some(exprs) => exprs, None => return Ok(None), diff --git a/src/components/parquet_ext/src/prune/min_max.rs b/src/components/parquet_ext/src/prune/min_max.rs index 6bd3ad7496..4f5b27b22b 100644 --- a/src/components/parquet_ext/src/prune/min_max.rs +++ b/src/components/parquet_ext/src/prune/min_max.rs @@ -197,14 +197,6 @@ impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> { None } - // TODO: support this. - fn contained( - &self, - _column: &Column, - _values: &std::collections::HashSet, - ) -> Option { - None - } } #[cfg(test)] diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs index 3c4f18f0c5..218fbb90c9 100644 --- a/src/query_engine/src/datafusion_impl/mod.rs +++ b/src/query_engine/src/datafusion_impl/mod.rs @@ -24,13 +24,6 @@ use datafusion::{ runtime_env::{RuntimeConfig, RuntimeEnv}, FunctionRegistry, }, - physical_optimizer::{ - aggregate_statistics::AggregateStatistics, coalesce_batches::CoalesceBatches, - combine_partial_final_agg::CombinePartialFinalAggregate, enforce_sorting::EnforceSorting, - join_selection::JoinSelection, limited_distinct_aggregation::LimitedDistinctAggregation, - output_requirements::OutputRequirements, pipeline_checker::PipelineChecker, - projection_pushdown::ProjectionPushdown, topk_aggregation::TopKAggregation, - }, prelude::{SessionConfig, SessionContext}, }; use df_engine_extensions::codec::PhysicalExtensionCodecImpl; @@ -144,25 +137,8 @@ impl DfContextBuilder { // Using default logcial optimizer, if want to add more custom rule, using // `add_optimizer_rule` to add. - let mut state = + let state = SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone()); - state = state.with_physical_optimizer_rules(vec![ - Arc::new(OutputRequirements::new_add_mode()), - Arc::new(AggregateStatistics::new()), - Arc::new(JoinSelection::new()), - Arc::new(LimitedDistinctAggregation::new()), - // TODO: this rule will throw this error - // Internal error: Children cannot be replaced in ScanTable - // Arc::new(EnforceDistribution::new()), - // Arc::new(EnforceSorting::new()), - Arc::new(CombinePartialFinalAggregate::new()), - Arc::new(CoalesceBatches::new()), - Arc::new(OutputRequirements::new_remove_mode()), - Arc::new(PipelineChecker::new()), - Arc::new(TopKAggregation::new()), - // TODO: This rule is not public, so we can't use it - Arc::new(ProjectionPushdown::new()), - ]); SessionContext::new_with_state(state) } } diff --git a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs index c1dcb27bf2..c791024b1a 100644 --- a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs +++ b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs @@ -517,7 +517,7 @@ impl Stream for PromAlignReader { if !tsid_samples.is_empty() { Poll::Ready(Some( self.samples_to_record_batch(schema, tsid_samples) - .map_err(|err| DataFusionError::ArrowError(err, None)), + .map_err(|err| DataFusionError::ArrowError(err)), )) } else { Poll::Ready(Some(Ok(RecordBatch::new_empty(schema)))) @@ -532,7 +532,7 @@ impl Stream for PromAlignReader { if !tsid_samples.is_empty() { return Poll::Ready(Some( self.samples_to_record_batch(schema, tsid_samples) - .map_err(|err| DataFusionError::ArrowError(err, None)), + .map_err(|err| DataFusionError::ArrowError(err)), )); } } diff --git a/src/query_frontend/src/logical_optimizer/type_conversion.rs b/src/query_frontend/src/logical_optimizer/type_conversion.rs index 95076f33c2..eff51b0289 100644 --- a/src/query_frontend/src/logical_optimizer/type_conversion.rs +++ b/src/query_frontend/src/logical_optimizer/type_conversion.rs @@ -213,7 +213,7 @@ impl<'a> TypeRewriter<'a> { let array = value.to_array()?; ScalarValue::try_from_array( &compute::cast(&array, data_type) - .map_err(|err| DataFusionError::ArrowError(err, None))?, + .map_err(|err| DataFusionError::ArrowError(err))?, // index: Converts a value in `array` at `index` into a ScalarValue 0, ) diff --git a/src/query_frontend/src/plan.rs b/src/query_frontend/src/plan.rs index e5db6238eb..4ebe33e215 100644 --- a/src/query_frontend/src/plan.rs +++ b/src/query_frontend/src/plan.rs @@ -210,6 +210,7 @@ impl QueryPlan { // TODO: Currently we only consider the time range, consider other factors, such // as the number of series, or slow log metrics. pub fn decide_query_priority(&self, ctx: PriorityContext) -> Result> { + // return Ok(Some(Priority::High)); let threshold = ctx.time_range_threshold; let time_range = match self.extract_time_range()? { Some(v) => v, diff --git a/src/query_frontend/src/planner.rs b/src/query_frontend/src/planner.rs index e5c8a583ab..8e02f5ee9e 100644 --- a/src/query_frontend/src/planner.rs +++ b/src/query_frontend/src/planner.rs @@ -984,7 +984,7 @@ impl<'a, P: MetaProvider> PlannerDelegate<'a, P> { } } - let rows = build_row_group(schema, source.unwrap(), column_index_in_insert)?; + let rows = build_row_group(schema, source, column_index_in_insert)?; Ok(Plan::Insert(InsertPlan { table, diff --git a/src/query_frontend/src/promql/convert.rs b/src/query_frontend/src/promql/convert.rs index e92e0b9922..1d4a7e498a 100644 --- a/src/query_frontend/src/promql/convert.rs +++ b/src/query_frontend/src/promql/convert.rs @@ -22,13 +22,13 @@ use common_types::{ time::{TimeRange, Timestamp}, }; use datafusion::{ + optimizer::utils::conjunction, logical_expr::{ avg, count, expr::{Alias, ScalarFunction}, lit, logical_plan::{Extension, LogicalPlan, LogicalPlanBuilder}, max, min, sum, - utils::conjunction, Expr as DataFusionExpr, }, prelude::ident, diff --git a/src/query_frontend/src/promql/remote.rs b/src/query_frontend/src/promql/remote.rs index e8fc99e8be..c3c1439ec7 100644 --- a/src/query_frontend/src/promql/remote.rs +++ b/src/query_frontend/src/promql/remote.rs @@ -21,7 +21,8 @@ use std::sync::Arc; use common_types::{schema::Schema, time::TimeRange}; use datafusion::{ - logical_expr::{utils::conjunction, LogicalPlanBuilder, Operator}, + logical_expr::{LogicalPlanBuilder, Operator}, + optimizer::utils::conjunction, prelude::{ident, lit, Expr}, sql::{planner::ContextProvider, TableReference}, }; diff --git a/src/table_engine/src/predicate.rs b/src/table_engine/src/predicate.rs index e71180a0e0..b316b99e24 100644 --- a/src/table_engine/src/predicate.rs +++ b/src/table_engine/src/predicate.rs @@ -429,6 +429,7 @@ impl<'a> TimeRangeExtractor<'a> { | Expr::IsUnknown(_) | Expr::IsNotUnknown(_) | Expr::Negative(_) + | Expr::AggregateUDF(_) | Expr::Case { .. } | Expr::Cast { .. } | Expr::TryCast { .. } From 6398c19e5d8b21c7f088298b03b29bcb6d8550d3 Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Sun, 4 Feb 2024 17:52:25 +0800 Subject: [PATCH 13/25] fix clippy --- src/df_engine_extensions/src/dist_sql_query/test_util.rs | 6 ++++-- .../datafusion_impl/physical_plan_extension/prom_align.rs | 4 ++-- src/query_frontend/src/logical_optimizer/type_conversion.rs | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/df_engine_extensions/src/dist_sql_query/test_util.rs b/src/df_engine_extensions/src/dist_sql_query/test_util.rs index c1d00fb227..c42f9e3862 100644 --- a/src/df_engine_extensions/src/dist_sql_query/test_util.rs +++ b/src/df_engine_extensions/src/dist_sql_query/test_util.rs @@ -262,7 +262,8 @@ impl TestContext { AggregateMode::Partial, self.group_by.clone(), self.aggr_exprs.clone(), - vec![None; self.aggr_exprs.len()], + vec![None], + vec![None], input, input_schema.clone(), ) @@ -287,7 +288,8 @@ impl TestContext { AggregateMode::Final, final_group_by, self.aggr_exprs.clone(), - vec![None; self.aggr_exprs.len()], + vec![None], + vec![None], merge, input_schema, ) diff --git a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs index c791024b1a..9fe8cc74c7 100644 --- a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs +++ b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs @@ -517,7 +517,7 @@ impl Stream for PromAlignReader { if !tsid_samples.is_empty() { Poll::Ready(Some( self.samples_to_record_batch(schema, tsid_samples) - .map_err(|err| DataFusionError::ArrowError(err)), + .map_err(DataFusionError::ArrowError), )) } else { Poll::Ready(Some(Ok(RecordBatch::new_empty(schema)))) @@ -532,7 +532,7 @@ impl Stream for PromAlignReader { if !tsid_samples.is_empty() { return Poll::Ready(Some( self.samples_to_record_batch(schema, tsid_samples) - .map_err(|err| DataFusionError::ArrowError(err)), + .map_err(DataFusionError::ArrowError), )); } } diff --git a/src/query_frontend/src/logical_optimizer/type_conversion.rs b/src/query_frontend/src/logical_optimizer/type_conversion.rs index eff51b0289..cdea2cc781 100644 --- a/src/query_frontend/src/logical_optimizer/type_conversion.rs +++ b/src/query_frontend/src/logical_optimizer/type_conversion.rs @@ -213,7 +213,7 @@ impl<'a> TypeRewriter<'a> { let array = value.to_array()?; ScalarValue::try_from_array( &compute::cast(&array, data_type) - .map_err(|err| DataFusionError::ArrowError(err))?, + .map_err(DataFusionError::ArrowError)?, // index: Converts a value in `array` at `index` into a ScalarValue 0, ) From 21e43033a04c6b316975a65653d39e919f8728ad Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Mon, 5 Feb 2024 17:38:48 +0800 Subject: [PATCH 14/25] fix fmt --- src/common_types/src/record_batch.rs | 16 ++++++++++++---- src/components/parquet_ext/src/prune/min_max.rs | 1 - src/query_engine/src/datafusion_impl/mod.rs | 3 +-- .../src/logical_optimizer/type_conversion.rs | 3 +-- src/query_frontend/src/plan.rs | 1 - src/query_frontend/src/promql/convert.rs | 5 ++--- src/query_frontend/src/provider.rs | 1 - 7 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/common_types/src/record_batch.rs b/src/common_types/src/record_batch.rs index 2a543ca552..058d4c41b4 100644 --- a/src/common_types/src/record_batch.rs +++ b/src/common_types/src/record_batch.rs @@ -24,7 +24,7 @@ use arrow::{ compute, datatypes::{DataType, Field, Schema, SchemaRef as ArrowSchemaRef, TimeUnit}, error::ArrowError, - record_batch::RecordBatch as ArrowRecordBatch, + record_batch::{RecordBatch as ArrowRecordBatch, RecordBatchOptions}, }; use arrow_ext::operation; use snafu::{ensure, Backtrace, OptionExt, ResultExt, Snafu}; @@ -128,10 +128,18 @@ impl RecordBatchData { let arrays = column_blocks .iter() .map(|column| column.to_arrow_array_ref()) - .collect(); - + .collect::>(); + + println!("debug column_blocks:{column_blocks:?}"); + println!("debug column_blocks2:{:?}", column_blocks.len()); + let mut options = RecordBatchOptions::new(); + if let Some(len) = arrays.first().map(|col| col.len()) { + options = options.with_row_count(Some(len)); + } else { + options = options.with_row_count(Some(0)); + } let arrow_record_batch = - ArrowRecordBatch::try_new(arrow_schema, arrays).context(CreateArrow)?; + ArrowRecordBatch::try_new_with_options(arrow_schema, arrays, &options).context(CreateArrow)?; Ok(RecordBatchData { arrow_record_batch, diff --git a/src/components/parquet_ext/src/prune/min_max.rs b/src/components/parquet_ext/src/prune/min_max.rs index 4f5b27b22b..0a717021a1 100644 --- a/src/components/parquet_ext/src/prune/min_max.rs +++ b/src/components/parquet_ext/src/prune/min_max.rs @@ -196,7 +196,6 @@ impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> { fn null_counts(&self, _column: &Column) -> Option { None } - } #[cfg(test)] diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs index 218fbb90c9..482628f836 100644 --- a/src/query_engine/src/datafusion_impl/mod.rs +++ b/src/query_engine/src/datafusion_impl/mod.rs @@ -137,8 +137,7 @@ impl DfContextBuilder { // Using default logcial optimizer, if want to add more custom rule, using // `add_optimizer_rule` to add. - let state = - SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone()); + let state = SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone()); SessionContext::new_with_state(state) } } diff --git a/src/query_frontend/src/logical_optimizer/type_conversion.rs b/src/query_frontend/src/logical_optimizer/type_conversion.rs index cdea2cc781..0aeaaba207 100644 --- a/src/query_frontend/src/logical_optimizer/type_conversion.rs +++ b/src/query_frontend/src/logical_optimizer/type_conversion.rs @@ -212,8 +212,7 @@ impl<'a> TypeRewriter<'a> { let array = value.to_array()?; ScalarValue::try_from_array( - &compute::cast(&array, data_type) - .map_err(DataFusionError::ArrowError)?, + &compute::cast(&array, data_type).map_err(DataFusionError::ArrowError)?, // index: Converts a value in `array` at `index` into a ScalarValue 0, ) diff --git a/src/query_frontend/src/plan.rs b/src/query_frontend/src/plan.rs index 4ebe33e215..e5db6238eb 100644 --- a/src/query_frontend/src/plan.rs +++ b/src/query_frontend/src/plan.rs @@ -210,7 +210,6 @@ impl QueryPlan { // TODO: Currently we only consider the time range, consider other factors, such // as the number of series, or slow log metrics. pub fn decide_query_priority(&self, ctx: PriorityContext) -> Result> { - // return Ok(Some(Priority::High)); let threshold = ctx.time_range_threshold; let time_range = match self.extract_time_range()? { Some(v) => v, diff --git a/src/query_frontend/src/promql/convert.rs b/src/query_frontend/src/promql/convert.rs index 1d4a7e498a..f364a0b101 100644 --- a/src/query_frontend/src/promql/convert.rs +++ b/src/query_frontend/src/promql/convert.rs @@ -22,15 +22,14 @@ use common_types::{ time::{TimeRange, Timestamp}, }; use datafusion::{ - optimizer::utils::conjunction, logical_expr::{ avg, count, expr::{Alias, ScalarFunction}, lit, logical_plan::{Extension, LogicalPlan, LogicalPlanBuilder}, - max, min, sum, - Expr as DataFusionExpr, + max, min, sum, Expr as DataFusionExpr, }, + optimizer::utils::conjunction, prelude::ident, sql::planner::ContextProvider, }; diff --git a/src/query_frontend/src/provider.rs b/src/query_frontend/src/provider.rs index 5a9cdf8514..6464725405 100644 --- a/src/query_frontend/src/provider.rs +++ b/src/query_frontend/src/provider.rs @@ -413,7 +413,6 @@ impl<'a, P: MetaProvider> ContextProvider for ContextProviderAdapter<'a, P> { fn get_window_meta(&self, _name: &str) -> Option> { None } - } struct SchemaProviderAdapter { From c10fe3c7aaffddd085d4db2bac0bf414c9084f3f Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Wed, 7 Feb 2024 15:12:03 +0800 Subject: [PATCH 15/25] debug fmt --- .../env/cluster/ddl/partition_table.result | 4 +- src/analytic_engine/src/row_iter/merge.rs | 3 ++ src/analytic_engine/src/table/mod.rs | 2 +- src/common_types/src/record_batch.rs | 47 +++++++++++-------- src/interpreters/src/tests.rs | 6 +-- 5 files changed, 36 insertions(+), 26 deletions(-) diff --git a/integration_tests/cases/env/cluster/ddl/partition_table.result b/integration_tests/cases/env/cluster/ddl/partition_table.result index d376718cc7..ee935cb3c1 100644 --- a/integration_tests/cases/env/cluster/ddl/partition_table.result +++ b/integration_tests/cases/env/cluster/ddl/partition_table.result @@ -83,7 +83,7 @@ UInt64(16367588166920223437),Timestamp(1651737067000),String("horaedb9"),Int32(0 EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0"; plan_type,plan, -String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=[\npartition_table_t:\n __partition_table_t_1:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n\n__partition_table_t_1:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), +String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=[\npartition_table_t:\n __partition_table_t_1:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n\n__partition_table_t_1:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx @@ -92,7 +92,7 @@ String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:f EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4"); plan_type,plan, -String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=[\npartition_table_t:\n __partition_table_t_x:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n __partition_table_t_x:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n __partition_table_t_x:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), +String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=[\npartition_table_t:\n __partition_table_t_x:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n __partition_table_t_x:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n __partition_table_t_x:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), ALTER TABLE partition_table_t ADD COLUMN (b string); diff --git a/src/analytic_engine/src/row_iter/merge.rs b/src/analytic_engine/src/row_iter/merge.rs index 88f58c2df9..67a2f4045d 100644 --- a/src/analytic_engine/src/row_iter/merge.rs +++ b/src/analytic_engine/src/row_iter/merge.rs @@ -486,6 +486,7 @@ impl BufferedStream { Ok(false) } Some(record_batch) => { + logger::info!("if necessary: {record_batch:?}"); self.state.as_mut().unwrap().reset(record_batch); Ok(true) } @@ -844,6 +845,7 @@ impl MergeIterator { None }; + logger::info!("debug fetch rows:{record_batch:?}"); self.reheap(buffered_stream).await?; @@ -873,6 +875,7 @@ impl MergeIterator { self.record_batch_builder.clear(); + logger::info!("fetch next batch, {}", self.record_batch_builder.len()); while !self.hot.is_empty() && self.record_batch_builder.len() < self.iter_options.batch_size { // no need to do merge sort if only one batch in the hot heap. diff --git a/src/analytic_engine/src/table/mod.rs b/src/analytic_engine/src/table/mod.rs index af381b5baa..e6dfd1dc2f 100644 --- a/src/analytic_engine/src/table/mod.rs +++ b/src/analytic_engine/src/table/mod.rs @@ -432,7 +432,7 @@ pub fn support_pushdown(schema: &Schema, need_dedup: bool, col_names: &[String]) // When table need dedup, only unique keys columns support pushdown col_names .iter() - .all(|col_name| schema.is_unique_column(col_name.as_str())) + .all(|col_name| !schema.is_unique_column(col_name.as_str())) } #[async_trait] diff --git a/src/common_types/src/record_batch.rs b/src/common_types/src/record_batch.rs index 058d4c41b4..6634c71e80 100644 --- a/src/common_types/src/record_batch.rs +++ b/src/common_types/src/record_batch.rs @@ -124,22 +124,18 @@ pub struct RecordBatchData { } impl RecordBatchData { - fn new(arrow_schema: ArrowSchemaRef, column_blocks: Vec) -> Result { + fn new( + arrow_schema: ArrowSchemaRef, + column_blocks: Vec, + options: RecordBatchOptions, + ) -> Result { let arrays = column_blocks .iter() .map(|column| column.to_arrow_array_ref()) .collect::>(); - - println!("debug column_blocks:{column_blocks:?}"); - println!("debug column_blocks2:{:?}", column_blocks.len()); - let mut options = RecordBatchOptions::new(); - if let Some(len) = arrays.first().map(|col| col.len()) { - options = options.with_row_count(Some(len)); - } else { - options = options.with_row_count(Some(0)); - } let arrow_record_batch = - ArrowRecordBatch::try_new_with_options(arrow_schema, arrays, &options).context(CreateArrow)?; + ArrowRecordBatch::try_new_with_options(arrow_schema, arrays, &options) + .context(CreateArrow)?; Ok(RecordBatchData { arrow_record_batch, @@ -148,10 +144,7 @@ impl RecordBatchData { } fn num_rows(&self) -> usize { - self.column_blocks - .first() - .map(|column| column.num_rows()) - .unwrap_or(0) + self.arrow_record_batch.num_rows() } fn take_column_block(&mut self, index: usize) -> ColumnBlock { @@ -238,6 +231,11 @@ impl RecordBatch { pub fn new(schema: RecordSchema, column_blocks: Vec) -> Result { ensure!(schema.num_columns() == column_blocks.len(), SchemaLen); + let num_rows = column_blocks + .first() + .map(|block| block.num_rows()) + .unwrap_or_default(); + let options = RecordBatchOptions::new().with_row_count(Some(num_rows)); // Validate schema and column_blocks. for (column_schema, column_block) in schema.columns().iter().zip(column_blocks.iter()) { ensure!( @@ -251,7 +249,7 @@ impl RecordBatch { } let arrow_schema = schema.to_arrow_schema_ref(); - let data = RecordBatchData::new(arrow_schema, column_blocks)?; + let data = RecordBatchData::new(arrow_schema, column_blocks, options)?; Ok(Self { schema, data }) } @@ -396,6 +394,7 @@ impl FetchedRecordBatch { let mut column_blocks = Vec::with_capacity(fetched_schema.num_columns()); let num_rows = arrow_record_batch.num_rows(); let num_columns = arrow_record_batch.num_columns(); + let options = RecordBatchOptions::new().with_row_count(Some(num_rows)); for (col_idx_opt, col_schema) in column_indexes.iter().zip(fetched_schema.columns()) { match col_idx_opt { Some(col_idx) => { @@ -427,7 +426,8 @@ impl FetchedRecordBatch { } } - let data = RecordBatchData::new(fetched_schema.to_arrow_schema_ref(), column_blocks)?; + let data = + RecordBatchData::new(fetched_schema.to_arrow_schema_ref(), column_blocks, options)?; Ok(FetchedRecordBatch { schema: fetched_schema, @@ -479,6 +479,8 @@ impl FetchedRecordBatch { // Get the schema after projection. let record_schema = projected_schema.to_record_schema(); let mut column_blocks = Vec::with_capacity(record_schema.num_columns()); + let num_rows = self.data.num_rows(); + let options = RecordBatchOptions::new().with_row_count(Some(num_rows)); for column_schema in record_schema.columns() { let column_index = @@ -493,8 +495,8 @@ impl FetchedRecordBatch { column_blocks.push(column_block); } - let data = RecordBatchData::new(record_schema.to_arrow_schema_ref(), column_blocks)?; - + let data = + RecordBatchData::new(record_schema.to_arrow_schema_ref(), column_blocks, options)?; Ok(RecordBatch { schema: record_schema, data, @@ -733,11 +735,16 @@ impl FetchedRecordBatchBuilder { .map(|builder| builder.build()) .collect(); let arrow_schema = self.fetched_schema.to_arrow_schema_ref(); + let num_rows = column_blocks + .first() + .map(|block| block.num_rows()) + .unwrap_or_default(); + let options = RecordBatchOptions::new().with_row_count(Some(num_rows)); Ok(FetchedRecordBatch { schema: self.fetched_schema.clone(), primary_key_indexes: self.primary_key_indexes.clone(), - data: RecordBatchData::new(arrow_schema, column_blocks)?, + data: RecordBatchData::new(arrow_schema, column_blocks, options)?, }) } } diff --git a/src/interpreters/src/tests.rs b/src/interpreters/src/tests.rs index 6d521738f7..a69944269b 100644 --- a/src/interpreters/src/tests.rs +++ b/src/interpreters/src/tests.rs @@ -117,7 +117,7 @@ where .enable_partition_table_access(enable_partition_table_access) .build(); let sql= format!("CREATE TABLE IF NOT EXISTS {table_name}(c1 string tag not null,ts timestamp not null, c3 string, timestamp key(ts),primary key(c1, ts)) \ - ENGINE=Analytic WITH (ttl='70d',update_mode='overwrite',arena_block_size='1KB')"); + ENGINE=Analytic WITH (enable_ttl='false',update_mode='overwrite',arena_block_size='1KB')"); let output = self.sql_to_output_with_context(&sql, ctx).await?; assert!( @@ -156,7 +156,7 @@ where .default_catalog_and_schema(DEFAULT_CATALOG.to_string(), DEFAULT_SCHEMA.to_string()) .enable_partition_table_access(enable_partition_table_access) .build(); - let sql = format!("select * from {table_name}"); + let sql = format!("explain analyze select * from {table_name}"); let output = self.sql_to_output_with_context(&sql, ctx).await?; let records = output.try_into().unwrap(); let expected = vec![ @@ -169,7 +169,7 @@ where ]; test_util::assert_record_batches_eq(&expected, records); - let sql = "select count(*) from test_table"; + let sql = "explain analyze select count(*) from test_table"; let output = self.sql_to_output(sql).await?; let records = output.try_into().unwrap(); let expected = vec![ From 93dafe9203cdec87b28ac9cc35c0f98d9e2ad23e Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Tue, 20 Feb 2024 20:54:46 +0800 Subject: [PATCH 16/25] fix test_interpreters_rocks --- src/common_types/src/record_batch.rs | 11 +++++------ src/interpreters/src/tests.rs | 18 +++++++++--------- src/proxy/src/grpc/prom_query.rs | 2 +- src/proxy/src/influxdb/types.rs | 2 +- src/table_engine/src/memory.rs | 2 +- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/common_types/src/record_batch.rs b/src/common_types/src/record_batch.rs index 6634c71e80..2c787be465 100644 --- a/src/common_types/src/record_batch.rs +++ b/src/common_types/src/record_batch.rs @@ -228,13 +228,12 @@ impl RecordBatch { } } - pub fn new(schema: RecordSchema, column_blocks: Vec) -> Result { + pub fn new( + schema: RecordSchema, + column_blocks: Vec, + num_rows: usize, + ) -> Result { ensure!(schema.num_columns() == column_blocks.len(), SchemaLen); - - let num_rows = column_blocks - .first() - .map(|block| block.num_rows()) - .unwrap_or_default(); let options = RecordBatchOptions::new().with_row_count(Some(num_rows)); // Validate schema and column_blocks. for (column_schema, column_block) in schema.columns().iter().zip(column_blocks.iter()) { diff --git a/src/interpreters/src/tests.rs b/src/interpreters/src/tests.rs index a69944269b..f9c8c75bd9 100644 --- a/src/interpreters/src/tests.rs +++ b/src/interpreters/src/tests.rs @@ -156,8 +156,8 @@ where .default_catalog_and_schema(DEFAULT_CATALOG.to_string(), DEFAULT_SCHEMA.to_string()) .enable_partition_table_access(enable_partition_table_access) .build(); - let sql = format!("explain analyze select * from {table_name}"); - let output = self.sql_to_output_with_context(&sql, ctx).await?; + let sql = format!("select * from {table_name}"); + let output = self.sql_to_output_with_context(&sql, ctx.clone()).await?; let records = output.try_into().unwrap(); let expected = vec![ "+------------+---------------------+--------+--------+------------+--------------+", @@ -169,15 +169,15 @@ where ]; test_util::assert_record_batches_eq(&expected, records); - let sql = "explain analyze select count(*) from test_table"; - let output = self.sql_to_output(sql).await?; + let sql = format!("select count(*) from {table_name}"); + let output = self.sql_to_output_with_context(&sql, ctx).await?; let records = output.try_into().unwrap(); let expected = vec![ - "+-----------------+", - "| COUNT(UInt8(1)) |", - "+-----------------+", - "| 2 |", - "+-----------------+", + "+----------+", + "| COUNT(*) |", + "+----------+", + "| 2 |", + "+----------+", ]; test_util::assert_record_batches_eq(&expected, records); diff --git a/src/proxy/src/grpc/prom_query.rs b/src/proxy/src/grpc/prom_query.rs index 1c999ad0c0..673b6131a5 100644 --- a/src/proxy/src/grpc/prom_query.rs +++ b/src/proxy/src/grpc/prom_query.rs @@ -471,7 +471,7 @@ mod tests { let schema = build_schema(); let record_schema = schema.to_record_schema(); let column_blocks = build_column_block(); - let record_batch = RecordBatch::new(record_schema, column_blocks).unwrap(); + let record_batch = RecordBatch::new(record_schema, column_blocks, 4).unwrap(); let column_name = ColumnNames { timestamp: "timestamp".to_string(), diff --git a/src/proxy/src/influxdb/types.rs b/src/proxy/src/influxdb/types.rs index 117b5cf31c..cd2b229d6d 100644 --- a/src/proxy/src/influxdb/types.rs +++ b/src/proxy/src/influxdb/types.rs @@ -744,7 +744,7 @@ mod tests { fn test_influxql_result() { let record_schema = build_test_record_schema(); let column_blocks = build_test_column_blocks(); - let record_batch = RecordBatch::new(record_schema, column_blocks).unwrap(); + let record_batch = RecordBatch::new(record_schema, column_blocks, 4).unwrap(); let mut builder = InfluxqlResultBuilder::new(record_batch.schema(), 0).unwrap(); builder.add_record_batch(record_batch).unwrap(); diff --git a/src/table_engine/src/memory.rs b/src/table_engine/src/memory.rs index 689677052a..20cfe583e4 100644 --- a/src/table_engine/src/memory.rs +++ b/src/table_engine/src/memory.rs @@ -260,7 +260,7 @@ fn row_group_to_record_batch( column_blocks.push(column_block); } - RecordBatch::new(record_schema.clone(), column_blocks) + RecordBatch::new(record_schema.clone(), column_blocks, rows.num_rows()) .box_err() .context(ErrWithSource { msg: "failed to create RecordBatch", From c9ba7a5d28a276d8fe7d8001dd2a515365de21ab Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Tue, 20 Feb 2024 21:35:38 +0800 Subject: [PATCH 17/25] fix integration test --- integration_tests/cases/common/dml/issue-341.result | 8 ++++---- .../cases/env/cluster/ddl/partition_table.result | 8 ++++++-- .../cases/env/cluster/ddl/partition_table.sql | 4 ++++ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/integration_tests/cases/common/dml/issue-341.result b/integration_tests/cases/common/dml/issue-341.result index 4e42d84c80..a68d4f5133 100644 --- a/integration_tests/cases/common/dml/issue-341.result +++ b/integration_tests/cases/common/dml/issue-341.result @@ -115,8 +115,8 @@ WHERE `value` = 3; plan_type,plan, -String("logical_plan"),String("Filter: issue341_t2.value = Float64(3)\n TableScan: issue341_t2 projection=[timestamp, value], partial_filters=[issue341_t2.value = Float64(3)]"), -String("physical_plan"),String("CoalesceBatchesExec: target_batch_size=8192\n FilterExec: value@1 = 3\n ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), +String("logical_plan"),String("TableScan: issue341_t2 projection=[timestamp, value], full_filters=[issue341_t2.value = Float64(3)]"), +String("physical_plan"),String("ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), -- When using tag as filter, FilterExec node should not be in plan. @@ -129,8 +129,8 @@ WHERE tag1 = "t3"; plan_type,plan, -String("logical_plan"),String("TableScan: issue341_t2 projection=[timestamp, value], full_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"), -String("physical_plan"),String("ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(1)\n"), +String("logical_plan"),String("Projection: issue341_t2.timestamp, issue341_t2.value\n Filter: issue341_t2.tag1 = Utf8(\"t3\")\n TableScan: issue341_t2 projection=[timestamp, value, tag1], partial_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"), +String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n CoalesceBatchesExec: target_batch_size=8192\n FilterExec: tag1@2 = t3\n ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), DROP TABLE IF EXISTS `issue341_t1`; diff --git a/integration_tests/cases/env/cluster/ddl/partition_table.result b/integration_tests/cases/env/cluster/ddl/partition_table.result index ee935cb3c1..3f7bb7d8bb 100644 --- a/integration_tests/cases/env/cluster/ddl/partition_table.result +++ b/integration_tests/cases/env/cluster/ddl/partition_table.result @@ -80,19 +80,23 @@ UInt64(16367588166920223437),Timestamp(1651737067000),String("horaedb9"),Int32(0 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx -- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx +-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0"; plan_type,plan, -String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=[\npartition_table_t:\n __partition_table_t_1:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n\n__partition_table_t_1:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), +String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:4, metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: name@2 = ceresdb0, metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_0, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: name@2 = ceresdb0, metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: name@2 = ceresdb0, metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: name@2 = ceresdb0, metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_3, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx -- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx -- SQLNESS REPLACE __partition_table_t_\d __partition_table_t_x +-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4"); plan_type,plan, -String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=[\npartition_table_t:\n __partition_table_t_x:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n __partition_table_t_x:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n __partition_table_t_x:\n poll_duration=xxs\n total_duration=xxs\n wait_duration=xxs\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), +String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:4, metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), ALTER TABLE partition_table_t ADD COLUMN (b string); diff --git a/integration_tests/cases/env/cluster/ddl/partition_table.sql b/integration_tests/cases/env/cluster/ddl/partition_table.sql index a36b59ac2d..a87dfbb2cd 100644 --- a/integration_tests/cases/env/cluster/ddl/partition_table.sql +++ b/integration_tests/cases/env/cluster/ddl/partition_table.sql @@ -37,11 +37,15 @@ SELECT * from partition_table_t where name in ("horaedb5", "horaedb6", "horaedb7 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx -- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx +-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0"; -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx -- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx -- SQLNESS REPLACE __partition_table_t_\d __partition_table_t_x +-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4"); ALTER TABLE partition_table_t ADD COLUMN (b string); From be2ed54e4474a3b37b4e58b2849ddea7b9177607 Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Wed, 21 Feb 2024 10:18:25 +0800 Subject: [PATCH 18/25] update query plan --- .../cases/env/local/ddl/query-plan.result | 19 ++++++++++++++----- .../cases/env/local/ddl/query-plan.sql | 9 +++++++++ 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/integration_tests/cases/env/local/ddl/query-plan.result b/integration_tests/cases/env/local/ddl/query-plan.result index 917767bf02..f471cc3ced 100644 --- a/integration_tests/cases/env/local/ddl/query-plan.result +++ b/integration_tests/cases/env/local/ddl/query-plan.result @@ -27,48 +27,53 @@ affected_rows: 3 -- This query should include memtable -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), +String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: t@0 > 1695348001000, metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=3\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), -- This query should have higher priority -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select t from `03_dml_select_real_time_range` where t >= 1695348001000 and t < 1695348002000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), +String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: t@0 >= 1695348001000 AND t@0 < 1695348002000, metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=3\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), -- This query should not include memtable -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select t from `03_dml_select_real_time_range` where t > 1695348002000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), +String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: t@0 > 1695348002000, metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), -- SQLNESS ARG pre_cmd=flush -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx -- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -- This query should include SST explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_sst_1, fetched_columns:[tsid,t]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n=0]\n"), +String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: t@0 > 1695348001000, metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=3\n scan_sst_1, fetched_columns:[tsid,t]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n=0]\n"), -- This query should not include SST +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select t from `03_dml_select_real_time_range` where t > 1695348002000; plan_type,plan, -String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), +String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: t@0 > 1695348002000, metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), -- Table with an 'append' update mode @@ -97,6 +102,7 @@ affected_rows: 3 -- SQLNESS REPLACE since_create=\d+.?\d*(µ|m|n) since_create=xx -- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx -- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select t from `03_append_mode_table` where t >= 1695348001000 and name = 'ceresdb'; @@ -109,6 +115,7 @@ Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to ex -- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx -- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx -- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select t from `03_append_mode_table` where t >= 1695348001000 and name = 'ceresdb'; @@ -128,6 +135,7 @@ affected_rows: 0 -- This query should have higher priority -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select TS from `TEST_QUERY_PRIORITY` where TS >= 1695348001000 and TS < 1695348002000; @@ -137,6 +145,7 @@ String("Plan with Metrics"),String("ScanTable: table=TEST_QUERY_PRIORITY, parall -- This query should have higher priority -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select TS from `TEST_QUERY_PRIORITY` where TS >= 1695348001000; diff --git a/integration_tests/cases/env/local/ddl/query-plan.sql b/integration_tests/cases/env/local/ddl/query-plan.sql index 218e0f7ba1..5217b1a076 100644 --- a/integration_tests/cases/env/local/ddl/query-plan.sql +++ b/integration_tests/cases/env/local/ddl/query-plan.sql @@ -18,27 +18,32 @@ INSERT INTO `03_dml_select_real_time_range` (t, name, value) -- This query should include memtable -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; -- This query should have higher priority -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select t from `03_dml_select_real_time_range` where t >= 1695348001000 and t < 1695348002000; -- This query should not include memtable -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select t from `03_dml_select_real_time_range` where t > 1695348002000; -- SQLNESS ARG pre_cmd=flush -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx -- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx -- This query should include SST explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; -- This query should not include SST +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select t from `03_dml_select_real_time_range` where t > 1695348002000; @@ -64,6 +69,7 @@ INSERT INTO `03_append_mode_table` (t, name, value) -- SQLNESS REPLACE since_create=\d+.?\d*(µ|m|n) since_create=xx -- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx -- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select t from `03_append_mode_table` where t >= 1695348001000 and name = 'ceresdb'; @@ -74,6 +80,7 @@ where t >= 1695348001000 and name = 'ceresdb'; -- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx -- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx -- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select t from `03_append_mode_table` where t >= 1695348001000 and name = 'ceresdb'; @@ -89,11 +96,13 @@ CREATE TABLE `TEST_QUERY_PRIORITY` ( -- This query should have higher priority -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select TS from `TEST_QUERY_PRIORITY` where TS >= 1695348001000 and TS < 1695348002000; -- This query should have higher priority -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx +-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx explain analyze select TS from `TEST_QUERY_PRIORITY` where TS >= 1695348001000; From f51d460ad79d836a4de0c3fadad0ef5ba0b92114 Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Wed, 21 Feb 2024 22:16:42 +0800 Subject: [PATCH 19/25] fix missing column --- src/analytic_engine/src/table/mod.rs | 2 +- src/table_engine/src/provider.rs | 30 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/src/analytic_engine/src/table/mod.rs b/src/analytic_engine/src/table/mod.rs index e6dfd1dc2f..82b74954f4 100644 --- a/src/analytic_engine/src/table/mod.rs +++ b/src/analytic_engine/src/table/mod.rs @@ -429,7 +429,7 @@ pub fn support_pushdown(schema: &Schema, need_dedup: bool, col_names: &[String]) return true; } - // When table need dedup, only unique keys columns support pushdown + // When table need dedup, only non-unique keys columns support pushdown col_names .iter() .all(|col_name| !schema.is_unique_column(col_name.as_str())) diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs index 63e5cc7d22..7f2a00d1b8 100644 --- a/src/table_engine/src/provider.rs +++ b/src/table_engine/src/provider.rs @@ -19,6 +19,7 @@ use std::{ any::Any, + collections::HashSet, fmt, sync::{Arc, Mutex}, time::{Duration, Instant}, @@ -28,6 +29,7 @@ use arrow::datatypes::SchemaRef; use async_trait::async_trait; use common_types::{projected_schema::ProjectedSchema, request_id::RequestId, schema::Schema}; use datafusion::{ + common::tree_node::{TreeNode, VisitRecursion}, config::{ConfigEntry, ConfigExtension, ExtensionOptions}, datasource::TableProvider, error::{DataFusionError, Result}, @@ -230,9 +232,17 @@ impl TableProviderAdapter { priority, ); + let all_projections = if let Some(proj) = projection { + let mut all_projections = + collect_projection_from_expr(filters, &self.current_table_schema); + all_projections.extend(proj); + Some(all_projections.into_iter().collect::>()) + } else { + None + }; let predicate = self.check_and_build_predicate_from_filters(filters); let projected_schema = - ProjectedSchema::new(self.current_table_schema.clone(), projection.cloned()).map_err( + ProjectedSchema::new(self.current_table_schema.clone(), all_projections).map_err( |e| { DataFusionError::Internal(format!( "Invalid projection, plan:{self:?}, projection:{projection:?}, err:{e:?}" @@ -499,3 +509,21 @@ impl fmt::Debug for ScanTable { .finish() } } + +fn collect_projection_from_expr(exprs: &[Expr], schema: &Schema) -> HashSet { + let mut projections = HashSet::new(); + exprs.iter().for_each(|expr| { + _ = expr.apply(&mut |expr| match &expr { + Expr::Column(column) => { + if let Some(idx) = schema.index_of(&column.name) { + projections.insert(idx); + } + Ok(VisitRecursion::Stop) + } + Expr::ScalarVariable(_, _) | Expr::Literal(_) => Ok(VisitRecursion::Stop), + _ => Ok(VisitRecursion::Continue), + }); + }); + + projections +} From 7469ca90ea3e64b786ac019dff74ea8d77fd7155 Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Wed, 21 Feb 2024 22:31:35 +0800 Subject: [PATCH 20/25] fix pushdown --- src/analytic_engine/src/table/mod.rs | 5 +++-- src/table_engine/src/provider.rs | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/analytic_engine/src/table/mod.rs b/src/analytic_engine/src/table/mod.rs index 82b74954f4..674f6b3bd5 100644 --- a/src/analytic_engine/src/table/mod.rs +++ b/src/analytic_engine/src/table/mod.rs @@ -429,10 +429,11 @@ pub fn support_pushdown(schema: &Schema, need_dedup: bool, col_names: &[String]) return true; } - // When table need dedup, only non-unique keys columns support pushdown + // When table need dedup, only unique keys columns support pushdown + // See https://github.com/apache/incubator-horaedb/issues/605 col_names .iter() - .all(|col_name| !schema.is_unique_column(col_name.as_str())) + .all(|col_name| schema.is_unique_column(col_name.as_str())) } #[async_trait] diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs index 7f2a00d1b8..f00af076c9 100644 --- a/src/table_engine/src/provider.rs +++ b/src/table_engine/src/provider.rs @@ -304,7 +304,7 @@ impl TableProviderAdapter { if support_pushdown { TableProviderFilterPushDown::Exact } else { - TableProviderFilterPushDown::Inexact + TableProviderFilterPushDown::Unsupported } }) .collect() From 792d94b3390b6bfeacf48c45aa2e815bd2ff2de2 Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Thu, 22 Feb 2024 14:04:18 +0800 Subject: [PATCH 21/25] fix missing columns --- .../cases/common/dml/issue-341.result | 10 +-- .../cases/env/local/ddl/query-plan.result | 18 +++--- src/common_types/src/projected_schema.rs | 6 +- src/table_engine/src/provider.rs | 61 ++++++++++++++----- 4 files changed, 66 insertions(+), 29 deletions(-) diff --git a/integration_tests/cases/common/dml/issue-341.result b/integration_tests/cases/common/dml/issue-341.result index a68d4f5133..c06388b824 100644 --- a/integration_tests/cases/common/dml/issue-341.result +++ b/integration_tests/cases/common/dml/issue-341.result @@ -72,7 +72,7 @@ WHERE plan_type,plan, String("logical_plan"),String("TableScan: issue341_t1 projection=[timestamp, value], full_filters=[issue341_t1.tag1 = Utf8(\"t3\")]"), -String("physical_plan"),String("ScanTable: table=issue341_t1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(1)\n"), +String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n ScanTable: table=issue341_t1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), -- Repeat operations above, but with overwrite table @@ -115,8 +115,8 @@ WHERE `value` = 3; plan_type,plan, -String("logical_plan"),String("TableScan: issue341_t2 projection=[timestamp, value], full_filters=[issue341_t2.value = Float64(3)]"), -String("physical_plan"),String("ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), +String("logical_plan"),String("Filter: issue341_t2.value = Float64(3)\n TableScan: issue341_t2 projection=[timestamp, value]"), +String("physical_plan"),String("CoalesceBatchesExec: target_batch_size=8192\n FilterExec: value@1 = 3\n ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), -- When using tag as filter, FilterExec node should not be in plan. @@ -129,8 +129,8 @@ WHERE tag1 = "t3"; plan_type,plan, -String("logical_plan"),String("Projection: issue341_t2.timestamp, issue341_t2.value\n Filter: issue341_t2.tag1 = Utf8(\"t3\")\n TableScan: issue341_t2 projection=[timestamp, value, tag1], partial_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"), -String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n CoalesceBatchesExec: target_batch_size=8192\n FilterExec: tag1@2 = t3\n ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), +String("logical_plan"),String("TableScan: issue341_t2 projection=[timestamp, value], full_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"), +String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), DROP TABLE IF EXISTS `issue341_t1`; diff --git a/integration_tests/cases/env/local/ddl/query-plan.result b/integration_tests/cases/env/local/ddl/query-plan.result index f471cc3ced..1f63218401 100644 --- a/integration_tests/cases/env/local/ddl/query-plan.result +++ b/integration_tests/cases/env/local/ddl/query-plan.result @@ -32,7 +32,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: t@0 > 1695348001000, metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=3\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), -- This query should have higher priority @@ -42,7 +42,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t >= 1695348001000 and t < 1695348002000; plan_type,plan, -String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: t@0 >= 1695348001000 AND t@0 < 1695348002000, metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=3\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=1\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"), -- This query should not include memtable @@ -52,7 +52,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348002000; plan_type,plan, -String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: t@0 > 1695348002000, metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), -- SQLNESS ARG pre_cmd=flush @@ -64,7 +64,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348001000; plan_type,plan, -String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: t@0 > 1695348001000, metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=3\n scan_sst_1, fetched_columns:[tsid,t]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=1\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_sst_1, fetched_columns:[tsid,t]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=320\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n=0]\n"), -- This query should not include SST @@ -73,7 +73,7 @@ explain analyze select t from `03_dml_select_real_time_range` where t > 1695348002000; plan_type,plan, -String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: t@0 > 1695348002000, metrics=xx\n ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), +String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=0\n=0]\n"), -- Table with an 'append' update mode @@ -106,7 +106,9 @@ affected_rows: 3 explain analyze select t from `03_append_mode_table` where t >= 1695348001000 and name = 'ceresdb'; -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute select, err:Failed to execute physical plan, msg:failed to collect execution results, err:Stream error, msg:convert from arrow record batch, err:Execution error: Failed to read table, partition:0, err:Failed to scan table, table:03_append_mode_table, err:Failed to build chain iterator, table:03_append_mode_table, err:Fail to build stream from the memtable, err:Failed to generate datafusion physical expr, err:Schema error: No field named name. Valid fields are t.. sql:explain analyze select t from `03_append_mode_table`\nwhere t >= 1695348001000 and name = 'ceresdb';" }) +plan_type,plan, +String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=xx\n ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=1\n num_ssts=0\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_memtable_1, fetched_columns:[t,name]:\n=0]\n"), + -- Should just fetch projected columns from SST -- SQLNESS ARG pre_cmd=flush @@ -119,7 +121,9 @@ Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to ex explain analyze select t from `03_append_mode_table` where t >= 1695348001000 and name = 'ceresdb'; -Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute select, err:Failed to execute physical plan, msg:failed to collect execution results, err:Stream error, msg:convert from arrow record batch, err:Execution error: Failed to read table, partition:0, err:Failed to scan table, table:03_append_mode_table, err:Failed to build chain iterator, table:03_append_mode_table, err:Fail to build stream from the sst file, err:Failed to generate datafusion physical expr, err:Schema error: No field named name. Valid fields are t.. sql:explain analyze select t from `03_append_mode_table`\nwhere t >= 1695348001000 and name = 'ceresdb';" }) +plan_type,plan, +String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=xx\n ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=false\n chain_iter_0:\n num_memtables=0\n num_ssts=1\n scan_duration=xxs\n since_create=xxs\n since_init=xxs\n total_batch_fetched=1\n total_rows_fetched=2\n scan_sst_1, fetched_columns:[t,name]:\n meta_data_cache_hit=false\n parallelism=1\n project_record_batch=xxs\n read_meta_data_duration=xxs\n row_mem=408\n row_num=3\n prune_row_groups:\n pruned_by_custom_filter=0\n pruned_by_min_max=0\n row_groups_after_prune=1\n total_row_groups=1\n use_custom_filter=false\n=0]\n"), + CREATE TABLE `TEST_QUERY_PRIORITY` ( NAME string TAG, diff --git a/src/common_types/src/projected_schema.rs b/src/common_types/src/projected_schema.rs index 30e9eb01e3..1eff7dc424 100644 --- a/src/common_types/src/projected_schema.rs +++ b/src/common_types/src/projected_schema.rs @@ -105,7 +105,7 @@ pub struct RowProjector { /// For example: /// source columns in sst: 0,1,2,3,4 /// target projection columns: 2,1,3 - /// + /// /// the actual columns in fetched record: 1,2,3 /// relative columns indexes in fetched record: 0,1,2 /// @@ -347,6 +347,10 @@ impl ProjectedSchema { pub fn table_schema(&self) -> &Schema { &self.0.table_schema } + + pub fn target_column_schema(&self, i: usize) -> &ColumnSchema { + self.0.target_record_schema.column(i) + } } impl From for horaedbproto::schema::ProjectedSchema { diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs index f00af076c9..e467a01490 100644 --- a/src/table_engine/src/provider.rs +++ b/src/table_engine/src/provider.rs @@ -29,7 +29,6 @@ use arrow::datatypes::SchemaRef; use async_trait::async_trait; use common_types::{projected_schema::ProjectedSchema, request_id::RequestId, schema::Schema}; use datafusion::{ - common::tree_node::{TreeNode, VisitRecursion}, config::{ConfigEntry, ConfigExtension, ExtensionOptions}, datasource::TableProvider, error::{DataFusionError, Result}, @@ -37,8 +36,9 @@ use datafusion::{ logical_expr::{Expr, TableProviderFilterPushDown, TableSource, TableType}, physical_expr::PhysicalSortExpr, physical_plan::{ + expressions, metrics::{Count, MetricValue, MetricsSet}, - DisplayAs, DisplayFormatType, ExecutionPlan, Metric, Partitioning, + DisplayAs, DisplayFormatType, ExecutionPlan, Metric, Partitioning, PhysicalExpr, SendableRecordBatchStream as DfSendableRecordBatchStream, Statistics, }, }; @@ -232,11 +232,26 @@ impl TableProviderAdapter { priority, ); + let mut need_reprojection = false; let all_projections = if let Some(proj) = projection { - let mut all_projections = + let mut original_projections = proj.clone(); + let projections_from_filter = collect_projection_from_expr(filters, &self.current_table_schema); - all_projections.extend(proj); - Some(all_projections.into_iter().collect::>()) + for proj in projections_from_filter { + if !original_projections.contains(&proj) { + original_projections.push(proj); + // If the projection from filter has columns not in the original projection, + // we need to add a ProjectionExec plan to project the orignal columns. Eg: + // ``` + // select a from table where b > 1 + // ``` + // The original projection only contains a, but the filter has column b, so we + // need to query both a and b column from table but only + // output a column. + need_reprojection = true; + } + } + Some(original_projections) } else { None }; @@ -260,13 +275,32 @@ impl TableProviderAdapter { let request = ReadRequest { request_id, opts, - projected_schema, + projected_schema: projected_schema.clone(), predicate, metrics_collector: MetricsCollector::new(SCAN_TABLE_METRICS_COLLECTOR_NAME.to_string()), priority, }; - self.builder.build(request).await + if need_reprojection { + let original_projection = projection.unwrap(); + let projection = (0..original_projection.len()) + .map(|proj| { + let column = projected_schema.target_column_schema(proj); + + ( + Arc::new(expressions::Column::new(&column.name, proj)) + as Arc, + column.name.clone(), + ) + }) + .collect::>(); + let scan = self.builder.build(request).await?; + let plan = + datafusion::physical_plan::projection::ProjectionExec::try_new(projection, scan)?; + Ok(Arc::new(plan)) + } else { + self.builder.build(request).await + } } fn check_and_build_predicate_from_filters(&self, filters: &[Expr]) -> PredicateRef { @@ -513,16 +547,11 @@ impl fmt::Debug for ScanTable { fn collect_projection_from_expr(exprs: &[Expr], schema: &Schema) -> HashSet { let mut projections = HashSet::new(); exprs.iter().for_each(|expr| { - _ = expr.apply(&mut |expr| match &expr { - Expr::Column(column) => { - if let Some(idx) = schema.index_of(&column.name) { - projections.insert(idx); - } - Ok(VisitRecursion::Stop) + for col_name in visitor::find_columns_by_expr(expr) { + if let Some(idx) = schema.index_of(&col_name) { + projections.insert(idx); } - Expr::ScalarVariable(_, _) | Expr::Literal(_) => Ok(VisitRecursion::Stop), - _ => Ok(VisitRecursion::Continue), - }); + } }); projections From 0e1096aab79269793f164df25ed52912d64c8b08 Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Thu, 22 Feb 2024 14:30:22 +0800 Subject: [PATCH 22/25] add more comments --- .../env/cluster/ddl/partition_table.result | 4 +- src/table_engine/src/provider.rs | 50 +++++++++++-------- 2 files changed, 30 insertions(+), 24 deletions(-) diff --git a/integration_tests/cases/env/cluster/ddl/partition_table.result b/integration_tests/cases/env/cluster/ddl/partition_table.result index 3f7bb7d8bb..233c348318 100644 --- a/integration_tests/cases/env/cluster/ddl/partition_table.result +++ b/integration_tests/cases/env/cluster/ddl/partition_table.result @@ -85,7 +85,7 @@ UInt64(16367588166920223437),Timestamp(1651737067000),String("horaedb9"),Int32(0 EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0"; plan_type,plan, -String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:4, metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: name@2 = ceresdb0, metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_0, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: name@2 = ceresdb0, metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: name@2 = ceresdb0, metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: name@2 = ceresdb0, metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_3, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), +String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=xx\n ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx @@ -96,7 +96,7 @@ String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:f EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4"); plan_type,plan, -String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:4, metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=2\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=1\n total_rows_fetch_from_one=1\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), +String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n do_merge_sort=true\n iter_num=1\n merge_iter_0:\n init_duration=xxs\n num_memtables=0\n num_ssts=0\n scan_count=1\n scan_duration=xxs\n times_fetch_row_from_multiple=0\n times_fetch_rows_from_one=0\n total_rows_fetch_from_one=0\n scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"), ALTER TABLE partition_table_t ADD COLUMN (b string); diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs index e467a01490..e9775fbec6 100644 --- a/src/table_engine/src/provider.rs +++ b/src/table_engine/src/provider.rs @@ -38,6 +38,7 @@ use datafusion::{ physical_plan::{ expressions, metrics::{Count, MetricValue, MetricsSet}, + projection::ProjectionExec, DisplayAs, DisplayFormatType, ExecutionPlan, Metric, Partitioning, PhysicalExpr, SendableRecordBatchStream as DfSendableRecordBatchStream, Statistics, }, @@ -240,14 +241,16 @@ impl TableProviderAdapter { for proj in projections_from_filter { if !original_projections.contains(&proj) { original_projections.push(proj); - // If the projection from filter has columns not in the original projection, - // we need to add a ProjectionExec plan to project the orignal columns. Eg: - // ``` + // If the projection from filters have columns not in the original projection, + // we need to add it to projection, and add a ProjectionExec plan to project the + // orignal columns. Eg: + // ```text // select a from table where b > 1 // ``` // The original projection only contains a, but the filter has column b, so we // need to query both a and b column from table but only - // output a column. + // output a column. More details can be found in: + // https://github.com/apache/arrow-datafusion/pull/9131#pullrequestreview-1865020767 need_reprojection = true; } } @@ -265,6 +268,22 @@ impl TableProviderAdapter { }, )?; + let projection_exprs = if need_reprojection { + let original_projection = projection.unwrap(); + let exprs = (0..original_projection.len()) + .map(|i| { + let column = projected_schema.target_column_schema(i); + ( + Arc::new(expressions::Column::new(&column.name, i)) + as Arc, + column.name.clone(), + ) + }) + .collect::>(); + Some(exprs) + } else { + None + }; let opts = ReadOptions { deadline, read_parallelism, @@ -275,31 +294,18 @@ impl TableProviderAdapter { let request = ReadRequest { request_id, opts, - projected_schema: projected_schema.clone(), + projected_schema, predicate, metrics_collector: MetricsCollector::new(SCAN_TABLE_METRICS_COLLECTOR_NAME.to_string()), priority, }; - if need_reprojection { - let original_projection = projection.unwrap(); - let projection = (0..original_projection.len()) - .map(|proj| { - let column = projected_schema.target_column_schema(proj); - - ( - Arc::new(expressions::Column::new(&column.name, proj)) - as Arc, - column.name.clone(), - ) - }) - .collect::>(); - let scan = self.builder.build(request).await?; - let plan = - datafusion::physical_plan::projection::ProjectionExec::try_new(projection, scan)?; + let scan = self.builder.build(request).await?; + if let Some(expr) = projection_exprs { + let plan = ProjectionExec::try_new(expr, scan)?; Ok(Arc::new(plan)) } else { - self.builder.build(request).await + Ok(scan) } } From 43a0f3437e63ba874e61b66e775a0b39d32b6d8b Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Thu, 22 Feb 2024 14:54:13 +0800 Subject: [PATCH 23/25] remove unwrap --- .../src/instance/reorder_memtable.rs | 5 +---- src/analytic_engine/src/row_iter/merge.rs | 3 --- src/interpreters/src/insert.rs | 4 ++-- .../physical_plan_extension/prom_align.rs | 19 ++++++++----------- 4 files changed, 11 insertions(+), 20 deletions(-) diff --git a/src/analytic_engine/src/instance/reorder_memtable.rs b/src/analytic_engine/src/instance/reorder_memtable.rs index 2f9ac87b8e..c37417bf64 100644 --- a/src/analytic_engine/src/instance/reorder_memtable.rs +++ b/src/analytic_engine/src/instance/reorder_memtable.rs @@ -262,10 +262,7 @@ impl Reorder { pub async fn into_stream(self) -> Result { // 1. Init datafusion context let runtime = Arc::new(RuntimeEnv::default()); - let mut state = SessionState::new_with_config_rt(SessionConfig::new(), runtime); - // The physical optimizer rules have bug, and the plan here is simple, optimize - // is not required, so we disable it here. - state = state.with_physical_optimizer_rules(vec![]); + let state = SessionState::new_with_config_rt(SessionConfig::new(), runtime); let ctx = SessionContext::new_with_state(state); let table_provider = Arc::new(MemIterProvider { arrow_schema: self.schema.to_arrow_schema_ref(), diff --git a/src/analytic_engine/src/row_iter/merge.rs b/src/analytic_engine/src/row_iter/merge.rs index 67a2f4045d..88f58c2df9 100644 --- a/src/analytic_engine/src/row_iter/merge.rs +++ b/src/analytic_engine/src/row_iter/merge.rs @@ -486,7 +486,6 @@ impl BufferedStream { Ok(false) } Some(record_batch) => { - logger::info!("if necessary: {record_batch:?}"); self.state.as_mut().unwrap().reset(record_batch); Ok(true) } @@ -845,7 +844,6 @@ impl MergeIterator { None }; - logger::info!("debug fetch rows:{record_batch:?}"); self.reheap(buffered_stream).await?; @@ -875,7 +873,6 @@ impl MergeIterator { self.record_batch_builder.clear(); - logger::info!("fetch next batch, {}", self.record_batch_builder.len()); while !self.hot.is_empty() && self.record_batch_builder.len() < self.iter_options.batch_size { // no need to do merge sort if only one batch in the hot heap. diff --git a/src/interpreters/src/insert.rs b/src/interpreters/src/insert.rs index c67ff1dfc1..cc455b3fb6 100644 --- a/src/interpreters/src/insert.rs +++ b/src/interpreters/src/insert.rs @@ -373,6 +373,6 @@ fn get_or_extract_column_from_row_groups( cached_column_values.insert(column_idx, columnar_value.clone()); Ok(columnar_value) })?; - // TODO: solve unwarp - Ok(column.into_array(num_rows).unwrap()) + + column.into_array(num_rows).context(DatafusionExecutor) } diff --git a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs index 9fe8cc74c7..3b1a0cd9a7 100644 --- a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs +++ b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs @@ -37,7 +37,7 @@ use common_types::{ time::{TimeRange, Timestamp}, }; use datafusion::{ - error::{DataFusionError, Result as ArrowResult}, + error::{DataFusionError, Result as DataFusionResult}, execution::context::TaskContext, physical_expr::PhysicalSortExpr, physical_plan::{ @@ -93,15 +93,15 @@ impl PhysicalExpr for ExtractTsidExpr { self } - fn data_type(&self, _input_schema: &ArrowSchema) -> ArrowResult { + fn data_type(&self, _input_schema: &ArrowSchema) -> DataFusionResult { Ok(DataType::UInt64) } - fn nullable(&self, _input_schema: &ArrowSchema) -> ArrowResult { + fn nullable(&self, _input_schema: &ArrowSchema) -> DataFusionResult { Ok(false) } - fn evaluate(&self, batch: &RecordBatch) -> ArrowResult { + fn evaluate(&self, batch: &RecordBatch) -> DataFusionResult { let tsid_idx = batch .schema() .index_of(TSID_COLUMN) @@ -116,7 +116,7 @@ impl PhysicalExpr for ExtractTsidExpr { fn with_new_children( self: Arc, _children: Vec>, - ) -> ArrowResult> { + ) -> DataFusionResult> { Ok(self) } @@ -204,7 +204,7 @@ impl ExecutionPlan for PromAlignExec { fn with_new_children( self: Arc, children: Vec>, - ) -> ArrowResult> { + ) -> DataFusionResult> { match children.len() { 1 => Ok(Arc::new(PromAlignExec { input: children[0].clone(), @@ -222,7 +222,7 @@ impl ExecutionPlan for PromAlignExec { &self, partition: usize, context: Arc, - ) -> ArrowResult { + ) -> DataFusionResult { debug!("PromAlignExec: partition:{}", partition); Ok(Box::pin(PromAlignReader { input: self.input.execute(partition, context)?, @@ -236,10 +236,7 @@ impl ExecutionPlan for PromAlignExec { })) } - fn statistics( - &self, - ) -> std::result::Result - { + fn statistics(&self) -> DataFusionResult { // TODO(chenxiang) Ok(Statistics::new_unknown(&self.schema())) } From 5ebf1f26fadcdbe1ec349575c0d1a5e724c51144 Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Thu, 22 Feb 2024 15:01:04 +0800 Subject: [PATCH 24/25] fix influxdb tests --- src/proxy/src/influxdb/types.rs | 2 +- src/table_engine/src/provider.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/proxy/src/influxdb/types.rs b/src/proxy/src/influxdb/types.rs index cd2b229d6d..488f5dedfe 100644 --- a/src/proxy/src/influxdb/types.rs +++ b/src/proxy/src/influxdb/types.rs @@ -744,7 +744,7 @@ mod tests { fn test_influxql_result() { let record_schema = build_test_record_schema(); let column_blocks = build_test_column_blocks(); - let record_batch = RecordBatch::new(record_schema, column_blocks, 4).unwrap(); + let record_batch = RecordBatch::new(record_schema, column_blocks, 7).unwrap(); let mut builder = InfluxqlResultBuilder::new(record_batch.schema(), 0).unwrap(); builder.add_record_batch(record_batch).unwrap(); diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs index e9775fbec6..bcca5ba897 100644 --- a/src/table_engine/src/provider.rs +++ b/src/table_engine/src/provider.rs @@ -344,7 +344,7 @@ impl TableProviderAdapter { if support_pushdown { TableProviderFilterPushDown::Exact } else { - TableProviderFilterPushDown::Unsupported + TableProviderFilterPushDown::Inexact } }) .collect() From cd70bfc2394255afdf401a6d885de2d8bd140021 Mon Sep 17 00:00:00 2001 From: jiacai2050 Date: Thu, 22 Feb 2024 17:32:02 +0800 Subject: [PATCH 25/25] fix memtable scan --- .../cases/common/dml/issue-341.result | 2 +- .../cases/common/function/aggregate.result | 43 +++++++++++++++++++ .../cases/common/function/aggregate.sql | 28 ++++++++++++ .../src/memtable/skiplist/iter.rs | 5 +++ src/common_types/src/record_batch.rs | 14 +++++- 5 files changed, 89 insertions(+), 3 deletions(-) diff --git a/integration_tests/cases/common/dml/issue-341.result b/integration_tests/cases/common/dml/issue-341.result index c06388b824..4d7da95cab 100644 --- a/integration_tests/cases/common/dml/issue-341.result +++ b/integration_tests/cases/common/dml/issue-341.result @@ -115,7 +115,7 @@ WHERE `value` = 3; plan_type,plan, -String("logical_plan"),String("Filter: issue341_t2.value = Float64(3)\n TableScan: issue341_t2 projection=[timestamp, value]"), +String("logical_plan"),String("Filter: issue341_t2.value = Float64(3)\n TableScan: issue341_t2 projection=[timestamp, value], partial_filters=[issue341_t2.value = Float64(3)]"), String("physical_plan"),String("CoalesceBatchesExec: target_batch_size=8192\n FilterExec: value@1 = 3\n ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"), diff --git a/integration_tests/cases/common/function/aggregate.result b/integration_tests/cases/common/function/aggregate.result index 037e503a9f..f45a6841a8 100644 --- a/integration_tests/cases/common/function/aggregate.result +++ b/integration_tests/cases/common/function/aggregate.result @@ -105,7 +105,50 @@ COUNT(DISTINCT 02_function_aggregate_table1.arch), Int64(2), +CREATE TABLE `02_function_aggregate_table2` ( + `timestamp` timestamp NOT NULL, + `arch` string TAG, + `datacenter` string TAG, + `value` int, + `uvalue` uint64, + timestamp KEY (timestamp)) ENGINE=Analytic +WITH( + enable_ttl='false', + update_mode = 'append' +); + +affected_rows: 0 + +INSERT INTO `02_function_aggregate_table2` + (`timestamp`, `arch`, `datacenter`, `value`, `uvalue`) +VALUES + (1658304762, 'x86-64', 'china', 100, 10), + (1658304763, 'x86-64', 'china', 200, 10), + (1658304762, 'arm64', 'china', 110, 0), + (1658304763, 'arm64', 'china', 210, 0); + +affected_rows: 4 + +-- The should select empty column +SELECT count(*) FROM `02_function_aggregate_table1`; + +COUNT(*), +Int64(4), + + +-- Same with before, but query from sst +-- SQLNESS ARG pre_cmd=flush +SELECT count(*) FROM `02_function_aggregate_table1`; + +COUNT(*), +Int64(4), + + DROP TABLE `02_function_aggregate_table1`; affected_rows: 0 +DROP TABLE `02_function_aggregate_table2`; + +affected_rows: 0 + diff --git a/integration_tests/cases/common/function/aggregate.sql b/integration_tests/cases/common/function/aggregate.sql index c4f8dd50ea..8543245ae8 100644 --- a/integration_tests/cases/common/function/aggregate.sql +++ b/integration_tests/cases/common/function/aggregate.sql @@ -57,4 +57,32 @@ SELECT distinct(`arch`) FROM `02_function_aggregate_table1` ORDER BY `arch` DESC SELECT count(distinct(`arch`)) FROM `02_function_aggregate_table1`; +CREATE TABLE `02_function_aggregate_table2` ( + `timestamp` timestamp NOT NULL, + `arch` string TAG, + `datacenter` string TAG, + `value` int, + `uvalue` uint64, + timestamp KEY (timestamp)) ENGINE=Analytic +WITH( + enable_ttl='false', + update_mode = 'append' +); + +INSERT INTO `02_function_aggregate_table2` + (`timestamp`, `arch`, `datacenter`, `value`, `uvalue`) +VALUES + (1658304762, 'x86-64', 'china', 100, 10), + (1658304763, 'x86-64', 'china', 200, 10), + (1658304762, 'arm64', 'china', 110, 0), + (1658304763, 'arm64', 'china', 210, 0); + +-- The should select empty column +SELECT count(*) FROM `02_function_aggregate_table1`; + +-- Same with before, but query from sst +-- SQLNESS ARG pre_cmd=flush +SELECT count(*) FROM `02_function_aggregate_table1`; + DROP TABLE `02_function_aggregate_table1`; +DROP TABLE `02_function_aggregate_table2`; diff --git a/src/analytic_engine/src/memtable/skiplist/iter.rs b/src/analytic_engine/src/memtable/skiplist/iter.rs index 4787b754bd..cce3913dea 100644 --- a/src/analytic_engine/src/memtable/skiplist/iter.rs +++ b/src/analytic_engine/src/memtable/skiplist/iter.rs @@ -154,6 +154,7 @@ impl + Clone + Sync + Send> ColumnarIterImpl { assert!(self.batch_size > 0); let record_schema = self.row_projector.fetched_schema().clone(); + let is_empty_projection = record_schema.columns().is_empty(); let primary_key_indexes = self .row_projector .primary_key_indexes() @@ -183,6 +184,10 @@ impl + Clone + Sync + Send> ColumnarIterImpl { } } + if is_empty_projection { + builder.inc_row_num(num_rows); + } + if num_rows > 0 { if let Some(deadline) = self.deadline { let now = Instant::now(); diff --git a/src/common_types/src/record_batch.rs b/src/common_types/src/record_batch.rs index 2c787be465..0278aa7095 100644 --- a/src/common_types/src/record_batch.rs +++ b/src/common_types/src/record_batch.rs @@ -591,6 +591,7 @@ pub struct FetchedRecordBatchBuilder { fetched_schema: RecordSchema, primary_key_indexes: Option>, builders: Vec, + num_rows: usize, } impl FetchedRecordBatchBuilder { @@ -610,6 +611,7 @@ impl FetchedRecordBatchBuilder { fetched_schema, primary_key_indexes, builders, + num_rows: 0, } } @@ -633,6 +635,7 @@ impl FetchedRecordBatchBuilder { fetched_schema: record_schema, primary_key_indexes, builders, + num_rows: 0, } } @@ -680,6 +683,13 @@ impl FetchedRecordBatchBuilder { Ok(()) } + /// When the record batch contains no column, its row num may not be 0, so + /// we need to inc row num explicitly in this case. + /// See: https://github.com/apache/arrow-datafusion/pull/7920 + pub fn inc_row_num(&mut self, n: usize) { + self.num_rows += n; + } + /// Append `len` from `start` (inclusive) to this builder. /// /// REQUIRE: @@ -711,7 +721,7 @@ impl FetchedRecordBatchBuilder { self.builders .first() .map(|builder| builder.len()) - .unwrap_or(0) + .unwrap_or(self.num_rows) } /// Returns true if the builder is empty. @@ -737,7 +747,7 @@ impl FetchedRecordBatchBuilder { let num_rows = column_blocks .first() .map(|block| block.num_rows()) - .unwrap_or_default(); + .unwrap_or(self.num_rows); let options = RecordBatchOptions::new().with_row_count(Some(num_rows)); Ok(FetchedRecordBatch {