From 6d9166c272d59a0a2ffd80f13bd33d74da0d324f Mon Sep 17 00:00:00 2001
From: tanruixiang <tanruixiang0104@gmail.com>
Date: Thu, 18 Jan 2024 16:31:55 +0800
Subject: [PATCH 01/25] deps: bump datafusion

---
 Cargo.lock                                    | 555 ++++++++++--------
 Cargo.toml                                    |  22 +-
 .../src/instance/reorder_memtable.rs          |   7 +-
 .../src/row_iter/record_batch_stream.rs       |   1 +
 src/common_types/src/datum.rs                 |  14 +-
 src/components/parquet_ext/src/meta_data.rs   |   3 +-
 .../parquet_ext/src/prune/min_max.rs          |  11 +-
 .../src/dist_sql_query/physical_plan.rs       |  18 +-
 src/df_operator/src/scalar.rs                 |   2 +-
 src/df_operator/src/udaf.rs                   |   2 +-
 src/interpreters/src/insert.rs                |   4 +-
 .../physical_optimizer/repartition.rs         |   8 +-
 .../physical_plan_extension/prom_align.rs     |   8 +-
 .../src/datafusion_impl/task_context.rs       |   2 +-
 .../src/logical_optimizer/type_conversion.rs  |   6 +-
 src/query_frontend/src/parser.rs              |  24 +-
 src/query_frontend/src/planner.rs             |   2 +-
 src/query_frontend/src/promql/convert.rs      |  10 +-
 src/query_frontend/src/provider.rs            |   7 +
 src/table_engine/src/predicate.rs             |   5 +-
 src/table_engine/src/provider.rs              |   7 +-
 21 files changed, 421 insertions(+), 297 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 5f23033061..7f9c89a359 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -90,7 +90,7 @@ version = "1.2.6-alpha"
 dependencies = [
  "arc-swap 1.6.0",
  "arena",
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "async-stream",
  "async-trait",
  "atomic_enum",
@@ -120,7 +120,7 @@ dependencies = [
  "parquet_ext",
  "pin-project-lite",
  "prometheus 0.12.0",
- "prost",
+ "prost 0.11.8",
  "rand 0.7.3",
  "remote_engine_client",
  "router",
@@ -245,24 +245,24 @@ dependencies = [
 
 [[package]]
 name = "arrow"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2feeebd77b34b0bc88f224e06d01c27da4733997cc4789a4e056196656cdc59a"
+checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614"
 dependencies = [
  "ahash 0.8.3",
- "arrow-arith 43.0.0",
- "arrow-array 43.0.0",
- "arrow-buffer 43.0.0",
- "arrow-cast 43.0.0",
- "arrow-csv 43.0.0",
- "arrow-data 43.0.0",
- "arrow-ipc 43.0.0",
- "arrow-json 43.0.0",
- "arrow-ord 43.0.0",
- "arrow-row 43.0.0",
- "arrow-schema 43.0.0",
- "arrow-select 43.0.0",
- "arrow-string 43.0.0",
+ "arrow-arith 49.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-cast 49.0.0",
+ "arrow-csv 49.0.0",
+ "arrow-data 49.0.0",
+ "arrow-ipc 49.0.0",
+ "arrow-json 49.0.0",
+ "arrow-ord 49.0.0",
+ "arrow-row 49.0.0",
+ "arrow-schema 49.0.0",
+ "arrow-select 49.0.0",
+ "arrow-string 49.0.0",
 ]
 
 [[package]]
@@ -282,14 +282,14 @@ dependencies = [
 
 [[package]]
 name = "arrow-arith"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7173f5dc49c0ecb5135f52565af33afd3fdc9a12d13bd6f9973e8b96305e4b2e"
+checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7"
 dependencies = [
- "arrow-array 43.0.0",
- "arrow-buffer 43.0.0",
- "arrow-data 43.0.0",
- "arrow-schema 43.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-data 49.0.0",
+ "arrow-schema 49.0.0",
  "chrono",
  "half 2.2.1",
  "num",
@@ -313,14 +313,14 @@ dependencies = [
 
 [[package]]
 name = "arrow-array"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "63d7ea725f7d1f8bb2cffc53ef538557e95fc802e217d5be25122d402e22f3d0"
+checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d"
 dependencies = [
  "ahash 0.8.3",
- "arrow-buffer 43.0.0",
- "arrow-data 43.0.0",
- "arrow-schema 43.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-data 49.0.0",
+ "arrow-schema 49.0.0",
  "chrono",
  "chrono-tz",
  "half 2.2.1",
@@ -340,10 +340,11 @@ dependencies = [
 
 [[package]]
 name = "arrow-buffer"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bdbe439e077f484e5000b9e1d47b5e4c0d15f2b311a8f5bcc682553d5d67a722"
+checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c"
 dependencies = [
+ "bytes",
  "half 2.2.1",
  "num",
 ]
@@ -366,15 +367,16 @@ dependencies = [
 
 [[package]]
 name = "arrow-cast"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "93913cc14875770aa1eef5e310765e855effa352c094cb1c7c00607d0f37b4e1"
+checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a"
 dependencies = [
- "arrow-array 43.0.0",
- "arrow-buffer 43.0.0",
- "arrow-data 43.0.0",
- "arrow-schema 43.0.0",
- "arrow-select 43.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-data 49.0.0",
+ "arrow-schema 49.0.0",
+ "arrow-select 49.0.0",
+ "base64 0.21.0",
  "chrono",
  "comfy-table 7.0.1",
  "half 2.2.1",
@@ -403,15 +405,15 @@ dependencies = [
 
 [[package]]
 name = "arrow-csv"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef55b67c55ed877e6fe7b923121c19dae5e31ca70249ea2779a17b58fb0fbd9a"
+checksum = "2e09aa6246a1d6459b3f14baeaa49606cfdbca34435c46320e14054d244987ca"
 dependencies = [
- "arrow-array 43.0.0",
- "arrow-buffer 43.0.0",
- "arrow-cast 43.0.0",
- "arrow-data 43.0.0",
- "arrow-schema 43.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-cast 49.0.0",
+ "arrow-data 49.0.0",
+ "arrow-schema 49.0.0",
  "chrono",
  "csv",
  "csv-core",
@@ -434,12 +436,12 @@ dependencies = [
 
 [[package]]
 name = "arrow-data"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d4f4f4a3c54614126a71ab91f6631c9743eb4643d6e9318b74191da9dc6e028b"
+checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634"
 dependencies = [
- "arrow-buffer 43.0.0",
- "arrow-schema 43.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-schema 49.0.0",
  "half 2.2.1",
  "num",
 ]
@@ -460,16 +462,17 @@ dependencies = [
 
 [[package]]
 name = "arrow-ipc"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d41a3659f984a524ef1c2981d43747b24d8eec78e2425267fcd0ef34ce71cd18"
+checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd"
 dependencies = [
- "arrow-array 43.0.0",
- "arrow-buffer 43.0.0",
- "arrow-cast 43.0.0",
- "arrow-data 43.0.0",
- "arrow-schema 43.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-cast 49.0.0",
+ "arrow-data 49.0.0",
+ "arrow-schema 49.0.0",
  "flatbuffers",
+ "lz4_flex",
 ]
 
 [[package]]
@@ -494,15 +497,15 @@ dependencies = [
 
 [[package]]
 name = "arrow-json"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "10b95faa95a378f56ef32d84cc0104ea998c39ef7cd1faaa6b4cebf8ea92846d"
+checksum = "d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee"
 dependencies = [
- "arrow-array 43.0.0",
- "arrow-buffer 43.0.0",
- "arrow-cast 43.0.0",
- "arrow-data 43.0.0",
- "arrow-schema 43.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-cast 49.0.0",
+ "arrow-data 49.0.0",
+ "arrow-schema 49.0.0",
  "chrono",
  "half 2.2.1",
  "indexmap 2.0.0",
@@ -529,15 +532,15 @@ dependencies = [
 
 [[package]]
 name = "arrow-ord"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c68549a4284d9f8b39586afb8d5ff8158b8f0286353a4844deb1d11cf1ba1f26"
+checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4"
 dependencies = [
- "arrow-array 43.0.0",
- "arrow-buffer 43.0.0",
- "arrow-data 43.0.0",
- "arrow-schema 43.0.0",
- "arrow-select 43.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-data 49.0.0",
+ "arrow-schema 49.0.0",
+ "arrow-select 49.0.0",
  "half 2.2.1",
  "num",
 ]
@@ -559,15 +562,15 @@ dependencies = [
 
 [[package]]
 name = "arrow-row"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0a75a4a757afc301ce010adadff54d79d66140c4282ed3de565f6ccb716a5cf3"
+checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a"
 dependencies = [
  "ahash 0.8.3",
- "arrow-array 43.0.0",
- "arrow-buffer 43.0.0",
- "arrow-data 43.0.0",
- "arrow-schema 43.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-data 49.0.0",
+ "arrow-schema 49.0.0",
  "half 2.2.1",
  "hashbrown 0.14.0",
 ]
@@ -580,9 +583,9 @@ checksum = "bc85923d8d6662cc66ac6602c7d1876872e671002d60993dfdf492a6badeae92"
 
 [[package]]
 name = "arrow-schema"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2bebcb57eef570b15afbcf2d07d813eb476fde9f6dd69c81004d6476c197e87e"
+checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167"
 
 [[package]]
 name = "arrow-select"
@@ -599,14 +602,15 @@ dependencies = [
 
 [[package]]
 name = "arrow-select"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6e2943fa433a48921e914417173816af64eef61c0a3d448280e6c40a62df221"
+checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036"
 dependencies = [
- "arrow-array 43.0.0",
- "arrow-buffer 43.0.0",
- "arrow-data 43.0.0",
- "arrow-schema 43.0.0",
+ "ahash 0.8.3",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-data 49.0.0",
+ "arrow-schema 49.0.0",
  "num",
 ]
 
@@ -627,37 +631,37 @@ dependencies = [
 
 [[package]]
 name = "arrow-string"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bbc92ed638851774f6d7af1ad900b92bc1486746497511868b4298fcbcfa35af"
+checksum = "a4a48149c63c11c9ff571e50ab8f017d2a7cb71037a882b42f6354ed2da9acc7"
 dependencies = [
- "arrow-array 43.0.0",
- "arrow-buffer 43.0.0",
- "arrow-data 43.0.0",
- "arrow-schema 43.0.0",
- "arrow-select 43.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-data 49.0.0",
+ "arrow-schema 49.0.0",
+ "arrow-select 49.0.0",
  "num",
  "regex",
- "regex-syntax 0.7.1",
+ "regex-syntax 0.8.2",
 ]
 
 [[package]]
 name = "arrow_ext"
 version = "1.2.6-alpha"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "serde",
  "snafu 0.6.10",
- "zstd",
+ "zstd 0.12.3+zstd.1.5.2",
 ]
 
 [[package]]
 name = "arrow_util"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c"
+source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
 dependencies = [
  "ahash 0.8.3",
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "chrono",
  "comfy-table 6.1.4",
  "hashbrown 0.13.2",
@@ -682,8 +686,8 @@ dependencies = [
  "pin-project-lite",
  "tokio",
  "xz2",
- "zstd",
- "zstd-safe",
+ "zstd 0.12.3+zstd.1.5.2",
+ "zstd-safe 6.0.4+zstd.1.5.4",
 ]
 
 [[package]]
@@ -750,9 +754,9 @@ dependencies = [
 
 [[package]]
 name = "async-trait"
-version = "0.1.72"
+version = "0.1.77"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc6dde6e4ed435a4c1ee4e73592f5ba9da2151af10076cc04858746af9352d09"
+checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -881,7 +885,7 @@ version = "1.2.6-alpha"
 dependencies = [
  "analytic_engine",
  "arena",
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "base64 0.13.1",
  "bytes_ext",
  "clap 3.2.23",
@@ -908,7 +912,7 @@ dependencies = [
  "toml_ext",
  "trace_metric",
  "wal",
- "zstd",
+ "zstd 0.12.3+zstd.1.5.2",
 ]
 
 [[package]]
@@ -1452,7 +1456,7 @@ dependencies = [
  "logger",
  "macros",
  "meta_client",
- "prost",
+ "prost 0.11.8",
  "runtime",
  "serde",
  "serde_json",
@@ -1519,7 +1523,7 @@ dependencies = [
 name = "common_types"
 version = "1.2.6-alpha"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "arrow_ext",
  "bytes_ext",
  "chrono",
@@ -1528,7 +1532,7 @@ dependencies = [
  "horaedbproto 2.0.0",
  "macros",
  "paste 1.0.12",
- "prost",
+ "prost 0.11.8",
  "rand 0.7.3",
  "seahash",
  "serde",
@@ -1565,7 +1569,7 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2895653b4d9f1538a83970077cb01dfc77a4810524e51a110944688e916b18e"
 dependencies = [
- "prost",
+ "prost 0.11.8",
  "prost-types",
  "tonic 0.9.2",
  "tracing-core",
@@ -2003,13 +2007,14 @@ dependencies = [
 
 [[package]]
 name = "datafusion"
-version = "27.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d"
+version = "34.0.0"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
 dependencies = [
  "ahash 0.8.3",
- "arrow 43.0.0",
- "arrow-array 43.0.0",
- "arrow-schema 43.0.0",
+ "arrow 49.0.0",
+ "arrow-array 49.0.0",
+ "arrow-ipc 49.0.0",
+ "arrow-schema 49.0.0",
  "async-compression",
  "async-trait",
  "bytes",
@@ -2021,24 +2026,22 @@ dependencies = [
  "datafusion-expr",
  "datafusion-optimizer",
  "datafusion-physical-expr",
- "datafusion-row",
+ "datafusion-physical-plan",
  "datafusion-sql",
  "flate2",
  "futures 0.3.28",
  "glob",
+ "half 2.2.1",
  "hashbrown 0.14.0",
  "indexmap 2.0.0",
- "itertools 0.11.0",
- "lazy_static",
+ "itertools 0.12.0",
  "log",
  "num_cpus",
- "object_store 0.6.1",
+ "object_store 0.8.0",
  "parking_lot 0.12.1",
  "parquet",
- "percent-encoding",
  "pin-project-lite",
  "rand 0.8.5",
- "smallvec",
  "sqlparser",
  "tempfile",
  "tokio",
@@ -2046,34 +2049,42 @@ dependencies = [
  "url",
  "uuid",
  "xz2",
- "zstd",
+ "zstd 0.13.0",
 ]
 
 [[package]]
 name = "datafusion-common"
-version = "27.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d"
+version = "34.0.0"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
 dependencies = [
- "arrow 43.0.0",
- "arrow-array 43.0.0",
+ "ahash 0.8.3",
+ "arrow 49.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-schema 49.0.0",
  "chrono",
+ "half 2.2.1",
+ "libc",
  "num_cpus",
- "object_store 0.6.1",
+ "object_store 0.8.0",
  "parquet",
  "sqlparser",
 ]
 
 [[package]]
 name = "datafusion-execution"
-version = "27.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d"
+version = "34.0.0"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
 dependencies = [
+ "arrow 49.0.0",
+ "chrono",
  "dashmap 5.4.0",
  "datafusion-common",
  "datafusion-expr",
+ "futures 0.3.28",
  "hashbrown 0.14.0",
  "log",
- "object_store 0.6.1",
+ "object_store 0.8.0",
  "parking_lot 0.12.1",
  "rand 0.8.5",
  "tempfile",
@@ -2082,13 +2093,14 @@ dependencies = [
 
 [[package]]
 name = "datafusion-expr"
-version = "27.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d"
+version = "34.0.0"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
 dependencies = [
  "ahash 0.8.3",
- "arrow 43.0.0",
+ "arrow 49.0.0",
+ "arrow-array 49.0.0",
  "datafusion-common",
- "lazy_static",
+ "paste 1.0.12",
  "sqlparser",
  "strum 0.25.0",
  "strum_macros 0.25.1",
@@ -2096,45 +2108,43 @@ dependencies = [
 
 [[package]]
 name = "datafusion-optimizer"
-version = "27.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d"
+version = "34.0.0"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "async-trait",
  "chrono",
  "datafusion-common",
  "datafusion-expr",
  "datafusion-physical-expr",
  "hashbrown 0.14.0",
- "itertools 0.11.0",
+ "itertools 0.12.0",
  "log",
- "regex-syntax 0.7.1",
+ "regex-syntax 0.8.2",
 ]
 
 [[package]]
 name = "datafusion-physical-expr"
-version = "27.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d"
+version = "34.0.0"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
 dependencies = [
  "ahash 0.8.3",
- "arrow 43.0.0",
- "arrow-array 43.0.0",
- "arrow-buffer 43.0.0",
- "arrow-schema 43.0.0",
+ "arrow 49.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-ord 49.0.0",
+ "arrow-schema 49.0.0",
  "base64 0.21.0",
  "blake2",
  "blake3",
  "chrono",
  "datafusion-common",
  "datafusion-expr",
- "datafusion-row",
  "half 2.2.1",
  "hashbrown 0.14.0",
  "hex",
  "indexmap 2.0.0",
- "itertools 0.11.0",
- "lazy_static",
- "libc",
+ "itertools 0.12.0",
  "log",
  "md-5",
  "paste 1.0.12",
@@ -2147,37 +2157,56 @@ dependencies = [
 ]
 
 [[package]]
-name = "datafusion-proto"
-version = "27.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d"
+name = "datafusion-physical-plan"
+version = "34.0.0"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
 dependencies = [
- "arrow 43.0.0",
+ "ahash 0.8.3",
+ "arrow 49.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-schema 49.0.0",
+ "async-trait",
  "chrono",
- "datafusion",
  "datafusion-common",
+ "datafusion-execution",
  "datafusion-expr",
- "object_store 0.6.1",
- "prost",
+ "datafusion-physical-expr",
+ "futures 0.3.28",
+ "half 2.2.1",
+ "hashbrown 0.14.0",
+ "indexmap 2.0.0",
+ "itertools 0.12.0",
+ "log",
+ "once_cell",
+ "parking_lot 0.12.1",
+ "pin-project-lite",
+ "rand 0.8.5",
+ "tokio",
+ "uuid",
 ]
 
 [[package]]
-name = "datafusion-row"
-version = "27.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d"
+name = "datafusion-proto"
+version = "34.0.0"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
+ "chrono",
+ "datafusion",
  "datafusion-common",
- "paste 1.0.12",
- "rand 0.8.5",
+ "datafusion-expr",
+ "object_store 0.8.0",
+ "prost 0.12.3",
 ]
 
 [[package]]
 name = "datafusion-sql"
-version = "27.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=9c3a537e25e5ab3299922864034f67fb2f79805d#9c3a537e25e5ab3299922864034f67fb2f79805d"
+version = "34.0.0"
+source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
 dependencies = [
- "arrow 43.0.0",
- "arrow-schema 43.0.0",
+ "arrow 49.0.0",
+ "arrow-schema 49.0.0",
  "datafusion-common",
  "datafusion-expr",
  "log",
@@ -2187,7 +2216,7 @@ dependencies = [
 [[package]]
 name = "datafusion_util"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c"
+source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
 dependencies = [
  "async-trait",
  "datafusion",
@@ -2305,7 +2334,7 @@ dependencies = [
 name = "df_engine_extensions"
 version = "1.2.6-alpha"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "async-recursion",
  "async-trait",
  "catalog",
@@ -2318,7 +2347,7 @@ dependencies = [
  "insta",
  "lazy_static",
  "prometheus 0.12.0",
- "prost",
+ "prost 0.11.8",
  "runtime",
  "snafu 0.6.10",
  "table_engine",
@@ -2330,7 +2359,7 @@ dependencies = [
 name = "df_operator"
 version = "1.2.6-alpha"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "base64 0.13.1",
  "bincode",
  "chrono",
@@ -2470,7 +2499,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4319dc0fb739a6e84cb8678b8cf50c9bcfa4712ae826b33ecf00cc0850550a58"
 dependencies = [
  "http",
- "prost",
+ "prost 0.11.8",
  "tokio",
  "tokio-stream",
  "tonic 0.8.3",
@@ -2808,12 +2837,12 @@ checksum = "8f5f3913fa0bfe7ee1fd8248b6b9f42a5af4b9d65ec2dd2c3c26132b950ecfc2"
 [[package]]
 name = "generated_types"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c"
+source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
 dependencies = [
  "pbjson",
  "pbjson-build",
  "pbjson-types",
- "prost",
+ "prost 0.11.8",
  "prost-build",
  "serde",
  "tonic-build",
@@ -3071,7 +3100,7 @@ dependencies = [
  "thiserror",
  "tokio",
  "tonic 0.8.3",
- "zstd",
+ "zstd 0.12.3+zstd.1.5.2",
 ]
 
 [[package]]
@@ -3095,7 +3124,7 @@ version = "1.0.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5907c770ee20818978cf2050341ca2c4c7fb7888423ccb090cbb2fda250dfad7"
 dependencies = [
- "prost",
+ "prost 0.11.8",
  "protoc-bin-vendored",
  "tonic 0.8.3",
  "tonic-build",
@@ -3107,7 +3136,7 @@ name = "horaedbproto"
 version = "2.0.0"
 source = "git+https://github.com/apache/incubator-horaedb-proto.git?rev=19ece8f771fc0b3e8e734072cc3d8040de6c74cb#19ece8f771fc0b3e8e734072cc3d8040de6c74cb"
 dependencies = [
- "prost",
+ "prost 0.11.8",
  "protoc-bin-vendored",
  "tonic 0.8.3",
  "tonic-build",
@@ -3325,7 +3354,7 @@ dependencies = [
 [[package]]
 name = "influxdb_influxql_parser"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c"
+source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
 dependencies = [
  "chrono",
  "chrono-tz",
@@ -3367,7 +3396,7 @@ name = "interpreters"
 version = "1.2.6-alpha"
 dependencies = [
  "analytic_engine",
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "async-trait",
  "catalog",
  "catalog_impls",
@@ -3418,9 +3447,9 @@ dependencies = [
 [[package]]
 name = "iox_query"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c"
+source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "arrow_util",
  "async-trait",
  "chrono",
@@ -3442,9 +3471,9 @@ dependencies = [
 [[package]]
 name = "iox_query_influxql"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c"
+source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "chrono",
  "chrono-tz",
  "datafusion",
@@ -3497,6 +3526,15 @@ dependencies = [
  "either",
 ]
 
+[[package]]
+name = "itertools"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itoa"
 version = "1.0.6"
@@ -3953,7 +3991,7 @@ dependencies = [
  "horaedbproto 2.0.0",
  "logger",
  "macros",
- "prost",
+ "prost 0.11.8",
  "reqwest",
  "serde",
  "serde_json",
@@ -4314,9 +4352,9 @@ dependencies = [
 
 [[package]]
 name = "num"
-version = "0.4.0"
+version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43db66d1170d347f9a065114077f7dccb00c1b9478c89384490a3425279a4606"
+checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af"
 dependencies = [
  "num-bigint",
  "num-complex",
@@ -4456,16 +4494,16 @@ dependencies = [
 
 [[package]]
 name = "object_store"
-version = "0.6.1"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "27c776db4f332b571958444982ff641d2531417a326ca368995073b639205d58"
+checksum = "2524735495ea1268be33d200e1ee97455096a0846295a21548cd2f3541de7050"
 dependencies = [
  "async-trait",
  "bytes",
  "chrono",
  "futures 0.3.28",
  "humantime 2.1.0",
- "itertools 0.10.5",
+ "itertools 0.11.0",
  "parking_lot 0.12.1",
  "percent-encoding",
  "snafu 0.7.4",
@@ -4497,7 +4535,7 @@ dependencies = [
  "partitioned_lock",
  "prometheus 0.12.0",
  "prometheus-static-metric",
- "prost",
+ "prost 0.11.8",
  "rand 0.7.3",
  "runtime",
  "serde",
@@ -4545,13 +4583,13 @@ dependencies = [
  "tokio",
  "tokio-util",
  "uuid",
- "zstd",
+ "zstd 0.12.3+zstd.1.5.2",
 ]
 
 [[package]]
 name = "observability_deps"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c"
+source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
 dependencies = [
  "tracing",
 ]
@@ -4675,18 +4713,18 @@ dependencies = [
 
 [[package]]
 name = "parquet"
-version = "43.0.0"
+version = "49.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec7267a9607c3f955d4d0ac41b88a67cecc0d8d009173ad3da390699a6cb3750"
+checksum = "af88740a842787da39b3d69ce5fbf6fce97d20211d3b299fee0a0da6430c74d4"
 dependencies = [
  "ahash 0.8.3",
- "arrow-array 43.0.0",
- "arrow-buffer 43.0.0",
- "arrow-cast 43.0.0",
- "arrow-data 43.0.0",
- "arrow-ipc 43.0.0",
- "arrow-schema 43.0.0",
- "arrow-select 43.0.0",
+ "arrow-array 49.0.0",
+ "arrow-buffer 49.0.0",
+ "arrow-cast 49.0.0",
+ "arrow-data 49.0.0",
+ "arrow-ipc 49.0.0",
+ "arrow-schema 49.0.0",
+ "arrow-select 49.0.0",
  "base64 0.21.0",
  "brotli",
  "bytes",
@@ -4694,24 +4732,24 @@ dependencies = [
  "flate2",
  "futures 0.3.28",
  "hashbrown 0.14.0",
- "lz4",
+ "lz4_flex",
  "num",
  "num-bigint",
- "object_store 0.6.1",
+ "object_store 0.8.0",
  "paste 1.0.12",
  "seq-macro",
  "snap",
  "thrift",
  "tokio",
  "twox-hash",
- "zstd",
+ "zstd 0.13.0",
 ]
 
 [[package]]
 name = "parquet_ext"
 version = "1.2.6-alpha"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "arrow_ext",
  "async-trait",
  "bytes",
@@ -4738,7 +4776,7 @@ name = "partition_table_engine"
 version = "1.2.6-alpha"
 dependencies = [
  "analytic_engine",
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "async-trait",
  "common_types",
  "datafusion",
@@ -4805,7 +4843,7 @@ checksum = "bdbb7b706f2afc610f3853550cdbbf6372fd324824a087806bd4480ea4996e24"
 dependencies = [
  "heck",
  "itertools 0.10.5",
- "prost",
+ "prost 0.11.8",
  "prost-types",
 ]
 
@@ -4819,7 +4857,7 @@ dependencies = [
  "chrono",
  "pbjson",
  "pbjson-build",
- "prost",
+ "prost 0.11.8",
  "prost-build",
  "serde",
 ]
@@ -5179,7 +5217,7 @@ dependencies = [
  "async-trait",
  "bytes",
  "futures 0.3.28",
- "prost",
+ "prost 0.11.8",
  "prost-build",
  "snap",
  "warp",
@@ -5256,7 +5294,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e48e50df39172a3e7eb17e14642445da64996989bc212b583015435d39a58537"
 dependencies = [
  "bytes",
- "prost-derive",
+ "prost-derive 0.11.8",
+]
+
+[[package]]
+name = "prost"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "146c289cda302b98a28d40c8b3b90498d6e526dd24ac2ecea73e4e491685b94a"
+dependencies = [
+ "bytes",
+ "prost-derive 0.12.3",
 ]
 
 [[package]]
@@ -5273,7 +5321,7 @@ dependencies = [
  "multimap",
  "petgraph",
  "prettyplease 0.1.25",
- "prost",
+ "prost 0.11.8",
  "prost-types",
  "regex",
  "syn 1.0.109",
@@ -5294,13 +5342,26 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "prost-derive"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e"
+dependencies = [
+ "anyhow",
+ "itertools 0.11.0",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.48",
+]
+
 [[package]]
 name = "prost-types"
 version = "0.11.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "379119666929a1afd7a043aa6cf96fa67a6dce9af60c88095a4686dbce4c9c88"
 dependencies = [
- "prost",
+ "prost 0.11.8",
 ]
 
 [[package]]
@@ -5363,7 +5424,7 @@ checksum = "9653c3ed92974e34c5a6e0a510864dab979760481714c172e0a34e437cb98804"
 name = "proxy"
 version = "1.2.6-alpha"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "arrow_ext",
  "async-trait",
  "bytes",
@@ -5391,7 +5452,7 @@ dependencies = [
  "prom-remote-api",
  "prometheus 0.12.0",
  "prometheus-static-metric",
- "prost",
+ "prost 0.11.8",
  "query_engine",
  "query_frontend",
  "router",
@@ -5409,7 +5470,7 @@ dependencies = [
  "tokio-stream",
  "tonic 0.8.3",
  "warp",
- "zstd",
+ "zstd 0.12.3+zstd.1.5.2",
 ]
 
 [[package]]
@@ -5463,7 +5524,7 @@ dependencies = [
 name = "query_engine"
 version = "1.2.6-alpha"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "async-trait",
  "bytes_ext",
  "catalog",
@@ -5478,7 +5539,7 @@ dependencies = [
  "iox_query",
  "logger",
  "macros",
- "prost",
+ "prost 0.11.8",
  "query_frontend",
  "runtime",
  "serde",
@@ -5493,7 +5554,7 @@ dependencies = [
 name = "query_frontend"
 version = "1.2.6-alpha"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "async-trait",
  "catalog",
  "chrono",
@@ -5529,9 +5590,9 @@ dependencies = [
 [[package]]
 name = "query_functions"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c"
+source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "chrono",
  "datafusion",
  "itertools 0.10.5",
@@ -5802,6 +5863,12 @@ version = "0.7.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a5996294f19bd3aae0453a862ad728f60e6600695733dd5df01da90c54363a3c"
 
+[[package]]
+name = "regex-syntax"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
+
 [[package]]
 name = "remote_engine_client"
 version = "1.2.6-alpha"
@@ -6227,9 +6294,9 @@ dependencies = [
 [[package]]
 name = "schema"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c"
+source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "hashbrown 0.13.2",
  "indexmap 1.9.3",
  "itertools 0.10.5",
@@ -6353,7 +6420,7 @@ version = "1.2.6-alpha"
 dependencies = [
  "analytic_engine",
  "arc-swap 1.6.0",
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "arrow_ext",
  "async-trait",
  "bytes_ext",
@@ -6386,7 +6453,7 @@ dependencies = [
  "prom-remote-api",
  "prometheus 0.12.0",
  "prometheus-static-metric",
- "prost",
+ "prost 0.11.8",
  "proxy",
  "query_engine",
  "query_frontend",
@@ -6407,7 +6474,7 @@ dependencies = [
  "tonic 0.8.3",
  "wal",
  "warp",
- "zstd",
+ "zstd 0.12.3+zstd.1.5.2",
 ]
 
 [[package]]
@@ -6717,9 +6784,9 @@ dependencies = [
 
 [[package]]
 name = "sqlparser"
-version = "0.35.0"
+version = "0.41.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca597d77c98894be1f965f2e4e2d2a61575d4998088e655476c73715c54b2b43"
+checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964"
 dependencies = [
  "log",
  "serde",
@@ -6728,13 +6795,13 @@ dependencies = [
 
 [[package]]
 name = "sqlparser_derive"
-version = "0.1.1"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e"
+checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 1.0.109",
+ "syn 2.0.48",
 ]
 
 [[package]]
@@ -6897,7 +6964,7 @@ dependencies = [
 name = "system_catalog"
 version = "1.2.6-alpha"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "async-trait",
  "bytes_ext",
  "catalog",
@@ -6908,7 +6975,7 @@ dependencies = [
  "horaedbproto 2.0.0",
  "logger",
  "macros",
- "prost",
+ "prost 0.11.8",
  "snafu 0.6.10",
  "table_engine",
  "tokio",
@@ -6927,7 +6994,7 @@ dependencies = [
 name = "table_engine"
 version = "1.2.6-alpha"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "arrow_ext",
  "async-trait",
  "bytes_ext",
@@ -6943,7 +7010,7 @@ dependencies = [
  "lazy_static",
  "logger",
  "macros",
- "prost",
+ "prost 0.11.8",
  "rand 0.7.3",
  "regex",
  "runtime",
@@ -7024,7 +7091,7 @@ dependencies = [
 [[package]]
 name = "test_helpers"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=a905863#a9058633c03f018607dc1e4f6ca090b82d46a30c"
+source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
 dependencies = [
  "dotenvy",
  "observability_deps",
@@ -7038,7 +7105,7 @@ dependencies = [
 name = "test_util"
 version = "1.2.6-alpha"
 dependencies = [
- "arrow 43.0.0",
+ "arrow 49.0.0",
  "chrono",
  "common_types",
  "env_logger",
@@ -7375,8 +7442,8 @@ dependencies = [
  "hyper-timeout",
  "percent-encoding",
  "pin-project",
- "prost",
- "prost-derive",
+ "prost 0.11.8",
+ "prost-derive 0.11.8",
  "rustls-pemfile 1.0.2",
  "tokio",
  "tokio-rustls 0.23.4",
@@ -7408,7 +7475,7 @@ dependencies = [
  "hyper-timeout",
  "percent-encoding",
  "pin-project",
- "prost",
+ "prost 0.11.8",
  "tokio",
  "tokio-stream",
  "tower",
@@ -7804,7 +7871,7 @@ dependencies = [
  "macros",
  "message_queue",
  "prometheus 0.12.0",
- "prost",
+ "prost 0.11.8",
  "rand 0.8.5",
  "rocksdb",
  "runtime",
@@ -8433,7 +8500,16 @@ version = "0.12.3+zstd.1.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "76eea132fb024e0e13fd9c2f5d5d595d8a967aa72382ac2f9d39fcc95afd0806"
 dependencies = [
- "zstd-safe",
+ "zstd-safe 6.0.4+zstd.1.5.4",
+]
+
+[[package]]
+name = "zstd"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110"
+dependencies = [
+ "zstd-safe 7.0.0",
 ]
 
 [[package]]
@@ -8446,6 +8522,15 @@ dependencies = [
  "zstd-sys",
 ]
 
+[[package]]
+name = "zstd-safe"
+version = "7.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e"
+dependencies = [
+ "zstd-sys",
+]
+
 [[package]]
 name = "zstd-sys"
 version = "2.0.7+zstd.1.5.4"
diff --git a/Cargo.toml b/Cargo.toml
index d195a121fd..aef95309bd 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -85,8 +85,8 @@ members = [
 
 [workspace.dependencies]
 alloc_tracker = { path = "src/components/alloc_tracker" }
-arrow = { version = "43.0.0", features = ["prettyprint"] }
-arrow_ipc = { version = "43.0.0" }
+arrow = { version = "49.0.0", features = ["prettyprint"] }
+arrow_ipc = { version = "49.0.0" }
 arrow_ext = { path = "src/components/arrow_ext" }
 analytic_engine = { path = "src/analytic_engine" }
 arena = { path = "src/components/arena" }
@@ -107,8 +107,8 @@ cluster = { path = "src/cluster" }
 criterion = "0.5"
 horaedb-client = "1.0.2"
 common_types = { path = "src/common_types" }
-datafusion = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "9c3a537e25e5ab3299922864034f67fb2f79805d" }
-datafusion-proto = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "9c3a537e25e5ab3299922864034f67fb2f79805d" }
+datafusion = {  git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119"}
+datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119" }
 derive_builder = "0.12"
 df_operator = { path = "src/df_operator" }
 df_engine_extensions = { path = "src/df_engine_extensions" }
@@ -121,10 +121,10 @@ hash_ext = { path = "src/components/hash_ext" }
 hex = "0.4.3"
 hyperloglog = { git = "https://github.com/jedisct1/rust-hyperloglog.git", rev = "425487ce910f26636fbde8c4d640b538431aad50" }
 id_allocator = { path = "src/components/id_allocator" }
-influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "a905863", package = "iox_query_influxql" }
-influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "a905863", package = "influxdb_influxql_parser" }
-influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "a905863", package = "iox_query" }
-influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "a905863", package = "schema" }
+influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "iox_query_influxql" }
+influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "influxdb_influxql_parser" }
+influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "iox_query" }
+influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "schema" }
 interpreters = { path = "src/interpreters" }
 itertools = "0.10.5"
 lz4_flex = { version = "0.11", default-features = false, features = ["frame"] }
@@ -142,7 +142,7 @@ panic_ext = { path = "src/components/panic_ext" }
 partitioned_lock = { path = "src/components/partitioned_lock" }
 partition_table_engine = { path = "src/partition_table_engine" }
 parquet_ext = { path = "src/components/parquet_ext" }
-parquet = { version = "43.0.0" }
+parquet = { version = "49.0.0" }
 paste = "1.0"
 pin-project-lite = "0.2.8"
 pprof = "0.12.1"
@@ -172,9 +172,9 @@ size_ext = { path = "src/components/size_ext" }
 smallvec = "1.6"
 slog = "2.7"
 spin = "0.9.6"
-sqlparser = { version = "0.35", features = ["serde"] }
-system_catalog = { path = "src/system_catalog" }
 system_statis = { path = "src/components/system_stats" }
+sqlparser = { version = "0.41", features = ["serde"] }
+system_catalog = { path = "src/system_catalog" }
 table_engine = { path = "src/table_engine" }
 table_kv = { path = "src/components/table_kv" }
 tempfile = "3.1.0"
diff --git a/src/analytic_engine/src/instance/reorder_memtable.rs b/src/analytic_engine/src/instance/reorder_memtable.rs
index e6eab4d135..0c7900e52f 100644
--- a/src/analytic_engine/src/instance/reorder_memtable.rs
+++ b/src/analytic_engine/src/instance/reorder_memtable.rs
@@ -147,8 +147,11 @@ impl ExecutionPlan for ScanMemIter {
         }))
     }
 
-    fn statistics(&self) -> Statistics {
-        Statistics::default()
+    fn statistics(
+        &self,
+    ) -> std::result::Result<datafusion::common::Statistics, datafusion::error::DataFusionError>
+    {
+        Ok(Statistics::new_unknown(&self.schema()))
     }
 }
 
diff --git a/src/analytic_engine/src/row_iter/record_batch_stream.rs b/src/analytic_engine/src/row_iter/record_batch_stream.rs
index 2a39c648c0..49c41f2432 100644
--- a/src/analytic_engine/src/row_iter/record_batch_stream.rs
+++ b/src/analytic_engine/src/row_iter/record_batch_stream.rs
@@ -161,6 +161,7 @@ fn filter_record_batch(
     let filter_array = predicate
         .evaluate(record_batch)
         .map(|v| v.into_array(record_batch.num_rows()))
+        .context(FilterExec)?
         .context(FilterExec)?;
     let selected_rows = filter_array
         .as_any()
diff --git a/src/common_types/src/datum.rs b/src/common_types/src/datum.rs
index d152e9600a..4b8b373763 100644
--- a/src/common_types/src/datum.rs
+++ b/src/common_types/src/datum.rs
@@ -294,7 +294,7 @@ impl TryFrom<&SqlDataType> for DatumKind {
             SqlDataType::BigInt(_) => Ok(Self::Int64),
             SqlDataType::Int(_) => Ok(Self::Int32),
             SqlDataType::SmallInt(_) => Ok(Self::Int16),
-            SqlDataType::String => Ok(Self::String),
+            SqlDataType::String(_) => Ok(Self::String),
             SqlDataType::Varbinary(_) => Ok(Self::Varbinary),
             SqlDataType::Date => Ok(Self::Date),
             SqlDataType::Time(_, _) => Ok(Self::Time),
@@ -1453,7 +1453,7 @@ impl Datum {
             ScalarValue::Date32(v) => v.map(Datum::Date),
             ScalarValue::Time64Nanosecond(v) => v.map(Datum::Time),
             ScalarValue::Dictionary(_, literal) => Datum::from_scalar_value(literal),
-            ScalarValue::List(_, _)
+            ScalarValue::List(_)
             | ScalarValue::Date64(_)
             | ScalarValue::Time32Second(_)
             | ScalarValue::Time32Millisecond(_)
@@ -1467,10 +1467,12 @@ impl Datum {
             | ScalarValue::Decimal128(_, _, _)
             | ScalarValue::Null
             | ScalarValue::IntervalMonthDayNano(_)
-            | ScalarValue::Fixedsizelist(_, _, _)
+            | ScalarValue::FixedSizeList(_)
             | ScalarValue::DurationSecond(_)
             | ScalarValue::DurationMillisecond(_)
             | ScalarValue::DurationMicrosecond(_)
+            | ScalarValue::Decimal256(_, _, _)
+            | ScalarValue::LargeList(_)
             | ScalarValue::DurationNanosecond(_) => None,
         }
     }
@@ -1502,7 +1504,7 @@ impl<'a> DatumView<'a> {
                 v.map(|v| DatumView::Timestamp(Timestamp::new(v)))
             }
             ScalarValue::Dictionary(_, literal) => DatumView::from_scalar_value(literal),
-            ScalarValue::List(_, _)
+            ScalarValue::List(_)
             | ScalarValue::Date64(_)
             | ScalarValue::Time32Second(_)
             | ScalarValue::Time32Millisecond(_)
@@ -1516,10 +1518,12 @@ impl<'a> DatumView<'a> {
             | ScalarValue::Decimal128(_, _, _)
             | ScalarValue::Null
             | ScalarValue::IntervalMonthDayNano(_)
-            | ScalarValue::Fixedsizelist(_, _, _)
+            | ScalarValue::FixedSizeList(_)
             | ScalarValue::DurationSecond(_)
             | ScalarValue::DurationMillisecond(_)
             | ScalarValue::DurationMicrosecond(_)
+            | ScalarValue::Decimal256(_, _, _)
+            | ScalarValue::LargeList(_)
             | ScalarValue::DurationNanosecond(_) => None,
         }
     }
diff --git a/src/components/parquet_ext/src/meta_data.rs b/src/components/parquet_ext/src/meta_data.rs
index 00a0bb3a17..ad18a36cb7 100644
--- a/src/components/parquet_ext/src/meta_data.rs
+++ b/src/components/parquet_ext/src/meta_data.rs
@@ -19,9 +19,10 @@ use std::{ops::Range, sync::Arc};
 
 use async_trait::async_trait;
 use bytes::Bytes;
+use datafusion::parquet::arrow::ParquetRecordBatchStreamBuilder;
 use generic_error::GenericResult;
 use parquet::{
-    arrow::{arrow_reader::ArrowReaderOptions, ParquetRecordBatchStreamBuilder},
+    arrow::arrow_reader::ArrowReaderOptions,
     errors::{ParquetError, Result},
     file::{footer, metadata::ParquetMetaData},
 };
diff --git a/src/components/parquet_ext/src/prune/min_max.rs b/src/components/parquet_ext/src/prune/min_max.rs
index 8ea39299ef..5f478936d5 100644
--- a/src/components/parquet_ext/src/prune/min_max.rs
+++ b/src/components/parquet_ext/src/prune/min_max.rs
@@ -196,6 +196,15 @@ impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> {
     fn null_counts(&self, _column: &Column) -> Option<ArrayRef> {
         None
     }
+
+    // TODO: support this.
+    fn contained(
+        &self,
+        column: &Column,
+        values: &std::collections::HashSet<ScalarValue>,
+    ) -> Option<arrow::array::BooleanArray> {
+        None
+    }
 }
 
 #[cfg(test)]
@@ -245,7 +254,7 @@ mod test {
             })
             .collect();
         let schema = SchemaType::group_type_builder("schema")
-            .with_fields(&mut fields)
+            .with_fields(fields)
             .build()
             .unwrap();
 
diff --git a/src/df_engine_extensions/src/dist_sql_query/physical_plan.rs b/src/df_engine_extensions/src/dist_sql_query/physical_plan.rs
index feba491f50..dd430f520d 100644
--- a/src/df_engine_extensions/src/dist_sql_query/physical_plan.rs
+++ b/src/df_engine_extensions/src/dist_sql_query/physical_plan.rs
@@ -129,8 +129,10 @@ impl ExecutionPlan for UnresolvedPartitionedScan {
         ))
     }
 
-    fn statistics(&self) -> Statistics {
-        Statistics::default()
+    fn statistics(
+        &self,
+    ) -> Result<datafusion::common::Statistics, datafusion::error::DataFusionError> {
+        Ok(Statistics::new_unknown(&self.schema()))
     }
 }
 
@@ -367,8 +369,10 @@ impl ExecutionPlan for ResolvedPartitionedScan {
         Ok(Box::pin(record_stream))
     }
 
-    fn statistics(&self) -> Statistics {
-        Statistics::default()
+    fn statistics(
+        &self,
+    ) -> Result<datafusion::common::Statistics, datafusion::error::DataFusionError> {
+        Ok(Statistics::new_unknown(&self.schema()))
     }
 
     fn metrics(&self) -> Option<MetricsSet> {
@@ -578,8 +582,10 @@ impl ExecutionPlan for UnresolvedSubTableScan {
         ))
     }
 
-    fn statistics(&self) -> Statistics {
-        Statistics::default()
+    fn statistics(
+        &self,
+    ) -> Result<datafusion::common::Statistics, datafusion::error::DataFusionError> {
+        Ok(Statistics::new_unknown(&self.schema()))
     }
 }
 
diff --git a/src/df_operator/src/scalar.rs b/src/df_operator/src/scalar.rs
index 1535ebdbd4..e71f29148e 100644
--- a/src/df_operator/src/scalar.rs
+++ b/src/df_operator/src/scalar.rs
@@ -43,7 +43,7 @@ impl ScalarUdf {
 
     #[inline]
     pub fn name(&self) -> &str {
-        &self.df_udf.name
+        &self.df_udf.name()
     }
 
     /// Convert into datafusion's udf
diff --git a/src/df_operator/src/udaf.rs b/src/df_operator/src/udaf.rs
index 448a26c626..b2bb5838cd 100644
--- a/src/df_operator/src/udaf.rs
+++ b/src/df_operator/src/udaf.rs
@@ -50,7 +50,7 @@ impl AggregateUdf {
 
     #[inline]
     pub fn name(&self) -> &str {
-        &self.df_udaf.name
+        &self.df_udaf.name()
     }
 
     #[inline]
diff --git a/src/interpreters/src/insert.rs b/src/interpreters/src/insert.rs
index cac5af0cec..c67ff1dfc1 100644
--- a/src/interpreters/src/insert.rs
+++ b/src/interpreters/src/insert.rs
@@ -373,6 +373,6 @@ fn get_or_extract_column_from_row_groups(
             cached_column_values.insert(column_idx, columnar_value.clone());
             Ok(columnar_value)
         })?;
-
-    Ok(column.into_array(num_rows))
+    // TODO: solve unwarp
+    Ok(column.into_array(num_rows).unwrap())
 }
diff --git a/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs b/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs
index c963c75fad..24f261cd6d 100644
--- a/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs
+++ b/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs
@@ -21,7 +21,9 @@ use std::sync::Arc;
 
 use datafusion::{
     config::ConfigOptions,
-    physical_optimizer::{optimizer::PhysicalOptimizerRule, repartition::Repartition},
+    physical_optimizer::{
+        enforce_distribution::EnforceDistribution, optimizer::PhysicalOptimizerRule,
+    },
     physical_plan::ExecutionPlan,
 };
 use logger::debug;
@@ -34,7 +36,7 @@ pub struct RepartitionAdapter {
 
 impl Adapter for RepartitionAdapter {
     fn may_adapt(original_rule: OptimizeRuleRef) -> OptimizeRuleRef {
-        if original_rule.name() == Repartition::new().name() {
+        if original_rule.name() == EnforceDistribution::new().name() {
             Arc::new(Self { original_rule })
         } else {
             original_rule
@@ -67,4 +69,4 @@ impl PhysicalOptimizerRule for RepartitionAdapter {
     fn schema_check(&self) -> bool {
         true
     }
-}
+}
\ No newline at end of file
diff --git a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
index a5a6161c9b..12c94076c9 100644
--- a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
+++ b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
@@ -236,9 +236,9 @@ impl ExecutionPlan for PromAlignExec {
         }))
     }
 
-    fn statistics(&self) -> Statistics {
+    fn statistics(&self) -> std::result::Result<datafusion::common::Statistics, datafusion::error::DataFusionError> {
         // TODO(chenxiang)
-        Statistics::default()
+        Ok(Statistics::new_unknown(&self.schema()))
     }
 }
 
@@ -514,7 +514,7 @@ impl Stream for PromAlignReader {
                 if !tsid_samples.is_empty() {
                     Poll::Ready(Some(
                         self.samples_to_record_batch(schema, tsid_samples)
-                            .map_err(DataFusionError::ArrowError),
+                            .map_err(|err| DataFusionError::ArrowError(err, None)),
                     ))
                 } else {
                     Poll::Ready(Some(Ok(RecordBatch::new_empty(schema))))
@@ -529,7 +529,7 @@ impl Stream for PromAlignReader {
                     if !tsid_samples.is_empty() {
                         return Poll::Ready(Some(
                             self.samples_to_record_batch(schema, tsid_samples)
-                                .map_err(DataFusionError::ArrowError),
+                            .map_err(|err| DataFusionError::ArrowError(err, None)),
                         ));
                     }
                 }
diff --git a/src/query_engine/src/datafusion_impl/task_context.rs b/src/query_engine/src/datafusion_impl/task_context.rs
index aee9812871..e0cc01ed50 100644
--- a/src/query_engine/src/datafusion_impl/task_context.rs
+++ b/src/query_engine/src/datafusion_impl/task_context.rs
@@ -116,7 +116,7 @@ impl Preprocessor {
         ctx: &Context,
     ) -> Result<Arc<dyn ExecutionPlan>> {
         // Decode to datafusion physical plan.
-        let protobuf = protobuf::PhysicalPlanNode::decode(encoded_plan)
+        let protobuf = protobuf::PhysicalPlanNode::try_decode(encoded_plan)
             .box_err()
             .with_context(|| ExecutorWithCause {
                 msg: Some("failed to decode plan".to_string()),
diff --git a/src/query_frontend/src/logical_optimizer/type_conversion.rs b/src/query_frontend/src/logical_optimizer/type_conversion.rs
index 89f0a14ec0..e8ccd42fc5 100644
--- a/src/query_frontend/src/logical_optimizer/type_conversion.rs
+++ b/src/query_frontend/src/logical_optimizer/type_conversion.rs
@@ -124,6 +124,7 @@ impl AnalyzerRule for TypeConversion {
             LogicalPlan::Subquery(_)
             | LogicalPlan::Statement { .. }
             | LogicalPlan::SubqueryAlias(_)
+            | LogicalPlan::Copy(_)
             | LogicalPlan::Unnest(_)
             | LogicalPlan::EmptyRelation { .. } => Ok(plan.clone()),
         }
@@ -209,9 +210,10 @@ impl<'a> TypeRewriter<'a> {
             }
         }
 
-        let array = value.to_array();
+        let array = value.to_array()?;
         ScalarValue::try_from_array(
-            &compute::cast(&array, data_type).map_err(DataFusionError::ArrowError)?,
+            &compute::cast(&array, data_type)
+                .map_err(|err| DataFusionError::ArrowError(err, None))?,
             // index: Converts a value in `array` at `index` into a ScalarValue
             0,
         )
diff --git a/src/query_frontend/src/parser.rs b/src/query_frontend/src/parser.rs
index e01c4d03bc..cae7256a01 100644
--- a/src/query_frontend/src/parser.rs
+++ b/src/query_frontend/src/parser.rs
@@ -352,11 +352,13 @@ impl<'a> Parser<'a> {
                     is_dictionary = true;
                 }
             }
-            if c.data_type != DataType::String && is_dictionary {
-                return parser_err!(format!(
-                    "Only string column can be dictionary encoded: {:?}",
-                    c.to_string()
-                ));
+            if let DataType::String(_) = c.data_type {
+                if is_dictionary {
+                    return parser_err!(format!(
+                        "Only string column can be dictionary encoded: {:?}",
+                        c.to_string()
+                    ));
+                }
             }
         }
 
@@ -1001,7 +1003,7 @@ mod tests {
         let columns = vec![
             make_column_def("c1", DataType::Timestamp(None, TimezoneInfo::None)),
             make_column_def("c2", DataType::Double),
-            make_column_def("c3", DataType::String),
+            make_column_def("c3", DataType::String(None)),
         ];
 
         let sql = "CREATE TABLE mytbl(c1 timestamp, c2 double, c3 string,) ENGINE = XX";
@@ -1027,7 +1029,7 @@ mod tests {
         let columns = vec![
             make_column_def("c1", DataType::Timestamp(None, TimezoneInfo::None)),
             make_comment_column_def("c2", DataType::Double, "id".to_string()),
-            make_comment_column_def("c3", DataType::String, "name".to_string()),
+            make_comment_column_def("c3", DataType::String(None), "name".to_string()),
         ];
 
         let sql = "CREATE TABLE mytbl(c1 timestamp, c2 double comment 'id', c3 string comment 'name',) ENGINE = XX";
@@ -1053,7 +1055,7 @@ mod tests {
         let columns = vec![
             make_column_def("c1", DataType::Timestamp(None, TimezoneInfo::None)),
             make_column_def("c2", DataType::Timestamp(None, TimezoneInfo::None)),
-            make_column_def("c3", DataType::String),
+            make_column_def("c3", DataType::String(None)),
             make_column_def("c4", DataType::Double),
         ];
 
@@ -1253,7 +1255,7 @@ mod tests {
                 table_name: make_table_name("t"),
                 columns: vec![
                     make_column_def("c1", DataType::Double),
-                    make_column_def("c2", DataType::String),
+                    make_column_def("c2", DataType::String(None)),
                 ],
             });
             expect_parse_ok(sql, expected).unwrap();
@@ -1277,7 +1279,7 @@ mod tests {
                 table_name: make_table_name("t"),
                 columns: vec![
                     make_column_def("c1", DataType::Double),
-                    make_tag_column_def("c2", DataType::String),
+                    make_tag_column_def("c2", DataType::String(None)),
                 ],
             });
             expect_parse_ok(sql, expected).unwrap();
@@ -1287,7 +1289,7 @@ mod tests {
             let sql = "ALTER TABLE t ADD COLUMN c1 string tag";
             let expected = Statement::AlterAddColumn(AlterAddColumn {
                 table_name: make_table_name("t"),
-                columns: vec![make_tag_column_def("c1", DataType::String)],
+                columns: vec![make_tag_column_def("c1", DataType::String(None))],
             });
             expect_parse_ok(sql, expected).unwrap();
         }
diff --git a/src/query_frontend/src/planner.rs b/src/query_frontend/src/planner.rs
index 8e02f5ee9e..e5c8a583ab 100644
--- a/src/query_frontend/src/planner.rs
+++ b/src/query_frontend/src/planner.rs
@@ -984,7 +984,7 @@ impl<'a, P: MetaProvider> PlannerDelegate<'a, P> {
                     }
                 }
 
-                let rows = build_row_group(schema, source, column_index_in_insert)?;
+                let rows = build_row_group(schema, source.unwrap(), column_index_in_insert)?;
 
                 Ok(Plan::Insert(InsertPlan {
                     table,
diff --git a/src/query_frontend/src/promql/convert.rs b/src/query_frontend/src/promql/convert.rs
index 297e71612c..6ff90d5bba 100644
--- a/src/query_frontend/src/promql/convert.rs
+++ b/src/query_frontend/src/promql/convert.rs
@@ -24,7 +24,7 @@ use common_types::{
 use datafusion::{
     logical_expr::{
         avg, count,
-        expr::{Alias, ScalarUDF},
+        expr::{Alias, ScalarFunction},
         lit,
         logical_plan::{Extension, LogicalPlan, LogicalPlanBuilder},
         max, min, sum, Expr as DataFusionExpr,
@@ -316,11 +316,10 @@ impl Expr {
                         // TSID is lost after aggregate, but PromAlignNode need a unique id, so
                         // mock UUID as tsid based on groupby keys
                         DataFusionExpr::Alias(Alias {
-                            expr: Box::new(DataFusionExpr::ScalarUDF(ScalarUDF {
-                                fun: Arc::new(create_unique_id(tag_exprs.len())),
-                                args: tag_exprs.clone(),
-                            })),
+                            expr: Box::new(DataFusionExpr::ScalarFunction(
+                                ScalarFunction::new_udf(Arc::new(create_unique_id(tag_exprs.len())), tag_exprs.clone()))),
                             name: TSID_COLUMN.to_string(),
+                            relation: None,
                         });
                     let mut projection = tag_exprs.clone();
                     projection.extend(vec![
@@ -371,6 +370,7 @@ impl Expr {
         Ok(DataFusionExpr::Alias(Alias {
             expr: Box::new(expr),
             name: alias,
+            relation: None,
         }))
     }
 }
diff --git a/src/query_frontend/src/provider.rs b/src/query_frontend/src/provider.rs
index 4380829fef..67750fcb0e 100644
--- a/src/query_frontend/src/provider.rs
+++ b/src/query_frontend/src/provider.rs
@@ -413,6 +413,13 @@ impl<'a, P: MetaProvider> ContextProvider for ContextProviderAdapter<'a, P> {
     fn get_window_meta(&self, _name: &str) -> Option<Arc<datafusion::logical_expr::WindowUDF>> {
         None
     }
+
+    fn get_table_source(
+        &self,
+        name: TableReference,
+    ) -> datafusion::error::Result<Arc<dyn TableSource>> {
+        self.get_table_provider(name)
+    }
 }
 
 struct SchemaProviderAdapter {
diff --git a/src/table_engine/src/predicate.rs b/src/table_engine/src/predicate.rs
index 723724f35e..e71180a0e0 100644
--- a/src/table_engine/src/predicate.rs
+++ b/src/table_engine/src/predicate.rs
@@ -329,6 +329,8 @@ impl<'a> TimeRangeExtractor<'a> {
             | Operator::BitwiseAnd
             | Operator::BitwiseOr
             | Operator::BitwiseXor
+            | Operator::AtArrow
+            | Operator::ArrowAt
             | Operator::BitwiseShiftRight
             | Operator::BitwiseShiftLeft
             | Operator::StringConcat => TimeRange::min_to_max(),
@@ -432,15 +434,12 @@ impl<'a> TimeRangeExtractor<'a> {
             | Expr::TryCast { .. }
             | Expr::Sort { .. }
             | Expr::ScalarFunction { .. }
-            | Expr::ScalarUDF { .. }
             | Expr::AggregateFunction { .. }
             | Expr::WindowFunction { .. }
-            | Expr::AggregateUDF { .. }
             | Expr::Wildcard { .. }
             | Expr::Exists { .. }
             | Expr::InSubquery { .. }
             | Expr::ScalarSubquery(_)
-            | Expr::QualifiedWildcard { .. }
             | Expr::GroupingSet(_)
             | Expr::GetIndexedField { .. }
             | Expr::OuterReferenceColumn { .. }
diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs
index d5e4c69f18..49f76460e2 100644
--- a/src/table_engine/src/provider.rs
+++ b/src/table_engine/src/provider.rs
@@ -467,9 +467,12 @@ impl ExecutionPlan for ScanTable {
         Some(metric_set)
     }
 
-    fn statistics(&self) -> Statistics {
+    fn statistics(
+        &self,
+    ) -> std::result::Result<datafusion::common::Statistics, datafusion::error::DataFusionError>
+    {
         // TODO(yingwen): Implement this
-        Statistics::default()
+        Ok(Statistics::new_unknown(&self.schema()))
     }
 }
 

From 3a447ac357a45c8046f769da749dcc13f8ec537c Mon Sep 17 00:00:00 2001
From: tanruixiang <tanruixiang0104@gmail.com>
Date: Thu, 18 Jan 2024 16:48:24 +0800
Subject: [PATCH 02/25] fix

---
 Cargo.toml                                                 | 2 +-
 src/components/parquet_ext/src/prune/min_max.rs            | 6 +++---
 src/df_operator/src/scalar.rs                              | 2 +-
 src/df_operator/src/udaf.rs                                | 2 +-
 .../src/datafusion_impl/physical_optimizer/repartition.rs  | 2 +-
 .../datafusion_impl/physical_plan_extension/prom_align.rs  | 7 +++++--
 6 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index aef95309bd..16fcb93faf 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -107,7 +107,7 @@ cluster = { path = "src/cluster" }
 criterion = "0.5"
 horaedb-client = "1.0.2"
 common_types = { path = "src/common_types" }
-datafusion = {  git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119"}
+datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119" }
 datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119" }
 derive_builder = "0.12"
 df_operator = { path = "src/df_operator" }
diff --git a/src/components/parquet_ext/src/prune/min_max.rs b/src/components/parquet_ext/src/prune/min_max.rs
index 5f478936d5..6bd3ad7496 100644
--- a/src/components/parquet_ext/src/prune/min_max.rs
+++ b/src/components/parquet_ext/src/prune/min_max.rs
@@ -200,8 +200,8 @@ impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> {
     // TODO: support this.
     fn contained(
         &self,
-        column: &Column,
-        values: &std::collections::HashSet<ScalarValue>,
+        _column: &Column,
+        _values: &std::collections::HashSet<ScalarValue>,
     ) -> Option<arrow::array::BooleanArray> {
         None
     }
@@ -239,7 +239,7 @@ mod test {
     }
 
     fn prepare_parquet_schema_descr(schema: &ArrowSchema) -> SchemaDescPtr {
-        let mut fields = schema
+        let fields = schema
             .fields()
             .iter()
             .map(|field| {
diff --git a/src/df_operator/src/scalar.rs b/src/df_operator/src/scalar.rs
index e71f29148e..4ae3372cfd 100644
--- a/src/df_operator/src/scalar.rs
+++ b/src/df_operator/src/scalar.rs
@@ -43,7 +43,7 @@ impl ScalarUdf {
 
     #[inline]
     pub fn name(&self) -> &str {
-        &self.df_udf.name()
+        self.df_udf.name()
     }
 
     /// Convert into datafusion's udf
diff --git a/src/df_operator/src/udaf.rs b/src/df_operator/src/udaf.rs
index b2bb5838cd..312990b252 100644
--- a/src/df_operator/src/udaf.rs
+++ b/src/df_operator/src/udaf.rs
@@ -50,7 +50,7 @@ impl AggregateUdf {
 
     #[inline]
     pub fn name(&self) -> &str {
-        &self.df_udaf.name()
+        self.df_udaf.name()
     }
 
     #[inline]
diff --git a/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs b/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs
index 24f261cd6d..d1406a75b9 100644
--- a/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs
+++ b/src/query_engine/src/datafusion_impl/physical_optimizer/repartition.rs
@@ -69,4 +69,4 @@ impl PhysicalOptimizerRule for RepartitionAdapter {
     fn schema_check(&self) -> bool {
         true
     }
-}
\ No newline at end of file
+}
diff --git a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
index 12c94076c9..c1dcb27bf2 100644
--- a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
+++ b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
@@ -236,7 +236,10 @@ impl ExecutionPlan for PromAlignExec {
         }))
     }
 
-    fn statistics(&self) -> std::result::Result<datafusion::common::Statistics, datafusion::error::DataFusionError> {
+    fn statistics(
+        &self,
+    ) -> std::result::Result<datafusion::common::Statistics, datafusion::error::DataFusionError>
+    {
         // TODO(chenxiang)
         Ok(Statistics::new_unknown(&self.schema()))
     }
@@ -529,7 +532,7 @@ impl Stream for PromAlignReader {
                     if !tsid_samples.is_empty() {
                         return Poll::Ready(Some(
                             self.samples_to_record_batch(schema, tsid_samples)
-                            .map_err(|err| DataFusionError::ArrowError(err, None)),
+                                .map_err(|err| DataFusionError::ArrowError(err, None)),
                         ));
                     }
                 }

From 24f4d5b68503761967493b8e7bf29c54209a9930 Mon Sep 17 00:00:00 2001
From: tanruixiang <tanruixiang0104@gmail.com>
Date: Thu, 18 Jan 2024 17:07:09 +0800
Subject: [PATCH 03/25] fix: adapt dist_sql_query test_util to the new datafusion API

---
 src/df_engine_extensions/src/dist_sql_query/test_util.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/src/df_engine_extensions/src/dist_sql_query/test_util.rs b/src/df_engine_extensions/src/dist_sql_query/test_util.rs
index 1f4e788fef..ffa988812c 100644
--- a/src/df_engine_extensions/src/dist_sql_query/test_util.rs
+++ b/src/df_engine_extensions/src/dist_sql_query/test_util.rs
@@ -263,7 +263,6 @@ impl TestContext {
                 self.group_by.clone(),
                 self.aggr_exprs.clone(),
                 vec![None],
-                vec![None],
                 input,
                 input_schema.clone(),
             )
@@ -289,7 +288,6 @@ impl TestContext {
                 final_group_by,
                 self.aggr_exprs.clone(),
                 vec![None],
-                vec![None],
                 merge,
                 input_schema,
             )
@@ -490,8 +488,8 @@ impl ExecutionPlan for MockScan {
         unimplemented!()
     }
 
-    fn statistics(&self) -> datafusion::physical_plan::Statistics {
-        unimplemented!()
+    fn statistics(&self) -> DfResult<datafusion::physical_plan::Statistics> {
+        Ok(datafusion::physical_plan::Statistics::new_unknown(&self.schema()))
     }
 }
 

From 88dc1f2b1be4a58c6701f96acb7c3daad1b168ff Mon Sep 17 00:00:00 2001
From: tanruixiang <tanruixiang0104@gmail.com>
Date: Thu, 18 Jan 2024 17:09:25 +0800
Subject: [PATCH 04/25] fix: reformat MockScan::statistics and drop the prost::Message import

---
 src/df_engine_extensions/src/dist_sql_query/test_util.rs | 4 +++-
 src/query_engine/src/datafusion_impl/task_context.rs     | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/df_engine_extensions/src/dist_sql_query/test_util.rs b/src/df_engine_extensions/src/dist_sql_query/test_util.rs
index ffa988812c..813f142b96 100644
--- a/src/df_engine_extensions/src/dist_sql_query/test_util.rs
+++ b/src/df_engine_extensions/src/dist_sql_query/test_util.rs
@@ -489,7 +489,9 @@ impl ExecutionPlan for MockScan {
     }
 
     fn statistics(&self) -> DfResult<datafusion::physical_plan::Statistics> {
-        Ok(datafusion::physical_plan::Statistics::new_unknown(&self.schema()))
+        Ok(datafusion::physical_plan::Statistics::new_unknown(
+            &self.schema(),
+        ))
     }
 }
 
diff --git a/src/query_engine/src/datafusion_impl/task_context.rs b/src/query_engine/src/datafusion_impl/task_context.rs
index e0cc01ed50..f5875a1331 100644
--- a/src/query_engine/src/datafusion_impl/task_context.rs
+++ b/src/query_engine/src/datafusion_impl/task_context.rs
@@ -40,7 +40,7 @@ use df_engine_extensions::dist_sql_query::{
 };
 use futures::future::BoxFuture;
 use generic_error::BoxError;
-use prost::Message;
+
 use runtime::Priority;
 use snafu::ResultExt;
 use table_engine::{

From 81adc4a43145f1d7dbcf8e5f2e235eb90ae5c900 Mon Sep 17 00:00:00 2001
From: tanruixiang <tanruixiang0104@gmail.com>
Date: Fri, 26 Jan 2024 16:45:04 +0800
Subject: [PATCH 05/25] fix warning

---
 src/analytic_engine/src/instance/reorder_memtable.rs    | 4 ++--
 src/analytic_engine/src/row_iter/record_batch_stream.rs | 2 +-
 src/analytic_engine/src/sst/parquet/async_reader.rs     | 2 +-
 src/query_engine/src/datafusion_impl/mod.rs             | 4 ++--
 src/query_engine/src/datafusion_impl/task_context.rs    | 1 -
 src/query_frontend/src/logical_optimizer/mod.rs         | 3 ++-
 src/query_frontend/src/promql/convert.rs                | 2 +-
 src/query_frontend/src/promql/remote.rs                 | 2 +-
 8 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/src/analytic_engine/src/instance/reorder_memtable.rs b/src/analytic_engine/src/instance/reorder_memtable.rs
index 0c7900e52f..c37417bf64 100644
--- a/src/analytic_engine/src/instance/reorder_memtable.rs
+++ b/src/analytic_engine/src/instance/reorder_memtable.rs
@@ -262,8 +262,8 @@ impl Reorder {
     pub async fn into_stream(self) -> Result<SendableFetchingRecordBatchStream> {
         // 1. Init datafusion context
         let runtime = Arc::new(RuntimeEnv::default());
-        let state = SessionState::with_config_rt(SessionConfig::new(), runtime);
-        let ctx = SessionContext::with_state(state);
+        let state = SessionState::new_with_config_rt(SessionConfig::new(), runtime);
+        let ctx = SessionContext::new_with_state(state);
         let table_provider = Arc::new(MemIterProvider {
             arrow_schema: self.schema.to_arrow_schema_ref(),
             iter: Mutex::new(Some(self.iter)),
diff --git a/src/analytic_engine/src/row_iter/record_batch_stream.rs b/src/analytic_engine/src/row_iter/record_batch_stream.rs
index 49c41f2432..5740d73715 100644
--- a/src/analytic_engine/src/row_iter/record_batch_stream.rs
+++ b/src/analytic_engine/src/row_iter/record_batch_stream.rs
@@ -32,7 +32,7 @@ use common_types::{
 use datafusion::{
     common::ToDFSchema,
     error::DataFusionError,
-    optimizer::utils::conjunction,
+    logical_expr::utils::conjunction,
     physical_expr::{self, execution_props::ExecutionProps},
     physical_plan::PhysicalExpr,
 };
diff --git a/src/analytic_engine/src/sst/parquet/async_reader.rs b/src/analytic_engine/src/sst/parquet/async_reader.rs
index 94feeab2c5..49747b5376 100644
--- a/src/analytic_engine/src/sst/parquet/async_reader.rs
+++ b/src/analytic_engine/src/sst/parquet/async_reader.rs
@@ -219,7 +219,7 @@ impl<'a> Reader<'a> {
     ) -> Result<Option<RowSelection>> {
         // TODO: remove fixed partition
         let partition = 0;
-        let exprs = datafusion::optimizer::utils::conjunction(self.predicate.exprs().to_vec());
+        let exprs = datafusion::logical_expr::utils::conjunction(self.predicate.exprs().to_vec());
         let exprs = match exprs {
             Some(exprs) => exprs,
             None => return Ok(None),
diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs
index 48e42c211b..482628f836 100644
--- a/src/query_engine/src/datafusion_impl/mod.rs
+++ b/src/query_engine/src/datafusion_impl/mod.rs
@@ -137,7 +137,7 @@ impl DfContextBuilder {
 
         // Using default logcial optimizer, if want to add more custom rule, using
         // `add_optimizer_rule` to add.
-        let state = SessionState::with_config_rt(df_session_config, self.runtime_env.clone());
-        SessionContext::with_state(state)
+        let state = SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone());
+        SessionContext::new_with_state(state)
     }
 }
diff --git a/src/query_engine/src/datafusion_impl/task_context.rs b/src/query_engine/src/datafusion_impl/task_context.rs
index f5875a1331..d1ea667de9 100644
--- a/src/query_engine/src/datafusion_impl/task_context.rs
+++ b/src/query_engine/src/datafusion_impl/task_context.rs
@@ -40,7 +40,6 @@ use df_engine_extensions::dist_sql_query::{
 };
 use futures::future::BoxFuture;
 use generic_error::BoxError;
-
 use runtime::Priority;
 use snafu::ResultExt;
 use table_engine::{
diff --git a/src/query_frontend/src/logical_optimizer/mod.rs b/src/query_frontend/src/logical_optimizer/mod.rs
index 4d62e87750..8f2bf42a2c 100644
--- a/src/query_frontend/src/logical_optimizer/mod.rs
+++ b/src/query_frontend/src/logical_optimizer/mod.rs
@@ -30,7 +30,8 @@ use datafusion::{
 use type_conversion::TypeConversion;
 
 pub fn optimize_plan(plan: &LogicalPlan) -> Result<LogicalPlan> {
-    let state = SessionState::with_config_rt(SessionConfig::new(), Arc::new(RuntimeEnv::default()));
+    let state =
+        SessionState::new_with_config_rt(SessionConfig::new(), Arc::new(RuntimeEnv::default()));
     let state = register_analyzer_rules(state);
     // Register iox optimizers, used by influxql.
     let state = influxql_query::logical_optimizer::register_iox_logical_optimizers(state);
diff --git a/src/query_frontend/src/promql/convert.rs b/src/query_frontend/src/promql/convert.rs
index 6ff90d5bba..f364a0b101 100644
--- a/src/query_frontend/src/promql/convert.rs
+++ b/src/query_frontend/src/promql/convert.rs
@@ -578,7 +578,7 @@ impl Selector {
             .context(TableNotFound { name: &table })?;
 
         let table_provider = meta_provider
-            .get_table_provider(table_ref.table.name().into())
+            .get_table_source(table_ref.table.name().into())
             .context(TableProviderNotFound { name: &table })?;
         let schema = Schema::try_from(table_provider.schema()).context(BuildTableSchema)?;
         let timestamp_column_name = schema.timestamp_name().to_string();
diff --git a/src/query_frontend/src/promql/remote.rs b/src/query_frontend/src/promql/remote.rs
index c687b51d0f..c3c1439ec7 100644
--- a/src/query_frontend/src/promql/remote.rs
+++ b/src/query_frontend/src/promql/remote.rs
@@ -64,7 +64,7 @@ pub fn remote_query_to_plan<P: MetaProvider>(
     let (metric, field, mut filters) = normalize_matchers(query.matchers)?;
 
     let table_provider = meta_provider
-        .get_table_provider(TableReference::bare(&metric))
+        .get_table_source(TableReference::bare(&metric))
         .context(TableProviderNotFound { name: &metric })?;
     let schema = Schema::try_from(table_provider.schema()).context(BuildTableSchema)?;
     let timestamp_col_name = schema.timestamp_name();

From b34a9b6fecefe3396f351855c19ec58b48f2e948 Mon Sep 17 00:00:00 2001
From: tanruixiang <tanruixiang0104@gmail.com>
Date: Fri, 26 Jan 2024 16:49:36 +0800
Subject: [PATCH 06/25] fix: import conjunction from logical_expr::utils in promql

---
 src/query_frontend/src/promql/convert.rs | 5 +++--
 src/query_frontend/src/promql/remote.rs  | 3 +--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/query_frontend/src/promql/convert.rs b/src/query_frontend/src/promql/convert.rs
index f364a0b101..e92e0b9922 100644
--- a/src/query_frontend/src/promql/convert.rs
+++ b/src/query_frontend/src/promql/convert.rs
@@ -27,9 +27,10 @@ use datafusion::{
         expr::{Alias, ScalarFunction},
         lit,
         logical_plan::{Extension, LogicalPlan, LogicalPlanBuilder},
-        max, min, sum, Expr as DataFusionExpr,
+        max, min, sum,
+        utils::conjunction,
+        Expr as DataFusionExpr,
     },
-    optimizer::utils::conjunction,
     prelude::ident,
     sql::planner::ContextProvider,
 };
diff --git a/src/query_frontend/src/promql/remote.rs b/src/query_frontend/src/promql/remote.rs
index c3c1439ec7..e8fc99e8be 100644
--- a/src/query_frontend/src/promql/remote.rs
+++ b/src/query_frontend/src/promql/remote.rs
@@ -21,8 +21,7 @@ use std::sync::Arc;
 
 use common_types::{schema::Schema, time::TimeRange};
 use datafusion::{
-    logical_expr::{LogicalPlanBuilder, Operator},
-    optimizer::utils::conjunction,
+    logical_expr::{utils::conjunction, LogicalPlanBuilder, Operator},
     prelude::{ident, lit, Expr},
     sql::{planner::ContextProvider, TableReference},
 };

From 5f048e50d86d6c341b75cc36e9e56bc7af765a4c Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Fri, 26 Jan 2024 16:55:12 +0800
Subject: [PATCH 07/25] fix: remove bad optimize rule

---
 src/query_engine/src/datafusion_impl/mod.rs | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs
index 482628f836..09e00ac0a5 100644
--- a/src/query_engine/src/datafusion_impl/mod.rs
+++ b/src/query_engine/src/datafusion_impl/mod.rs
@@ -24,7 +24,7 @@ use datafusion::{
         runtime_env::{RuntimeConfig, RuntimeEnv},
         FunctionRegistry,
     },
-    prelude::{SessionConfig, SessionContext},
+    prelude::{SessionConfig, SessionContext}, physical_optimizer::{output_requirements::OutputRequirements, aggregate_statistics::AggregateStatistics, join_selection::JoinSelection, limited_distinct_aggregation::LimitedDistinctAggregation, combine_partial_final_agg::CombinePartialFinalAggregate, enforce_sorting::EnforceSorting, coalesce_batches::CoalesceBatches, pipeline_checker::PipelineChecker, topk_aggregation::TopKAggregation},
 };
 use df_engine_extensions::codec::PhysicalExtensionCodecImpl;
 use table_engine::{provider::HoraeDBOptions, remote::RemoteEngineRef};
@@ -137,7 +137,23 @@ impl DfContextBuilder {
 
         // Using default logcial optimizer, if want to add more custom rule, using
         // `add_optimizer_rule` to add.
-        let state = SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone());
+        let mut state = SessionState::with_config_rt(df_session_config, self.runtime_env.clone());
+        state = state.with_physical_optimizer_rules(vec![
+            Arc::new(OutputRequirements::new_add_mode()),
+            Arc::new(AggregateStatistics::new()),
+            Arc::new(JoinSelection::new()),
+            Arc::new(LimitedDistinctAggregation::new()),
+            // TODO: this rule will throw this error
+            // Internal error: Children cannot be replaced in ScanTable
+            // Arc::new(EnforceDistribution::new()),
+            Arc::new(CombinePartialFinalAggregate::new()),
+            Arc::new(EnforceSorting::new()),
+            Arc::new(CoalesceBatches::new()),
+            Arc::new(OutputRequirements::new_remove_mode()),
+            Arc::new(PipelineChecker::new()),
+            Arc::new(TopKAggregation::new()),
+            // Arc::new(ProjectionPushdown::new()),
+        ]);
         SessionContext::new_with_state(state)
     }
 }

From 5f6a6d47cd4d510e3b557cf9765e322b192a78f8 Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Fri, 26 Jan 2024 17:14:24 +0800
Subject: [PATCH 08/25] fix ut: disable physical optimizer rules in memtable reorder

---
 src/analytic_engine/src/instance/reorder_memtable.rs | 5 ++++-
 src/query_engine/src/datafusion_impl/mod.rs          | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/analytic_engine/src/instance/reorder_memtable.rs b/src/analytic_engine/src/instance/reorder_memtable.rs
index c37417bf64..fb28c4a13f 100644
--- a/src/analytic_engine/src/instance/reorder_memtable.rs
+++ b/src/analytic_engine/src/instance/reorder_memtable.rs
@@ -262,7 +262,10 @@ impl Reorder {
     pub async fn into_stream(self) -> Result<SendableFetchingRecordBatchStream> {
         // 1. Init datafusion context
         let runtime = Arc::new(RuntimeEnv::default());
-        let state = SessionState::new_with_config_rt(SessionConfig::new(), runtime);
+        let mut state = SessionState::new_with_config_rt(SessionConfig::new(), runtime);
+        // The physical optimizer rules have bug, and the plan here is simple, optimize is not required,
+        // so we disable it here.
+        state = state.with_physical_optimizer_rules(vec![]);
         let ctx = SessionContext::new_with_state(state);
         let table_provider = Arc::new(MemIterProvider {
             arrow_schema: self.schema.to_arrow_schema_ref(),
diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs
index 09e00ac0a5..0d0d576349 100644
--- a/src/query_engine/src/datafusion_impl/mod.rs
+++ b/src/query_engine/src/datafusion_impl/mod.rs
@@ -152,6 +152,7 @@ impl DfContextBuilder {
             Arc::new(OutputRequirements::new_remove_mode()),
             Arc::new(PipelineChecker::new()),
             Arc::new(TopKAggregation::new()),
+            // TODO: This rule is not public, so we can't use it
             // Arc::new(ProjectionPushdown::new()),
         ]);
         SessionContext::new_with_state(state)

From 7493d7e1ccfc1c81548ae988ea6dfadbb05bf130 Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Mon, 29 Jan 2024 10:40:19 +0800
Subject: [PATCH 09/25] test: ensure filter exprs have the same length as aggr exprs

---
 src/df_engine_extensions/src/dist_sql_query/test_util.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/df_engine_extensions/src/dist_sql_query/test_util.rs b/src/df_engine_extensions/src/dist_sql_query/test_util.rs
index 813f142b96..c1d00fb227 100644
--- a/src/df_engine_extensions/src/dist_sql_query/test_util.rs
+++ b/src/df_engine_extensions/src/dist_sql_query/test_util.rs
@@ -262,7 +262,7 @@ impl TestContext {
                 AggregateMode::Partial,
                 self.group_by.clone(),
                 self.aggr_exprs.clone(),
-                vec![None],
+                vec![None; self.aggr_exprs.len()],
                 input,
                 input_schema.clone(),
             )
@@ -287,7 +287,7 @@ impl TestContext {
                 AggregateMode::Final,
                 final_group_by,
                 self.aggr_exprs.clone(),
-                vec![None],
+                vec![None; self.aggr_exprs.len()],
                 merge,
                 input_schema,
             )

From 2fb92c44554c889aba5a3aa073c8e72ffba03900 Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Mon, 29 Jan 2024 11:01:45 +0800
Subject: [PATCH 10/25] fix clippy

---
 src/df_operator/src/scalar.rs                             | 1 +
 src/df_operator/src/udaf.rs                               | 1 +
 src/query_engine/src/datafusion_impl/mod.rs               | 2 +-
 src/query_frontend/src/influxql/planner.rs                | 2 +-
 .../src/logical_optimizer/type_conversion.rs              | 6 +++---
 src/query_frontend/src/provider.rs                        | 8 +-------
 6 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/df_operator/src/scalar.rs b/src/df_operator/src/scalar.rs
index 4ae3372cfd..58e8214c1a 100644
--- a/src/df_operator/src/scalar.rs
+++ b/src/df_operator/src/scalar.rs
@@ -31,6 +31,7 @@ pub struct ScalarUdf {
 }
 
 impl ScalarUdf {
+    #[allow(deprecated)]
     pub fn create(name: &str, func: ScalarFunction) -> Self {
         let signature = func.signature().to_datafusion_signature();
         let return_type = func.return_type().to_datafusion_return_type();
diff --git a/src/df_operator/src/udaf.rs b/src/df_operator/src/udaf.rs
index 312990b252..44f3913673 100644
--- a/src/df_operator/src/udaf.rs
+++ b/src/df_operator/src/udaf.rs
@@ -31,6 +31,7 @@ pub struct AggregateUdf {
 }
 
 impl AggregateUdf {
+    #[allow(deprecated)]
     pub fn create(name: &str, func: AggregateFunction) -> Self {
         let signature = func.signature().to_datafusion_signature();
         let return_type = func.return_type().to_datafusion_return_type();
diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs
index 0d0d576349..46b96f01b0 100644
--- a/src/query_engine/src/datafusion_impl/mod.rs
+++ b/src/query_engine/src/datafusion_impl/mod.rs
@@ -137,7 +137,7 @@ impl DfContextBuilder {
 
         // Using default logcial optimizer, if want to add more custom rule, using
         // `add_optimizer_rule` to add.
-        let mut state = SessionState::with_config_rt(df_session_config, self.runtime_env.clone());
+        let mut state = SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone());
         state = state.with_physical_optimizer_rules(vec![
             Arc::new(OutputRequirements::new_add_mode()),
             Arc::new(AggregateStatistics::new()),
diff --git a/src/query_frontend/src/influxql/planner.rs b/src/query_frontend/src/influxql/planner.rs
index 3b21228ad3..ed8d9c1460 100644
--- a/src/query_frontend/src/influxql/planner.rs
+++ b/src/query_frontend/src/influxql/planner.rs
@@ -57,7 +57,7 @@ struct InfluxQLSchemaProvider<'a, P: MetaProvider> {
 impl<'a, P: MetaProvider> SchemaProvider for InfluxQLSchemaProvider<'a, P> {
     fn get_table_provider(&self, name: &str) -> datafusion::error::Result<Arc<dyn TableSource>> {
         self.context_provider
-            .get_table_provider(name.into())
+            .get_table_source(name.into())
             .map_err(|e| {
                 DataFusionError::Plan(format!(
                     "measurement does not exist, measurement:{name}, source:{e}"
diff --git a/src/query_frontend/src/logical_optimizer/type_conversion.rs b/src/query_frontend/src/logical_optimizer/type_conversion.rs
index e8ccd42fc5..3d67f458e6 100644
--- a/src/query_frontend/src/logical_optimizer/type_conversion.rs
+++ b/src/query_frontend/src/logical_optimizer/type_conversion.rs
@@ -30,7 +30,7 @@ use datafusion::{
     logical_expr::{
         expr::{Expr, InList},
         logical_plan::{Filter, LogicalPlan, TableScan},
-        utils, Between, BinaryExpr, ExprSchemable, Operator,
+         Between, BinaryExpr, ExprSchemable, Operator,
     },
     optimizer::analyzer::AnalyzerRule,
     scalar::ScalarValue,
@@ -113,13 +113,13 @@ impl AnalyzerRule for TypeConversion {
                     .map(|plan| self.analyze(plan.clone(), config))
                     .collect::<Result<Vec<_>>>()?;
 
-                let expr = plan
+                let exprs = plan
                     .expressions()
                     .into_iter()
                     .map(|e| e.rewrite(&mut rewriter))
                     .collect::<Result<Vec<_>>>()?;
 
-                Ok(utils::from_plan(&plan, &expr, &new_inputs)?)
+                Ok(LogicalPlan::with_new_exprs(&plan, exprs, &new_inputs)?)
             }
             LogicalPlan::Subquery(_)
             | LogicalPlan::Statement { .. }
diff --git a/src/query_frontend/src/provider.rs b/src/query_frontend/src/provider.rs
index 67750fcb0e..5a9cdf8514 100644
--- a/src/query_frontend/src/provider.rs
+++ b/src/query_frontend/src/provider.rs
@@ -320,7 +320,7 @@ impl<'a, P: MetaProvider> MetaProvider for ContextProviderAdapter<'a, P> {
 }
 
 impl<'a, P: MetaProvider> ContextProvider for ContextProviderAdapter<'a, P> {
-    fn get_table_provider(
+    fn get_table_source(
         &self,
         name: TableReference,
     ) -> std::result::Result<Arc<(dyn TableSource + 'static)>, DataFusionError> {
@@ -414,12 +414,6 @@ impl<'a, P: MetaProvider> ContextProvider for ContextProviderAdapter<'a, P> {
         None
     }
 
-    fn get_table_source(
-        &self,
-        name: TableReference,
-    ) -> datafusion::error::Result<Arc<dyn TableSource>> {
-        self.get_table_provider(name)
-    }
 }
 
 struct SchemaProviderAdapter {

From cd38d616655f24c5fe46e83837e79754403ef26e Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Mon, 29 Jan 2024 18:47:32 +0800
Subject: [PATCH 11/25] deps: bump datafusion (CeresDB fork) and influxql

---
 Cargo.lock                                    | 38 +++++++++----------
 Cargo.toml                                    | 12 +++---
 .../cases/common/dml/issue-302.result         |  2 +-
 .../cases/common/dml/issue-341.result         | 12 +++---
 .../cases/common/dml/issue-59.result          |  4 +-
 .../cases/common/explain/explain.result       |  2 +-
 .../cases/common/optimizer/optimizer.result   |  2 +-
 .../cases/env/local/ddl/query-plan.result     | 22 +++++------
 .../src/instance/reorder_memtable.rs          |  4 +-
 src/common_types/src/datum.rs                 |  2 +
 src/query_engine/src/datafusion_impl/mod.rs   | 16 ++++++--
 .../src/logical_optimizer/type_conversion.rs  |  2 +-
 src/query_frontend/src/parser.rs              | 12 +++---
 src/table_engine/src/provider.rs              |  7 ++--
 src/table_engine/src/table.rs                 |  1 +
 15 files changed, 72 insertions(+), 66 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 7f9c89a359..a1bb14c9ee 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -658,7 +658,7 @@ dependencies = [
 [[package]]
 name = "arrow_util"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
+source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
 dependencies = [
  "ahash 0.8.3",
  "arrow 49.0.0",
@@ -2008,7 +2008,7 @@ dependencies = [
 [[package]]
 name = "datafusion"
 version = "34.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
 dependencies = [
  "ahash 0.8.3",
  "arrow 49.0.0",
@@ -2055,7 +2055,7 @@ dependencies = [
 [[package]]
 name = "datafusion-common"
 version = "34.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
 dependencies = [
  "ahash 0.8.3",
  "arrow 49.0.0",
@@ -2074,7 +2074,7 @@ dependencies = [
 [[package]]
 name = "datafusion-execution"
 version = "34.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
 dependencies = [
  "arrow 49.0.0",
  "chrono",
@@ -2094,7 +2094,7 @@ dependencies = [
 [[package]]
 name = "datafusion-expr"
 version = "34.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
 dependencies = [
  "ahash 0.8.3",
  "arrow 49.0.0",
@@ -2109,7 +2109,7 @@ dependencies = [
 [[package]]
 name = "datafusion-optimizer"
 version = "34.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
 dependencies = [
  "arrow 49.0.0",
  "async-trait",
@@ -2126,7 +2126,7 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-expr"
 version = "34.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
 dependencies = [
  "ahash 0.8.3",
  "arrow 49.0.0",
@@ -2159,7 +2159,7 @@ dependencies = [
 [[package]]
 name = "datafusion-physical-plan"
 version = "34.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
 dependencies = [
  "ahash 0.8.3",
  "arrow 49.0.0",
@@ -2189,7 +2189,7 @@ dependencies = [
 [[package]]
 name = "datafusion-proto"
 version = "34.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
 dependencies = [
  "arrow 49.0.0",
  "chrono",
@@ -2203,7 +2203,7 @@ dependencies = [
 [[package]]
 name = "datafusion-sql"
 version = "34.0.0"
-source = "git+https://github.com/apache/arrow-datafusion.git?rev=a154884545cfdeb1a6c20872b3882a5624cd1119#a154884545cfdeb1a6c20872b3882a5624cd1119"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
 dependencies = [
  "arrow 49.0.0",
  "arrow-schema 49.0.0",
@@ -2216,7 +2216,7 @@ dependencies = [
 [[package]]
 name = "datafusion_util"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
+source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
 dependencies = [
  "async-trait",
  "datafusion",
@@ -2837,7 +2837,7 @@ checksum = "8f5f3913fa0bfe7ee1fd8248b6b9f42a5af4b9d65ec2dd2c3c26132b950ecfc2"
 [[package]]
 name = "generated_types"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
+source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
 dependencies = [
  "pbjson",
  "pbjson-build",
@@ -3354,7 +3354,7 @@ dependencies = [
 [[package]]
 name = "influxdb_influxql_parser"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
+source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
 dependencies = [
  "chrono",
  "chrono-tz",
@@ -3447,7 +3447,7 @@ dependencies = [
 [[package]]
 name = "iox_query"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
+source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
 dependencies = [
  "arrow 49.0.0",
  "arrow_util",
@@ -3471,7 +3471,7 @@ dependencies = [
 [[package]]
 name = "iox_query_influxql"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
+source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
 dependencies = [
  "arrow 49.0.0",
  "chrono",
@@ -4589,7 +4589,7 @@ dependencies = [
 [[package]]
 name = "observability_deps"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
+source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
 dependencies = [
  "tracing",
 ]
@@ -5590,7 +5590,7 @@ dependencies = [
 [[package]]
 name = "query_functions"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
+source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
 dependencies = [
  "arrow 49.0.0",
  "chrono",
@@ -6294,7 +6294,7 @@ dependencies = [
 [[package]]
 name = "schema"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
+source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
 dependencies = [
  "arrow 49.0.0",
  "hashbrown 0.13.2",
@@ -7091,7 +7091,7 @@ dependencies = [
 [[package]]
 name = "test_helpers"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=cafd1c73e375e218b646cef5024cd27c3855f997#cafd1c73e375e218b646cef5024cd27c3855f997"
+source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
 dependencies = [
  "dotenvy",
  "observability_deps",
diff --git a/Cargo.toml b/Cargo.toml
index 16fcb93faf..a21209bde5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -107,8 +107,8 @@ cluster = { path = "src/cluster" }
 criterion = "0.5"
 horaedb-client = "1.0.2"
 common_types = { path = "src/common_types" }
-datafusion = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119" }
-datafusion-proto = { git = "https://github.com/apache/arrow-datafusion.git", rev = "a154884545cfdeb1a6c20872b3882a5624cd1119" }
+datafusion = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "2891cba41" }
+datafusion-proto = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "2891cba41" }
 derive_builder = "0.12"
 df_operator = { path = "src/df_operator" }
 df_engine_extensions = { path = "src/df_engine_extensions" }
@@ -121,10 +121,10 @@ hash_ext = { path = "src/components/hash_ext" }
 hex = "0.4.3"
 hyperloglog = { git = "https://github.com/jedisct1/rust-hyperloglog.git", rev = "425487ce910f26636fbde8c4d640b538431aad50" }
 id_allocator = { path = "src/components/id_allocator" }
-influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "iox_query_influxql" }
-influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "influxdb_influxql_parser" }
-influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "iox_query" }
-influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "cafd1c73e375e218b646cef5024cd27c3855f997", package = "schema" }
+influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "iox_query_influxql" }
+influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "influxdb_influxql_parser" }
+influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "iox_query" }
+influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "schema" }
 interpreters = { path = "src/interpreters" }
 itertools = "0.10.5"
 lz4_flex = { version = "0.11", default-features = false, features = ["frame"] }
diff --git a/integration_tests/cases/common/dml/issue-302.result b/integration_tests/cases/common/dml/issue-302.result
index b57d881fd2..cd7afc3a36 100644
--- a/integration_tests/cases/common/dml/issue-302.result
+++ b/integration_tests/cases/common/dml/issue-302.result
@@ -12,7 +12,7 @@ affected_rows: 1
 
 select `t`, count(distinct name) from issue302 group by `t`;
 
-issue302.t,COUNT(DISTINCT issue302.name),
+t,COUNT(DISTINCT issue302.name),
 Timestamp(1651737067000),Int64(0),
 
 
diff --git a/integration_tests/cases/common/dml/issue-341.result b/integration_tests/cases/common/dml/issue-341.result
index 902222590b..4e42d84c80 100644
--- a/integration_tests/cases/common/dml/issue-341.result
+++ b/integration_tests/cases/common/dml/issue-341.result
@@ -58,7 +58,7 @@ WHERE
 
 plan_type,plan,
 String("logical_plan"),String("TableScan: issue341_t1 projection=[timestamp, value], full_filters=[issue341_t1.value = Int32(3)]"),
-String("physical_plan"),String("ScanTable: table=issue341_t1, parallelism=8, priority=Low\n"),
+String("physical_plan"),String("ScanTable: table=issue341_t1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
 -- FilterExec node should not be in plan.
@@ -71,8 +71,8 @@ WHERE
     tag1 = "t3";
 
 plan_type,plan,
-String("logical_plan"),String("Projection: issue341_t1.timestamp, issue341_t1.value\n  TableScan: issue341_t1 projection=[timestamp, value, tag1], full_filters=[issue341_t1.tag1 = Utf8(\"t3\")]"),
-String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n  ScanTable: table=issue341_t1, parallelism=8, priority=Low\n"),
+String("logical_plan"),String("TableScan: issue341_t1 projection=[timestamp, value], full_filters=[issue341_t1.tag1 = Utf8(\"t3\")]"),
+String("physical_plan"),String("ScanTable: table=issue341_t1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(1)\n"),
 
 
 -- Repeat operations above, but with overwrite table
@@ -116,7 +116,7 @@ WHERE
 
 plan_type,plan,
 String("logical_plan"),String("Filter: issue341_t2.value = Float64(3)\n  TableScan: issue341_t2 projection=[timestamp, value], partial_filters=[issue341_t2.value = Float64(3)]"),
-String("physical_plan"),String("CoalesceBatchesExec: target_batch_size=8192\n  FilterExec: value@1 = 3\n    ScanTable: table=issue341_t2, parallelism=8, priority=Low\n"),
+String("physical_plan"),String("CoalesceBatchesExec: target_batch_size=8192\n  FilterExec: value@1 = 3\n    ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
 -- When using tag as filter, FilterExec node should not be in plan.
@@ -129,8 +129,8 @@ WHERE
     tag1 = "t3";
 
 plan_type,plan,
-String("logical_plan"),String("Projection: issue341_t2.timestamp, issue341_t2.value\n  TableScan: issue341_t2 projection=[timestamp, value, tag1], full_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"),
-String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n  ScanTable: table=issue341_t2, parallelism=8, priority=Low\n"),
+String("logical_plan"),String("TableScan: issue341_t2 projection=[timestamp, value], full_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"),
+String("physical_plan"),String("ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(1)\n"),
 
 
 DROP TABLE IF EXISTS `issue341_t1`;
diff --git a/integration_tests/cases/common/dml/issue-59.result b/integration_tests/cases/common/dml/issue-59.result
index 549c7019cd..36d818696a 100644
--- a/integration_tests/cases/common/dml/issue-59.result
+++ b/integration_tests/cases/common/dml/issue-59.result
@@ -24,8 +24,8 @@ FROM issue59
 GROUP BY id+1;
 
 plan_type,plan,
-String("logical_plan"),String("Projection: group_alias_0 AS issue59.id + Int64(1), COUNT(alias1) AS COUNT(DISTINCT issue59.account)\n  Aggregate: groupBy=[[group_alias_0]], aggr=[[COUNT(alias1)]]\n    Projection: group_alias_0, alias1\n      Aggregate: groupBy=[[CAST(issue59.id AS Int64) + Int64(1) AS group_alias_0, issue59.account AS alias1]], aggr=[[]]\n        TableScan: issue59 projection=[id, account]"),
-String("physical_plan"),String("ProjectionExec: expr=[group_alias_0@0 as issue59.id + Int64(1), COUNT(alias1)@1 as COUNT(DISTINCT issue59.account)]\n  AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n    CoalesceBatchesExec: target_batch_size=8192\n      RepartitionExec: partitioning=Hash([group_alias_0@0], 8), input_partitions=8\n        AggregateExec: mode=Partial, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n          ProjectionExec: expr=[group_alias_0@0 as group_alias_0, alias1@1 as alias1]\n            AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, alias1@1 as alias1], aggr=[]\n              CoalesceBatchesExec: target_batch_size=8192\n                RepartitionExec: partitioning=Hash([group_alias_0@0, alias1@1], 8), input_partitions=8\n                  AggregateExec: mode=Partial, gby=[CAST(id@0 AS Int64) + 1 as group_alias_0, account@1 as alias1], aggr=[]\n                    ScanTable: table=issue59, parallelism=8, priority=Low\n"),
+String("logical_plan"),String("Projection: group_alias_0 AS issue59.id + Int64(1), COUNT(alias1) AS COUNT(DISTINCT issue59.account)\n  Aggregate: groupBy=[[group_alias_0]], aggr=[[COUNT(alias1)]]\n    Aggregate: groupBy=[[CAST(issue59.id AS Int64) + Int64(1) AS group_alias_0, issue59.account AS alias1]], aggr=[[]]\n      TableScan: issue59 projection=[id, account]"),
+String("physical_plan"),String("ProjectionExec: expr=[group_alias_0@0 as issue59.id + Int64(1), COUNT(alias1)@1 as COUNT(DISTINCT issue59.account)]\n  AggregateExec: mode=SinglePartitioned, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n    AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, alias1@1 as alias1], aggr=[]\n      AggregateExec: mode=Partial, gby=[CAST(id@0 AS Int64) + 1 as group_alias_0, account@1 as alias1], aggr=[]\n        ScanTable: table=issue59, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
 DROP TABLE IF EXISTS issue59;
diff --git a/integration_tests/cases/common/explain/explain.result b/integration_tests/cases/common/explain/explain.result
index 0cd06380d5..6cf09c078e 100644
--- a/integration_tests/cases/common/explain/explain.result
+++ b/integration_tests/cases/common/explain/explain.result
@@ -10,7 +10,7 @@ EXPLAIN SELECT t FROM `04_explain_t`;
 
 plan_type,plan,
 String("logical_plan"),String("TableScan: 04_explain_t projection=[t]"),
-String("physical_plan"),String("ScanTable: table=04_explain_t, parallelism=8, priority=Low\n"),
+String("physical_plan"),String("ScanTable: table=04_explain_t, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
 DROP TABLE `04_explain_t`;
diff --git a/integration_tests/cases/common/optimizer/optimizer.result b/integration_tests/cases/common/optimizer/optimizer.result
index f9cfac2de9..e13dd456ce 100644
--- a/integration_tests/cases/common/optimizer/optimizer.result
+++ b/integration_tests/cases/common/optimizer/optimizer.result
@@ -10,7 +10,7 @@ EXPLAIN SELECT max(value) AS c1, avg(value) AS c2 FROM `07_optimizer_t` GROUP BY
 
 plan_type,plan,
 String("logical_plan"),String("Projection: MAX(07_optimizer_t.value) AS c1, AVG(07_optimizer_t.value) AS c2\n  Aggregate: groupBy=[[07_optimizer_t.name]], aggr=[[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]]\n    TableScan: 07_optimizer_t projection=[name, value]"),
-String("physical_plan"),String("ProjectionExec: expr=[MAX(07_optimizer_t.value)@1 as c1, AVG(07_optimizer_t.value)@2 as c2]\n  AggregateExec: mode=FinalPartitioned, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n    CoalesceBatchesExec: target_batch_size=8192\n      RepartitionExec: partitioning=Hash([name@0], 8), input_partitions=8\n        AggregateExec: mode=Partial, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n          ScanTable: table=07_optimizer_t, parallelism=8, priority=Low\n"),
+String("physical_plan"),String("ProjectionExec: expr=[MAX(07_optimizer_t.value)@1 as c1, AVG(07_optimizer_t.value)@2 as c2]\n  AggregateExec: mode=SinglePartitioned, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n    ScanTable: table=07_optimizer_t, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
 DROP TABLE `07_optimizer_t`;
diff --git a/integration_tests/cases/env/local/ddl/query-plan.result b/integration_tests/cases/env/local/ddl/query-plan.result
index a421856b4c..917767bf02 100644
--- a/integration_tests/cases/env/local/ddl/query-plan.result
+++ b/integration_tests/cases/env/local/ddl/query-plan.result
@@ -31,7 +31,7 @@ explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348001000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=1\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
+String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=1\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
 
 
 -- This query should have higher priority
@@ -40,7 +40,7 @@ explain analyze select t from `03_dml_select_real_time_range`
 where t >= 1695348001000 and t < 1695348002000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=1\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
+String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=1\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
 
 
 -- This query should not include memtable
@@ -49,7 +49,7 @@ explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348002000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=0\n=0]\n"),
+String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=0\n=0]\n"),
 
 
 -- SQLNESS ARG pre_cmd=flush
@@ -60,7 +60,7 @@ explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348001000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=1\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_sst_1, fetched_columns:[tsid,t]:\n            meta_data_cache_hit=false\n            parallelism=1\n            project_record_batch=xxs\n            read_meta_data_duration=xxs\n            row_mem=320\n            row_num=3\n            prune_row_groups:\n                pruned_by_custom_filter=0\n                pruned_by_min_max=0\n                row_groups_after_prune=1\n                total_row_groups=1\n                use_custom_filter=false\n=0]\n"),
+String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=1\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_sst_1, fetched_columns:[tsid,t]:\n            meta_data_cache_hit=false\n            parallelism=1\n            project_record_batch=xxs\n            read_meta_data_duration=xxs\n            row_mem=320\n            row_num=3\n            prune_row_groups:\n                pruned_by_custom_filter=0\n                pruned_by_min_max=0\n                row_groups_after_prune=1\n                total_row_groups=1\n                use_custom_filter=false\n=0]\n"),
 
 
 -- This query should not include SST
@@ -68,7 +68,7 @@ explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348002000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=0\n=0]\n"),
+String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=0\n=0]\n"),
 
 
 -- Table with an 'append' update mode
@@ -100,9 +100,7 @@ affected_rows: 3
 explain analyze select t from `03_append_mode_table`
 where t >= 1695348001000 and name = 'ceresdb';
 
-plan_type,plan,
-String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n  ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=false\n    chain_iter_0:\n        num_memtables=1\n        num_ssts=0\n        scan_duration=xxs\n        since_create=xxs\n        since_init=xxs\n        total_batch_fetched=1\n        total_rows_fetched=2\n        scan_memtable_1, fetched_columns:[t,name]:\n=0]\n"),
-
+Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute select, err:Failed to execute physical plan, msg:failed to collect execution results, err:Stream error, msg:convert from arrow record batch, err:Execution error: Failed to read table, partition:0, err:Failed to scan table, table:03_append_mode_table, err:Failed to build chain iterator, table:03_append_mode_table, err:Fail to build stream from the memtable, err:Failed to generate datafusion physical expr, err:Schema error: No field named name. Valid fields are t.. sql:explain analyze select t from `03_append_mode_table`\nwhere t >= 1695348001000 and name = 'ceresdb';" })
 
 -- Should just fetch projected columns from SST
 -- SQLNESS ARG pre_cmd=flush
@@ -114,9 +112,7 @@ String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[ou
 explain analyze select t from `03_append_mode_table`
 where t >= 1695348001000 and name = 'ceresdb';
 
-plan_type,plan,
-String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=[output_rows=2, elapsed_compute=xxs]\n  ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=false\n    chain_iter_0:\n        num_memtables=0\n        num_ssts=1\n        scan_duration=xxs\n        since_create=xxs\n        since_init=xxs\n        total_batch_fetched=1\n        total_rows_fetched=2\n        scan_sst_1, fetched_columns:[t,name]:\n            meta_data_cache_hit=false\n            parallelism=1\n            project_record_batch=xxs\n            read_meta_data_duration=xxs\n            row_mem=408\n            row_num=3\n            prune_row_groups:\n                pruned_by_custom_filter=0\n                pruned_by_min_max=0\n                row_groups_after_prune=1\n                total_row_groups=1\n                use_custom_filter=false\n=0]\n"),
-
+Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute select, err:Failed to execute physical plan, msg:failed to collect execution results, err:Stream error, msg:convert from arrow record batch, err:Execution error: Failed to read table, partition:0, err:Failed to scan table, table:03_append_mode_table, err:Failed to build chain iterator, table:03_append_mode_table, err:Fail to build stream from the sst file, err:Failed to generate datafusion physical expr, err:Schema error: No field named name. Valid fields are t.. sql:explain analyze select t from `03_append_mode_table`\nwhere t >= 1695348001000 and name = 'ceresdb';" })
 
 CREATE TABLE `TEST_QUERY_PRIORITY` (
     NAME string TAG,
@@ -136,7 +132,7 @@ explain analyze select TS from `TEST_QUERY_PRIORITY`
 where TS >= 1695348001000 and TS < 1695348002000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("ScanTable: table=TEST_QUERY_PRIORITY, parallelism=8, priority=High, metrics=[\nPredicate { exprs:[TS >= TimestampMillisecond(1695348001000, None), TS < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n    do_merge_sort=false\n=0]\n"),
+String("Plan with Metrics"),String("ScanTable: table=TEST_QUERY_PRIORITY, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[TS >= TimestampMillisecond(1695348001000, None), TS < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n    do_merge_sort=false\n=0]\n"),
 
 
 -- This query should have higher priority
@@ -145,7 +141,7 @@ explain analyze select TS from `TEST_QUERY_PRIORITY`
 where TS >= 1695348001000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("ScanTable: table=TEST_QUERY_PRIORITY, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[TS >= TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=false\n=0]\n"),
+String("Plan with Metrics"),String("ScanTable: table=TEST_QUERY_PRIORITY, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[TS >= TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=false\n=0]\n"),
 
 
 DROP TABLE `03_dml_select_real_time_range`;
diff --git a/src/analytic_engine/src/instance/reorder_memtable.rs b/src/analytic_engine/src/instance/reorder_memtable.rs
index fb28c4a13f..2f9ac87b8e 100644
--- a/src/analytic_engine/src/instance/reorder_memtable.rs
+++ b/src/analytic_engine/src/instance/reorder_memtable.rs
@@ -263,8 +263,8 @@ impl Reorder {
         // 1. Init datafusion context
         let runtime = Arc::new(RuntimeEnv::default());
         let mut state = SessionState::new_with_config_rt(SessionConfig::new(), runtime);
-        // The physical optimizer rules have bug, and the plan here is simple, optimize is not required,
-        // so we disable it here.
+        // The physical optimizer rules have bug, and the plan here is simple, optimize
+        // is not required, so we disable it here.
         state = state.with_physical_optimizer_rules(vec![]);
         let ctx = SessionContext::new_with_state(state);
         let table_provider = Arc::new(MemIterProvider {
diff --git a/src/common_types/src/datum.rs b/src/common_types/src/datum.rs
index 4b8b373763..9b22439a22 100644
--- a/src/common_types/src/datum.rs
+++ b/src/common_types/src/datum.rs
@@ -292,7 +292,9 @@ impl TryFrom<&SqlDataType> for DatumKind {
             SqlDataType::Double => Ok(Self::Double),
             SqlDataType::Boolean => Ok(Self::Boolean),
             SqlDataType::BigInt(_) => Ok(Self::Int64),
+            SqlDataType::Int64 => Ok(Self::Int64),
             SqlDataType::Int(_) => Ok(Self::Int32),
+            SqlDataType::Int8(_) => Ok(Self::Int8),
             SqlDataType::SmallInt(_) => Ok(Self::Int16),
             SqlDataType::String(_) => Ok(Self::String),
             SqlDataType::Varbinary(_) => Ok(Self::Varbinary),
diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs
index 46b96f01b0..3c4f18f0c5 100644
--- a/src/query_engine/src/datafusion_impl/mod.rs
+++ b/src/query_engine/src/datafusion_impl/mod.rs
@@ -24,7 +24,14 @@ use datafusion::{
         runtime_env::{RuntimeConfig, RuntimeEnv},
         FunctionRegistry,
     },
-    prelude::{SessionConfig, SessionContext}, physical_optimizer::{output_requirements::OutputRequirements, aggregate_statistics::AggregateStatistics, join_selection::JoinSelection, limited_distinct_aggregation::LimitedDistinctAggregation, combine_partial_final_agg::CombinePartialFinalAggregate, enforce_sorting::EnforceSorting, coalesce_batches::CoalesceBatches, pipeline_checker::PipelineChecker, topk_aggregation::TopKAggregation},
+    physical_optimizer::{
+        aggregate_statistics::AggregateStatistics, coalesce_batches::CoalesceBatches,
+        combine_partial_final_agg::CombinePartialFinalAggregate, enforce_sorting::EnforceSorting,
+        join_selection::JoinSelection, limited_distinct_aggregation::LimitedDistinctAggregation,
+        output_requirements::OutputRequirements, pipeline_checker::PipelineChecker,
+        projection_pushdown::ProjectionPushdown, topk_aggregation::TopKAggregation,
+    },
+    prelude::{SessionConfig, SessionContext},
 };
 use df_engine_extensions::codec::PhysicalExtensionCodecImpl;
 use table_engine::{provider::HoraeDBOptions, remote::RemoteEngineRef};
@@ -137,7 +144,8 @@ impl DfContextBuilder {
 
         // Using default logcial optimizer, if want to add more custom rule, using
         // `add_optimizer_rule` to add.
-        let mut state = SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone());
+        let mut state =
+            SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone());
         state = state.with_physical_optimizer_rules(vec![
             Arc::new(OutputRequirements::new_add_mode()),
             Arc::new(AggregateStatistics::new()),
@@ -146,14 +154,14 @@ impl DfContextBuilder {
             // TODO: this rule will throw this error
             // Internal error: Children cannot be replaced in ScanTable
             // Arc::new(EnforceDistribution::new()),
+            // Arc::new(EnforceSorting::new()),
             Arc::new(CombinePartialFinalAggregate::new()),
-            Arc::new(EnforceSorting::new()),
             Arc::new(CoalesceBatches::new()),
             Arc::new(OutputRequirements::new_remove_mode()),
             Arc::new(PipelineChecker::new()),
             Arc::new(TopKAggregation::new()),
             // TODO: This rule is not public, so we can't use it
-            // Arc::new(ProjectionPushdown::new()),
+            Arc::new(ProjectionPushdown::new()),
         ]);
         SessionContext::new_with_state(state)
     }
diff --git a/src/query_frontend/src/logical_optimizer/type_conversion.rs b/src/query_frontend/src/logical_optimizer/type_conversion.rs
index 3d67f458e6..95076f33c2 100644
--- a/src/query_frontend/src/logical_optimizer/type_conversion.rs
+++ b/src/query_frontend/src/logical_optimizer/type_conversion.rs
@@ -30,7 +30,7 @@ use datafusion::{
     logical_expr::{
         expr::{Expr, InList},
         logical_plan::{Filter, LogicalPlan, TableScan},
-         Between, BinaryExpr, ExprSchemable, Operator,
+        Between, BinaryExpr, ExprSchemable, Operator,
     },
     optimizer::analyzer::AnalyzerRule,
     scalar::ScalarValue,
diff --git a/src/query_frontend/src/parser.rs b/src/query_frontend/src/parser.rs
index cae7256a01..23efa0ade0 100644
--- a/src/query_frontend/src/parser.rs
+++ b/src/query_frontend/src/parser.rs
@@ -352,13 +352,11 @@ impl<'a> Parser<'a> {
                     is_dictionary = true;
                 }
             }
-            if let DataType::String(_) = c.data_type {
-                if is_dictionary {
-                    return parser_err!(format!(
-                        "Only string column can be dictionary encoded: {:?}",
-                        c.to_string()
-                    ));
-                }
+            if !matches!(c.data_type, DataType::String(_)) && is_dictionary {
+                return parser_err!(format!(
+                    "Only string column can be dictionary encoded: {:?}",
+                    c.to_string()
+                ));
             }
         }
 
diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs
index 49f76460e2..63e5cc7d22 100644
--- a/src/table_engine/src/provider.rs
+++ b/src/table_engine/src/provider.rs
@@ -410,7 +410,7 @@ impl ExecutionPlan for ScanTable {
         // However, we have no inputs here, so `UnknownPartitioning` is suitable.
         // In datafusion, always set it to `UnknownPartitioning` in the scan plan, for
         // example:  https://github.com/apache/arrow-datafusion/blob/cf152af6515f0808d840e1fe9c63b02802595826/datafusion/core/src/datasource/physical_plan/csv.rs#L175
-        Partitioning::UnknownPartitioning(self.parallelism)
+        Partitioning::UnknownPartitioning(self.parallelism.max(1))
     }
 
     fn output_ordering(&self) -> Option<&[PhysicalSortExpr]> {
@@ -480,10 +480,11 @@ impl DisplayAs for ScanTable {
     fn fmt_as(&self, _t: DisplayFormatType, f: &mut fmt::Formatter) -> fmt::Result {
         write!(
             f,
-            "ScanTable: table={}, parallelism={}, priority={:?}",
+            "ScanTable: table={}, parallelism={}, priority={:?}, partition_count={:?}",
             self.table.name(),
             self.request.opts.read_parallelism,
-            self.request.priority
+            self.request.priority,
+            self.output_partitioning()
         )
     }
 }
diff --git a/src/table_engine/src/table.rs b/src/table_engine/src/table.rs
index 7365ca66a4..3c611b4395 100644
--- a/src/table_engine/src/table.rs
+++ b/src/table_engine/src/table.rs
@@ -421,6 +421,7 @@ impl fmt::Debug for ReadRequest {
             .field("projected", &projected)
             .field("predicate", &predicate)
             .field("priority", &self.priority)
+            .field("projected_schema", &self.projected_schema)
             .finish()
     }
 }

From ee982f2b310e19de32df16c4d910ef24342f9370 Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Sun, 4 Feb 2024 17:43:52 +0800
Subject: [PATCH 12/25] fix tests

---
 Cargo.lock                                    | 70 +++++++++----------
 Cargo.toml                                    | 14 ++--
 .../cases/common/dml/issue-1087.result        | 17 +++--
 .../cases/common/dml/issue-59.result          |  2 +-
 .../cases/common/optimizer/optimizer.result   |  2 +-
 .../src/row_iter/record_batch_stream.rs       |  2 +-
 .../src/sst/parquet/async_reader.rs           |  2 +-
 .../parquet_ext/src/prune/min_max.rs          |  8 ---
 src/query_engine/src/datafusion_impl/mod.rs   | 26 +------
 .../physical_plan_extension/prom_align.rs     |  4 +-
 .../src/logical_optimizer/type_conversion.rs  |  2 +-
 src/query_frontend/src/plan.rs                |  1 +
 src/query_frontend/src/planner.rs             |  2 +-
 src/query_frontend/src/promql/convert.rs      |  2 +-
 src/query_frontend/src/promql/remote.rs       |  3 +-
 src/table_engine/src/predicate.rs             |  1 +
 16 files changed, 68 insertions(+), 90 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index a1bb14c9ee..f43a9036a5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -472,7 +472,6 @@ dependencies = [
  "arrow-data 49.0.0",
  "arrow-schema 49.0.0",
  "flatbuffers",
- "lz4_flex",
 ]
 
 [[package]]
@@ -658,7 +657,7 @@ dependencies = [
 [[package]]
 name = "arrow_util"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
+source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08"
 dependencies = [
  "ahash 0.8.3",
  "arrow 49.0.0",
@@ -2007,13 +2006,12 @@ dependencies = [
 
 [[package]]
 name = "datafusion"
-version = "34.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
+version = "33.0.0"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1"
 dependencies = [
  "ahash 0.8.3",
  "arrow 49.0.0",
  "arrow-array 49.0.0",
- "arrow-ipc 49.0.0",
  "arrow-schema 49.0.0",
  "async-compression",
  "async-trait",
@@ -2054,8 +2052,8 @@ dependencies = [
 
 [[package]]
 name = "datafusion-common"
-version = "34.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
+version = "33.0.0"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1"
 dependencies = [
  "ahash 0.8.3",
  "arrow 49.0.0",
@@ -2073,8 +2071,8 @@ dependencies = [
 
 [[package]]
 name = "datafusion-execution"
-version = "34.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
+version = "33.0.0"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1"
 dependencies = [
  "arrow 49.0.0",
  "chrono",
@@ -2093,8 +2091,8 @@ dependencies = [
 
 [[package]]
 name = "datafusion-expr"
-version = "34.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
+version = "33.0.0"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1"
 dependencies = [
  "ahash 0.8.3",
  "arrow 49.0.0",
@@ -2108,8 +2106,8 @@ dependencies = [
 
 [[package]]
 name = "datafusion-optimizer"
-version = "34.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
+version = "33.0.0"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1"
 dependencies = [
  "arrow 49.0.0",
  "async-trait",
@@ -2125,8 +2123,8 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr"
-version = "34.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
+version = "33.0.0"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1"
 dependencies = [
  "ahash 0.8.3",
  "arrow 49.0.0",
@@ -2158,8 +2156,8 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-plan"
-version = "34.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
+version = "33.0.0"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1"
 dependencies = [
  "ahash 0.8.3",
  "arrow 49.0.0",
@@ -2188,8 +2186,8 @@ dependencies = [
 
 [[package]]
 name = "datafusion-proto"
-version = "34.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
+version = "33.0.0"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1"
 dependencies = [
  "arrow 49.0.0",
  "chrono",
@@ -2202,8 +2200,8 @@ dependencies = [
 
 [[package]]
 name = "datafusion-sql"
-version = "34.0.0"
-source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=2891cba41#2891cba41de31ea77b26ab8a2ef0d1bd23fe51da"
+version = "33.0.0"
+source = "git+https://github.com/CeresDB/arrow-datafusion.git?rev=e21b03154#e21b03154511cd61e03e299a595db6be6b1852c1"
 dependencies = [
  "arrow 49.0.0",
  "arrow-schema 49.0.0",
@@ -2216,7 +2214,7 @@ dependencies = [
 [[package]]
 name = "datafusion_util"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
+source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08"
 dependencies = [
  "async-trait",
  "datafusion",
@@ -2837,7 +2835,7 @@ checksum = "8f5f3913fa0bfe7ee1fd8248b6b9f42a5af4b9d65ec2dd2c3c26132b950ecfc2"
 [[package]]
 name = "generated_types"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
+source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08"
 dependencies = [
  "pbjson",
  "pbjson-build",
@@ -3354,7 +3352,7 @@ dependencies = [
 [[package]]
 name = "influxdb_influxql_parser"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
+source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08"
 dependencies = [
  "chrono",
  "chrono-tz",
@@ -3447,7 +3445,7 @@ dependencies = [
 [[package]]
 name = "iox_query"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
+source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08"
 dependencies = [
  "arrow 49.0.0",
  "arrow_util",
@@ -3471,7 +3469,7 @@ dependencies = [
 [[package]]
 name = "iox_query_influxql"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
+source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08"
 dependencies = [
  "arrow 49.0.0",
  "chrono",
@@ -4589,7 +4587,7 @@ dependencies = [
 [[package]]
 name = "observability_deps"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
+source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08"
 dependencies = [
  "tracing",
 ]
@@ -5590,7 +5588,7 @@ dependencies = [
 [[package]]
 name = "query_functions"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
+source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08"
 dependencies = [
  "arrow 49.0.0",
  "chrono",
@@ -6294,7 +6292,7 @@ dependencies = [
 [[package]]
 name = "schema"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
+source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08"
 dependencies = [
  "arrow 49.0.0",
  "hashbrown 0.13.2",
@@ -6784,9 +6782,9 @@ dependencies = [
 
 [[package]]
 name = "sqlparser"
-version = "0.41.0"
+version = "0.39.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964"
+checksum = "743b4dc2cbde11890ccb254a8fc9d537fa41b36da00de2a1c5e9848c9bc42bd7"
 dependencies = [
  "log",
  "serde",
@@ -6795,13 +6793,13 @@ dependencies = [
 
 [[package]]
 name = "sqlparser_derive"
-version = "0.2.2"
+version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554"
+checksum = "55fe75cb4a364c7f7ae06c7dbbc8d84bddd85d6cdf9975963c3935bc1991761e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.48",
+ "syn 1.0.109",
 ]
 
 [[package]]
@@ -7091,7 +7089,7 @@ dependencies = [
 [[package]]
 name = "test_helpers"
 version = "0.1.0"
-source = "git+https://github.com/CeresDB/influxql.git?rev=5077dcc#5077dccb51d9c06d338748128585b160cbdbde1b"
+source = "git+https://github.com/CeresDB/influxql.git?rev=b9fb3ca#b9fb3ca59fda99997a51cab7a56d34fb2126dd08"
 dependencies = [
  "dotenvy",
  "observability_deps",
@@ -7714,7 +7712,7 @@ version = "1.6.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
 dependencies = [
- "cfg-if 0.1.10",
+ "cfg-if 1.0.0",
  "rand 0.8.5",
  "static_assertions",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index a21209bde5..b41694b31a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -107,8 +107,8 @@ cluster = { path = "src/cluster" }
 criterion = "0.5"
 horaedb-client = "1.0.2"
 common_types = { path = "src/common_types" }
-datafusion = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "2891cba41" }
-datafusion-proto = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "2891cba41" }
+datafusion = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "e21b03154" }
+datafusion-proto = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "e21b03154" }
 derive_builder = "0.12"
 df_operator = { path = "src/df_operator" }
 df_engine_extensions = { path = "src/df_engine_extensions" }
@@ -121,10 +121,10 @@ hash_ext = { path = "src/components/hash_ext" }
 hex = "0.4.3"
 hyperloglog = { git = "https://github.com/jedisct1/rust-hyperloglog.git", rev = "425487ce910f26636fbde8c4d640b538431aad50" }
 id_allocator = { path = "src/components/id_allocator" }
-influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "iox_query_influxql" }
-influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "influxdb_influxql_parser" }
-influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "iox_query" }
-influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "5077dcc", package = "schema" }
+influxql-logical-planner = { git = "https://github.com/CeresDB/influxql.git", rev = "b9fb3ca", package = "iox_query_influxql" }
+influxql-parser = { git = "https://github.com/CeresDB/influxql.git", rev = "b9fb3ca", package = "influxdb_influxql_parser" }
+influxql-query = { git = "https://github.com/CeresDB/influxql.git", rev = "b9fb3ca", package = "iox_query" }
+influxql-schema = { git = "https://github.com/CeresDB/influxql.git", rev = "b9fb3ca", package = "schema" }
 interpreters = { path = "src/interpreters" }
 itertools = "0.10.5"
 lz4_flex = { version = "0.11", default-features = false, features = ["frame"] }
@@ -173,7 +173,7 @@ smallvec = "1.6"
 slog = "2.7"
 spin = "0.9.6"
 system_statis = { path = "src/components/system_stats" }
-sqlparser = { version = "0.41", features = ["serde"] }
+sqlparser = { version = "0.39.0", features = ["serde"] }
 system_catalog = { path = "src/system_catalog" }
 table_engine = { path = "src/table_engine" }
 table_kv = { path = "src/components/table_kv" }
diff --git a/integration_tests/cases/common/dml/issue-1087.result b/integration_tests/cases/common/dml/issue-1087.result
index d264f4d212..fc1e0d8d5e 100644
--- a/integration_tests/cases/common/dml/issue-1087.result
+++ b/integration_tests/cases/common/dml/issue-1087.result
@@ -17,6 +17,7 @@ String("logical_plan after inline_table_scan"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after type_coercion"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after count_wildcard_rule"),String("SAME TEXT AS ABOVE"),
 String("analyzed_logical_plan"),String("SAME TEXT AS ABOVE"),
+String("logical_plan after eliminate_nested_union"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after simplify_expressions"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after unwrap_cast_in_comparison"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after replace_distinct_aggregate"),String("SAME TEXT AS ABOVE"),
@@ -33,6 +34,7 @@ String("logical_plan after eliminate_cross_join"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after common_sub_expression_eliminate"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after eliminate_limit"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after propagate_empty_relation"),String("SAME TEXT AS ABOVE"),
+String("logical_plan after eliminate_one_union"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after filter_null_join_keys"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after eliminate_outer_join"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after push_down_limit"),String("SAME TEXT AS ABOVE"),
@@ -46,6 +48,7 @@ String("logical_plan after eliminate_projection"),String("TableScan: issue_1087
 String("logical_plan after push_down_limit"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after influx_regex_to_datafusion_regex"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after handle_gap_fill"),String("SAME TEXT AS ABOVE"),
+String("logical_plan after eliminate_nested_union"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after simplify_expressions"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after unwrap_cast_in_comparison"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after replace_distinct_aggregate"),String("SAME TEXT AS ABOVE"),
@@ -62,6 +65,7 @@ String("logical_plan after eliminate_cross_join"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after common_sub_expression_eliminate"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after eliminate_limit"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after propagate_empty_relation"),String("SAME TEXT AS ABOVE"),
+String("logical_plan after eliminate_one_union"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after filter_null_join_keys"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after eliminate_outer_join"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after push_down_limit"),String("SAME TEXT AS ABOVE"),
@@ -76,17 +80,22 @@ String("logical_plan after push_down_limit"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after influx_regex_to_datafusion_regex"),String("SAME TEXT AS ABOVE"),
 String("logical_plan after handle_gap_fill"),String("SAME TEXT AS ABOVE"),
 String("logical_plan"),String("TableScan: issue_1087 projection=[tsid, t, name, value]"),
-String("initial_physical_plan"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low\n"),
+String("initial_physical_plan"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
+String("initial_physical_plan_with_stats"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:)]]\n"),
+String("physical_plan after OutputRequirements"),String("OutputRequirementExec\n  ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 String("physical_plan after aggregate_statistics"),String("SAME TEXT AS ABOVE"),
 String("physical_plan after join_selection"),String("SAME TEXT AS ABOVE"),
-String("physical_plan after PipelineFixer"),String("SAME TEXT AS ABOVE"),
-String("physical_plan after repartition"),String("SAME TEXT AS ABOVE"),
+String("physical_plan after LimitedDistinctAggregation"),String("SAME TEXT AS ABOVE"),
 String("physical_plan after EnforceDistribution"),String("SAME TEXT AS ABOVE"),
 String("physical_plan after CombinePartialFinalAggregate"),String("SAME TEXT AS ABOVE"),
 String("physical_plan after EnforceSorting"),String("SAME TEXT AS ABOVE"),
 String("physical_plan after coalesce_batches"),String("SAME TEXT AS ABOVE"),
+String("physical_plan after OutputRequirements"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 String("physical_plan after PipelineChecker"),String("SAME TEXT AS ABOVE"),
-String("physical_plan"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low\n"),
+String("physical_plan after LimitAggregation"),String("SAME TEXT AS ABOVE"),
+String("physical_plan after ProjectionPushdown"),String("SAME TEXT AS ABOVE"),
+String("physical_plan"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
+String("physical_plan_with_stats"),String("ScanTable: table=issue_1087, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), statistics=[Rows=Absent, Bytes=Absent, [(Col[0]:),(Col[1]:),(Col[2]:),(Col[3]:)]]\n"),
 
 
 DROP TABLE `issue_1087`;
diff --git a/integration_tests/cases/common/dml/issue-59.result b/integration_tests/cases/common/dml/issue-59.result
index 36d818696a..4f7544c87f 100644
--- a/integration_tests/cases/common/dml/issue-59.result
+++ b/integration_tests/cases/common/dml/issue-59.result
@@ -25,7 +25,7 @@ GROUP BY id+1;
 
 plan_type,plan,
 String("logical_plan"),String("Projection: group_alias_0 AS issue59.id + Int64(1), COUNT(alias1) AS COUNT(DISTINCT issue59.account)\n  Aggregate: groupBy=[[group_alias_0]], aggr=[[COUNT(alias1)]]\n    Aggregate: groupBy=[[CAST(issue59.id AS Int64) + Int64(1) AS group_alias_0, issue59.account AS alias1]], aggr=[[]]\n      TableScan: issue59 projection=[id, account]"),
-String("physical_plan"),String("ProjectionExec: expr=[group_alias_0@0 as issue59.id + Int64(1), COUNT(alias1)@1 as COUNT(DISTINCT issue59.account)]\n  AggregateExec: mode=SinglePartitioned, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n    AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, alias1@1 as alias1], aggr=[]\n      AggregateExec: mode=Partial, gby=[CAST(id@0 AS Int64) + 1 as group_alias_0, account@1 as alias1], aggr=[]\n        ScanTable: table=issue59, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
+String("physical_plan"),String("ProjectionExec: expr=[group_alias_0@0 as issue59.id + Int64(1), COUNT(alias1)@1 as COUNT(DISTINCT issue59.account)]\n  AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n    CoalesceBatchesExec: target_batch_size=8192\n      RepartitionExec: partitioning=Hash([group_alias_0@0], 8), input_partitions=8\n        AggregateExec: mode=Partial, gby=[group_alias_0@0 as group_alias_0], aggr=[COUNT(alias1)]\n          AggregateExec: mode=FinalPartitioned, gby=[group_alias_0@0 as group_alias_0, alias1@1 as alias1], aggr=[]\n            CoalesceBatchesExec: target_batch_size=8192\n              RepartitionExec: partitioning=Hash([group_alias_0@0, alias1@1], 8), input_partitions=8\n                AggregateExec: mode=Partial, gby=[CAST(id@0 AS Int64) + 1 as group_alias_0, account@1 as alias1], aggr=[]\n                  ScanTable: table=issue59, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
 DROP TABLE IF EXISTS issue59;
diff --git a/integration_tests/cases/common/optimizer/optimizer.result b/integration_tests/cases/common/optimizer/optimizer.result
index e13dd456ce..5df9f47e68 100644
--- a/integration_tests/cases/common/optimizer/optimizer.result
+++ b/integration_tests/cases/common/optimizer/optimizer.result
@@ -10,7 +10,7 @@ EXPLAIN SELECT max(value) AS c1, avg(value) AS c2 FROM `07_optimizer_t` GROUP BY
 
 plan_type,plan,
 String("logical_plan"),String("Projection: MAX(07_optimizer_t.value) AS c1, AVG(07_optimizer_t.value) AS c2\n  Aggregate: groupBy=[[07_optimizer_t.name]], aggr=[[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]]\n    TableScan: 07_optimizer_t projection=[name, value]"),
-String("physical_plan"),String("ProjectionExec: expr=[MAX(07_optimizer_t.value)@1 as c1, AVG(07_optimizer_t.value)@2 as c2]\n  AggregateExec: mode=SinglePartitioned, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n    ScanTable: table=07_optimizer_t, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
+String("physical_plan"),String("ProjectionExec: expr=[MAX(07_optimizer_t.value)@1 as c1, AVG(07_optimizer_t.value)@2 as c2]\n  AggregateExec: mode=FinalPartitioned, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n    CoalesceBatchesExec: target_batch_size=8192\n      RepartitionExec: partitioning=Hash([name@0], 8), input_partitions=8\n        AggregateExec: mode=Partial, gby=[name@0 as name], aggr=[MAX(07_optimizer_t.value), AVG(07_optimizer_t.value)]\n          ScanTable: table=07_optimizer_t, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
 DROP TABLE `07_optimizer_t`;
diff --git a/src/analytic_engine/src/row_iter/record_batch_stream.rs b/src/analytic_engine/src/row_iter/record_batch_stream.rs
index 5740d73715..49c41f2432 100644
--- a/src/analytic_engine/src/row_iter/record_batch_stream.rs
+++ b/src/analytic_engine/src/row_iter/record_batch_stream.rs
@@ -32,7 +32,7 @@ use common_types::{
 use datafusion::{
     common::ToDFSchema,
     error::DataFusionError,
-    logical_expr::utils::conjunction,
+    optimizer::utils::conjunction,
     physical_expr::{self, execution_props::ExecutionProps},
     physical_plan::PhysicalExpr,
 };
diff --git a/src/analytic_engine/src/sst/parquet/async_reader.rs b/src/analytic_engine/src/sst/parquet/async_reader.rs
index 49747b5376..94feeab2c5 100644
--- a/src/analytic_engine/src/sst/parquet/async_reader.rs
+++ b/src/analytic_engine/src/sst/parquet/async_reader.rs
@@ -219,7 +219,7 @@ impl<'a> Reader<'a> {
     ) -> Result<Option<RowSelection>> {
         // TODO: remove fixed partition
         let partition = 0;
-        let exprs = datafusion::logical_expr::utils::conjunction(self.predicate.exprs().to_vec());
+        let exprs = datafusion::optimizer::utils::conjunction(self.predicate.exprs().to_vec());
         let exprs = match exprs {
             Some(exprs) => exprs,
             None => return Ok(None),
diff --git a/src/components/parquet_ext/src/prune/min_max.rs b/src/components/parquet_ext/src/prune/min_max.rs
index 6bd3ad7496..4f5b27b22b 100644
--- a/src/components/parquet_ext/src/prune/min_max.rs
+++ b/src/components/parquet_ext/src/prune/min_max.rs
@@ -197,14 +197,6 @@ impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> {
         None
     }
 
-    // TODO: support this.
-    fn contained(
-        &self,
-        _column: &Column,
-        _values: &std::collections::HashSet<ScalarValue>,
-    ) -> Option<arrow::array::BooleanArray> {
-        None
-    }
 }
 
 #[cfg(test)]
diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs
index 3c4f18f0c5..218fbb90c9 100644
--- a/src/query_engine/src/datafusion_impl/mod.rs
+++ b/src/query_engine/src/datafusion_impl/mod.rs
@@ -24,13 +24,6 @@ use datafusion::{
         runtime_env::{RuntimeConfig, RuntimeEnv},
         FunctionRegistry,
     },
-    physical_optimizer::{
-        aggregate_statistics::AggregateStatistics, coalesce_batches::CoalesceBatches,
-        combine_partial_final_agg::CombinePartialFinalAggregate, enforce_sorting::EnforceSorting,
-        join_selection::JoinSelection, limited_distinct_aggregation::LimitedDistinctAggregation,
-        output_requirements::OutputRequirements, pipeline_checker::PipelineChecker,
-        projection_pushdown::ProjectionPushdown, topk_aggregation::TopKAggregation,
-    },
     prelude::{SessionConfig, SessionContext},
 };
 use df_engine_extensions::codec::PhysicalExtensionCodecImpl;
@@ -144,25 +137,8 @@ impl DfContextBuilder {
 
         // Using default logcial optimizer, if want to add more custom rule, using
         // `add_optimizer_rule` to add.
-        let mut state =
+        let state =
             SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone());
-        state = state.with_physical_optimizer_rules(vec![
-            Arc::new(OutputRequirements::new_add_mode()),
-            Arc::new(AggregateStatistics::new()),
-            Arc::new(JoinSelection::new()),
-            Arc::new(LimitedDistinctAggregation::new()),
-            // TODO: this rule will throw this error
-            // Internal error: Children cannot be replaced in ScanTable
-            // Arc::new(EnforceDistribution::new()),
-            // Arc::new(EnforceSorting::new()),
-            Arc::new(CombinePartialFinalAggregate::new()),
-            Arc::new(CoalesceBatches::new()),
-            Arc::new(OutputRequirements::new_remove_mode()),
-            Arc::new(PipelineChecker::new()),
-            Arc::new(TopKAggregation::new()),
-            // TODO: This rule is not public, so we can't use it
-            Arc::new(ProjectionPushdown::new()),
-        ]);
         SessionContext::new_with_state(state)
     }
 }
diff --git a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
index c1dcb27bf2..c791024b1a 100644
--- a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
+++ b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
@@ -517,7 +517,7 @@ impl Stream for PromAlignReader {
                 if !tsid_samples.is_empty() {
                     Poll::Ready(Some(
                         self.samples_to_record_batch(schema, tsid_samples)
-                            .map_err(|err| DataFusionError::ArrowError(err, None)),
+                            .map_err(|err| DataFusionError::ArrowError(err)),
                     ))
                 } else {
                     Poll::Ready(Some(Ok(RecordBatch::new_empty(schema))))
@@ -532,7 +532,7 @@ impl Stream for PromAlignReader {
                     if !tsid_samples.is_empty() {
                         return Poll::Ready(Some(
                             self.samples_to_record_batch(schema, tsid_samples)
-                                .map_err(|err| DataFusionError::ArrowError(err, None)),
+                                .map_err(|err| DataFusionError::ArrowError(err)),
                         ));
                     }
                 }
diff --git a/src/query_frontend/src/logical_optimizer/type_conversion.rs b/src/query_frontend/src/logical_optimizer/type_conversion.rs
index 95076f33c2..eff51b0289 100644
--- a/src/query_frontend/src/logical_optimizer/type_conversion.rs
+++ b/src/query_frontend/src/logical_optimizer/type_conversion.rs
@@ -213,7 +213,7 @@ impl<'a> TypeRewriter<'a> {
         let array = value.to_array()?;
         ScalarValue::try_from_array(
             &compute::cast(&array, data_type)
-                .map_err(|err| DataFusionError::ArrowError(err, None))?,
+                .map_err(|err| DataFusionError::ArrowError(err))?,
             // index: Converts a value in `array` at `index` into a ScalarValue
             0,
         )
diff --git a/src/query_frontend/src/plan.rs b/src/query_frontend/src/plan.rs
index e5db6238eb..4ebe33e215 100644
--- a/src/query_frontend/src/plan.rs
+++ b/src/query_frontend/src/plan.rs
@@ -210,6 +210,7 @@ impl QueryPlan {
     // TODO: Currently we only consider the time range, consider other factors, such
     // as the number of series, or slow log metrics.
     pub fn decide_query_priority(&self, ctx: PriorityContext) -> Result<Option<Priority>> {
+            // return Ok(Some(Priority::High));
         let threshold = ctx.time_range_threshold;
         let time_range = match self.extract_time_range()? {
             Some(v) => v,
diff --git a/src/query_frontend/src/planner.rs b/src/query_frontend/src/planner.rs
index e5c8a583ab..8e02f5ee9e 100644
--- a/src/query_frontend/src/planner.rs
+++ b/src/query_frontend/src/planner.rs
@@ -984,7 +984,7 @@ impl<'a, P: MetaProvider> PlannerDelegate<'a, P> {
                     }
                 }
 
-                let rows = build_row_group(schema, source.unwrap(), column_index_in_insert)?;
+                let rows = build_row_group(schema, source, column_index_in_insert)?;
 
                 Ok(Plan::Insert(InsertPlan {
                     table,
diff --git a/src/query_frontend/src/promql/convert.rs b/src/query_frontend/src/promql/convert.rs
index e92e0b9922..1d4a7e498a 100644
--- a/src/query_frontend/src/promql/convert.rs
+++ b/src/query_frontend/src/promql/convert.rs
@@ -22,13 +22,13 @@ use common_types::{
     time::{TimeRange, Timestamp},
 };
 use datafusion::{
+    optimizer::utils::conjunction,
     logical_expr::{
         avg, count,
         expr::{Alias, ScalarFunction},
         lit,
         logical_plan::{Extension, LogicalPlan, LogicalPlanBuilder},
         max, min, sum,
-        utils::conjunction,
         Expr as DataFusionExpr,
     },
     prelude::ident,
diff --git a/src/query_frontend/src/promql/remote.rs b/src/query_frontend/src/promql/remote.rs
index e8fc99e8be..c3c1439ec7 100644
--- a/src/query_frontend/src/promql/remote.rs
+++ b/src/query_frontend/src/promql/remote.rs
@@ -21,7 +21,8 @@ use std::sync::Arc;
 
 use common_types::{schema::Schema, time::TimeRange};
 use datafusion::{
-    logical_expr::{utils::conjunction, LogicalPlanBuilder, Operator},
+    logical_expr::{LogicalPlanBuilder, Operator},
+    optimizer::utils::conjunction,
     prelude::{ident, lit, Expr},
     sql::{planner::ContextProvider, TableReference},
 };
diff --git a/src/table_engine/src/predicate.rs b/src/table_engine/src/predicate.rs
index e71180a0e0..b316b99e24 100644
--- a/src/table_engine/src/predicate.rs
+++ b/src/table_engine/src/predicate.rs
@@ -429,6 +429,7 @@ impl<'a> TimeRangeExtractor<'a> {
             | Expr::IsUnknown(_)
             | Expr::IsNotUnknown(_)
             | Expr::Negative(_)
+            | Expr::AggregateUDF(_)
             | Expr::Case { .. }
             | Expr::Cast { .. }
             | Expr::TryCast { .. }

From 6398c19e5d8b21c7f088298b03b29bcb6d8550d3 Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Sun, 4 Feb 2024 17:52:25 +0800
Subject: [PATCH 13/25] fix clippy

---
 src/df_engine_extensions/src/dist_sql_query/test_util.rs    | 6 ++++--
 .../datafusion_impl/physical_plan_extension/prom_align.rs   | 4 ++--
 src/query_frontend/src/logical_optimizer/type_conversion.rs | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/df_engine_extensions/src/dist_sql_query/test_util.rs b/src/df_engine_extensions/src/dist_sql_query/test_util.rs
index c1d00fb227..c42f9e3862 100644
--- a/src/df_engine_extensions/src/dist_sql_query/test_util.rs
+++ b/src/df_engine_extensions/src/dist_sql_query/test_util.rs
@@ -262,7 +262,8 @@ impl TestContext {
                 AggregateMode::Partial,
                 self.group_by.clone(),
                 self.aggr_exprs.clone(),
-                vec![None; self.aggr_exprs.len()],
+                vec![None],
+                vec![None],
                 input,
                 input_schema.clone(),
             )
@@ -287,7 +288,8 @@ impl TestContext {
                 AggregateMode::Final,
                 final_group_by,
                 self.aggr_exprs.clone(),
-                vec![None; self.aggr_exprs.len()],
+                vec![None],
+                vec![None],
                 merge,
                 input_schema,
             )
diff --git a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
index c791024b1a..9fe8cc74c7 100644
--- a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
+++ b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
@@ -517,7 +517,7 @@ impl Stream for PromAlignReader {
                 if !tsid_samples.is_empty() {
                     Poll::Ready(Some(
                         self.samples_to_record_batch(schema, tsid_samples)
-                            .map_err(|err| DataFusionError::ArrowError(err)),
+                            .map_err(DataFusionError::ArrowError),
                     ))
                 } else {
                     Poll::Ready(Some(Ok(RecordBatch::new_empty(schema))))
@@ -532,7 +532,7 @@ impl Stream for PromAlignReader {
                     if !tsid_samples.is_empty() {
                         return Poll::Ready(Some(
                             self.samples_to_record_batch(schema, tsid_samples)
-                                .map_err(|err| DataFusionError::ArrowError(err)),
+                                .map_err(DataFusionError::ArrowError),
                         ));
                     }
                 }
diff --git a/src/query_frontend/src/logical_optimizer/type_conversion.rs b/src/query_frontend/src/logical_optimizer/type_conversion.rs
index eff51b0289..cdea2cc781 100644
--- a/src/query_frontend/src/logical_optimizer/type_conversion.rs
+++ b/src/query_frontend/src/logical_optimizer/type_conversion.rs
@@ -213,7 +213,7 @@ impl<'a> TypeRewriter<'a> {
         let array = value.to_array()?;
         ScalarValue::try_from_array(
             &compute::cast(&array, data_type)
-                .map_err(|err| DataFusionError::ArrowError(err))?,
+                .map_err(DataFusionError::ArrowError)?,
             // index: Converts a value in `array` at `index` into a ScalarValue
             0,
         )

From 21e43033a04c6b316975a65653d39e919f8728ad Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Mon, 5 Feb 2024 17:38:48 +0800
Subject: [PATCH 14/25] fix fmt

---
 src/common_types/src/record_batch.rs             | 16 ++++++++++++----
 src/components/parquet_ext/src/prune/min_max.rs  |  1 -
 src/query_engine/src/datafusion_impl/mod.rs      |  3 +--
 .../src/logical_optimizer/type_conversion.rs     |  3 +--
 src/query_frontend/src/plan.rs                   |  1 -
 src/query_frontend/src/promql/convert.rs         |  5 ++---
 src/query_frontend/src/provider.rs               |  1 -
 7 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/src/common_types/src/record_batch.rs b/src/common_types/src/record_batch.rs
index 2a543ca552..058d4c41b4 100644
--- a/src/common_types/src/record_batch.rs
+++ b/src/common_types/src/record_batch.rs
@@ -24,7 +24,7 @@ use arrow::{
     compute,
     datatypes::{DataType, Field, Schema, SchemaRef as ArrowSchemaRef, TimeUnit},
     error::ArrowError,
-    record_batch::RecordBatch as ArrowRecordBatch,
+    record_batch::{RecordBatch as ArrowRecordBatch, RecordBatchOptions},
 };
 use arrow_ext::operation;
 use snafu::{ensure, Backtrace, OptionExt, ResultExt, Snafu};
@@ -128,10 +128,18 @@ impl RecordBatchData {
         let arrays = column_blocks
             .iter()
             .map(|column| column.to_arrow_array_ref())
-            .collect();
-
+            .collect::<Vec<_>>();
+
+        println!("debug column_blocks:{column_blocks:?}");
+        println!("debug column_blocks2:{:?}", column_blocks.len());
+        let mut options = RecordBatchOptions::new();
+        if let Some(len) =  arrays.first().map(|col| col.len()) {
+             options = options.with_row_count(Some(len));
+        } else {
+             options = options.with_row_count(Some(0));
+        }
         let arrow_record_batch =
-            ArrowRecordBatch::try_new(arrow_schema, arrays).context(CreateArrow)?;
+            ArrowRecordBatch::try_new_with_options(arrow_schema, arrays, &options).context(CreateArrow)?;
 
         Ok(RecordBatchData {
             arrow_record_batch,
diff --git a/src/components/parquet_ext/src/prune/min_max.rs b/src/components/parquet_ext/src/prune/min_max.rs
index 4f5b27b22b..0a717021a1 100644
--- a/src/components/parquet_ext/src/prune/min_max.rs
+++ b/src/components/parquet_ext/src/prune/min_max.rs
@@ -196,7 +196,6 @@ impl<'a> PruningStatistics for RowGroupPruningStatistics<'a> {
     fn null_counts(&self, _column: &Column) -> Option<ArrayRef> {
         None
     }
-
 }
 
 #[cfg(test)]
diff --git a/src/query_engine/src/datafusion_impl/mod.rs b/src/query_engine/src/datafusion_impl/mod.rs
index 218fbb90c9..482628f836 100644
--- a/src/query_engine/src/datafusion_impl/mod.rs
+++ b/src/query_engine/src/datafusion_impl/mod.rs
@@ -137,8 +137,7 @@ impl DfContextBuilder {
 
         // Using default logcial optimizer, if want to add more custom rule, using
         // `add_optimizer_rule` to add.
-        let state =
-            SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone());
+        let state = SessionState::new_with_config_rt(df_session_config, self.runtime_env.clone());
         SessionContext::new_with_state(state)
     }
 }
diff --git a/src/query_frontend/src/logical_optimizer/type_conversion.rs b/src/query_frontend/src/logical_optimizer/type_conversion.rs
index cdea2cc781..0aeaaba207 100644
--- a/src/query_frontend/src/logical_optimizer/type_conversion.rs
+++ b/src/query_frontend/src/logical_optimizer/type_conversion.rs
@@ -212,8 +212,7 @@ impl<'a> TypeRewriter<'a> {
 
         let array = value.to_array()?;
         ScalarValue::try_from_array(
-            &compute::cast(&array, data_type)
-                .map_err(DataFusionError::ArrowError)?,
+            &compute::cast(&array, data_type).map_err(DataFusionError::ArrowError)?,
             // index: Converts a value in `array` at `index` into a ScalarValue
             0,
         )
diff --git a/src/query_frontend/src/plan.rs b/src/query_frontend/src/plan.rs
index 4ebe33e215..e5db6238eb 100644
--- a/src/query_frontend/src/plan.rs
+++ b/src/query_frontend/src/plan.rs
@@ -210,7 +210,6 @@ impl QueryPlan {
     // TODO: Currently we only consider the time range, consider other factors, such
     // as the number of series, or slow log metrics.
     pub fn decide_query_priority(&self, ctx: PriorityContext) -> Result<Option<Priority>> {
-            // return Ok(Some(Priority::High));
         let threshold = ctx.time_range_threshold;
         let time_range = match self.extract_time_range()? {
             Some(v) => v,
diff --git a/src/query_frontend/src/promql/convert.rs b/src/query_frontend/src/promql/convert.rs
index 1d4a7e498a..f364a0b101 100644
--- a/src/query_frontend/src/promql/convert.rs
+++ b/src/query_frontend/src/promql/convert.rs
@@ -22,15 +22,14 @@ use common_types::{
     time::{TimeRange, Timestamp},
 };
 use datafusion::{
-    optimizer::utils::conjunction,
     logical_expr::{
         avg, count,
         expr::{Alias, ScalarFunction},
         lit,
         logical_plan::{Extension, LogicalPlan, LogicalPlanBuilder},
-        max, min, sum,
-        Expr as DataFusionExpr,
+        max, min, sum, Expr as DataFusionExpr,
     },
+    optimizer::utils::conjunction,
     prelude::ident,
     sql::planner::ContextProvider,
 };
diff --git a/src/query_frontend/src/provider.rs b/src/query_frontend/src/provider.rs
index 5a9cdf8514..6464725405 100644
--- a/src/query_frontend/src/provider.rs
+++ b/src/query_frontend/src/provider.rs
@@ -413,7 +413,6 @@ impl<'a, P: MetaProvider> ContextProvider for ContextProviderAdapter<'a, P> {
     fn get_window_meta(&self, _name: &str) -> Option<Arc<datafusion::logical_expr::WindowUDF>> {
         None
     }
-
 }
 
 struct SchemaProviderAdapter {

From c10fe3c7aaffddd085d4db2bac0bf414c9084f3f Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Wed, 7 Feb 2024 15:12:03 +0800
Subject: [PATCH 15/25] debug fmt

---
 .../env/cluster/ddl/partition_table.result    |  4 +-
 src/analytic_engine/src/row_iter/merge.rs     |  3 ++
 src/analytic_engine/src/table/mod.rs          |  2 +-
 src/common_types/src/record_batch.rs          | 47 +++++++++++--------
 src/interpreters/src/tests.rs                 |  6 +--
 5 files changed, 36 insertions(+), 26 deletions(-)

diff --git a/integration_tests/cases/env/cluster/ddl/partition_table.result b/integration_tests/cases/env/cluster/ddl/partition_table.result
index d376718cc7..ee935cb3c1 100644
--- a/integration_tests/cases/env/cluster/ddl/partition_table.result
+++ b/integration_tests/cases/env/cluster/ddl/partition_table.result
@@ -83,7 +83,7 @@ UInt64(16367588166920223437),Timestamp(1651737067000),String("horaedb9"),Int32(0
 EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0";
 
 plan_type,plan,
-String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=[\npartition_table_t:\n    __partition_table_t_1:\n        poll_duration=xxs\n        total_duration=xxs\n        wait_duration=xxs\n\n__partition_table_t_1:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n  ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
+String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=[\npartition_table_t:\n    __partition_table_t_1:\n        poll_duration=xxs\n        total_duration=xxs\n        wait_duration=xxs\n\n__partition_table_t_1:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n  ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
 
 
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
@@ -92,7 +92,7 @@ String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:f
 EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4");
 
 plan_type,plan,
-String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=[\npartition_table_t:\n    __partition_table_t_x:\n        poll_duration=xxs\n        total_duration=xxs\n        wait_duration=xxs\n    __partition_table_t_x:\n        poll_duration=xxs\n        total_duration=xxs\n        wait_duration=xxs\n    __partition_table_t_x:\n        poll_duration=xxs\n        total_duration=xxs\n        wait_duration=xxs\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n  ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n  ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        
total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n  ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
+String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=[\npartition_table_t:\n    __partition_table_t_x:\n        poll_duration=xxs\n        total_duration=xxs\n        wait_duration=xxs\n    __partition_table_t_x:\n        poll_duration=xxs\n        total_duration=xxs\n        wait_duration=xxs\n    __partition_table_t_x:\n        poll_duration=xxs\n        total_duration=xxs\n        wait_duration=xxs\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n  ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n  ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        
times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n  ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
 
 
 ALTER TABLE partition_table_t ADD COLUMN (b string);
diff --git a/src/analytic_engine/src/row_iter/merge.rs b/src/analytic_engine/src/row_iter/merge.rs
index 88f58c2df9..67a2f4045d 100644
--- a/src/analytic_engine/src/row_iter/merge.rs
+++ b/src/analytic_engine/src/row_iter/merge.rs
@@ -486,6 +486,7 @@ impl BufferedStream {
                 Ok(false)
             }
             Some(record_batch) => {
+                logger::info!("if necessary: {record_batch:?}");
                 self.state.as_mut().unwrap().reset(record_batch);
                 Ok(true)
             }
@@ -844,6 +845,7 @@ impl MergeIterator {
 
             None
         };
+        logger::info!("debug fetch rows:{record_batch:?}");
 
         self.reheap(buffered_stream).await?;
 
@@ -873,6 +875,7 @@ impl MergeIterator {
 
         self.record_batch_builder.clear();
 
+        logger::info!("fetch next batch, {}", self.record_batch_builder.len());
         while !self.hot.is_empty() && self.record_batch_builder.len() < self.iter_options.batch_size
         {
             // no need to do merge sort if only one batch in the hot heap.
diff --git a/src/analytic_engine/src/table/mod.rs b/src/analytic_engine/src/table/mod.rs
index af381b5baa..e6dfd1dc2f 100644
--- a/src/analytic_engine/src/table/mod.rs
+++ b/src/analytic_engine/src/table/mod.rs
@@ -432,7 +432,7 @@ pub fn support_pushdown(schema: &Schema, need_dedup: bool, col_names: &[String])
     // When table need dedup, only unique keys columns support pushdown
     col_names
         .iter()
-        .all(|col_name| schema.is_unique_column(col_name.as_str()))
+        .all(|col_name| !schema.is_unique_column(col_name.as_str()))
 }
 
 #[async_trait]
diff --git a/src/common_types/src/record_batch.rs b/src/common_types/src/record_batch.rs
index 058d4c41b4..6634c71e80 100644
--- a/src/common_types/src/record_batch.rs
+++ b/src/common_types/src/record_batch.rs
@@ -124,22 +124,18 @@ pub struct RecordBatchData {
 }
 
 impl RecordBatchData {
-    fn new(arrow_schema: ArrowSchemaRef, column_blocks: Vec<ColumnBlock>) -> Result<Self> {
+    fn new(
+        arrow_schema: ArrowSchemaRef,
+        column_blocks: Vec<ColumnBlock>,
+        options: RecordBatchOptions,
+    ) -> Result<Self> {
         let arrays = column_blocks
             .iter()
             .map(|column| column.to_arrow_array_ref())
             .collect::<Vec<_>>();
-
-        println!("debug column_blocks:{column_blocks:?}");
-        println!("debug column_blocks2:{:?}", column_blocks.len());
-        let mut options = RecordBatchOptions::new();
-        if let Some(len) =  arrays.first().map(|col| col.len()) {
-             options = options.with_row_count(Some(len));
-        } else {
-             options = options.with_row_count(Some(0));
-        }
         let arrow_record_batch =
-            ArrowRecordBatch::try_new_with_options(arrow_schema, arrays, &options).context(CreateArrow)?;
+            ArrowRecordBatch::try_new_with_options(arrow_schema, arrays, &options)
+                .context(CreateArrow)?;
 
         Ok(RecordBatchData {
             arrow_record_batch,
@@ -148,10 +144,7 @@ impl RecordBatchData {
     }
 
     fn num_rows(&self) -> usize {
-        self.column_blocks
-            .first()
-            .map(|column| column.num_rows())
-            .unwrap_or(0)
+        self.arrow_record_batch.num_rows()
     }
 
     fn take_column_block(&mut self, index: usize) -> ColumnBlock {
@@ -238,6 +231,11 @@ impl RecordBatch {
     pub fn new(schema: RecordSchema, column_blocks: Vec<ColumnBlock>) -> Result<Self> {
         ensure!(schema.num_columns() == column_blocks.len(), SchemaLen);
 
+        let num_rows = column_blocks
+            .first()
+            .map(|block| block.num_rows())
+            .unwrap_or_default();
+        let options = RecordBatchOptions::new().with_row_count(Some(num_rows));
         // Validate schema and column_blocks.
         for (column_schema, column_block) in schema.columns().iter().zip(column_blocks.iter()) {
             ensure!(
@@ -251,7 +249,7 @@ impl RecordBatch {
         }
 
         let arrow_schema = schema.to_arrow_schema_ref();
-        let data = RecordBatchData::new(arrow_schema, column_blocks)?;
+        let data = RecordBatchData::new(arrow_schema, column_blocks, options)?;
 
         Ok(Self { schema, data })
     }
@@ -396,6 +394,7 @@ impl FetchedRecordBatch {
         let mut column_blocks = Vec::with_capacity(fetched_schema.num_columns());
         let num_rows = arrow_record_batch.num_rows();
         let num_columns = arrow_record_batch.num_columns();
+        let options = RecordBatchOptions::new().with_row_count(Some(num_rows));
         for (col_idx_opt, col_schema) in column_indexes.iter().zip(fetched_schema.columns()) {
             match col_idx_opt {
                 Some(col_idx) => {
@@ -427,7 +426,8 @@ impl FetchedRecordBatch {
             }
         }
 
-        let data = RecordBatchData::new(fetched_schema.to_arrow_schema_ref(), column_blocks)?;
+        let data =
+            RecordBatchData::new(fetched_schema.to_arrow_schema_ref(), column_blocks, options)?;
 
         Ok(FetchedRecordBatch {
             schema: fetched_schema,
@@ -479,6 +479,8 @@ impl FetchedRecordBatch {
         // Get the schema after projection.
         let record_schema = projected_schema.to_record_schema();
         let mut column_blocks = Vec::with_capacity(record_schema.num_columns());
+        let num_rows = self.data.num_rows();
+        let options = RecordBatchOptions::new().with_row_count(Some(num_rows));
 
         for column_schema in record_schema.columns() {
             let column_index =
@@ -493,8 +495,8 @@ impl FetchedRecordBatch {
             column_blocks.push(column_block);
         }
 
-        let data = RecordBatchData::new(record_schema.to_arrow_schema_ref(), column_blocks)?;
-
+        let data =
+            RecordBatchData::new(record_schema.to_arrow_schema_ref(), column_blocks, options)?;
         Ok(RecordBatch {
             schema: record_schema,
             data,
@@ -733,11 +735,16 @@ impl FetchedRecordBatchBuilder {
             .map(|builder| builder.build())
             .collect();
         let arrow_schema = self.fetched_schema.to_arrow_schema_ref();
+        let num_rows = column_blocks
+            .first()
+            .map(|block| block.num_rows())
+            .unwrap_or_default();
+        let options = RecordBatchOptions::new().with_row_count(Some(num_rows));
 
         Ok(FetchedRecordBatch {
             schema: self.fetched_schema.clone(),
             primary_key_indexes: self.primary_key_indexes.clone(),
-            data: RecordBatchData::new(arrow_schema, column_blocks)?,
+            data: RecordBatchData::new(arrow_schema, column_blocks, options)?,
         })
     }
 }
diff --git a/src/interpreters/src/tests.rs b/src/interpreters/src/tests.rs
index 6d521738f7..a69944269b 100644
--- a/src/interpreters/src/tests.rs
+++ b/src/interpreters/src/tests.rs
@@ -117,7 +117,7 @@ where
             .enable_partition_table_access(enable_partition_table_access)
             .build();
         let sql= format!("CREATE TABLE IF NOT EXISTS {table_name}(c1 string tag not null,ts timestamp not null, c3 string, timestamp key(ts),primary key(c1, ts)) \
-        ENGINE=Analytic WITH (ttl='70d',update_mode='overwrite',arena_block_size='1KB')");
+        ENGINE=Analytic WITH (enable_ttl='false',update_mode='overwrite',arena_block_size='1KB')");
 
         let output = self.sql_to_output_with_context(&sql, ctx).await?;
         assert!(
@@ -156,7 +156,7 @@ where
             .default_catalog_and_schema(DEFAULT_CATALOG.to_string(), DEFAULT_SCHEMA.to_string())
             .enable_partition_table_access(enable_partition_table_access)
             .build();
-        let sql = format!("select * from {table_name}");
+        let sql = format!("explain analyze select * from {table_name}");
         let output = self.sql_to_output_with_context(&sql, ctx).await?;
         let records = output.try_into().unwrap();
         let expected = vec![
@@ -169,7 +169,7 @@ where
         ];
         test_util::assert_record_batches_eq(&expected, records);
 
-        let sql = "select count(*) from test_table";
+        let sql = "explain analyze select count(*) from test_table";
         let output = self.sql_to_output(sql).await?;
         let records = output.try_into().unwrap();
         let expected = vec![

From 93dafe9203cdec87b28ac9cc35c0f98d9e2ad23e Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Tue, 20 Feb 2024 20:54:46 +0800
Subject: [PATCH 16/25] fix test_interpreters_rocks

---
 src/common_types/src/record_batch.rs | 11 +++++------
 src/interpreters/src/tests.rs        | 18 +++++++++---------
 src/proxy/src/grpc/prom_query.rs     |  2 +-
 src/proxy/src/influxdb/types.rs      |  2 +-
 src/table_engine/src/memory.rs       |  2 +-
 5 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/src/common_types/src/record_batch.rs b/src/common_types/src/record_batch.rs
index 6634c71e80..2c787be465 100644
--- a/src/common_types/src/record_batch.rs
+++ b/src/common_types/src/record_batch.rs
@@ -228,13 +228,12 @@ impl RecordBatch {
         }
     }
 
-    pub fn new(schema: RecordSchema, column_blocks: Vec<ColumnBlock>) -> Result<Self> {
+    pub fn new(
+        schema: RecordSchema,
+        column_blocks: Vec<ColumnBlock>,
+        num_rows: usize,
+    ) -> Result<Self> {
         ensure!(schema.num_columns() == column_blocks.len(), SchemaLen);
-
-        let num_rows = column_blocks
-            .first()
-            .map(|block| block.num_rows())
-            .unwrap_or_default();
         let options = RecordBatchOptions::new().with_row_count(Some(num_rows));
         // Validate schema and column_blocks.
         for (column_schema, column_block) in schema.columns().iter().zip(column_blocks.iter()) {
diff --git a/src/interpreters/src/tests.rs b/src/interpreters/src/tests.rs
index a69944269b..f9c8c75bd9 100644
--- a/src/interpreters/src/tests.rs
+++ b/src/interpreters/src/tests.rs
@@ -156,8 +156,8 @@ where
             .default_catalog_and_schema(DEFAULT_CATALOG.to_string(), DEFAULT_SCHEMA.to_string())
             .enable_partition_table_access(enable_partition_table_access)
             .build();
-        let sql = format!("explain analyze select * from {table_name}");
-        let output = self.sql_to_output_with_context(&sql, ctx).await?;
+        let sql = format!("select * from {table_name}");
+        let output = self.sql_to_output_with_context(&sql, ctx.clone()).await?;
         let records = output.try_into().unwrap();
         let expected = vec![
             "+------------+---------------------+--------+--------+------------+--------------+",
@@ -169,15 +169,15 @@ where
         ];
         test_util::assert_record_batches_eq(&expected, records);
 
-        let sql = "explain analyze select count(*) from test_table";
-        let output = self.sql_to_output(sql).await?;
+        let sql = format!("select count(*) from {table_name}");
+        let output = self.sql_to_output_with_context(&sql, ctx).await?;
         let records = output.try_into().unwrap();
         let expected = vec![
-            "+-----------------+",
-            "| COUNT(UInt8(1)) |",
-            "+-----------------+",
-            "| 2               |",
-            "+-----------------+",
+            "+----------+",
+            "| COUNT(*) |",
+            "+----------+",
+            "| 2        |",
+            "+----------+",
         ];
         test_util::assert_record_batches_eq(&expected, records);
 
diff --git a/src/proxy/src/grpc/prom_query.rs b/src/proxy/src/grpc/prom_query.rs
index 1c999ad0c0..673b6131a5 100644
--- a/src/proxy/src/grpc/prom_query.rs
+++ b/src/proxy/src/grpc/prom_query.rs
@@ -471,7 +471,7 @@ mod tests {
         let schema = build_schema();
         let record_schema = schema.to_record_schema();
         let column_blocks = build_column_block();
-        let record_batch = RecordBatch::new(record_schema, column_blocks).unwrap();
+        let record_batch = RecordBatch::new(record_schema, column_blocks, 4).unwrap();
 
         let column_name = ColumnNames {
             timestamp: "timestamp".to_string(),
diff --git a/src/proxy/src/influxdb/types.rs b/src/proxy/src/influxdb/types.rs
index 117b5cf31c..cd2b229d6d 100644
--- a/src/proxy/src/influxdb/types.rs
+++ b/src/proxy/src/influxdb/types.rs
@@ -744,7 +744,7 @@ mod tests {
     fn test_influxql_result() {
         let record_schema = build_test_record_schema();
         let column_blocks = build_test_column_blocks();
-        let record_batch = RecordBatch::new(record_schema, column_blocks).unwrap();
+        let record_batch = RecordBatch::new(record_schema, column_blocks, 4).unwrap();
 
         let mut builder = InfluxqlResultBuilder::new(record_batch.schema(), 0).unwrap();
         builder.add_record_batch(record_batch).unwrap();
diff --git a/src/table_engine/src/memory.rs b/src/table_engine/src/memory.rs
index 689677052a..20cfe583e4 100644
--- a/src/table_engine/src/memory.rs
+++ b/src/table_engine/src/memory.rs
@@ -260,7 +260,7 @@ fn row_group_to_record_batch(
         column_blocks.push(column_block);
     }
 
-    RecordBatch::new(record_schema.clone(), column_blocks)
+    RecordBatch::new(record_schema.clone(), column_blocks, rows.num_rows())
         .box_err()
         .context(ErrWithSource {
             msg: "failed to create RecordBatch",

From c9ba7a5d28a276d8fe7d8001dd2a515365de21ab Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Tue, 20 Feb 2024 21:35:38 +0800
Subject: [PATCH 17/25] fix integration test

---
 integration_tests/cases/common/dml/issue-341.result       | 8 ++++----
 .../cases/env/cluster/ddl/partition_table.result          | 8 ++++++--
 .../cases/env/cluster/ddl/partition_table.sql             | 4 ++++
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/integration_tests/cases/common/dml/issue-341.result b/integration_tests/cases/common/dml/issue-341.result
index 4e42d84c80..a68d4f5133 100644
--- a/integration_tests/cases/common/dml/issue-341.result
+++ b/integration_tests/cases/common/dml/issue-341.result
@@ -115,8 +115,8 @@ WHERE
     `value` = 3;
 
 plan_type,plan,
-String("logical_plan"),String("Filter: issue341_t2.value = Float64(3)\n  TableScan: issue341_t2 projection=[timestamp, value], partial_filters=[issue341_t2.value = Float64(3)]"),
-String("physical_plan"),String("CoalesceBatchesExec: target_batch_size=8192\n  FilterExec: value@1 = 3\n    ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
+String("logical_plan"),String("TableScan: issue341_t2 projection=[timestamp, value], full_filters=[issue341_t2.value = Float64(3)]"),
+String("physical_plan"),String("ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
 -- When using tag as filter, FilterExec node should not be in plan.
@@ -129,8 +129,8 @@ WHERE
     tag1 = "t3";
 
 plan_type,plan,
-String("logical_plan"),String("TableScan: issue341_t2 projection=[timestamp, value], full_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"),
-String("physical_plan"),String("ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(1)\n"),
+String("logical_plan"),String("Projection: issue341_t2.timestamp, issue341_t2.value\n  Filter: issue341_t2.tag1 = Utf8(\"t3\")\n    TableScan: issue341_t2 projection=[timestamp, value, tag1], partial_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"),
+String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n  CoalesceBatchesExec: target_batch_size=8192\n    FilterExec: tag1@2 = t3\n      ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
 DROP TABLE IF EXISTS `issue341_t1`;
diff --git a/integration_tests/cases/env/cluster/ddl/partition_table.result b/integration_tests/cases/env/cluster/ddl/partition_table.result
index ee935cb3c1..3f7bb7d8bb 100644
--- a/integration_tests/cases/env/cluster/ddl/partition_table.result
+++ b/integration_tests/cases/env/cluster/ddl/partition_table.result
@@ -80,19 +80,23 @@ UInt64(16367588166920223437),Timestamp(1651737067000),String("horaedb9"),Int32(0
 
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
 -- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx
+-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0";
 
 plan_type,plan,
-String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=[\npartition_table_t:\n    __partition_table_t_1:\n        poll_duration=xxs\n        total_duration=xxs\n        wait_duration=xxs\n\n__partition_table_t_1:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n  ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
+String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:4, metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: name@2 = ceresdb0, metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_0, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: name@2 = ceresdb0, metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: name@2 = ceresdb0, metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: name@2 = ceresdb0, metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_3, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
 
 
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
 -- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx
 -- SQLNESS REPLACE __partition_table_t_\d __partition_table_t_x
+-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4");
 
 plan_type,plan,
-String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=[\npartition_table_t:\n    __partition_table_t_x:\n        poll_duration=xxs\n        total_duration=xxs\n        wait_duration=xxs\n    __partition_table_t_x:\n        poll_duration=xxs\n        total_duration=xxs\n        wait_duration=xxs\n    __partition_table_t_x:\n        poll_duration=xxs\n        total_duration=xxs\n        wait_duration=xxs\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n  ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n  ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        
times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n\n__partition_table_t_x:\nCoalescePartitionsExec, metrics=[output_rows=0, elapsed_compute=xxs]\n  ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
+String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:4, metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n      
RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
 
 
 ALTER TABLE partition_table_t ADD COLUMN (b string);
diff --git a/integration_tests/cases/env/cluster/ddl/partition_table.sql b/integration_tests/cases/env/cluster/ddl/partition_table.sql
index a36b59ac2d..a87dfbb2cd 100644
--- a/integration_tests/cases/env/cluster/ddl/partition_table.sql
+++ b/integration_tests/cases/env/cluster/ddl/partition_table.sql
@@ -37,11 +37,15 @@ SELECT * from partition_table_t where name in ("horaedb5", "horaedb6", "horaedb7
 
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
 -- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx
+-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0";
 
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
 -- SQLNESS REPLACE compute=\d+.?\d*(µ|m|n) compute=xx
 -- SQLNESS REPLACE __partition_table_t_\d __partition_table_t_x
+-- SQLNESS REPLACE time=\d+.?\d*(µ|m|n) time=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4");
 
 ALTER TABLE partition_table_t ADD COLUMN (b string);

From be2ed54e4474a3b37b4e58b2849ddea7b9177607 Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Wed, 21 Feb 2024 10:18:25 +0800
Subject: [PATCH 18/25] update query plan

---
 .../cases/env/local/ddl/query-plan.result     | 19 ++++++++++++++-----
 .../cases/env/local/ddl/query-plan.sql        |  9 +++++++++
 2 files changed, 23 insertions(+), 5 deletions(-)

diff --git a/integration_tests/cases/env/local/ddl/query-plan.result b/integration_tests/cases/env/local/ddl/query-plan.result
index 917767bf02..f471cc3ced 100644
--- a/integration_tests/cases/env/local/ddl/query-plan.result
+++ b/integration_tests/cases/env/local/ddl/query-plan.result
@@ -27,48 +27,53 @@ affected_rows: 3
 
 -- This query should include memtable
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348001000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=1\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
+String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n  FilterExec: t@0 > 1695348001000, metrics=xx\n    ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=1\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=3\n        scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
 
 
 -- This query should have higher priority
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select t from `03_dml_select_real_time_range`
 where t >= 1695348001000 and t < 1695348002000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=1\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
+String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n  FilterExec: t@0 >= 1695348001000 AND t@0 < 1695348002000, metrics=xx\n    ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=1\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=3\n        scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
 
 
 -- This query should not include memtable
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348002000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=0\n=0]\n"),
+String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n  FilterExec: t@0 > 1695348002000, metrics=xx\n    ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=0\n=0]\n"),
 
 
 -- SQLNESS ARG pre_cmd=flush
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
 -- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 -- This query should include SST
 explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348001000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=1\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_sst_1, fetched_columns:[tsid,t]:\n            meta_data_cache_hit=false\n            parallelism=1\n            project_record_batch=xxs\n            read_meta_data_duration=xxs\n            row_mem=320\n            row_num=3\n            prune_row_groups:\n                pruned_by_custom_filter=0\n                pruned_by_min_max=0\n                row_groups_after_prune=1\n                total_row_groups=1\n                use_custom_filter=false\n=0]\n"),
+String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n  FilterExec: t@0 > 1695348001000, metrics=xx\n    ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=1\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=3\n        scan_sst_1, fetched_columns:[tsid,t]:\n            meta_data_cache_hit=false\n            parallelism=1\n            project_record_batch=xxs\n            read_meta_data_duration=xxs\n            row_mem=320\n            row_num=3\n            prune_row_groups:\n                pruned_by_custom_filter=0\n                pruned_by_min_max=0\n                row_groups_after_prune=1\n                total_row_groups=1\n                use_custom_filter=false\n=0]\n"),
 
 
 -- This query should not include SST
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348002000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=0\n=0]\n"),
+String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n  FilterExec: t@0 > 1695348002000, metrics=xx\n    ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=0\n=0]\n"),
 
 
 -- Table with an 'append' update mode
@@ -97,6 +102,7 @@ affected_rows: 3
 -- SQLNESS REPLACE since_create=\d+.?\d*(µ|m|n) since_create=xx
 -- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx
 -- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select t from `03_append_mode_table`
 where t >= 1695348001000 and name = 'ceresdb';
 
@@ -109,6 +115,7 @@ Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to ex
 -- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx
 -- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx
 -- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select t from `03_append_mode_table`
 where t >= 1695348001000 and name = 'ceresdb';
 
@@ -128,6 +135,7 @@ affected_rows: 0
 
 -- This query should have higher priority
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select TS from `TEST_QUERY_PRIORITY`
 where TS >= 1695348001000 and TS < 1695348002000;
 
@@ -137,6 +145,7 @@ String("Plan with Metrics"),String("ScanTable: table=TEST_QUERY_PRIORITY, parall
 
 -- This query should have higher priority
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select TS from `TEST_QUERY_PRIORITY`
 where TS >= 1695348001000;
 
diff --git a/integration_tests/cases/env/local/ddl/query-plan.sql b/integration_tests/cases/env/local/ddl/query-plan.sql
index 218e0f7ba1..5217b1a076 100644
--- a/integration_tests/cases/env/local/ddl/query-plan.sql
+++ b/integration_tests/cases/env/local/ddl/query-plan.sql
@@ -18,27 +18,32 @@ INSERT INTO `03_dml_select_real_time_range` (t, name, value)
 
 -- This query should include memtable
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348001000;
 
 -- This query should have higher priority
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select t from `03_dml_select_real_time_range`
 where t >= 1695348001000 and t < 1695348002000;
 
 -- This query should not include memtable
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348002000;
 
 -- SQLNESS ARG pre_cmd=flush
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
 -- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 -- This query should include SST
 explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348001000;
 
 -- This query should not include SST
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348002000;
 
@@ -64,6 +69,7 @@ INSERT INTO `03_append_mode_table` (t, name, value)
 -- SQLNESS REPLACE since_create=\d+.?\d*(µ|m|n) since_create=xx
 -- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx
 -- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select t from `03_append_mode_table`
 where t >= 1695348001000 and name = 'ceresdb';
 
@@ -74,6 +80,7 @@ where t >= 1695348001000 and name = 'ceresdb';
 -- SQLNESS REPLACE since_init=\d+.?\d*(µ|m|n) since_init=xx
 -- SQLNESS REPLACE elapsed_compute=\d+.?\d*(µ|m|n) elapsed_compute=xx
 -- SQLNESS REPLACE project_record_batch=\d+.?\d*(µ|m|n) project_record_batch=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select t from `03_append_mode_table`
 where t >= 1695348001000 and name = 'ceresdb';
 
@@ -89,11 +96,13 @@ CREATE TABLE `TEST_QUERY_PRIORITY` (
 
 -- This query should have higher priority
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select TS from `TEST_QUERY_PRIORITY`
 where TS >= 1695348001000 and TS < 1695348002000;
 
 -- This query should have higher priority
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
+-- SQLNESS REPLACE metrics=\[.*?s\] metrics=xx
 explain analyze select TS from `TEST_QUERY_PRIORITY`
 where TS >= 1695348001000;
 

From f51d460ad79d836a4de0c3fadad0ef5ba0b92114 Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Wed, 21 Feb 2024 22:16:42 +0800
Subject: [PATCH 19/25] fix missing column

---
 src/analytic_engine/src/table/mod.rs |  2 +-
 src/table_engine/src/provider.rs     | 30 +++++++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/src/analytic_engine/src/table/mod.rs b/src/analytic_engine/src/table/mod.rs
index e6dfd1dc2f..82b74954f4 100644
--- a/src/analytic_engine/src/table/mod.rs
+++ b/src/analytic_engine/src/table/mod.rs
@@ -429,7 +429,7 @@ pub fn support_pushdown(schema: &Schema, need_dedup: bool, col_names: &[String])
         return true;
     }
 
-    // When table need dedup, only unique keys columns support pushdown
+    // When table need dedup, only non-unique keys columns support pushdown
     col_names
         .iter()
         .all(|col_name| !schema.is_unique_column(col_name.as_str()))
diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs
index 63e5cc7d22..7f2a00d1b8 100644
--- a/src/table_engine/src/provider.rs
+++ b/src/table_engine/src/provider.rs
@@ -19,6 +19,7 @@
 
 use std::{
     any::Any,
+    collections::HashSet,
     fmt,
     sync::{Arc, Mutex},
     time::{Duration, Instant},
@@ -28,6 +29,7 @@ use arrow::datatypes::SchemaRef;
 use async_trait::async_trait;
 use common_types::{projected_schema::ProjectedSchema, request_id::RequestId, schema::Schema};
 use datafusion::{
+    common::tree_node::{TreeNode, VisitRecursion},
     config::{ConfigEntry, ConfigExtension, ExtensionOptions},
     datasource::TableProvider,
     error::{DataFusionError, Result},
@@ -230,9 +232,17 @@ impl<B: TableScanBuilder> TableProviderAdapter<B> {
             priority,
         );
 
+        let all_projections = if let Some(proj) = projection {
+            let mut all_projections =
+                collect_projection_from_expr(filters, &self.current_table_schema);
+            all_projections.extend(proj);
+            Some(all_projections.into_iter().collect::<Vec<_>>())
+        } else {
+            None
+        };
         let predicate = self.check_and_build_predicate_from_filters(filters);
         let projected_schema =
-            ProjectedSchema::new(self.current_table_schema.clone(), projection.cloned()).map_err(
+            ProjectedSchema::new(self.current_table_schema.clone(), all_projections).map_err(
                 |e| {
                     DataFusionError::Internal(format!(
                         "Invalid projection, plan:{self:?}, projection:{projection:?}, err:{e:?}"
@@ -499,3 +509,21 @@ impl fmt::Debug for ScanTable {
             .finish()
     }
 }
+
+fn collect_projection_from_expr(exprs: &[Expr], schema: &Schema) -> HashSet<usize> {
+    let mut projections = HashSet::new();
+    exprs.iter().for_each(|expr| {
+        _ = expr.apply(&mut |expr| match &expr {
+            Expr::Column(column) => {
+                if let Some(idx) = schema.index_of(&column.name) {
+                    projections.insert(idx);
+                }
+                Ok(VisitRecursion::Stop)
+            }
+            Expr::ScalarVariable(_, _) | Expr::Literal(_) => Ok(VisitRecursion::Stop),
+            _ => Ok(VisitRecursion::Continue),
+        });
+    });
+
+    projections
+}

From 7469ca90ea3e64b786ac019dff74ea8d77fd7155 Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Wed, 21 Feb 2024 22:31:35 +0800
Subject: [PATCH 20/25] fix pushdown

---
 src/analytic_engine/src/table/mod.rs | 5 +++--
 src/table_engine/src/provider.rs     | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/analytic_engine/src/table/mod.rs b/src/analytic_engine/src/table/mod.rs
index 82b74954f4..674f6b3bd5 100644
--- a/src/analytic_engine/src/table/mod.rs
+++ b/src/analytic_engine/src/table/mod.rs
@@ -429,10 +429,11 @@ pub fn support_pushdown(schema: &Schema, need_dedup: bool, col_names: &[String])
         return true;
     }
 
-    // When table need dedup, only non-unique keys columns support pushdown
+    // When table need dedup, only unique keys columns support pushdown
+    // See https://github.com/apache/incubator-horaedb/issues/605
     col_names
         .iter()
-        .all(|col_name| !schema.is_unique_column(col_name.as_str()))
+        .all(|col_name| schema.is_unique_column(col_name.as_str()))
 }
 
 #[async_trait]
diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs
index 7f2a00d1b8..f00af076c9 100644
--- a/src/table_engine/src/provider.rs
+++ b/src/table_engine/src/provider.rs
@@ -304,7 +304,7 @@ impl<B: TableScanBuilder> TableProviderAdapter<B> {
                 if support_pushdown {
                     TableProviderFilterPushDown::Exact
                 } else {
-                    TableProviderFilterPushDown::Inexact
+                    TableProviderFilterPushDown::Unsupported
                 }
             })
             .collect()

From 792d94b3390b6bfeacf48c45aa2e815bd2ff2de2 Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Thu, 22 Feb 2024 14:04:18 +0800
Subject: [PATCH 21/25] fix missing columns

---
 .../cases/common/dml/issue-341.result         | 10 +--
 .../cases/env/local/ddl/query-plan.result     | 18 +++---
 src/common_types/src/projected_schema.rs      |  6 +-
 src/table_engine/src/provider.rs              | 61 ++++++++++++++-----
 4 files changed, 66 insertions(+), 29 deletions(-)

diff --git a/integration_tests/cases/common/dml/issue-341.result b/integration_tests/cases/common/dml/issue-341.result
index a68d4f5133..c06388b824 100644
--- a/integration_tests/cases/common/dml/issue-341.result
+++ b/integration_tests/cases/common/dml/issue-341.result
@@ -72,7 +72,7 @@ WHERE
 
 plan_type,plan,
 String("logical_plan"),String("TableScan: issue341_t1 projection=[timestamp, value], full_filters=[issue341_t1.tag1 = Utf8(\"t3\")]"),
-String("physical_plan"),String("ScanTable: table=issue341_t1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(1)\n"),
+String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n  ScanTable: table=issue341_t1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
 -- Repeat operations above, but with overwrite table
@@ -115,8 +115,8 @@ WHERE
     `value` = 3;
 
 plan_type,plan,
-String("logical_plan"),String("TableScan: issue341_t2 projection=[timestamp, value], full_filters=[issue341_t2.value = Float64(3)]"),
-String("physical_plan"),String("ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
+String("logical_plan"),String("Filter: issue341_t2.value = Float64(3)\n  TableScan: issue341_t2 projection=[timestamp, value]"),
+String("physical_plan"),String("CoalesceBatchesExec: target_batch_size=8192\n  FilterExec: value@1 = 3\n    ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
 -- When using tag as filter, FilterExec node should not be in plan.
@@ -129,8 +129,8 @@ WHERE
     tag1 = "t3";
 
 plan_type,plan,
-String("logical_plan"),String("Projection: issue341_t2.timestamp, issue341_t2.value\n  Filter: issue341_t2.tag1 = Utf8(\"t3\")\n    TableScan: issue341_t2 projection=[timestamp, value, tag1], partial_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"),
-String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n  CoalesceBatchesExec: target_batch_size=8192\n    FilterExec: tag1@2 = t3\n      ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
+String("logical_plan"),String("TableScan: issue341_t2 projection=[timestamp, value], full_filters=[issue341_t2.tag1 = Utf8(\"t3\")]"),
+String("physical_plan"),String("ProjectionExec: expr=[timestamp@0 as timestamp, value@1 as value]\n  ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
 DROP TABLE IF EXISTS `issue341_t1`;
diff --git a/integration_tests/cases/env/local/ddl/query-plan.result b/integration_tests/cases/env/local/ddl/query-plan.result
index f471cc3ced..1f63218401 100644
--- a/integration_tests/cases/env/local/ddl/query-plan.result
+++ b/integration_tests/cases/env/local/ddl/query-plan.result
@@ -32,7 +32,7 @@ explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348001000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n  FilterExec: t@0 > 1695348001000, metrics=xx\n    ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=1\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=3\n        scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
+String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=1\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
 
 
 -- This query should have higher priority
@@ -42,7 +42,7 @@ explain analyze select t from `03_dml_select_real_time_range`
 where t >= 1695348001000 and t < 1695348002000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n  FilterExec: t@0 >= 1695348001000 AND t@0 < 1695348002000, metrics=xx\n    ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=1\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=3\n        scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
+String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=High, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), t < TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(1695348002000) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=1\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_memtable_1, fetched_columns:[tsid,t]:\n=0]\n"),
 
 
 -- This query should not include memtable
@@ -52,7 +52,7 @@ explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348002000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n  FilterExec: t@0 > 1695348002000, metrics=xx\n    ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=0\n=0]\n"),
+String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=0\n=0]\n"),
 
 
 -- SQLNESS ARG pre_cmd=flush
@@ -64,7 +64,7 @@ explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348001000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n  FilterExec: t@0 > 1695348001000, metrics=xx\n    ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=1\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=3\n        scan_sst_1, fetched_columns:[tsid,t]:\n            meta_data_cache_hit=false\n            parallelism=1\n            project_record_batch=xxs\n            read_meta_data_duration=xxs\n            row_mem=320\n            row_num=3\n            prune_row_groups:\n                pruned_by_custom_filter=0\n                pruned_by_min_max=0\n                row_groups_after_prune=1\n                total_row_groups=1\n                use_custom_filter=false\n=0]\n"),
+String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348001000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348001001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=1\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_sst_1, fetched_columns:[tsid,t]:\n            meta_data_cache_hit=false\n            parallelism=1\n            project_record_batch=xxs\n            read_meta_data_duration=xxs\n            row_mem=320\n            row_num=3\n            prune_row_groups:\n                pruned_by_custom_filter=0\n                pruned_by_min_max=0\n                row_groups_after_prune=1\n                total_row_groups=1\n                use_custom_filter=false\n=0]\n"),
 
 
 -- This query should not include SST
@@ -73,7 +73,7 @@ explain analyze select t from `03_dml_select_real_time_range`
 where t > 1695348002000;
 
 plan_type,plan,
-String("Plan with Metrics"),String("CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n  FilterExec: t@0 > 1695348002000, metrics=xx\n    ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=0\n=0]\n"),
+String("Plan with Metrics"),String("ScanTable: table=03_dml_select_real_time_range, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t > TimestampMillisecond(1695348002000, None)], time_range:TimeRange { inclusive_start: Timestamp(1695348002001), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=0\n=0]\n"),
 
 
 -- Table with an 'append' update mode
@@ -106,7 +106,9 @@ affected_rows: 3
 explain analyze select t from `03_append_mode_table`
 where t >= 1695348001000 and name = 'ceresdb';
 
-Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute select, err:Failed to execute physical plan, msg:failed to collect execution results, err:Stream error, msg:convert from arrow record batch, err:Execution error: Failed to read table, partition:0, err:Failed to scan table, table:03_append_mode_table, err:Failed to build chain iterator, table:03_append_mode_table, err:Fail to build stream from the memtable, err:Failed to generate datafusion physical expr, err:Schema error: No field named name. Valid fields are t.. sql:explain analyze select t from `03_append_mode_table`\nwhere t >= 1695348001000 and name = 'ceresdb';" })
+plan_type,plan,
+String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=xx\n  ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=false\n    chain_iter_0:\n        num_memtables=1\n        num_ssts=0\n        scan_duration=xxs\n        since_create=xxs\n        since_init=xxs\n        total_batch_fetched=1\n        total_rows_fetched=2\n        scan_memtable_1, fetched_columns:[t,name]:\n=0]\n"),
+
 
 -- Should just fetch projected columns from SST
 -- SQLNESS ARG pre_cmd=flush
@@ -119,7 +121,9 @@ Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to ex
 explain analyze select t from `03_append_mode_table`
 where t >= 1695348001000 and name = 'ceresdb';
 
-Failed to execute query, err: Server(ServerError { code: 500, msg: "Failed to execute plan. Caused by: Internal error, msg:Failed to execute interpreter, err:Failed to execute select, err:Failed to execute physical plan, msg:failed to collect execution results, err:Stream error, msg:convert from arrow record batch, err:Execution error: Failed to read table, partition:0, err:Failed to scan table, table:03_append_mode_table, err:Failed to build chain iterator, table:03_append_mode_table, err:Fail to build stream from the sst file, err:Failed to generate datafusion physical expr, err:Schema error: No field named name. Valid fields are t.. sql:explain analyze select t from `03_append_mode_table`\nwhere t >= 1695348001000 and name = 'ceresdb';" })
+plan_type,plan,
+String("Plan with Metrics"),String("ProjectionExec: expr=[t@0 as t], metrics=xx\n  ScanTable: table=03_append_mode_table, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[t >= TimestampMillisecond(1695348001000, None), name = Utf8(\"ceresdb\")], time_range:TimeRange { inclusive_start: Timestamp(1695348001000), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=false\n    chain_iter_0:\n        num_memtables=0\n        num_ssts=1\n        scan_duration=xxs\n        since_create=xxs\n        since_init=xxs\n        total_batch_fetched=1\n        total_rows_fetched=2\n        scan_sst_1, fetched_columns:[t,name]:\n            meta_data_cache_hit=false\n            parallelism=1\n            project_record_batch=xxs\n            read_meta_data_duration=xxs\n            row_mem=408\n            row_num=3\n            prune_row_groups:\n                pruned_by_custom_filter=0\n                pruned_by_min_max=0\n                row_groups_after_prune=1\n                total_row_groups=1\n                use_custom_filter=false\n=0]\n"),
+
 
 CREATE TABLE `TEST_QUERY_PRIORITY` (
     NAME string TAG,
diff --git a/src/common_types/src/projected_schema.rs b/src/common_types/src/projected_schema.rs
index 30e9eb01e3..1eff7dc424 100644
--- a/src/common_types/src/projected_schema.rs
+++ b/src/common_types/src/projected_schema.rs
@@ -105,7 +105,7 @@ pub struct RowProjector {
     /// For example:
     ///   source columns in sst: 0,1,2,3,4
     ///   target projection columns: 2,1,3
-    ///   
+    ///
     ///   the actual columns in fetched record: 1,2,3
     ///   relative columns indexes in fetched record: 0,1,2
     ///
@@ -347,6 +347,10 @@ impl ProjectedSchema {
     pub fn table_schema(&self) -> &Schema {
         &self.0.table_schema
     }
+
+    pub fn target_column_schema(&self, i: usize) -> &ColumnSchema {
+        self.0.target_record_schema.column(i)
+    }
 }
 
 impl From<ProjectedSchema> for horaedbproto::schema::ProjectedSchema {
diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs
index f00af076c9..e467a01490 100644
--- a/src/table_engine/src/provider.rs
+++ b/src/table_engine/src/provider.rs
@@ -29,7 +29,6 @@ use arrow::datatypes::SchemaRef;
 use async_trait::async_trait;
 use common_types::{projected_schema::ProjectedSchema, request_id::RequestId, schema::Schema};
 use datafusion::{
-    common::tree_node::{TreeNode, VisitRecursion},
     config::{ConfigEntry, ConfigExtension, ExtensionOptions},
     datasource::TableProvider,
     error::{DataFusionError, Result},
@@ -37,8 +36,9 @@ use datafusion::{
     logical_expr::{Expr, TableProviderFilterPushDown, TableSource, TableType},
     physical_expr::PhysicalSortExpr,
     physical_plan::{
+        expressions,
         metrics::{Count, MetricValue, MetricsSet},
-        DisplayAs, DisplayFormatType, ExecutionPlan, Metric, Partitioning,
+        DisplayAs, DisplayFormatType, ExecutionPlan, Metric, Partitioning, PhysicalExpr,
         SendableRecordBatchStream as DfSendableRecordBatchStream, Statistics,
     },
 };
@@ -232,11 +232,26 @@ impl<B: TableScanBuilder> TableProviderAdapter<B> {
             priority,
         );
 
+        let mut need_reprojection = false;
         let all_projections = if let Some(proj) = projection {
-            let mut all_projections =
+            let mut original_projections = proj.clone();
+            let projections_from_filter =
                 collect_projection_from_expr(filters, &self.current_table_schema);
-            all_projections.extend(proj);
-            Some(all_projections.into_iter().collect::<Vec<_>>())
+            for proj in projections_from_filter {
+                if !original_projections.contains(&proj) {
+                    original_projections.push(proj);
+                    // If the projection from filter has columns not in the original projection,
+                    // we need to add a ProjectionExec plan to project the orignal columns. Eg:
+                    // ```
+                    // select a from table where b > 1
+                    // ```
+                    // The original projection only contains a, but the filter has column b, so we
+                    // need to query both a and b column from table but only
+                    // output a column.
+                    need_reprojection = true;
+                }
+            }
+            Some(original_projections)
         } else {
             None
         };
@@ -260,13 +275,32 @@ impl<B: TableScanBuilder> TableProviderAdapter<B> {
         let request = ReadRequest {
             request_id,
             opts,
-            projected_schema,
+            projected_schema: projected_schema.clone(),
             predicate,
             metrics_collector: MetricsCollector::new(SCAN_TABLE_METRICS_COLLECTOR_NAME.to_string()),
             priority,
         };
 
-        self.builder.build(request).await
+        if need_reprojection {
+            let original_projection = projection.unwrap();
+            let projection = (0..original_projection.len())
+                .map(|proj| {
+                    let column = projected_schema.target_column_schema(proj);
+
+                    (
+                        Arc::new(expressions::Column::new(&column.name, proj))
+                            as Arc<dyn PhysicalExpr>,
+                        column.name.clone(),
+                    )
+                })
+                .collect::<Vec<_>>();
+            let scan = self.builder.build(request).await?;
+            let plan =
+                datafusion::physical_plan::projection::ProjectionExec::try_new(projection, scan)?;
+            Ok(Arc::new(plan))
+        } else {
+            self.builder.build(request).await
+        }
     }
 
     fn check_and_build_predicate_from_filters(&self, filters: &[Expr]) -> PredicateRef {
@@ -513,16 +547,11 @@ impl fmt::Debug for ScanTable {
 fn collect_projection_from_expr(exprs: &[Expr], schema: &Schema) -> HashSet<usize> {
     let mut projections = HashSet::new();
     exprs.iter().for_each(|expr| {
-        _ = expr.apply(&mut |expr| match &expr {
-            Expr::Column(column) => {
-                if let Some(idx) = schema.index_of(&column.name) {
-                    projections.insert(idx);
-                }
-                Ok(VisitRecursion::Stop)
+        for col_name in visitor::find_columns_by_expr(expr) {
+            if let Some(idx) = schema.index_of(&col_name) {
+                projections.insert(idx);
             }
-            Expr::ScalarVariable(_, _) | Expr::Literal(_) => Ok(VisitRecursion::Stop),
-            _ => Ok(VisitRecursion::Continue),
-        });
+        }
     });
 
     projections

From 0e1096aab79269793f164df25ed52912d64c8b08 Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Thu, 22 Feb 2024 14:30:22 +0800
Subject: [PATCH 22/25] add more comments

---
 .../env/cluster/ddl/partition_table.result    |  4 +-
 src/table_engine/src/provider.rs              | 50 +++++++++++--------
 2 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/integration_tests/cases/env/cluster/ddl/partition_table.result b/integration_tests/cases/env/cluster/ddl/partition_table.result
index 3f7bb7d8bb..233c348318 100644
--- a/integration_tests/cases/env/cluster/ddl/partition_table.result
+++ b/integration_tests/cases/env/cluster/ddl/partition_table.result
@@ -85,7 +85,7 @@ UInt64(16367588166920223437),Timestamp(1651737067000),String("horaedb9"),Int32(0
 EXPLAIN ANALYZE SELECT * from partition_table_t where name = "ceresdb0";
 
 plan_type,plan,
-String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:4, metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: name@2 = ceresdb0, metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_0, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: name@2 = ceresdb0, metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: name@2 = ceresdb0, metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: name@2 = ceresdb0, metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_3, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
+String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:1, metrics=xx\n  ScanTable: table=__partition_table_t_1, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name = Utf8(\"ceresdb0\")], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
 
 
 -- SQLNESS REPLACE duration=\d+.?\d*(µ|m|n) duration=xx
@@ -96,7 +96,7 @@ String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:f
 EXPLAIN ANALYZE SELECT * from partition_table_t where name in ("ceresdb0", "ceresdb1", "ceresdb2", "ceresdb3", "ceresdb4");
 
 plan_type,plan,
-String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:4, metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n      RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  CoalesceBatchesExec: target_batch_size=8192, metrics=xx\n    FilterExec: Use name@2 IN (SET) ([Literal { value: Utf8(\"ceresdb0\") }, Literal { value: Utf8(\"ceresdb1\") }, Literal { value: Utf8(\"ceresdb2\") }, Literal { value: Utf8(\"ceresdb3\") }, Literal { value: Utf8(\"ceresdb4\") }]), metrics=xx\n      
RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=8, metrics=xx\n        ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=2\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=1\n        total_rows_fetch_from_one=1\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
+String("Plan with Metrics"),String("ResolvedPartitionedScan: pushdown_continue:false, partition_count:3, metrics=xx\n  ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=xx\n  ScanTable: table=__partition_table_t_x, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8), metrics=[\nPredicate { exprs:[name IN ([Utf8(\"ceresdb0\"), Utf8(\"ceresdb1\"), Utf8(\"ceresdb2\"), Utf8(\"ceresdb3\"), Utf8(\"ceresdb4\")])], time_range:TimeRange { inclusive_start: Timestamp(-9223372036854775808), exclusive_end: Timestamp(9223372036854775807) } }\nscan_table:\n    do_merge_sort=true\n    iter_num=1\n    merge_iter_0:\n        init_duration=xxs\n        num_memtables=0\n        num_ssts=0\n        scan_count=1\n        scan_duration=xxs\n        times_fetch_row_from_multiple=0\n        times_fetch_rows_from_one=0\n        total_rows_fetch_from_one=0\n        scan_memtable_1, fetched_columns:[tsid,t,name,id,value]:\n=0]\n=0]\n"),
 
 
 ALTER TABLE partition_table_t ADD COLUMN (b string);
diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs
index e467a01490..e9775fbec6 100644
--- a/src/table_engine/src/provider.rs
+++ b/src/table_engine/src/provider.rs
@@ -38,6 +38,7 @@ use datafusion::{
     physical_plan::{
         expressions,
         metrics::{Count, MetricValue, MetricsSet},
+        projection::ProjectionExec,
         DisplayAs, DisplayFormatType, ExecutionPlan, Metric, Partitioning, PhysicalExpr,
         SendableRecordBatchStream as DfSendableRecordBatchStream, Statistics,
     },
@@ -240,14 +241,16 @@ impl<B: TableScanBuilder> TableProviderAdapter<B> {
             for proj in projections_from_filter {
                 if !original_projections.contains(&proj) {
                     original_projections.push(proj);
-                    // If the projection from filter has columns not in the original projection,
-                    // we need to add a ProjectionExec plan to project the orignal columns. Eg:
-                    // ```
+                    // If the projection from filters have columns not in the original projection,
+                    // we need to add it to projection, and add a ProjectionExec plan to project the
+                    // original columns. Eg:
+                    // ```text
                     // select a from table where b > 1
                     // ```
                     // The original projection only contains a, but the filter has column b, so we
                     // need to query both a and b column from table but only
-                    // output a column.
+                    // output a column. More details can be found in:
+                    // https://github.com/apache/arrow-datafusion/pull/9131#pullrequestreview-1865020767
                     need_reprojection = true;
                 }
             }
@@ -265,6 +268,22 @@ impl<B: TableScanBuilder> TableProviderAdapter<B> {
                 },
             )?;
 
+        let projection_exprs = if need_reprojection {
+            let original_projection = projection.unwrap();
+            let exprs = (0..original_projection.len())
+                .map(|i| {
+                    let column = projected_schema.target_column_schema(i);
+                    (
+                        Arc::new(expressions::Column::new(&column.name, i))
+                            as Arc<dyn PhysicalExpr>,
+                        column.name.clone(),
+                    )
+                })
+                .collect::<Vec<_>>();
+            Some(exprs)
+        } else {
+            None
+        };
         let opts = ReadOptions {
             deadline,
             read_parallelism,
@@ -275,31 +294,18 @@ impl<B: TableScanBuilder> TableProviderAdapter<B> {
         let request = ReadRequest {
             request_id,
             opts,
-            projected_schema: projected_schema.clone(),
+            projected_schema,
             predicate,
             metrics_collector: MetricsCollector::new(SCAN_TABLE_METRICS_COLLECTOR_NAME.to_string()),
             priority,
         };
 
-        if need_reprojection {
-            let original_projection = projection.unwrap();
-            let projection = (0..original_projection.len())
-                .map(|proj| {
-                    let column = projected_schema.target_column_schema(proj);
-
-                    (
-                        Arc::new(expressions::Column::new(&column.name, proj))
-                            as Arc<dyn PhysicalExpr>,
-                        column.name.clone(),
-                    )
-                })
-                .collect::<Vec<_>>();
-            let scan = self.builder.build(request).await?;
-            let plan =
-                datafusion::physical_plan::projection::ProjectionExec::try_new(projection, scan)?;
+        let scan = self.builder.build(request).await?;
+        if let Some(expr) = projection_exprs {
+            let plan = ProjectionExec::try_new(expr, scan)?;
             Ok(Arc::new(plan))
         } else {
-            self.builder.build(request).await
+            Ok(scan)
         }
     }
 

From 43a0f3437e63ba874e61b66e775a0b39d32b6d8b Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Thu, 22 Feb 2024 14:54:13 +0800
Subject: [PATCH 23/25] remove unwrap

---
 .../src/instance/reorder_memtable.rs          |  5 +----
 src/analytic_engine/src/row_iter/merge.rs     |  3 ---
 src/interpreters/src/insert.rs                |  4 ++--
 .../physical_plan_extension/prom_align.rs     | 19 ++++++++-----------
 4 files changed, 11 insertions(+), 20 deletions(-)

diff --git a/src/analytic_engine/src/instance/reorder_memtable.rs b/src/analytic_engine/src/instance/reorder_memtable.rs
index 2f9ac87b8e..c37417bf64 100644
--- a/src/analytic_engine/src/instance/reorder_memtable.rs
+++ b/src/analytic_engine/src/instance/reorder_memtable.rs
@@ -262,10 +262,7 @@ impl Reorder {
     pub async fn into_stream(self) -> Result<SendableFetchingRecordBatchStream> {
         // 1. Init datafusion context
         let runtime = Arc::new(RuntimeEnv::default());
-        let mut state = SessionState::new_with_config_rt(SessionConfig::new(), runtime);
-        // The physical optimizer rules have bug, and the plan here is simple, optimize
-        // is not required, so we disable it here.
-        state = state.with_physical_optimizer_rules(vec![]);
+        let state = SessionState::new_with_config_rt(SessionConfig::new(), runtime);
         let ctx = SessionContext::new_with_state(state);
         let table_provider = Arc::new(MemIterProvider {
             arrow_schema: self.schema.to_arrow_schema_ref(),
diff --git a/src/analytic_engine/src/row_iter/merge.rs b/src/analytic_engine/src/row_iter/merge.rs
index 67a2f4045d..88f58c2df9 100644
--- a/src/analytic_engine/src/row_iter/merge.rs
+++ b/src/analytic_engine/src/row_iter/merge.rs
@@ -486,7 +486,6 @@ impl BufferedStream {
                 Ok(false)
             }
             Some(record_batch) => {
-                logger::info!("if necessary: {record_batch:?}");
                 self.state.as_mut().unwrap().reset(record_batch);
                 Ok(true)
             }
@@ -845,7 +844,6 @@ impl MergeIterator {
 
             None
         };
-        logger::info!("debug fetch rows:{record_batch:?}");
 
         self.reheap(buffered_stream).await?;
 
@@ -875,7 +873,6 @@ impl MergeIterator {
 
         self.record_batch_builder.clear();
 
-        logger::info!("fetch next batch, {}", self.record_batch_builder.len());
         while !self.hot.is_empty() && self.record_batch_builder.len() < self.iter_options.batch_size
         {
             // no need to do merge sort if only one batch in the hot heap.
diff --git a/src/interpreters/src/insert.rs b/src/interpreters/src/insert.rs
index c67ff1dfc1..cc455b3fb6 100644
--- a/src/interpreters/src/insert.rs
+++ b/src/interpreters/src/insert.rs
@@ -373,6 +373,6 @@ fn get_or_extract_column_from_row_groups(
             cached_column_values.insert(column_idx, columnar_value.clone());
             Ok(columnar_value)
         })?;
-    // TODO: solve unwarp
-    Ok(column.into_array(num_rows).unwrap())
+
+    column.into_array(num_rows).context(DatafusionExecutor)
 }
diff --git a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
index 9fe8cc74c7..3b1a0cd9a7 100644
--- a/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
+++ b/src/query_engine/src/datafusion_impl/physical_plan_extension/prom_align.rs
@@ -37,7 +37,7 @@ use common_types::{
     time::{TimeRange, Timestamp},
 };
 use datafusion::{
-    error::{DataFusionError, Result as ArrowResult},
+    error::{DataFusionError, Result as DataFusionResult},
     execution::context::TaskContext,
     physical_expr::PhysicalSortExpr,
     physical_plan::{
@@ -93,15 +93,15 @@ impl PhysicalExpr for ExtractTsidExpr {
         self
     }
 
-    fn data_type(&self, _input_schema: &ArrowSchema) -> ArrowResult<DataType> {
+    fn data_type(&self, _input_schema: &ArrowSchema) -> DataFusionResult<DataType> {
         Ok(DataType::UInt64)
     }
 
-    fn nullable(&self, _input_schema: &ArrowSchema) -> ArrowResult<bool> {
+    fn nullable(&self, _input_schema: &ArrowSchema) -> DataFusionResult<bool> {
         Ok(false)
     }
 
-    fn evaluate(&self, batch: &RecordBatch) -> ArrowResult<ColumnarValue> {
+    fn evaluate(&self, batch: &RecordBatch) -> DataFusionResult<ColumnarValue> {
         let tsid_idx = batch
             .schema()
             .index_of(TSID_COLUMN)
@@ -116,7 +116,7 @@ impl PhysicalExpr for ExtractTsidExpr {
     fn with_new_children(
         self: Arc<Self>,
         _children: Vec<Arc<dyn PhysicalExpr>>,
-    ) -> ArrowResult<Arc<dyn PhysicalExpr>> {
+    ) -> DataFusionResult<Arc<dyn PhysicalExpr>> {
         Ok(self)
     }
 
@@ -204,7 +204,7 @@ impl ExecutionPlan for PromAlignExec {
     fn with_new_children(
         self: Arc<Self>,
         children: Vec<Arc<dyn ExecutionPlan>>,
-    ) -> ArrowResult<Arc<dyn ExecutionPlan>> {
+    ) -> DataFusionResult<Arc<dyn ExecutionPlan>> {
         match children.len() {
             1 => Ok(Arc::new(PromAlignExec {
                 input: children[0].clone(),
@@ -222,7 +222,7 @@ impl ExecutionPlan for PromAlignExec {
         &self,
         partition: usize,
         context: Arc<TaskContext>,
-    ) -> ArrowResult<DfSendableRecordBatchStream> {
+    ) -> DataFusionResult<DfSendableRecordBatchStream> {
         debug!("PromAlignExec: partition:{}", partition);
         Ok(Box::pin(PromAlignReader {
             input: self.input.execute(partition, context)?,
@@ -236,10 +236,7 @@ impl ExecutionPlan for PromAlignExec {
         }))
     }
 
-    fn statistics(
-        &self,
-    ) -> std::result::Result<datafusion::common::Statistics, datafusion::error::DataFusionError>
-    {
+    fn statistics(&self) -> DataFusionResult<Statistics> {
         // TODO(chenxiang)
         Ok(Statistics::new_unknown(&self.schema()))
     }

From 5ebf1f26fadcdbe1ec349575c0d1a5e724c51144 Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Thu, 22 Feb 2024 15:01:04 +0800
Subject: [PATCH 24/25] fix influxdb tests

---
 src/proxy/src/influxdb/types.rs  | 2 +-
 src/table_engine/src/provider.rs | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/proxy/src/influxdb/types.rs b/src/proxy/src/influxdb/types.rs
index cd2b229d6d..488f5dedfe 100644
--- a/src/proxy/src/influxdb/types.rs
+++ b/src/proxy/src/influxdb/types.rs
@@ -744,7 +744,7 @@ mod tests {
     fn test_influxql_result() {
         let record_schema = build_test_record_schema();
         let column_blocks = build_test_column_blocks();
-        let record_batch = RecordBatch::new(record_schema, column_blocks, 4).unwrap();
+        let record_batch = RecordBatch::new(record_schema, column_blocks, 7).unwrap();
 
         let mut builder = InfluxqlResultBuilder::new(record_batch.schema(), 0).unwrap();
         builder.add_record_batch(record_batch).unwrap();
diff --git a/src/table_engine/src/provider.rs b/src/table_engine/src/provider.rs
index e9775fbec6..bcca5ba897 100644
--- a/src/table_engine/src/provider.rs
+++ b/src/table_engine/src/provider.rs
@@ -344,7 +344,7 @@ impl<B: TableScanBuilder> TableProviderAdapter<B> {
                 if support_pushdown {
                     TableProviderFilterPushDown::Exact
                 } else {
-                    TableProviderFilterPushDown::Unsupported
+                    TableProviderFilterPushDown::Inexact
                 }
             })
             .collect()

From cd70bfc2394255afdf401a6d885de2d8bd140021 Mon Sep 17 00:00:00 2001
From: jiacai2050 <dev@liujiacai.net>
Date: Thu, 22 Feb 2024 17:32:02 +0800
Subject: [PATCH 25/25] fix memtable scan

---
 .../cases/common/dml/issue-341.result         |  2 +-
 .../cases/common/function/aggregate.result    | 43 +++++++++++++++++++
 .../cases/common/function/aggregate.sql       | 28 ++++++++++++
 .../src/memtable/skiplist/iter.rs             |  5 +++
 src/common_types/src/record_batch.rs          | 14 +++++-
 5 files changed, 89 insertions(+), 3 deletions(-)

diff --git a/integration_tests/cases/common/dml/issue-341.result b/integration_tests/cases/common/dml/issue-341.result
index c06388b824..4d7da95cab 100644
--- a/integration_tests/cases/common/dml/issue-341.result
+++ b/integration_tests/cases/common/dml/issue-341.result
@@ -115,7 +115,7 @@ WHERE
     `value` = 3;
 
 plan_type,plan,
-String("logical_plan"),String("Filter: issue341_t2.value = Float64(3)\n  TableScan: issue341_t2 projection=[timestamp, value]"),
+String("logical_plan"),String("Filter: issue341_t2.value = Float64(3)\n  TableScan: issue341_t2 projection=[timestamp, value], partial_filters=[issue341_t2.value = Float64(3)]"),
 String("physical_plan"),String("CoalesceBatchesExec: target_batch_size=8192\n  FilterExec: value@1 = 3\n    ScanTable: table=issue341_t2, parallelism=8, priority=Low, partition_count=UnknownPartitioning(8)\n"),
 
 
diff --git a/integration_tests/cases/common/function/aggregate.result b/integration_tests/cases/common/function/aggregate.result
index 037e503a9f..f45a6841a8 100644
--- a/integration_tests/cases/common/function/aggregate.result
+++ b/integration_tests/cases/common/function/aggregate.result
@@ -105,7 +105,50 @@ COUNT(DISTINCT 02_function_aggregate_table1.arch),
 Int64(2),
 
 
+CREATE TABLE `02_function_aggregate_table2` (
+    `timestamp` timestamp NOT NULL,
+    `arch` string TAG,
+    `datacenter` string TAG,
+    `value` int,
+    `uvalue` uint64,
+    timestamp KEY (timestamp)) ENGINE=Analytic
+WITH(
+	 enable_ttl='false',
+     update_mode = 'append'
+);
+
+affected_rows: 0
+
+INSERT INTO `02_function_aggregate_table2`
+    (`timestamp`, `arch`, `datacenter`, `value`, `uvalue`)
+VALUES
+    (1658304762, 'x86-64', 'china', 100, 10),
+    (1658304763, 'x86-64', 'china', 200, 10),
+    (1658304762, 'arm64', 'china', 110, 0),
+    (1658304763, 'arm64', 'china', 210, 0);
+
+affected_rows: 4
+
+-- The should select empty column
+SELECT count(*) FROM `02_function_aggregate_table1`;
+
+COUNT(*),
+Int64(4),
+
+
+-- Same with before, but query from sst
+-- SQLNESS ARG pre_cmd=flush
+SELECT count(*) FROM `02_function_aggregate_table1`;
+
+COUNT(*),
+Int64(4),
+
+
 DROP TABLE `02_function_aggregate_table1`;
 
 affected_rows: 0
 
+DROP TABLE `02_function_aggregate_table2`;
+
+affected_rows: 0
+
diff --git a/integration_tests/cases/common/function/aggregate.sql b/integration_tests/cases/common/function/aggregate.sql
index c4f8dd50ea..8543245ae8 100644
--- a/integration_tests/cases/common/function/aggregate.sql
+++ b/integration_tests/cases/common/function/aggregate.sql
@@ -57,4 +57,32 @@ SELECT distinct(`arch`) FROM `02_function_aggregate_table1` ORDER BY `arch` DESC
 
 SELECT count(distinct(`arch`)) FROM `02_function_aggregate_table1`;
 
+CREATE TABLE `02_function_aggregate_table2` (
+    `timestamp` timestamp NOT NULL,
+    `arch` string TAG,
+    `datacenter` string TAG,
+    `value` int,
+    `uvalue` uint64,
+    timestamp KEY (timestamp)) ENGINE=Analytic
+WITH(
+	 enable_ttl='false',
+     update_mode = 'append'
+);
+
+INSERT INTO `02_function_aggregate_table2`
+    (`timestamp`, `arch`, `datacenter`, `value`, `uvalue`)
+VALUES
+    (1658304762, 'x86-64', 'china', 100, 10),
+    (1658304763, 'x86-64', 'china', 200, 10),
+    (1658304762, 'arm64', 'china', 110, 0),
+    (1658304763, 'arm64', 'china', 210, 0);
+
+-- The should select empty column
+SELECT count(*) FROM `02_function_aggregate_table1`;
+
+-- Same with before, but query from sst
+-- SQLNESS ARG pre_cmd=flush
+SELECT count(*) FROM `02_function_aggregate_table1`;
+
 DROP TABLE `02_function_aggregate_table1`;
+DROP TABLE `02_function_aggregate_table2`;
diff --git a/src/analytic_engine/src/memtable/skiplist/iter.rs b/src/analytic_engine/src/memtable/skiplist/iter.rs
index 4787b754bd..cce3913dea 100644
--- a/src/analytic_engine/src/memtable/skiplist/iter.rs
+++ b/src/analytic_engine/src/memtable/skiplist/iter.rs
@@ -154,6 +154,7 @@ impl<A: Arena<Stats = BasicStats> + Clone + Sync + Send> ColumnarIterImpl<A> {
         assert!(self.batch_size > 0);
 
         let record_schema = self.row_projector.fetched_schema().clone();
+        let is_empty_projection = record_schema.columns().is_empty();
         let primary_key_indexes = self
             .row_projector
             .primary_key_indexes()
@@ -183,6 +184,10 @@ impl<A: Arena<Stats = BasicStats> + Clone + Sync + Send> ColumnarIterImpl<A> {
             }
         }
 
+        if is_empty_projection {
+            builder.inc_row_num(num_rows);
+        }
+
         if num_rows > 0 {
             if let Some(deadline) = self.deadline {
                 let now = Instant::now();
diff --git a/src/common_types/src/record_batch.rs b/src/common_types/src/record_batch.rs
index 2c787be465..0278aa7095 100644
--- a/src/common_types/src/record_batch.rs
+++ b/src/common_types/src/record_batch.rs
@@ -591,6 +591,7 @@ pub struct FetchedRecordBatchBuilder {
     fetched_schema: RecordSchema,
     primary_key_indexes: Option<Vec<usize>>,
     builders: Vec<ColumnBlockBuilder>,
+    num_rows: usize,
 }
 
 impl FetchedRecordBatchBuilder {
@@ -610,6 +611,7 @@ impl FetchedRecordBatchBuilder {
             fetched_schema,
             primary_key_indexes,
             builders,
+            num_rows: 0,
         }
     }
 
@@ -633,6 +635,7 @@ impl FetchedRecordBatchBuilder {
             fetched_schema: record_schema,
             primary_key_indexes,
             builders,
+            num_rows: 0,
         }
     }
 
@@ -680,6 +683,13 @@ impl FetchedRecordBatchBuilder {
         Ok(())
     }
 
+    /// When the record batch has no columns, its row count cannot be taken
+    /// from the column builders, so it must be incremented explicitly here.
+    /// See: https://github.com/apache/arrow-datafusion/pull/7920
+    pub fn inc_row_num(&mut self, n: usize) {
+        self.num_rows += n;
+    }
+
     /// Append `len` from `start` (inclusive) to this builder.
     ///
     /// REQUIRE:
@@ -711,7 +721,7 @@ impl FetchedRecordBatchBuilder {
         self.builders
             .first()
             .map(|builder| builder.len())
-            .unwrap_or(0)
+            .unwrap_or(self.num_rows)
     }
 
     /// Returns true if the builder is empty.
@@ -737,7 +747,7 @@ impl FetchedRecordBatchBuilder {
         let num_rows = column_blocks
             .first()
             .map(|block| block.num_rows())
-            .unwrap_or_default();
+            .unwrap_or(self.num_rows);
         let options = RecordBatchOptions::new().with_row_count(Some(num_rows));
 
         Ok(FetchedRecordBatch {