@@ -67,7 +67,7 @@ mod test {
6767 /// - Each partition has an "id" column (INT) with the following values:
6868 /// - First partition: [3, 4]
6969 /// - Second partition: [1, 2]
70- /// - Each row is 110 bytes in size
70+ /// - Each partition has 16 bytes total (Int32 id: 4 bytes × 2 rows + Date32 date: 4 bytes × 2 rows)
7171 ///
7272 /// @param create_table_sql Optional parameter to set the create table SQL
7373 /// @param target_partition Optional parameter to set the target partitions
@@ -215,9 +215,9 @@ mod test {
215215 . map ( |idx| scan. partition_statistics ( Some ( idx) ) )
216216 . collect :: < Result < Vec < _ > > > ( ) ?;
217217 let expected_statistic_partition_1 =
218- create_partition_statistics ( 2 , 110 , 3 , 4 , true ) ;
218+ create_partition_statistics ( 2 , 16 , 3 , 4 , true ) ;
219219 let expected_statistic_partition_2 =
220- create_partition_statistics ( 2 , 110 , 1 , 2 , true ) ;
220+ create_partition_statistics ( 2 , 16 , 1 , 2 , true ) ;
221221 // Check the statistics of each partition
222222 assert_eq ! ( statistics. len( ) , 2 ) ;
223223 assert_eq ! ( statistics[ 0 ] , expected_statistic_partition_1) ;
@@ -277,8 +277,7 @@ mod test {
277277 let statistics = ( 0 ..sort_exec. output_partitioning ( ) . partition_count ( ) )
278278 . map ( |idx| sort_exec. partition_statistics ( Some ( idx) ) )
279279 . collect :: < Result < Vec < _ > > > ( ) ?;
280- let expected_statistic_partition =
281- create_partition_statistics ( 4 , 220 , 1 , 4 , true ) ;
280+ let expected_statistic_partition = create_partition_statistics ( 4 , 32 , 1 , 4 , true ) ;
282281 assert_eq ! ( statistics. len( ) , 1 ) ;
283282 assert_eq ! ( statistics[ 0 ] , expected_statistic_partition) ;
284283 // Check the statistics_by_partition with real results
@@ -292,9 +291,9 @@ mod test {
292291 SortExec :: new ( ordering. into ( ) , scan_2) . with_preserve_partitioning ( true ) ,
293292 ) ;
294293 let expected_statistic_partition_1 =
295- create_partition_statistics ( 2 , 110 , 3 , 4 , true ) ;
294+ create_partition_statistics ( 2 , 16 , 3 , 4 , true ) ;
296295 let expected_statistic_partition_2 =
297- create_partition_statistics ( 2 , 110 , 1 , 2 , true ) ;
296+ create_partition_statistics ( 2 , 16 , 1 , 2 , true ) ;
298297 let statistics = ( 0 ..sort_exec. output_partitioning ( ) . partition_count ( ) )
299298 . map ( |idx| sort_exec. partition_statistics ( Some ( idx) ) )
300299 . collect :: < Result < Vec < _ > > > ( ) ?;
@@ -366,9 +365,9 @@ mod test {
366365 // Check that we have 4 partitions (2 from each scan)
367366 assert_eq ! ( statistics. len( ) , 4 ) ;
368367 let expected_statistic_partition_1 =
369- create_partition_statistics ( 2 , 110 , 3 , 4 , true ) ;
368+ create_partition_statistics ( 2 , 16 , 3 , 4 , true ) ;
370369 let expected_statistic_partition_2 =
371- create_partition_statistics ( 2 , 110 , 1 , 2 , true ) ;
370+ create_partition_statistics ( 2 , 16 , 1 , 2 , true ) ;
372371 // Verify first partition (from first scan)
373372 assert_eq ! ( statistics[ 0 ] , expected_statistic_partition_1) ;
374373 // Verify second partition (from first scan)
@@ -418,7 +417,7 @@ mod test {
418417
419418 let expected_stats = Statistics {
420419 num_rows : Precision :: Inexact ( 4 ) ,
421- total_byte_size : Precision :: Inexact ( 220 ) ,
420+ total_byte_size : Precision :: Inexact ( 32 ) ,
422421 column_statistics : vec ! [
423422 ColumnStatistics :: new_unknown( ) ,
424423 ColumnStatistics :: new_unknown( ) ,
@@ -462,7 +461,7 @@ mod test {
462461 // Check that we have 2 partitions
463462 assert_eq ! ( statistics. len( ) , 2 ) ;
464463 let mut expected_statistic_partition_1 =
465- create_partition_statistics ( 8 , 48400 , 1 , 4 , true ) ;
464+ create_partition_statistics ( 8 , 512 , 1 , 4 , true ) ;
466465 expected_statistic_partition_1
467466 . column_statistics
468467 . push ( ColumnStatistics {
@@ -473,7 +472,7 @@ mod test {
473472 distinct_count : Precision :: Absent ,
474473 } ) ;
475474 let mut expected_statistic_partition_2 =
476- create_partition_statistics ( 8 , 48400 , 1 , 4 , true ) ;
475+ create_partition_statistics ( 8 , 512 , 1 , 4 , true ) ;
477476 expected_statistic_partition_2
478477 . column_statistics
479478 . push ( ColumnStatistics {
@@ -501,9 +500,9 @@ mod test {
501500 let coalesce_batches: Arc < dyn ExecutionPlan > =
502501 Arc :: new ( CoalesceBatchesExec :: new ( scan, 2 ) ) ;
503502 let expected_statistic_partition_1 =
504- create_partition_statistics ( 2 , 110 , 3 , 4 , true ) ;
503+ create_partition_statistics ( 2 , 16 , 3 , 4 , true ) ;
505504 let expected_statistic_partition_2 =
506- create_partition_statistics ( 2 , 110 , 1 , 2 , true ) ;
505+ create_partition_statistics ( 2 , 16 , 1 , 2 , true ) ;
507506 let statistics = ( 0 ..coalesce_batches. output_partitioning ( ) . partition_count ( ) )
508507 . map ( |idx| coalesce_batches. partition_statistics ( Some ( idx) ) )
509508 . collect :: < Result < Vec < _ > > > ( ) ?;
@@ -525,8 +524,7 @@ mod test {
525524 let scan = create_scan_exec_with_statistics ( None , Some ( 2 ) ) . await ;
526525 let coalesce_partitions: Arc < dyn ExecutionPlan > =
527526 Arc :: new ( CoalescePartitionsExec :: new ( scan) ) ;
528- let expected_statistic_partition =
529- create_partition_statistics ( 4 , 220 , 1 , 4 , true ) ;
527+ let expected_statistic_partition = create_partition_statistics ( 4 , 32 , 1 , 4 , true ) ;
530528 let statistics = ( 0 ..coalesce_partitions. output_partitioning ( ) . partition_count ( ) )
531529 . map ( |idx| coalesce_partitions. partition_statistics ( Some ( idx) ) )
532530 . collect :: < Result < Vec < _ > > > ( ) ?;
@@ -575,8 +573,7 @@ mod test {
575573 . map ( |idx| global_limit. partition_statistics ( Some ( idx) ) )
576574 . collect :: < Result < Vec < _ > > > ( ) ?;
577575 assert_eq ! ( statistics. len( ) , 1 ) ;
578- let expected_statistic_partition =
579- create_partition_statistics ( 2 , 110 , 3 , 4 , true ) ;
576+ let expected_statistic_partition = create_partition_statistics ( 2 , 16 , 3 , 4 , true ) ;
580577 assert_eq ! ( statistics[ 0 ] , expected_statistic_partition) ;
581578 Ok ( ( ) )
582579 }
@@ -627,7 +624,11 @@ mod test {
627624
628625 let expected_p0_statistics = Statistics {
629626 num_rows : Precision :: Inexact ( 2 ) ,
630- total_byte_size : Precision :: Inexact ( 110 ) ,
627+ // The 2 rows produce 16 bytes of data in total (8 bytes per row):
628+ // - id column: Int32 (4 bytes) × 2 rows = 8 bytes
629+ // - id + 1 column: Int32 (4 bytes) × 2 rows = 8 bytes
630+ // AggregateExec cannot yet derive byte sizes for the COUNT(c) column
631+ total_byte_size : Precision :: Inexact ( 16 ) ,
631632 column_statistics : vec ! [
632633 ColumnStatistics {
633634 null_count: Precision :: Absent ,
@@ -645,7 +646,11 @@ mod test {
645646
646647 let expected_p1_statistics = Statistics {
647648 num_rows : Precision :: Inexact ( 2 ) ,
648- total_byte_size : Precision :: Inexact ( 110 ) ,
649+ // The 2 rows produce 16 bytes of data in total (8 bytes per row):
650+ // - id column: Int32 (4 bytes) × 2 rows = 8 bytes
651+ // - id + 1 column: Int32 (4 bytes) × 2 rows = 8 bytes
652+ // AggregateExec cannot yet derive byte sizes for the COUNT(c) column
653+ total_byte_size : Precision :: Inexact ( 16 ) ,
649654 column_statistics : vec ! [
650655 ColumnStatistics {
651656 null_count: Precision :: Absent ,
@@ -851,7 +856,7 @@ mod test {
851856
852857 let expected_stats = Statistics {
853858 num_rows : Precision :: Inexact ( 1 ) ,
854- total_byte_size : Precision :: Inexact ( 73 ) ,
859+ total_byte_size : Precision :: Inexact ( 10 ) ,
855860 column_statistics : vec ! [
856861 ColumnStatistics :: new_unknown( ) ,
857862 ColumnStatistics :: new_unknown( ) ,
@@ -955,7 +960,7 @@ mod test {
955960
956961 let expected_stats = Statistics {
957962 num_rows : Precision :: Inexact ( 2 ) ,
958- total_byte_size : Precision :: Inexact ( 110 ) ,
963+ total_byte_size : Precision :: Inexact ( 16 ) ,
959964 column_statistics : vec ! [
960965 ColumnStatistics :: new_unknown( ) ,
961966 ColumnStatistics :: new_unknown( ) ,
0 commit comments