@@ -75,8 +75,8 @@ use datafusion::physical_plan::expressions::{
7575} ;
7676use datafusion:: physical_plan:: filter:: FilterExec ;
7777use datafusion:: physical_plan:: joins:: {
78- HashJoinExec , NestedLoopJoinExec , PartitionMode , SortMergeJoinExec ,
79- StreamJoinPartitionMode , SymmetricHashJoinExec ,
78+ HashJoinExec , HashTableLookupExpr , NestedLoopJoinExec , PartitionMode ,
79+ SortMergeJoinExec , StreamJoinPartitionMode , SymmetricHashJoinExec ,
8080} ;
8181use datafusion:: physical_plan:: limit:: { GlobalLimitExec , LocalLimitExec } ;
8282use datafusion:: physical_plan:: placeholder_row:: PlaceholderRowExec ;
@@ -103,12 +103,12 @@ use datafusion_common::{
103103 internal_err, not_impl_err, DataFusionError , NullEquality , Result , UnnestOptions ,
104104} ;
105105use datafusion_expr:: {
106- Accumulator , AccumulatorFactoryFunction , AggregateUDF , ColumnarValue , ScalarUDF ,
107- Signature , SimpleAggregateUDF , WindowFrame , WindowFrameBound , WindowUDF ,
106+ Accumulator , AccumulatorFactoryFunction , AggregateUDF , ColumnarValue , ScalarUDF , Signature , SimpleAggregateUDF , WindowFrame , WindowFrameBound , WindowUDF
108107} ;
109108use datafusion_functions_aggregate:: average:: avg_udaf;
110109use datafusion_functions_aggregate:: nth_value:: nth_value_udaf;
111110use datafusion_functions_aggregate:: string_agg:: string_agg_udaf;
111+ use datafusion:: physical_plan:: joins:: join_hash_map:: JoinHashMapU32 ;
112112use datafusion_proto:: physical_plan:: {
113113 AsExecutionPlan , DefaultPhysicalExtensionCodec , PhysicalExtensionCodec ,
114114} ;
@@ -2238,3 +2238,48 @@ async fn roundtrip_memory_source() -> Result<()> {
22382238 . await ?;
22392239 roundtrip_test ( plan)
22402240}
2241+
/// Test that `HashTableLookupExpr` serializes to `lit(true)`.
///
/// `HashTableLookupExpr` contains a runtime hash table that cannot be serialized.
/// The serialization code replaces it with `lit(true)`, which is safe because
/// the lookup is a performance optimization filter, not a correctness requirement.
#[test]
fn roundtrip_hash_table_lookup_expr_to_lit() -> Result<()> {
    // Create a simple schema and input plan. The schema is not needed again,
    // so it is moved into the plan instead of being cloned.
    let schema = Arc::new(Schema::new(vec![Field::new("col", DataType::Int64, false)]));
    let input = Arc::new(EmptyExec::new(schema));

    // Create a HashTableLookupExpr over an empty hash map; it is expected to be
    // replaced with lit(true) during serialization.
    let hash_map = Arc::new(JoinHashMapU32::with_capacity(0));
    let hash_expr: Arc<dyn PhysicalExpr> = Arc::new(Column::new("col", 0));
    let lookup_expr: Arc<dyn PhysicalExpr> = Arc::new(HashTableLookupExpr::new(
        hash_expr,
        hash_map,
        "test_lookup".to_string(),
    ));

    // Create a filter with the lookup expression as its predicate.
    let filter = Arc::new(FilterExec::try_new(lookup_expr, input)?);

    // Serialize. The filter is not used afterwards, so it is moved rather than cloned.
    let ctx = SessionContext::new();
    let codec = DefaultPhysicalExtensionCodec {};
    let proto: protobuf::PhysicalPlanNode =
        protobuf::PhysicalPlanNode::try_from_physical_plan(filter, &codec)
            .expect("serialization should succeed");

    // Deserialize
    let result: Arc<dyn ExecutionPlan> = proto
        .try_into_physical_plan(&ctx, &ctx.runtime_env(), &codec)
        .expect("deserialization should succeed");

    // The deserialized plan should still be a FilterExec, but its predicate
    // should now be Literal(true) instead of the HashTableLookupExpr.
    let result_filter = result
        .as_any()
        .downcast_ref::<FilterExec>()
        .expect("deserialized plan should be a FilterExec");
    let literal = result_filter
        .predicate()
        .as_any()
        .downcast_ref::<Literal>()
        .expect("deserialized predicate should be a Literal");
    assert_eq!(*literal.value(), ScalarValue::Boolean(Some(true)));

    Ok(())
}
0 commit comments