From b4df37910b4c7619a3ac4a72665bd39572f89641 Mon Sep 17 00:00:00 2001
From: Gabriel Guralnick
Date: Tue, 31 Mar 2026 12:46:14 -0700
Subject: [PATCH] [FIX] Inline ceil_log2 in gpu_2d_continuous_cumsum to fix MakePackedAPI error

The intermediate variable ceil_log2 created a LetStmt-bound Var in the
TIR function. When MakePackedAPI processed the function, it saw
ceil_log2 as an undefined variable not passed as an API argument:

    Check failed: undefined.size() == 0: In PrimFunc
    gpu_2d_continuous_cumsum variables [ceil_log2] are used, but are
    not passed in as API arguments

Inline the expression into total_rounds to avoid the intermediate Var.
The computation is identical.
---
 python/tvm/relax/backend/gpu_generic/cumsum.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/tvm/relax/backend/gpu_generic/cumsum.py b/python/tvm/relax/backend/gpu_generic/cumsum.py
index bd2cec3bcd50..a2054fdf4178 100644
--- a/python/tvm/relax/backend/gpu_generic/cumsum.py
+++ b/python/tvm/relax/backend/gpu_generic/cumsum.py
@@ -159,8 +159,7 @@ def cumsum(var_a: T.handle, var_out: T.handle):
         A = T.match_buffer(var_a, [m, n], dtype=in_dtype)
         Out = T.match_buffer(var_out, [m, n], dtype=out_dtype)
         Tmp = T.alloc_buffer([m, n], dtype=out_dtype)
-        ceil_log2 = T.Cast("int64", T.ceil(T.log2(T.Cast("float32", n))))
-        total_rounds = ceil_log2 // LOG_BLOCK_N
+        total_rounds = T.Cast("int64", T.ceil(T.log2(T.Cast("float32", n)))) // LOG_BLOCK_N
 
         block_inclusive_inside_block(
             m, n, A, Out, Tmp, src_offset=T.int64(0), tmp_offset=T.int64(0)