@@ -61,7 +61,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
6161 const float out_linear_in_scale,
6262 const int encoder_block_shape_q,
6363 const int decoder_block_shape_q,
64- const int max_partition_size,
6564 const int encoder_max_partition_size,
6665 const int speculate_max_draft_token_num,
6766 const bool causal,
@@ -209,7 +208,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
209208 quant_max_bound,
210209 quant_min_bound,
211210 out_linear_in_scale,
212- max_partition_size,
213211 encoder_max_partition_size,
214212 speculate_max_draft_token_num,
215213 causal,
@@ -248,7 +246,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
248246 quant_max_bound,
249247 quant_min_bound,
250248 out_linear_in_scale,
251- max_partition_size,
252249 encoder_max_partition_size,
253250 speculate_max_draft_token_num,
254251 causal,
@@ -292,7 +289,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
292289 quant_max_bound,
293290 quant_min_bound,
294291 out_linear_in_scale,
295- max_partition_size,
296292 encoder_max_partition_size,
297293 speculate_max_draft_token_num,
298294 causal,
@@ -440,7 +436,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
440436 quant_max_bound,
441437 quant_min_bound,
442438 out_linear_in_scale,
443- max_partition_size,
444439 encoder_max_partition_size,
445440 speculate_max_draft_token_num,
446441 causal,
@@ -479,7 +474,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
479474 quant_max_bound,
480475 quant_min_bound,
481476 out_linear_in_scale,
482- max_partition_size,
483477 encoder_max_partition_size,
484478 speculate_max_draft_token_num,
485479 causal,
@@ -524,7 +518,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
524518 quant_max_bound,
525519 quant_min_bound,
526520 out_linear_in_scale,
527- max_partition_size,
528521 encoder_max_partition_size,
529522 speculate_max_draft_token_num,
530523 causal,
@@ -585,7 +578,6 @@ std::vector<paddle::Tensor> AppendAttention(
585578 const float out_linear_in_scale,
586579 const int encoder_block_shape_q,
587580 const int decoder_block_shape_q,
588- const int max_partition_size,
589581 const int encoder_max_partition_size,
590582 const int speculate_max_draft_token_num,
591583 const bool causal,
@@ -650,7 +642,6 @@ std::vector<paddle::Tensor> AppendAttention(
650642 out_linear_in_scale,
651643 encoder_block_shape_q,
652644 decoder_block_shape_q,
653- max_partition_size,
654645 encoder_max_partition_size,
655646 speculate_max_draft_token_num,
656647 causal,
@@ -700,7 +691,6 @@ std::vector<paddle::Tensor> AppendAttention(
700691 out_linear_in_scale,
701692 encoder_block_shape_q,
702693 decoder_block_shape_q,
703- max_partition_size,
704694 encoder_max_partition_size,
705695 speculate_max_draft_token_num,
706696 causal,
@@ -751,7 +741,6 @@ std::vector<paddle::Tensor> AppendAttention(
751741 out_linear_in_scale,
752742 encoder_block_shape_q,
753743 decoder_block_shape_q,
754- max_partition_size,
755744 encoder_max_partition_size,
756745 speculate_max_draft_token_num,
757746 causal,
@@ -800,7 +789,6 @@ std::vector<paddle::Tensor> AppendAttention(
800789 out_linear_in_scale,
801790 encoder_block_shape_q,
802791 decoder_block_shape_q,
803- max_partition_size,
804792 encoder_max_partition_size,
805793 speculate_max_draft_token_num,
806794 causal,
@@ -905,7 +893,6 @@ std::vector<paddle::DataType> AppendAttentionInferDtype(
905893 const float out_linear_in_scale,
906894 const int encoder_block_shape_q,
907895 const int decoder_block_shape_q,
908- const int max_partition_size,
909896 const int encoder_max_partition_size,
910897 const int speculate_max_draft_token_num,
911898 const bool causal,
@@ -985,7 +972,6 @@ PD_BUILD_OP(append_attention)
985972 " out_linear_in_scale: float" ,
986973 " encoder_block_shape_q: int" ,
987974 " decoder_block_shape_q: int" ,
988- " max_partition_size: int" ,
989975 " encoder_max_partition_size: int" ,
990976 " speculate_max_draft_token_num: int" ,
991977 " causal: bool" ,
0 commit comments