@@ -61,8 +61,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
6161 const float out_linear_in_scale,
6262 const int encoder_block_shape_q,
6363 const int decoder_block_shape_q,
64- const int max_partition_size,
65- const int encoder_max_partition_size,
6664 const int speculate_max_draft_token_num,
6765 const bool causal,
6866 const bool speculate_decoder) {
@@ -209,8 +207,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
209207 quant_max_bound,
210208 quant_min_bound,
211209 out_linear_in_scale,
212- max_partition_size,
213- encoder_max_partition_size,
214210 speculate_max_draft_token_num,
215211 causal,
216212 false ,
@@ -248,8 +244,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
248244 quant_max_bound,
249245 quant_min_bound,
250246 out_linear_in_scale,
251- max_partition_size,
252- encoder_max_partition_size,
253247 speculate_max_draft_token_num,
254248 causal,
255249 false ,
@@ -292,8 +286,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
292286 quant_max_bound,
293287 quant_min_bound,
294288 out_linear_in_scale,
295- max_partition_size,
296- encoder_max_partition_size,
297289 speculate_max_draft_token_num,
298290 causal,
299291 false ,
@@ -440,8 +432,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
440432 quant_max_bound,
441433 quant_min_bound,
442434 out_linear_in_scale,
443- max_partition_size,
444- encoder_max_partition_size,
445435 speculate_max_draft_token_num,
446436 causal,
447437 !speculate_decoder,
@@ -479,8 +469,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
479469 quant_max_bound,
480470 quant_min_bound,
481471 out_linear_in_scale,
482- max_partition_size,
483- encoder_max_partition_size,
484472 speculate_max_draft_token_num,
485473 causal,
486474 !speculate_decoder,
@@ -524,8 +512,6 @@ std::vector<paddle::Tensor> AppendAttentionKernel(
524512 quant_max_bound,
525513 quant_min_bound,
526514 out_linear_in_scale,
527- max_partition_size,
528- encoder_max_partition_size,
529515 speculate_max_draft_token_num,
530516 causal,
531517 !speculate_decoder,
@@ -585,8 +571,6 @@ std::vector<paddle::Tensor> AppendAttention(
585571 const float out_linear_in_scale,
586572 const int encoder_block_shape_q,
587573 const int decoder_block_shape_q,
588- const int max_partition_size,
589- const int encoder_max_partition_size,
590574 const int speculate_max_draft_token_num,
591575 const bool causal,
592576 const bool speculate_decoder) {
@@ -650,8 +634,6 @@ std::vector<paddle::Tensor> AppendAttention(
650634 out_linear_in_scale,
651635 encoder_block_shape_q,
652636 decoder_block_shape_q,
653- max_partition_size,
654- encoder_max_partition_size,
655637 speculate_max_draft_token_num,
656638 causal,
657639 speculate_decoder);
@@ -700,8 +682,6 @@ std::vector<paddle::Tensor> AppendAttention(
700682 out_linear_in_scale,
701683 encoder_block_shape_q,
702684 decoder_block_shape_q,
703- max_partition_size,
704- encoder_max_partition_size,
705685 speculate_max_draft_token_num,
706686 causal,
707687 speculate_decoder);
@@ -751,8 +731,6 @@ std::vector<paddle::Tensor> AppendAttention(
751731 out_linear_in_scale,
752732 encoder_block_shape_q,
753733 decoder_block_shape_q,
754- max_partition_size,
755- encoder_max_partition_size,
756734 speculate_max_draft_token_num,
757735 causal,
758736 speculate_decoder);
@@ -800,8 +778,6 @@ std::vector<paddle::Tensor> AppendAttention(
800778 out_linear_in_scale,
801779 encoder_block_shape_q,
802780 decoder_block_shape_q,
803- max_partition_size,
804- encoder_max_partition_size,
805781 speculate_max_draft_token_num,
806782 causal,
807783 speculate_decoder);
@@ -905,8 +881,6 @@ std::vector<paddle::DataType> AppendAttentionInferDtype(
905881 const float out_linear_in_scale,
906882 const int encoder_block_shape_q,
907883 const int decoder_block_shape_q,
908- const int max_partition_size,
909- const int encoder_max_partition_size,
910884 const int speculate_max_draft_token_num,
911885 const bool causal,
912886 const bool speculate_decoder) {
@@ -985,8 +959,6 @@ PD_BUILD_OP(append_attention)
985959 " out_linear_in_scale: float" ,
986960 " encoder_block_shape_q: int" ,
987961 " decoder_block_shape_q: int" ,
988- " max_partition_size: int" ,
989- " encoder_max_partition_size: int" ,
990962 " speculate_max_draft_token_num: int" ,
991963 " causal: bool" ,
992964 " speculate_decoder: bool" })
0 commit comments