From 6620e140251e56bde3380194125fc67f7a4e509a Mon Sep 17 00:00:00 2001
From: zhulin1 <zhulinJulia24@163.com>
Date: Wed, 19 Nov 2025 13:30:27 +0800
Subject: [PATCH] ci: free runner disk space; pass session-len to longtext benchmark

---
 .github/workflows/api_eval.yml            | 12 ++++++++++++
 .github/workflows/benchmark.yml           | 12 ++++++++++++
 .github/workflows/daily_ete_test.yml      | 12 ++++++++++++
 .github/workflows/daily_ete_test_3090.yml | 12 ++++++++++++
 .github/workflows/daily_ete_test_5080.yml | 12 ++++++++++++
 .github/workflows/daily_ete_test_h800.yml | 12 ++++++++++++
 .github/workflows/evaluate.yml            | 12 ++++++++++++
 autotest/utils/benchmark_utils.py         |  4 +++-
 benchmark/profile_pipeline_api.py         |  4 ++++
 9 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/api_eval.yml b/.github/workflows/api_eval.yml
index 84f7aceaa1..fb5f5cb5f8 100644
--- a/.github/workflows/api_eval.yml
+++ b/.github/workflows/api_eval.yml
@@ -64,6 +64,18 @@ jobs:
       DOCKER_TAG: cuda12.8
       OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # Setting this to "true" frees about 6 GB, but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # All of these default to true, but feel free to set to "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 7449427bd5..2bf1ce397f 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -46,6 +46,18 @@ jobs:
       PLAT_NAME: manylinux2014_x86_64
       DOCKER_TAG: cuda12.8
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # Setting this to "true" frees about 6 GB, but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # All of these default to true, but feel free to set to "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/.github/workflows/daily_ete_test.yml b/.github/workflows/daily_ete_test.yml
index aae0174672..10fda797c8 100644
--- a/.github/workflows/daily_ete_test.yml
+++ b/.github/workflows/daily_ete_test.yml
@@ -66,6 +66,18 @@ jobs:
       PLAT_NAME: manylinux2014_x86_64
       DOCKER_TAG: cuda12.8
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # Setting this to "true" frees about 6 GB, but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # All of these default to true, but feel free to set to "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/.github/workflows/daily_ete_test_3090.yml b/.github/workflows/daily_ete_test_3090.yml
index 908801920e..f8ac26ce87 100644
--- a/.github/workflows/daily_ete_test_3090.yml
+++ b/.github/workflows/daily_ete_test_3090.yml
@@ -65,6 +65,18 @@ jobs:
       PLAT_NAME: manylinux2014_x86_64
       DOCKER_TAG: cuda12.4
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # Setting this to "true" frees about 6 GB, but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # All of these default to true, but feel free to set to "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/.github/workflows/daily_ete_test_5080.yml b/.github/workflows/daily_ete_test_5080.yml
index 6916883978..fb6f0c8972 100644
--- a/.github/workflows/daily_ete_test_5080.yml
+++ b/.github/workflows/daily_ete_test_5080.yml
@@ -65,6 +65,18 @@ jobs:
       PLAT_NAME: manylinux2014_x86_64
       DOCKER_TAG: cuda12.8
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # Setting this to "true" frees about 6 GB, but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # All of these default to true, but feel free to set to "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/.github/workflows/daily_ete_test_h800.yml b/.github/workflows/daily_ete_test_h800.yml
index 75cfa418be..547b2816ea 100644
--- a/.github/workflows/daily_ete_test_h800.yml
+++ b/.github/workflows/daily_ete_test_h800.yml
@@ -65,6 +65,18 @@ jobs:
       PLAT_NAME: manylinux2014_x86_64
       DOCKER_TAG: cuda12.8
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # Setting this to "true" frees about 6 GB, but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # All of these default to true, but feel free to set to "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
index 8f67a342cc..9615d4a38c 100644
--- a/.github/workflows/evaluate.yml
+++ b/.github/workflows/evaluate.yml
@@ -56,6 +56,18 @@ jobs:
       DOCKER_TAG: cuda12.8
       OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@main
+        with:
+          # Setting this to "true" frees about 6 GB, but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # All of these default to true, but feel free to set to "false" if necessary for your workflow
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/autotest/utils/benchmark_utils.py b/autotest/utils/benchmark_utils.py
index 9f517257dd..c69fa02a35 100644
--- a/autotest/utils/benchmark_utils.py
+++ b/autotest/utils/benchmark_utils.py
@@ -105,12 +105,14 @@ def longtext_throughput_test(config,
     for input_len, out_len, num_prompts, case_name, concurrency in [(1, 32768, 20, '32k', None),
                                                                     (1, 65536, 10, '64k', None),
                                                                     (198000, 1024, 3, '198k', 1)]:
+        session_len = input_len + out_len
         csv_path = f'{benchmark_path}/longtext_{case_name}_1th.csv'
         benchmark_log = os.path.join(
             log_path, f'benchmark_longtext_throughput_{case_name}' + model.split('/')[1] + worker_id + '.log')
         cmd = ' '.join([
             command, '--dataset-name random', f'--random-input-len {input_len}', f'--random-output-len {out_len}',
-            f'--num-prompts {num_prompts}', '--stream-output', f'--csv {csv_path}'
+            f'--num-prompts {num_prompts}', '--stream-output', f'--session-len {session_len}', '--random-range-ratio 1',
+            f'--csv {csv_path}'
         ])
         if concurrency:
             cmd += f' --concurrency {concurrency}'
diff --git a/benchmark/profile_pipeline_api.py b/benchmark/profile_pipeline_api.py
index ebffdd317c..3c72722610 100644
--- a/benchmark/profile_pipeline_api.py
+++ b/benchmark/profile_pipeline_api.py
@@ -259,6 +259,7 @@ def parse_args():
 
     tp_act = ArgumentHelper.tp(pt_group)
     cache_count_act = ArgumentHelper.cache_max_entry_count(pt_group)
+    session_len_act = ArgumentHelper.session_len(pt_group)
     cache_block_seq_len_act = ArgumentHelper.cache_block_seq_len(pt_group)
     prefix_caching_act = ArgumentHelper.enable_prefix_caching(pt_group)
 
@@ -266,6 +267,7 @@ def parse_args():
     tb_group = parser.add_argument_group('TurboMind engine argument')
     tb_group._group_actions.append(tp_act)
     tb_group._group_actions.append(cache_count_act)
+    tb_group._group_actions.append(session_len_act)
     tb_group._group_actions.append(cache_block_seq_len_act)
     tb_group._group_actions.append(prefix_caching_act)
     ArgumentHelper.model_format(tb_group, default='hf')
@@ -287,6 +289,7 @@ def main():
             max_batch_size=args.concurrency,
             tp=args.tp,
             cache_max_entry_count=args.cache_max_entry_count,
+            session_len=args.session_len,
             cache_block_seq_len=args.cache_block_seq_len,
             model_format=args.model_format,
             quant_policy=args.quant_policy,
@@ -298,6 +301,7 @@ def main():
     elif args.backend == 'pytorch':
         engine_config = PytorchEngineConfig(
             cache_max_entry_count=args.cache_max_entry_count,
+            session_len=args.session_len,
             block_size=args.cache_block_seq_len,
             max_batch_size=args.concurrency,
             tp=args.tp,