From 6620e140251e56bde3380194125fc67f7a4e509a Mon Sep 17 00:00:00 2001 From: zhulin1 Date: Wed, 19 Nov 2025 13:30:27 +0800 Subject: [PATCH] update --- .github/workflows/api_eval.yml | 12 ++++++++++++ .github/workflows/benchmark.yml | 12 ++++++++++++ .github/workflows/daily_ete_test.yml | 12 ++++++++++++ .github/workflows/daily_ete_test_3090.yml | 12 ++++++++++++ .github/workflows/daily_ete_test_5080.yml | 12 ++++++++++++ .github/workflows/daily_ete_test_h800.yml | 12 ++++++++++++ .github/workflows/evaluate.yml | 12 ++++++++++++ autotest/utils/benchmark_utils.py | 4 +++- benchmark/profile_pipeline_api.py | 4 ++++ 9 files changed, 91 insertions(+), 1 deletion(-) diff --git a/.github/workflows/api_eval.yml b/.github/workflows/api_eval.yml index 84f7aceaa1..fb5f5cb5f8 100644 --- a/.github/workflows/api_eval.yml +++ b/.github/workflows/api_eval.yml @@ -64,6 +64,18 @@ jobs: DOCKER_TAG: cuda12.8 OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }} steps: + - name: Free disk space + uses: jlumbroso/free-disk-space@main + with: + # This might remove tools that are actually needed, if set to "true" but frees about 6 GB + tool-cache: false + docker-images: false + # All of these default to true, but feel free to set to "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: false - name: Checkout repository uses: actions/checkout@v3 with: diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 7449427bd5..2bf1ce397f 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -46,6 +46,18 @@ jobs: PLAT_NAME: manylinux2014_x86_64 DOCKER_TAG: cuda12.8 steps: + - name: Free disk space + uses: jlumbroso/free-disk-space@main + with: + # This might remove tools that are actually needed, if set to "true" but frees about 6 GB + tool-cache: false + docker-images: false + # All of these default to true, but feel free to set to "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: false - name: Checkout repository uses: actions/checkout@v3 with: diff --git a/.github/workflows/daily_ete_test.yml b/.github/workflows/daily_ete_test.yml index aae0174672..10fda797c8 100644 --- a/.github/workflows/daily_ete_test.yml +++ b/.github/workflows/daily_ete_test.yml @@ -66,6 +66,18 @@ jobs: PLAT_NAME: manylinux2014_x86_64 DOCKER_TAG: cuda12.8 steps: + - name: Free disk space + uses: jlumbroso/free-disk-space@main + with: + # This might remove tools that are actually needed, if set to "true" but frees about 6 GB + tool-cache: false + docker-images: false + # All of these default to true, but feel free to set to "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: false - name: Checkout repository uses: actions/checkout@v3 with: diff --git a/.github/workflows/daily_ete_test_3090.yml b/.github/workflows/daily_ete_test_3090.yml index 908801920e..f8ac26ce87 100644 --- a/.github/workflows/daily_ete_test_3090.yml +++ b/.github/workflows/daily_ete_test_3090.yml @@ -65,6 +65,18 @@ jobs: PLAT_NAME: manylinux2014_x86_64 DOCKER_TAG: cuda12.4 steps: + - name: Free disk space + uses: jlumbroso/free-disk-space@main + with: + # This might remove tools that are actually needed, if set to "true" but frees about 6 GB + tool-cache: false + docker-images: false + # All of these default to true, but feel free to set to "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: false - name: Checkout repository uses: actions/checkout@v3 with: diff --git a/.github/workflows/daily_ete_test_5080.yml b/.github/workflows/daily_ete_test_5080.yml index 6916883978..fb6f0c8972 100644 --- a/.github/workflows/daily_ete_test_5080.yml +++ b/.github/workflows/daily_ete_test_5080.yml @@ -65,6 +65,18 @@ jobs: PLAT_NAME: manylinux2014_x86_64 DOCKER_TAG: cuda12.8 steps: + - name: Free disk space + uses: jlumbroso/free-disk-space@main + with: + # This might remove tools that are actually needed, if set to "true" but frees about 6 GB + tool-cache: false + docker-images: false + # All of these default to true, but feel free to set to "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: false - name: Checkout repository uses: actions/checkout@v3 with: diff --git a/.github/workflows/daily_ete_test_h800.yml b/.github/workflows/daily_ete_test_h800.yml index 75cfa418be..547b2816ea 100644 --- a/.github/workflows/daily_ete_test_h800.yml +++ b/.github/workflows/daily_ete_test_h800.yml @@ -65,6 +65,18 @@ jobs: PLAT_NAME: manylinux2014_x86_64 DOCKER_TAG: cuda12.8 steps: + - name: Free disk space + uses: jlumbroso/free-disk-space@main + with: + # This might remove tools that are actually needed, if set to "true" but frees about 6 GB + tool-cache: false + docker-images: false + # All of these default to true, but feel free to set to "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: false - name: Checkout repository uses: actions/checkout@v3 with: diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml index 8f67a342cc..9615d4a38c 100644 --- a/.github/workflows/evaluate.yml +++ b/.github/workflows/evaluate.yml @@ -56,6 +56,18 @@ jobs: DOCKER_TAG: cuda12.8 OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }} steps: + - name: Free disk space + uses: jlumbroso/free-disk-space@main + with: + # This might remove tools that are actually needed, if set to "true" but frees about 6 GB + tool-cache: false + docker-images: false + # All of these default to true, but feel free to set to "false" if necessary for your workflow + android: true + dotnet: true + haskell: true + large-packages: true + swap-storage: false - name: Checkout repository uses: actions/checkout@v3 with: diff --git a/autotest/utils/benchmark_utils.py b/autotest/utils/benchmark_utils.py index 9f517257dd..c69fa02a35 100644 --- a/autotest/utils/benchmark_utils.py +++ b/autotest/utils/benchmark_utils.py @@ -105,12 +105,14 @@ def longtext_throughput_test(config, for input_len, out_len, num_prompts, case_name, concurrency in [(1, 32768, 20, '32k', None), (1, 65536, 10, '64k', None), (198000, 1024, 3, '198k', 1)]: + session_len = input_len + out_len csv_path = f'{benchmark_path}/longtext_{case_name}_1th.csv' benchmark_log = os.path.join( log_path, f'benchmark_longtext_throughput_{case_name}' + model.split('/')[1] + worker_id + '.log') cmd = ' '.join([ command, '--dataset-name random', f'--random-input-len {input_len}', f'--random-output-len {out_len}', - f'--num-prompts {num_prompts}', '--stream-output', f'--csv {csv_path}' + f'--num-prompts {num_prompts}', '--stream-output', f'--session-len {session_len}', '--random-range-ratio 1', + f'--csv {csv_path}' ]) if concurrency: cmd += f' --concurrency {concurrency}' diff --git a/benchmark/profile_pipeline_api.py b/benchmark/profile_pipeline_api.py index ebffdd317c..3c72722610 100644 --- a/benchmark/profile_pipeline_api.py +++ b/benchmark/profile_pipeline_api.py @@ -259,6 +259,7 @@ def parse_args(): tp_act = ArgumentHelper.tp(pt_group) cache_count_act = ArgumentHelper.cache_max_entry_count(pt_group) + session_len_act = ArgumentHelper.session_len(pt_group) cache_block_seq_len_act = ArgumentHelper.cache_block_seq_len(pt_group) prefix_caching_act = ArgumentHelper.enable_prefix_caching(pt_group) @@ -266,6 +267,7 @@ def parse_args(): tb_group = parser.add_argument_group('TurboMind engine argument') tb_group._group_actions.append(tp_act) tb_group._group_actions.append(cache_count_act) + tb_group._group_actions.append(session_len_act) tb_group._group_actions.append(cache_block_seq_len_act) tb_group._group_actions.append(prefix_caching_act) ArgumentHelper.model_format(tb_group, default='hf') @@ -287,6 +289,7 @@ def main(): max_batch_size=args.concurrency, tp=args.tp, cache_max_entry_count=args.cache_max_entry_count, + session_len=args.session_len, cache_block_seq_len=args.cache_block_seq_len, model_format=args.model_format, quant_policy=args.quant_policy, @@ -298,6 +301,7 @@ def main(): elif args.backend == 'pytorch': engine_config = PytorchEngineConfig( cache_max_entry_count=args.cache_max_entry_count, + session_len=args.session_len, block_size=args.cache_block_seq_len, max_batch_size=args.concurrency, tp=args.tp,