From 8a40e271ddff6861836f12807ad3ae1360a5ffdd Mon Sep 17 00:00:00 2001
From: lvhan028
Date: Mon, 1 Sep 2025 19:10:50 +0800
Subject: [PATCH 1/2] adjust default values

---
 lmdeploy/cli/utils.py | 13 +++++++------
 lmdeploy/utils.py     |  2 +-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/lmdeploy/cli/utils.py b/lmdeploy/cli/utils.py
index 111af08fd4..36e4d672ef 100644
--- a/lmdeploy/cli/utils.py
+++ b/lmdeploy/cli/utils.py
@@ -526,7 +526,7 @@ def max_log_len(parser):
         return parser.add_argument('--max-log-len',
                                    type=int,
                                    default=None,
-                                   help='Max number of prompt characters or prompt tokens being'
+                                   help='Max number of prompt characters or prompt tokens being '
                                    'printed in log. Default: Unlimited')
 
     @staticmethod
@@ -552,8 +552,9 @@ def communicator(parser):
         return parser.add_argument('--communicator',
                                    type=str,
                                    default='nccl',
-                                   choices=['nccl', 'native'],
-                                   help='Communication backend for multi-GPU inference')
+                                   choices=['nccl', 'native', 'cuda-ipc'],
+                                   help='Communication backend for multi-GPU inference. The "native" option is '
+                                   'deprecated and serves as an alias for "cuda-ipc"')
 
     @staticmethod
     def enable_microbatch(parser):
@@ -581,9 +582,9 @@ def role(parser):
                                    type=str,
                                    default='Hybrid',
                                    choices=['Hybrid', 'Prefill', 'Decode'],
-                                   help='Hybrid for Non-Disaggregated Engine;'
-                                   'Prefill for Disaggregated Prefill Engine;'
-                                   'Decode for Disaggregated Decode Engine;')
+                                   help='Hybrid for Non-Disaggregated Engine; '
+                                   'Prefill for Disaggregated Prefill Engine; '
+                                   'Decode for Disaggregated Decode Engine')
 
     @staticmethod
     def migration_backend(parser):
diff --git a/lmdeploy/utils.py b/lmdeploy/utils.py
index 3d4e98bfb8..cb4f5df667 100644
--- a/lmdeploy/utils.py
+++ b/lmdeploy/utils.py
@@ -337,7 +337,7 @@ def get_max_batch_size(device_type: str):
     """
     assert device_type in ['cuda', 'ascend', 'maca', 'camb']
     if device_type == 'cuda':
-        max_batch_size_map = {'a100': 256, 'a800': 256, 'h100': 512, 'h800': 512}
+        max_batch_size_map = {'a100': 512, 'a800': 512, 'h100': 1024, 'h800': 1024}
         import torch
         device_name = torch.cuda.get_device_name(0).lower()
         for name, size in max_batch_size_map.items():

From 9f73eddc627a94aa53c7e85953e749091e61c793 Mon Sep 17 00:00:00 2001
From: lvhan028
Date: Wed, 3 Sep 2025 12:58:36 +0800
Subject: [PATCH 2/2] update

---
 lmdeploy/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lmdeploy/utils.py b/lmdeploy/utils.py
index cb4f5df667..0afeeb1ca5 100644
--- a/lmdeploy/utils.py
+++ b/lmdeploy/utils.py
@@ -337,7 +337,7 @@ def get_max_batch_size(device_type: str):
     """
    assert device_type in ['cuda', 'ascend', 'maca', 'camb']
     if device_type == 'cuda':
-        max_batch_size_map = {'a100': 512, 'a800': 512, 'h100': 1024, 'h800': 1024}
+        max_batch_size_map = {'a100': 384, 'a800': 384, 'h100': 1024, 'h800': 1024, 'l20y': 1024, 'h200': 1024}
         import torch
         device_name = torch.cuda.get_device_name(0).lower()
         for name, size in max_batch_size_map.items():