From 8a40e271ddff6861836f12807ad3ae1360a5ffdd Mon Sep 17 00:00:00 2001
From: lvhan028
Date: Mon, 1 Sep 2025 19:10:50 +0800
Subject: [PATCH 1/2] adjust default values

---
 lmdeploy/cli/utils.py | 13 +++++++------
 lmdeploy/utils.py     |  2 +-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/lmdeploy/cli/utils.py b/lmdeploy/cli/utils.py
index 111af08fd4..36e4d672ef 100644
--- a/lmdeploy/cli/utils.py
+++ b/lmdeploy/cli/utils.py
@@ -526,7 +526,7 @@ def max_log_len(parser):
         return parser.add_argument('--max-log-len',
                                    type=int,
                                    default=None,
-                                   help='Max number of prompt characters or prompt tokens being'
+                                   help='Max number of prompt characters or prompt tokens being '
                                    'printed in log. Default: Unlimited')
 
     @staticmethod
@@ -552,8 +552,9 @@ def communicator(parser):
         return parser.add_argument('--communicator',
                                    type=str,
                                    default='nccl',
-                                   choices=['nccl', 'native'],
-                                   help='Communication backend for multi-GPU inference')
+                                   choices=['nccl', 'native', 'cuda-ipc'],
+                                   help='Communication backend for multi-GPU inference. The "native" option is '
+                                   'deprecated and serves as an alias for "cuda-ipc"')
 
     @staticmethod
     def enable_microbatch(parser):
@@ -581,9 +582,9 @@ def role(parser):
                                    type=str,
                                    default='Hybrid',
                                    choices=['Hybrid', 'Prefill', 'Decode'],
-                                   help='Hybrid for Non-Disaggregated Engine;'
-                                   'Prefill for Disaggregated Prefill Engine;'
-                                   'Decode for Disaggregated Decode Engine;')
+                                   help='Hybrid for Non-Disaggregated Engine; '
+                                   'Prefill for Disaggregated Prefill Engine; '
+                                   'Decode for Disaggregated Decode Engine')
 
     @staticmethod
     def migration_backend(parser):
diff --git a/lmdeploy/utils.py b/lmdeploy/utils.py
index 3d4e98bfb8..cb4f5df667 100644
--- a/lmdeploy/utils.py
+++ b/lmdeploy/utils.py
@@ -337,7 +337,7 @@ def get_max_batch_size(device_type: str):
     """
     assert device_type in ['cuda', 'ascend', 'maca', 'camb']
     if device_type == 'cuda':
-        max_batch_size_map = {'a100': 256, 'a800': 256, 'h100': 512, 'h800': 512}
+        max_batch_size_map = {'a100': 512, 'a800': 512, 'h100': 1024, 'h800': 1024}
         import torch
         device_name = torch.cuda.get_device_name(0).lower()
         for name, size in max_batch_size_map.items():

From 9f73eddc627a94aa53c7e85953e749091e61c793 Mon Sep 17 00:00:00 2001
From: lvhan028
Date: Wed, 3 Sep 2025 12:58:36 +0800
Subject: [PATCH 2/2] update

---
 lmdeploy/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lmdeploy/utils.py b/lmdeploy/utils.py
index cb4f5df667..0afeeb1ca5 100644
--- a/lmdeploy/utils.py
+++ b/lmdeploy/utils.py
@@ -337,7 +337,7 @@ def get_max_batch_size(device_type: str):
     """
    assert device_type in ['cuda', 'ascend', 'maca', 'camb']
     if device_type == 'cuda':
-        max_batch_size_map = {'a100': 512, 'a800': 512, 'h100': 1024, 'h800': 1024}
+        max_batch_size_map = {'a100': 384, 'a800': 384, 'h100': 1024, 'h800': 1024, 'l20y': 1024, 'h200': 1024}
         import torch
         device_name = torch.cuda.get_device_name(0).lower()
         for name, size in max_batch_size_map.items():