Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions lmdeploy/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,7 +526,7 @@ def max_log_len(parser):
return parser.add_argument('--max-log-len',
type=int,
default=None,
- help='Max number of prompt characters or prompt tokens being'
+ help='Max number of prompt characters or prompt tokens being '
'printed in log. Default: Unlimited')

@staticmethod
Expand All @@ -552,8 +552,9 @@ def communicator(parser):
return parser.add_argument('--communicator',
type=str,
default='nccl',
- choices=['nccl', 'native'],
- help='Communication backend for multi-GPU inference')
+ choices=['nccl', 'native', 'cuda-ipc'],
+ help='Communication backend for multi-GPU inference. The "native" option is '
+ 'deprecated and serves as an alias for "cuda-ipc"')

@staticmethod
def enable_microbatch(parser):
Expand Down Expand Up @@ -581,9 +582,9 @@ def role(parser):
type=str,
default='Hybrid',
choices=['Hybrid', 'Prefill', 'Decode'],
- help='Hybrid for Non-Disaggregated Engine;'
- 'Prefill for Disaggregated Prefill Engine;'
- 'Decode for Disaggregated Decode Engine;')
+ help='Hybrid for Non-Disaggregated Engine; '
+ 'Prefill for Disaggregated Prefill Engine; '
+ 'Decode for Disaggregated Decode Engine')

@staticmethod
def migration_backend(parser):
Expand Down
2 changes: 1 addition & 1 deletion lmdeploy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def get_max_batch_size(device_type: str):
"""
assert device_type in ['cuda', 'ascend', 'maca', 'camb']
if device_type == 'cuda':
- max_batch_size_map = {'a100': 256, 'a800': 256, 'h100': 512, 'h800': 512}
+ max_batch_size_map = {'a100': 384, 'a800': 384, 'h100': 1024, 'h800': 1024, 'l20y': 1024, 'h200': 1024}
import torch
device_name = torch.cuda.get_device_name(0).lower()
for name, size in max_batch_size_map.items():
Expand Down