From 619cc1e2d3d19466c035e18a084281fd8f85781f Mon Sep 17 00:00:00 2001 From: Nir Sonnenschein Date: Thu, 16 Oct 2025 14:28:38 +0300 Subject: [PATCH] add support for tensor learning rate (vs scalar) This change is intended to help enable support for using a tensor learning rate value instead of a scalar one. We found this helpful in cases where the optimizer is compiled with torch.compile (in such cases, changing the scalar LR value can trigger recompilation, degrading performance). The implementation allows the model script to determine the type of LR value used by setting the initial value. --- deepspeed/runtime/lr_schedules.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/deepspeed/runtime/lr_schedules.py b/deepspeed/runtime/lr_schedules.py index 0ff74695b553..cb722708e683 100755 --- a/deepspeed/runtime/lr_schedules.py +++ b/deepspeed/runtime/lr_schedules.py @@ -13,6 +13,7 @@ from torch.optim import Optimizer import math from deepspeed.utils import logger +from torch import tensor, is_tensor LR_SCHEDULE = 'lr_schedule' LR_RANGE_TEST = 'LRRangeTest' @@ -249,6 +250,9 @@ def get_lr_from_config(config): def update_lr(param_groups, lrs): for param_group, lr in zip(param_groups, lrs): + # new LR should match the type of current LR for scalar and Tensor LR support + if is_tensor(param_group['lr']): + lr = tensor([lr], device=param_group['lr'].device) param_group['lr'] = lr return [group['lr'] for group in param_groups]