From 619cc1e2d3d19466c035e18a084281fd8f85781f Mon Sep 17 00:00:00 2001
From: Nir Sonnenschein <nsonnenschein@habana.ai>
Date: Thu, 16 Oct 2025 14:28:38 +0300
Subject: [PATCH] add support for tensor learning rate (vs scalar)

This change adds support for using a tensor learning rate
value instead of a scalar one. We found this helpful in
cases where the optimizer is torch.compiled (in such cases,
changing the scalar LR value could cause recompilation,
degrading performance).
The implementation lets the model script determine the type of
LR value used by setting the initial value: if the initial LR is a
tensor, the scheduler keeps it a tensor; otherwise it stays a scalar.
---
 deepspeed/runtime/lr_schedules.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/deepspeed/runtime/lr_schedules.py b/deepspeed/runtime/lr_schedules.py
index 0ff74695b553..cb722708e683 100755
--- a/deepspeed/runtime/lr_schedules.py
+++ b/deepspeed/runtime/lr_schedules.py
@@ -13,6 +13,7 @@
 from torch.optim import Optimizer
 import math
 from deepspeed.utils import logger
+from torch import tensor, is_tensor
 
 LR_SCHEDULE = 'lr_schedule'
 LR_RANGE_TEST = 'LRRangeTest'
@@ -249,6 +250,9 @@ def get_lr_from_config(config):
 
 def update_lr(param_groups, lrs):
     for param_group, lr in zip(param_groups, lrs):
+        cur_lr = param_group['lr']
+        if is_tensor(cur_lr):  # keep the tensor LR's dtype, shape and device; a scalar LR is assigned as-is
+            lr = tensor(lr, dtype=cur_lr.dtype, device=cur_lr.device).reshape(cur_lr.shape)
         param_group['lr'] = lr
     return [group['lr'] for group in param_groups]