diff --git a/airflow-core/src/airflow/models/taskinstance.py b/airflow-core/src/airflow/models/taskinstance.py index 53ca7c6589e16..6bdeba70f25f0 100644 --- a/airflow-core/src/airflow/models/taskinstance.py +++ b/airflow-core/src/airflow/models/taskinstance.py @@ -1070,12 +1070,18 @@ def next_retry_datetime(self): delay = self.task.retry_delay if self.task.retry_exponential_backoff: - # If the min_backoff calculation is below 1, it will be converted to 0 via int. Thus, - # we must round up prior to converting to an int, otherwise a divide by zero error - # will occur in the modded_hash calculation. - # this probably gives unexpected results if a task instance has previously been cleared, - # because try_number can increase without bound - min_backoff = math.ceil(delay.total_seconds() * (2 ** (self.try_number - 1))) + try: + # If the min_backoff calculation is below 1, it will be converted to 0 via int. Thus, + # we must round up prior to converting to an int, otherwise a divide by zero error + # will occur in the modded_hash calculation. + # this probably gives unexpected results if a task instance has previously been cleared, + # because try_number can increase without bound + min_backoff = math.ceil(delay.total_seconds() * (2 ** (self.try_number - 1))) + except OverflowError: + min_backoff = MAX_RETRY_DELAY + self.log.warning( + "OverflowError occurred while calculating min_backoff, using MAX_RETRY_DELAY for min_backoff." + ) # In the case when delay.total_seconds() is 0, min_backoff will not be rounded up to 1. # To address this, we impose a lower bound of 1 on min_backoff. This effectively makes diff --git a/airflow-core/tests/unit/models/test_taskinstance.py b/airflow-core/tests/unit/models/test_taskinstance.py index 2a9e86771c2f7..c4ab350c07ef3 100644 --- a/airflow-core/tests/unit/models/test_taskinstance.py +++ b/airflow-core/tests/unit/models/test_taskinstance.py @@ -627,6 +627,31 @@ def test_next_retry_datetime(self, dag_maker): date = ti.next_retry_datetime() assert date == ti.end_date + max_delay + def test_next_retry_datetime_returns_max_for_overflow(self, dag_maker): + delay = datetime.timedelta(seconds=30) + max_delay = datetime.timedelta(minutes=60) + + with dag_maker(dag_id="fail_dag"): + task = BashOperator( + task_id="task_with_exp_backoff_and_max_delay", + bash_command="exit 1", + retries=3, + retry_delay=delay, + retry_exponential_backoff=True, + max_retry_delay=max_delay, + ) + ti = dag_maker.create_dagrun().task_instances[0] + ti.task = task + ti.end_date = pendulum.instance(timezone.utcnow()) + + ti.try_number = 5000 + date = ti.next_retry_datetime() + assert date == ti.end_date + max_delay + + ti.try_number = 50000 + date = ti.next_retry_datetime() + assert date == ti.end_date + max_delay + @pytest.mark.parametrize("seconds", [0, 0.5, 1]) def test_next_retry_datetime_short_or_zero_intervals(self, dag_maker, seconds): delay = datetime.timedelta(seconds=seconds)