@@ -724,18 +724,24 @@ def classify_error_kind(self, error: Exception) -> AgentErrorKind:
724724 error_str = str (error ).lower ()
725725 error_type = type (error ).__name__ .lower ()
726726
727- # Authentication errors
727+ # Check for permanent auth errors first (non-retryable)
728728 if any (indicator in error_str for indicator in [
729- "invalid api key" , "unauthorized" , "api key" , "authentication failed" ,
730- "invalid_request_error" , "openai_error" , "authentication_error" ,
731- "invalid_api_key" , "incorrect api key" , "api key not found"
729+ "invalid api key" , "api key not found" , "invalid_api_key" ,
730+ "incorrect api key" , "authentication_error"
731+ ]):
732+ return "auth_permanent"
733+
734+ # Retryable authentication errors
735+ if any (indicator in error_str for indicator in [
736+ "unauthorized" , "api key" , "authentication failed" ,
737+ "invalid_request_error" , "openai_error"
732738 ]):
733739 return "auth"
734740
735741 # Rate limiting
736742 if any (indicator in error_str for indicator in [
737743 "rate limit" , "ratelimit" , "too many request" , "resource_exhausted" ,
738- "quota exceeded" , " usage limit" , "429"
744+ "usage limit" , "429"
739745 ]) or "429" in str (getattr (error , "status_code" , "" )):
740746 return "rate_limit"
741747
@@ -828,9 +834,9 @@ def resolve_failover_decision(self, error: Exception, attempt_state: dict) -> Fa
828834
829835 # Rate limiting - extract retry delay
830836 if error_kind == "rate_limit" :
831- backoff = self ._parse_retry_delay (str (error ))
837+ backoff = self ._parse_retry_delay (str (error )) # Returns seconds
832838 if backoff == 0 : # No specific delay found, use exponential backoff
833- backoff = min (1000 * ( 2 ** (attempt - 1 )), 60000 ) # Cap at 60s
839+ backoff = min (2 ** (attempt - 1 ), 60 ) # 1s, 2s, 4s, ... cap at 60s
834840 return FailoverDecision (
835841 action = "retry" ,
836842 reason = error_kind ,
@@ -962,10 +968,16 @@ def _call_with_retry(self, func, *args, **kwargs):
962968 return result
963969
964970 except Exception as e :
965- category , can_retry , retry_delay = self ._classify_error_and_should_retry (e , attempt + 1 )
971+ # Use new typed failover decision instead of old classification
972+ decision = self .resolve_failover_decision (e , {"attempt" : attempt + 1 , "max_retries" : self ._max_retries })
966973
967974 last_error = e
968975 error_str = str (e )
976+
977+ # Map decision to old variables for compatibility with existing logic
978+ category = decision .reason
979+ can_retry = decision .is_retryable
980+ retry_delay = decision .backoff_ms / 1000.0 # Convert ms to seconds
969981
970982 # Check for auth errors and try refreshing subscription credentials
971983 if category == "auth" and self ._auth_provider_id and attempt == 0 :
@@ -982,8 +994,27 @@ def _call_with_retry(self, func, *args, **kwargs):
982994 except Exception as refresh_error :
983995 logging .warning (f"Failed to refresh subscription credentials: { refresh_error } " )
984996
985- # Failover: mark failure and try next profile (do this before early exit)
986- if self ._failover_manager and self ._current_profile :
997+ # Handle different failover decision actions
998+ if decision .action == "rotate_profile" and self ._failover_manager :
999+ if self ._current_profile :
1000+ is_rate_limit = (category == "rate_limit" )
1001+ self ._failover_manager .mark_failure (
1002+ self ._current_profile , error_str , is_rate_limit = is_rate_limit
1003+ )
1004+ next_profile = self ._failover_manager .get_next_profile ()
1005+ if next_profile and next_profile != self ._current_profile :
1006+ self ._switch_to_profile (next_profile )
1007+ self ._current_profile = next_profile
1008+ # Update the kwargs with new profile values for the next retry
1009+ if "api_key" in kwargs :
1010+ kwargs ["api_key" ] = self .api_key
1011+ if "base_url" in kwargs :
1012+ kwargs ["base_url" ] = self .base_url
1013+ if "model" in kwargs :
1014+ kwargs ["model" ] = self .model
1015+ logging .info (f"Failover: switched to profile '{ next_profile .name } '" )
1016+ # Legacy failover for compatibility (when decision is retry but failover is configured)
1017+ elif self ._failover_manager and self ._current_profile and decision .action == "retry" :
9871018 is_rate_limit = (category == "rate_limit" )
9881019 self ._failover_manager .mark_failure (
9891020 self ._current_profile , error_str , is_rate_limit = is_rate_limit
@@ -1004,7 +1035,7 @@ def _call_with_retry(self, func, *args, **kwargs):
10041035 retry_delay = 0.0
10051036 logging .info (f"Failover: switched to profile '{ next_profile .name } '" )
10061037
1007- if not can_retry :
1038+ if decision . action == "surface_error" or not can_retry :
10081039 raise
10091040
10101041 if attempt < self ._max_retries :
@@ -1069,10 +1100,16 @@ async def _call_with_retry_async(self, func, *args, **kwargs):
10691100 return result
10701101
10711102 except Exception as e :
1072- category , can_retry , retry_delay = self ._classify_error_and_should_retry (e , attempt + 1 )
1103+ # Use new typed failover decision instead of old classification
1104+ decision = self .resolve_failover_decision (e , {"attempt" : attempt + 1 , "max_retries" : self ._max_retries })
10731105
10741106 last_error = e
10751107 error_str = str (e )
1108+
1109+ # Map decision to old variables for compatibility with existing logic
1110+ category = decision .reason
1111+ can_retry = decision .is_retryable
1112+ retry_delay = decision .backoff_ms / 1000.0 # Convert ms to seconds
10761113
10771114 # Check for auth errors and try refreshing subscription credentials
10781115 if category == "auth" and self ._auth_provider_id and attempt == 0 :
@@ -1089,8 +1126,27 @@ async def _call_with_retry_async(self, func, *args, **kwargs):
10891126 except Exception as refresh_error :
10901127 logging .warning (f"Failed to refresh subscription credentials: { refresh_error } " )
10911128
1092- # Failover: mark failure and try next profile (do this before early exit)
1093- if self ._failover_manager and self ._current_profile :
1129+ # Handle different failover decision actions
1130+ if decision .action == "rotate_profile" and self ._failover_manager :
1131+ if self ._current_profile :
1132+ is_rate_limit = (category == "rate_limit" )
1133+ self ._failover_manager .mark_failure (
1134+ self ._current_profile , error_str , is_rate_limit = is_rate_limit
1135+ )
1136+ next_profile = self ._failover_manager .get_next_profile ()
1137+ if next_profile and next_profile != self ._current_profile :
1138+ self ._switch_to_profile (next_profile )
1139+ self ._current_profile = next_profile
1140+ # Update the kwargs with new profile values for the next retry
1141+ if "api_key" in kwargs :
1142+ kwargs ["api_key" ] = self .api_key
1143+ if "base_url" in kwargs :
1144+ kwargs ["base_url" ] = self .base_url
1145+ if "model" in kwargs :
1146+ kwargs ["model" ] = self .model
1147+ logging .info (f"Failover: switched to profile '{ next_profile .name } '" )
1148+ # Legacy failover for compatibility (when decision is retry but failover is configured)
1149+ elif self ._failover_manager and self ._current_profile and decision .action == "retry" :
10941150 is_rate_limit = (category == "rate_limit" )
10951151 self ._failover_manager .mark_failure (
10961152 self ._current_profile , error_str , is_rate_limit = is_rate_limit
@@ -1111,7 +1167,7 @@ async def _call_with_retry_async(self, func, *args, **kwargs):
11111167 retry_delay = 0.0
11121168 logging .info (f"Failover: switched to profile '{ next_profile .name } '" )
11131169
1114- if not can_retry :
1170+ if decision . action == "surface_error" or not can_retry :
11151171 raise
11161172
11171173 if attempt < self ._max_retries :
@@ -2144,7 +2200,7 @@ def _prepare_return_value(text: str) -> Union[str, tuple]:
21442200 )
21452201
21462202 # Sequential tool calling loop - similar to agent.py
2147- max_iterations = 10 # Prevent infinite loops
2203+ max_iterations = self . max_iter # Use configurable iteration limit
21482204 iteration_count = 0
21492205 final_response_text = ""
21502206 response_text = "" # Initialize to prevent UnboundLocalError on API errors
@@ -3935,7 +3991,7 @@ async def get_response_async(
39353991 formatted_tools = self ._format_tools_for_litellm (tools )
39363992
39373993 # Initialize variables for iteration loop
3938- max_iterations = 50 # Prevent infinite loops
3994+ max_iterations = self . max_iter # Use configurable iteration limit
39393995 iteration_count = 0
39403996 final_response_text = ""
39413997 stored_reasoning_content = None # Store reasoning content from tool execution
@@ -4416,7 +4472,7 @@ async def get_response_async(
44164472 continue
44174473
44184474 # Safety check: prevent infinite loops for any provider
4419- if iteration_count >= 20 :
4475+ if iteration_count >= self . max_iter :
44204476 if tool_results :
44214477 final_response_text = "Task completed successfully based on tool execution results."
44224478 else :
0 commit comments