148148 validate_and_fix_openai_messages ,
149149 validate_and_fix_openai_tools ,
150150 validate_chat_completion_tool_choice ,
151- validate_openai_optional_params
151+ validate_openai_optional_params ,
152152)
153153
154154from ._logging import verbose_logger
@@ -368,7 +368,7 @@ async def create(self, messages, model=None, **kwargs):
368368
369369@tracer .wrap ()
370370@client
371- async def acompletion ( # noqa: PLR0915
371+ async def acompletion ( # noqa: PLR0915
372372 model : str ,
373373 # Optional OpenAI params: see https://platform.openai.com/docs/api-reference/chat/create
374374 messages : List = [],
@@ -603,12 +603,11 @@ async def acompletion( # noqa: PLR0915
603603 if timeout is not None and isinstance (timeout , (int , float )):
604604 timeout_value = float (timeout )
605605 init_response = await asyncio .wait_for (
606- loop .run_in_executor (None , func_with_context ),
607- timeout = timeout_value
606+ loop .run_in_executor (None , func_with_context ), timeout = timeout_value
608607 )
609608 else :
610609 init_response = await loop .run_in_executor (None , func_with_context )
611-
610+
612611 if isinstance (init_response , dict ) or isinstance (
613612 init_response , ModelResponse
614613 ): ## CACHING SCENARIO
@@ -640,6 +639,7 @@ async def acompletion( # noqa: PLR0915
640639 except asyncio .TimeoutError :
641640 custom_llm_provider = custom_llm_provider or "openai"
642641 from litellm .exceptions import Timeout
642+
643643 raise Timeout (
644644 message = f"Request timed out after { timeout } seconds" ,
645645 model = model ,
@@ -1118,7 +1118,6 @@ def completion( # type: ignore # noqa: PLR0915
11181118 # validate optional params
11191119 stop = validate_openai_optional_params (stop = stop )
11201120
1121-
11221121 ######### unpacking kwargs #####################
11231122 args = locals ()
11241123
@@ -1135,7 +1134,9 @@ def completion( # type: ignore # noqa: PLR0915
11351134 # Check if MCP tools are present (following responses pattern)
11361135 # Cast tools to Optional[Iterable[ToolParam]] for type checking
11371136 tools_for_mcp = cast (Optional [Iterable [ToolParam ]], tools )
1138- if LiteLLM_Proxy_MCP_Handler ._should_use_litellm_mcp_gateway (tools = tools_for_mcp ):
1137+ if LiteLLM_Proxy_MCP_Handler ._should_use_litellm_mcp_gateway (
1138+ tools = tools_for_mcp
1139+ ):
11391140 # Return coroutine - acompletion will await it
11401141 # completion() can return a coroutine when MCP tools are present, which acompletion() awaits
11411142 return acompletion_with_mcp ( # type: ignore[return-value]
@@ -1536,6 +1537,8 @@ def completion( # type: ignore # noqa: PLR0915
15361537 max_retries = max_retries ,
15371538 timeout = timeout ,
15381539 litellm_request_debug = kwargs .get ("litellm_request_debug" , False ),
1540+ tpm = kwargs .get ("tpm" ),
1541+ rpm = kwargs .get ("rpm" ),
15391542 )
15401543 cast (LiteLLMLoggingObj , logging ).update_environment_variables (
15411544 model = model ,
@@ -2361,11 +2364,7 @@ def completion( # type: ignore # noqa: PLR0915
23612364 input = messages , api_key = api_key , original_response = response
23622365 )
23632366 elif custom_llm_provider == "minimax" :
2364- api_key = (
2365- api_key
2366- or get_secret_str ("MINIMAX_API_KEY" )
2367- or litellm .api_key
2368- )
2367+ api_key = api_key or get_secret_str ("MINIMAX_API_KEY" ) or litellm .api_key
23692368
23702369 api_base = (
23712370 api_base
@@ -2413,7 +2412,9 @@ def completion( # type: ignore # noqa: PLR0915
24132412 or custom_llm_provider == "wandb"
24142413 or custom_llm_provider == "clarifai"
24152414 or custom_llm_provider in litellm .openai_compatible_providers
2416- or JSONProviderRegistry .exists (custom_llm_provider ) # JSON-configured providers
2415+ or JSONProviderRegistry .exists (
2416+ custom_llm_provider
2417+ ) # JSON-configured providers
24172418 or "ft:gpt-3.5-turbo" in model # finetune gpt-3.5-turbo
24182419 ): # allow user to make an openai call with a custom base
24192420 # note: if a user sets a custom base - we should ensure this works
@@ -4724,7 +4725,7 @@ def embedding( # noqa: PLR0915
47244725
47254726 if headers is not None and headers != {}:
47264727 optional_params ["extra_headers" ] = headers
4727-
4728+
47284729 if encoding_format is not None :
47294730 optional_params ["encoding_format" ] = encoding_format
47304731 else :
@@ -6759,9 +6760,7 @@ def speech( # noqa: PLR0915
67596760 if text_to_speech_provider_config is None :
67606761 text_to_speech_provider_config = MinimaxTextToSpeechConfig ()
67616762
6762- minimax_config = cast (
6763- MinimaxTextToSpeechConfig , text_to_speech_provider_config
6764- )
6763+ minimax_config = cast (MinimaxTextToSpeechConfig , text_to_speech_provider_config )
67656764
67666765 if api_base is not None :
67676766 litellm_params_dict ["api_base" ] = api_base
@@ -6901,7 +6900,7 @@ async def ahealth_check(
69016900 custom_llm_provider_from_params = model_params .get ("custom_llm_provider" , None )
69026901 api_base_from_params = model_params .get ("api_base" , None )
69036902 api_key_from_params = model_params .get ("api_key" , None )
6904-
6903+
69056904 model , custom_llm_provider , _ , _ = get_llm_provider (
69066905 model = model ,
69076906 custom_llm_provider = custom_llm_provider_from_params ,
@@ -7275,8 +7274,9 @@ def __getattr__(name: str) -> Any:
72757274 _encoding = tiktoken .get_encoding ("cl100k_base" )
72767275 # Cache it in the module's __dict__ for subsequent accesses
72777276 import sys
7277+
72787278 sys .modules [__name__ ].__dict__ ["encoding" ] = _encoding
72797279 global _encoding_cache
72807280 _encoding_cache = _encoding
72817281 return _encoding
7282- raise AttributeError (f"module { __name__ !r} has no attribute { name !r} " )
7282+ raise AttributeError (f"module { __name__ !r} has no attribute { name !r} " )
0 commit comments