diff --git a/src/google/adk/models/lite_llm.py b/src/google/adk/models/lite_llm.py
index aeb41074071..d40080e699d 100644
--- a/src/google/adk/models/lite_llm.py
+++ b/src/google/adk/models/lite_llm.py
@@ -403,11 +403,17 @@ def _model_response_to_generate_content_response(
 
   if not message:
     raise ValueError("No message in response")
-  return _message_to_generate_content_response(message)
+
+  response_cost = None
+  if hasattr(response, "_hidden_params") and isinstance(response._hidden_params, dict):
+    response_cost = response._hidden_params.get("response_cost")
+    response_cost = None if response_cost is None else response_cost * 86  # key may be absent -> stay None; NOTE(review): 86 is an undocumented multiplier, confirm intent
+
+  return _message_to_generate_content_response(message, response_cost=response_cost)
 
 
 def _message_to_generate_content_response(
-    message: Message, is_partial: bool = False
+    message: Message, is_partial: bool = False, response_cost: float = None
 ) -> LlmResponse:
   """Converts a litellm message to LlmResponse.
 
@@ -434,7 +440,9 @@ def _message_to_generate_content_response(
       parts.append(part)
 
   return LlmResponse(
-      content=types.Content(role="model", parts=parts), partial=is_partial
+      content=types.Content(role="model", parts=parts),
+      partial=is_partial,
+      response_cost=response_cost
   )
 
 
@@ -611,7 +619,7 @@ async def generate_content_async(
       LlmResponse: The model response.
     """
 
-    logger.info(_build_request_log(llm_request))
+    # logger.info(_build_request_log(llm_request))
 
     messages, tools = _get_completion_inputs(llm_request)
 
diff --git a/src/google/adk/models/llm_response.py b/src/google/adk/models/llm_response.py
index 895e7a08e5e..e73ed5b993e 100644
--- a/src/google/adk/models/llm_response.py
+++ b/src/google/adk/models/llm_response.py
@@ -70,6 +70,7 @@ class LlmResponse(BaseModel):
   """Flag indicating that LLM was interrupted when generating the content.
   Usually it's due to user interruption during a bidi streaming.
   """
+  response_cost: Optional[float] = None
 
   @staticmethod
   def create(