|
13 | 13 | import uuid |
14 | 14 | from copy import deepcopy |
15 | 15 | from pathlib import Path |
16 | | -from typing import Any |
| 16 | +from typing import Any, Optional |
17 | 17 |
|
18 | 18 | import numpy as np |
19 | 19 | import tiktoken |
@@ -401,7 +401,10 @@ def build_messages( |
401 | 401 | *, |
402 | 402 | shrink_multiple_break: bool = False, |
403 | 403 | ) -> list[dict]: |
404 | | - """build the messages to avoid implementing several redundant lines of code""" |
| 404 | + """ |
| 405 | + build the messages to avoid implementing several redundant lines of code |
| 406 | +
|
| 407 | + """ |
405 | 408 | if former_messages is None: |
406 | 409 | former_messages = [] |
407 | 410 | # shrink_multiple_break will recursively remove multiple breaks (more than 2) |
@@ -440,7 +443,10 @@ def build_messages_and_create_chat_completion( |
440 | 443 | if former_messages is None: |
441 | 444 | former_messages = [] |
442 | 445 | messages = self.build_messages( |
443 | | - user_prompt, system_prompt, former_messages, shrink_multiple_break=shrink_multiple_break |
| 446 | + user_prompt, |
| 447 | + system_prompt, |
| 448 | + former_messages, |
| 449 | + shrink_multiple_break=shrink_multiple_break, |
444 | 450 | ) |
445 | 451 | return self._try_create_chat_completion_or_embedding( |
446 | 452 | messages=messages, |
@@ -567,14 +573,21 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 |
567 | 573 | *, |
568 | 574 | json_mode: bool = False, |
569 | 575 | add_json_in_prompt: bool = False, |
| 576 | + seed: Optional[int] = None, |
570 | 577 | ) -> str: |
| 578 | + """ |
| 579 | + seed : Optional[int] |
| 580 | + When the cache is enabled, retrying an identical request keeps returning the same cached result. |
| 581 | + To make retries useful, pass a different seed: the seed is appended to the cache key, so each seed gets its own cache entry. |
| 582 | + This seed is different from `self.chat_seed` for GPT; it is used for the local cache mechanism provided by RD-Agent. |
| 583 | + """ |
571 | 584 | # TODO: we can add this function back to avoid so much `self.cfg.log_llm_chat_content` |
572 | 585 | if self.cfg.log_llm_chat_content: |
573 | 586 | logger.info(self._build_log_messages(messages), tag="llm_messages") |
574 | 587 | # TODO: fail to use loguru adaptor due to stream response |
575 | 588 | input_content_json = json.dumps(messages) |
576 | 589 | input_content_json = ( |
577 | | - chat_cache_prefix + input_content_json |
| 590 | + chat_cache_prefix + input_content_json + f"<seed={seed}/>" |
578 | 591 | ) # FIXME this is a hack to make sure the cache represents the round index |
579 | 592 | if self.use_chat_cache: |
580 | 593 | cache_result = self.cache.chat_get(input_content_json) |
|
0 commit comments