Skip to content

Commit 6e42d52

Browse files
WinstonLiyt, peteryang1, and peteryangms
authored
feat: add RD-Agent-Quant scenario (microsoft#838)
* fix model input shape bug and costeer_model bug
* fix a bug
* fix a bug in docker result extraction
* a system-level optimization
* add a filter of stdout
* update
* add stdout to model
* model training_hyperparameters update
* quant scenario
* update some quant settings
* llm choose action
* Thompson Sampling Bandit for action choosing
* refine both scens
* add trace messages for quant scen
* fix some bugs
* fix some bugs
* update
* update
* update
* fix
* fix
* fix
* update for merge
* fix ci
* fix some bugs
* fix ci
* fix ci
* fix ci
* fix ci
* refactor
* default qlib4rdagent local env downloading
* fix ci
* fix ci
* fix a bug
* fix ci
* fix: align all prompts on template (microsoft#908)
* use template to render all prompts
* fix CI
---------
Co-authored-by: Xu Yang <xuyang1@microsoft.com>
* add fin_quant in cli
* fix a bug
* fix ci
* fix some bugs
* refactor
* remove the columns in hypothesis if no value generated in this column
* fix a bug
* fix ci
* fix conda env
* add qlib gitignore
* remove existed qlib folder & install torch in qlib conda
* fix workspace ui in feedback
* align model config in coder and runner in docker or conda
* fix CI
* fix CI
---------
Co-authored-by: Xu Yang <peteryang@vip.qq.com>
Co-authored-by: Xu Yang <xuyang1@microsoft.com>
1 parent 8800624 commit 6e42d52

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

64 files changed

+2426
-1024
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ Pipfile
55
public
66
release-notes.md
77
typescript*
8+
qlib
89

910
# Byte-compiled / optimized / DLL files
1011
__pycache__/

rdagent/app/cli.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
from rdagent.app.qlib_rd_loop.factor import main as fin_factor
2626
from rdagent.app.qlib_rd_loop.factor_from_report import main as fin_factor_report
2727
from rdagent.app.qlib_rd_loop.model import main as fin_model
28+
from rdagent.app.qlib_rd_loop.quant import main as fin_quant
2829
from rdagent.app.utils.health_check import health_check
2930
from rdagent.app.utils.info import collect_info
3031

@@ -50,6 +51,7 @@ def app():
5051
"fin_factor": fin_factor,
5152
"fin_factor_report": fin_factor_report,
5253
"fin_model": fin_model,
54+
"fin_quant": fin_quant,
5355
"med_model": med_model,
5456
"general_model": general_model,
5557
"ui": ui,

rdagent/app/qlib_rd_loop/conf.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,50 @@ class FactorFromReportPropSetting(FactorBasePropSetting):
7171
"""Limits report processing count if True; processes all if False"""
7272

7373

74+
class QuantBasePropSetting(BasePropSetting):
    """Settings for the combined Qlib quant (factor + model) R&D loop.

    Each ``*_hypothesis2experiment`` / ``*_coder`` / ``*_runner`` /
    ``*_summarizer`` field holds the dotted import path of the class used for
    the factor or the model branch respectively. Values are read from the
    environment with the ``QLIB_QUANT_`` prefix (see ``model_config``).
    """

    # `protected_namespaces=()` is needed because several fields below start
    # with `model_`, which pydantic otherwise reserves for its own API.
    model_config = SettingsConfigDict(env_prefix="QLIB_QUANT_", protected_namespaces=())

    # 1) override base settings
    scen: str = "rdagent.scenarios.qlib.experiment.quant_experiment.QlibQuantScenario"
    """Scenario class for the Qlib quant (factor & model) loop"""

    quant_hypothesis_gen: str = "rdagent.scenarios.qlib.proposal.quant_proposal.QlibQuantHypothesisGen"
    """Hypothesis generation class for the quant loop"""

    model_hypothesis2experiment: str = "rdagent.scenarios.qlib.proposal.model_proposal.QlibModelHypothesis2Experiment"
    """Hypothesis to experiment class (model branch)"""

    model_coder: str = "rdagent.scenarios.qlib.developer.model_coder.QlibModelCoSTEER"
    """Coder class (model branch)"""

    model_runner: str = "rdagent.scenarios.qlib.developer.model_runner.QlibModelRunner"
    """Runner class (model branch)"""

    model_summarizer: str = "rdagent.scenarios.qlib.developer.feedback.QlibModelExperiment2Feedback"
    """Summarizer class (model branch)"""

    factor_hypothesis2experiment: str = (
        "rdagent.scenarios.qlib.proposal.factor_proposal.QlibFactorHypothesis2Experiment"
    )
    """Hypothesis to experiment class (factor branch)"""

    factor_coder: str = "rdagent.scenarios.qlib.developer.factor_coder.QlibFactorCoSTEER"
    """Coder class (factor branch)"""

    factor_runner: str = "rdagent.scenarios.qlib.developer.factor_runner.QlibFactorRunner"
    """Runner class (factor branch)"""

    factor_summarizer: str = "rdagent.scenarios.qlib.developer.feedback.QlibFactorExperiment2Feedback"
    """Summarizer class (factor branch)"""

    evolving_n: int = 10
    """Number of evolutions"""

    action_selection: str = "bandit"
    """Action selection strategy: 'bandit' for bandit-based selection, 'llm' for LLM-based selection, 'random' for random selection"""
115+
116+
74117
FACTOR_PROP_SETTING = FactorBasePropSetting()
75118
FACTOR_FROM_REPORT_PROP_SETTING = FactorFromReportPropSetting()
76119
MODEL_PROP_SETTING = ModelBasePropSetting()
120+
QUANT_PROP_SETTING = QuantBasePropSetting()

rdagent/app/qlib_rd_loop/factor_from_report.py

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,27 +3,23 @@
33
from typing import Any, Dict, Tuple
44

55
import fire
6-
from jinja2 import Environment, StrictUndefined
76

87
from rdagent.app.qlib_rd_loop.conf import FACTOR_FROM_REPORT_PROP_SETTING
98
from rdagent.app.qlib_rd_loop.factor import FactorRDLoop
109
from rdagent.components.document_reader.document_reader import (
1110
extract_first_page_screenshot_from_pdf,
1211
load_and_process_pdfs_by_langchain,
1312
)
14-
from rdagent.core.prompts import Prompts
1513
from rdagent.core.proposal import Hypothesis
1614
from rdagent.log import rdagent_logger as logger
1715
from rdagent.oai.llm_utils import APIBackend
1816
from rdagent.scenarios.qlib.experiment.factor_experiment import QlibFactorExperiment
1917
from rdagent.scenarios.qlib.factor_experiment_loader.pdf_loader import (
2018
FactorExperimentLoaderFromPDFfiles,
2119
)
20+
from rdagent.utils.agent.tpl import T
2221
from rdagent.utils.workflow import LoopMeta
2322

24-
prompts_path = Path(__file__).parent / "prompts.yaml"
25-
prompts = Prompts(file_path=prompts_path)
26-
2723

2824
def generate_hypothesis(factor_result: dict, report_content: str) -> str:
2925
"""
@@ -36,13 +32,9 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str:
3632
Returns:
3733
str: The generated hypothesis.
3834
"""
39-
system_prompt = (
40-
Environment(undefined=StrictUndefined).from_string(prompts["hypothesis_generation"]["system"]).render()
41-
)
42-
user_prompt = (
43-
Environment(undefined=StrictUndefined)
44-
.from_string(prompts["hypothesis_generation"]["user"])
45-
.render(factor_descriptions=json.dumps(factor_result), report_content=report_content)
35+
system_prompt = T(".prompts:hypothesis_generation.system").r()
36+
user_prompt = T(".prompts:hypothesis_generation.user").r(
37+
factor_descriptions=json.dumps(factor_result), report_content=report_content
4638
)
4739

4840
response = APIBackend().build_messages_and_create_chat_completion(

rdagent/app/qlib_rd_loop/prompts.yaml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,6 @@ hypothesis_generation:
55
{
66
"hypothesis": "A clear and concise hypothesis based on the provided information.",
77
"reason": "A detailed explanation supporting the generated hypothesis.",
8-
"concise_reason": "One line summary that focuses on the justification for the change that leads to the hypothesis (like a part of a knowledge that we are building)",
9-
"concise_observation": "One line summary. It focuses on the observation of the given scenario, data characteristics, or previous experiences (failures & succeses).",
10-
"concise_justification": "One line summary. It focuses on the justification for the change in new hypothesis and the route of exploration supporting the growth of the hypothesis, based on the observation. ",
11-
"concise_knowledge": "One line summary. It focuses on a transferable knowledege that comes with the new hypothesis. Use conditional grammar. eg. "If...., ..; When..., .; and etc"
128
}
139
1410
user: |-

rdagent/app/qlib_rd_loop/quant.py

Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
"""
2+
Quant (Factor & Model) workflow with session control
3+
"""
4+
5+
from typing import Any
6+
7+
import fire
8+
9+
from rdagent.app.qlib_rd_loop.conf import QUANT_PROP_SETTING
10+
from rdagent.components.workflow.conf import BasePropSetting
11+
from rdagent.components.workflow.rd_loop import RDLoop
12+
from rdagent.core.developer import Developer
13+
from rdagent.core.exception import FactorEmptyError, ModelEmptyError
14+
from rdagent.core.proposal import (
15+
Experiment2Feedback,
16+
Hypothesis2Experiment,
17+
HypothesisFeedback,
18+
HypothesisGen,
19+
)
20+
from rdagent.core.scenario import Scenario
21+
from rdagent.core.utils import import_class
22+
from rdagent.log import rdagent_logger as logger
23+
from rdagent.scenarios.qlib.proposal.quant_proposal import QuantTrace
24+
25+
26+
class QuantRDLoop(RDLoop):
    """R&D loop that runs either a factor or a model experiment per iteration.

    The hypothesis produced in ``direct_exp_gen`` carries an ``action``
    attribute (``"factor"`` or ``"model"``); every later step reads that
    attribute from ``prev_out`` and dispatches to the matching factor/model
    component built in ``__init__``.
    """

    # NOTE(review): presumably the workflow base class skips the current loop
    # iteration when one of these is raised -- confirm against the base class.
    skip_loop_error = (
        FactorEmptyError,
        ModelEmptyError,
    )

    def __init__(self, PROP_SETTING: BasePropSetting):
        """Instantiate all factor/model components named in ``PROP_SETTING``.

        Args:
            PROP_SETTING: settings object whose string fields are dotted import
                paths; each one is resolved with ``import_class`` and
                instantiated here.
        """
        with logger.tag("init"):
            scen: Scenario = import_class(PROP_SETTING.scen)()
            logger.log_object(scen, tag="scenario")

            self.hypothesis_gen: HypothesisGen = import_class(PROP_SETTING.quant_hypothesis_gen)(scen)
            logger.log_object(self.hypothesis_gen, tag="quant hypothesis generator")

            self.factor_hypothesis2experiment: Hypothesis2Experiment = import_class(
                PROP_SETTING.factor_hypothesis2experiment
            )()
            logger.log_object(self.factor_hypothesis2experiment, tag="factor hypothesis2experiment")
            self.model_hypothesis2experiment: Hypothesis2Experiment = import_class(
                PROP_SETTING.model_hypothesis2experiment
            )()
            logger.log_object(self.model_hypothesis2experiment, tag="model hypothesis2experiment")

            self.factor_coder: Developer = import_class(PROP_SETTING.factor_coder)(scen)
            logger.log_object(self.factor_coder, tag="factor coder")
            self.model_coder: Developer = import_class(PROP_SETTING.model_coder)(scen)
            logger.log_object(self.model_coder, tag="model coder")

            self.factor_runner: Developer = import_class(PROP_SETTING.factor_runner)(scen)
            logger.log_object(self.factor_runner, tag="factor runner")
            self.model_runner: Developer = import_class(PROP_SETTING.model_runner)(scen)
            logger.log_object(self.model_runner, tag="model runner")

            self.factor_summarizer: Experiment2Feedback = import_class(PROP_SETTING.factor_summarizer)(scen)
            logger.log_object(self.factor_summarizer, tag="factor summarizer")
            self.model_summarizer: Experiment2Feedback = import_class(PROP_SETTING.model_summarizer)(scen)
            logger.log_object(self.model_summarizer, tag="model summarizer")

            self.trace = QuantTrace(scen=scen)
            # Two-argument super() starts the MRO lookup *after* RDLoop, so
            # RDLoop.__init__ is intentionally not executed: the components
            # were already created above.
            super(RDLoop, self).__init__()

    def direct_exp_gen(self, prev_out: dict[str, Any]) -> dict[str, Any]:
        """Propose a hypothesis and convert it into a factor or model experiment.

        Returns:
            A dict with the hypothesis under ``"propose"`` and the generated
            experiment under ``"exp_gen"``.
        """
        with logger.tag("r"):  # research
            hypo = self._propose()
            assert hypo.action in ["factor", "model"]
            if hypo.action == "factor":
                exp = self.factor_hypothesis2experiment.convert(hypo, self.trace)
            else:
                exp = self.model_hypothesis2experiment.convert(hypo, self.trace)
            logger.log_object(exp.sub_tasks, tag="experiment generation")
        return {"propose": hypo, "exp_gen": exp}

    def coding(self, prev_out: dict[str, Any]):
        """Develop the generated experiment with the coder matching its action.

        Raises:
            ValueError: if the proposed action is neither ``"factor"`` nor
                ``"model"``.
        """
        with logger.tag("d"):  # development
            action = prev_out["direct_exp_gen"]["propose"].action
            if action == "factor":
                exp = self.factor_coder.develop(prev_out["direct_exp_gen"]["exp_gen"])
            elif action == "model":
                exp = self.model_coder.develop(prev_out["direct_exp_gen"]["exp_gen"])
            else:
                # Fail with a clear message instead of an UnboundLocalError on `exp`.
                raise ValueError(f"Unknown action {action!r}; expected 'factor' or 'model'.")
            logger.log_object(exp, tag="coder result")
        return exp

    def running(self, prev_out: dict[str, Any]):
        """Run the coded experiment with the runner matching its action.

        Raises:
            FactorEmptyError: if the factor runner returns ``None``.
            ValueError: if the proposed action is neither ``"factor"`` nor
                ``"model"``.
        """
        with logger.tag("ef"):
            action = prev_out["direct_exp_gen"]["propose"].action
            if action == "factor":
                exp = self.factor_runner.develop(prev_out["coding"])
                if exp is None:
                    logger.error("Factor extraction failed.")
                    raise FactorEmptyError("Factor extraction failed.")
            elif action == "model":
                exp = self.model_runner.develop(prev_out["coding"])
            else:
                raise ValueError(f"Unknown action {action!r}; expected 'factor' or 'model'.")
            logger.log_object(exp, tag="runner result")
        return exp

    def feedback(self, prev_out: dict[str, Any]):
        """Create feedback for this iteration and append it to the trace history.

        If an earlier step stored an exception under ``self.EXCEPTION_KEY``, a
        negative feedback (``decision=False``) is recorded together with the
        generated (not executed) experiment. Otherwise the summarizer matching
        the action produces the feedback for the executed experiment.

        Raises:
            ValueError: if the proposed action is neither ``"factor"`` nor
                ``"model"``.
        """
        e = prev_out.get(self.EXCEPTION_KEY, None)
        if e is not None:
            feedback = HypothesisFeedback(
                # Store the message text rather than the exception object; the
                # sibling fields are plain strings as well.
                observations=str(e),
                hypothesis_evaluation="",
                new_hypothesis="",
                reason="",
                decision=False,
            )
            with logger.tag("ef"):  # evaluate and feedback
                logger.log_object(feedback, tag="feedback")
            self.trace.hist.append((prev_out["direct_exp_gen"]["exp_gen"], feedback))
        else:
            action = prev_out["direct_exp_gen"]["propose"].action
            if action == "factor":
                feedback = self.factor_summarizer.generate_feedback(prev_out["running"], self.trace)
            elif action == "model":
                feedback = self.model_summarizer.generate_feedback(prev_out["running"], self.trace)
            else:
                raise ValueError(f"Unknown action {action!r}; expected 'factor' or 'model'.")
            with logger.tag("ef"):
                logger.log_object(feedback, tag="feedback")
            self.trace.hist.append((prev_out["running"], feedback))
120+
121+
122+
def main(path=None, step_n=None):
    """
    Auto R&D evolving loop for quant research (factors and models).

    You can continue running a session by

    .. code-block:: bash

        dotenv run -- python rdagent/app/qlib_rd_loop/quant.py $LOG_PATH/__session__/1/0_propose  --step_n 1   # `step_n` is an optional parameter

    """
    # Start a fresh loop from the default settings, or resume a saved session.
    quant_loop = QuantRDLoop(QUANT_PROP_SETTING) if path is None else QuantRDLoop.load(path)
    quant_loop.run(step_n=step_n)


if __name__ == "__main__":
    fire.Fire(main)

rdagent/components/coder/CoSTEER/evolving_strategy.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations
22

33
from abc import abstractmethod
4-
from pathlib import Path
54

65
from rdagent.components.coder.CoSTEER.config import CoSTEERSettings
76
from rdagent.components.coder.CoSTEER.evaluators import (
@@ -15,12 +14,9 @@
1514
from rdagent.core.conf import RD_AGENT_SETTINGS
1615
from rdagent.core.evolving_framework import EvolvingStrategy, EvoStep, QueriedKnowledge
1716
from rdagent.core.experiment import FBWorkspace, Task
18-
from rdagent.core.prompts import Prompts
1917
from rdagent.core.scenario import Scenario
2018
from rdagent.core.utils import multiprocessing_wrapper
2119

22-
implement_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml")
23-
2420

2521
class MultiProcessEvolvingStrategy(EvolvingStrategy):
2622
def __init__(self, scen: Scenario, settings: CoSTEERSettings):

rdagent/components/coder/CoSTEER/knowledge_management.py

Lines changed: 3 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,6 @@
88
from pathlib import Path
99
from typing import List, Union
1010

11-
from jinja2 import Environment, StrictUndefined
12-
1311
from rdagent.components.coder.CoSTEER.config import CoSTEERSettings
1412
from rdagent.components.coder.CoSTEER.evaluators import CoSTEERSingleFeedback
1513
from rdagent.components.knowledge_management.graph import (
@@ -26,12 +24,12 @@
2624
RAGStrategy,
2725
)
2826
from rdagent.core.experiment import FBWorkspace, Task
29-
from rdagent.core.prompts import Prompts
3027
from rdagent.log import rdagent_logger as logger
3128
from rdagent.oai.llm_utils import (
3229
APIBackend,
3330
calculate_embedding_distance_between_str_list,
3431
)
32+
from rdagent.utils.agent.tpl import T
3533

3634

3735
class CoSTEERKnowledge(Knowledge):
@@ -216,8 +214,6 @@ def __init__(
216214

217215

218216
class CoSTEERRAGStrategyV2(RAGStrategy):
219-
prompt = Prompts(file_path=Path(__file__).parent / "prompts.yaml")
220-
221217
def __init__(self, knowledgebase: CoSTEERKnowledgeBaseV2, settings: CoSTEERSettings) -> None:
222218
super().__init__(knowledgebase)
223219
self.current_generated_trace_count = 0
@@ -324,12 +320,8 @@ def analyze_component(
324320
all_component_content = ""
325321
for _, component_node in enumerate(all_component_nodes):
326322
all_component_content += f"{component_node.content}, \n"
327-
analyze_component_system_prompt = (
328-
Environment(undefined=StrictUndefined)
329-
.from_string(self.prompt["analyze_component_prompt_v1_system"])
330-
.render(
331-
all_component_content=all_component_content,
332-
)
323+
analyze_component_system_prompt = T(".prompts:analyze_component_prompt_v1_system").r(
324+
all_component_content=all_component_content,
333325
)
334326

335327
analyze_component_user_prompt = target_task_information

rdagent/components/coder/data_science/ensemble/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,7 @@
1111
- Each coder could be tested.
1212
"""
1313

14-
import json
1514
from pathlib import Path
16-
from typing import Dict
1715

1816
from jinja2 import Environment, StrictUndefined
1917

rdagent/components/coder/factor_coder/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ class FactorCoSTEERSettings(CoSTEERSettings):
1515
simple_background: bool = False
1616
"""Whether to use simple background information for code feedback"""
1717

18-
file_based_execution_timeout: int = 120
18+
file_based_execution_timeout: int = 3600
1919
"""Timeout in seconds for each factor implementation execution"""
2020

2121
select_method: str = "random"

0 commit comments

Comments
 (0)