Skip to content

Commit b0d4338

Browse files
committed
Add hook for supporting RL strategy
1 parent d087054 commit b0d4338

File tree

1 file changed

+31
-3
lines changed

1 file changed

+31
-3
lines changed

qlib/backtest/executor.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -395,9 +395,25 @@ def _collect_data(self, trade_decision: BaseTradeDecision, level: int = 0):
395395
if not self._align_range_limit or start_idx <= sub_cal.get_trade_step() <= end_idx:
396396
# if force align the range limit, skip the steps outside the decision range limit
397397

398-
_inner_trade_decision: BaseTradeDecision = self.inner_strategy.generate_trade_decision(
399-
_inner_execute_result
400-
)
398+
res = self.inner_strategy.generate_trade_decision(_inner_execute_result)
399+
400+
# NOTE: !!!!!
401+
# the two lines below are for a special case in RL
402+
# To solve the conflict below
403+
# - Normally, user will create a strategy and embed it into Qlib's executor and simulator interaction loop
404+
# For a _nested qlib example_, (Qlib Strategy) <=> (Qlib Executor[(inner Qlib Strategy) <=> (inner Qlib Executor)])
405+
# - However, an RL-based framework has its own script to run the loop
406+
# For an _RL learning example_, (RL Policy) <=> (RL Env[(inner Qlib Executor)])
407+
# To make it possible to run _nested qlib example_ and _RL learning example_ together, the solution below is proposed
408+
# - The entry script follows the example of the _RL learning example_ to be compatible with all kinds of RL frameworks
409+
# - Each step of (RL Env) will make (inner Qlib Executor) one step forward
410+
# - (inner Qlib Strategy) is a proxy strategy, it will give the program control right to (RL Env) by `yield from` and wait for the action from the policy
411+
# So the two lines below are the implementation of yielding control rights
412+
if isinstance(res, GeneratorType):
413+
res = yield from res
414+
415+
_inner_trade_decision: BaseTradeDecision = res
416+
401417
trade_decision.mod_inner_decision(_inner_trade_decision) # propagate part of decision information
402418

403419
# NOTE sub_cal.get_step_time() must be called before collect_data in case of step shifting
@@ -407,6 +423,7 @@ def _collect_data(self, trade_decision: BaseTradeDecision, level: int = 0):
407423
_inner_execute_result = yield from self.inner_executor.collect_data(
408424
trade_decision=_inner_trade_decision, level=level + 1
409425
)
426+
self.post_inner_exe_step(_inner_execute_result)
410427
execute_result.extend(_inner_execute_result)
411428

412429
inner_order_indicators.append(
@@ -418,6 +435,17 @@ def _collect_data(self, trade_decision: BaseTradeDecision, level: int = 0):
418435

419436
return execute_result, {"inner_order_indicators": inner_order_indicators, "decision_list": decision_list}
420437

438+
def post_inner_exe_step(self, inner_exe_res):
439+
"""
440+
A hook for doing something after each step of the inner strategy
441+
442+
Parameters
443+
----------
444+
inner_exe_res :
445+
the execution result of inner task
446+
"""
447+
pass
448+
421449
def get_all_executors(self):
422450
"""get all executors, including self and inner_executor.get_all_executors()"""
423451
return [self, *self.inner_executor.get_all_executors()]

0 commit comments

Comments
 (0)