Skip to content

Commit 6006e29

Browse files
zhupryou-n-g
andauthored
Fix backtest (microsoft#719)
* modify FileStorage to support multiple freqs * modify backtest's sample documentation * change the logging level of read data exception from error to debug * fix the backtest exception when volume is 0 or np.nan * fix test_storage.py * add backtest_daily * modify backtest_daily's docstring * add __repr__/__str__ to Position * fix the bug of nested_decision_execution example Co-authored-by: Young <afe.young@gmail.com> Co-authored-by: you-n-g <you-n-g@users.noreply.github.com>
1 parent 045e6b1 commit 6006e29

File tree

19 files changed

+678
-247
lines changed

19 files changed

+678
-247
lines changed

docs/component/strategy.rst

Lines changed: 119 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -84,31 +84,125 @@ Usage & Example
8484
====================
8585
``Portfolio Strategy`` can be specified in the ``Intraday Trading(Backtest)``, the example is as follows.
8686

87-
.. code-block:: python
88-
89-
from qlib.contrib.strategy.strategy import TopkDropoutStrategy
90-
from qlib.contrib.evaluate import backtest
91-
STRATEGY_CONFIG = {
92-
"topk": 50,
93-
"n_drop": 5,
94-
}
95-
BACKTEST_CONFIG = {
96-
"limit_threshold": 0.095,
97-
"account": 100000000,
98-
"benchmark": BENCHMARK,
99-
"deal_price": "close",
100-
"open_cost": 0.0005,
101-
"close_cost": 0.0015,
102-
"min_cost": 5,
103-
104-
}
105-
# use default strategy
106-
strategy = TopkDropoutStrategy(**STRATEGY_CONFIG)
107-
108-
# pred_score is the `prediction score` output by Model
109-
report_normal, positions_normal = backtest(
110-
pred_score, strategy=strategy, **BACKTEST_CONFIG
111-
)
87+
- daily
88+
89+
.. code-block:: python
90+
91+
from pprint import pprint
92+
93+
import qlib
94+
import pandas as pd
95+
from qlib.utils.time import Freq
96+
from qlib.utils import flatten_dict
97+
from qlib.contrib.evaluate import backtest_daily
98+
from qlib.contrib.evaluate import risk_analysis
99+
from qlib.contrib.strategy import TopkDropoutStrategy
100+
101+
# init qlib
102+
qlib.init(provider_uri=<qlib data dir>)
103+
104+
CSI300_BENCH = "SH000300"
105+
STRATEGY_CONFIG = {
106+
"topk": 50,
107+
"n_drop": 5,
108+
# pred_score, pd.Series
109+
"signal": pred_score,
110+
}
111+
112+
113+
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
114+
report_normal, positions_normal = backtest_daily(
115+
start_time="2017-01-01", end_time="2020-08-01", strategy=strategy_obj
116+
)
117+
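# backtest_daily runs at daily frequency; build the matching frequency string (e.g. "1day")
analysis_freq = "{0}{1}".format(*Freq.parse("day"))
analysis = dict()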
118+
analysis["excess_return_without_cost"] = risk_analysis(
119+
report_normal["return"] - report_normal["bench"], freq=analysis_freq
120+
)
121+
analysis["excess_return_with_cost"] = risk_analysis(
122+
report_normal["return"] - report_normal["bench"] - report_normal["cost"], freq=analysis_freq
123+
)
124+
125+
analysis_df = pd.concat(analysis) # type: pd.DataFrame
126+
pprint(analysis_df)
127+
128+
129+
130+
- nested decision execution
131+
132+
.. code-block:: python
133+
134+
from pprint import pprint
135+
136+
import qlib
137+
import pandas as pd
138+
from qlib.utils.time import Freq
139+
from qlib.utils import flatten_dict
140+
from qlib.backtest import backtest, executor
141+
from qlib.contrib.evaluate import risk_analysis
142+
from qlib.contrib.strategy import TopkDropoutStrategy
143+
144+
# init qlib
145+
qlib.init(provider_uri=<qlib data dir>)
146+
147+
CSI300_BENCH = "SH000300"
148+
FREQ = "day"
149+
STRATEGY_CONFIG = {
150+
"topk": 50,
151+
"n_drop": 5,
152+
# pred_score, pd.Series
153+
"signal": pred_score,
154+
}
155+
156+
EXECUTOR_CONFIG = {
157+
"time_per_step": "day",
158+
"generate_portfolio_metrics": True,
159+
}
160+
161+
backtest_config = {
162+
"start_time": "2017-01-01",
163+
"end_time": "2020-08-01",
164+
"account": 100000000,
165+
"benchmark": CSI300_BENCH,
166+
"exchange_kwargs": {
167+
"freq": FREQ,
168+
"limit_threshold": 0.095,
169+
"deal_price": "close",
170+
"open_cost": 0.0005,
171+
"close_cost": 0.0015,
172+
"min_cost": 5,
173+
},
174+
}
175+
176+
# strategy object
177+
strategy_obj = TopkDropoutStrategy(**STRATEGY_CONFIG)
178+
# executor object
179+
executor_obj = executor.SimulatorExecutor(**EXECUTOR_CONFIG)
180+
# backtest
181+
portfolio_metric_dict, indicator_dict = backtest(executor=executor_obj, strategy=strategy_obj, **backtest_config)
182+
analysis_freq = "{0}{1}".format(*Freq.parse(FREQ))
183+
# backtest info
184+
report_normal, positions_normal = portfolio_metric_dict.get(analysis_freq)
185+
186+
# analysis
187+
analysis = dict()
188+
analysis["excess_return_without_cost"] = risk_analysis(
189+
report_normal["return"] - report_normal["bench"], freq=analysis_freq
190+
)
191+
analysis["excess_return_with_cost"] = risk_analysis(
192+
report_normal["return"] - report_normal["bench"] - report_normal["cost"], freq=analysis_freq
193+
)
194+
195+
analysis_df = pd.concat(analysis) # type: pd.DataFrame
196+
# log metrics
197+
analysis_dict = flatten_dict(analysis_df["risk"].unstack().T.to_dict())
198+
# print out results
199+
pprint(f"The following are analysis results of benchmark return({analysis_freq}).")
200+
pprint(risk_analysis(report_normal["bench"], freq=analysis_freq))
201+
pprint(f"The following are analysis results of the excess return without cost({analysis_freq}).")
202+
pprint(analysis["excess_return_without_cost"])
203+
pprint(f"The following are analysis results of the excess return with cost({analysis_freq}).")
204+
pprint(analysis["excess_return_with_cost"])
205+
112206
113207
To know more about the `prediction score` `pred_score` output by ``Forecast Model``, please refer to `Forecast Model: Model Training & Prediction <model.html>`_.
114208

examples/nested_decision_execution/workflow.py

Lines changed: 190 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,105 @@
11
# Copyright (c) Microsoft Corporation.
22
# Licensed under the MIT License.
3+
"""
4+
The expected result of `backtest` in the current version is as follows
35
6+
'The following are analysis results of benchmark return(1day).'
7+
risk
8+
mean 0.000651
9+
std 0.012472
10+
annualized_return 0.154967
11+
information_ratio 0.805422
12+
max_drawdown -0.160445
13+
'The following are analysis results of the excess return without cost(1day).'
14+
risk
15+
mean 0.001258
16+
std 0.007575
17+
annualized_return 0.299303
18+
information_ratio 2.561219
19+
max_drawdown -0.068386
20+
'The following are analysis results of the excess return with cost(1day).'
21+
risk
22+
mean 0.001110
23+
std 0.007575
24+
annualized_return 0.264280
25+
information_ratio 2.261392
26+
max_drawdown -0.071842
27+
[1706497:MainThread](2021-12-07 14:08:30,263) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_30minute.
28+
pkl' has been saved as the artifact of the Experiment 2
29+
'The following are analysis results of benchmark return(30minute).'
30+
risk
31+
mean 0.000078
32+
std 0.003646
33+
annualized_return 0.148787
34+
information_ratio 0.935252
35+
max_drawdown -0.142830
36+
('The following are analysis results of the excess return without '
37+
'cost(30minute).')
38+
risk
39+
mean 0.000174
40+
std 0.003343
41+
annualized_return 0.331867
42+
information_ratio 2.275019
43+
max_drawdown -0.074752
44+
'The following are analysis results of the excess return with cost(30minute).'
45+
risk
46+
mean 0.000155
47+
std 0.003343
48+
annualized_return 0.294536
49+
information_ratio 2.018860
50+
max_drawdown -0.075579
51+
[1706497:MainThread](2021-12-07 14:08:30,277) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_5minute.p
52+
kl' has been saved as the artifact of the Experiment 2
53+
'The following are analysis results of benchmark return(5minute).'
54+
risk
55+
mean 0.000015
56+
std 0.001460
57+
annualized_return 0.172170
58+
information_ratio 1.103439
59+
max_drawdown -0.144807
60+
'The following are analysis results of the excess return without cost(5minute).'
61+
risk
62+
mean 0.000028
63+
std 0.001412
64+
annualized_return 0.319771
65+
information_ratio 2.119563
66+
max_drawdown -0.077426
67+
'The following are analysis results of the excess return with cost(5minute).'
68+
risk
69+
mean 0.000025
70+
std 0.001412
71+
annualized_return 0.281536
72+
information_ratio 1.866091
73+
max_drawdown -0.078194
74+
[1706497:MainThread](2021-12-07 14:08:30,287) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_1day
75+
.pkl' has been saved as the artifact of the Experiment 2
76+
'The following are analysis results of indicators(1day).'
77+
value
78+
ffr 0.945821
79+
pa 0.000324
80+
pos 0.542882
81+
[1706497:MainThread](2021-12-07 14:08:30,293) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_30mi
82+
nute.pkl' has been saved as the artifact of the Experiment 2
83+
'The following are analysis results of indicators(30minute).'
84+
value
85+
ffr 0.982910
86+
pa 0.000037
87+
pos 0.500806
88+
[1706497:MainThread](2021-12-07 14:08:30,302) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_5min
89+
ute.pkl' has been saved as the artifact of the Experiment 2
90+
'The following are analysis results of indicators(5minute).'
91+
value
92+
ffr 0.991017
93+
pa 0.000000
94+
pos 0.000000
95+
[1706497:MainThread](2021-12-07 14:08:30,627) INFO - qlib.timer - [log.py:113] - Time cost: 0.014s | waiting `async_log` Done
96+
"""
497

98+
99+
from copy import deepcopy
5100
import qlib
6101
import fire
102+
import pandas as pd
7103
from qlib.config import REG_CN, HIGH_FREQ_CONFIG
8104
from qlib.data import D
9105
from qlib.utils import exists_qlib_data, init_instance_by_config, flatten_dict
@@ -14,6 +110,13 @@
14110

15111

16112
class NestedDecisionExecutionWorkflow:
113+
# TODO: add test for nested workflow.
114+
# 1) comparing same backtest
115+
# - Basic test idea: the shared accumulated values are equal across multiple levels
116+
# - Aligning the profit calculation between multiple levels and single levels.
117+
# 2) comparing different backtest
118+
# - Basic test idea:
119+
# - the daily backtest will be similar to the multi-level one (the data quality makes this gap smaller)
17120

18121
market = "csi300"
19122
benchmark = "SH000300"
@@ -167,8 +270,6 @@ def backtest(self):
167270
par = PortAnaRecord(
168271
recorder,
169272
self.port_analysis_config,
170-
risk_analysis_freq=["day", "30min", "5min"],
171-
indicator_analysis_freq=["day", "30min", "5min"],
172273
indicator_analysis_method="value_weighted",
173274
)
174275
par.generate()
@@ -199,6 +300,93 @@ def collect_data(self):
199300
for trade_decision in data_generator:
200301
print(trade_decision)
201302

303+
# the code below is for checking only; users don't have to care about it
304+
def check_diff_freq(self):
    """
    Check that accumulated portfolio metrics agree between backtest frequencies.

    The portfolio reports ``portfolio_analysis/report_normal_{freq}.pkl`` are loaded from
    the first recorder listed for the "backtest" experiment (assumed to be the latest one).
    For every checked column, the last 30-minute value of each day must equal the daily value.

    Raises
    ------
    AssertionError
        If the day-end 30-minute values differ from the daily values for a checked column.
    """
    self._init_qlib()
    exp = R.get_exp(experiment_name="backtest")
    rec = next(iter(exp.list_recorders().values()))  # assuming this will get the latest recorder
    # NOTE: the loop body used to start with ``check_key = "total_cost"``, which overwrote the
    # loop variable so that only "total_cost" was ever checked (three times). The override is
    # removed so that every listed column is really verified.
    for check_key in "account", "total_turnover", "total_cost":
        acc_dict = {}
        # "5minute" is loaded as well; it takes part in the alignment (``dropna``) below
        # but is not compared directly.
        for freq in ["30minute", "5minute", "1day"]:
            acc_dict[freq] = rec.load_object(f"portfolio_analysis/report_normal_{freq}.pkl")[check_key]
        acc_df = pd.DataFrame(acc_dict)
        # keep the last observation of each day and drop days missing in any frequency
        acc_resam = acc_df.resample("1d").last().dropna()
        assert (
            acc_resam["30minute"] == acc_resam["1day"]
        ).all(), f"`{check_key}` differs between the 30minute and 1day reports"
317+
318+
def backtest_only_daily(self):
    """
    Run a single-layer, daily-frequency backtest.

    This backtest serves as a reference for comparing the nested execution with a
    single-layer execution. Because the daily-level and minute-level data are of low
    quality, the two are hardly comparable in detail; the comparison is only meant to
    detect serious bugs that make the results differ greatly.

    Expected output in the current version:

    .. code-block:: shell

        [1724971:MainThread](2021-12-07 16:24:31,156) INFO - qlib.workflow - [record_temp.py:441] - Portfolio analysis record 'port_analysis_1day.pkl'
        has been saved as the artifact of the Experiment 2
        'The following are analysis results of benchmark return(1day).'
                               risk
        mean               0.000651
        std                0.012472
        annualized_return  0.154967
        information_ratio  0.805422
        max_drawdown      -0.160445
        'The following are analysis results of the excess return without cost(1day).'
                               risk
        mean               0.001375
        std                0.006103
        annualized_return  0.327204
        information_ratio  3.475016
        max_drawdown      -0.024927
        'The following are analysis results of the excess return with cost(1day).'
                               risk
        mean               0.001184
        std                0.006091
        annualized_return  0.281801
        information_ratio  2.998749
        max_drawdown      -0.029568
        [1724971:MainThread](2021-12-07 16:24:31,170) INFO - qlib.workflow - [record_temp.py:466] - Indicator analysis record 'indicator_analysis_1day.
        pkl' has been saved as the artifact of the Experiment 2
        'The following are analysis results of indicators(1day).'
             value
        ffr    1.0
        pa     0.0
        pos    0.0
        [1724971:MainThread](2021-12-07 16:24:31,188) INFO - qlib.timer - [log.py:113] - Time cost: 0.007s | waiting `async_log` Done

    """
    self._init_qlib()

    # build and train the model whose predictions drive the strategy
    trained_model = init_instance_by_config(self.task["model"])
    data = init_instance_by_config(self.task["dataset"])
    self._train_model(trained_model, data)

    # a single executor that trades once per day (no nested sub-executors)
    daily_executor = {
        "class": "SimulatorExecutor",
        "module_path": "qlib.backtest.executor",
        "kwargs": {
            "time_per_step": "day",
            "generate_portfolio_metrics": True,
            "verbose": True,
        },
    }
    topk_strategy = {
        "class": "TopkDropoutStrategy",
        "module_path": "qlib.contrib.strategy.signal_strategy",
        "kwargs": {
            "signal": (trained_model, data),
            "topk": 50,
            "n_drop": 5,
        },
    }

    # work on a copy so the workflow-level analysis config is left untouched
    pa_conf = deepcopy(self.port_analysis_config)
    pa_conf["strategy"] = topk_strategy
    pa_conf["executor"] = daily_executor
    pa_conf["backtest"]["benchmark"] = self.benchmark

    with R.start(experiment_name="backtest"):
        PortAnaRecord(R.get_recorder(), pa_conf).generate()
389+
202390

203391
if __name__ == "__main__":
204392
fire.Fire(NestedDecisionExecutionWorkflow)

qlib/backtest/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,10 @@ def get_strategy_executor(
186186
trade_exchange = get_exchange(**exchange_kwargs)
187187

188188
common_infra = CommonInfrastructure(trade_account=trade_account, trade_exchange=trade_exchange)
189-
trade_strategy = init_instance_by_config(strategy, accept_types=BaseStrategy, common_infra=common_infra)
190-
trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor, common_infra=common_infra)
189+
trade_strategy = init_instance_by_config(strategy, accept_types=BaseStrategy)
190+
trade_strategy.reset_common_infra(common_infra)
191+
trade_executor = init_instance_by_config(executor, accept_types=BaseExecutor)
192+
trade_executor.reset_common_infra(common_infra)
191193

192194
return trade_strategy, trade_executor
193195

0 commit comments

Comments
 (0)