Merge pull request #358 from javaThonc/high_freq_demp

you-n-g · web-flow · commit a3a2b5ae0bf4 · 2021-04-14T20:05:07.000+08:00
update high freq demo
diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md
@@ -17,6 +17,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 | ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06| 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38| -0.1085±0.02 |
 | GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 |
 | DoubleEnsemble (Chuheng Zhang, et al.) | Alpha360 | 0.0407±0.00| 0.3053±0.00 | 0.0490±0.00 | 0.3840±0.00 | 0.0380±0.02 | 0.5000±0.21 | -0.0984±0.02 |
+| TabNet (Sercan O. Arik, et al.)| Alpha360 | 0.0192±0.00 | 0.1401±0.00| 0.0291±0.00 | 0.2163±0.00 | -0.0258±0.00 | -0.2961±0.00| -0.1429±0.00 |
 
 ## Alpha158 dataset
 | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
@@ -32,6 +33,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 | ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0385±0.01 | 0.3022±0.06| 0.0478±0.00 | 0.3874±0.04 | 0.0486±0.03 | 0.7141±0.45| -0.1088±0.03 |
 | GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 |
 | DoubleEnsemble (Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4338±0.01 | 0.0523±0.00 | 0.4257±0.01 | 0.1253±0.01 | 1.4105±0.14 | -0.0902±0.01 |
+| TabNet (Sercan O. Arik, et al.)| Alpha158 | 0.0383±0.00 | 0.3414±0.00| 0.0388±0.00 | 0.3460±0.00 | 0.0226±0.00 | 0.2652±0.00| -0.1072±0.00 |
 
 - The selected 20 features are based on the feature importance of a lightgbm-based model.
 - The base model of DoubleEnsemble is LGBM.
diff --git a/examples/highfreq/README.md b/examples/highfreq/README.md
@@ -25,4 +25,11 @@ The example is given in `workflow.py`, users can run the code as follows.
 Run the example by running the following command:
 ```bash
     python workflow.py dump_and_load_dataset
-```
+```
+
+## Benchmarks Performance
+### Signal Test
+Here are the results of the signal test for the benchmark models. We will keep updating the benchmark models in the future.
+| Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Long Precision | Short Precision | Long-Short Average Return | Long-Short Average Sharpe |
+|---|---|---|---|---|---|---|---|---|---|
+| LightGBM | Alpha158 | 0.3042±0.00 | 1.5372±0.00| 0.3117±0.00 | 1.6258±0.00 | 0.6720±0.00 | 0.6870±0.00 | 0.000769±0.00 | 1.0190±0.00 |
diff --git a/examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml b/examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml
@@ -0,0 +1,65 @@
+# Workflow configuration: HFLGBModel on the Alpha158 handler with 1-minute data.
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data_1min"
+    region: cn
+market: &market 'csi300'
+# Time boundaries shared (through YAML anchors) by the handler and the dataset segments.
+start_time: &start_time "2020-09-15 00:00:00"
+end_time: &end_time "2021-01-18 16:00:00"
+train_end_time: &train_end_time "2020-11-15 16:00:00"
+valid_start_time: &valid_start_time "2020-11-16 00:00:00"
+valid_end_time: &valid_end_time "2020-11-30 16:00:00"
+test_start_time: &test_start_time "2020-12-01 00:00:00"
+data_handler_config: &data_handler_config
+    start_time: *start_time
+    end_time: *end_time
+    # Processors are fitted on the training period only.
+    fit_start_time: *start_time
+    fit_end_time: *train_end_time
+    instruments: *market
+    freq: '1min'
+    infer_processors:
+        - class: 'RobustZScoreNorm'
+          kwargs:
+              fields_group: 'feature'
+              clip_outlier: false
+        - class: "Fillna"
+          kwargs:
+              fields_group: 'feature'
+    learn_processors:
+        - class: 'DropnaLabel'
+        - class: 'CSRankNorm'
+          kwargs:
+              fields_group: 'label'
+    # NOTE(review): presumably the return between the next two bars - confirm Ref() sign convention.
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+
+task:
+    model:
+        class: "HFLGBModel"
+        module_path: "qlib.contrib.model.highfreq_gdbt_model"
+        kwargs:
+            # HFLGBModel merges kwargs over its defaults, so this sets the LightGBM objective.
+            objective: 'binary'
+            metric: ['binary_logloss','auc']
+            verbosity: -1
+            learning_rate: 0.01
+            max_depth: 8
+            num_leaves: 150
+            lambda_l1: 1.5
+            lambda_l2: 1
+            num_threads: 20
+    dataset:
+        class: "DatasetH"
+        module_path: "qlib.data.dataset"
+        kwargs:
+            handler:
+                class: "Alpha158"
+                module_path: "qlib.contrib.data.handler"
+                kwargs: *data_handler_config
+            segments:
+                train: [*start_time, *train_end_time]
+                # Start at valid_start_time so that train and valid do not share a boundary timestamp.
+                valid: [*valid_start_time, *valid_end_time]
+                test: [*test_start_time, *end_time]
+    record:
+        - class: "SignalRecord"
+          module_path: "qlib.workflow.record_temp"
+          kwargs: {}
+        - class: "HFSignalRecord"
+          module_path: "qlib.workflow.record_temp"
+          kwargs: {}
diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py
@@ -8,6 +8,59 @@
 from typing import Tuple
 
 
+def calc_long_short_prec(
+    pred: pd.Series, label: pd.Series, date_col="datetime", quantile: float = 0.2, dropna=False, is_alpha=False
+) -> Tuple[pd.Series, pd.Series]:
+    """
+    Calculate the precision of the long and the short legs selected by the prediction.
+
+    For every ``date_col`` value, the ``quantile`` fraction of rows with the highest
+    prediction forms the long leg and the ``quantile`` fraction with the lowest
+    prediction forms the short leg. Long precision is the share of long-leg labels
+    that are > 0; short precision is the share of short-leg labels that are < 0.
+
+    Parameters
+    ----------
+    pred : pd.Series
+        prediction; index is **pd.MultiIndex**, index name is **[datetime, instrument]**.
+
+        .. code-block:: python
+
+            datetime            instrument
+            2020-12-01 09:30:00 SH600068    0.553634
+                                SH600195    0.550017
+                                SH600276    0.540321
+                                SH600584    0.517297
+                                SH600715    0.544674
+    label : pd.Series
+        label, indexed like ``pred``.
+    date_col : str
+        name of the index level used to group the rows in time.
+    quantile : float
+        fraction of the rows of each group that goes into each leg.
+    dropna : bool
+        whether to drop the rows in which the prediction or the label is NaN before ranking.
+    is_alpha : bool
+        whether to subtract the per-``date_col`` mean from the label before computing the precision.
+
+    Returns
+    -------
+    (pd.Series, pd.Series)
+        long precision and short precision, one value per ``date_col`` value. Groups that
+        are too small to fill a leg (``int(len(group) * quantile) == 0``) do not appear.
+
+    Raises
+    ------
+    ValueError
+        if the number of unique values in the second index level is not larger than ``int(1 / quantile)``.
+    """
+    if is_alpha:
+        # `Series.mean(level=...)` is deprecated (removed in pandas 2.0); the grouped
+        # transform yields the same per-timestamp mean, already aligned with `label`.
+        label = label - label.groupby(level=date_col).transform("mean")
+    if int(1 / quantile) >= len(label.index.get_level_values(1).unique()):
+        raise ValueError("Need more instruments to calculate precision")
+
+    df = pd.DataFrame({"pred": pred, "label": label})
+    if dropna:
+        df = df.dropna()
+
+    group = df.groupby(level=date_col)
+
+    def leg_size(x):
+        return int(len(x) * quantile)
+
+    # find the top/low quantile of prediction and treat them as long and short target;
+    # `apply` prepends the group key to the index, so that extra level is dropped again
+    long = group.apply(lambda x: x.nlargest(leg_size(x), columns="pred").label).reset_index(level=0, drop=True)
+    short = group.apply(lambda x: x.nsmallest(leg_size(x), columns="pred").label).reset_index(level=0, drop=True)
+
+    # precision = number of hits / number of non-NaN labels in the leg, per timestamp
+    long_prec = (long > 0).groupby(level=date_col).sum() / long.groupby(level=date_col).count()
+    short_prec = (short < 0).groupby(level=date_col).sum() / short.groupby(level=date_col).count()
+    return long_prec, short_prec
+
+
 def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False) -> Tuple[pd.Series, pd.Series]:
     """calc_ic.
 
diff --git a/qlib/contrib/model/highfreq_gdbt_model.py b/qlib/contrib/model/highfreq_gdbt_model.py
@@ -0,0 +1,157 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import numpy as np
+import pandas as pd
+import lightgbm as lgb
+
+from qlib.model.base import ModelFT
+from qlib.data.dataset import DatasetH
+from qlib.data.dataset.handler import DataHandlerLP
+import warnings
+
+
+class HFLGBModel(ModelFT):
+    """LightGBM Model for high frequency prediction
+
+    Before training, the single label column is converted into an alpha (label minus
+    the mean of its group on the first index level) and then into a 0/1 target
+    (0 when the alpha is negative, 1 otherwise).
+    """
+
+    def __init__(self, loss="mse", **kwargs):
+        """
+        Parameters
+        ----------
+        loss : str
+            LightGBM objective; only "mse" and "binary" are accepted.
+        **kwargs
+            extra LightGBM parameters. They are merged after the defaults, so they
+            override them (including ``objective`` and ``verbosity``).
+
+        Raises
+        ------
+        NotImplementedError
+            if ``loss`` is neither "mse" nor "binary".
+        """
+        if loss not in {"mse", "binary"}:
+            raise NotImplementedError
+        self.params = {"objective": loss, "verbosity": -1}
+        self.params.update(kwargs)
+        self.model = None
+
+    def _cal_signal_metrics(self, y_test, l_cut, r_cut):
+        """
+        Calculate the signal metrics for every value of the first index level and average them.
+
+        Parameters
+        ----------
+        y_test : pd.DataFrame
+            first column is the alpha label; the "pred" column is the prediction.
+        l_cut : float
+            fraction of the rows with the lowest prediction used as the negative side.
+        r_cut : float
+            rows from this fraction onwards (highest predictions) form the positive side.
+
+        Returns
+        -------
+        tuple
+            (positive precision, negative precision, positive average alpha, negative average alpha),
+            each averaged over the groups that were not skipped.
+        """
+        up_pre, down_pre = [], []
+        up_alpha_ll, down_alpha_ll = [], []
+        for date in y_test.index.get_level_values(0).unique():
+            df_res = y_test.loc[date].sort_values("pred")
+            n_rows = len(df_res)
+            n_low = int(l_cut * n_rows)
+            if n_low < 10:
+                # too few rows on the negative side to give a meaningful precision
+                warnings.warn("Warning: threhold is too low or instruments number is not enough")
+                continue
+            label_col = df_res.columns[0]
+            # ascending sort: the head holds the lowest predictions, the tail the highest
+            low_alpha = df_res.iloc[:n_low][label_col]
+            high_alpha = df_res.iloc[int(r_cut * n_rows) :][label_col]
+
+            # precision = share of rows whose alpha has the predicted sign
+            down_pre.append((low_alpha < 0).mean())
+            up_pre.append((high_alpha > 0).mean())
+            down_alpha_ll.append(low_alpha.mean())
+            up_alpha_ll.append(high_alpha.mean())
+
+        return (
+            np.array(up_pre).mean(),
+            np.array(down_pre).mean(),
+            np.array(up_alpha_ll).mean(),
+            np.array(down_alpha_ll).mean(),
+        )
+
+    def hf_signal_test(self, dataset: DatasetH, threhold=0.2):
+        """
+        Test the signal in the high frequency test set and print the result.
+
+        Parameters
+        ----------
+        dataset : DatasetH
+            dataset whose "test" segment is evaluated.
+        threhold : float
+            fraction of rows on each side (lowest / highest prediction) used for the metrics.
+
+        Raises
+        ------
+        ValueError
+            if the model has not been trained yet.
+        """
+        if self.model is None:
+            raise ValueError("Model hasn't been trained yet")
+        df_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
+        # rebind instead of dropping in place so the frame handed out by the dataset is left untouched
+        df_test = df_test.dropna()
+        x_test = df_test["feature"]
+        y_test = df_test["label"].copy()
+        # Convert label into alpha
+        label_col = y_test.columns[0]
+        y_test[label_col] = y_test[label_col] - y_test[label_col].groupby(level=0).transform("mean")
+
+        # x_test and y_test share the same index, so the predictions line up row by row
+        y_test["pred"] = self.model.predict(x_test.values)
+
+        up_p, down_p, up_a, down_a = self._cal_signal_metrics(y_test, threhold, 1 - threhold)
+        print("===============================")
+        print("High frequency signal test")
+        print("===============================")
+        print("Test set precision: ")
+        print("Positive precision: {}, Negative precision: {}".format(up_p, down_p))
+        print("Test Alpha Average in test set: ")
+        print("Positive average alpha: {}, Negative average alpha: {}".format(up_a, down_a))
+
+    @staticmethod
+    def _to_binary_lgb_dataset(df, l_name):
+        """Build a ``lgb.Dataset`` from ``df`` with the 0/1 target derived from label column ``l_name``."""
+        label = df["label"][l_name]
+        # Convert label into alpha
+        alpha = label - label.groupby(level=0).transform("mean")
+        # 0 when the alpha is negative, 1 otherwise (NaN maps to 1, like `0 if x < 0 else 1`)
+        target = (~(alpha < 0)).astype(int).values
+        return lgb.Dataset(df["feature"].values, label=target)
+
+    def _prepare_data(self, dataset: DatasetH):
+        """
+        Prepare the LightGBM train / valid datasets with the binary target.
+
+        The target is computed on local copies; the frames returned by ``dataset.prepare``
+        are not modified.
+
+        Raises
+        ------
+        ValueError
+            if the label does not consist of exactly one column.
+        """
+        df_train, df_valid = dataset.prepare(
+            ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
+        )
+
+        y_train = df_train["label"]
+        if y_train.values.ndim != 2 or y_train.values.shape[1] != 1:
+            raise ValueError("LightGBM doesn't support multi-label training")
+
+        l_name = y_train.columns[0]
+        dtrain = self._to_binary_lgb_dataset(df_train, l_name)
+        dvalid = self._to_binary_lgb_dataset(df_valid, l_name)
+        return dtrain, dvalid
+
+    def fit(
+        self,
+        dataset: DatasetH,
+        num_boost_round=1000,
+        early_stopping_rounds=50,
+        verbose_eval=20,
+        evals_result=None,
+        **kwargs
+    ):
+        """
+        Train the model on the "train" segment, validating on the "valid" segment.
+
+        Parameters
+        ----------
+        dataset : DatasetH
+            dataset providing the "train" and "valid" segments.
+        num_boost_round : int
+            maximum number of boosting rounds.
+        early_stopping_rounds : int
+            forwarded to ``lgb.train``.
+        verbose_eval : int
+            forwarded to ``lgb.train``.
+        evals_result : dict, optional
+            filled with the evaluation history; afterwards ``evals_result["train"]`` and
+            ``evals_result["valid"]`` hold the history of the first recorded metric.
+            A fresh dict is used when omitted.
+        **kwargs
+            forwarded to ``lgb.train``.
+        """
+        # NOTE(review): `early_stopping_rounds`, `verbose_eval` and `evals_result` are keyword
+        # arguments of older `lgb.train` signatures - confirm against the pinned LightGBM version.
+        if evals_result is None:
+            # a mutable default would be shared between calls
+            evals_result = {}
+        dtrain, dvalid = self._prepare_data(dataset)
+        self.model = lgb.train(
+            self.params,
+            dtrain,
+            num_boost_round=num_boost_round,
+            valid_sets=[dtrain, dvalid],
+            valid_names=["train", "valid"],
+            early_stopping_rounds=early_stopping_rounds,
+            verbose_eval=verbose_eval,
+            evals_result=evals_result,
+            **kwargs
+        )
+        # keep only the history of the first metric of each validation set
+        evals_result["train"] = list(evals_result["train"].values())[0]
+        evals_result["valid"] = list(evals_result["valid"].values())[0]
+
+    def predict(self, dataset):
+        """
+        Predict on the "test" segment of ``dataset``.
+
+        Returns
+        -------
+        pd.Series
+            model output indexed like the test features.
+
+        Raises
+        ------
+        ValueError
+            if the model has not been fitted yet.
+        """
+        if self.model is None:
+            raise ValueError("model is not fitted yet!")
+        x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
+        return pd.Series(self.model.predict(x_test.values), index=x_test.index)
+
+    def finetune(self, dataset: DatasetH, num_boost_round=10, verbose_eval=20):
+        """
+        finetune model
+
+        Parameters
+        ----------
+        dataset : DatasetH
+            dataset for finetuning
+        num_boost_round : int
+            number of round to finetune model
+        verbose_eval : int
+            verbose level
+        """
+        # Based on existing model and finetune by train more rounds
+        dtrain, _ = self._prepare_data(dataset)
+        self.model = lgb.train(
+            self.params,
+            dtrain,
+            num_boost_round=num_boost_round,
+            init_model=self.model,
+            valid_sets=[dtrain],
+            valid_names=["train"],
+            verbose_eval=verbose_eval,
+        )
diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py
@@ -13,7 +13,7 @@
 from ..utils import init_instance_by_config, get_module_by_module_path
 from ..log import get_module_logger
 from ..utils import flatten_dict
-from ..contrib.eva.alpha import calc_ic, calc_long_short_return
+from ..contrib.eva.alpha import calc_ic, calc_long_short_return, calc_long_short_prec
 from ..contrib.strategy.strategy import BaseStrategy
 
 logger = get_module_logger("workflow", "INFO")
@@ -162,6 +162,60 @@ def load(self, name="pred.pkl"):
         return super().load(name)
 
 
+class HFSignalRecord(SignalRecord):
+    """
+    Signal analysis record for high frequency predictions. It logs IC, Rank IC, long/short
+    precision and long-short return metrics and saves the underlying series as artifacts.
+    This class inherits the ``SignalRecord`` class.
+    """
+
+    artifact_path = "hg_sig_analysis"
+
+    def __init__(self, recorder, **kwargs):
+        # extra keyword arguments are accepted but not forwarded to the parent
+        super().__init__(recorder=recorder)
+
+    def generate(self):
+        """Compute the metrics from the saved prediction and label, then log, save and print them."""
+        pred = self.load("pred.pkl")
+        raw_label = self.load("label.pkl")
+        # only the first column of each frame is analysed
+        score = pred.iloc[:, 0]
+        target = raw_label.iloc[:, 0]
+
+        long_pre, short_pre = calc_long_short_prec(score, target, is_alpha=True)
+        ic, ric = calc_ic(score, target)
+        long_short_r, long_avg_r = calc_long_short_return(score, target)
+
+        ic_mean = ic.mean()
+        ric_mean = ric.mean()
+        ls_mean = long_short_r.mean()
+        metrics = {
+            "IC": ic_mean,
+            "ICIR": ic_mean / ic.std(),
+            "Rank IC": ric_mean,
+            "Rank ICIR": ric_mean / ric.std(),
+            "Long precision": long_pre.mean(),
+            "Short precision": short_pre.mean(),
+            "Long-Short Average Return": ls_mean,
+            "Long-Short Average Sharpe": ls_mean / long_short_r.std(),
+        }
+        objects = {
+            "ic.pkl": ic,
+            "ric.pkl": ric,
+            "long_pre.pkl": long_pre,
+            "short_pre.pkl": short_pre,
+            "long_short_r.pkl": long_short_r,
+            "long_avg_r.pkl": long_avg_r,
+        }
+
+        self.recorder.log_metrics(**metrics)
+        self.recorder.save_objects(**objects, artifact_path=self.get_path())
+        pprint(metrics)
+
+    def list(self):
+        """Return the artifact paths of every object saved by ``generate``."""
+        artifact_names = (
+            "ic.pkl",
+            "ric.pkl",
+            "long_pre.pkl",
+            "short_pre.pkl",
+            "long_short_r.pkl",
+            "long_avg_r.pkl",
+        )
+        return [self.get_path(name) for name in artifact_names]
+
+
 class SigAnaRecord(SignalRecord):
     """
     This is the Signal Analysis Record class that generates the analysis results such as IC and IR. This class inherits the ``RecordTemp`` class.