From a9eadfcaaa69b6b1437f56752d74ab67f9a4da0b Mon Sep 17 00:00:00 2001
From: Alex Wang <haouyw2@gmail.com>
Date: Fri, 5 Mar 2021 21:04:05 +0900
Subject: [PATCH 1/7] high frequency example

---
 examples/highfreq/high_freq_tree.ipynb   | 192 +++++++++++++++++++++++
 examples/highfreq/highfreq_gdbt_model.py | 155 ++++++++++++++++++
 2 files changed, 347 insertions(+)
 create mode 100644 examples/highfreq/high_freq_tree.ipynb
 create mode 100644 examples/highfreq/highfreq_gdbt_model.py

diff --git a/examples/highfreq/high_freq_tree.ipynb b/examples/highfreq/high_freq_tree.ipynb
new file mode 100644
index 00000000000..18a559e141a
--- /dev/null
+++ b/examples/highfreq/high_freq_tree.ipynb
@@ -0,0 +1,192 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#  Copyright (c) Microsoft Corporation.\n",
+    "#  Licensed under the MIT License.\n",
+    "import qlib\n",
+    "import os\n",
+    "import random\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "from multiprocessing import Pool\n",
+    "from qlib.config import REG_CN, HIGH_FREQ_CONFIG\n",
+    "from qlib.contrib.model.gbdt import LGBModel\n",
+    "from qlib.contrib.data.handler import Alpha158\n",
+    "from qlib.contrib.strategy.strategy import TopkDropoutStrategy\n",
+    "from qlib.contrib.evaluate import (\n",
+    "    backtest as normal_backtest,\n",
+    "    risk_analysis,\n",
+    ")\n",
+    "from qlib.utils import exists_qlib_data, init_instance_by_config\n",
+    "from qlib.workflow import R\n",
+    "from qlib.data import D\n",
+    "from qlib.data.filter import NameDFilter\n",
+    "from qlib.workflow.record_temp import SignalRecord, PortAnaRecord\n",
+    "from qlib.data.dataset.handler import DataHandlerLP\n",
+    "from qlib.utils import flatten_dict\n",
+    "import lightgbm as lgb"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Qlib configuration"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "QLIB_INIT_CONFIG = {**HIGH_FREQ_CONFIG}\n",
+    "qlib.init(**QLIB_INIT_CONFIG)\n",
+    "instruments = D.instruments(market='all')\n",
+    "random.seed(710)\n",
+    "instruments = D.list_instruments(instruments=instruments, freq = '1min', as_list=True)\n",
+    "# Randomly select instruments to boost the training efficiency\n",
+    "instruments = random.sample(instruments, 150)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# train model configuration\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "MARKET = 'ALL'\n",
+    "BENCHMARK = \"SH000300\"\n",
+    "\n",
+    "start_time = \"2020-09-15 00:00:00\"\n",
+    "end_time = \"2021-01-18 16:00:00\"\n",
+    "train_end_time = \"2020-11-15 16:00:00\"\n",
+    "valid_start_time = \"2020-11-16 00:00:00\"\n",
+    "valid_end_time = \"2020-11-30 16:00:00\"\n",
+    "test_start_time = \"2020-12-01 00:00:00\"\n",
+    "\n",
+    "data_handler_config = {\n",
+    "    \"start_time\": start_time,\n",
+    "    \"end_time\": end_time,\n",
+    "    \"fit_start_time\": start_time,\n",
+    "    \"fit_end_time\": train_end_time,\n",
+    "    \"freq\": \"1min\",\n",
+    "    \"instruments\": instruments,\n",
+    "    \"learn_processors\":[\n",
+    "        {\"class\": \"DropnaLabel\"}\n",
+    "    ],\n",
+    "    \"infer_processors\": [         \n",
+    "        {\"class\": \"RobustZScoreNorm\",\n",
+    "        \"kwargs\": {\n",
+    "            \"fields_group\": \"feature\",\n",
+    "            \"clip_outlier\": True,\n",
+    "        }},\n",
+    "        {\"class\": \"Fillna\",\n",
+    "         \"kwargs\": {\n",
+    "             \"fields_group\": \"feature\",\n",
+    "         }},],\n",
+    "    \"label\": [\"Ref($close, -1) / $close - 1\"],\n",
+    "}\n",
+    "\n",
+    "\n",
+    "task = {\n",
+    "    \"model\": {\n",
+    "        \"class\": \"HF_LGBModel\",\n",
+    "        \"module_path\": \"highfreq_gdbt_model.py\",\n",
+    "        \"kwargs\": {\n",
+    "            \"objective\": 'binary', \n",
+    "            \"metric\": ['binary_logloss','auc'],\n",
+    "            \"verbosity\": -1,\n",
+    "            \"learning_rate\": 0.01,\n",
+    "            \"max_depth\": 8,\n",
+    "            \"num_leaves\": 150, \n",
+    "            \"lambda_l1\": 1.5,\n",
+    "            \"lambda_l2\": 1,\n",
+    "            \"num_threads\": 20\n",
+    "        },\n",
+    "    },\n",
+    "    \"dataset\": {\n",
+    "        \"class\": \"DatasetH\",\n",
+    "        \"module_path\": \"qlib.data.dataset\",\n",
+    "        \"kwargs\": {\n",
+    "            \"handler\": {\n",
+    "                \"class\": \"Alpha158\",\n",
+    "                \"module_path\": \"qlib.contrib.data.handler\",\n",
+    "                \"kwargs\": data_handler_config,\n",
+    "            },\n",
+    "            \"segments\": {\n",
+    "                \"train\": (start_time, train_end_time),\n",
+    "                \"valid\": (train_end_time, valid_end_time),\n",
+    "                \"test\": (\n",
+    "                    test_start_time,\n",
+    "                    end_time,\n",
+    "                ),\n",
+    "            },\n",
+    "        },\n",
+    "    },\n",
+    "}\n",
+    "\n",
+    "provider_uri = QLIB_INIT_CONFIG.get(\"provider_uri\")\n",
+    "if not exists_qlib_data(provider_uri):\n",
+    "    print(f\"Qlib data is not found in {provider_uri}\")\n",
+    "    GetData().qlib_data(target_dir=provider_uri, interval=\"1min\", region=REG_CN)\n",
+    "\n",
+    "dataset = init_instance_by_config(task[\"dataset\"])\n",
+    "model = init_instance_by_config(task[\"model\"])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# train model and back test\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# start exp to train model with signal test\n",
+    "with R.start(experiment_name=\"train_model\"):\n",
+    "    R.log_params(**flatten_dict(task))\n",
+    "    model.fit(dataset)\n",
+    "    model.hf_signal_test(dataset, 0.1)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python [conda env:trade]",
+   "language": "python",
+   "name": "conda-env-trade-py"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/examples/highfreq/highfreq_gdbt_model.py b/examples/highfreq/highfreq_gdbt_model.py
new file mode 100644
index 00000000000..0774670f8b6
--- /dev/null
+++ b/examples/highfreq/highfreq_gdbt_model.py
@@ -0,0 +1,155 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import numpy as np
+import pandas as pd
+import lightgbm as lgb
+
+from qlib.model.base import ModelFT
+from qlib.data.dataset import DatasetH
+from qlib.data.dataset.handler import DataHandlerLP
+import warnings
+
+class HF_LGBModel(ModelFT):
+    """LightGBM Model"""
+
+    def __init__(self, loss="mse", **kwargs):
+        if loss not in {"mse", "binary"}:
+            raise NotImplementedError
+        self.params = {"objective": loss, "verbosity": -1}
+        self.params.update(kwargs)
+        self.model = None
+        
+    def _cal_signal_metrics(self, y_test, l_cut, r_cut):
+        """
+        Calcaute the signal metrics by daily level
+        """
+        up_pre, down_pre = [],[]
+        up_alpha_ll, down_alpha_ll = [], []
+        for date in y_test.index.get_level_values(0).unique():
+            df_res = y_test.loc[date].sort_values('pred')
+            if int(l_cut*len(df_res)) < 10:
+                    warnings.warn("Warning: threhold is too low or instruments number is not enough")
+                    continue
+            top = df_res.iloc[:int(l_cut*len(df_res))]
+            bottom = df_res.iloc[int(r_cut*len(df_res)):]
+
+            down_precision = len(top[top[top.columns[0]] < 0])/(len(top))
+            up_precision = len(bottom[bottom[top.columns[0]] > 0])/(len(bottom))
+
+            down_alpha = top[top.columns[0]].mean()
+            up_alpha = bottom[bottom.columns[0]].mean()
+
+            up_pre.append(up_precision)
+            down_pre.append(down_precision)
+            up_alpha_ll.append(up_alpha)
+            down_alpha_ll.append(down_alpha)
+        
+        return np.array(up_pre).mean(), np.array(down_pre).mean(),np.array(up_alpha_ll).mean(),np.array(down_alpha_ll).mean()
+
+    def hf_signal_test(self, dataset: DatasetH, threhold = 0.2):
+        """
+        Test the sigal in high frequency test set
+        """
+        if self.model == None:
+            raise ValueError("Model hasn't been trained yet")
+        df_test = dataset.prepare(
+            "test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I
+        )
+        df_test.dropna(inplace = True)
+        x_test, y_test =  df_test['feature'], df_test['label']
+        # Convert label into alpha
+        y_test[y_test.columns[0]] = y_test[y_test.columns[0]]-y_test[y_test.columns[0]].mean(level=0)
+
+        res = pd.Series(self.model.predict(x_test.values), index=x_test.index)
+        y_test['pred'] = res
+                                                                                                                                
+        up_p, down_p, up_a, down_a = self._cal_signal_metrics(y_test, threhold, 1-threhold)
+        print("===============================") 
+        print("High frequency signal test")
+        print("===============================")                                                                            
+        print("Test set precision: ")
+        print("Positive precision: {}, Negative precision: {}".format(up_p, down_p))
+        print("Test Alpha Average in test set: ")
+        print("Positive average alpha: {}, Negative average alpha: {}".format(up_a, down_a))
+
+        
+    def _prepare_data(self, dataset: DatasetH):
+        df_train, df_valid = dataset.prepare(
+            ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
+        )
+        
+        x_train, y_train = df_train['feature'], df_train['label']
+        x_valid, y_valid = df_train['feature'], df_valid['label']
+        if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
+            l_name = df_train['label'].columns[0]
+            # Convert label into alpha
+            df_train['label'][l_name] = df_train['label'][l_name] - df_train['label'][l_name].mean(level=0)
+            df_valid['label'][l_name] = df_valid['label'][l_name] - df_valid['label'][l_name].mean(level=0)
+            mapping_fn = lambda x: 0 if x < 0 else 1
+            df_train['label_c'] = df_train['label'][l_name].apply(mapping_fn)
+            df_valid['label_c'] = df_valid['label'][l_name].apply(mapping_fn)
+            x_train, y_train = df_train['feature'], df_train['label_c'].values
+            x_valid, y_valid = df_valid['feature'], df_valid['label_c'].values
+        else:
+            raise ValueError("LightGBM doesn't support multi-label training")
+        
+
+        dtrain = lgb.Dataset(x_train.values, label=y_train)
+        dvalid = lgb.Dataset(x_valid.values, label=y_valid)
+        return dtrain, dvalid
+
+    def fit(
+        self,
+        dataset: DatasetH,
+        num_boost_round=1000,
+        early_stopping_rounds=50,
+        verbose_eval=20,
+        evals_result=dict(),
+        **kwargs
+    ):
+        dtrain, dvalid = self._prepare_data(dataset)
+        self.model = lgb.train(
+            self.params,
+            dtrain,
+            num_boost_round=num_boost_round,
+            valid_sets=[dtrain, dvalid],
+            valid_names=["train", "valid"],
+            early_stopping_rounds=early_stopping_rounds,
+            verbose_eval=verbose_eval,
+            evals_result=evals_result,
+            **kwargs
+        )
+        evals_result["train"] = list(evals_result["train"].values())[0]
+        evals_result["valid"] = list(evals_result["valid"].values())[0]
+
+    def predict(self, dataset):
+        if self.model is None:
+            raise ValueError("model is not fitted yet!")
+        x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
+        return pd.Series(self.model.predict(x_test.values), index=x_test.index)
+
+    def finetune(self, dataset: DatasetH, num_boost_round=10, verbose_eval=20):
+        """
+        finetune model
+
+        Parameters
+        ----------
+        dataset : DatasetH
+            dataset for finetuning
+        num_boost_round : int
+            number of round to finetune model
+        verbose_eval : int
+            verbose level
+        """
+        # Based on existing model and finetune by train more rounds
+        dtrain, _ = self._prepare_data(dataset)
+        self.model = lgb.train(
+            self.params,
+            dtrain,
+            num_boost_round=num_boost_round,
+            init_model=self.model,
+            valid_sets=[dtrain],
+            valid_names=["train"],
+            verbose_eval=verbose_eval,
+        )

From dc5dd0d019fd90e29edc4d94c15fc5d9ab2a2e04 Mon Sep 17 00:00:00 2001
From: Alex Wang <haouyw2@gmail.com>
Date: Fri, 5 Mar 2021 21:08:22 +0900
Subject: [PATCH 2/7] format

---
 examples/highfreq/highfreq_gdbt_model.py | 74 ++++++++++++------------
 1 file changed, 38 insertions(+), 36 deletions(-)

diff --git a/examples/highfreq/highfreq_gdbt_model.py b/examples/highfreq/highfreq_gdbt_model.py
index 0774670f8b6..62e45c84133 100644
--- a/examples/highfreq/highfreq_gdbt_model.py
+++ b/examples/highfreq/highfreq_gdbt_model.py
@@ -10,6 +10,7 @@
 from qlib.data.dataset.handler import DataHandlerLP
 import warnings
 
+
 class HF_LGBModel(ModelFT):
     """LightGBM Model"""
 
@@ -19,23 +20,23 @@ def __init__(self, loss="mse", **kwargs):
         self.params = {"objective": loss, "verbosity": -1}
         self.params.update(kwargs)
         self.model = None
-        
+
     def _cal_signal_metrics(self, y_test, l_cut, r_cut):
         """
         Calcaute the signal metrics by daily level
         """
-        up_pre, down_pre = [],[]
+        up_pre, down_pre = [], []
         up_alpha_ll, down_alpha_ll = [], []
         for date in y_test.index.get_level_values(0).unique():
-            df_res = y_test.loc[date].sort_values('pred')
-            if int(l_cut*len(df_res)) < 10:
-                    warnings.warn("Warning: threhold is too low or instruments number is not enough")
-                    continue
-            top = df_res.iloc[:int(l_cut*len(df_res))]
-            bottom = df_res.iloc[int(r_cut*len(df_res)):]
+            df_res = y_test.loc[date].sort_values("pred")
+            if int(l_cut * len(df_res)) < 10:
+                warnings.warn("Warning: threhold is too low or instruments number is not enough")
+                continue
+            top = df_res.iloc[: int(l_cut * len(df_res))]
+            bottom = df_res.iloc[int(r_cut * len(df_res)) :]
 
-            down_precision = len(top[top[top.columns[0]] < 0])/(len(top))
-            up_precision = len(bottom[bottom[top.columns[0]] > 0])/(len(bottom))
+            down_precision = len(top[top[top.columns[0]] < 0]) / (len(top))
+            up_precision = len(bottom[bottom[top.columns[0]] > 0]) / (len(bottom))
 
             down_alpha = top[top.columns[0]].mean()
             up_alpha = bottom[bottom.columns[0]].mean()
@@ -44,56 +45,57 @@ def _cal_signal_metrics(self, y_test, l_cut, r_cut):
             down_pre.append(down_precision)
             up_alpha_ll.append(up_alpha)
             down_alpha_ll.append(down_alpha)
-        
-        return np.array(up_pre).mean(), np.array(down_pre).mean(),np.array(up_alpha_ll).mean(),np.array(down_alpha_ll).mean()
 
-    def hf_signal_test(self, dataset: DatasetH, threhold = 0.2):
+        return (
+            np.array(up_pre).mean(),
+            np.array(down_pre).mean(),
+            np.array(up_alpha_ll).mean(),
+            np.array(down_alpha_ll).mean(),
+        )
+
+    def hf_signal_test(self, dataset: DatasetH, threhold=0.2):
         """
         Test the sigal in high frequency test set
         """
         if self.model == None:
             raise ValueError("Model hasn't been trained yet")
-        df_test = dataset.prepare(
-            "test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I
-        )
-        df_test.dropna(inplace = True)
-        x_test, y_test =  df_test['feature'], df_test['label']
+        df_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
+        df_test.dropna(inplace=True)
+        x_test, y_test = df_test["feature"], df_test["label"]
         # Convert label into alpha
-        y_test[y_test.columns[0]] = y_test[y_test.columns[0]]-y_test[y_test.columns[0]].mean(level=0)
+        y_test[y_test.columns[0]] = y_test[y_test.columns[0]] - y_test[y_test.columns[0]].mean(level=0)
 
         res = pd.Series(self.model.predict(x_test.values), index=x_test.index)
-        y_test['pred'] = res
-                                                                                                                                
-        up_p, down_p, up_a, down_a = self._cal_signal_metrics(y_test, threhold, 1-threhold)
-        print("===============================") 
+        y_test["pred"] = res
+
+        up_p, down_p, up_a, down_a = self._cal_signal_metrics(y_test, threhold, 1 - threhold)
+        print("===============================")
         print("High frequency signal test")
-        print("===============================")                                                                            
+        print("===============================")
         print("Test set precision: ")
         print("Positive precision: {}, Negative precision: {}".format(up_p, down_p))
         print("Test Alpha Average in test set: ")
         print("Positive average alpha: {}, Negative average alpha: {}".format(up_a, down_a))
 
-        
     def _prepare_data(self, dataset: DatasetH):
         df_train, df_valid = dataset.prepare(
             ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
         )
-        
-        x_train, y_train = df_train['feature'], df_train['label']
-        x_valid, y_valid = df_train['feature'], df_valid['label']
+
+        x_train, y_train = df_train["feature"], df_train["label"]
+        x_valid, y_valid = df_train["feature"], df_valid["label"]
         if y_train.values.ndim == 2 and y_train.values.shape[1] == 1:
-            l_name = df_train['label'].columns[0]
+            l_name = df_train["label"].columns[0]
             # Convert label into alpha
-            df_train['label'][l_name] = df_train['label'][l_name] - df_train['label'][l_name].mean(level=0)
-            df_valid['label'][l_name] = df_valid['label'][l_name] - df_valid['label'][l_name].mean(level=0)
+            df_train["label"][l_name] = df_train["label"][l_name] - df_train["label"][l_name].mean(level=0)
+            df_valid["label"][l_name] = df_valid["label"][l_name] - df_valid["label"][l_name].mean(level=0)
             mapping_fn = lambda x: 0 if x < 0 else 1
-            df_train['label_c'] = df_train['label'][l_name].apply(mapping_fn)
-            df_valid['label_c'] = df_valid['label'][l_name].apply(mapping_fn)
-            x_train, y_train = df_train['feature'], df_train['label_c'].values
-            x_valid, y_valid = df_valid['feature'], df_valid['label_c'].values
+            df_train["label_c"] = df_train["label"][l_name].apply(mapping_fn)
+            df_valid["label_c"] = df_valid["label"][l_name].apply(mapping_fn)
+            x_train, y_train = df_train["feature"], df_train["label_c"].values
+            x_valid, y_valid = df_valid["feature"], df_valid["label_c"].values
         else:
             raise ValueError("LightGBM doesn't support multi-label training")
-        
 
         dtrain = lgb.Dataset(x_train.values, label=y_train)
         dvalid = lgb.Dataset(x_valid.values, label=y_valid)

From 2e874111d6a71b63a80ef79c9af53be548c2d7ea Mon Sep 17 00:00:00 2001
From: Alex Wang <haouyw2@gmail.com>
Date: Wed, 10 Mar 2021 22:57:26 +0900
Subject: [PATCH 3/7] 360 example

---
 .../workflow_config_TabNet_Alpha360.yaml      | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml

diff --git a/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml
new file mode 100644
index 00000000000..fac322e65bf
--- /dev/null
+++ b/examples/benchmarks/TabNet/workflow_config_TabNet_Alpha360.yaml
@@ -0,0 +1,81 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/cn_data"
+    region: cn
+market: &market csi300
+benchmark: &benchmark SH000300
+data_handler_config: &data_handler_config
+    start_time: 2008-01-01
+    end_time: 2020-08-01
+    fit_start_time: 2008-01-01
+    fit_end_time: 2014-12-31
+    instruments: *market
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+port_analysis_config: &port_analysis_config
+    strategy:
+        class: TopkDropoutStrategy
+        module_path: qlib.contrib.strategy.strategy
+        kwargs:
+            topk: 50
+            n_drop: 5
+    backtest:
+        verbose: False
+        limit_threshold: 0.095
+        account: 100000000
+        benchmark: *benchmark
+        deal_price: close
+        open_cost: 0.0005
+        close_cost: 0.0015
+        min_cost: 5
+task:
+    model:
+        class: TabnetModel
+        module_path: qlib.contrib.model.pytorch_tabnet
+        kwargs:
+            pretrain: True
+            d_feat: 360
+            n_d: 8
+            n_a: 8
+            n_shared: 2
+            n_ind: 2
+            n_steps: 3
+            GPU: "2"
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha360
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                pretrain: [2008-01-01, 2014-12-31]
+                pretrain_validation: [2015-01-01, 2020-08-01]
+                train: [2008-01-01, 2014-12-31]
+                valid: [2015-01-01, 2016-12-31]
+                test: [2017-01-01, 2020-08-01]
+    record: 
+        - class: SignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
+        - class: SigAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            ana_long_short: False
+            ann_scaler: 252
+        - class: PortAnaRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: 
+            config: *port_analysis_config

From fdec411cf53b81e655dfdf92725f8325e589c147 Mon Sep 17 00:00:00 2001
From: Alex Wang <haouyw2@gmail.com>
Date: Wed, 10 Mar 2021 22:59:43 +0900
Subject: [PATCH 4/7] tabnet fix bug and adding alpha360

---
 qlib/contrib/model/pytorch_tabnet.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py
index 62e32d701ce..ef031039142 100644
--- a/qlib/contrib/model/pytorch_tabnet.py
+++ b/qlib/contrib/model/pytorch_tabnet.py
@@ -182,7 +182,7 @@ def fit(
 
         stop_steps = 0
         train_loss = 0
-        best_score = np.inf
+        best_score = -np.inf
         best_epoch = 0
         evals_result["train"] = []
         evals_result["valid"] = []
@@ -201,7 +201,7 @@ def fit(
             evals_result["train"].append(train_score)
             evals_result["valid"].append(val_score)
 
-            if val_score < best_score:
+            if val_score > best_score:
                 best_score = val_score
                 stop_steps = 0
                 best_epoch = epoch_idx

From d499962271e7111d0b853727cd9fe31232801a45 Mon Sep 17 00:00:00 2001
From: Alex Wang <haouyw2@gmail.com>
Date: Thu, 11 Mar 2021 16:35:55 +0900
Subject: [PATCH 5/7] high freq demo

---
 ...rkflow_config_High_Freq_Tree_Alpha158.yaml |  62 +++++++
 qlib/contrib/eva/alpha.py                     |  40 ++++-
 qlib/contrib/model/highfreq_gdbt_model.py     | 157 ++++++++++++++++++
 qlib/workflow/record_temp.py                  |  82 ++++++++-
 4 files changed, 339 insertions(+), 2 deletions(-)
 create mode 100644 examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml
 create mode 100644 qlib/contrib/model/highfreq_gdbt_model.py

diff --git a/examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml b/examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml
new file mode 100644
index 00000000000..d1b36e98084
--- /dev/null
+++ b/examples/highfreq/workflow_config_High_Freq_Tree_Alpha158.yaml
@@ -0,0 +1,62 @@
+qlib_init:
+    provider_uri: "~/.qlib/qlib_data/yahoo_cn_1min"
+    region: cn
+market: &market "all"
+start_time: &start_time "2020-09-15 00:00:00"
+end_time: &end_time "2021-01-18 16:00:00"
+train_end_time: &train_end_time "2020-11-15 16:00:00"
+valid_start_time: &valid_start_time "2020-11-16 00:00:00"
+valid_end_time: &valid_end_time "2020-11-30 16:00:00"
+test_start_time: &test_start_time "2020-12-01 00:00:00"
+data_handler_config: &data_handler_config
+    start_time: *start_time
+    end_time: *end_time
+    fit_start_time: *start_time
+    fit_end_time: *train_end_time
+    instruments: *market
+    freq: '1min'
+    infer_processors:
+        - class: RobustZScoreNorm
+          kwargs:
+              fields_group: feature
+              clip_outlier: true
+        - class: Fillna
+          kwargs:
+              fields_group: feature
+    learn_processors:
+        - class: DropnaLabel
+        - class: CSRankNorm
+          kwargs:
+              fields_group: label
+    label: ["Ref($close, -2) / Ref($close, -1) - 1"]
+    
+task:
+    model:
+        class: HF_LGBModel
+        module_path: qlib.contrib.model.highfreq_gdbt_model
+        kwargs:
+            objective: 'binary'
+            metric: ['binary_logloss','auc']
+            verbosity: -1
+            learning_rate: 0.01
+            max_depth: 8
+            num_leaves: 150
+            lambda_l1: 1.5
+            lambda_l2: 1
+            num_threads: 20
+    dataset:
+        class: DatasetH
+        module_path: qlib.data.dataset
+        kwargs:
+            handler:
+                class: Alpha158
+                module_path: qlib.contrib.data.handler
+                kwargs: *data_handler_config
+            segments:
+                train: [*start_time, *train_end_time]
+                valid: [*train_end_time, *valid_end_time]
+                test: [*test_start_time, *end_time]
+    record: 
+        - class: HFSignalRecord
+          module_path: qlib.workflow.record_temp
+          kwargs: {}
\ No newline at end of file
diff --git a/qlib/contrib/eva/alpha.py b/qlib/contrib/eva/alpha.py
index c68571853f1..87f5b892342 100644
--- a/qlib/contrib/eva/alpha.py
+++ b/qlib/contrib/eva/alpha.py
@@ -4,9 +4,47 @@
 The interface should be redesigned carefully in the future.
 """
 import pandas as pd
-
 from typing import Tuple
 
+def calc_prec(pred: pd.Series, label: pd.Series, date_col="datetime", quantile: float = 0.2, dropna=False, is_alpha=False) -> Tuple[pd.Series, pd.Series]:
+    """ calculate the precision
+    pred :
+        pred
+    label :
+        label
+    date_col :
+        date_col
+    
+    Returns
+    -------
+    (pd.Series, pd.Series)
+        long precision and short precision in time level
+    """
+    if is_alpha:
+        label = label - label.mean(level=0)
+
+    
+    df = pd.DataFrame({"pred": pred, "label": label})
+    print(df)
+    if dropna:
+        df.dropna(inplace = True)
+        
+    group = df.groupby(level=date_col)
+    
+    N = lambda x: int(len(x) * quantile)
+    # find the top/low quantile of prediction and treat them as long and short target
+    long = group.apply(lambda x: x.nlargest(N(x), columns="pred").label).reset_index(level=0, drop=True)
+    short = group.apply(lambda x: x.nsmallest(N(x), columns="pred").label).reset_index(level=0, drop=True)
+    
+    groupll = long.groupby(date_col)
+    ll_ration = groupll.apply(lambda x: x > 0)
+    ll_c = groupll.count()
+    
+    groups = short.groupby(date_col)
+    s_ration = groups.apply(lambda x: x < 0)
+    s_c = groups.count()
+    return (ll_ration.groupby(date_col).sum()/ll_c), (s_ration.groupby(date_col).sum()/s_c)
+
 
 def calc_ic(pred: pd.Series, label: pd.Series, date_col="datetime", dropna=False) -> Tuple[pd.Series, pd.Series]:
     """calc_ic.
diff --git a/qlib/contrib/model/highfreq_gdbt_model.py b/qlib/contrib/model/highfreq_gdbt_model.py
new file mode 100644
index 00000000000..62e45c84133
--- /dev/null
+++ b/qlib/contrib/model/highfreq_gdbt_model.py
@@ -0,0 +1,157 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT License.
+
+import numpy as np
+import pandas as pd
+import lightgbm as lgb
+
+from qlib.model.base import ModelFT
+from qlib.data.dataset import DatasetH
+from qlib.data.dataset.handler import DataHandlerLP
+import warnings
+
+
+class HF_LGBModel(ModelFT):
+    """LightGBM model for high-frequency signals, trained on the sign of the alpha label."""
+
+    def __init__(self, loss="mse", **kwargs):
+        if loss not in {"mse", "binary"}:
+            raise NotImplementedError(f"loss {loss!r} is not supported")
+        self.params = {"objective": loss, "verbosity": -1}
+        self.params.update(kwargs)
+        self.model = None
+
+    def _cal_signal_metrics(self, y_test, l_cut, r_cut):
+        """Calculate the signal metrics per date and average them over all dates.
+
+        ``y_test`` holds the alpha label in its first column plus a ``pred`` column. For each date the
+        rows are ranked by ``pred``: the lowest ``l_cut`` fraction is the short leg, the rows from the
+        ``r_cut`` fraction upwards are the long leg. Dates whose short leg has < 10 rows are skipped.
+
+        Returns
+        -------
+        tuple
+            (mean long precision, mean short precision, mean long alpha, mean short alpha)
+        """
+        label_col = y_test.columns[0]
+        up_pre, down_pre, up_alpha_ll, down_alpha_ll = [], [], [], []
+        for _, df_day in y_test.groupby(level=0):
+            n_low = int(l_cut * len(df_day))
+            if n_low < 10:
+                warnings.warn("Warning: threhold is too low or instruments number is not enough")
+                continue
+            ranked = df_day.sort_values("pred")[label_col]
+            low, high = ranked.iloc[:n_low], ranked.iloc[int(r_cut * len(df_day)) :]
+            # precision = share of the leg whose alpha has the predicted sign
+            down_pre.append((low < 0).mean())
+            up_pre.append((high > 0).mean())
+            down_alpha_ll.append(low.mean())
+            up_alpha_ll.append(high.mean())
+        if not up_pre:
+            # every date was skipped: return NaN instead of averaging empty arrays
+            return (np.nan,) * 4
+        return np.mean(up_pre), np.mean(down_pre), np.mean(up_alpha_ll), np.mean(down_alpha_ll)
+
+    def hf_signal_test(self, dataset: DatasetH, threhold=0.2):
+        """
+        Print precision and average alpha of the positive/negative legs on the "test" segment.
+
+        ``threhold`` is the fraction of rows per date taken for each leg.
+        """
+        if self.model is None:
+            raise ValueError("Model hasn't been trained yet")
+        df_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I).dropna()
+        # copy so that adding columns below never writes into a slice of ``df_test``
+        x_test, y_test = df_test["feature"], df_test["label"].copy()
+        l_name = y_test.columns[0]
+        # Convert label into alpha: subtract the mean label of the same first-level index value
+        y_test[l_name] = y_test[l_name] - y_test[l_name].groupby(level=0).transform("mean")
+        y_test["pred"] = self.model.predict(x_test.values)
+        up_p, down_p, up_a, down_a = self._cal_signal_metrics(y_test, threhold, 1 - threhold)
+        print("===============================")
+        print("High frequency signal test")
+        print("===============================")
+        print("Test set precision: ")
+        print("Positive precision: {}, Negative precision: {}".format(up_p, down_p))
+        print("Test Alpha Average in test set: ")
+        print("Positive average alpha: {}, Negative average alpha: {}".format(up_a, down_a))
+
+    def _prepare_data(self, dataset: DatasetH):
+        """Build the LightGBM datasets of the "train" and "valid" segments.
+
+        The single label column is demeaned per first-level index value (alpha) and binarised:
+        0 for a negative alpha, 1 otherwise.
+        """
+        df_train, df_valid = dataset.prepare(
+            ["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
+        )
+        # only a single label column can be turned into the class target
+        if df_train["label"].values.ndim != 2 or df_train["label"].values.shape[1] != 1:
+            raise ValueError("LightGBM doesn't support multi-label training")
+
+        def _to_lgb(df):
+            # use a local Series: chained assignment on ``df["label"]`` may write to a copy
+            label = df["label"].iloc[:, 0]
+            alpha = label - label.groupby(level=0).transform("mean")
+            # a NaN alpha maps to 1, exactly like the ``0 if x < 0 else 1`` rule
+            return lgb.Dataset(df["feature"].values, label=np.where(alpha < 0, 0, 1))
+
+        dtrain, dvalid = _to_lgb(df_train), _to_lgb(df_valid)
+        return dtrain, dvalid
+
+    def fit(
+        self,
+        dataset: DatasetH,
+        num_boost_round=1000,
+        early_stopping_rounds=50,
+        verbose_eval=20,
+        evals_result=None,
+        **kwargs
+    ):
+        """Train on the "train" segment with early stopping on "valid"; histories go to ``evals_result``."""
+        # a fresh dict per call: a ``dict()`` default would be shared between calls
+        evals_result = {} if evals_result is None else evals_result
+        dtrain, dvalid = self._prepare_data(dataset)
+        self.model = lgb.train(
+            self.params,
+            dtrain,
+            num_boost_round=num_boost_round,
+            valid_sets=[dtrain, dvalid],
+            valid_names=["train", "valid"],
+            early_stopping_rounds=early_stopping_rounds,
+            verbose_eval=verbose_eval,
+            evals_result=evals_result,
+            **kwargs
+        )
+        evals_result["train"] = list(evals_result["train"].values())[0]
+        evals_result["valid"] = list(evals_result["valid"].values())[0]
+
+    def predict(self, dataset):
+        """Return the model output on the "test" segment as a Series indexed like the features."""
+        if self.model is None:
+            raise ValueError("model is not fitted yet!")
+        x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
+        return pd.Series(self.model.predict(x_test.values), index=x_test.index)
+
+    def finetune(self, dataset: DatasetH, num_boost_round=10, verbose_eval=20):
+        """Finetune the existing model by training more rounds on the "train" segment.
+
+        Parameters
+        ----------
+        dataset : DatasetH
+            dataset for finetuning
+        num_boost_round : int
+            number of round to finetune model
+        verbose_eval : int
+            verbose level
+        """
+        dtrain, _ = self._prepare_data(dataset)
+        self.model = lgb.train(
+            self.params,
+            dtrain,
+            num_boost_round=num_boost_round,
+            init_model=self.model,
+            valid_sets=[dtrain],
+            valid_names=["train"],
+            verbose_eval=verbose_eval,
+        )
diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py
index be458a24d29..c691af9605b 100644
--- a/qlib/workflow/record_temp.py
+++ b/qlib/workflow/record_temp.py
@@ -13,7 +13,7 @@
 from ..utils import init_instance_by_config, get_module_by_module_path
 from ..log import get_module_logger
 from ..utils import flatten_dict
-from ..contrib.eva.alpha import calc_ic, calc_long_short_return
+from ..contrib.eva.alpha import calc_ic, calc_long_short_return, calc_prec
 from ..contrib.strategy.strategy import BaseStrategy
 
 logger = get_module_logger("workflow", "INFO")
@@ -155,7 +155,87 @@ def list(self):
     def load(self, name="pred.pkl"):
         return super().load(name)
 
+class HFSignalRecord(SignalRecord):
+    """
+    Signal record for high-frequency models. It saves the prediction and the raw label of the test
+    segment and logs IC, Rank IC, long/short precision and long-short return metrics.
+    """
+
+    artifact_path = "hg_sig_analysis"
+
+    def __init__(self, model=None, dataset=None, recorder=None, **kwargs):
+        super().__init__(recorder=recorder)
+        self.model = model
+        self.dataset = dataset
+
+    def _load_raw_label(self):
+        """Return the raw label of the "test" segment, preparing ``self.dataset`` as a plain ``DatasetH``."""
+        if not isinstance(self.dataset, DatasetH):
+            raise TypeError("HFSignalRecord requires a DatasetH to load the raw label")
+        # NOTE: Python doesn't provide the downcasting mechanism,
+        # so ``__class__`` is swapped temporarily to downcast the dataset.
+        orig_cls = self.dataset.__class__
+        self.dataset.__class__ = DatasetH
+        params = dict(segments="test", col_set="label", data_key=DataHandlerLP.DK_R)
+        try:
+            # Assume the backend handler is DataHandlerLP
+            return self.dataset.prepare(**params)
+        except TypeError:
+            # The argument number is not right: the backend handler should be DataHandler
+            del params["data_key"]
+            return self.dataset.prepare(**params)
+        finally:
+            self.dataset.__class__ = orig_cls
 
+    def generate(self):
+        """Predict, save ``pred.pkl`` and ``label.pkl``, then log the signal metrics and save their series."""
+        pred = self.model.predict(self.dataset)
+        if isinstance(pred, pd.Series):
+            pred = pred.to_frame("score")
+        self.recorder.save_objects(**{"pred.pkl": pred})
+        logger.info(
+            f"Signal record 'pred.pkl' has been saved as the artifact of the Experiment {self.recorder.experiment_id}"
+        )
+        # print out results
+        pprint(f"The following are prediction results of the {type(self.model).__name__} model.")
+        pprint(pred.head(5))
+
+        raw_label = self._load_raw_label()
+        # like pred.pkl, the label is saved without an ``artifact_path``
+        self.recorder.save_objects(**{"label.pkl": raw_label})
+
+        ic, ric = calc_ic(pred.iloc[:, 0], raw_label.iloc[:, 0])
+        long_pre, short_pre = calc_prec(pred.iloc[:, 0], raw_label.iloc[:, 0], is_alpha=True)
+        long_short_r, long_avg_r = calc_long_short_return(pred.iloc[:, 0], raw_label.iloc[:, 0])
+        metrics = {
+            "IC": ic.mean(),
+            "ICIR": ic.mean() / ic.std(),
+            "Rank IC": ric.mean(),
+            "Rank ICIR": ric.mean() / ric.std(),
+            "Long precision": long_pre.mean(),
+            "Short precision": short_pre.mean(),
+            "Long-Short Average Return": long_short_r.mean(),
+            "Long-Short Average Sharpe": long_short_r.mean() / long_short_r.std(),
+        }
+        objects = {
+            "ic.pkl": ic,
+            "ric.pkl": ric,
+            "long_pre.pkl": long_pre,
+            "short_pre.pkl": short_pre,
+            "long_short_r.pkl": long_short_r,
+            "long_avg_r.pkl": long_avg_r,
+        }
+        self.recorder.log_metrics(**metrics)
+        self.recorder.save_objects(**objects, artifact_path=self.get_path())
+        pprint(metrics)
+
+    def list(self):
+        """List the artifact paths written by ``generate``."""
+        # ``generate`` always stores the long-short series, so they are always listed
+        names = ["ic.pkl", "ric.pkl", "long_pre.pkl", "short_pre.pkl", "long_short_r.pkl", "long_avg_r.pkl"]
+        return [self.get_path(name) for name in names]
+    
+    
 class SigAnaRecord(SignalRecord):
     """
     This is the Signal Analysis Record class that generates the analysis results such as IC and IR. This class inherits the ``RecordTemp`` class.

From 029d4e0d9abfc26a40129073a27f7568aaf5d2ff Mon Sep 17 00:00:00 2001
From: Alex Wang <haouyw2@gmail.com>
Date: Fri, 12 Mar 2021 16:07:32 +0900
Subject: [PATCH 6/7] update sfm and tabnet benchmark

---
 examples/benchmarks/README.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md
index c5bfced6dc2..d83562f9d0e 100644
--- a/examples/benchmarks/README.md
+++ b/examples/benchmarks/README.md
@@ -17,6 +17,9 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 | ALSTM (Yao Qin, et al.) | Alpha360 | 0.0493±0.01 | 0.3778±0.06| 0.0585±0.00 | 0.4606±0.04 | 0.0513±0.03 | 0.6727±0.38| -0.1085±0.02 |
 | GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 |
 | DoubleEnsemble (Chuheng Zhang, et al.) | Alpha360 | 0.0407±0.00| 0.3053±0.00 | 0.0490±0.00 | 0.3840±0.00 | 0.0380±0.02 | 0.5000±0.21 | -0.0984±0.02 |
+| SFM (Liheng Zhang, et al.)| Alpha360 | 0.0314±0.00 | 0.2389±0.02| 0.0407±0.00 | 0.3245±0.01 | 0.0127±0.01 | 0.1440±0.07| -0.1631±0.01 |
+| TabNet (Sercan O. Arik, et al.)| Alpha360 | 0.0192±0.00 | 0.1401±0.00| 0.0291±0.00 | 0.2163±0.00 | -0.0258±0.00 | -0.2961±0.00| -0.1429±0.00 |
+
 ## Alpha158 dataset
 | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
 |---|---|---|---|---|---|---|---|---|
@@ -32,6 +35,8 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 | ALSTM (Yao Qin, et al.) | Alpha158 (with selected 20 features) | 0.0385±0.01 | 0.3022±0.06| 0.0478±0.00 | 0.3874±0.04 | 0.0486±0.03 | 0.7141±0.45| -0.1088±0.03 |
 | GATs (Petar Velickovic, et al.) | Alpha158 (with selected 20 features) | 0.0349±0.00 | 0.2511±0.01| 0.0457±0.00 | 0.3537±0.01 | 0.0578±0.02 | 0.8221±0.25| -0.0824±0.02 |
 | DoubleEnsemble (Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4338±0.01 | 0.0523±0.00 | 0.4257±0.01 | 0.1253±0.01 | 1.4105±0.14 | -0.0902±0.01 |
+| TabNet (Sercan O. Arik, et al.)| Alpha158 | 0.0383±0.00 | 0.3414±0.00| 0.0388±0.00 | 0.3460±0.00 | 0.0226±0.00 | 0.2652±0.00| -0.1072±0.00 |
+
 
 - The selected 20 features are based on the feature importance of a lightgbm-based model.
 - The base model of DoubleEnsemble is LGBM.

From 22a110be430bc444a38af920fb7649f24d9cd044 Mon Sep 17 00:00:00 2001
From: Alex Wang <haouyw2@gmail.com>
Date: Fri, 12 Mar 2021 16:32:15 +0900
Subject: [PATCH 7/7] update tabnet metrics

---
 examples/benchmarks/README.md | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/examples/benchmarks/README.md b/examples/benchmarks/README.md
index d83562f9d0e..d6ae557e65e 100644
--- a/examples/benchmarks/README.md
+++ b/examples/benchmarks/README.md
@@ -18,7 +18,7 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 | GATs (Petar Velickovic, et al.) | Alpha360 | 0.0475±0.00 | 0.3515±0.02| 0.0592±0.00 | 0.4585±0.01 | 0.0876±0.02 | 1.1513±0.27| -0.0795±0.02 |
 | DoubleEnsemble (Chuheng Zhang, et al.) | Alpha360 | 0.0407±0.00| 0.3053±0.00 | 0.0490±0.00 | 0.3840±0.00 | 0.0380±0.02 | 0.5000±0.21 | -0.0984±0.02 |
 | SFM (Liheng Zhang, et al.)| Alpha360 | 0.0314±0.00 | 0.2389±0.02| 0.0407±0.00 | 0.3245±0.01 | 0.0127±0.01 | 0.1440±0.07| -0.1631±0.01 |
-| TabNet (Sercan O. Arik, et al.)| Alpha360 | 0.0192±0.00 | 0.1401±0.00| 0.0291±0.00 | 0.2163±0.00 | -0.0258±0.00 | -0.2961±0.00| -0.1429±0.00 |
+
 
 ## Alpha158 dataset
 | Model Name | Dataset | IC | ICIR | Rank IC | Rank ICIR | Annualized Return | Information Ratio | Max Drawdown |
@@ -28,7 +28,6 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 | XGBoost (Tianqi Chen, et al.) | Alpha158 | 0.0481±0.00 | 0.3659±0.00| 0.0495±0.00 | 0.4033±0.00 | 0.1111±0.00 | 1.2915±0.00| -0.0893±0.00 |
 | LightGBM (Guolin Ke, et al.) | Alpha158 | 0.0475±0.00 | 0.3979±0.00| 0.0485±0.00 | 0.4123±0.00 | 0.1143±0.00 | 1.2744±0.00| -0.0800±0.00 |
 | MLP | Alpha158 | 0.0358±0.00 | 0.2738±0.03| 0.0425±0.00 | 0.3221±0.01 | 0.0836±0.02 | 1.0323±0.25| -0.1127±0.02 |
-| TabNet with pretrain (Sercan O. Arikm et al) | Alpha158 | 0.0344±0.00|0.205±0.11|0.0398±0.00 |0.3479±0.01|0.0827±0.02|1.1141±0.32 |-0.0925±0.02  |
 | TFT (Bryan Lim, et al.) | Alpha158 (with selected 20 features) | 0.0343±0.00 | 0.2071±0.02| 0.0107±0.00 | 0.0660±0.02 | 0.0623±0.02 | 0.5818±0.20| -0.1762±0.01 |
 | GRU (Kyunghyun Cho, et al.) | Alpha158 (with selected 20 features) | 0.0311±0.00 | 0.2418±0.04| 0.0425±0.00 | 0.3434±0.02 | 0.0330±0.02 | 0.4805±0.30| -0.1021±0.02 |
 | LSTM (Sepp Hochreiter, et al.) | Alpha158 (with selected 20 features) | 0.0312±0.00 | 0.2394±0.04| 0.0418±0.00 | 0.3324±0.03 | 0.0298±0.02 | 0.4198±0.33| -0.1348±0.03 |
@@ -37,6 +36,5 @@ The numbers shown below demonstrate the performance of the entire `workflow` of
 | DoubleEnsemble (Chuheng Zhang, et al.) | Alpha158 | 0.0544±0.00 | 0.4338±0.01 | 0.0523±0.00 | 0.4257±0.01 | 0.1253±0.01 | 1.4105±0.14 | -0.0902±0.01 |
 | TabNet (Sercan O. Arik, et al.)| Alpha158 | 0.0383±0.00 | 0.3414±0.00| 0.0388±0.00 | 0.3460±0.00 | 0.0226±0.00 | 0.2652±0.00| -0.1072±0.00 |
 
-
 - The selected 20 features are based on the feature importance of a lightgbm-based model.
-- The base model of DoubleEnsemble is LGBM.
+- The base model of DoubleEnsemble is LGBM.
\ No newline at end of file