From 7dc19a8201780c7b3e9d0f6d9ca3f68809d9343a Mon Sep 17 00:00:00 2001
From: zhupr <zhu.pengrong@foxmail.com>
Date: Wed, 29 Sep 2021 12:30:31 +0800
Subject: [PATCH 1/7] modify the example of multi-freq

---
 examples/benchmarks/LightGBM/average_ops.py   |  37 ++++++
 .../LightGBM/features_resample_N.py           |  15 +++
 .../benchmarks/LightGBM/multi_freq_handler.py | 120 ++++++++++++++++++
 ...w_config_lightgbm_Alpha158_multi_freq.yaml |  32 ++---
 qlib/workflow/record_temp.py                  |   6 +-
 5 files changed, 193 insertions(+), 17 deletions(-)
 create mode 100644 examples/benchmarks/LightGBM/average_ops.py
 create mode 100644 examples/benchmarks/LightGBM/features_resample_N.py
 create mode 100644 examples/benchmarks/LightGBM/multi_freq_handler.py

diff --git a/examples/benchmarks/LightGBM/average_ops.py b/examples/benchmarks/LightGBM/average_ops.py
new file mode 100644
index 00000000000..6de7710338b
--- /dev/null
+++ b/examples/benchmarks/LightGBM/average_ops.py
@@ -0,0 +1,37 @@
+import math
+import numpy as np
+
+from qlib.data.ops import ElemOperator
+
+
+class Avg(ElemOperator):
+    MINUTES = 240
+
+    def __init__(self, feature, start_index, end_index, func="nanmean"):
+        assert start_index < end_index, "Avg in end_index must be greater than start_index"
+        self.feature = feature
+        self.s_i = start_index
+        self.e_i = end_index
+        self.func = func
+        self.min_periods = 1 if self.func == "nanmean" else self.e_i - self.s_i
+        super().__init__(feature)
+
+    def _load_internal(self, instrument, start_index, end_index, freq):
+        series = self.feature.load(instrument, start_index, end_index, freq)
+        if series.empty:
+            return series
+        start_index = math.ceil(series.index[0] / self.MINUTES) * self.MINUTES
+        res = series.rolling(self.e_i - self.s_i, min_periods=self.min_periods).mean()
+        mask = []
+        while start_index <= series.index[-1]:
+            mask.append(start_index + self.e_i - 1)
+            start_index += self.MINUTES
+        res.loc[~series.index.isin(mask)] = np.nan
+        return res
+
+    def get_extended_window_size(self):
+        lft_etd, rght_etd = self.feature.get_extended_window_size()
+        return lft_etd + self.MINUTES, rght_etd + self.MINUTES
+
+    def __str__(self):
+        return "{}({},{},{},{})".format(type(self).__name__, self.feature, self.s_i, self.e_i, self.func)
diff --git a/examples/benchmarks/LightGBM/features_resample_N.py b/examples/benchmarks/LightGBM/features_resample_N.py
new file mode 100644
index 00000000000..8496ab00f09
--- /dev/null
+++ b/examples/benchmarks/LightGBM/features_resample_N.py
@@ -0,0 +1,15 @@
+import pandas as pd
+
+from qlib.data.inst_processor import InstProcessor
+from qlib.utils.resam import resam_calendar
+
+
+class ResampleNProcessor(InstProcessor):
+    def __init__(self, target_frq: str, **kwargs):
+        self.target_frq = target_frq
+
+    def __call__(self, df: pd.DataFrame, *args, **kwargs):
+        df.index = pd.to_datetime(df.index)
+        res_index = resam_calendar(df.index, "1min", self.target_frq)
+        df = df.resample(self.target_frq).last().reindex(res_index)
+        return df
diff --git a/examples/benchmarks/LightGBM/multi_freq_handler.py b/examples/benchmarks/LightGBM/multi_freq_handler.py
new file mode 100644
index 00000000000..33a486e7e13
--- /dev/null
+++ b/examples/benchmarks/LightGBM/multi_freq_handler.py
@@ -0,0 +1,120 @@
+import warnings
+from pathlib import Path
+from typing import Union
+import pandas as pd
+
+from qlib.data import D
+from qlib.data.dataset.loader import QlibDataLoader
+from qlib.contrib.data.handler import DataHandlerLP, _DEFAULT_LEARN_PROCESSORS, check_transform_proc
+
+
+class MultiFreqLoader(QlibDataLoader):
+    def load_group_df(
+        self,
+        instruments,
+        exprs: list,
+        names: list,
+        start_time: Union[str, pd.Timestamp] = None,
+        end_time: Union[str, pd.Timestamp] = None,
+        gp_name: str = None,
+    ) -> pd.DataFrame:
+        if instruments is None:
+            warnings.warn("`instruments` is not set, will load all stocks")
+            instruments = "all"
+        if isinstance(instruments, str):
+            instruments = D.instruments(instruments, filter_pipe=self.filter_pipe)
+        elif self.filter_pipe is not None:
+            warnings.warn("`filter_pipe` is not None, but it will not be used with `instruments` as list")
+
+        if gp_name == "feature":
+            # freq == day
+            _exps = list(filter(lambda x: not x.startswith("Avg"), exprs))
+            _day_df = D.features(instruments, _exps, start_time, end_time, freq="day")
+            _day_df.columns = list(filter(lambda x: int("".join(filter(str.isdigit, x))) == 0, names))
+            # freq == 1min
+            _exps = list(filter(lambda x: x.startswith("Avg"), exprs))
+            _min_df = D.features(
+                instruments,
+                _exps,
+                start_time,
+                end_time,
+                freq="1min",
+                inst_processors=self.inst_processor.get("feature", []),
+            )
+            _min_df.columns = list(filter(lambda x: int("".join(filter(str.isdigit, x))) > 0, names))
+            df = pd.concat([_day_df, _min_df], axis=1, sort=False)
+        elif gp_name == "label":
+            freq = self.freq[gp_name] if isinstance(self.freq, dict) else self.freq
+            df = D.features(
+                instruments,
+                exprs,
+                start_time,
+                end_time,
+                freq=freq,
+                inst_processors=self.inst_processor.get(gp_name, []),
+            )
+            df.columns = names
+        else:
+            raise ValueError(f"not support")
+
+        if self.swap_level:
+            df = df.swaplevel().sort_index()  # NOTE: if swaplevel, return <datetime, instrument>
+        return df
+
+
+class Avg15minHandler(DataHandlerLP):
+    def __init__(
+        self,
+        instruments="csi500",
+        start_time=None,
+        end_time=None,
+        freq="day",
+        infer_processors=[],
+        learn_processors=_DEFAULT_LEARN_PROCESSORS,
+        fit_start_time=None,
+        fit_end_time=None,
+        process_type=DataHandlerLP.PTYPE_A,
+        filter_pipe=None,
+        inst_processor=None,
+        **kwargs,
+    ):
+        infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
+        learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
+
+        data_loader = {
+            "class": "MultiFreqLoader",
+            "module_path": str(Path(__file__).resolve()),
+            "kwargs": {
+                "config": {
+                    "feature": self.get_feature_config(),
+                    "label": kwargs.get("label", self.get_label_config()),
+                },
+                "filter_pipe": filter_pipe,
+                "freq": freq,
+                "inst_processor": inst_processor,
+            },
+        }
+        super().__init__(
+            instruments=instruments,
+            start_time=start_time,
+            end_time=end_time,
+            data_loader=data_loader,
+            infer_processors=infer_processors,
+            learn_processors=learn_processors,
+            process_type=process_type,
+        )
+
+    def get_feature_config(self):
+        fields = ["$close", "$open", "$low", "$high", "$volume", "$vwap"]
+        names = list(map(lambda x: x.strip("$") + "0", fields))
+        tmp_fields = []
+        tmp_names = []
+        for i, _f in enumerate(fields):
+            _fields = [f"Avg({_f}, {15 * j}, {15 * j + 15}, 'nanmean')" for j in range(0, 240 // 15)]
+            _names = [f"{names[i][:-1]}{int(names[i][-1])+j}" for j in range(1, 240 // 15 + 1)]
+            tmp_fields += _fields
+            tmp_names += _names
+        return fields + tmp_fields, names + tmp_names
+
+    def get_label_config(self):
+        return (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])
diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_multi_freq.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_multi_freq.yaml
index fcb3dd367ac..b19dfb8083b 100644
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_multi_freq.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_multi_freq.yaml
@@ -4,7 +4,10 @@ qlib_init:
         1min: "~/.qlib/qlib_data/cn_data_1min"
     region: cn
     dataset_cache: null
-    maxtasksperchild: 1
+    maxtasksperchild: null
+    custom_ops:
+        - class: Avg
+          module_path: average_ops.py
 market: &market csi300
 benchmark: &benchmark SH000300
 data_handler_config: &data_handler_config
@@ -20,11 +23,10 @@ data_handler_config: &data_handler_config
     # with label as reference
     inst_processor:
         feature:
-            - class: Resample1minProcessor
-              module_path: features_sample.py
+            - class: ResampleNProcessor
+              module_path: features_resample_N.py
               kwargs:
-                  hour: 14
-                  minute: 56
+                  target_frq: 1d
 
 port_analysis_config: &port_analysis_config
     strategy:
@@ -62,25 +64,25 @@ task:
         module_path: qlib.data.dataset
         kwargs:
             handler:
-                class: Alpha158
-                module_path: qlib.contrib.data.handler
+                class: Avg15minHandler
+                module_path: multi_freq_handler.py
                 kwargs: *data_handler_config
             segments:
                 train: [2008-01-01, 2014-12-31]
                 valid: [2015-01-01, 2016-12-31]
                 test: [2017-01-01, 2020-08-01]
-    record: 
+    record:
         - class: SignalRecord
-        module_path: qlib.workflow.record_temp
-        kwargs: 
+          module_path: qlib.workflow.record_temp
+          kwargs:
             model: <MODEL>
             dataset: <DATASET>
         - class: SigAnaRecord
-        module_path: qlib.workflow.record_temp
-        kwargs: 
+          module_path: qlib.workflow.record_temp
+          kwargs:
             ana_long_short: False
             ann_scaler: 252
         - class: PortAnaRecord
-        module_path: qlib.workflow.record_temp
-        kwargs: 
-            config: *port_analysis_config
\ No newline at end of file
+          module_path: qlib.workflow.record_temp
+          kwargs:
+            config: *port_analysis_config
diff --git a/qlib/workflow/record_temp.py b/qlib/workflow/record_temp.py
index 98615fba2be..8b290a5ffe0 100644
--- a/qlib/workflow/record_temp.py
+++ b/qlib/workflow/record_temp.py
@@ -88,7 +88,7 @@ def load(self, name):
         obj = self.recorder.load_object(name)
         return obj
 
-    def list():
+    def list(self):
         """
         List the supported artifacts.
 
@@ -98,7 +98,7 @@ def list():
         """
         return []
 
-    def check(self, cls=self):
+    def check(self, cls=None):
         """
         Check if the records is properly generated and saved.
 
@@ -106,6 +106,8 @@ def check(self, cls=self):
         ------
         FileExistsError: whether the records are stored properly.
         """
+        if cls is None:
+            cls = self
         artifacts = set(self.recorder.list_artifacts())
         flist = cls.list()
         for item in flist:

From 67d3b7fe4322ad968df96fceb026e829d2c9913b Mon Sep 17 00:00:00 2001
From: zhupr <zhu.pengrong@foxmail.com>
Date: Wed, 29 Sep 2021 12:41:46 +0800
Subject: [PATCH 2/7] add Copyright

---
 examples/benchmarks/LightGBM/average_ops.py         | 3 +++
 examples/benchmarks/LightGBM/features_resample_N.py | 3 +++
 examples/benchmarks/LightGBM/features_sample.py     | 3 +++
 examples/benchmarks/LightGBM/multi_freq_handler.py  | 3 +++
 4 files changed, 12 insertions(+)

diff --git a/examples/benchmarks/LightGBM/average_ops.py b/examples/benchmarks/LightGBM/average_ops.py
index 6de7710338b..a67976fb677 100644
--- a/examples/benchmarks/LightGBM/average_ops.py
+++ b/examples/benchmarks/LightGBM/average_ops.py
@@ -1,3 +1,6 @@
+#  Copyright (c) Microsoft Corporation.
+#  Licensed under the MIT License.
+
 import math
 import numpy as np
 
diff --git a/examples/benchmarks/LightGBM/features_resample_N.py b/examples/benchmarks/LightGBM/features_resample_N.py
index 8496ab00f09..13061513cb6 100644
--- a/examples/benchmarks/LightGBM/features_resample_N.py
+++ b/examples/benchmarks/LightGBM/features_resample_N.py
@@ -1,3 +1,6 @@
+#  Copyright (c) Microsoft Corporation.
+#  Licensed under the MIT License.
+
 import pandas as pd
 
 from qlib.data.inst_processor import InstProcessor
diff --git a/examples/benchmarks/LightGBM/features_sample.py b/examples/benchmarks/LightGBM/features_sample.py
index 0b996bd1f98..4cf9121dda5 100644
--- a/examples/benchmarks/LightGBM/features_sample.py
+++ b/examples/benchmarks/LightGBM/features_sample.py
@@ -1,3 +1,6 @@
+#  Copyright (c) Microsoft Corporation.
+#  Licensed under the MIT License.
+
 import datetime
 import pandas as pd
 
diff --git a/examples/benchmarks/LightGBM/multi_freq_handler.py b/examples/benchmarks/LightGBM/multi_freq_handler.py
index 33a486e7e13..13e24d9283e 100644
--- a/examples/benchmarks/LightGBM/multi_freq_handler.py
+++ b/examples/benchmarks/LightGBM/multi_freq_handler.py
@@ -1,3 +1,6 @@
+#  Copyright (c) Microsoft Corporation.
+#  Licensed under the MIT License.
+
 import warnings
 from pathlib import Path
 from typing import Union

From 5c0af31c6387fff45a33330dcad08e0a2eeed9d8 Mon Sep 17 00:00:00 2001
From: zhupr <zhu.pengrong@foxmail.com>
Date: Wed, 29 Sep 2021 13:06:47 +0800
Subject: [PATCH 3/7] add a comment to average_ops.py

---
 examples/benchmarks/LightGBM/average_ops.py   | 27 +++++++++++++++++++
 .../benchmarks/LightGBM/multi_freq_handler.py |  2 +-
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/examples/benchmarks/LightGBM/average_ops.py b/examples/benchmarks/LightGBM/average_ops.py
index a67976fb677..18813a4ed2f 100644
--- a/examples/benchmarks/LightGBM/average_ops.py
+++ b/examples/benchmarks/LightGBM/average_ops.py
@@ -8,6 +8,33 @@
 
 
 class Avg(ElemOperator):
+    """On the 1min data, calculate the mean value of the specified range within the day
+
+    Parameters
+    ----------
+    feature : Expression
+        feature instance
+    start_index: int
+        start index, [0, 239)
+    end_index: int
+        end index, [1, 240]
+    func: str
+        value from ["nanmean", "mean"], same as "np.nanmean" or "np.mean", by default "nanmean"
+    Notes
+    ------
+        start_index < end_index
+    Examples
+    ------
+        close = [0, 1, 2, 3, 4, 5]
+        Avg($close, 0, 2) == [np.nan, 0.5, np.nan, np.nan, np.nan, np.nan]
+        Avg($close, 2, 4) == [np.nan, np.nan, np.nan, 2.5, np.nan, np.nan]
+
+    Returns
+    ----------
+    Expression
+        The data for each trading day is: data[end_index-1] = data[start_index: end_index]).mean()
+    """
+
     MINUTES = 240
 
     def __init__(self, feature, start_index, end_index, func="nanmean"):
diff --git a/examples/benchmarks/LightGBM/multi_freq_handler.py b/examples/benchmarks/LightGBM/multi_freq_handler.py
index 13e24d9283e..f62d24e06ed 100644
--- a/examples/benchmarks/LightGBM/multi_freq_handler.py
+++ b/examples/benchmarks/LightGBM/multi_freq_handler.py
@@ -58,7 +58,7 @@ def load_group_df(
             )
             df.columns = names
         else:
-            raise ValueError(f"not support")
+            raise ValueError(f"Unsupported gp_name: {gp_name}")
 
         if self.swap_level:
             df = df.swaplevel().sort_index()  # NOTE: if swaplevel, return <datetime, instrument>

From 57152f0f10b93cb2410d682ecb7d6a97f48c0562 Mon Sep 17 00:00:00 2001
From: zhupr <zhu.pengrong@foxmail.com>
Date: Thu, 30 Sep 2021 15:22:01 +0800
Subject: [PATCH 4/7] modify the example of multi-freq

---
 examples/benchmarks/LightGBM/average_ops.py   | 67 -------------
 .../benchmarks/LightGBM/features_sample.py    | 19 ----
 .../benchmarks/LightGBM/multi_freq_handler.py | 98 +++++--------------
 ...w_config_lightgbm_Alpha158_multi_freq.yaml |  8 +-
 4 files changed, 25 insertions(+), 167 deletions(-)
 delete mode 100644 examples/benchmarks/LightGBM/average_ops.py
 delete mode 100644 examples/benchmarks/LightGBM/features_sample.py

diff --git a/examples/benchmarks/LightGBM/average_ops.py b/examples/benchmarks/LightGBM/average_ops.py
deleted file mode 100644
index 18813a4ed2f..00000000000
--- a/examples/benchmarks/LightGBM/average_ops.py
+++ /dev/null
@@ -1,67 +0,0 @@
-#  Copyright (c) Microsoft Corporation.
-#  Licensed under the MIT License.
-
-import math
-import numpy as np
-
-from qlib.data.ops import ElemOperator
-
-
-class Avg(ElemOperator):
-    """On the 1min data, calculate the mean value of the specified range within the day
-
-    Parameters
-    ----------
-    feature : Expression
-        feature instance
-    start_index: int
-        start index, [0, 239)
-    end_index: int
-        end index, [1, 240]
-    func: str
-        value from ["nanmean", "mean"], same as "np.nanmean" or "np.mean", by default "nanmean"
-    Notes
-    ------
-        start_index < end_index
-    Examples
-    ------
-        close = [0, 1, 2, 3, 4, 5]
-        Avg($close, 0, 2) == [np.nan, 0.5, np.nan, np.nan, np.nan, np.nan]
-        Avg($close, 2, 4) == [np.nan, np.nan, np.nan, 2.5, np.nan, np.nan]
-
-    Returns
-    ----------
-    Expression
-        The data for each trading day is: data[end_index-1] = data[start_index: end_index]).mean()
-    """
-
-    MINUTES = 240
-
-    def __init__(self, feature, start_index, end_index, func="nanmean"):
-        assert start_index < end_index, "Avg in end_index must be greater than start_index"
-        self.feature = feature
-        self.s_i = start_index
-        self.e_i = end_index
-        self.func = func
-        self.min_periods = 1 if self.func == "nanmean" else self.e_i - self.s_i
-        super().__init__(feature)
-
-    def _load_internal(self, instrument, start_index, end_index, freq):
-        series = self.feature.load(instrument, start_index, end_index, freq)
-        if series.empty:
-            return series
-        start_index = math.ceil(series.index[0] / self.MINUTES) * self.MINUTES
-        res = series.rolling(self.e_i - self.s_i, min_periods=self.min_periods).mean()
-        mask = []
-        while start_index <= series.index[-1]:
-            mask.append(start_index + self.e_i - 1)
-            start_index += self.MINUTES
-        res.loc[~series.index.isin(mask)] = np.nan
-        return res
-
-    def get_extended_window_size(self):
-        lft_etd, rght_etd = self.feature.get_extended_window_size()
-        return lft_etd + self.MINUTES, rght_etd + self.MINUTES
-
-    def __str__(self):
-        return "{}({},{},{},{})".format(type(self).__name__, self.feature, self.s_i, self.e_i, self.func)
diff --git a/examples/benchmarks/LightGBM/features_sample.py b/examples/benchmarks/LightGBM/features_sample.py
deleted file mode 100644
index 4cf9121dda5..00000000000
--- a/examples/benchmarks/LightGBM/features_sample.py
+++ /dev/null
@@ -1,19 +0,0 @@
-#  Copyright (c) Microsoft Corporation.
-#  Licensed under the MIT License.
-
-import datetime
-import pandas as pd
-
-from qlib.data.inst_processor import InstProcessor
-
-
-class Resample1minProcessor(InstProcessor):
-    def __init__(self, hour: int, minute: int, **kwargs):
-        self.hour = hour
-        self.minute = minute
-
-    def __call__(self, df: pd.DataFrame, *args, **kwargs):
-        df.index = pd.to_datetime(df.index)
-        df = df.loc[df.index.time == datetime.time(self.hour, self.minute)]
-        df.index = df.index.normalize()
-        return df
diff --git a/examples/benchmarks/LightGBM/multi_freq_handler.py b/examples/benchmarks/LightGBM/multi_freq_handler.py
index f62d24e06ed..08956aac8fb 100644
--- a/examples/benchmarks/LightGBM/multi_freq_handler.py
+++ b/examples/benchmarks/LightGBM/multi_freq_handler.py
@@ -1,67 +1,17 @@
 #  Copyright (c) Microsoft Corporation.
 #  Licensed under the MIT License.
 
-import warnings
-from pathlib import Path
-from typing import Union
 import pandas as pd
 
-from qlib.data import D
 from qlib.data.dataset.loader import QlibDataLoader
 from qlib.contrib.data.handler import DataHandlerLP, _DEFAULT_LEARN_PROCESSORS, check_transform_proc
 
 
-class MultiFreqLoader(QlibDataLoader):
-    def load_group_df(
-        self,
-        instruments,
-        exprs: list,
-        names: list,
-        start_time: Union[str, pd.Timestamp] = None,
-        end_time: Union[str, pd.Timestamp] = None,
-        gp_name: str = None,
-    ) -> pd.DataFrame:
-        if instruments is None:
-            warnings.warn("`instruments` is not set, will load all stocks")
-            instruments = "all"
-        if isinstance(instruments, str):
-            instruments = D.instruments(instruments, filter_pipe=self.filter_pipe)
-        elif self.filter_pipe is not None:
-            warnings.warn("`filter_pipe` is not None, but it will not be used with `instruments` as list")
-
-        if gp_name == "feature":
-            # freq == day
-            _exps = list(filter(lambda x: not x.startswith("Avg"), exprs))
-            _day_df = D.features(instruments, _exps, start_time, end_time, freq="day")
-            _day_df.columns = list(filter(lambda x: int("".join(filter(str.isdigit, x))) == 0, names))
-            # freq == 1min
-            _exps = list(filter(lambda x: x.startswith("Avg"), exprs))
-            _min_df = D.features(
-                instruments,
-                _exps,
-                start_time,
-                end_time,
-                freq="1min",
-                inst_processors=self.inst_processor.get("feature", []),
-            )
-            _min_df.columns = list(filter(lambda x: int("".join(filter(str.isdigit, x))) > 0, names))
-            df = pd.concat([_day_df, _min_df], axis=1, sort=False)
-        elif gp_name == "label":
-            freq = self.freq[gp_name] if isinstance(self.freq, dict) else self.freq
-            df = D.features(
-                instruments,
-                exprs,
-                start_time,
-                end_time,
-                freq=freq,
-                inst_processors=self.inst_processor.get(gp_name, []),
-            )
-            df.columns = names
-        else:
-            raise ValueError(f"Unsupported gp_name: {gp_name}")
-
-        if self.swap_level:
-            df = df.swaplevel().sort_index()  # NOTE: if swaplevel, return <datetime, instrument>
+class Avg15minLoader(QlibDataLoader):
+    def load(self, instruments=None, start_time=None, end_time=None) -> pd.DataFrame:
+        df = super(Avg15minLoader, self).load(instruments, start_time, end_time)
+        if self.is_group:
+            df.columns = df.columns.map(lambda x: ("feature", x[1]) if x[0].startswith("feature") else x)
         return df
 
 
@@ -83,20 +33,9 @@ def __init__(
     ):
         infer_processors = check_transform_proc(infer_processors, fit_start_time, fit_end_time)
         learn_processors = check_transform_proc(learn_processors, fit_start_time, fit_end_time)
-
-        data_loader = {
-            "class": "MultiFreqLoader",
-            "module_path": str(Path(__file__).resolve()),
-            "kwargs": {
-                "config": {
-                    "feature": self.get_feature_config(),
-                    "label": kwargs.get("label", self.get_label_config()),
-                },
-                "filter_pipe": filter_pipe,
-                "freq": freq,
-                "inst_processor": inst_processor,
-            },
-        }
+        data_loader = Avg15minLoader(
+            config=self.loader_config(), filter_pipe=filter_pipe, freq=freq, inst_processor=inst_processor
+        )
         super().__init__(
             instruments=instruments,
             start_time=start_time,
@@ -107,17 +46,24 @@ def __init__(
             process_type=process_type,
         )
 
-    def get_feature_config(self):
+    def loader_config(self):
         fields = ["$close", "$open", "$low", "$high", "$volume", "$vwap"]
         names = list(map(lambda x: x.strip("$") + "0", fields))
+
+        config = {"feature_day": (fields, names)}
+        # features day
+        # features 15min
         tmp_fields = []
         tmp_names = []
+        # Ref(Mean($close, 15), 0), Ref(Mean($close, 15), 14)
         for i, _f in enumerate(fields):
-            _fields = [f"Avg({_f}, {15 * j}, {15 * j + 15}, 'nanmean')" for j in range(0, 240 // 15)]
-            _names = [f"{names[i][:-1]}{int(names[i][-1])+j}" for j in range(1, 240 // 15 + 1)]
+            _fields = [f"Ref(Mean({_f}, 15), {j * 15})" for j in range(1, 240 // 15)]
+            _names = [f"{names[i][:-1]}{int(names[i][-1])+j}" for j in range(240 // 15 - 1, 0, -1)]
+            _fields.append(f"Mean({_f}, 15)")
+            _names.append(f"{names[i][:-1]}{int(names[i][-1])+240 // 15}")
             tmp_fields += _fields
             tmp_names += _names
-        return fields + tmp_fields, names + tmp_names
-
-    def get_label_config(self):
-        return (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])
+        config["feature_15min"] = (tmp_fields, tmp_names)
+        # label
+        config["label"] = (["Ref($close, -2)/Ref($close, -1) - 1"], ["LABEL0"])
+        return config
diff --git a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_multi_freq.yaml b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_multi_freq.yaml
index b19dfb8083b..829c8711594 100644
--- a/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_multi_freq.yaml
+++ b/examples/benchmarks/LightGBM/workflow_config_lightgbm_Alpha158_multi_freq.yaml
@@ -5,9 +5,6 @@ qlib_init:
     region: cn
     dataset_cache: null
     maxtasksperchild: null
-    custom_ops:
-        - class: Avg
-          module_path: average_ops.py
 market: &market csi300
 benchmark: &benchmark SH000300
 data_handler_config: &data_handler_config
@@ -19,10 +16,11 @@ data_handler_config: &data_handler_config
     instruments: *market
     freq:
         label: day
-        feature: 1min
+        feature_15min: 1min
+        feature_day: day
     # with label as reference
     inst_processor:
-        feature:
+        feature_15min:
             - class: ResampleNProcessor
               module_path: features_resample_N.py
               kwargs:

From eeaacfbed0265c591a1f12536a8a80406ca370c9 Mon Sep 17 00:00:00 2001
From: zhupr <zhu.pengrong@foxmail.com>
Date: Thu, 30 Sep 2021 16:10:17 +0800
Subject: [PATCH 5/7] add comment to multi_freq_handler.py

---
 .../benchmarks/LightGBM/multi_freq_handler.py | 48 +++++++++++++++++--
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/examples/benchmarks/LightGBM/multi_freq_handler.py b/examples/benchmarks/LightGBM/multi_freq_handler.py
index 08956aac8fb..39667a53ba8 100644
--- a/examples/benchmarks/LightGBM/multi_freq_handler.py
+++ b/examples/benchmarks/LightGBM/multi_freq_handler.py
@@ -11,6 +11,7 @@ class Avg15minLoader(QlibDataLoader):
     def load(self, instruments=None, start_time=None, end_time=None) -> pd.DataFrame:
         df = super(Avg15minLoader, self).load(instruments, start_time, end_time)
         if self.is_group:
+            # both feature_day (day freq) and feature_15min (1min freq, averaged every 15 minutes) are renamed to "feature"
             df.columns = df.columns.map(lambda x: ("feature", x[1]) if x[0].startswith("feature") else x)
         return df
 
@@ -47,15 +48,56 @@ def __init__(
         )
 
     def loader_config(self):
+
+        # Results for dataset: df: pd.DataFrame
+        #   len(df.columns) == 6 + 6 * 16, len(df.index.get_level_values(level="datetime").unique()) == T
+        #   df.columns: close0, close1, ..., close16, open0, ..., open16, ..., vwap16
+        #       freq == day:
+        #           close0, open0, low0, high0, volume0, vwap0
+        #       freq == 1min:
+        #           close1, ..., close16, ..., vwap1, ..., vwap16
+        #   df.index.name == ["datetime", "instrument"]: pd.MultiIndex
+        # Example:
+        #                          feature                        ...                  label
+        #                           close0      open0       low0  ... vwap1 vwap16    LABEL0
+        # datetime   instrument                                   ...
+        # 2020-10-09 SH600000    11.794546  11.819587  11.769505  ...   NaN    NaN -0.005214
+        # 2020-10-15 SH600000    12.044961  11.944795  11.932274  ...   NaN    NaN -0.007202
+        # ...                          ...        ...        ...  ...   ...    ...       ...
+        # 2021-05-28 SZ300676     6.369684   6.495406   6.306568  ...   NaN    NaN -0.001321
+        # 2021-05-31 SZ300676     6.601626   6.465643   6.465130  ...   NaN    NaN -0.023428
+
+        # features day: len(columns) == 6
         fields = ["$close", "$open", "$low", "$high", "$volume", "$vwap"]
+        # names: close0, open0, ..., vwap0
         names = list(map(lambda x: x.strip("$") + "0", fields))
 
         config = {"feature_day": (fields, names)}
-        # features day
-        # features 15min
+
+        # features 15min: len(columns) == 6 * 16
+        #   time:   09:00 --> 09:14,            ..., 14:45 --> 14:59
+        #   fields: Ref(Mean($close, 15), 225), ..., Mean($close, 15)
+        #   name:   close1,                     ..., close16
+
+        # Expression description: take close as an example
+        #   Mean($close, 15) ==> df["$close"].rolling(15, min_periods=1).mean()
+        #   Ref(Mean($close, 15), 15) ==> df["$close"].rolling(15, min_periods=1).mean().shift(15)
+
+        #   NOTE: the last value of each trading day is the average of the i-th 15-minute period
+
+        # Average:
+        #   Average of the i-th 15-minute period of each trading day: 1 <= i <= 240 // 15
+        #       Avg(15minutes): Ref(Mean($close, 15), 240 - i * 15)
+        #
+        #   Average of the first 15 minutes of each trading day; i = 1
+        #       Avg(09:00 --> 09:14), df.index.loc["09:14"]: Ref(Mean($close, 15), 240- 1 * 15) ==> Ref(Mean($close, 15), 225)
+        #   Average of the last 15 minutes of each trading day; i = 16
+        #       Avg(14:45 --> 14:59), df.index.loc["14:59"]: Ref(Mean($close, 15), 240 - 16 * 15) ==> Ref(Mean($close, 15), 0) ==> Mean($close, 15)
+
+        # 15min resample to day
+        #   df.resample("1d").last()
         tmp_fields = []
         tmp_names = []
-        # Ref(Mean($close, 15), 0), Ref(Mean($close, 15), 14)
         for i, _f in enumerate(fields):
             _fields = [f"Ref(Mean({_f}, 15), {j * 15})" for j in range(1, 240 // 15)]
             _names = [f"{names[i][:-1]}{int(names[i][-1])+j}" for j in range(240 // 15 - 1, 0, -1)]

From 23b0320f7291765c0cfdd920103bc4fa14f4dac2 Mon Sep 17 00:00:00 2001
From: zhupr <zhu.pengrong@foxmail.com>
Date: Thu, 30 Sep 2021 21:50:14 +0800
Subject: [PATCH 6/7] add the Ref expression description to
 multi_freq_handler.py

---
 examples/benchmarks/LightGBM/multi_freq_handler.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/examples/benchmarks/LightGBM/multi_freq_handler.py b/examples/benchmarks/LightGBM/multi_freq_handler.py
index 39667a53ba8..eec629b5551 100644
--- a/examples/benchmarks/LightGBM/multi_freq_handler.py
+++ b/examples/benchmarks/LightGBM/multi_freq_handler.py
@@ -68,6 +68,15 @@ def loader_config(self):
         # 2021-05-31 SZ300676     6.601626   6.465643   6.465130  ...   NaN    NaN -0.023428
 
         # features day: len(columns) == 6
+        # $close is the closing price of the current trading day：
+        #   if the user needs to get the `close` before the last T days, use Ref($close, T-1), for example:
+        #                                    $close  Ref($close, 1)  Ref($close, 2)  Ref($close, 3)  Ref($close, 4)
+        #         instrument datetime
+        #         SH600519   2021-06-01  244.271530
+        #                    2021-06-02  242.205917      244.271530
+        #                    2021-06-03  242.229889      242.205917      244.271530
+        #                    2021-06-04  245.421524      242.229889      242.205917      244.271530
+        #                    2021-06-07  247.547089      245.421524      242.229889      242.205917      244.271530
         fields = ["$close", "$open", "$low", "$high", "$volume", "$vwap"]
         # names: close0, open0, ..., vwap0
         names = list(map(lambda x: x.strip("$") + "0", fields))

From ac1e09982da9208581c736c051ce84516ddbd167 Mon Sep 17 00:00:00 2001
From: zhupr <zhu.pengrong@foxmail.com>
Date: Thu, 30 Sep 2021 23:07:23 +0800
Subject: [PATCH 7/7] add expression description to multi_freq_handler.py

---
 .../benchmarks/LightGBM/multi_freq_handler.py | 21 ++++++++++++++++---
 qlib/data/data.py                             |  2 +-
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/examples/benchmarks/LightGBM/multi_freq_handler.py b/examples/benchmarks/LightGBM/multi_freq_handler.py
index eec629b5551..07d7ac27c41 100644
--- a/examples/benchmarks/LightGBM/multi_freq_handler.py
+++ b/examples/benchmarks/LightGBM/multi_freq_handler.py
@@ -67,8 +67,8 @@ def loader_config(self):
         # 2021-05-28 SZ300676     6.369684   6.495406   6.306568  ...   NaN    NaN -0.001321
         # 2021-05-31 SZ300676     6.601626   6.465643   6.465130  ...   NaN    NaN -0.023428
 
-        # features day: len(columns) == 6
-        # $close is the closing price of the current trading day：
+        # features day: len(columns) == 6, freq = day
+        # $close is the closing price of the current trading day:
         #   if the user needs to get the `close` before the last T days, use Ref($close, T-1), for example:
         #                                    $close  Ref($close, 1)  Ref($close, 2)  Ref($close, 3)  Ref($close, 4)
         #         instrument datetime
@@ -77,16 +77,31 @@ def loader_config(self):
         #                    2021-06-03  242.229889      242.205917      244.271530
         #                    2021-06-04  245.421524      242.229889      242.205917      244.271530
         #                    2021-06-07  247.547089      245.421524      242.229889      242.205917      244.271530
+
+        # WARNING: Ref($close, N), if N == 0, Ref($close, N) ==> $close
+
         fields = ["$close", "$open", "$low", "$high", "$volume", "$vwap"]
         # names: close0, open0, ..., vwap0
         names = list(map(lambda x: x.strip("$") + "0", fields))
 
         config = {"feature_day": (fields, names)}
 
-        # features 15min: len(columns) == 6 * 16
+        # features 15min: len(columns) == 6 * 16, freq = 1min
+        #   $close is the closing price of the current 1-minute bar (freq = 1min):
+        #       if the user needs the `close` of the i-th 15min (i = 1 is the first 15 minutes) of the last T days (T = 1 is the current day), use `Ref(Mean($close, 15), T * 240 - i * 15)`, for example:
+        #                                    Ref(Mean($close, 15), 225)  Ref(Mean($close, 15), 465)  Ref(Mean($close, 15), 705)
+        #             instrument datetime
+        #             SH600519   2021-05-31                  241.769897                  243.077942                  244.712997
+        #                        2021-06-01                  244.271530                  241.769897                  243.077942
+        #                        2021-06-02                  242.205917                  244.271530                  241.769897
+
+        # WARNING: Ref(Mean($close, 15), N), if N == 0, Ref(Mean($close, 15), N) ==> Mean($close, 15)
+
+        # Results of the current script:
         #   time:   09:00 --> 09:14,            ..., 14:45 --> 14:59
         #   fields: Ref(Mean($close, 15), 225), ..., Mean($close, 15)
         #   name:   close1,                     ..., close16
+        #
 
         # Expression description: take close as an example
         #   Mean($close, 15) ==> df["$close"].rolling(15, min_periods=1).mean()
diff --git a/qlib/data/data.py b/qlib/data/data.py
index 8bb9cb89df4..7fbc48f715f 100644
--- a/qlib/data/data.py
+++ b/qlib/data/data.py
@@ -549,7 +549,7 @@ def dataset_processor(instruments_d, column_names, start_time, end_time, freq, i
             inst_l.append(inst)
             task_l.append(
                 delayed(DatasetProvider.expression_calculator)(
-                    inst, start_time, end_time, freq, normalize_column_names, spans, C
+                    inst, start_time, end_time, freq, normalize_column_names, spans, C, inst_processors
                 )
             )