Skip to content

Commit b6b145b

Browse files
authored
Merge pull request microsoft#378 from D-X-Y/main
Add MultiSegRecord and add segment kwargs in model.pred
2 parents 1369eb4 + 39dd81e commit b6b145b

27 files changed

+328 -134 lines changed

qlib/contrib/model/__init__.py

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,39 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
try:
4+
from .catboost_model import CatBoostModel
5+
except ModuleNotFoundError:
6+
CatBoostModel = None
7+
print("Please install necessary libs for CatBoostModel.")
8+
try:
9+
from .double_ensemble import DEnsembleModel
10+
from .gbdt import LGBModel
11+
except ModuleNotFoundError:
12+
DEnsembleModel, LGBModel = None, None
13+
print("Please install necessary libs for DEnsembleModel and LGBModel, such as lightgbm.")
14+
try:
15+
from .xgboost import XGBModel
16+
except ModuleNotFoundError:
17+
XGBModel = None
18+
print("Please install necessary libs for XGBModel, such as xgboost.")
19+
try:
20+
from .linear import LinearModel
21+
except ModuleNotFoundError:
22+
LinearModel = None
23+
print("Please install necessary libs for LinearModel, such as scipy and sklearn.")
24+
# import pytorch models
25+
try:
26+
from .pytorch_alstm import ALSTM
27+
from .pytorch_gats import GATs
28+
from .pytorch_gru import GRU
29+
from .pytorch_lstm import LSTM
30+
from .pytorch_nn import DNNModelPytorch
31+
from .pytorch_tabnet import TabnetModel
32+
from .pytorch_sfm import SFM_Model
33+
34+
pytorch_classes = (ALSTM, GATs, GRU, LSTM, DNNModelPytorch, TabnetModel, SFM_Model)
35+
except ModuleNotFoundError:
36+
pytorch_classes = ()
37+
print("Please install necessary libs for PyTorch models.")
38+
39+
all_model_classes = (CatBoostModel, DEnsembleModel, LGBModel, XGBModel, LinearModel) + pytorch_classes

qlib/contrib/model/catboost_model.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
import numpy as np
55
import pandas as pd
6+
from typing import Text, Union
67
from catboost import Pool, CatBoost
78
from catboost.utils import get_gpu_device_count
89

@@ -62,10 +63,10 @@ def fit(
6263
evals_result["train"] = list(evals_result["learn"].values())[0]
6364
evals_result["valid"] = list(evals_result["validation"].values())[0]
6465

65-
def predict(self, dataset):
66+
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
6667
if self.model is None:
6768
raise ValueError("model is not fitted yet!")
68-
x_test = dataset.prepare("test", col_set="feature")
69+
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
6970
return pd.Series(self.model.predict(x_test.values), index=x_test.index)
7071

7172

qlib/contrib/model/double_ensemble.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import lightgbm as lgb
55
import numpy as np
66
import pandas as pd
7-
7+
from typing import Text, Union
88
from ...model.base import Model
99
from ...data.dataset import DatasetH
1010
from ...data.dataset.handler import DataHandlerLP
@@ -40,6 +40,10 @@ def __init__(
4040
self.bins_sr = bins_sr
4141
self.bins_fs = bins_fs
4242
self.decay = decay
43+
if sample_ratios is None: # the default values for sample_ratios
44+
sample_ratios = [0.8, 0.7, 0.6, 0.5, 0.4]
45+
if sub_weights is None: # the default values for sub_weights
46+
sub_weights = [1.0, 0.2, 0.2, 0.2, 0.2, 0.2]
4347
if not len(sample_ratios) == bins_fs:
4448
raise ValueError("The length of sample_ratios should be equal to bins_fs.")
4549
self.sample_ratios = sample_ratios
@@ -228,10 +232,10 @@ def retrieve_loss_curve(self, model, df_train, features):
228232
raise ValueError("not implemented yet")
229233
return loss_curve
230234

231-
def predict(self, dataset):
235+
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
232236
if self.ensemble is None:
233237
raise ValueError("model is not fitted yet!")
234-
x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
238+
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
235239
pred = pd.Series(np.zeros(x_test.shape[0]), index=x_test.index)
236240
for i_sub, submodel in enumerate(self.ensemble):
237241
feat_sub = self.sub_features[i_sub]

qlib/contrib/model/gbdt.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import numpy as np
55
import pandas as pd
66
import lightgbm as lgb
7-
7+
from typing import Text, Union
88
from ...model.base import ModelFT
99
from ...data.dataset import DatasetH
1010
from ...data.dataset.handler import DataHandlerLP
@@ -61,10 +61,10 @@ def fit(
6161
evals_result["train"] = list(evals_result["train"].values())[0]
6262
evals_result["valid"] = list(evals_result["valid"].values())[0]
6363

64-
def predict(self, dataset):
64+
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
6565
if self.model is None:
6666
raise ValueError("model is not fitted yet!")
67-
x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
67+
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
6868
return pd.Series(self.model.predict(x_test.values), index=x_test.index)
6969

7070
def finetune(self, dataset: DatasetH, num_boost_round=10, verbose_eval=20):

qlib/contrib/model/linear.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
import numpy as np
55
import pandas as pd
6-
6+
from typing import Text, Union
77
from scipy.optimize import nnls
88
from sklearn.linear_model import LinearRegression, Ridge, Lasso
99

@@ -84,8 +84,8 @@ def _fit_nnls(self, X, y):
8484
self.coef_ = coef
8585
self.intercept_ = 0.0
8686

87-
def predict(self, dataset):
87+
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
8888
if self.coef_ is None:
8989
raise ValueError("model is not fitted yet!")
90-
x_test = dataset.prepare("test", col_set="feature", data_key=DataHandlerLP.DK_I)
90+
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
9191
return pd.Series(x_test.values @ self.coef_ + self.intercept_, index=x_test.index)

qlib/contrib/model/pytorch_alstm.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,9 @@
88
import os
99
import numpy as np
1010
import pandas as pd
11+
from typing import Text, Union
1112
import copy
12-
from ...utils import (
13-
unpack_archive_with_buffer,
14-
save_multiple_parts_file,
15-
get_or_create_path,
16-
drop_nan_by_y_index,
17-
)
13+
from ...utils import get_or_create_path
1814
from ...log import get_module_logger
1915

2016
import torch
@@ -273,11 +269,11 @@ def fit(
273269
if self.use_gpu:
274270
torch.cuda.empty_cache()
275271

276-
def predict(self, dataset):
272+
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
277273
if not self.fitted:
278274
raise ValueError("model is not fitted yet!")
279275

280-
x_test = dataset.prepare("test", col_set="feature")
276+
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
281277
index = x_test.index
282278
self.ALSTM_model.eval()
283279
x_values = x_test.values

qlib/contrib/model/pytorch_alstm_ts.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,9 @@
88
import os
99
import numpy as np
1010
import pandas as pd
11+
from typing import Text, Union
1112
import copy
12-
from ...utils import (
13-
unpack_archive_with_buffer,
14-
save_multiple_parts_file,
15-
get_or_create_path,
16-
drop_nan_by_y_index,
17-
)
13+
from ...utils import get_or_create_path
1814
from ...log import get_module_logger
1915

2016
import torch
@@ -264,11 +260,11 @@ def fit(
264260
if self.use_gpu:
265261
torch.cuda.empty_cache()
266262

267-
def predict(self, dataset):
263+
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
268264
if not self.fitted:
269265
raise ValueError("model is not fitted yet!")
270266

271-
dl_test = dataset.prepare("test", col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
267+
dl_test = dataset.prepare(segment, col_set=["feature", "label"], data_key=DataHandlerLP.DK_I)
272268
dl_test.config(fillna_type="ffill+bfill")
273269
test_loader = DataLoader(dl_test, batch_size=self.batch_size, num_workers=self.n_jobs)
274270
self.ALSTM_model.eval()

qlib/contrib/model/pytorch_gats.py

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,9 @@
88
import os
99
import numpy as np
1010
import pandas as pd
11+
from typing import Text, Union
1112
import copy
12-
from ...utils import (
13-
unpack_archive_with_buffer,
14-
save_multiple_parts_file,
15-
get_or_create_path,
16-
drop_nan_by_y_index,
17-
)
13+
from ...utils import get_or_create_path
1814
from ...log import get_module_logger
1915
import torch
2016
import torch.nn as nn
@@ -83,7 +79,6 @@ def __init__(
8379
self.with_pretrain = with_pretrain
8480
self.model_path = model_path
8581
self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu")
86-
self.use_gpu = torch.cuda.is_available()
8782
self.seed = seed
8883

8984
self.logger.info(
@@ -310,11 +305,11 @@ def fit(
310305
if self.use_gpu:
311306
torch.cuda.empty_cache()
312307

313-
def predict(self, dataset):
308+
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
314309
if not self.fitted:
315310
raise ValueError("model is not fitted yet!")
316311

317-
x_test = dataset.prepare("test", col_set="feature")
312+
x_test = dataset.prepare(segment, col_set="feature")
318313
index = x_test.index
319314
self.GAT_model.eval()
320315
x_values = x_test.values

qlib/contrib/model/pytorch_gats_ts.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,7 @@
99
import numpy as np
1010
import pandas as pd
1111
import copy
12-
from ...utils import (
13-
unpack_archive_with_buffer,
14-
save_multiple_parts_file,
15-
get_or_create_path,
16-
drop_nan_by_y_index,
17-
)
12+
from ...utils import get_or_create_path
1813
from ...log import get_module_logger
1914
import torch
2015
import torch.nn as nn

qlib/contrib/model/pytorch_gru.py

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,9 @@
88
import os
99
import numpy as np
1010
import pandas as pd
11+
from typing import Text, Union
1112
import copy
12-
from ...utils import (
13-
unpack_archive_with_buffer,
14-
save_multiple_parts_file,
15-
get_or_create_path,
16-
drop_nan_by_y_index,
17-
)
13+
from ...utils import get_or_create_path
1814
from ...log import get_module_logger
1915

2016
import torch
@@ -273,11 +269,11 @@ def fit(
273269
if self.use_gpu:
274270
torch.cuda.empty_cache()
275271

276-
def predict(self, dataset):
272+
def predict(self, dataset: DatasetH, segment: Union[Text, slice] = "test"):
277273
if not self.fitted:
278274
raise ValueError("model is not fitted yet!")
279275

280-
x_test = dataset.prepare("test", col_set="feature")
276+
x_test = dataset.prepare(segment, col_set="feature", data_key=DataHandlerLP.DK_I)
281277
index = x_test.index
282278
self.gru_model.eval()
283279
x_values = x_test.values

0 commit comments

Comments
 (0)