Skip to content

Commit 3b11912

Browse files
authored
Checking dataset empty (microsoft#647)
* Checking dataset empty
* add dataset checker
1 parent 7101ea1 commit 3b11912

21 files changed

+47
-0
lines changed

qlib/contrib/model/catboost_model.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,8 @@ def fit(
3838
col_set=["feature", "label"],
3939
data_key=DataHandlerLP.DK_L,
4040
)
41+
if df_train.empty or df_valid.empty:
42+
raise ValueError("Empty data from dataset, please check your dataset config.")
4143
x_train, y_train = df_train["feature"], df_train["label"]
4244
x_valid, y_valid = df_valid["feature"], df_valid["label"]
4345

qlib/contrib/model/double_ensemble.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,8 @@ def fit(self, dataset: DatasetH):
6464
df_train, df_valid = dataset.prepare(
6565
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
6666
)
67+
if df_train.empty or df_valid.empty:
68+
raise ValueError("Empty data from dataset, please check your dataset config.")
6769
x_train, y_train = df_train["feature"], df_train["label"]
6870
# initialize the sample weights
6971
N, F = x_train.shape

qlib/contrib/model/gbdt.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,8 @@ def _prepare_data(self, dataset: DatasetH):
2525
df_train, df_valid = dataset.prepare(
2626
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
2727
)
28+
if df_train.empty or df_valid.empty:
29+
raise ValueError("Empty data from dataset, please check your dataset config.")
2830
x_train, y_train = df_train["feature"], df_train["label"]
2931
x_valid, y_valid = df_valid["feature"], df_valid["label"]
3032

@@ -83,6 +85,8 @@ def finetune(self, dataset: DatasetH, num_boost_round=10, verbose_eval=20):
8385
"""
8486
# Based on existing model and finetune by train more rounds
8587
dtrain, _ = self._prepare_data(dataset)
88+
if dtrain.empty:
89+
raise ValueError("Empty data from dataset, please check your dataset config.")
8690
self.model = lgb.train(
8791
self.params,
8892
dtrain,

qlib/contrib/model/highfreq_gdbt_model.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -82,6 +82,8 @@ def _prepare_data(self, dataset: DatasetH):
8282
df_train, df_valid = dataset.prepare(
8383
["train", "valid"], col_set=["feature", "label"], data_key=DataHandlerLP.DK_L
8484
)
85+
if df_train.empty or df_valid.empty:
86+
raise ValueError("Empty data from dataset, please check your dataset config.")
8587

8688
x_train, y_train = df_train["feature"], df_train["label"]
8789
x_valid, y_valid = df_train["feature"], df_valid["label"]

qlib/contrib/model/linear.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,8 @@ def __init__(self, estimator="ols", alpha=0.0, fit_intercept=False):
5151

5252
def fit(self, dataset: DatasetH):
5353
df_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
54+
if df_train.empty:
55+
raise ValueError("Empty data from dataset, please check your dataset config.")
5456
X, y = df_train["feature"].values, np.squeeze(df_train["label"].values)
5557

5658
if self.estimator in [self.OLS, self.RIDGE, self.LASSO]:

qlib/contrib/model/pytorch_alstm.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -224,6 +224,8 @@ def fit(
224224
col_set=["feature", "label"],
225225
data_key=DataHandlerLP.DK_L,
226226
)
227+
if df_train.empty or df_valid.empty:
228+
raise ValueError("Empty data from dataset, please check your dataset config.")
227229

228230
x_train, y_train = df_train["feature"], df_train["label"]
229231
x_valid, y_valid = df_valid["feature"], df_valid["label"]

qlib/contrib/model/pytorch_alstm_ts.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -207,6 +207,8 @@ def fit(
207207
):
208208
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
209209
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
210+
if dl_train.empty or dl_valid.empty:
211+
raise ValueError("Empty data from dataset, please check your dataset config.")
210212

211213
dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader
212214
dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader

qlib/contrib/model/pytorch_gats.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -237,6 +237,8 @@ def fit(
237237
col_set=["feature", "label"],
238238
data_key=DataHandlerLP.DK_L,
239239
)
240+
if df_train.empty or df_valid.empty:
241+
raise ValueError("Empty data from dataset, please check your dataset config.")
240242

241243
x_train, y_train = df_train["feature"], df_train["label"]
242244
x_valid, y_valid = df_valid["feature"], df_valid["label"]

qlib/contrib/model/pytorch_gats_ts.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -245,6 +245,8 @@ def fit(
245245

246246
dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
247247
dl_valid = dataset.prepare("valid", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L)
248+
if dl_train.empty or dl_valid.empty:
249+
raise ValueError("Empty data from dataset, please check your dataset config.")
248250

249251
dl_train.config(fillna_type="ffill+bfill") # process nan brought by dataloader
250252
dl_valid.config(fillna_type="ffill+bfill") # process nan brought by dataloader

qlib/contrib/model/pytorch_gru.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -224,6 +224,8 @@ def fit(
224224
col_set=["feature", "label"],
225225
data_key=DataHandlerLP.DK_L,
226226
)
227+
if df_train.empty or df_valid.empty:
228+
raise ValueError("Empty data from dataset, please check your dataset config.")
227229

228230
x_train, y_train = df_train["feature"], df_train["label"]
229231
x_valid, y_valid = df_valid["feature"], df_valid["label"]

0 commit comments

Comments
 (0)