diff --git a/qlib/config.py b/qlib/config.py index 52b05568d57..75ab0fa3e8e 100644 --- a/qlib/config.py +++ b/qlib/config.py @@ -105,7 +105,7 @@ def set_conf_from_C(self, config_c): "redis_port": 6379, "redis_task_db": 1, # This value can be reset via qlib.init - "logging_level": "INFO", + "logging_level": logging.INFO, # Global configuration of qlib log # logging_level can control the logging level more finely "logging_config": { @@ -124,12 +124,12 @@ def set_conf_from_C(self, config_c): "handlers": { "console": { "class": "logging.StreamHandler", - "level": "DEBUG", + "level": logging.DEBUG, "formatter": "logger_format", "filters": ["field_not_found"], } }, - "loggers": {"qlib": {"level": "DEBUG", "handlers": ["console"]}}, + "loggers": {"qlib": {"level": logging.DEBUG, "handlers": ["console"]}}, }, # Defatult config for experiment manager "exp_manager": { @@ -185,7 +185,7 @@ def set_conf_from_C(self, config_c): # The nfs should be auto-mounted by qlib on other # serversS(such as PAI) [auto_mount:True] "timeout": 100, - "logging_level": "INFO", + "logging_level": logging.INFO, "region": REG_CN, ## Custom Operator "custom_ops": [], diff --git a/qlib/contrib/backtest/account.py b/qlib/contrib/backtest/account.py index d8b28501903..a614f08b674 100644 --- a/qlib/contrib/backtest/account.py +++ b/qlib/contrib/backtest/account.py @@ -104,10 +104,9 @@ def update_daily_end(self, today, trader): # if suspend, no new price to be updated, profit is 0 if trader.check_stock_suspended(code, today): continue - else: - today_close = trader.get_close(code, today) - profit += (today_close - self.current.position[code]["price"]) * self.current.position[code]["amount"] - self.current.update_stock_price(stock_id=code, price=today_close) + today_close = trader.get_close(code, today) + profit += (today_close - self.current.position[code]["price"]) * self.current.position[code]["amount"] + self.current.update_stock_price(stock_id=code, price=today_close) self.rtn += profit # update holding day count self.current.add_count_all() diff --git a/qlib/contrib/model/double_ensemble.py b/qlib/contrib/model/double_ensemble.py index a340489c205..541f74e99ca 100644 --- a/qlib/contrib/model/double_ensemble.py +++ b/qlib/contrib/model/double_ensemble.py @@ -184,7 +184,7 @@ def feature_selection(self, df_train, loss_values): / M ) loss_feat = self.get_loss(y_train.values.squeeze(), pred.values) - g.loc[i_f, "g_value"] = np.mean(loss_feat - loss_values) / np.std(loss_feat - loss_values) + g.loc[i_f, "g_value"] = np.mean(loss_feat - loss_values) / (np.std(loss_feat - loss_values) + 1e-7) x_train_tmp.loc[:, feat] = x_train.loc[:, feat].copy() # one column in train features is all-nan # if g['g_value'].isna().any() diff --git a/qlib/contrib/model/pytorch_alstm.py b/qlib/contrib/model/pytorch_alstm.py index bbbb61851b1..e8b3167959b 100644 --- a/qlib/contrib/model/pytorch_alstm.py +++ b/qlib/contrib/model/pytorch_alstm.py @@ -23,6 +23,7 @@ import torch.nn as nn import torch.optim as optim +from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH from ...data.dataset.handler import DataHandlerLP @@ -39,8 +40,8 @@ class ALSTM(Model): the evaluate metric used in early stop optimizer : str optimizer name - GPU : str - the GPU ID(s) used for training + GPU : int + the GPU ID used for training """ def __init__( @@ -76,7 +77,7 @@ def __init__( self.early_stop = early_stop self.optimizer = optimizer.lower() self.loss = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -123,6 +124,9 @@ def __init__( num_layers=self.num_layers, dropout=self.dropout, ) + self.logger.info("model:\n{:}".format(self.ALSTM_model)) + self.logger.info("model size: {:.4f} MB".format(count_parameters(self.ALSTM_model))) + if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) elif optimizer.lower() == "gd": @@ -214,7 +218,6 @@ def fit( self, dataset: DatasetH, evals_result=dict(), - verbose=True, save_path=None, ): diff --git a/qlib/contrib/model/pytorch_alstm_ts.py b/qlib/contrib/model/pytorch_alstm_ts.py index 725568de855..1907b9d762c 100644 --- a/qlib/contrib/model/pytorch_alstm_ts.py +++ b/qlib/contrib/model/pytorch_alstm_ts.py @@ -24,6 +24,7 @@ import torch.optim as optim from torch.utils.data import DataLoader +from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH, TSDatasetH from ...data.dataset.handler import DataHandlerLP @@ -40,8 +41,8 @@ class ALSTM(Model): the evaluate metric used in early stop optimizer : str optimizer name - GPU : str - the GPU ID(s) used for training + GPU : int + the GPU ID used for training """ def __init__( @@ -78,7 +79,7 @@ def __init__( self.early_stop = early_stop self.optimizer = optimizer.lower() self.loss = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.n_jobs = n_jobs self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -127,7 +128,10 @@ def __init__( hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, - ).to(self.device) + ) + self.logger.info("model:\n{:}".format(self.ALSTM_model)) + self.logger.info("model size: {:.4f} MB".format(count_parameters(self.ALSTM_model))) + if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.ALSTM_model.parameters(), lr=self.lr) elif optimizer.lower() == "gd": @@ -201,7 +205,6 @@ def fit( self, dataset, evals_result=dict(), - verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) diff --git a/qlib/contrib/model/pytorch_gats.py b/qlib/contrib/model/pytorch_gats.py index 07048e1bc1a..7d3b00232ba 100644 --- a/qlib/contrib/model/pytorch_gats.py +++ b/qlib/contrib/model/pytorch_gats.py @@ -22,6 +22,7 @@ import torch.nn as nn import torch.optim as optim +from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH from ...data.dataset.handler import DataHandlerLP @@ -42,8 +43,8 @@ class GATs(Model): the evaluate metric used in early stop optimizer : str optimizer name - GPU : str - the GPU ID(s) used for training + GPU : int + the GPU ID used for training """ def __init__( @@ -83,7 +84,7 @@ def __init__( self.base_model = base_model self.with_pretrain = with_pretrain self.model_path = model_path - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -135,6 +136,9 @@ def __init__( dropout=self.dropout, base_model=self.base_model, ) + self.logger.info("model:\n{:}".format(self.GAT_model)) + self.logger.info("model size: {:.4f} MB".format(count_parameters(self.GAT_model))) + if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GAT_model.parameters(), lr=self.lr) elif optimizer.lower() == "gd": @@ -232,7 +236,6 @@ def fit( self, dataset: DatasetH, evals_result=dict(), - verbose=True, save_path=None, ): diff --git a/qlib/contrib/model/pytorch_gats_ts.py b/qlib/contrib/model/pytorch_gats_ts.py index 1e94f56e418..99ae3d4da3e 100644 --- a/qlib/contrib/model/pytorch_gats_ts.py +++ b/qlib/contrib/model/pytorch_gats_ts.py @@ -24,6 +24,7 @@ from torch.utils.data import DataLoader from torch.utils.data import Sampler +from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH from ...data.dataset.handler import DataHandlerLP @@ -62,8 +63,8 @@ class GATs(Model): the evaluate metric used in early stop optimizer : str optimizer name - GPU : str - the GPU ID(s) used for training + GPU : int + the GPU ID used for training """ def __init__( @@ -104,7 +105,7 @@ def __init__( self.base_model = base_model self.with_pretrain = with_pretrain self.model_path = model_path - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.n_jobs = n_jobs self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -157,6 +158,9 @@ def __init__( dropout=self.dropout, base_model=self.base_model, ) + self.logger.info("model:\n{:}".format(self.GAT_model)) + self.logger.info("model size: {:.4f} MB".format(count_parameters(self.GAT_model))) + if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GAT_model.parameters(), lr=self.lr) elif optimizer.lower() == "gd": @@ -245,7 +249,6 @@ def fit( self, dataset, evals_result=dict(), - verbose=True, save_path=None, ): diff --git a/qlib/contrib/model/pytorch_gru.py b/qlib/contrib/model/pytorch_gru.py index 84f863b9fb0..d4dc884527b 100755 --- a/qlib/contrib/model/pytorch_gru.py +++ b/qlib/contrib/model/pytorch_gru.py @@ -23,6 +23,7 @@ import torch.nn as nn import torch.optim as optim +from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH from ...data.dataset.handler import DataHandlerLP @@ -76,7 +77,7 @@ def __init__( self.early_stop = early_stop self.optimizer = optimizer.lower() self.loss = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -123,6 +124,9 @@ def __init__( num_layers=self.num_layers, dropout=self.dropout, ) + self.logger.info("model:\n{:}".format(self.gru_model)) + self.logger.info("model size: {:.4f} MB".format(count_parameters(self.gru_model))) + if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.gru_model.parameters(), lr=self.lr) elif optimizer.lower() == "gd": @@ -214,7 +218,6 @@ def fit( self, dataset: DatasetH, evals_result=dict(), - verbose=True, save_path=None, ): diff --git a/qlib/contrib/model/pytorch_gru_ts.py b/qlib/contrib/model/pytorch_gru_ts.py index bb6618b854c..1f2137c8fa3 100755 --- a/qlib/contrib/model/pytorch_gru_ts.py +++ b/qlib/contrib/model/pytorch_gru_ts.py @@ -24,6 +24,7 @@ import torch.optim as optim from torch.utils.data import DataLoader +from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH, TSDatasetH from ...data.dataset.handler import DataHandlerLP @@ -78,7 +79,7 @@ def __init__( self.early_stop = early_stop self.optimizer = optimizer.lower() self.loss = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.n_jobs = n_jobs self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -127,7 +128,10 @@ def __init__( hidden_size=self.hidden_size, num_layers=self.num_layers, dropout=self.dropout, - ).to(self.device) + ) + self.logger.info("model:\n{:}".format(self.gru_model)) + self.logger.info("model size: {:.4f} MB".format(count_parameters(self.gru_model))) + if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.GRU_model.parameters(), lr=self.lr) elif optimizer.lower() == "gd": @@ -201,7 +205,6 @@ def fit( self, dataset, evals_result=dict(), - verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) diff --git a/qlib/contrib/model/pytorch_lstm.py b/qlib/contrib/model/pytorch_lstm.py index 163d500ec87..be8d20a151a 100755 --- a/qlib/contrib/model/pytorch_lstm.py +++ b/qlib/contrib/model/pytorch_lstm.py @@ -76,7 +76,7 @@ def __init__( self.early_stop = early_stop self.optimizer = optimizer.lower() self.loss = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -214,7 +214,6 @@ def fit( self, dataset: DatasetH, evals_result=dict(), - verbose=True, save_path=None, ): diff --git a/qlib/contrib/model/pytorch_lstm_ts.py b/qlib/contrib/model/pytorch_lstm_ts.py index cf4f8fb9f1f..3142f15c526 100755 --- a/qlib/contrib/model/pytorch_lstm_ts.py +++ b/qlib/contrib/model/pytorch_lstm_ts.py @@ -78,7 +78,7 @@ def __init__( self.early_stop = early_stop self.optimizer = optimizer.lower() self.loss = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.n_jobs = n_jobs self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -201,7 +201,6 @@ def fit( self, dataset, evals_result=dict(), - verbose=True, save_path=None, ): dl_train = dataset.prepare("train", col_set=["feature", "label"], data_key=DataHandlerLP.DK_L) diff --git a/qlib/contrib/model/pytorch_nn.py b/qlib/contrib/model/pytorch_nn.py index 16fcea9ff53..a51481c858d 100644 --- a/qlib/contrib/model/pytorch_nn.py +++ b/qlib/contrib/model/pytorch_nn.py @@ -15,6 +15,7 @@ import torch.nn as nn import torch.optim as optim +from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH from ...data.dataset.handler import DataHandlerLP @@ -42,8 +43,8 @@ class DNNModelPytorch(Model): learning rate decay steps optimizer : str optimizer name - GPU : str - the GPU ID(s) used for training + GPU : int + the GPU ID used for training """ def __init__( @@ -80,7 +81,7 @@ def __init__( self.lr_decay_steps = lr_decay_steps self.optimizer = optimizer.lower() self.loss_type = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.use_GPU = torch.cuda.is_available() self.seed = seed self.weight_decay = weight_decay @@ -129,6 +130,9 @@ def __init__( self._scorer = mean_squared_error if loss == "mse" else roc_auc_score self.dnn_model = Net(input_dim, output_dim, layers, loss=self.loss_type) + self.logger.info("model:\n{:}".format(self.dnn_model)) + self.logger.info("model size: {:.4f} MB".format(count_parameters(self.dnn_model))) + if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.dnn_model.parameters(), lr=self.lr, weight_decay=self.weight_decay) elif optimizer.lower() == "gd": diff --git a/qlib/contrib/model/pytorch_sfm.py b/qlib/contrib/model/pytorch_sfm.py index d5169e6c7bd..40b991c9c42 100644 --- a/qlib/contrib/model/pytorch_sfm.py +++ b/qlib/contrib/model/pytorch_sfm.py @@ -23,6 +23,7 @@ import torch.nn.init as init import torch.optim as optim +from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH from ...data.dataset.handler import DataHandlerLP @@ -196,8 +197,8 @@ class SFM(Model): learning rate optimizer : str optimizer name - GPU : str - the GPU ID(s) used for training + GPU : int + the GPU ID used for training """ def __init__( @@ -216,7 +217,7 @@ def __init__( eval_steps=5, loss="mse", optimizer="gd", - GPU="0", + GPU=0, seed=None, **kwargs ): @@ -239,7 +240,7 @@ def __init__( self.eval_steps = eval_steps self.optimizer = optimizer.lower() self.loss = loss - self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() else "cpu") + self.device = torch.device("cuda:%d" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu") self.use_gpu = torch.cuda.is_available() self.seed = seed @@ -295,6 +296,9 @@ def __init__( dropout_U=self.dropout_U, device=self.device, ) + self.logger.info("model:\n{:}".format(self.sfm_model)) + self.logger.info("model size: {:.4f} MB".format(count_parameters(self.sfm_model))) + if optimizer.lower() == "adam": self.train_optimizer = optim.Adam(self.sfm_model.parameters(), lr=self.lr) elif optimizer.lower() == "gd": @@ -365,7 +369,6 @@ def fit( self, dataset: DatasetH, evals_result=dict(), - verbose=True, save_path=None, ): diff --git a/qlib/contrib/model/pytorch_tabnet.py b/qlib/contrib/model/pytorch_tabnet.py index 62e32d701ce..020bbaff28a 100644 --- a/qlib/contrib/model/pytorch_tabnet.py +++ b/qlib/contrib/model/pytorch_tabnet.py @@ -23,6 +23,7 @@ import torch.nn.functional as F from torch.autograd import Function +from .pytorch_utils import count_parameters from ...model.base import Model from ...data.dataset import DatasetH from ...data.dataset.handler import DataHandlerLP @@ -49,7 +50,7 @@ def __init__( loss="mse", metric="", early_stop=20, - GPU="1", + GPU=0, pretrain_loss="custom", ps=0.3, lr=0.01, @@ -75,7 +76,7 @@ def __init__( self.n_epochs = n_epochs self.logger = get_module_logger("TabNet") self.pretrain_n_epochs = pretrain_n_epochs - self.device = "cuda:%s" % (GPU) if torch.cuda.is_available() else "cpu" + self.device = "cuda:%s" % (GPU) if torch.cuda.is_available() and GPU >= 0 else "cpu" self.loss = loss self.metric = metric self.early_stop = early_stop @@ -98,6 +99,8 @@ def __init__( self.tabnet_decoder = TabNet_Decoder(self.out_dim, self.d_feat, n_shared, n_ind, vbs, n_steps, self.device).to( self.device ) + self.logger.info("model:\n{:}\n{:}".format(self.tabnet_model, self.tabnet_decoder)) + self.logger.info("model size: {:.4f} MB".format(count_parameters([self.tabnet_model, self.tabnet_decoder]))) if optimizer.lower() == "adam": self.pretrain_optimizer = optim.Adam( @@ -159,7 +162,6 @@ def fit( self, dataset: DatasetH, evals_result=dict(), - verbose=True, save_path=None, ): if self.pretrain: diff --git a/qlib/contrib/model/pytorch_utils.py b/qlib/contrib/model/pytorch_utils.py new file mode 100644 index 00000000000..1148a596af5 --- /dev/null +++ b/qlib/contrib/model/pytorch_utils.py @@ -0,0 +1,37 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT License. + +import torch.nn as nn + + +def count_parameters(models_or_parameters, unit="m"): + """ + This function is to obtain the storage size unit of a (or multiple) models. + + Parameters + ---------- + models_or_parameters : PyTorch model(s) or a list of parameters. + unit : the storage size unit. + + Returns + ------- + The number of parameters of the given model(s) or parameters. + """ + if isinstance(models_or_parameters, nn.Module): + counts = sum(v.numel() for v in models_or_parameters.parameters()) + elif isinstance(models_or_parameters, nn.Parameter): + counts = models_or_parameters.numel() + elif isinstance(models_or_parameters, (list, tuple)): + return sum(count_parameters(x, unit) for x in models_or_parameters) + else: + counts = sum(v.numel() for v in models_or_parameters) + unit = unit.lower() + if unit == "kb" or unit == "k": + counts /= 2 ** 10 + elif unit == "mb" or unit == "m": + counts /= 2 ** 20 + elif unit == "gb" or unit == "g": + counts /= 2 ** 30 + elif unit is not None: + raise ValueError("Unknow unit: {:}".format(unit)) + return counts diff --git a/qlib/contrib/online/operator.py b/qlib/contrib/online/operator.py index c8b44f57858..d2307dad545 100644 --- a/qlib/contrib/online/operator.py +++ b/qlib/contrib/online/operator.py @@ -148,7 +148,7 @@ def execute(self, date, exchange_config, path): for user_id, user in um.users.items(): dates, trade_exchange = prepare(um, trade_date, user_id, exchange_config) executor = SimulatorExecutor(trade_exchange=trade_exchange) - if not str(dates[0].date()) == str(pred_date.date()): + if str(dates[0].date()) != str(pred_date.date()): raise ValueError( "The account data is not newest! last trading date {}, today {}".format( dates[0].date(), trade_date.date() diff --git a/qlib/log.py b/qlib/log.py index 6553dcb114e..126acb9d26c 100644 --- a/qlib/log.py +++ b/qlib/log.py @@ -3,8 +3,7 @@ import logging -import logging.handlers -import os +from typing import Optional, Text, Dict, Any import re from logging import config as logging_config from time import time @@ -13,16 +12,13 @@ from .config import C -def get_module_logger(module_name, level=None): +def get_module_logger(module_name, level: Optional[int] = None): """ Get a logger for a specific module. :param module_name: str Logic module name. :param level: int - :param sh_level: int - Stream handler log level. - :param log_format: str :return: Logger Logger object. """ @@ -103,7 +99,7 @@ def logt(cls, name="", show_start=False): cls.log_cost_time(info=f"{name} Done") -def set_log_with_config(log_config: dict): +def set_log_with_config(log_config: Dict[Text, Any]): """set log with config :param log_config: diff --git a/qlib/workflow/__init__.py b/qlib/workflow/__init__.py index 7bff505ce35..834bd059e68 100644 --- a/qlib/workflow/__init__.py +++ b/qlib/workflow/__init__.py @@ -2,6 +2,7 @@ # Licensed under the MIT License. from contextlib import contextmanager +from typing import Text, Optional from .expm import MLflowExpManager from .exp import Experiment from .recorder import Recorder @@ -20,7 +21,9 @@ def __repr__(self): return "{name}(manager={manager})".format(name=self.__class__.__name__, manager=self.exp_manager) @contextmanager - def start(self, experiment_name=None, recorder_name=None, uri=None): + def start( + self, experiment_name: Optional[Text] = None, recorder_name: Optional[Text] = None, uri: Optional[Text] = None + ): """ Method to start an experiment. This method can only be called within a Python's `with` statement. Here is the example code: @@ -282,6 +285,12 @@ def get_uri(self): """ return self.exp_manager.uri + def set_uri(self, uri: Optional[Text]): + """ + Method to reset the current uri of current experiment manager. + """ + self.exp_manager.set_uri(uri) + def get_recorder(self, recorder_id=None, recorder_name=None, experiment_name=None): """ Method for retrieving a recorder. diff --git a/qlib/workflow/cli.py b/qlib/workflow/cli.py index 6eba962779b..879c0aaeb5f 100644 --- a/qlib/workflow/cli.py +++ b/qlib/workflow/cli.py @@ -16,7 +16,7 @@ def get_path_list(path): if isinstance(path, str): return [path] else: - return [p for p in path] + return list(path) def sys_config(config, config_path): diff --git a/qlib/workflow/exp.py b/qlib/workflow/exp.py index 18b0a143d93..9cda020c375 100644 --- a/qlib/workflow/exp.py +++ b/qlib/workflow/exp.py @@ -23,7 +23,7 @@ def __init__(self, id, name): self.active_recorder = None # only one recorder can running each time def __repr__(self): - return "{name}(info={info})".format(name=self.__class__.__name__, info=self.info) + return "{name}(id={id}, info={info})".format(name=self.__class__.__name__, id=self.id, info=self.info) def __str__(self): return str(self.info) @@ -175,6 +175,9 @@ def __init__(self, id, name, uri): self._default_rec_name = "mlflow_recorder" self._client = mlflow.tracking.MlflowClient(tracking_uri=self._uri) + def __repr__(self): + return "{name}(id={id}, info={info})".format(name=self.__class__.__name__, id=self.id, info=self.info) + def start(self, recorder_name=None): logger.info(f"Experiment {self.id} starts running ...") # set up recorder diff --git a/qlib/workflow/expm.py b/qlib/workflow/expm.py index 4ba72a63440..56fe810c38d 100644 --- a/qlib/workflow/expm.py +++ b/qlib/workflow/expm.py @@ -10,6 +10,7 @@ from typing import Optional, Text from .exp import MLflowExperiment, Experiment +from ..config import C from .recorder import Recorder from ..log import get_module_logger @@ -23,15 +24,12 @@ class ExpManager: """ def __init__(self, uri: Text, default_exp_name: Optional[Text]): - self._default_uri = uri - self._current_uri = None + self._current_uri = uri self.default_exp_name = default_exp_name self.active_experiment = None # only one experiment can active each time def __repr__(self): - return "{name}(default_uri={duri}, current_uri={curi})".format( - name=self.__class__.__name__, duri=self._default_uri, curi=self._current_uri - ) + return "{name}(current_uri={curi})".format(name=self.__class__.__name__, curi=self._current_uri) def start_exp( self, @@ -217,6 +215,15 @@ def delete_exp(self, experiment_id=None, experiment_name=None): """ raise NotImplementedError(f"Please implement the `delete_exp` method.") + @property + def default_uri(self): + """ + Get the default tracking URI from qlib.config.C + """ + if "kwargs" not in C.exp_manager or "uri" not in C.exp_manager["kwargs"]: + raise ValueError("The default URI is not set in qlib.config.C") + return C.exp_manager["kwargs"]["uri"] + @property def uri(self): """ @@ -226,7 +233,7 @@ def uri(self): ------- The tracking URI string. """ - return self._current_uri or self._default_uri + return self._current_uri or self.default_uri def set_uri(self, uri: Optional[Text] = None): """ @@ -239,7 +246,7 @@ def set_uri(self, uri: Optional[Text] = None): """ if uri is None: logger.info("No tracking URI is provided. Use the default tracking URI.") - self._current_uri = self._default_uri + self._current_uri = self.default_uri else: # Temporarily re-set the current uri as the uri argument. self._current_uri = uri diff --git a/qlib/workflow/recorder.py b/qlib/workflow/recorder.py index e75ae347b0d..5915e58da64 100644 --- a/qlib/workflow/recorder.py +++ b/qlib/workflow/recorder.py @@ -201,7 +201,7 @@ class MLflowRecorder(Recorder): def __init__(self, experiment_id, uri, name=None, mlflow_run=None): super(MLflowRecorder, self).__init__(experiment_id, name) self._uri = uri - self.artifact_uri = None + self._artifact_uri = None self.client = mlflow.tracking.MlflowClient(tracking_uri=self._uri) # construct from mlflow run if mlflow_run is not None: @@ -220,14 +220,51 @@ def __init__(self, experiment_id, uri, name=None, mlflow_run=None): else None ) + def __repr__(self): + name = self.__class__.__name__ + space_length = len(name) + 1 + return "{name}(info={info},\n{space}uri={uri},\n{space}artifact_uri={artifact_uri},\n{space}client={client})".format( + name=name, + space=" " * space_length, + info=self.info, + uri=self.uri, + artifact_uri=self.artifact_uri, + client=self.client, + ) + + @property + def uri(self): + return self._uri + + @property + def artifact_uri(self): + return self._artifact_uri + + def get_local_dir(self): + """ + This function will return the directory path of this recorder. + """ + if self.artifact_uri is not None: + local_dir_path = Path(self.artifact_uri.lstrip("file:")) / ".." + local_dir_path = str(local_dir_path.resolve()) + if os.path.isdir(local_dir_path): + return local_dir_path + else: + raise RuntimeError("This recorder is not saved in the local file system.") + + else: + raise Exception( + "Please make sure the recorder has been created and started properly before getting artifact uri." + ) + def start_run(self): # set the tracking uri - mlflow.set_tracking_uri(self._uri) + mlflow.set_tracking_uri(self.uri) # start the run run = mlflow.start_run(self.id, self.experiment_id, self.name) # save the run id and artifact_uri self.id = run.info.run_id - self.artifact_uri = run.info.artifact_uri + self._artifact_uri = run.info.artifact_uri self.start_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") self.status = Recorder.STATUS_R logger.info(f"Recorder {self.id} starts running under Experiment {self.experiment_id} ...") @@ -247,7 +284,7 @@ def end_run(self, status: str = Recorder.STATUS_S): self.status = status def save_objects(self, local_path=None, artifact_path=None, **kwargs): - assert self._uri is not None, "Please start the experiment and recorder first before using recorder directly." + assert self.uri is not None, "Please start the experiment and recorder first before using recorder directly." if local_path is not None: self.client.log_artifacts(self.id, local_path, artifact_path) else: @@ -259,7 +296,7 @@ def save_objects(self, local_path=None, artifact_path=None, **kwargs): shutil.rmtree(temp_dir) def load_object(self, name): - assert self._uri is not None, "Please start the experiment and recorder first before using recorder directly." + assert self.uri is not None, "Please start the experiment and recorder first before using recorder directly." path = self.client.download_artifacts(self.id, name) with Path(path).open("rb") as f: return pickle.load(f) @@ -289,7 +326,7 @@ def get_artifact_uri(self): ) def list_artifacts(self, artifact_path=None): - assert self._uri is not None, "Please start the experiment and recorder first before using recorder directly." + assert self.uri is not None, "Please start the experiment and recorder first before using recorder directly." artifacts = self.client.list_artifacts(self.id, artifact_path) return [art.path for art in artifacts] diff --git a/tests/test_all_pipeline.py b/tests/test_all_pipeline.py index d9d684697c0..fbf15d29ad7 100644 --- a/tests/test_all_pipeline.py +++ b/tests/test_all_pipeline.py @@ -123,6 +123,8 @@ def train(): recorder = R.get_recorder() # To test __repr__ print(recorder) + # To test get_local_dir + print(recorder.get_local_dir()) rid = recorder.id sr = SignalRecord(model, dataset, recorder) sr.generate()