-
Notifications
You must be signed in to change notification settings - Fork 5.9k
Description
❓ Questions and Help
Hi,
I am running the example 'workflow_by_code.ipynb'. This is not the first time I have run it, but today this part of the code is taking a very long time (more than 30 minutes, and it still has not finished):
`dataset = init_instance_by_config(task["dataset"])`
I am using Google Colab
When I force-stop the code, it shows me:
`<---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/qlib/data/data.py in features(self, instruments, fields, start_time, end_time, freq, disk_cache)
974 try:
--> 975 return DatasetD.dataset(instruments, fields, start_time, end_time, freq, disk_cache)
976 except TypeError:
TypeError: dataset() takes from 3 to 6 positional arguments but 7 were given
During handling of the above exception, another exception occurred:
KeyboardInterrupt Traceback (most recent call last)
19 frames
in ()
46 # model initiaiton
47 model = init_instance_by_config(task["model"])
---> 48 dataset = init_instance_by_config(task["dataset"])
/usr/local/lib/python3.6/dist-packages/qlib/utils/init.py in init_instance_by_config(config, module, accept_types, **kwargs)
251
252 klass, cls_kwargs = get_cls_kwargs(config, module)
--> 253 return klass(**cls_kwargs, **kwargs)
254
255
/usr/local/lib/python3.6/dist-packages/qlib/data/dataset/init.py in init(self, handler, segments)
86 handler will be passed into setup_data.
87 """
---> 88 super().init(handler, segments)
89
90 def setup_data(self, handler: Union[dict, DataHandler], segments: list):
/usr/local/lib/python3.6/dist-packages/qlib/data/dataset/init.py in init(self, *args, **kwargs)
29 The data could specify the info to caculate the essential data for preparation
30 """
---> 31 self.setup_data(*args, **kwargs)
32 super().init()
33
/usr/local/lib/python3.6/dist-packages/qlib/data/dataset/init.py in setup_data(self, handler, segments)
117 }
118 """
--> 119 self._handler = init_instance_by_config(handler, accept_types=DataHandler)
120 self._segments = segments.copy()
121
/usr/local/lib/python3.6/dist-packages/qlib/utils/init.py in init_instance_by_config(config, module, accept_types, **kwargs)
251
252 klass, cls_kwargs = get_cls_kwargs(config, module)
--> 253 return klass(**cls_kwargs, **kwargs)
254
255
/usr/local/lib/python3.6/dist-packages/qlib/contrib/data/handler.py in init(self, instruments, start_time, end_time, infer_processors, learn_processors, fit_start_time, fit_end_time, process_type, **kwargs)
154 infer_processors=infer_processors,
155 learn_processors=learn_processors,
--> 156 process_type=process_type,
157 )
158
/usr/local/lib/python3.6/dist-packages/qlib/data/dataset/handler.py in init(self, instruments, start_time, end_time, data_loader, infer_processors, learn_processors, process_type, **kwargs)
320
321 self.process_type = process_type
--> 322 super().init(instruments, start_time, end_time, data_loader, **kwargs)
323
324 def get_all_processors(self):
/usr/local/lib/python3.6/dist-packages/qlib/data/dataset/handler.py in init(self, instruments, start_time, end_time, data_loader, init_data, fetch_orig)
96 if init_data:
97 with TimeInspector.logt("Init data"):
---> 98 self.init()
99 super().init()
100
/usr/local/lib/python3.6/dist-packages/qlib/data/dataset/handler.py in init(self, init_type, enable_cache)
401 """
402 # init raw data
--> 403 super().init(enable_cache=enable_cache)
404
405 with TimeInspector.logt("fit & process data"):
/usr/local/lib/python3.6/dist-packages/qlib/data/dataset/handler.py in init(self, enable_cache)
120 # _data may be with multiple column index level. The outer level indicates the feature set name
121 with TimeInspector.logt("Loading data"):
--> 122 self._data = self.data_loader.load(self.instruments, self.start_time, self.end_time)
123 # TODO: cache
124
/usr/local/lib/python3.6/dist-packages/qlib/data/dataset/loader.py in load(self, instruments, start_time, end_time)
120 {
121 grp: self.load_group_df(instruments, exprs, names, start_time, end_time)
--> 122 for grp, (exprs, names) in self.fields.items()
123 },
124 axis=1,
/usr/local/lib/python3.6/dist-packages/qlib/data/dataset/loader.py in (.0)
120 {
121 grp: self.load_group_df(instruments, exprs, names, start_time, end_time)
--> 122 for grp, (exprs, names) in self.fields.items()
123 },
124 axis=1,
/usr/local/lib/python3.6/dist-packages/qlib/data/dataset/loader.py in load_group_df(self, instruments, exprs, names, start_time, end_time)
154 warnings.warn("filter_pipe is not None, but it will not be used with instruments as list")
155
--> 156 df = D.features(instruments, exprs, start_time, end_time)
157 df.columns = names
158 df = df.swaplevel().sort_index() # NOTE: always return <datetime, instrument>
/usr/local/lib/python3.6/dist-packages/qlib/data/data.py in features(self, instruments, fields, start_time, end_time, freq, disk_cache)
975 return DatasetD.dataset(instruments, fields, start_time, end_time, freq, disk_cache)
976 except TypeError:
--> 977 return DatasetD.dataset(instruments, fields, start_time, end_time, freq)
978
979
/usr/local/lib/python3.6/dist-packages/qlib/data/data.py in dataset(self, instruments, fields, start_time, end_time, freq)
706 end_time = cal[-1]
707
--> 708 data = self.dataset_processor(instruments_d, column_names, start_time, end_time, freq)
709
710 return data
/usr/local/lib/python3.6/dist-packages/qlib/data/data.py in dataset_processor(instruments_d, column_names, start_time, end_time, freq)
456
457 p.close()
--> 458 p.join()
459
460 new_data = dict()
/usr/lib/python3.6/multiprocessing/pool.py in join(self)
544 util.debug('joining pool')
545 assert self._state in (CLOSE, TERMINATE)
--> 546 self._worker_handler.join()
547 self._task_handler.join()
548 self._result_handler.join()
/usr/lib/python3.6/threading.py in join(self, timeout)
1054
1055 if timeout is None:
-> 1056 self._wait_for_tstate_lock()
1057 else:
1058 # the behavior of a negative timeout isn't documented, but
/usr/lib/python3.6/threading.py in _wait_for_tstate_lock(self, block, timeout)
1070 if lock is None: # already determined that the C code is done
1071 assert self._is_stopped
-> 1072 elif lock.acquire(block, timeout):
1073 lock.release()
1074 self._stop()>`
The qlib version is '0.6.1'
The Python version is '3.6.9'.
Thanks.