Skip to content

Commit 539ab10

Browse files
authored
Remove arctic from Qlib core to Contrib (microsoft#865)
* Remove arctic from Qlib core to Contrib * fix empty df bug
1 parent 54bceee commit 539ab10

File tree

6 files changed

+62
-45
lines changed

6 files changed

+62
-45
lines changed

examples/orderbook_data/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ Current version of script with default value tries to connect localhost **via de
1717
Run following command to install necessary libraries
1818
```
1919
pip install pytest
20+
pip install arctic # NOTE: pip may fail to resolve the right package dependencies! Please make sure the dependencies are satisfied.
2021
```
2122

2223
# Importing example data

examples/orderbook_data/example.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,11 @@ def setUp(self):
2525
mem_cache_type="sizeof",
2626
kernels=1,
2727
expression_provider={"class": "LocalExpressionProvider", "kwargs": {"time2idx": False}},
28-
feature_provider={"class": "ArcticFeatureProvider", "kwargs": {"uri": "127.0.0.1"}},
28+
feature_provider={
29+
"class": "ArcticFeatureProvider",
30+
"module_path": "qlib.contrib.data.data",
31+
"kwargs": {"uri": "127.0.0.1"},
32+
},
2933
dataset_provider={
3034
"class": "LocalDatasetProvider",
3135
"kwargs": {

qlib/contrib/data/data.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
# Copyright (c) Microsoft Corporation.
2+
# Licensed under the MIT License.
3+
4+
# We moved arctic out of the core framework of Qlib and into contrib because:
5+
# - Arctic has very strict limitations on the pandas and numpy versions
6+
# - https://github.com/man-group/arctic/pull/908
7+
# - pip fails to compute the right version number!
8+
# - Maybe we can solve this problem by poetry
9+
10+
# FIXME: So if you want to use the arctic-based provider, please install arctic manually;
11+
# `pip install arctic` may not be enough.
12+
from arctic import Arctic
13+
import pandas as pd
14+
import pymongo
15+
16+
from qlib.data.data import FeatureProvider
17+
18+
19+
class ArcticFeatureProvider(FeatureProvider):
    """Feature provider that reads instrument fields from an Arctic store.

    Every call to :meth:`feature` opens its own ``pymongo.MongoClient`` on
    ``uri``, reads one column of one symbol from the Arctic library named
    ``freq``, and keeps only the rows that fall inside the configured
    market transaction time windows.
    """

    def __init__(self, uri="127.0.0.1", retry_time=0, market_transaction_time_list=None):
        """
        Parameters
        ----------
        uri : str
            Address handed to ``pymongo.MongoClient``.
        retry_time : int
            Stored on the instance; nothing in this class reads it yet (see the TODO below).
        market_transaction_time_list : list of (str, str), optional
            ``(start, end)`` time-of-day pairs forwarded to ``Series.between_time``.
            When omitted, ``[("09:15", "11:30"), ("13:00", "15:00")]`` is used.
        """
        super().__init__()
        self.uri = uri
        # TODO:
        # retry connecting if error occurs
        # does it really matter?
        self.retry_time = retry_time
        # Build the default per instance: a list literal in the signature would be
        # one shared object, so mutating it on one provider would affect all others.
        if market_transaction_time_list is None:
            market_transaction_time_list = [("09:15", "11:30"), ("13:00", "15:00")]
        # NOTE: this is especially important for TResample operator
        self.market_transaction_time_list = market_transaction_time_list

    def feature(self, instrument, field, start_index, end_index, freq):
        """Read one field of one instrument from the Arctic library named ``freq``.

        Parameters
        ----------
        instrument :
            Symbol to look up in the library.
        field :
            Field to read; its string form minus the first character is used
            as the column name.
        start_index, end_index :
            Forwarded together as ``chunk_range=(start_index, end_index)``.
        freq :
            Name of the Arctic library to read from.

        Returns
        -------
        pd.Series
            The requested column restricted to the configured time windows
            (concatenated in the order the windows are listed), or an empty
            Series when the instrument is not stored in the library.

        Raises
        ------
        ValueError
            If no Arctic library named ``freq`` exists.
        """
        # NOTE(review): the dropped first character is presumably the leading "$"
        # of a Qlib field expression -- confirm against the caller.
        field = str(field)[1:]
        with pymongo.MongoClient(self.uri) as client:
            # TODO: this will result in frequently connecting the server and performance issue
            arctic = Arctic(client)

            if freq not in arctic.list_libraries():
                raise ValueError("lib {} not in arctic".format(freq))

            # Resolve the library once instead of indexing ``arctic`` twice.
            library = arctic[freq]

            if instrument not in library.list_symbols():
                # The instrument does not exist. The dtype is spelled out so the
                # result does not depend on the pandas version's empty-Series
                # default and no deprecation warning is emitted.
                return pd.Series(dtype="float64")

            df = library.read(instrument, columns=[field], chunk_range=(start_index, end_index))
            s = df[field]

            if not s.empty:
                # Keep only the rows inside each market transaction window.
                s = pd.concat(
                    [
                        s.between_time(time_tuple[0], time_tuple[1])
                        for time_tuple in self.market_transaction_time_list
                    ]
                )
            return s

qlib/data/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@
1515
LocalCalendarProvider,
1616
LocalInstrumentProvider,
1717
LocalFeatureProvider,
18-
ArcticFeatureProvider,
1918
LocalExpressionProvider,
2019
LocalDatasetProvider,
2120
ClientCalendarProvider,

qlib/data/data.py

Lines changed: 1 addition & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,9 @@
1717
from multiprocessing import Pool
1818
from typing import Iterable, Union
1919
from typing import List, Union
20-
from arctic import Arctic
2120

2221
# For supporting multiprocessing in outer code, joblib is used
2322
from joblib import delayed
24-
import pymongo
2523

2624
from .cache import H
2725
from ..config import C
@@ -582,7 +580,7 @@ def inst_calculator(inst, start_time, end_time, freq, column_names, spans=None,
582580
data.index = _calendar[data.index.values.astype(int)]
583581
data.index.names = ["datetime"]
584582

585-
if spans is not None:
583+
if not data.empty and spans is not None:
586584
mask = np.zeros(len(data), dtype=bool)
587585
for begin, end in spans:
588586
mask |= (data.index >= begin) & (data.index <= end)
@@ -702,45 +700,6 @@ def feature(self, instrument, field, start_index, end_index, freq):
702700
return self.backend_obj(instrument=instrument, field=field, freq=freq)[start_index : end_index + 1]
703701

704702

705-
class ArcticFeatureProvider(FeatureProvider):
706-
def __init__(
707-
self, uri="127.0.0.1", retry_time=0, market_transaction_time_list=[("09:15", "11:30"), ("13:00", "15:00")]
708-
):
709-
super().__init__()
710-
self.uri = uri
711-
# TODO:
712-
# retry connecting if error occurs
713-
# does it real matters?
714-
self.retry_time = retry_time
715-
# NOTE: this is especially important for TResample operator
716-
self.market_transaction_time_list = market_transaction_time_list
717-
718-
def feature(self, instrument, field, start_index, end_index, freq):
719-
field = str(field)[1:]
720-
with pymongo.MongoClient(self.uri) as client:
721-
# TODO: this will result in frequently connecting the server and performance issue
722-
arctic = Arctic(client)
723-
724-
if freq not in arctic.list_libraries():
725-
raise ValueError("lib {} not in arctic".format(freq))
726-
727-
if instrument not in arctic[freq].list_symbols():
728-
# instruments does not exist
729-
return pd.Series()
730-
else:
731-
df = arctic[freq].read(instrument, columns=[field], chunk_range=(start_index, end_index))
732-
s = df[field]
733-
734-
if not s.empty:
735-
s = pd.concat(
736-
[
737-
s.between_time(time_tuple[0], time_tuple[1])
738-
for time_tuple in self.market_transaction_time_list
739-
]
740-
)
741-
return s
742-
743-
744703
class LocalExpressionProvider(ExpressionProvider):
745704
"""Local expression data provider class
746705

setup.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,6 @@ def get_version(rel_path: str) -> str:
7878
"dill",
7979
"dataclasses;python_version<'3.7'",
8080
"filelock",
81-
"arctic",
8281
]
8382

8483
# Numpy include

0 commit comments

Comments
 (0)